1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
10#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11#include "MCTargetDesc/AMDGPUTargetStreamer.h"
12#include "SIDefines.h"
13#include "SIInstrInfo.h"
14#include "SIRegisterInfo.h"
15#include "TargetInfo/AMDGPUTargetInfo.h"
16#include "Utils/AMDGPUAsmUtils.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/SmallBitVector.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Twine.h"
23#include "llvm/MC/MCAsmInfo.h"
24#include "llvm/MC/MCContext.h"
25#include "llvm/MC/MCExpr.h"
26#include "llvm/MC/MCInst.h"
27#include "llvm/MC/MCParser/MCAsmParser.h"
28#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29#include "llvm/MC/MCParser/MCTargetAsmParser.h"
30#include "llvm/MC/MCSymbol.h"
31#include "llvm/Support/AMDGPUMetadata.h"
32#include "llvm/Support/AMDHSAKernelDescriptor.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/MachineValueType.h"
35#include "llvm/Support/TargetParser.h"
36#include "llvm/Support/TargetRegistry.h"
37
38using namespace llvm;
39using namespace llvm::AMDGPU;
40using namespace llvm::amdhsa;
41
42namespace {
43
44class AMDGPUAsmParser;
45
46enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
47
48//===----------------------------------------------------------------------===//
49// Operand
50//===----------------------------------------------------------------------===//
51
52class AMDGPUOperand : public MCParsedAsmOperand {
53  enum KindTy {
54    Token,
55    Immediate,
56    Register,
57    Expression
58  } Kind;
59
60  SMLoc StartLoc, EndLoc;
61  const AMDGPUAsmParser *AsmParser;
62
63public:
64  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
65    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
66
67  using Ptr = std::unique_ptr<AMDGPUOperand>;
68
69  struct Modifiers {
70    bool Abs = false;
71    bool Neg = false;
72    bool Sext = false;
73
74    bool hasFPModifiers() const { return Abs || Neg; }
75    bool hasIntModifiers() const { return Sext; }
76    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
77
78    int64_t getFPModifiersOperand() const {
79      int64_t Operand = 0;
80      Operand |= Abs ? SISrcMods::ABS : 0u;
81      Operand |= Neg ? SISrcMods::NEG : 0u;
82      return Operand;
83    }
84
85    int64_t getIntModifiersOperand() const {
86      int64_t Operand = 0;
87      Operand |= Sext ? SISrcMods::SEXT : 0u;
88      return Operand;
89    }
90
91    int64_t getModifiersOperand() const {
92      assert(!(hasFPModifiers() && hasIntModifiers())
93           && "fp and int modifiers should not be used simultaneously");
94      if (hasFPModifiers()) {
95        return getFPModifiersOperand();
96      } else if (hasIntModifiers()) {
97        return getIntModifiersOperand();
98      } else {
99        return 0;
100      }
101    }
102
103    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
104  };
105
106  enum ImmTy {
107    ImmTyNone,
108    ImmTyGDS,
109    ImmTyLDS,
110    ImmTyOffen,
111    ImmTyIdxen,
112    ImmTyAddr64,
113    ImmTyOffset,
114    ImmTyInstOffset,
115    ImmTyOffset0,
116    ImmTyOffset1,
117    ImmTyCPol,
118    ImmTySWZ,
119    ImmTyTFE,
120    ImmTyD16,
121    ImmTyClampSI,
122    ImmTyOModSI,
123    ImmTyDPP8,
124    ImmTyDppCtrl,
125    ImmTyDppRowMask,
126    ImmTyDppBankMask,
127    ImmTyDppBoundCtrl,
128    ImmTyDppFi,
129    ImmTySdwaDstSel,
130    ImmTySdwaSrc0Sel,
131    ImmTySdwaSrc1Sel,
132    ImmTySdwaDstUnused,
133    ImmTyDMask,
134    ImmTyDim,
135    ImmTyUNorm,
136    ImmTyDA,
137    ImmTyR128A16,
138    ImmTyA16,
139    ImmTyLWE,
140    ImmTyExpTgt,
141    ImmTyExpCompr,
142    ImmTyExpVM,
143    ImmTyFORMAT,
144    ImmTyHwreg,
145    ImmTyOff,
146    ImmTySendMsg,
147    ImmTyInterpSlot,
148    ImmTyInterpAttr,
149    ImmTyAttrChan,
150    ImmTyOpSel,
151    ImmTyOpSelHi,
152    ImmTyNegLo,
153    ImmTyNegHi,
154    ImmTySwizzle,
155    ImmTyGprIdxMode,
156    ImmTyHigh,
157    ImmTyBLGP,
158    ImmTyCBSZ,
159    ImmTyABID,
160    ImmTyEndpgm,
161  };
162
163  enum ImmKindTy {
164    ImmKindTyNone,
165    ImmKindTyLiteral,
166    ImmKindTyConst,
167  };
168
169private:
170  struct TokOp {
171    const char *Data;
172    unsigned Length;
173  };
174
175  struct ImmOp {
176    int64_t Val;
177    ImmTy Type;
178    bool IsFPImm;
179    mutable ImmKindTy Kind;
180    Modifiers Mods;
181  };
182
183  struct RegOp {
184    unsigned RegNo;
185    Modifiers Mods;
186  };
187
188  union {
189    TokOp Tok;
190    ImmOp Imm;
191    RegOp Reg;
192    const MCExpr *Expr;
193  };
194
195public:
196  bool isToken() const override {
197    if (Kind == Token)
198      return true;
199
200    // When parsing operands, we can't always tell if something was meant to be
201    // a token, like 'gds', or an expression that references a global variable.
202    // In this case, we assume the string is an expression, and if we need to
203    // interpret is a token, then we treat the symbol name as the token.
204    return isSymbolRefExpr();
205  }
206
207  bool isSymbolRefExpr() const {
208    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
209  }
210
211  bool isImm() const override {
212    return Kind == Immediate;
213  }
214
215  void setImmKindNone() const {
216    assert(isImm());
217    Imm.Kind = ImmKindTyNone;
218  }
219
220  void setImmKindLiteral() const {
221    assert(isImm());
222    Imm.Kind = ImmKindTyLiteral;
223  }
224
225  void setImmKindConst() const {
226    assert(isImm());
227    Imm.Kind = ImmKindTyConst;
228  }
229
230  bool IsImmKindLiteral() const {
231    return isImm() && Imm.Kind == ImmKindTyLiteral;
232  }
233
234  bool isImmKindConst() const {
235    return isImm() && Imm.Kind == ImmKindTyConst;
236  }
237
238  bool isInlinableImm(MVT type) const;
239  bool isLiteralImm(MVT type) const;
240
241  bool isRegKind() const {
242    return Kind == Register;
243  }
244
245  bool isReg() const override {
246    return isRegKind() && !hasModifiers();
247  }
248
249  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
250    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
251  }
252
253  bool isRegOrImmWithInt16InputMods() const {
254    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
255  }
256
257  bool isRegOrImmWithInt32InputMods() const {
258    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
259  }
260
261  bool isRegOrImmWithInt64InputMods() const {
262    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
263  }
264
265  bool isRegOrImmWithFP16InputMods() const {
266    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
267  }
268
269  bool isRegOrImmWithFP32InputMods() const {
270    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
271  }
272
273  bool isRegOrImmWithFP64InputMods() const {
274    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
275  }
276
277  bool isVReg() const {
278    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
279           isRegClass(AMDGPU::VReg_64RegClassID) ||
280           isRegClass(AMDGPU::VReg_96RegClassID) ||
281           isRegClass(AMDGPU::VReg_128RegClassID) ||
282           isRegClass(AMDGPU::VReg_160RegClassID) ||
283           isRegClass(AMDGPU::VReg_192RegClassID) ||
284           isRegClass(AMDGPU::VReg_256RegClassID) ||
285           isRegClass(AMDGPU::VReg_512RegClassID) ||
286           isRegClass(AMDGPU::VReg_1024RegClassID);
287  }
288
289  bool isVReg32() const {
290    return isRegClass(AMDGPU::VGPR_32RegClassID);
291  }
292
293  bool isVReg32OrOff() const {
294    return isOff() || isVReg32();
295  }
296
297  bool isNull() const {
298    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
299  }
300
301  bool isVRegWithInputMods() const;
302
303  bool isSDWAOperand(MVT type) const;
304  bool isSDWAFP16Operand() const;
305  bool isSDWAFP32Operand() const;
306  bool isSDWAInt16Operand() const;
307  bool isSDWAInt32Operand() const;
308
309  bool isImmTy(ImmTy ImmT) const {
310    return isImm() && Imm.Type == ImmT;
311  }
312
313  bool isImmModifier() const {
314    return isImm() && Imm.Type != ImmTyNone;
315  }
316
317  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
318  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
319  bool isDMask() const { return isImmTy(ImmTyDMask); }
320  bool isDim() const { return isImmTy(ImmTyDim); }
321  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
322  bool isDA() const { return isImmTy(ImmTyDA); }
323  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
324  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
325  bool isLWE() const { return isImmTy(ImmTyLWE); }
326  bool isOff() const { return isImmTy(ImmTyOff); }
327  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
328  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
329  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
330  bool isOffen() const { return isImmTy(ImmTyOffen); }
331  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
332  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
333  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
334  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
335  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
336
337  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
338  bool isGDS() const { return isImmTy(ImmTyGDS); }
339  bool isLDS() const { return isImmTy(ImmTyLDS); }
340  bool isCPol() const { return isImmTy(ImmTyCPol); }
341  bool isSWZ() const { return isImmTy(ImmTySWZ); }
342  bool isTFE() const { return isImmTy(ImmTyTFE); }
343  bool isD16() const { return isImmTy(ImmTyD16); }
344  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
345  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
346  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
347  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
348  bool isFI() const { return isImmTy(ImmTyDppFi); }
349  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
350  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
351  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
352  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
353  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
354  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
355  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
356  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
357  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
358  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
359  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
360  bool isHigh() const { return isImmTy(ImmTyHigh); }
361
362  bool isMod() const {
363    return isClampSI() || isOModSI();
364  }
365
366  bool isRegOrImm() const {
367    return isReg() || isImm();
368  }
369
370  bool isRegClass(unsigned RCID) const;
371
372  bool isInlineValue() const;
373
374  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
375    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
376  }
377
378  bool isSCSrcB16() const {
379    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
380  }
381
382  bool isSCSrcV2B16() const {
383    return isSCSrcB16();
384  }
385
386  bool isSCSrcB32() const {
387    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
388  }
389
390  bool isSCSrcB64() const {
391    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
392  }
393
394  bool isBoolReg() const;
395
396  bool isSCSrcF16() const {
397    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
398  }
399
400  bool isSCSrcV2F16() const {
401    return isSCSrcF16();
402  }
403
404  bool isSCSrcF32() const {
405    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
406  }
407
408  bool isSCSrcF64() const {
409    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
410  }
411
412  bool isSSrcB32() const {
413    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
414  }
415
416  bool isSSrcB16() const {
417    return isSCSrcB16() || isLiteralImm(MVT::i16);
418  }
419
420  bool isSSrcV2B16() const {
421    llvm_unreachable("cannot happen");
422    return isSSrcB16();
423  }
424
425  bool isSSrcB64() const {
426    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
427    // See isVSrc64().
428    return isSCSrcB64() || isLiteralImm(MVT::i64);
429  }
430
431  bool isSSrcF32() const {
432    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
433  }
434
435  bool isSSrcF64() const {
436    return isSCSrcB64() || isLiteralImm(MVT::f64);
437  }
438
439  bool isSSrcF16() const {
440    return isSCSrcB16() || isLiteralImm(MVT::f16);
441  }
442
443  bool isSSrcV2F16() const {
444    llvm_unreachable("cannot happen");
445    return isSSrcF16();
446  }
447
448  bool isSSrcV2FP32() const {
449    llvm_unreachable("cannot happen");
450    return isSSrcF32();
451  }
452
453  bool isSCSrcV2FP32() const {
454    llvm_unreachable("cannot happen");
455    return isSCSrcF32();
456  }
457
458  bool isSSrcV2INT32() const {
459    llvm_unreachable("cannot happen");
460    return isSSrcB32();
461  }
462
463  bool isSCSrcV2INT32() const {
464    llvm_unreachable("cannot happen");
465    return isSCSrcB32();
466  }
467
468  bool isSSrcOrLdsB32() const {
469    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
470           isLiteralImm(MVT::i32) || isExpr();
471  }
472
473  bool isVCSrcB32() const {
474    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
475  }
476
477  bool isVCSrcB64() const {
478    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
479  }
480
481  bool isVCSrcB16() const {
482    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
483  }
484
485  bool isVCSrcV2B16() const {
486    return isVCSrcB16();
487  }
488
489  bool isVCSrcF32() const {
490    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
491  }
492
493  bool isVCSrcF64() const {
494    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
495  }
496
497  bool isVCSrcF16() const {
498    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
499  }
500
501  bool isVCSrcV2F16() const {
502    return isVCSrcF16();
503  }
504
505  bool isVSrcB32() const {
506    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
507  }
508
509  bool isVSrcB64() const {
510    return isVCSrcF64() || isLiteralImm(MVT::i64);
511  }
512
513  bool isVSrcB16() const {
514    return isVCSrcB16() || isLiteralImm(MVT::i16);
515  }
516
517  bool isVSrcV2B16() const {
518    return isVSrcB16() || isLiteralImm(MVT::v2i16);
519  }
520
521  bool isVCSrcV2FP32() const {
522    return isVCSrcF64();
523  }
524
525  bool isVSrcV2FP32() const {
526    return isVSrcF64() || isLiteralImm(MVT::v2f32);
527  }
528
529  bool isVCSrcV2INT32() const {
530    return isVCSrcB64();
531  }
532
533  bool isVSrcV2INT32() const {
534    return isVSrcB64() || isLiteralImm(MVT::v2i32);
535  }
536
537  bool isVSrcF32() const {
538    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
539  }
540
541  bool isVSrcF64() const {
542    return isVCSrcF64() || isLiteralImm(MVT::f64);
543  }
544
545  bool isVSrcF16() const {
546    return isVCSrcF16() || isLiteralImm(MVT::f16);
547  }
548
549  bool isVSrcV2F16() const {
550    return isVSrcF16() || isLiteralImm(MVT::v2f16);
551  }
552
553  bool isVISrcB32() const {
554    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
555  }
556
557  bool isVISrcB16() const {
558    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
559  }
560
561  bool isVISrcV2B16() const {
562    return isVISrcB16();
563  }
564
565  bool isVISrcF32() const {
566    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
567  }
568
569  bool isVISrcF16() const {
570    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
571  }
572
573  bool isVISrcV2F16() const {
574    return isVISrcF16() || isVISrcB32();
575  }
576
577  bool isVISrc_64B64() const {
578    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
579  }
580
581  bool isVISrc_64F64() const {
582    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
583  }
584
585  bool isVISrc_64V2FP32() const {
586    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
587  }
588
589  bool isVISrc_64V2INT32() const {
590    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
591  }
592
593  bool isVISrc_256B64() const {
594    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
595  }
596
597  bool isVISrc_256F64() const {
598    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
599  }
600
601  bool isVISrc_128B16() const {
602    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
603  }
604
605  bool isVISrc_128V2B16() const {
606    return isVISrc_128B16();
607  }
608
609  bool isVISrc_128B32() const {
610    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
611  }
612
613  bool isVISrc_128F32() const {
614    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
615  }
616
617  bool isVISrc_256V2FP32() const {
618    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
619  }
620
621  bool isVISrc_256V2INT32() const {
622    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
623  }
624
625  bool isVISrc_512B32() const {
626    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
627  }
628
629  bool isVISrc_512B16() const {
630    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
631  }
632
633  bool isVISrc_512V2B16() const {
634    return isVISrc_512B16();
635  }
636
637  bool isVISrc_512F32() const {
638    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
639  }
640
641  bool isVISrc_512F16() const {
642    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
643  }
644
645  bool isVISrc_512V2F16() const {
646    return isVISrc_512F16() || isVISrc_512B32();
647  }
648
649  bool isVISrc_1024B32() const {
650    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
651  }
652
653  bool isVISrc_1024B16() const {
654    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
655  }
656
657  bool isVISrc_1024V2B16() const {
658    return isVISrc_1024B16();
659  }
660
661  bool isVISrc_1024F32() const {
662    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
663  }
664
665  bool isVISrc_1024F16() const {
666    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
667  }
668
669  bool isVISrc_1024V2F16() const {
670    return isVISrc_1024F16() || isVISrc_1024B32();
671  }
672
673  bool isAISrcB32() const {
674    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
675  }
676
677  bool isAISrcB16() const {
678    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
679  }
680
681  bool isAISrcV2B16() const {
682    return isAISrcB16();
683  }
684
685  bool isAISrcF32() const {
686    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
687  }
688
689  bool isAISrcF16() const {
690    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
691  }
692
693  bool isAISrcV2F16() const {
694    return isAISrcF16() || isAISrcB32();
695  }
696
697  bool isAISrc_64B64() const {
698    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
699  }
700
701  bool isAISrc_64F64() const {
702    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
703  }
704
705  bool isAISrc_128B32() const {
706    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
707  }
708
709  bool isAISrc_128B16() const {
710    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
711  }
712
713  bool isAISrc_128V2B16() const {
714    return isAISrc_128B16();
715  }
716
717  bool isAISrc_128F32() const {
718    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
719  }
720
721  bool isAISrc_128F16() const {
722    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
723  }
724
725  bool isAISrc_128V2F16() const {
726    return isAISrc_128F16() || isAISrc_128B32();
727  }
728
729  bool isVISrc_128F16() const {
730    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
731  }
732
733  bool isVISrc_128V2F16() const {
734    return isVISrc_128F16() || isVISrc_128B32();
735  }
736
737  bool isAISrc_256B64() const {
738    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
739  }
740
741  bool isAISrc_256F64() const {
742    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
743  }
744
745  bool isAISrc_512B32() const {
746    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
747  }
748
749  bool isAISrc_512B16() const {
750    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
751  }
752
753  bool isAISrc_512V2B16() const {
754    return isAISrc_512B16();
755  }
756
757  bool isAISrc_512F32() const {
758    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
759  }
760
761  bool isAISrc_512F16() const {
762    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
763  }
764
765  bool isAISrc_512V2F16() const {
766    return isAISrc_512F16() || isAISrc_512B32();
767  }
768
769  bool isAISrc_1024B32() const {
770    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
771  }
772
773  bool isAISrc_1024B16() const {
774    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
775  }
776
777  bool isAISrc_1024V2B16() const {
778    return isAISrc_1024B16();
779  }
780
781  bool isAISrc_1024F32() const {
782    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
783  }
784
785  bool isAISrc_1024F16() const {
786    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
787  }
788
789  bool isAISrc_1024V2F16() const {
790    return isAISrc_1024F16() || isAISrc_1024B32();
791  }
792
793  bool isKImmFP32() const {
794    return isLiteralImm(MVT::f32);
795  }
796
797  bool isKImmFP16() const {
798    return isLiteralImm(MVT::f16);
799  }
800
801  bool isMem() const override {
802    return false;
803  }
804
805  bool isExpr() const {
806    return Kind == Expression;
807  }
808
809  bool isSoppBrTarget() const {
810    return isExpr() || isImm();
811  }
812
813  bool isSWaitCnt() const;
814  bool isHwreg() const;
815  bool isSendMsg() const;
816  bool isSwizzle() const;
817  bool isSMRDOffset8() const;
818  bool isSMEMOffset() const;
819  bool isSMRDLiteralOffset() const;
820  bool isDPP8() const;
821  bool isDPPCtrl() const;
822  bool isBLGP() const;
823  bool isCBSZ() const;
824  bool isABID() const;
825  bool isGPRIdxMode() const;
826  bool isS16Imm() const;
827  bool isU16Imm() const;
828  bool isEndpgm() const;
829
830  StringRef getExpressionAsToken() const {
831    assert(isExpr());
832    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
833    return S->getSymbol().getName();
834  }
835
836  StringRef getToken() const {
837    assert(isToken());
838
839    if (Kind == Expression)
840      return getExpressionAsToken();
841
842    return StringRef(Tok.Data, Tok.Length);
843  }
844
845  int64_t getImm() const {
846    assert(isImm());
847    return Imm.Val;
848  }
849
850  void setImm(int64_t Val) {
851    assert(isImm());
852    Imm.Val = Val;
853  }
854
855  ImmTy getImmTy() const {
856    assert(isImm());
857    return Imm.Type;
858  }
859
860  unsigned getReg() const override {
861    assert(isRegKind());
862    return Reg.RegNo;
863  }
864
865  SMLoc getStartLoc() const override {
866    return StartLoc;
867  }
868
869  SMLoc getEndLoc() const override {
870    return EndLoc;
871  }
872
873  SMRange getLocRange() const {
874    return SMRange(StartLoc, EndLoc);
875  }
876
877  Modifiers getModifiers() const {
878    assert(isRegKind() || isImmTy(ImmTyNone));
879    return isRegKind() ? Reg.Mods : Imm.Mods;
880  }
881
882  void setModifiers(Modifiers Mods) {
883    assert(isRegKind() || isImmTy(ImmTyNone));
884    if (isRegKind())
885      Reg.Mods = Mods;
886    else
887      Imm.Mods = Mods;
888  }
889
890  bool hasModifiers() const {
891    return getModifiers().hasModifiers();
892  }
893
894  bool hasFPModifiers() const {
895    return getModifiers().hasFPModifiers();
896  }
897
898  bool hasIntModifiers() const {
899    return getModifiers().hasIntModifiers();
900  }
901
902  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
903
904  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
905
906  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
907
908  template <unsigned Bitwidth>
909  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
910
911  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
912    addKImmFPOperands<16>(Inst, N);
913  }
914
915  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
916    addKImmFPOperands<32>(Inst, N);
917  }
918
919  void addRegOperands(MCInst &Inst, unsigned N) const;
920
921  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
922    addRegOperands(Inst, N);
923  }
924
925  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
926    if (isRegKind())
927      addRegOperands(Inst, N);
928    else if (isExpr())
929      Inst.addOperand(MCOperand::createExpr(Expr));
930    else
931      addImmOperands(Inst, N);
932  }
933
934  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
935    Modifiers Mods = getModifiers();
936    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
937    if (isRegKind()) {
938      addRegOperands(Inst, N);
939    } else {
940      addImmOperands(Inst, N, false);
941    }
942  }
943
944  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
945    assert(!hasIntModifiers());
946    addRegOrImmWithInputModsOperands(Inst, N);
947  }
948
949  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
950    assert(!hasFPModifiers());
951    addRegOrImmWithInputModsOperands(Inst, N);
952  }
953
954  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
955    Modifiers Mods = getModifiers();
956    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
957    assert(isRegKind());
958    addRegOperands(Inst, N);
959  }
960
961  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
962    assert(!hasIntModifiers());
963    addRegWithInputModsOperands(Inst, N);
964  }
965
966  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
967    assert(!hasFPModifiers());
968    addRegWithInputModsOperands(Inst, N);
969  }
970
971  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
972    if (isImm())
973      addImmOperands(Inst, N);
974    else {
975      assert(isExpr());
976      Inst.addOperand(MCOperand::createExpr(Expr));
977    }
978  }
979
980  static void printImmTy(raw_ostream& OS, ImmTy Type) {
981    switch (Type) {
982    case ImmTyNone: OS << "None"; break;
983    case ImmTyGDS: OS << "GDS"; break;
984    case ImmTyLDS: OS << "LDS"; break;
985    case ImmTyOffen: OS << "Offen"; break;
986    case ImmTyIdxen: OS << "Idxen"; break;
987    case ImmTyAddr64: OS << "Addr64"; break;
988    case ImmTyOffset: OS << "Offset"; break;
989    case ImmTyInstOffset: OS << "InstOffset"; break;
990    case ImmTyOffset0: OS << "Offset0"; break;
991    case ImmTyOffset1: OS << "Offset1"; break;
992    case ImmTyCPol: OS << "CPol"; break;
993    case ImmTySWZ: OS << "SWZ"; break;
994    case ImmTyTFE: OS << "TFE"; break;
995    case ImmTyD16: OS << "D16"; break;
996    case ImmTyFORMAT: OS << "FORMAT"; break;
997    case ImmTyClampSI: OS << "ClampSI"; break;
998    case ImmTyOModSI: OS << "OModSI"; break;
999    case ImmTyDPP8: OS << "DPP8"; break;
1000    case ImmTyDppCtrl: OS << "DppCtrl"; break;
1001    case ImmTyDppRowMask: OS << "DppRowMask"; break;
1002    case ImmTyDppBankMask: OS << "DppBankMask"; break;
1003    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1004    case ImmTyDppFi: OS << "FI"; break;
1005    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1006    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1007    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1008    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1009    case ImmTyDMask: OS << "DMask"; break;
1010    case ImmTyDim: OS << "Dim"; break;
1011    case ImmTyUNorm: OS << "UNorm"; break;
1012    case ImmTyDA: OS << "DA"; break;
1013    case ImmTyR128A16: OS << "R128A16"; break;
1014    case ImmTyA16: OS << "A16"; break;
1015    case ImmTyLWE: OS << "LWE"; break;
1016    case ImmTyOff: OS << "Off"; break;
1017    case ImmTyExpTgt: OS << "ExpTgt"; break;
1018    case ImmTyExpCompr: OS << "ExpCompr"; break;
1019    case ImmTyExpVM: OS << "ExpVM"; break;
1020    case ImmTyHwreg: OS << "Hwreg"; break;
1021    case ImmTySendMsg: OS << "SendMsg"; break;
1022    case ImmTyInterpSlot: OS << "InterpSlot"; break;
1023    case ImmTyInterpAttr: OS << "InterpAttr"; break;
1024    case ImmTyAttrChan: OS << "AttrChan"; break;
1025    case ImmTyOpSel: OS << "OpSel"; break;
1026    case ImmTyOpSelHi: OS << "OpSelHi"; break;
1027    case ImmTyNegLo: OS << "NegLo"; break;
1028    case ImmTyNegHi: OS << "NegHi"; break;
1029    case ImmTySwizzle: OS << "Swizzle"; break;
1030    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1031    case ImmTyHigh: OS << "High"; break;
1032    case ImmTyBLGP: OS << "BLGP"; break;
1033    case ImmTyCBSZ: OS << "CBSZ"; break;
1034    case ImmTyABID: OS << "ABID"; break;
1035    case ImmTyEndpgm: OS << "Endpgm"; break;
1036    }
1037  }
1038
1039  void print(raw_ostream &OS) const override {
1040    switch (Kind) {
1041    case Register:
1042      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1043      break;
1044    case Immediate:
1045      OS << '<' << getImm();
1046      if (getImmTy() != ImmTyNone) {
1047        OS << " type: "; printImmTy(OS, getImmTy());
1048      }
1049      OS << " mods: " << Imm.Mods << '>';
1050      break;
1051    case Token:
1052      OS << '\'' << getToken() << '\'';
1053      break;
1054    case Expression:
1055      OS << "<expr " << *Expr << '>';
1056      break;
1057    }
1058  }
1059
1060  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1061                                      int64_t Val, SMLoc Loc,
1062                                      ImmTy Type = ImmTyNone,
1063                                      bool IsFPImm = false) {
1064    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1065    Op->Imm.Val = Val;
1066    Op->Imm.IsFPImm = IsFPImm;
1067    Op->Imm.Kind = ImmKindTyNone;
1068    Op->Imm.Type = Type;
1069    Op->Imm.Mods = Modifiers();
1070    Op->StartLoc = Loc;
1071    Op->EndLoc = Loc;
1072    return Op;
1073  }
1074
1075  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1076                                        StringRef Str, SMLoc Loc,
1077                                        bool HasExplicitEncodingSize = true) {
1078    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1079    Res->Tok.Data = Str.data();
1080    Res->Tok.Length = Str.size();
1081    Res->StartLoc = Loc;
1082    Res->EndLoc = Loc;
1083    return Res;
1084  }
1085
1086  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1087                                      unsigned RegNo, SMLoc S,
1088                                      SMLoc E) {
1089    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1090    Op->Reg.RegNo = RegNo;
1091    Op->Reg.Mods = Modifiers();
1092    Op->StartLoc = S;
1093    Op->EndLoc = E;
1094    return Op;
1095  }
1096
1097  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1098                                       const class MCExpr *Expr, SMLoc S) {
1099    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1100    Op->Expr = Expr;
1101    Op->StartLoc = S;
1102    Op->EndLoc = S;
1103    return Op;
1104  }
1105};
1106
1107raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1108  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1109  return OS;
1110}
1111
1112//===----------------------------------------------------------------------===//
1113// AsmParser
1114//===----------------------------------------------------------------------===//
1115
1116// Holds info related to the current kernel, e.g. count of SGPRs used.
1117// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1118// .amdgpu_hsa_kernel or at EOF.
1119class KernelScopeInfo {
1120  int SgprIndexUnusedMin = -1;
1121  int VgprIndexUnusedMin = -1;
1122  MCContext *Ctx = nullptr;
1123
1124  void usesSgprAt(int i) {
1125    if (i >= SgprIndexUnusedMin) {
1126      SgprIndexUnusedMin = ++i;
1127      if (Ctx) {
1128        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1129        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1130      }
1131    }
1132  }
1133
1134  void usesVgprAt(int i) {
1135    if (i >= VgprIndexUnusedMin) {
1136      VgprIndexUnusedMin = ++i;
1137      if (Ctx) {
1138        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1139        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
1140      }
1141    }
1142  }
1143
1144public:
1145  KernelScopeInfo() = default;
1146
1147  void initialize(MCContext &Context) {
1148    Ctx = &Context;
1149    usesSgprAt(SgprIndexUnusedMin = -1);
1150    usesVgprAt(VgprIndexUnusedMin = -1);
1151  }
1152
1153  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
1154    switch (RegKind) {
1155      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
1156      case IS_AGPR: // fall through
1157      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
1158      default: break;
1159    }
1160  }
1161};
1162
1163class AMDGPUAsmParser : public MCTargetAsmParser {
1164  MCAsmParser &Parser;
1165
1166  // Number of extra operands parsed after the first optional operand.
1167  // This may be necessary to skip hardcoded mandatory operands.
1168  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1169
1170  unsigned ForcedEncodingSize = 0;
1171  bool ForcedDPP = false;
1172  bool ForcedSDWA = false;
1173  KernelScopeInfo KernelScope;
1174  unsigned CPolSeen;
1175
1176  /// @name Auto-generated Match Functions
1177  /// {
1178
1179#define GET_ASSEMBLER_HEADER
1180#include "AMDGPUGenAsmMatcher.inc"
1181
1182  /// }
1183
1184private:
1185  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1186  bool OutOfRangeError(SMRange Range);
1187  /// Calculate VGPR/SGPR blocks required for given target, reserved
1188  /// registers, and user-specified NextFreeXGPR values.
1189  ///
1190  /// \param Features [in] Target features, used for bug corrections.
1191  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1192  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1193  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1194  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1195  /// descriptor field, if valid.
1196  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1197  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1198  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1199  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1200  /// \param VGPRBlocks [out] Result VGPR block count.
1201  /// \param SGPRBlocks [out] Result SGPR block count.
1202  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1203                          bool FlatScrUsed, bool XNACKUsed,
1204                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1205                          SMRange VGPRRange, unsigned NextFreeSGPR,
1206                          SMRange SGPRRange, unsigned &VGPRBlocks,
1207                          unsigned &SGPRBlocks);
1208  bool ParseDirectiveAMDGCNTarget();
1209  bool ParseDirectiveAMDHSAKernel();
1210  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1211  bool ParseDirectiveHSACodeObjectVersion();
1212  bool ParseDirectiveHSACodeObjectISA();
1213  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1214  bool ParseDirectiveAMDKernelCodeT();
1215  // TODO: Possibly make subtargetHasRegister const.
1216  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1217  bool ParseDirectiveAMDGPUHsaKernel();
1218
1219  bool ParseDirectiveISAVersion();
1220  bool ParseDirectiveHSAMetadata();
1221  bool ParseDirectivePALMetadataBegin();
1222  bool ParseDirectivePALMetadata();
1223  bool ParseDirectiveAMDGPULDS();
1224
1225  /// Common code to parse out a block of text (typically YAML) between start and
1226  /// end directives.
1227  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1228                           const char *AssemblerDirectiveEnd,
1229                           std::string &CollectString);
1230
1231  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1232                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1233  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1234                           unsigned &RegNum, unsigned &RegWidth,
1235                           bool RestoreOnFailure = false);
1236  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1237                           unsigned &RegNum, unsigned &RegWidth,
1238                           SmallVectorImpl<AsmToken> &Tokens);
1239  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1240                           unsigned &RegWidth,
1241                           SmallVectorImpl<AsmToken> &Tokens);
1242  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1243                           unsigned &RegWidth,
1244                           SmallVectorImpl<AsmToken> &Tokens);
1245  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1246                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1247  bool ParseRegRange(unsigned& Num, unsigned& Width);
1248  unsigned getRegularReg(RegisterKind RegKind,
1249                         unsigned RegNum,
1250                         unsigned RegWidth,
1251                         SMLoc Loc);
1252
1253  bool isRegister();
1254  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1255  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1256  void initializeGprCountSymbol(RegisterKind RegKind);
1257  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1258                             unsigned RegWidth);
1259  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1260                    bool IsAtomic, bool IsLds = false);
1261  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1262                 bool IsGdsHardcoded);
1263
1264public:
1265  enum AMDGPUMatchResultTy {
1266    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1267  };
1268  enum OperandMode {
1269    OperandMode_Default,
1270    OperandMode_NSA,
1271  };
1272
1273  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1274
1275  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1276               const MCInstrInfo &MII,
1277               const MCTargetOptions &Options)
1278      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1279    MCAsmParserExtension::Initialize(Parser);
1280
1281    if (getFeatureBits().none()) {
1282      // Set default features.
1283      copySTI().ToggleFeature("southern-islands");
1284    }
1285
1286    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1287
1288    {
1289      // TODO: make those pre-defined variables read-only.
1290      // Currently there is none suitable machinery in the core llvm-mc for this.
1291      // MCSymbol::isRedefinable is intended for another purpose, and
1292      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1293      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1294      MCContext &Ctx = getContext();
1295      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1296        MCSymbol *Sym =
1297            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1298        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1299        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1300        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1301        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1302        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1303      } else {
1304        MCSymbol *Sym =
1305            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1306        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1307        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1308        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1309        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1310        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1311      }
1312      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
1313        initializeGprCountSymbol(IS_VGPR);
1314        initializeGprCountSymbol(IS_SGPR);
1315      } else
1316        KernelScope.initialize(getContext());
1317    }
1318  }
1319
1320  bool hasMIMG_R128() const {
1321    return AMDGPU::hasMIMG_R128(getSTI());
1322  }
1323
1324  bool hasPackedD16() const {
1325    return AMDGPU::hasPackedD16(getSTI());
1326  }
1327
1328  bool hasGFX10A16() const {
1329    return AMDGPU::hasGFX10A16(getSTI());
1330  }
1331
1332  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1333
1334  bool isSI() const {
1335    return AMDGPU::isSI(getSTI());
1336  }
1337
1338  bool isCI() const {
1339    return AMDGPU::isCI(getSTI());
1340  }
1341
1342  bool isVI() const {
1343    return AMDGPU::isVI(getSTI());
1344  }
1345
1346  bool isGFX9() const {
1347    return AMDGPU::isGFX9(getSTI());
1348  }
1349
1350  bool isGFX90A() const {
1351    return AMDGPU::isGFX90A(getSTI());
1352  }
1353
1354  bool isGFX9Plus() const {
1355    return AMDGPU::isGFX9Plus(getSTI());
1356  }
1357
1358  bool isGFX10() const {
1359    return AMDGPU::isGFX10(getSTI());
1360  }
1361
1362  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1363
1364  bool isGFX10_BEncoding() const {
1365    return AMDGPU::isGFX10_BEncoding(getSTI());
1366  }
1367
1368  bool hasInv2PiInlineImm() const {
1369    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1370  }
1371
1372  bool hasFlatOffsets() const {
1373    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1374  }
1375
1376  bool hasArchitectedFlatScratch() const {
1377    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1378  }
1379
1380  bool hasSGPR102_SGPR103() const {
1381    return !isVI() && !isGFX9();
1382  }
1383
1384  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1385
1386  bool hasIntClamp() const {
1387    return getFeatureBits()[AMDGPU::FeatureIntClamp];
1388  }
1389
1390  AMDGPUTargetStreamer &getTargetStreamer() {
1391    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1392    return static_cast<AMDGPUTargetStreamer &>(TS);
1393  }
1394
1395  const MCRegisterInfo *getMRI() const {
1396    // We need this const_cast because for some reason getContext() is not const
1397    // in MCAsmParser.
1398    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1399  }
1400
1401  const MCInstrInfo *getMII() const {
1402    return &MII;
1403  }
1404
1405  const FeatureBitset &getFeatureBits() const {
1406    return getSTI().getFeatureBits();
1407  }
1408
1409  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1410  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1411  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1412
1413  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1414  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1415  bool isForcedDPP() const { return ForcedDPP; }
1416  bool isForcedSDWA() const { return ForcedSDWA; }
1417  ArrayRef<unsigned> getMatchedVariants() const;
1418  StringRef getMatchedVariantName() const;
1419
1420  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1421  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1422                     bool RestoreOnFailure);
1423  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1424  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1425                                        SMLoc &EndLoc) override;
1426  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1427  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1428                                      unsigned Kind) override;
1429  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1430                               OperandVector &Operands, MCStreamer &Out,
1431                               uint64_t &ErrorInfo,
1432                               bool MatchingInlineAsm) override;
1433  bool ParseDirective(AsmToken DirectiveID) override;
1434  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1435                                    OperandMode Mode = OperandMode_Default);
1436  StringRef parseMnemonicSuffix(StringRef Name);
1437  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1438                        SMLoc NameLoc, OperandVector &Operands) override;
1439  //bool ProcessInstruction(MCInst &Inst);
1440
1441  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1442
1443  OperandMatchResultTy
1444  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1445                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1446                     bool (*ConvertResult)(int64_t &) = nullptr);
1447
1448  OperandMatchResultTy
1449  parseOperandArrayWithPrefix(const char *Prefix,
1450                              OperandVector &Operands,
1451                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1452                              bool (*ConvertResult)(int64_t&) = nullptr);
1453
1454  OperandMatchResultTy
1455  parseNamedBit(StringRef Name, OperandVector &Operands,
1456                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1457  OperandMatchResultTy parseCPol(OperandVector &Operands);
1458  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1459                                             StringRef &Value,
1460                                             SMLoc &StringLoc);
1461
1462  bool isModifier();
1463  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1464  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1465  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1466  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1467  bool parseSP3NegModifier();
1468  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1469  OperandMatchResultTy parseReg(OperandVector &Operands);
1470  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1471  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1472  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1473  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1474  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1475  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1476  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1477  OperandMatchResultTy parseUfmt(int64_t &Format);
1478  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1479  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1480  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1481  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1482  OperandMatchResultTy parseNumericFormat(int64_t &Format);
1483  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1484  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1485
1486  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1487  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1488  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1489  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1490
1491  bool parseCnt(int64_t &IntVal);
1492  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1493  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1494
1495private:
1496  struct OperandInfoTy {
1497    SMLoc Loc;
1498    int64_t Id;
1499    bool IsSymbolic = false;
1500    bool IsDefined = false;
1501
1502    OperandInfoTy(int64_t Id_) : Id(Id_) {}
1503  };
1504
1505  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1506  bool validateSendMsg(const OperandInfoTy &Msg,
1507                       const OperandInfoTy &Op,
1508                       const OperandInfoTy &Stream);
1509
1510  bool parseHwregBody(OperandInfoTy &HwReg,
1511                      OperandInfoTy &Offset,
1512                      OperandInfoTy &Width);
1513  bool validateHwreg(const OperandInfoTy &HwReg,
1514                     const OperandInfoTy &Offset,
1515                     const OperandInfoTy &Width);
1516
1517  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1518  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1519
1520  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1521                      const OperandVector &Operands) const;
1522  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1523  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1524  SMLoc getLitLoc(const OperandVector &Operands) const;
1525  SMLoc getConstLoc(const OperandVector &Operands) const;
1526
1527  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1528  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1529  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1530  bool validateSOPLiteral(const MCInst &Inst) const;
1531  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1532  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1533  bool validateIntClampSupported(const MCInst &Inst);
1534  bool validateMIMGAtomicDMask(const MCInst &Inst);
1535  bool validateMIMGGatherDMask(const MCInst &Inst);
1536  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1537  bool validateMIMGDataSize(const MCInst &Inst);
1538  bool validateMIMGAddrSize(const MCInst &Inst);
1539  bool validateMIMGD16(const MCInst &Inst);
1540  bool validateMIMGDim(const MCInst &Inst);
1541  bool validateMIMGMSAA(const MCInst &Inst);
1542  bool validateOpSel(const MCInst &Inst);
1543  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1544  bool validateVccOperand(unsigned Reg) const;
1545  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
1546  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1547  bool validateAGPRLdSt(const MCInst &Inst) const;
1548  bool validateVGPRAlign(const MCInst &Inst) const;
1549  bool validateDivScale(const MCInst &Inst);
1550  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1551                             const SMLoc &IDLoc);
1552  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1553  unsigned getConstantBusLimit(unsigned Opcode) const;
1554  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1555  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1556  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1557
1558  bool isSupportedMnemo(StringRef Mnemo,
1559                        const FeatureBitset &FBS);
1560  bool isSupportedMnemo(StringRef Mnemo,
1561                        const FeatureBitset &FBS,
1562                        ArrayRef<unsigned> Variants);
1563  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1564
1565  bool isId(const StringRef Id) const;
1566  bool isId(const AsmToken &Token, const StringRef Id) const;
1567  bool isToken(const AsmToken::TokenKind Kind) const;
1568  bool trySkipId(const StringRef Id);
1569  bool trySkipId(const StringRef Pref, const StringRef Id);
1570  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1571  bool trySkipToken(const AsmToken::TokenKind Kind);
1572  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1573  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1574  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1575
1576  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1577  AsmToken::TokenKind getTokenKind() const;
1578  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1579  bool parseExpr(OperandVector &Operands);
1580  StringRef getTokenStr() const;
1581  AsmToken peekToken();
1582  AsmToken getToken() const;
1583  SMLoc getLoc() const;
1584  void lex();
1585
1586public:
1587  void onBeginOfFile() override;
1588
1589  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1590  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1591
1592  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1593  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1594  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1595  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1596  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1597  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1598
1599  bool parseSwizzleOperand(int64_t &Op,
1600                           const unsigned MinVal,
1601                           const unsigned MaxVal,
1602                           const StringRef ErrMsg,
1603                           SMLoc &Loc);
1604  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1605                            const unsigned MinVal,
1606                            const unsigned MaxVal,
1607                            const StringRef ErrMsg);
1608  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1609  bool parseSwizzleOffset(int64_t &Imm);
1610  bool parseSwizzleMacro(int64_t &Imm);
1611  bool parseSwizzleQuadPerm(int64_t &Imm);
1612  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1613  bool parseSwizzleBroadcast(int64_t &Imm);
1614  bool parseSwizzleSwap(int64_t &Imm);
1615  bool parseSwizzleReverse(int64_t &Imm);
1616
1617  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1618  int64_t parseGPRIdxMacro();
1619
1620  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1621  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1622  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1623  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1624
1625  AMDGPUOperand::Ptr defaultCPol() const;
1626
1627  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1628  AMDGPUOperand::Ptr defaultSMEMOffset() const;
1629  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1630  AMDGPUOperand::Ptr defaultFlatOffset() const;
1631
1632  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1633
1634  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1635               OptionalImmIndexMap &OptionalIdx);
1636  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1637  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1638  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1639  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1640                OptionalImmIndexMap &OptionalIdx);
1641
1642  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1643
1644  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1645               bool IsAtomic = false);
1646  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1647  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1648
1649  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1650
1651  bool parseDimId(unsigned &Encoding);
1652  OperandMatchResultTy parseDim(OperandVector &Operands);
1653  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1654  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1655  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1656  int64_t parseDPPCtrlSel(StringRef Ctrl);
1657  int64_t parseDPPCtrlPerm();
1658  AMDGPUOperand::Ptr defaultRowMask() const;
1659  AMDGPUOperand::Ptr defaultBankMask() const;
1660  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1661  AMDGPUOperand::Ptr defaultFI() const;
1662  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1663  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1664
1665  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1666                                    AMDGPUOperand::ImmTy Type);
1667  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1668  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1669  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1670  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1671  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1672  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1673  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1674               uint64_t BasicInstType,
1675               bool SkipDstVcc = false,
1676               bool SkipSrcVcc = false);
1677
1678  AMDGPUOperand::Ptr defaultBLGP() const;
1679  AMDGPUOperand::Ptr defaultCBSZ() const;
1680  AMDGPUOperand::Ptr defaultABID() const;
1681
1682  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1683  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1684};
1685
1686struct OptionalOperand {
1687  const char *Name;
1688  AMDGPUOperand::ImmTy Type;
1689  bool IsBit;
1690  bool (*ConvertResult)(int64_t&);
1691};
1692
1693} // end anonymous namespace
1694
1695// May be called with integer type with equivalent bitwidth.
1696static const fltSemantics *getFltSemantics(unsigned Size) {
1697  switch (Size) {
1698  case 4:
1699    return &APFloat::IEEEsingle();
1700  case 8:
1701    return &APFloat::IEEEdouble();
1702  case 2:
1703    return &APFloat::IEEEhalf();
1704  default:
1705    llvm_unreachable("unsupported fp type");
1706  }
1707}
1708
1709static const fltSemantics *getFltSemantics(MVT VT) {
1710  return getFltSemantics(VT.getSizeInBits() / 8);
1711}
1712
1713static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1714  switch (OperandType) {
1715  case AMDGPU::OPERAND_REG_IMM_INT32:
1716  case AMDGPU::OPERAND_REG_IMM_FP32:
1717  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1718  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1719  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1720  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1721  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1722  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1723  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1724  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1725    return &APFloat::IEEEsingle();
1726  case AMDGPU::OPERAND_REG_IMM_INT64:
1727  case AMDGPU::OPERAND_REG_IMM_FP64:
1728  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1729  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1730  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1731    return &APFloat::IEEEdouble();
1732  case AMDGPU::OPERAND_REG_IMM_INT16:
1733  case AMDGPU::OPERAND_REG_IMM_FP16:
1734  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1735  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1736  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1737  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1738  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1739  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1740  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1741  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1742  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1743  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1744    return &APFloat::IEEEhalf();
1745  default:
1746    llvm_unreachable("unsupported fp type");
1747  }
1748}
1749
1750//===----------------------------------------------------------------------===//
1751// Operand
1752//===----------------------------------------------------------------------===//
1753
1754static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1755  bool Lost;
1756
1757  // Convert literal to single precision
1758  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1759                                               APFloat::rmNearestTiesToEven,
1760                                               &Lost);
1761  // We allow precision lost but not overflow or underflow
1762  if (Status != APFloat::opOK &&
1763      Lost &&
1764      ((Status & APFloat::opOverflow)  != 0 ||
1765       (Status & APFloat::opUnderflow) != 0)) {
1766    return false;
1767  }
1768
1769  return true;
1770}
1771
1772static bool isSafeTruncation(int64_t Val, unsigned Size) {
1773  return isUIntN(Size, Val) || isIntN(Size, Val);
1774}
1775
1776static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1777  if (VT.getScalarType() == MVT::i16) {
1778    // FP immediate values are broken.
1779    return isInlinableIntLiteral(Val);
1780  }
1781
1782  // f16/v2f16 operands work correctly for all values.
1783  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1784}
1785
1786bool AMDGPUOperand::isInlinableImm(MVT type) const {
1787
1788  // This is a hack to enable named inline values like
1789  // shared_base with both 32-bit and 64-bit operands.
1790  // Note that these values are defined as
1791  // 32-bit operands only.
1792  if (isInlineValue()) {
1793    return true;
1794  }
1795
1796  if (!isImmTy(ImmTyNone)) {
1797    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1798    return false;
1799  }
1800  // TODO: We should avoid using host float here. It would be better to
1801  // check the float bit values which is what a few other places do.
1802  // We've had bot failures before due to weird NaN support on mips hosts.
1803
1804  APInt Literal(64, Imm.Val);
1805
1806  if (Imm.IsFPImm) { // We got fp literal token
1807    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1808      return AMDGPU::isInlinableLiteral64(Imm.Val,
1809                                          AsmParser->hasInv2PiInlineImm());
1810    }
1811
1812    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1813    if (!canLosslesslyConvertToFPType(FPLiteral, type))
1814      return false;
1815
1816    if (type.getScalarSizeInBits() == 16) {
1817      return isInlineableLiteralOp16(
1818        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1819        type, AsmParser->hasInv2PiInlineImm());
1820    }
1821
1822    // Check if single precision literal is inlinable
1823    return AMDGPU::isInlinableLiteral32(
1824      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1825      AsmParser->hasInv2PiInlineImm());
1826  }
1827
1828  // We got int literal token.
1829  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1830    return AMDGPU::isInlinableLiteral64(Imm.Val,
1831                                        AsmParser->hasInv2PiInlineImm());
1832  }
1833
1834  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1835    return false;
1836  }
1837
1838  if (type.getScalarSizeInBits() == 16) {
1839    return isInlineableLiteralOp16(
1840      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1841      type, AsmParser->hasInv2PiInlineImm());
1842  }
1843
1844  return AMDGPU::isInlinableLiteral32(
1845    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1846    AsmParser->hasInv2PiInlineImm());
1847}
1848
1849bool AMDGPUOperand::isLiteralImm(MVT type) const {
1850  // Check that this immediate can be added as literal
1851  if (!isImmTy(ImmTyNone)) {
1852    return false;
1853  }
1854
1855  if (!Imm.IsFPImm) {
1856    // We got int literal token.
1857
1858    if (type == MVT::f64 && hasFPModifiers()) {
1859      // Cannot apply fp modifiers to int literals preserving the same semantics
1860      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1861      // disable these cases.
1862      return false;
1863    }
1864
1865    unsigned Size = type.getSizeInBits();
1866    if (Size == 64)
1867      Size = 32;
1868
1869    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1870    // types.
1871    return isSafeTruncation(Imm.Val, Size);
1872  }
1873
1874  // We got fp literal token
1875  if (type == MVT::f64) { // Expected 64-bit fp operand
1876    // We would set low 64-bits of literal to zeroes but we accept this literals
1877    return true;
1878  }
1879
1880  if (type == MVT::i64) { // Expected 64-bit int operand
1881    // We don't allow fp literals in 64-bit integer instructions. It is
1882    // unclear how we should encode them.
1883    return false;
1884  }
1885
1886  // We allow fp literals with f16x2 operands assuming that the specified
1887  // literal goes into the lower half and the upper half is zero. We also
1888  // require that the literal may be losslesly converted to f16.
1889  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1890                     (type == MVT::v2i16)? MVT::i16 :
1891                     (type == MVT::v2f32)? MVT::f32 : type;
1892
1893  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1894  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1895}
1896
1897bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1898  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1899}
1900
1901bool AMDGPUOperand::isVRegWithInputMods() const {
1902  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1903         // GFX90A allows DPP on 64-bit operands.
1904         (isRegClass(AMDGPU::VReg_64RegClassID) &&
1905          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1906}
1907
1908bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1909  if (AsmParser->isVI())
1910    return isVReg32();
1911  else if (AsmParser->isGFX9Plus())
1912    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1913  else
1914    return false;
1915}
1916
1917bool AMDGPUOperand::isSDWAFP16Operand() const {
1918  return isSDWAOperand(MVT::f16);
1919}
1920
1921bool AMDGPUOperand::isSDWAFP32Operand() const {
1922  return isSDWAOperand(MVT::f32);
1923}
1924
1925bool AMDGPUOperand::isSDWAInt16Operand() const {
1926  return isSDWAOperand(MVT::i16);
1927}
1928
1929bool AMDGPUOperand::isSDWAInt32Operand() const {
1930  return isSDWAOperand(MVT::i32);
1931}
1932
1933bool AMDGPUOperand::isBoolReg() const {
1934  auto FB = AsmParser->getFeatureBits();
1935  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1936                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
1937}
1938
1939uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1940{
1941  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1942  assert(Size == 2 || Size == 4 || Size == 8);
1943
1944  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1945
1946  if (Imm.Mods.Abs) {
1947    Val &= ~FpSignMask;
1948  }
1949  if (Imm.Mods.Neg) {
1950    Val ^= FpSignMask;
1951  }
1952
1953  return Val;
1954}
1955
1956void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1957  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1958                             Inst.getNumOperands())) {
1959    addLiteralImmOperand(Inst, Imm.Val,
1960                         ApplyModifiers &
1961                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1962  } else {
1963    assert(!isImmTy(ImmTyNone) || !hasModifiers());
1964    Inst.addOperand(MCOperand::createImm(Imm.Val));
1965    setImmKindNone();
1966  }
1967}
1968
1969void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1970  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1971  auto OpNum = Inst.getNumOperands();
1972  // Check that this operand accepts literals
1973  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1974
1975  if (ApplyModifiers) {
1976    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1977    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1978    Val = applyInputFPModifiers(Val, Size);
1979  }
1980
1981  APInt Literal(64, Val);
1982  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1983
1984  if (Imm.IsFPImm) { // We got fp literal token
1985    switch (OpTy) {
1986    case AMDGPU::OPERAND_REG_IMM_INT64:
1987    case AMDGPU::OPERAND_REG_IMM_FP64:
1988    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1989    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1990    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1991      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1992                                       AsmParser->hasInv2PiInlineImm())) {
1993        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1994        setImmKindConst();
1995        return;
1996      }
1997
1998      // Non-inlineable
1999      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2000        // For fp operands we check if low 32 bits are zeros
2001        if (Literal.getLoBits(32) != 0) {
2002          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2003          "Can't encode literal as exact 64-bit floating-point operand. "
2004          "Low 32-bits will be set to zero");
2005        }
2006
2007        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2008        setImmKindLiteral();
2009        return;
2010      }
2011
2012      // We don't allow fp literals in 64-bit integer instructions. It is
2013      // unclear how we should encode them. This case should be checked earlier
2014      // in predicate methods (isLiteralImm())
2015      llvm_unreachable("fp literal in 64-bit integer instruction.");
2016
2017    case AMDGPU::OPERAND_REG_IMM_INT32:
2018    case AMDGPU::OPERAND_REG_IMM_FP32:
2019    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2020    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2021    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2022    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2023    case AMDGPU::OPERAND_REG_IMM_INT16:
2024    case AMDGPU::OPERAND_REG_IMM_FP16:
2025    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2026    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2027    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2028    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2029    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2030    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2031    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2032    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2033    case AMDGPU::OPERAND_REG_IMM_V2INT16:
2034    case AMDGPU::OPERAND_REG_IMM_V2FP16:
2035    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2036    case AMDGPU::OPERAND_REG_IMM_V2FP32:
2037    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2038    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
2039      bool lost;
2040      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2041      // Convert literal to single precision
2042      FPLiteral.convert(*getOpFltSemantics(OpTy),
2043                        APFloat::rmNearestTiesToEven, &lost);
2044      // We allow precision lost but not overflow or underflow. This should be
2045      // checked earlier in isLiteralImm()
2046
2047      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2048      Inst.addOperand(MCOperand::createImm(ImmVal));
2049      setImmKindLiteral();
2050      return;
2051    }
2052    default:
2053      llvm_unreachable("invalid operand size");
2054    }
2055
2056    return;
2057  }
2058
2059  // We got int literal token.
2060  // Only sign extend inline immediates.
2061  switch (OpTy) {
2062  case AMDGPU::OPERAND_REG_IMM_INT32:
2063  case AMDGPU::OPERAND_REG_IMM_FP32:
2064  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2065  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2066  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2067  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2068  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2069  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2070  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2071  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2072  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2073  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2074    if (isSafeTruncation(Val, 32) &&
2075        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2076                                     AsmParser->hasInv2PiInlineImm())) {
2077      Inst.addOperand(MCOperand::createImm(Val));
2078      setImmKindConst();
2079      return;
2080    }
2081
2082    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2083    setImmKindLiteral();
2084    return;
2085
2086  case AMDGPU::OPERAND_REG_IMM_INT64:
2087  case AMDGPU::OPERAND_REG_IMM_FP64:
2088  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2089  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2090  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2091    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2092      Inst.addOperand(MCOperand::createImm(Val));
2093      setImmKindConst();
2094      return;
2095    }
2096
2097    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2098    setImmKindLiteral();
2099    return;
2100
2101  case AMDGPU::OPERAND_REG_IMM_INT16:
2102  case AMDGPU::OPERAND_REG_IMM_FP16:
2103  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2104  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2105  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2106  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2107    if (isSafeTruncation(Val, 16) &&
2108        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2109                                     AsmParser->hasInv2PiInlineImm())) {
2110      Inst.addOperand(MCOperand::createImm(Val));
2111      setImmKindConst();
2112      return;
2113    }
2114
2115    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2116    setImmKindLiteral();
2117    return;
2118
2119  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2120  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2121  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2122  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2123    assert(isSafeTruncation(Val, 16));
2124    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2125                                        AsmParser->hasInv2PiInlineImm()));
2126
2127    Inst.addOperand(MCOperand::createImm(Val));
2128    return;
2129  }
2130  default:
2131    llvm_unreachable("invalid operand size");
2132  }
2133}
2134
2135template <unsigned Bitwidth>
2136void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2137  APInt Literal(64, Imm.Val);
2138  setImmKindNone();
2139
2140  if (!Imm.IsFPImm) {
2141    // We got int literal token.
2142    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2143    return;
2144  }
2145
2146  bool Lost;
2147  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2148  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2149                    APFloat::rmNearestTiesToEven, &Lost);
2150  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2151}
2152
2153void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2154  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2155}
2156
2157static bool isInlineValue(unsigned Reg) {
2158  switch (Reg) {
2159  case AMDGPU::SRC_SHARED_BASE:
2160  case AMDGPU::SRC_SHARED_LIMIT:
2161  case AMDGPU::SRC_PRIVATE_BASE:
2162  case AMDGPU::SRC_PRIVATE_LIMIT:
2163  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2164    return true;
2165  case AMDGPU::SRC_VCCZ:
2166  case AMDGPU::SRC_EXECZ:
2167  case AMDGPU::SRC_SCC:
2168    return true;
2169  case AMDGPU::SGPR_NULL:
2170    return true;
2171  default:
2172    return false;
2173  }
2174}
2175
2176bool AMDGPUOperand::isInlineValue() const {
2177  return isRegKind() && ::isInlineValue(getReg());
2178}
2179
2180//===----------------------------------------------------------------------===//
2181// AsmParser
2182//===----------------------------------------------------------------------===//
2183
2184static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2185  if (Is == IS_VGPR) {
2186    switch (RegWidth) {
2187      default: return -1;
2188      case 1: return AMDGPU::VGPR_32RegClassID;
2189      case 2: return AMDGPU::VReg_64RegClassID;
2190      case 3: return AMDGPU::VReg_96RegClassID;
2191      case 4: return AMDGPU::VReg_128RegClassID;
2192      case 5: return AMDGPU::VReg_160RegClassID;
2193      case 6: return AMDGPU::VReg_192RegClassID;
2194      case 8: return AMDGPU::VReg_256RegClassID;
2195      case 16: return AMDGPU::VReg_512RegClassID;
2196      case 32: return AMDGPU::VReg_1024RegClassID;
2197    }
2198  } else if (Is == IS_TTMP) {
2199    switch (RegWidth) {
2200      default: return -1;
2201      case 1: return AMDGPU::TTMP_32RegClassID;
2202      case 2: return AMDGPU::TTMP_64RegClassID;
2203      case 4: return AMDGPU::TTMP_128RegClassID;
2204      case 8: return AMDGPU::TTMP_256RegClassID;
2205      case 16: return AMDGPU::TTMP_512RegClassID;
2206    }
2207  } else if (Is == IS_SGPR) {
2208    switch (RegWidth) {
2209      default: return -1;
2210      case 1: return AMDGPU::SGPR_32RegClassID;
2211      case 2: return AMDGPU::SGPR_64RegClassID;
2212      case 3: return AMDGPU::SGPR_96RegClassID;
2213      case 4: return AMDGPU::SGPR_128RegClassID;
2214      case 5: return AMDGPU::SGPR_160RegClassID;
2215      case 6: return AMDGPU::SGPR_192RegClassID;
2216      case 8: return AMDGPU::SGPR_256RegClassID;
2217      case 16: return AMDGPU::SGPR_512RegClassID;
2218    }
2219  } else if (Is == IS_AGPR) {
2220    switch (RegWidth) {
2221      default: return -1;
2222      case 1: return AMDGPU::AGPR_32RegClassID;
2223      case 2: return AMDGPU::AReg_64RegClassID;
2224      case 3: return AMDGPU::AReg_96RegClassID;
2225      case 4: return AMDGPU::AReg_128RegClassID;
2226      case 5: return AMDGPU::AReg_160RegClassID;
2227      case 6: return AMDGPU::AReg_192RegClassID;
2228      case 8: return AMDGPU::AReg_256RegClassID;
2229      case 16: return AMDGPU::AReg_512RegClassID;
2230      case 32: return AMDGPU::AReg_1024RegClassID;
2231    }
2232  }
2233  return -1;
2234}
2235
2236static unsigned getSpecialRegForName(StringRef RegName) {
2237  return StringSwitch<unsigned>(RegName)
2238    .Case("exec", AMDGPU::EXEC)
2239    .Case("vcc", AMDGPU::VCC)
2240    .Case("flat_scratch", AMDGPU::FLAT_SCR)
2241    .Case("xnack_mask", AMDGPU::XNACK_MASK)
2242    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2243    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2244    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2245    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2246    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2247    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2248    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2249    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2250    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2251    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2252    .Case("lds_direct", AMDGPU::LDS_DIRECT)
2253    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2254    .Case("m0", AMDGPU::M0)
2255    .Case("vccz", AMDGPU::SRC_VCCZ)
2256    .Case("src_vccz", AMDGPU::SRC_VCCZ)
2257    .Case("execz", AMDGPU::SRC_EXECZ)
2258    .Case("src_execz", AMDGPU::SRC_EXECZ)
2259    .Case("scc", AMDGPU::SRC_SCC)
2260    .Case("src_scc", AMDGPU::SRC_SCC)
2261    .Case("tba", AMDGPU::TBA)
2262    .Case("tma", AMDGPU::TMA)
2263    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2264    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2265    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2266    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2267    .Case("vcc_lo", AMDGPU::VCC_LO)
2268    .Case("vcc_hi", AMDGPU::VCC_HI)
2269    .Case("exec_lo", AMDGPU::EXEC_LO)
2270    .Case("exec_hi", AMDGPU::EXEC_HI)
2271    .Case("tma_lo", AMDGPU::TMA_LO)
2272    .Case("tma_hi", AMDGPU::TMA_HI)
2273    .Case("tba_lo", AMDGPU::TBA_LO)
2274    .Case("tba_hi", AMDGPU::TBA_HI)
2275    .Case("pc", AMDGPU::PC_REG)
2276    .Case("null", AMDGPU::SGPR_NULL)
2277    .Default(AMDGPU::NoRegister);
2278}
2279
2280bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2281                                    SMLoc &EndLoc, bool RestoreOnFailure) {
2282  auto R = parseRegister();
2283  if (!R) return true;
2284  assert(R->isReg());
2285  RegNo = R->getReg();
2286  StartLoc = R->getStartLoc();
2287  EndLoc = R->getEndLoc();
2288  return false;
2289}
2290
2291bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2292                                    SMLoc &EndLoc) {
2293  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2294}
2295
2296OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2297                                                       SMLoc &StartLoc,
2298                                                       SMLoc &EndLoc) {
2299  bool Result =
2300      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2301  bool PendingErrors = getParser().hasPendingError();
2302  getParser().clearPendingErrors();
2303  if (PendingErrors)
2304    return MatchOperand_ParseFail;
2305  if (Result)
2306    return MatchOperand_NoMatch;
2307  return MatchOperand_Success;
2308}
2309
2310bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2311                                            RegisterKind RegKind, unsigned Reg1,
2312                                            SMLoc Loc) {
2313  switch (RegKind) {
2314  case IS_SPECIAL:
2315    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2316      Reg = AMDGPU::EXEC;
2317      RegWidth = 2;
2318      return true;
2319    }
2320    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2321      Reg = AMDGPU::FLAT_SCR;
2322      RegWidth = 2;
2323      return true;
2324    }
2325    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2326      Reg = AMDGPU::XNACK_MASK;
2327      RegWidth = 2;
2328      return true;
2329    }
2330    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2331      Reg = AMDGPU::VCC;
2332      RegWidth = 2;
2333      return true;
2334    }
2335    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2336      Reg = AMDGPU::TBA;
2337      RegWidth = 2;
2338      return true;
2339    }
2340    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2341      Reg = AMDGPU::TMA;
2342      RegWidth = 2;
2343      return true;
2344    }
2345    Error(Loc, "register does not fit in the list");
2346    return false;
2347  case IS_VGPR:
2348  case IS_SGPR:
2349  case IS_AGPR:
2350  case IS_TTMP:
2351    if (Reg1 != Reg + RegWidth) {
2352      Error(Loc, "registers in a list must have consecutive indices");
2353      return false;
2354    }
2355    RegWidth++;
2356    return true;
2357  default:
2358    llvm_unreachable("unexpected register kind");
2359  }
2360}
2361
2362struct RegInfo {
2363  StringLiteral Name;
2364  RegisterKind Kind;
2365};
2366
2367static constexpr RegInfo RegularRegisters[] = {
2368  {{"v"},    IS_VGPR},
2369  {{"s"},    IS_SGPR},
2370  {{"ttmp"}, IS_TTMP},
2371  {{"acc"},  IS_AGPR},
2372  {{"a"},    IS_AGPR},
2373};
2374
2375static bool isRegularReg(RegisterKind Kind) {
2376  return Kind == IS_VGPR ||
2377         Kind == IS_SGPR ||
2378         Kind == IS_TTMP ||
2379         Kind == IS_AGPR;
2380}
2381
2382static const RegInfo* getRegularRegInfo(StringRef Str) {
2383  for (const RegInfo &Reg : RegularRegisters)
2384    if (Str.startswith(Reg.Name))
2385      return &Reg;
2386  return nullptr;
2387}
2388
2389static bool getRegNum(StringRef Str, unsigned& Num) {
2390  return !Str.getAsInteger(10, Num);
2391}
2392
2393bool
2394AMDGPUAsmParser::isRegister(const AsmToken &Token,
2395                            const AsmToken &NextToken) const {
2396
2397  // A list of consecutive registers: [s0,s1,s2,s3]
2398  if (Token.is(AsmToken::LBrac))
2399    return true;
2400
2401  if (!Token.is(AsmToken::Identifier))
2402    return false;
2403
2404  // A single register like s0 or a range of registers like s[0:1]
2405
2406  StringRef Str = Token.getString();
2407  const RegInfo *Reg = getRegularRegInfo(Str);
2408  if (Reg) {
2409    StringRef RegName = Reg->Name;
2410    StringRef RegSuffix = Str.substr(RegName.size());
2411    if (!RegSuffix.empty()) {
2412      unsigned Num;
2413      // A single register with an index: rXX
2414      if (getRegNum(RegSuffix, Num))
2415        return true;
2416    } else {
2417      // A range of registers: r[XX:YY].
2418      if (NextToken.is(AsmToken::LBrac))
2419        return true;
2420    }
2421  }
2422
2423  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2424}
2425
2426bool
2427AMDGPUAsmParser::isRegister()
2428{
2429  return isRegister(getToken(), peekToken());
2430}
2431
2432unsigned
2433AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2434                               unsigned RegNum,
2435                               unsigned RegWidth,
2436                               SMLoc Loc) {
2437
2438  assert(isRegularReg(RegKind));
2439
2440  unsigned AlignSize = 1;
2441  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2442    // SGPR and TTMP registers must be aligned.
2443    // Max required alignment is 4 dwords.
2444    AlignSize = std::min(RegWidth, 4u);
2445  }
2446
2447  if (RegNum % AlignSize != 0) {
2448    Error(Loc, "invalid register alignment");
2449    return AMDGPU::NoRegister;
2450  }
2451
2452  unsigned RegIdx = RegNum / AlignSize;
2453  int RCID = getRegClass(RegKind, RegWidth);
2454  if (RCID == -1) {
2455    Error(Loc, "invalid or unsupported register size");
2456    return AMDGPU::NoRegister;
2457  }
2458
2459  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2460  const MCRegisterClass RC = TRI->getRegClass(RCID);
2461  if (RegIdx >= RC.getNumRegs()) {
2462    Error(Loc, "register index is out of range");
2463    return AMDGPU::NoRegister;
2464  }
2465
2466  return RC.getRegister(RegIdx);
2467}
2468
2469bool
2470AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2471  int64_t RegLo, RegHi;
2472  if (!skipToken(AsmToken::LBrac, "missing register index"))
2473    return false;
2474
2475  SMLoc FirstIdxLoc = getLoc();
2476  SMLoc SecondIdxLoc;
2477
2478  if (!parseExpr(RegLo))
2479    return false;
2480
2481  if (trySkipToken(AsmToken::Colon)) {
2482    SecondIdxLoc = getLoc();
2483    if (!parseExpr(RegHi))
2484      return false;
2485  } else {
2486    RegHi = RegLo;
2487  }
2488
2489  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2490    return false;
2491
2492  if (!isUInt<32>(RegLo)) {
2493    Error(FirstIdxLoc, "invalid register index");
2494    return false;
2495  }
2496
2497  if (!isUInt<32>(RegHi)) {
2498    Error(SecondIdxLoc, "invalid register index");
2499    return false;
2500  }
2501
2502  if (RegLo > RegHi) {
2503    Error(FirstIdxLoc, "first register index should not exceed second index");
2504    return false;
2505  }
2506
2507  Num = static_cast<unsigned>(RegLo);
2508  Width = (RegHi - RegLo) + 1;
2509  return true;
2510}
2511
2512unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2513                                          unsigned &RegNum, unsigned &RegWidth,
2514                                          SmallVectorImpl<AsmToken> &Tokens) {
2515  assert(isToken(AsmToken::Identifier));
2516  unsigned Reg = getSpecialRegForName(getTokenStr());
2517  if (Reg) {
2518    RegNum = 0;
2519    RegWidth = 1;
2520    RegKind = IS_SPECIAL;
2521    Tokens.push_back(getToken());
2522    lex(); // skip register name
2523  }
2524  return Reg;
2525}
2526
2527unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2528                                          unsigned &RegNum, unsigned &RegWidth,
2529                                          SmallVectorImpl<AsmToken> &Tokens) {
2530  assert(isToken(AsmToken::Identifier));
2531  StringRef RegName = getTokenStr();
2532  auto Loc = getLoc();
2533
2534  const RegInfo *RI = getRegularRegInfo(RegName);
2535  if (!RI) {
2536    Error(Loc, "invalid register name");
2537    return AMDGPU::NoRegister;
2538  }
2539
2540  Tokens.push_back(getToken());
2541  lex(); // skip register name
2542
2543  RegKind = RI->Kind;
2544  StringRef RegSuffix = RegName.substr(RI->Name.size());
2545  if (!RegSuffix.empty()) {
2546    // Single 32-bit register: vXX.
2547    if (!getRegNum(RegSuffix, RegNum)) {
2548      Error(Loc, "invalid register index");
2549      return AMDGPU::NoRegister;
2550    }
2551    RegWidth = 1;
2552  } else {
2553    // Range of registers: v[XX:YY]. ":YY" is optional.
2554    if (!ParseRegRange(RegNum, RegWidth))
2555      return AMDGPU::NoRegister;
2556  }
2557
2558  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2559}
2560
2561unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2562                                       unsigned &RegWidth,
2563                                       SmallVectorImpl<AsmToken> &Tokens) {
2564  unsigned Reg = AMDGPU::NoRegister;
2565  auto ListLoc = getLoc();
2566
2567  if (!skipToken(AsmToken::LBrac,
2568                 "expected a register or a list of registers")) {
2569    return AMDGPU::NoRegister;
2570  }
2571
2572  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2573
2574  auto Loc = getLoc();
2575  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2576    return AMDGPU::NoRegister;
2577  if (RegWidth != 1) {
2578    Error(Loc, "expected a single 32-bit register");
2579    return AMDGPU::NoRegister;
2580  }
2581
2582  for (; trySkipToken(AsmToken::Comma); ) {
2583    RegisterKind NextRegKind;
2584    unsigned NextReg, NextRegNum, NextRegWidth;
2585    Loc = getLoc();
2586
2587    if (!ParseAMDGPURegister(NextRegKind, NextReg,
2588                             NextRegNum, NextRegWidth,
2589                             Tokens)) {
2590      return AMDGPU::NoRegister;
2591    }
2592    if (NextRegWidth != 1) {
2593      Error(Loc, "expected a single 32-bit register");
2594      return AMDGPU::NoRegister;
2595    }
2596    if (NextRegKind != RegKind) {
2597      Error(Loc, "registers in a list must be of the same kind");
2598      return AMDGPU::NoRegister;
2599    }
2600    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2601      return AMDGPU::NoRegister;
2602  }
2603
2604  if (!skipToken(AsmToken::RBrac,
2605                 "expected a comma or a closing square bracket")) {
2606    return AMDGPU::NoRegister;
2607  }
2608
2609  if (isRegularReg(RegKind))
2610    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2611
2612  return Reg;
2613}
2614
2615bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2616                                          unsigned &RegNum, unsigned &RegWidth,
2617                                          SmallVectorImpl<AsmToken> &Tokens) {
2618  auto Loc = getLoc();
2619  Reg = AMDGPU::NoRegister;
2620
2621  if (isToken(AsmToken::Identifier)) {
2622    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2623    if (Reg == AMDGPU::NoRegister)
2624      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2625  } else {
2626    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2627  }
2628
2629  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2630  if (Reg == AMDGPU::NoRegister) {
2631    assert(Parser.hasPendingError());
2632    return false;
2633  }
2634
2635  if (!subtargetHasRegister(*TRI, Reg)) {
2636    if (Reg == AMDGPU::SGPR_NULL) {
2637      Error(Loc, "'null' operand is not supported on this GPU");
2638    } else {
2639      Error(Loc, "register not available on this GPU");
2640    }
2641    return false;
2642  }
2643
2644  return true;
2645}
2646
2647bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2648                                          unsigned &RegNum, unsigned &RegWidth,
2649                                          bool RestoreOnFailure /*=false*/) {
2650  Reg = AMDGPU::NoRegister;
2651
2652  SmallVector<AsmToken, 1> Tokens;
2653  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2654    if (RestoreOnFailure) {
2655      while (!Tokens.empty()) {
2656        getLexer().UnLex(Tokens.pop_back_val());
2657      }
2658    }
2659    return true;
2660  }
2661  return false;
2662}
2663
2664Optional<StringRef>
2665AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2666  switch (RegKind) {
2667  case IS_VGPR:
2668    return StringRef(".amdgcn.next_free_vgpr");
2669  case IS_SGPR:
2670    return StringRef(".amdgcn.next_free_sgpr");
2671  default:
2672    return None;
2673  }
2674}
2675
2676void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2677  auto SymbolName = getGprCountSymbolName(RegKind);
2678  assert(SymbolName && "initializing invalid register kind");
2679  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2680  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2681}
2682
2683bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2684                                            unsigned DwordRegIndex,
2685                                            unsigned RegWidth) {
2686  // Symbols are only defined for GCN targets
2687  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2688    return true;
2689
2690  auto SymbolName = getGprCountSymbolName(RegKind);
2691  if (!SymbolName)
2692    return true;
2693  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2694
2695  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2696  int64_t OldCount;
2697
2698  if (!Sym->isVariable())
2699    return !Error(getLoc(),
2700                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2701  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2702    return !Error(
2703        getLoc(),
2704        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2705
2706  if (OldCount <= NewMax)
2707    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2708
2709  return true;
2710}
2711
2712std::unique_ptr<AMDGPUOperand>
2713AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2714  const auto &Tok = getToken();
2715  SMLoc StartLoc = Tok.getLoc();
2716  SMLoc EndLoc = Tok.getEndLoc();
2717  RegisterKind RegKind;
2718  unsigned Reg, RegNum, RegWidth;
2719
2720  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2721    return nullptr;
2722  }
2723  if (isHsaAbiVersion3Or4(&getSTI())) {
2724    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2725      return nullptr;
2726  } else
2727    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2728  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2729}
2730
2731OperandMatchResultTy
2732AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2733  // TODO: add syntactic sugar for 1/(2*PI)
2734
2735  assert(!isRegister());
2736  assert(!isModifier());
2737
2738  const auto& Tok = getToken();
2739  const auto& NextTok = peekToken();
2740  bool IsReal = Tok.is(AsmToken::Real);
2741  SMLoc S = getLoc();
2742  bool Negate = false;
2743
2744  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2745    lex();
2746    IsReal = true;
2747    Negate = true;
2748  }
2749
2750  if (IsReal) {
2751    // Floating-point expressions are not supported.
2752    // Can only allow floating-point literals with an
2753    // optional sign.
2754
2755    StringRef Num = getTokenStr();
2756    lex();
2757
2758    APFloat RealVal(APFloat::IEEEdouble());
2759    auto roundMode = APFloat::rmNearestTiesToEven;
2760    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2761      return MatchOperand_ParseFail;
2762    }
2763    if (Negate)
2764      RealVal.changeSign();
2765
2766    Operands.push_back(
2767      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2768                               AMDGPUOperand::ImmTyNone, true));
2769
2770    return MatchOperand_Success;
2771
2772  } else {
2773    int64_t IntVal;
2774    const MCExpr *Expr;
2775    SMLoc S = getLoc();
2776
2777    if (HasSP3AbsModifier) {
2778      // This is a workaround for handling expressions
2779      // as arguments of SP3 'abs' modifier, for example:
2780      //     |1.0|
2781      //     |-1|
2782      //     |1+x|
2783      // This syntax is not compatible with syntax of standard
2784      // MC expressions (due to the trailing '|').
2785      SMLoc EndLoc;
2786      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2787        return MatchOperand_ParseFail;
2788    } else {
2789      if (Parser.parseExpression(Expr))
2790        return MatchOperand_ParseFail;
2791    }
2792
2793    if (Expr->evaluateAsAbsolute(IntVal)) {
2794      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2795    } else {
2796      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2797    }
2798
2799    return MatchOperand_Success;
2800  }
2801
2802  return MatchOperand_NoMatch;
2803}
2804
2805OperandMatchResultTy
2806AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2807  if (!isRegister())
2808    return MatchOperand_NoMatch;
2809
2810  if (auto R = parseRegister()) {
2811    assert(R->isReg());
2812    Operands.push_back(std::move(R));
2813    return MatchOperand_Success;
2814  }
2815  return MatchOperand_ParseFail;
2816}
2817
2818OperandMatchResultTy
2819AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2820  auto res = parseReg(Operands);
2821  if (res != MatchOperand_NoMatch) {
2822    return res;
2823  } else if (isModifier()) {
2824    return MatchOperand_NoMatch;
2825  } else {
2826    return parseImm(Operands, HasSP3AbsMod);
2827  }
2828}
2829
2830bool
2831AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2832  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2833    const auto &str = Token.getString();
2834    return str == "abs" || str == "neg" || str == "sext";
2835  }
2836  return false;
2837}
2838
2839bool
2840AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2841  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2842}
2843
2844bool
2845AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2846  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2847}
2848
2849bool
2850AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2851  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2852}
2853
2854// Check if this is an operand modifier or an opcode modifier
2855// which may look like an expression but it is not. We should
2856// avoid parsing these modifiers as expressions. Currently
2857// recognized sequences are:
2858//   |...|
2859//   abs(...)
2860//   neg(...)
2861//   sext(...)
2862//   -reg
2863//   -|...|
2864//   -abs(...)
2865//   name:...
2866// Note that simple opcode modifiers like 'gds' may be parsed as
2867// expressions; this is a special case. See getExpressionAsToken.
2868//
2869bool
2870AMDGPUAsmParser::isModifier() {
2871
2872  AsmToken Tok = getToken();
2873  AsmToken NextToken[2];
2874  peekTokens(NextToken);
2875
2876  return isOperandModifier(Tok, NextToken[0]) ||
2877         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2878         isOpcodeModifierWithVal(Tok, NextToken[0]);
2879}
2880
2881// Check if the current token is an SP3 'neg' modifier.
2882// Currently this modifier is allowed in the following context:
2883//
2884// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2885// 2. Before an 'abs' modifier: -abs(...)
2886// 3. Before an SP3 'abs' modifier: -|...|
2887//
2888// In all other cases "-" is handled as a part
2889// of an expression that follows the sign.
2890//
2891// Note: When "-" is followed by an integer literal,
2892// this is interpreted as integer negation rather
2893// than a floating-point NEG modifier applied to N.
2894// Beside being contr-intuitive, such use of floating-point
2895// NEG modifier would have resulted in different meaning
2896// of integer literals used with VOP1/2/C and VOP3,
2897// for example:
2898//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2899//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2900// Negative fp literals with preceding "-" are
2901// handled likewise for unifomtity
2902//
2903bool
2904AMDGPUAsmParser::parseSP3NegModifier() {
2905
2906  AsmToken NextToken[2];
2907  peekTokens(NextToken);
2908
2909  if (isToken(AsmToken::Minus) &&
2910      (isRegister(NextToken[0], NextToken[1]) ||
2911       NextToken[0].is(AsmToken::Pipe) ||
2912       isId(NextToken[0], "abs"))) {
2913    lex();
2914    return true;
2915  }
2916
2917  return false;
2918}
2919
2920OperandMatchResultTy
2921AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2922                                              bool AllowImm) {
2923  bool Neg, SP3Neg;
2924  bool Abs, SP3Abs;
2925  SMLoc Loc;
2926
2927  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2928  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2929    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2930    return MatchOperand_ParseFail;
2931  }
2932
2933  SP3Neg = parseSP3NegModifier();
2934
2935  Loc = getLoc();
2936  Neg = trySkipId("neg");
2937  if (Neg && SP3Neg) {
2938    Error(Loc, "expected register or immediate");
2939    return MatchOperand_ParseFail;
2940  }
2941  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2942    return MatchOperand_ParseFail;
2943
2944  Abs = trySkipId("abs");
2945  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2946    return MatchOperand_ParseFail;
2947
2948  Loc = getLoc();
2949  SP3Abs = trySkipToken(AsmToken::Pipe);
2950  if (Abs && SP3Abs) {
2951    Error(Loc, "expected register or immediate");
2952    return MatchOperand_ParseFail;
2953  }
2954
2955  OperandMatchResultTy Res;
2956  if (AllowImm) {
2957    Res = parseRegOrImm(Operands, SP3Abs);
2958  } else {
2959    Res = parseReg(Operands);
2960  }
2961  if (Res != MatchOperand_Success) {
2962    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2963  }
2964
2965  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2966    return MatchOperand_ParseFail;
2967  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2968    return MatchOperand_ParseFail;
2969  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2970    return MatchOperand_ParseFail;
2971
2972  AMDGPUOperand::Modifiers Mods;
2973  Mods.Abs = Abs || SP3Abs;
2974  Mods.Neg = Neg || SP3Neg;
2975
2976  if (Mods.hasFPModifiers()) {
2977    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2978    if (Op.isExpr()) {
2979      Error(Op.getStartLoc(), "expected an absolute expression");
2980      return MatchOperand_ParseFail;
2981    }
2982    Op.setModifiers(Mods);
2983  }
2984  return MatchOperand_Success;
2985}
2986
2987OperandMatchResultTy
2988AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2989                                               bool AllowImm) {
2990  bool Sext = trySkipId("sext");
2991  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2992    return MatchOperand_ParseFail;
2993
2994  OperandMatchResultTy Res;
2995  if (AllowImm) {
2996    Res = parseRegOrImm(Operands);
2997  } else {
2998    Res = parseReg(Operands);
2999  }
3000  if (Res != MatchOperand_Success) {
3001    return Sext? MatchOperand_ParseFail : Res;
3002  }
3003
3004  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3005    return MatchOperand_ParseFail;
3006
3007  AMDGPUOperand::Modifiers Mods;
3008  Mods.Sext = Sext;
3009
3010  if (Mods.hasIntModifiers()) {
3011    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3012    if (Op.isExpr()) {
3013      Error(Op.getStartLoc(), "expected an absolute expression");
3014      return MatchOperand_ParseFail;
3015    }
3016    Op.setModifiers(Mods);
3017  }
3018
3019  return MatchOperand_Success;
3020}
3021
3022OperandMatchResultTy
3023AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3024  return parseRegOrImmWithFPInputMods(Operands, false);
3025}
3026
3027OperandMatchResultTy
3028AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3029  return parseRegOrImmWithIntInputMods(Operands, false);
3030}
3031
3032OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3033  auto Loc = getLoc();
3034  if (trySkipId("off")) {
3035    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3036                                                AMDGPUOperand::ImmTyOff, false));
3037    return MatchOperand_Success;
3038  }
3039
3040  if (!isRegister())
3041    return MatchOperand_NoMatch;
3042
3043  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3044  if (Reg) {
3045    Operands.push_back(std::move(Reg));
3046    return MatchOperand_Success;
3047  }
3048
3049  return MatchOperand_ParseFail;
3050
3051}
3052
3053unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3054  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3055
3056  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3057      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3058      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3059      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3060    return Match_InvalidOperand;
3061
3062  if ((TSFlags & SIInstrFlags::VOP3) &&
3063      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3064      getForcedEncodingSize() != 64)
3065    return Match_PreferE32;
3066
3067  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3068      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3069    // v_mac_f32/16 allow only dst_sel == DWORD;
3070    auto OpNum =
3071        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3072    const auto &Op = Inst.getOperand(OpNum);
3073    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3074      return Match_InvalidOperand;
3075    }
3076  }
3077
3078  return Match_Success;
3079}
3080
3081static ArrayRef<unsigned> getAllVariants() {
3082  static const unsigned Variants[] = {
3083    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3084    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3085  };
3086
3087  return makeArrayRef(Variants);
3088}
3089
3090// What asm variants we should check
3091ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3092  if (getForcedEncodingSize() == 32) {
3093    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3094    return makeArrayRef(Variants);
3095  }
3096
3097  if (isForcedVOP3()) {
3098    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3099    return makeArrayRef(Variants);
3100  }
3101
3102  if (isForcedSDWA()) {
3103    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3104                                        AMDGPUAsmVariants::SDWA9};
3105    return makeArrayRef(Variants);
3106  }
3107
3108  if (isForcedDPP()) {
3109    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3110    return makeArrayRef(Variants);
3111  }
3112
3113  return getAllVariants();
3114}
3115
3116StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3117  if (getForcedEncodingSize() == 32)
3118    return "e32";
3119
3120  if (isForcedVOP3())
3121    return "e64";
3122
3123  if (isForcedSDWA())
3124    return "sdwa";
3125
3126  if (isForcedDPP())
3127    return "dpp";
3128
3129  return "";
3130}
3131
3132unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3133  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3134  const unsigned Num = Desc.getNumImplicitUses();
3135  for (unsigned i = 0; i < Num; ++i) {
3136    unsigned Reg = Desc.ImplicitUses[i];
3137    switch (Reg) {
3138    case AMDGPU::FLAT_SCR:
3139    case AMDGPU::VCC:
3140    case AMDGPU::VCC_LO:
3141    case AMDGPU::VCC_HI:
3142    case AMDGPU::M0:
3143      return Reg;
3144    default:
3145      break;
3146    }
3147  }
3148  return AMDGPU::NoRegister;
3149}
3150
3151// NB: This code is correct only when used to check constant
3152// bus limitations because GFX7 support no f16 inline constants.
3153// Note that there are no cases when a GFX7 opcode violates
3154// constant bus limitations due to the use of an f16 constant.
3155bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3156                                       unsigned OpIdx) const {
3157  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3158
3159  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3160    return false;
3161  }
3162
3163  const MCOperand &MO = Inst.getOperand(OpIdx);
3164
3165  int64_t Val = MO.getImm();
3166  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3167
3168  switch (OpSize) { // expected operand size
3169  case 8:
3170    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3171  case 4:
3172    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3173  case 2: {
3174    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3175    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3176        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3177        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3178      return AMDGPU::isInlinableIntLiteral(Val);
3179
3180    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3181        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3182        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3183      return AMDGPU::isInlinableIntLiteralV216(Val);
3184
3185    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3186        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3187        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3188      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3189
3190    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3191  }
3192  default:
3193    llvm_unreachable("invalid operand size");
3194  }
3195}
3196
3197unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3198  if (!isGFX10Plus())
3199    return 1;
3200
3201  switch (Opcode) {
3202  // 64-bit shift instructions can use only one scalar value input
3203  case AMDGPU::V_LSHLREV_B64_e64:
3204  case AMDGPU::V_LSHLREV_B64_gfx10:
3205  case AMDGPU::V_LSHRREV_B64_e64:
3206  case AMDGPU::V_LSHRREV_B64_gfx10:
3207  case AMDGPU::V_ASHRREV_I64_e64:
3208  case AMDGPU::V_ASHRREV_I64_gfx10:
3209  case AMDGPU::V_LSHL_B64_e64:
3210  case AMDGPU::V_LSHR_B64_e64:
3211  case AMDGPU::V_ASHR_I64_e64:
3212    return 1;
3213  default:
3214    return 2;
3215  }
3216}
3217
3218bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3219  const MCOperand &MO = Inst.getOperand(OpIdx);
3220  if (MO.isImm()) {
3221    return !isInlineConstant(Inst, OpIdx);
3222  } else if (MO.isReg()) {
3223    auto Reg = MO.getReg();
3224    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3225    auto PReg = mc2PseudoReg(Reg);
3226    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3227  } else {
3228    return true;
3229  }
3230}
3231
3232bool
3233AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3234                                                const OperandVector &Operands) {
3235  const unsigned Opcode = Inst.getOpcode();
3236  const MCInstrDesc &Desc = MII.get(Opcode);
3237  unsigned LastSGPR = AMDGPU::NoRegister;
3238  unsigned ConstantBusUseCount = 0;
3239  unsigned NumLiterals = 0;
3240  unsigned LiteralSize;
3241
3242  if (Desc.TSFlags &
3243      (SIInstrFlags::VOPC |
3244       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3245       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3246       SIInstrFlags::SDWA)) {
3247    // Check special imm operands (used by madmk, etc)
3248    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3249      ++ConstantBusUseCount;
3250    }
3251
3252    SmallDenseSet<unsigned> SGPRsUsed;
3253    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3254    if (SGPRUsed != AMDGPU::NoRegister) {
3255      SGPRsUsed.insert(SGPRUsed);
3256      ++ConstantBusUseCount;
3257    }
3258
3259    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3260    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3261    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3262
3263    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3264
3265    for (int OpIdx : OpIndices) {
3266      if (OpIdx == -1) break;
3267
3268      const MCOperand &MO = Inst.getOperand(OpIdx);
3269      if (usesConstantBus(Inst, OpIdx)) {
3270        if (MO.isReg()) {
3271          LastSGPR = mc2PseudoReg(MO.getReg());
3272          // Pairs of registers with a partial intersections like these
3273          //   s0, s[0:1]
3274          //   flat_scratch_lo, flat_scratch
3275          //   flat_scratch_lo, flat_scratch_hi
3276          // are theoretically valid but they are disabled anyway.
3277          // Note that this code mimics SIInstrInfo::verifyInstruction
3278          if (!SGPRsUsed.count(LastSGPR)) {
3279            SGPRsUsed.insert(LastSGPR);
3280            ++ConstantBusUseCount;
3281          }
3282        } else { // Expression or a literal
3283
3284          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3285            continue; // special operand like VINTERP attr_chan
3286
3287          // An instruction may use only one literal.
3288          // This has been validated on the previous step.
3289          // See validateVOP3Literal.
3290          // This literal may be used as more than one operand.
3291          // If all these operands are of the same size,
3292          // this literal counts as one scalar value.
3293          // Otherwise it counts as 2 scalar values.
3294          // See "GFX10 Shader Programming", section 3.6.2.3.
3295
3296          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3297          if (Size < 4) Size = 4;
3298
3299          if (NumLiterals == 0) {
3300            NumLiterals = 1;
3301            LiteralSize = Size;
3302          } else if (LiteralSize != Size) {
3303            NumLiterals = 2;
3304          }
3305        }
3306      }
3307    }
3308  }
3309  ConstantBusUseCount += NumLiterals;
3310
3311  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3312    return true;
3313
3314  SMLoc LitLoc = getLitLoc(Operands);
3315  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3316  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3317  Error(Loc, "invalid operand (violates constant bus restrictions)");
3318  return false;
3319}
3320
3321bool
3322AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3323                                                 const OperandVector &Operands) {
3324  const unsigned Opcode = Inst.getOpcode();
3325  const MCInstrDesc &Desc = MII.get(Opcode);
3326
3327  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3328  if (DstIdx == -1 ||
3329      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3330    return true;
3331  }
3332
3333  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3334
3335  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3336  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3337  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3338
3339  assert(DstIdx != -1);
3340  const MCOperand &Dst = Inst.getOperand(DstIdx);
3341  assert(Dst.isReg());
3342  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3343
3344  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3345
3346  for (int SrcIdx : SrcIndices) {
3347    if (SrcIdx == -1) break;
3348    const MCOperand &Src = Inst.getOperand(SrcIdx);
3349    if (Src.isReg()) {
3350      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3351      if (isRegIntersect(DstReg, SrcReg, TRI)) {
3352        Error(getRegLoc(SrcReg, Operands),
3353          "destination must be different than all sources");
3354        return false;
3355      }
3356    }
3357  }
3358
3359  return true;
3360}
3361
3362bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3363
3364  const unsigned Opc = Inst.getOpcode();
3365  const MCInstrDesc &Desc = MII.get(Opc);
3366
3367  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3368    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3369    assert(ClampIdx != -1);
3370    return Inst.getOperand(ClampIdx).getImm() == 0;
3371  }
3372
3373  return true;
3374}
3375
3376bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3377
3378  const unsigned Opc = Inst.getOpcode();
3379  const MCInstrDesc &Desc = MII.get(Opc);
3380
3381  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3382    return true;
3383
3384  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3385  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3386  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3387
3388  assert(VDataIdx != -1);
3389
3390  if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3391    return true;
3392
3393  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3394  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3395  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3396  if (DMask == 0)
3397    DMask = 1;
3398
3399  unsigned DataSize =
3400    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3401  if (hasPackedD16()) {
3402    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3403    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3404      DataSize = (DataSize + 1) / 2;
3405  }
3406
3407  return (VDataSize / 4) == DataSize + TFESize;
3408}
3409
3410bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3411  const unsigned Opc = Inst.getOpcode();
3412  const MCInstrDesc &Desc = MII.get(Opc);
3413
3414  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3415    return true;
3416
3417  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3418
3419  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3420      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3421  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3422  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3423  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3424  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3425
3426  assert(VAddr0Idx != -1);
3427  assert(SrsrcIdx != -1);
3428  assert(SrsrcIdx > VAddr0Idx);
3429
3430  if (DimIdx == -1)
3431    return true; // intersect_ray
3432
3433  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3434  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3435  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3436  unsigned VAddrSize =
3437      IsNSA ? SrsrcIdx - VAddr0Idx
3438            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3439  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3440
3441  unsigned AddrSize =
3442      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3443
3444  if (!IsNSA) {
3445    if (AddrSize > 8)
3446      AddrSize = 16;
3447    else if (AddrSize > 4)
3448      AddrSize = 8;
3449  }
3450
3451  return VAddrSize == AddrSize;
3452}
3453
3454bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3455
3456  const unsigned Opc = Inst.getOpcode();
3457  const MCInstrDesc &Desc = MII.get(Opc);
3458
3459  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3460    return true;
3461  if (!Desc.mayLoad() || !Desc.mayStore())
3462    return true; // Not atomic
3463
3464  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3465  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3466
3467  // This is an incomplete check because image_atomic_cmpswap
3468  // may only use 0x3 and 0xf while other atomic operations
3469  // may use 0x1 and 0x3. However these limitations are
3470  // verified when we check that dmask matches dst size.
3471  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3472}
3473
3474bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3475
3476  const unsigned Opc = Inst.getOpcode();
3477  const MCInstrDesc &Desc = MII.get(Opc);
3478
3479  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3480    return true;
3481
3482  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3483  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3484
3485  // GATHER4 instructions use dmask in a different fashion compared to
3486  // other MIMG instructions. The only useful DMASK values are
3487  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3488  // (red,red,red,red) etc.) The ISA document doesn't mention
3489  // this.
3490  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3491}
3492
3493bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3494  const unsigned Opc = Inst.getOpcode();
3495  const MCInstrDesc &Desc = MII.get(Opc);
3496
3497  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3498    return true;
3499
3500  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3501  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3502      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3503
3504  if (!BaseOpcode->MSAA)
3505    return true;
3506
3507  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3508  assert(DimIdx != -1);
3509
3510  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3511  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3512
3513  return DimInfo->MSAA;
3514}
3515
3516static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3517{
3518  switch (Opcode) {
3519  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3520  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3521  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3522    return true;
3523  default:
3524    return false;
3525  }
3526}
3527
3528// movrels* opcodes should only allow VGPRS as src0.
3529// This is specified in .td description for vop1/vop3,
3530// but sdwa is handled differently. See isSDWAOperand.
3531bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3532                                      const OperandVector &Operands) {
3533
3534  const unsigned Opc = Inst.getOpcode();
3535  const MCInstrDesc &Desc = MII.get(Opc);
3536
3537  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3538    return true;
3539
3540  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3541  assert(Src0Idx != -1);
3542
3543  SMLoc ErrLoc;
3544  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3545  if (Src0.isReg()) {
3546    auto Reg = mc2PseudoReg(Src0.getReg());
3547    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3548    if (!isSGPR(Reg, TRI))
3549      return true;
3550    ErrLoc = getRegLoc(Reg, Operands);
3551  } else {
3552    ErrLoc = getConstLoc(Operands);
3553  }
3554
3555  Error(ErrLoc, "source operand must be a VGPR");
3556  return false;
3557}
3558
3559bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3560                                          const OperandVector &Operands) {
3561
3562  const unsigned Opc = Inst.getOpcode();
3563
3564  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3565    return true;
3566
3567  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3568  assert(Src0Idx != -1);
3569
3570  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3571  if (!Src0.isReg())
3572    return true;
3573
3574  auto Reg = mc2PseudoReg(Src0.getReg());
3575  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3576  if (isSGPR(Reg, TRI)) {
3577    Error(getRegLoc(Reg, Operands),
3578          "source operand must be either a VGPR or an inline constant");
3579    return false;
3580  }
3581
3582  return true;
3583}
3584
3585bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3586  switch (Inst.getOpcode()) {
3587  default:
3588    return true;
3589  case V_DIV_SCALE_F32_gfx6_gfx7:
3590  case V_DIV_SCALE_F32_vi:
3591  case V_DIV_SCALE_F32_gfx10:
3592  case V_DIV_SCALE_F64_gfx6_gfx7:
3593  case V_DIV_SCALE_F64_vi:
3594  case V_DIV_SCALE_F64_gfx10:
3595    break;
3596  }
3597
3598  // TODO: Check that src0 = src1 or src2.
3599
3600  for (auto Name : {AMDGPU::OpName::src0_modifiers,
3601                    AMDGPU::OpName::src2_modifiers,
3602                    AMDGPU::OpName::src2_modifiers}) {
3603    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3604            .getImm() &
3605        SISrcMods::ABS) {
3606      return false;
3607    }
3608  }
3609
3610  return true;
3611}
3612
3613bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3614
3615  const unsigned Opc = Inst.getOpcode();
3616  const MCInstrDesc &Desc = MII.get(Opc);
3617
3618  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3619    return true;
3620
3621  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3622  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3623    if (isCI() || isSI())
3624      return false;
3625  }
3626
3627  return true;
3628}
3629
3630bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3631  const unsigned Opc = Inst.getOpcode();
3632  const MCInstrDesc &Desc = MII.get(Opc);
3633
3634  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3635    return true;
3636
3637  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3638  if (DimIdx < 0)
3639    return true;
3640
3641  long Imm = Inst.getOperand(DimIdx).getImm();
3642  if (Imm < 0 || Imm >= 8)
3643    return false;
3644
3645  return true;
3646}
3647
3648static bool IsRevOpcode(const unsigned Opcode)
3649{
3650  switch (Opcode) {
3651  case AMDGPU::V_SUBREV_F32_e32:
3652  case AMDGPU::V_SUBREV_F32_e64:
3653  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3654  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3655  case AMDGPU::V_SUBREV_F32_e32_vi:
3656  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3657  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3658  case AMDGPU::V_SUBREV_F32_e64_vi:
3659
3660  case AMDGPU::V_SUBREV_CO_U32_e32:
3661  case AMDGPU::V_SUBREV_CO_U32_e64:
3662  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3663  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3664
3665  case AMDGPU::V_SUBBREV_U32_e32:
3666  case AMDGPU::V_SUBBREV_U32_e64:
3667  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3668  case AMDGPU::V_SUBBREV_U32_e32_vi:
3669  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3670  case AMDGPU::V_SUBBREV_U32_e64_vi:
3671
3672  case AMDGPU::V_SUBREV_U32_e32:
3673  case AMDGPU::V_SUBREV_U32_e64:
3674  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3675  case AMDGPU::V_SUBREV_U32_e32_vi:
3676  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3677  case AMDGPU::V_SUBREV_U32_e64_vi:
3678
3679  case AMDGPU::V_SUBREV_F16_e32:
3680  case AMDGPU::V_SUBREV_F16_e64:
3681  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3682  case AMDGPU::V_SUBREV_F16_e32_vi:
3683  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3684  case AMDGPU::V_SUBREV_F16_e64_vi:
3685
3686  case AMDGPU::V_SUBREV_U16_e32:
3687  case AMDGPU::V_SUBREV_U16_e64:
3688  case AMDGPU::V_SUBREV_U16_e32_vi:
3689  case AMDGPU::V_SUBREV_U16_e64_vi:
3690
3691  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3692  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3693  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3694
3695  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3696  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3697
3698  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3699  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3700
3701  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3702  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3703
3704  case AMDGPU::V_LSHRREV_B32_e32:
3705  case AMDGPU::V_LSHRREV_B32_e64:
3706  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3707  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3708  case AMDGPU::V_LSHRREV_B32_e32_vi:
3709  case AMDGPU::V_LSHRREV_B32_e64_vi:
3710  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3711  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3712
3713  case AMDGPU::V_ASHRREV_I32_e32:
3714  case AMDGPU::V_ASHRREV_I32_e64:
3715  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3716  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3717  case AMDGPU::V_ASHRREV_I32_e32_vi:
3718  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3719  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3720  case AMDGPU::V_ASHRREV_I32_e64_vi:
3721
3722  case AMDGPU::V_LSHLREV_B32_e32:
3723  case AMDGPU::V_LSHLREV_B32_e64:
3724  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3725  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3726  case AMDGPU::V_LSHLREV_B32_e32_vi:
3727  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3728  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3729  case AMDGPU::V_LSHLREV_B32_e64_vi:
3730
3731  case AMDGPU::V_LSHLREV_B16_e32:
3732  case AMDGPU::V_LSHLREV_B16_e64:
3733  case AMDGPU::V_LSHLREV_B16_e32_vi:
3734  case AMDGPU::V_LSHLREV_B16_e64_vi:
3735  case AMDGPU::V_LSHLREV_B16_gfx10:
3736
3737  case AMDGPU::V_LSHRREV_B16_e32:
3738  case AMDGPU::V_LSHRREV_B16_e64:
3739  case AMDGPU::V_LSHRREV_B16_e32_vi:
3740  case AMDGPU::V_LSHRREV_B16_e64_vi:
3741  case AMDGPU::V_LSHRREV_B16_gfx10:
3742
3743  case AMDGPU::V_ASHRREV_I16_e32:
3744  case AMDGPU::V_ASHRREV_I16_e64:
3745  case AMDGPU::V_ASHRREV_I16_e32_vi:
3746  case AMDGPU::V_ASHRREV_I16_e64_vi:
3747  case AMDGPU::V_ASHRREV_I16_gfx10:
3748
3749  case AMDGPU::V_LSHLREV_B64_e64:
3750  case AMDGPU::V_LSHLREV_B64_gfx10:
3751  case AMDGPU::V_LSHLREV_B64_vi:
3752
3753  case AMDGPU::V_LSHRREV_B64_e64:
3754  case AMDGPU::V_LSHRREV_B64_gfx10:
3755  case AMDGPU::V_LSHRREV_B64_vi:
3756
3757  case AMDGPU::V_ASHRREV_I64_e64:
3758  case AMDGPU::V_ASHRREV_I64_gfx10:
3759  case AMDGPU::V_ASHRREV_I64_vi:
3760
3761  case AMDGPU::V_PK_LSHLREV_B16:
3762  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3763  case AMDGPU::V_PK_LSHLREV_B16_vi:
3764
3765  case AMDGPU::V_PK_LSHRREV_B16:
3766  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3767  case AMDGPU::V_PK_LSHRREV_B16_vi:
3768  case AMDGPU::V_PK_ASHRREV_I16:
3769  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3770  case AMDGPU::V_PK_ASHRREV_I16_vi:
3771    return true;
3772  default:
3773    return false;
3774  }
3775}
3776
3777Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3778
3779  using namespace SIInstrFlags;
3780  const unsigned Opcode = Inst.getOpcode();
3781  const MCInstrDesc &Desc = MII.get(Opcode);
3782
3783  // lds_direct register is defined so that it can be used
3784  // with 9-bit operands only. Ignore encodings which do not accept these.
3785  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3786  if ((Desc.TSFlags & Enc) == 0)
3787    return None;
3788
3789  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3790    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3791    if (SrcIdx == -1)
3792      break;
3793    const auto &Src = Inst.getOperand(SrcIdx);
3794    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3795
3796      if (isGFX90A())
3797        return StringRef("lds_direct is not supported on this GPU");
3798
3799      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3800        return StringRef("lds_direct cannot be used with this instruction");
3801
3802      if (SrcName != OpName::src0)
3803        return StringRef("lds_direct may be used as src0 only");
3804    }
3805  }
3806
3807  return None;
3808}
3809
3810SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3811  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3812    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3813    if (Op.isFlatOffset())
3814      return Op.getStartLoc();
3815  }
3816  return getLoc();
3817}
3818
3819bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3820                                         const OperandVector &Operands) {
3821  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3822  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3823    return true;
3824
3825  auto Opcode = Inst.getOpcode();
3826  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3827  assert(OpNum != -1);
3828
3829  const auto &Op = Inst.getOperand(OpNum);
3830  if (!hasFlatOffsets() && Op.getImm() != 0) {
3831    Error(getFlatOffsetLoc(Operands),
3832          "flat offset modifier is not supported on this GPU");
3833    return false;
3834  }
3835
3836  // For FLAT segment the offset must be positive;
3837  // MSB is ignored and forced to zero.
3838  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3839    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3840    if (!isIntN(OffsetSize, Op.getImm())) {
3841      Error(getFlatOffsetLoc(Operands),
3842            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3843      return false;
3844    }
3845  } else {
3846    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3847    if (!isUIntN(OffsetSize, Op.getImm())) {
3848      Error(getFlatOffsetLoc(Operands),
3849            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3850      return false;
3851    }
3852  }
3853
3854  return true;
3855}
3856
3857SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3858  // Start with second operand because SMEM Offset cannot be dst or src0.
3859  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3860    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3861    if (Op.isSMEMOffset())
3862      return Op.getStartLoc();
3863  }
3864  return getLoc();
3865}
3866
3867bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3868                                         const OperandVector &Operands) {
3869  if (isCI() || isSI())
3870    return true;
3871
3872  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3873  if ((TSFlags & SIInstrFlags::SMRD) == 0)
3874    return true;
3875
3876  auto Opcode = Inst.getOpcode();
3877  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3878  if (OpNum == -1)
3879    return true;
3880
3881  const auto &Op = Inst.getOperand(OpNum);
3882  if (!Op.isImm())
3883    return true;
3884
3885  uint64_t Offset = Op.getImm();
3886  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3887  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3888      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3889    return true;
3890
3891  Error(getSMEMOffsetLoc(Operands),
3892        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3893                               "expected a 21-bit signed offset");
3894
3895  return false;
3896}
3897
3898bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3899  unsigned Opcode = Inst.getOpcode();
3900  const MCInstrDesc &Desc = MII.get(Opcode);
3901  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3902    return true;
3903
3904  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3905  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3906
3907  const int OpIndices[] = { Src0Idx, Src1Idx };
3908
3909  unsigned NumExprs = 0;
3910  unsigned NumLiterals = 0;
3911  uint32_t LiteralValue;
3912
3913  for (int OpIdx : OpIndices) {
3914    if (OpIdx == -1) break;
3915
3916    const MCOperand &MO = Inst.getOperand(OpIdx);
3917    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3918    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3919      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3920        uint32_t Value = static_cast<uint32_t>(MO.getImm());
3921        if (NumLiterals == 0 || LiteralValue != Value) {
3922          LiteralValue = Value;
3923          ++NumLiterals;
3924        }
3925      } else if (MO.isExpr()) {
3926        ++NumExprs;
3927      }
3928    }
3929  }
3930
3931  return NumLiterals + NumExprs <= 1;
3932}
3933
3934bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3935  const unsigned Opc = Inst.getOpcode();
3936  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3937      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3938    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3939    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3940
3941    if (OpSel & ~3)
3942      return false;
3943  }
3944  return true;
3945}
3946
3947bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3948                                  const OperandVector &Operands) {
3949  const unsigned Opc = Inst.getOpcode();
3950  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3951  if (DppCtrlIdx < 0)
3952    return true;
3953  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3954
3955  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3956    // DPP64 is supported for row_newbcast only.
3957    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3958    if (Src0Idx >= 0 &&
3959        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3960      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3961      Error(S, "64 bit dpp only supports row_newbcast");
3962      return false;
3963    }
3964  }
3965
3966  return true;
3967}
3968
3969// Check if VCC register matches wavefront size
3970bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3971  auto FB = getFeatureBits();
3972  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3973    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3974}
3975
3976// VOP3 literal is only allowed in GFX10+ and only one can be used
3977bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3978                                          const OperandVector &Operands) {
3979  unsigned Opcode = Inst.getOpcode();
3980  const MCInstrDesc &Desc = MII.get(Opcode);
3981  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3982    return true;
3983
3984  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3985  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3986  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3987
3988  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3989
3990  unsigned NumExprs = 0;
3991  unsigned NumLiterals = 0;
3992  uint32_t LiteralValue;
3993
3994  for (int OpIdx : OpIndices) {
3995    if (OpIdx == -1) break;
3996
3997    const MCOperand &MO = Inst.getOperand(OpIdx);
3998    if (!MO.isImm() && !MO.isExpr())
3999      continue;
4000    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4001      continue;
4002
4003    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4004        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4005      Error(getConstLoc(Operands),
4006            "inline constants are not allowed for this operand");
4007      return false;
4008    }
4009
4010    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4011      uint32_t Value = static_cast<uint32_t>(MO.getImm());
4012      if (NumLiterals == 0 || LiteralValue != Value) {
4013        LiteralValue = Value;
4014        ++NumLiterals;
4015      }
4016    } else if (MO.isExpr()) {
4017      ++NumExprs;
4018    }
4019  }
4020  NumLiterals += NumExprs;
4021
4022  if (!NumLiterals)
4023    return true;
4024
4025  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4026    Error(getLitLoc(Operands), "literal operands are not supported");
4027    return false;
4028  }
4029
4030  if (NumLiterals > 1) {
4031    Error(getLitLoc(Operands), "only one literal operand is allowed");
4032    return false;
4033  }
4034
4035  return true;
4036}
4037
4038// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4039static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4040                         const MCRegisterInfo *MRI) {
4041  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4042  if (OpIdx < 0)
4043    return -1;
4044
4045  const MCOperand &Op = Inst.getOperand(OpIdx);
4046  if (!Op.isReg())
4047    return -1;
4048
4049  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4050  auto Reg = Sub ? Sub : Op.getReg();
4051  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4052  return AGRP32.contains(Reg) ? 1 : 0;
4053}
4054
4055bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4056  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4057  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4058                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4059                  SIInstrFlags::DS)) == 0)
4060    return true;
4061
4062  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4063                                                      : AMDGPU::OpName::vdata;
4064
4065  const MCRegisterInfo *MRI = getMRI();
4066  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4067  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4068
4069  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4070    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4071    if (Data2Areg >= 0 && Data2Areg != DataAreg)
4072      return false;
4073  }
4074
4075  auto FB = getFeatureBits();
4076  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4077    if (DataAreg < 0 || DstAreg < 0)
4078      return true;
4079    return DstAreg == DataAreg;
4080  }
4081
4082  return DstAreg < 1 && DataAreg < 1;
4083}
4084
4085bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4086  auto FB = getFeatureBits();
4087  if (!FB[AMDGPU::FeatureGFX90AInsts])
4088    return true;
4089
4090  const MCRegisterInfo *MRI = getMRI();
4091  const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4092  const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4093  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4094    const MCOperand &Op = Inst.getOperand(I);
4095    if (!Op.isReg())
4096      continue;
4097
4098    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4099    if (!Sub)
4100      continue;
4101
4102    if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4103      return false;
4104    if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4105      return false;
4106  }
4107
4108  return true;
4109}
4110
4111bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4112                                            const OperandVector &Operands,
4113                                            const SMLoc &IDLoc) {
4114  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4115                                           AMDGPU::OpName::cpol);
4116  if (CPolPos == -1)
4117    return true;
4118
4119  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4120
4121  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4122  if ((TSFlags & (SIInstrFlags::SMRD)) &&
4123      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4124    Error(IDLoc, "invalid cache policy for SMRD instruction");
4125    return false;
4126  }
4127
4128  if (isGFX90A() && (CPol & CPol::SCC)) {
4129    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4130    StringRef CStr(S.getPointer());
4131    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4132    Error(S, "scc is not supported on this GPU");
4133    return false;
4134  }
4135
4136  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4137    return true;
4138
4139  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4140    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4141      Error(IDLoc, "instruction must use glc");
4142      return false;
4143    }
4144  } else {
4145    if (CPol & CPol::GLC) {
4146      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4147      StringRef CStr(S.getPointer());
4148      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4149      Error(S, "instruction must not use glc");
4150      return false;
4151    }
4152  }
4153
4154  return true;
4155}
4156
4157bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4158                                          const SMLoc &IDLoc,
4159                                          const OperandVector &Operands) {
4160  if (auto ErrMsg = validateLdsDirect(Inst)) {
4161    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4162    return false;
4163  }
4164  if (!validateSOPLiteral(Inst)) {
4165    Error(getLitLoc(Operands),
4166      "only one literal operand is allowed");
4167    return false;
4168  }
4169  if (!validateVOP3Literal(Inst, Operands)) {
4170    return false;
4171  }
4172  if (!validateConstantBusLimitations(Inst, Operands)) {
4173    return false;
4174  }
4175  if (!validateEarlyClobberLimitations(Inst, Operands)) {
4176    return false;
4177  }
4178  if (!validateIntClampSupported(Inst)) {
4179    Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4180      "integer clamping is not supported on this GPU");
4181    return false;
4182  }
4183  if (!validateOpSel(Inst)) {
4184    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4185      "invalid op_sel operand");
4186    return false;
4187  }
4188  if (!validateDPP(Inst, Operands)) {
4189    return false;
4190  }
4191  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4192  if (!validateMIMGD16(Inst)) {
4193    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4194      "d16 modifier is not supported on this GPU");
4195    return false;
4196  }
4197  if (!validateMIMGDim(Inst)) {
4198    Error(IDLoc, "dim modifier is required on this GPU");
4199    return false;
4200  }
4201  if (!validateMIMGMSAA(Inst)) {
4202    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4203          "invalid dim; must be MSAA type");
4204    return false;
4205  }
4206  if (!validateMIMGDataSize(Inst)) {
4207    Error(IDLoc,
4208      "image data size does not match dmask and tfe");
4209    return false;
4210  }
4211  if (!validateMIMGAddrSize(Inst)) {
4212    Error(IDLoc,
4213      "image address size does not match dim and a16");
4214    return false;
4215  }
4216  if (!validateMIMGAtomicDMask(Inst)) {
4217    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4218      "invalid atomic image dmask");
4219    return false;
4220  }
4221  if (!validateMIMGGatherDMask(Inst)) {
4222    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4223      "invalid image_gather dmask: only one bit must be set");
4224    return false;
4225  }
4226  if (!validateMovrels(Inst, Operands)) {
4227    return false;
4228  }
4229  if (!validateFlatOffset(Inst, Operands)) {
4230    return false;
4231  }
4232  if (!validateSMEMOffset(Inst, Operands)) {
4233    return false;
4234  }
4235  if (!validateMAIAccWrite(Inst, Operands)) {
4236    return false;
4237  }
4238  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4239    return false;
4240  }
4241
4242  if (!validateAGPRLdSt(Inst)) {
4243    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4244    ? "invalid register class: data and dst should be all VGPR or AGPR"
4245    : "invalid register class: agpr loads and stores not supported on this GPU"
4246    );
4247    return false;
4248  }
4249  if (!validateVGPRAlign(Inst)) {
4250    Error(IDLoc,
4251      "invalid register class: vgpr tuples must be 64 bit aligned");
4252    return false;
4253  }
4254
4255  if (!validateDivScale(Inst)) {
4256    Error(IDLoc, "ABS not allowed in VOP3B instructions");
4257    return false;
4258  }
4259  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4260    return false;
4261  }
4262
4263  return true;
4264}
4265
4266static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4267                                            const FeatureBitset &FBS,
4268                                            unsigned VariantID = 0);
4269
4270static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4271                                const FeatureBitset &AvailableFeatures,
4272                                unsigned VariantID);
4273
4274bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4275                                       const FeatureBitset &FBS) {
4276  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4277}
4278
4279bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4280                                       const FeatureBitset &FBS,
4281                                       ArrayRef<unsigned> Variants) {
4282  for (auto Variant : Variants) {
4283    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4284      return true;
4285  }
4286
4287  return false;
4288}
4289
4290bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4291                                                  const SMLoc &IDLoc) {
4292  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4293
4294  // Check if requested instruction variant is supported.
4295  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4296    return false;
4297
4298  // This instruction is not supported.
4299  // Clear any other pending errors because they are no longer relevant.
4300  getParser().clearPendingErrors();
4301
4302  // Requested instruction variant is not supported.
4303  // Check if any other variants are supported.
4304  StringRef VariantName = getMatchedVariantName();
4305  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4306    return Error(IDLoc,
4307                 Twine(VariantName,
4308                       " variant of this instruction is not supported"));
4309  }
4310
4311  // Finally check if this instruction is supported on any other GPU.
4312  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4313    return Error(IDLoc, "instruction not supported on this GPU");
4314  }
4315
4316  // Instruction not supported on any GPU. Probably a typo.
4317  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4318  return Error(IDLoc, "invalid instruction" + Suggestion);
4319}
4320
4321bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4322                                              OperandVector &Operands,
4323                                              MCStreamer &Out,
4324                                              uint64_t &ErrorInfo,
4325                                              bool MatchingInlineAsm) {
4326  MCInst Inst;
4327  unsigned Result = Match_Success;
4328  for (auto Variant : getMatchedVariants()) {
4329    uint64_t EI;
4330    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4331                                  Variant);
4332    // We order match statuses from least to most specific. We use most specific
4333    // status as resulting
4334    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4335    if ((R == Match_Success) ||
4336        (R == Match_PreferE32) ||
4337        (R == Match_MissingFeature && Result != Match_PreferE32) ||
4338        (R == Match_InvalidOperand && Result != Match_MissingFeature
4339                                   && Result != Match_PreferE32) ||
4340        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4341                                   && Result != Match_MissingFeature
4342                                   && Result != Match_PreferE32)) {
4343      Result = R;
4344      ErrorInfo = EI;
4345    }
4346    if (R == Match_Success)
4347      break;
4348  }
4349
4350  if (Result == Match_Success) {
4351    if (!validateInstruction(Inst, IDLoc, Operands)) {
4352      return true;
4353    }
4354    Inst.setLoc(IDLoc);
4355    Out.emitInstruction(Inst, getSTI());
4356    return false;
4357  }
4358
4359  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4360  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4361    return true;
4362  }
4363
4364  switch (Result) {
4365  default: break;
4366  case Match_MissingFeature:
4367    // It has been verified that the specified instruction
4368    // mnemonic is valid. A match was found but it requires
4369    // features which are not supported on this GPU.
4370    return Error(IDLoc, "operands are not valid for this GPU or mode");
4371
4372  case Match_InvalidOperand: {
4373    SMLoc ErrorLoc = IDLoc;
4374    if (ErrorInfo != ~0ULL) {
4375      if (ErrorInfo >= Operands.size()) {
4376        return Error(IDLoc, "too few operands for instruction");
4377      }
4378      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4379      if (ErrorLoc == SMLoc())
4380        ErrorLoc = IDLoc;
4381    }
4382    return Error(ErrorLoc, "invalid operand for instruction");
4383  }
4384
4385  case Match_PreferE32:
4386    return Error(IDLoc, "internal error: instruction without _e64 suffix "
4387                        "should be encoded as e32");
4388  case Match_MnemonicFail:
4389    llvm_unreachable("Invalid instructions should have been handled already");
4390  }
4391  llvm_unreachable("Implement any new match types added!");
4392}
4393
4394bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4395  int64_t Tmp = -1;
4396  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4397    return true;
4398  }
4399  if (getParser().parseAbsoluteExpression(Tmp)) {
4400    return true;
4401  }
4402  Ret = static_cast<uint32_t>(Tmp);
4403  return false;
4404}
4405
4406bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4407                                               uint32_t &Minor) {
4408  if (ParseAsAbsoluteExpression(Major))
4409    return TokError("invalid major version");
4410
4411  if (!trySkipToken(AsmToken::Comma))
4412    return TokError("minor version number required, comma expected");
4413
4414  if (ParseAsAbsoluteExpression(Minor))
4415    return TokError("invalid minor version");
4416
4417  return false;
4418}
4419
4420bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4421  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4422    return TokError("directive only supported for amdgcn architecture");
4423
4424  std::string TargetIDDirective;
4425  SMLoc TargetStart = getTok().getLoc();
4426  if (getParser().parseEscapedString(TargetIDDirective))
4427    return true;
4428
4429  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4430  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4431    return getParser().Error(TargetRange.Start,
4432        (Twine(".amdgcn_target directive's target id ") +
4433         Twine(TargetIDDirective) +
4434         Twine(" does not match the specified target id ") +
4435         Twine(getTargetStreamer().getTargetID()->toString())).str());
4436
4437  return false;
4438}
4439
4440bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4441  return Error(Range.Start, "value out of range", Range);
4442}
4443
4444bool AMDGPUAsmParser::calculateGPRBlocks(
4445    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4446    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4447    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4448    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4449  // TODO(scott.linder): These calculations are duplicated from
4450  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4451  IsaVersion Version = getIsaVersion(getSTI().getCPU());
4452
4453  unsigned NumVGPRs = NextFreeVGPR;
4454  unsigned NumSGPRs = NextFreeSGPR;
4455
4456  if (Version.Major >= 10)
4457    NumSGPRs = 0;
4458  else {
4459    unsigned MaxAddressableNumSGPRs =
4460        IsaInfo::getAddressableNumSGPRs(&getSTI());
4461
4462    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4463        NumSGPRs > MaxAddressableNumSGPRs)
4464      return OutOfRangeError(SGPRRange);
4465
4466    NumSGPRs +=
4467        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4468
4469    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4470        NumSGPRs > MaxAddressableNumSGPRs)
4471      return OutOfRangeError(SGPRRange);
4472
4473    if (Features.test(FeatureSGPRInitBug))
4474      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4475  }
4476
4477  VGPRBlocks =
4478      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4479  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4480
4481  return false;
4482}
4483
4484bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4485  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4486    return TokError("directive only supported for amdgcn architecture");
4487
4488  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4489    return TokError("directive only supported for amdhsa OS");
4490
4491  StringRef KernelName;
4492  if (getParser().parseIdentifier(KernelName))
4493    return true;
4494
4495  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4496
4497  StringSet<> Seen;
4498
4499  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4500
4501  SMRange VGPRRange;
4502  uint64_t NextFreeVGPR = 0;
4503  uint64_t AccumOffset = 0;
4504  SMRange SGPRRange;
4505  uint64_t NextFreeSGPR = 0;
4506  unsigned UserSGPRCount = 0;
4507  bool ReserveVCC = true;
4508  bool ReserveFlatScr = true;
4509  Optional<bool> EnableWavefrontSize32;
4510
4511  while (true) {
4512    while (trySkipToken(AsmToken::EndOfStatement));
4513
4514    StringRef ID;
4515    SMRange IDRange = getTok().getLocRange();
4516    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4517      return true;
4518
4519    if (ID == ".end_amdhsa_kernel")
4520      break;
4521
4522    if (Seen.find(ID) != Seen.end())
4523      return TokError(".amdhsa_ directives cannot be repeated");
4524    Seen.insert(ID);
4525
4526    SMLoc ValStart = getLoc();
4527    int64_t IVal;
4528    if (getParser().parseAbsoluteExpression(IVal))
4529      return true;
4530    SMLoc ValEnd = getLoc();
4531    SMRange ValRange = SMRange(ValStart, ValEnd);
4532
4533    if (IVal < 0)
4534      return OutOfRangeError(ValRange);
4535
4536    uint64_t Val = IVal;
4537
4538#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4539  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4540    return OutOfRangeError(RANGE);                                             \
4541  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4542
4543    if (ID == ".amdhsa_group_segment_fixed_size") {
4544      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4545        return OutOfRangeError(ValRange);
4546      KD.group_segment_fixed_size = Val;
4547    } else if (ID == ".amdhsa_private_segment_fixed_size") {
4548      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4549        return OutOfRangeError(ValRange);
4550      KD.private_segment_fixed_size = Val;
4551    } else if (ID == ".amdhsa_kernarg_size") {
4552      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4553        return OutOfRangeError(ValRange);
4554      KD.kernarg_size = Val;
4555    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4556      if (hasArchitectedFlatScratch())
4557        return Error(IDRange.Start,
4558                     "directive is not supported with architected flat scratch",
4559                     IDRange);
4560      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4561                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4562                       Val, ValRange);
4563      if (Val)
4564        UserSGPRCount += 4;
4565    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4566      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4567                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4568                       ValRange);
4569      if (Val)
4570        UserSGPRCount += 2;
4571    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4572      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4573                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4574                       ValRange);
4575      if (Val)
4576        UserSGPRCount += 2;
4577    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4578      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4579                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4580                       Val, ValRange);
4581      if (Val)
4582        UserSGPRCount += 2;
4583    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4584      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4585                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4586                       ValRange);
4587      if (Val)
4588        UserSGPRCount += 2;
4589    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4590      if (hasArchitectedFlatScratch())
4591        return Error(IDRange.Start,
4592                     "directive is not supported with architected flat scratch",
4593                     IDRange);
4594      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4595                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4596                       ValRange);
4597      if (Val)
4598        UserSGPRCount += 2;
4599    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4600      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4601                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4602                       Val, ValRange);
4603      if (Val)
4604        UserSGPRCount += 1;
4605    } else if (ID == ".amdhsa_wavefront_size32") {
4606      if (IVersion.Major < 10)
4607        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4608      EnableWavefrontSize32 = Val;
4609      PARSE_BITS_ENTRY(KD.kernel_code_properties,
4610                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4611                       Val, ValRange);
4612    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4613      if (hasArchitectedFlatScratch())
4614        return Error(IDRange.Start,
4615                     "directive is not supported with architected flat scratch",
4616                     IDRange);
4617      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4618                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4619    } else if (ID == ".amdhsa_enable_private_segment") {
4620      if (!hasArchitectedFlatScratch())
4621        return Error(
4622            IDRange.Start,
4623            "directive is not supported without architected flat scratch",
4624            IDRange);
4625      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4626                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4627    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4628      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4629                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4630                       ValRange);
4631    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4632      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4633                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4634                       ValRange);
4635    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4636      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4637                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4638                       ValRange);
4639    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4640      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4641                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4642                       ValRange);
4643    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4644      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4645                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4646                       ValRange);
4647    } else if (ID == ".amdhsa_next_free_vgpr") {
4648      VGPRRange = ValRange;
4649      NextFreeVGPR = Val;
4650    } else if (ID == ".amdhsa_next_free_sgpr") {
4651      SGPRRange = ValRange;
4652      NextFreeSGPR = Val;
4653    } else if (ID == ".amdhsa_accum_offset") {
4654      if (!isGFX90A())
4655        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4656      AccumOffset = Val;
4657    } else if (ID == ".amdhsa_reserve_vcc") {
4658      if (!isUInt<1>(Val))
4659        return OutOfRangeError(ValRange);
4660      ReserveVCC = Val;
4661    } else if (ID == ".amdhsa_reserve_flat_scratch") {
4662      if (IVersion.Major < 7)
4663        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4664      if (hasArchitectedFlatScratch())
4665        return Error(IDRange.Start,
4666                     "directive is not supported with architected flat scratch",
4667                     IDRange);
4668      if (!isUInt<1>(Val))
4669        return OutOfRangeError(ValRange);
4670      ReserveFlatScr = Val;
4671    } else if (ID == ".amdhsa_reserve_xnack_mask") {
4672      if (IVersion.Major < 8)
4673        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4674      if (!isUInt<1>(Val))
4675        return OutOfRangeError(ValRange);
4676      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
4677        return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
4678                                 IDRange);
4679    } else if (ID == ".amdhsa_float_round_mode_32") {
4680      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4681                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4682    } else if (ID == ".amdhsa_float_round_mode_16_64") {
4683      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4684                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4685    } else if (ID == ".amdhsa_float_denorm_mode_32") {
4686      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4687                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4688    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4689      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4690                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4691                       ValRange);
4692    } else if (ID == ".amdhsa_dx10_clamp") {
4693      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4694                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4695    } else if (ID == ".amdhsa_ieee_mode") {
4696      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4697                       Val, ValRange);
4698    } else if (ID == ".amdhsa_fp16_overflow") {
4699      if (IVersion.Major < 9)
4700        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4701      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4702                       ValRange);
4703    } else if (ID == ".amdhsa_tg_split") {
4704      if (!isGFX90A())
4705        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
4706      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
4707                       ValRange);
4708    } else if (ID == ".amdhsa_workgroup_processor_mode") {
4709      if (IVersion.Major < 10)
4710        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4711      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4712                       ValRange);
4713    } else if (ID == ".amdhsa_memory_ordered") {
4714      if (IVersion.Major < 10)
4715        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4716      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4717                       ValRange);
4718    } else if (ID == ".amdhsa_forward_progress") {
4719      if (IVersion.Major < 10)
4720        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4721      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4722                       ValRange);
4723    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4724      PARSE_BITS_ENTRY(
4725          KD.compute_pgm_rsrc2,
4726          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4727          ValRange);
4728    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4729      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4730                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4731                       Val, ValRange);
4732    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4733      PARSE_BITS_ENTRY(
4734          KD.compute_pgm_rsrc2,
4735          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4736          ValRange);
4737    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4738      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4739                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4740                       Val, ValRange);
4741    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4742      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4743                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4744                       Val, ValRange);
4745    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4746      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4747                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4748                       Val, ValRange);
4749    } else if (ID == ".amdhsa_exception_int_div_zero") {
4750      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4751                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4752                       Val, ValRange);
4753    } else {
4754      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4755    }
4756
4757#undef PARSE_BITS_ENTRY
4758  }
4759
4760  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4761    return TokError(".amdhsa_next_free_vgpr directive is required");
4762
4763  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4764    return TokError(".amdhsa_next_free_sgpr directive is required");
4765
4766  unsigned VGPRBlocks;
4767  unsigned SGPRBlocks;
4768  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4769                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
4770                         EnableWavefrontSize32, NextFreeVGPR,
4771                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4772                         SGPRBlocks))
4773    return true;
4774
4775  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4776          VGPRBlocks))
4777    return OutOfRangeError(VGPRRange);
4778  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4779                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4780
4781  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4782          SGPRBlocks))
4783    return OutOfRangeError(SGPRRange);
4784  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4785                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4786                  SGPRBlocks);
4787
4788  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4789    return TokError("too many user SGPRs enabled");
4790  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4791                  UserSGPRCount);
4792
4793  if (isGFX90A()) {
4794    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
4795      return TokError(".amdhsa_accum_offset directive is required");
4796    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
4797      return TokError("accum_offset should be in range [4..256] in "
4798                      "increments of 4");
4799    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
4800      return TokError("accum_offset exceeds total VGPR allocation");
4801    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
4802                    (AccumOffset / 4 - 1));
4803  }
4804
4805  getTargetStreamer().EmitAmdhsaKernelDescriptor(
4806      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4807      ReserveFlatScr);
4808  return false;
4809}
4810
4811bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4812  uint32_t Major;
4813  uint32_t Minor;
4814
4815  if (ParseDirectiveMajorMinor(Major, Minor))
4816    return true;
4817
4818  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4819  return false;
4820}
4821
4822bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4823  uint32_t Major;
4824  uint32_t Minor;
4825  uint32_t Stepping;
4826  StringRef VendorName;
4827  StringRef ArchName;
4828
4829  // If this directive has no arguments, then use the ISA version for the
4830  // targeted GPU.
4831  if (isToken(AsmToken::EndOfStatement)) {
4832    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4833    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
4834                                                        ISA.Stepping,
4835                                                        "AMD", "AMDGPU");
4836    return false;
4837  }
4838
4839  if (ParseDirectiveMajorMinor(Major, Minor))
4840    return true;
4841
4842  if (!trySkipToken(AsmToken::Comma))
4843    return TokError("stepping version number required, comma expected");
4844
4845  if (ParseAsAbsoluteExpression(Stepping))
4846    return TokError("invalid stepping version");
4847
4848  if (!trySkipToken(AsmToken::Comma))
4849    return TokError("vendor name required, comma expected");
4850
4851  if (!parseString(VendorName, "invalid vendor name"))
4852    return true;
4853
4854  if (!trySkipToken(AsmToken::Comma))
4855    return TokError("arch name required, comma expected");
4856
4857  if (!parseString(ArchName, "invalid arch name"))
4858    return true;
4859
4860  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
4861                                                      VendorName, ArchName);
4862  return false;
4863}
4864
4865bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4866                                               amd_kernel_code_t &Header) {
4867  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4868  // assembly for backwards compatibility.
4869  if (ID == "max_scratch_backing_memory_byte_size") {
4870    Parser.eatToEndOfStatement();
4871    return false;
4872  }
4873
4874  SmallString<40> ErrStr;
4875  raw_svector_ostream Err(ErrStr);
4876  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4877    return TokError(Err.str());
4878  }
4879  Lex();
4880
4881  if (ID == "enable_wavefront_size32") {
4882    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4883      if (!isGFX10Plus())
4884        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4885      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4886        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4887    } else {
4888      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4889        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4890    }
4891  }
4892
4893  if (ID == "wavefront_size") {
4894    if (Header.wavefront_size == 5) {
4895      if (!isGFX10Plus())
4896        return TokError("wavefront_size=5 is only allowed on GFX10+");
4897      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4898        return TokError("wavefront_size=5 requires +WavefrontSize32");
4899    } else if (Header.wavefront_size == 6) {
4900      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4901        return TokError("wavefront_size=6 requires +WavefrontSize64");
4902    }
4903  }
4904
4905  if (ID == "enable_wgp_mode") {
4906    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4907        !isGFX10Plus())
4908      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4909  }
4910
4911  if (ID == "enable_mem_ordered") {
4912    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4913        !isGFX10Plus())
4914      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4915  }
4916
4917  if (ID == "enable_fwd_progress") {
4918    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4919        !isGFX10Plus())
4920      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4921  }
4922
4923  return false;
4924}
4925
4926bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4927  amd_kernel_code_t Header;
4928  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4929
4930  while (true) {
4931    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4932    // will set the current token to EndOfStatement.
4933    while(trySkipToken(AsmToken::EndOfStatement));
4934
4935    StringRef ID;
4936    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4937      return true;
4938
4939    if (ID == ".end_amd_kernel_code_t")
4940      break;
4941
4942    if (ParseAMDKernelCodeTValue(ID, Header))
4943      return true;
4944  }
4945
4946  getTargetStreamer().EmitAMDKernelCodeT(Header);
4947
4948  return false;
4949}
4950
4951bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4952  StringRef KernelName;
4953  if (!parseId(KernelName, "expected symbol name"))
4954    return true;
4955
4956  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4957                                           ELF::STT_AMDGPU_HSA_KERNEL);
4958
4959  KernelScope.initialize(getContext());
4960  return false;
4961}
4962
4963bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4964  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4965    return Error(getLoc(),
4966                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4967                 "architectures");
4968  }
4969
4970  auto TargetIDDirective = getLexer().getTok().getStringContents();
4971  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4972    return Error(getParser().getTok().getLoc(), "target id must match options");
4973
4974  getTargetStreamer().EmitISAVersion();
4975  Lex();
4976
4977  return false;
4978}
4979
4980bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4981  const char *AssemblerDirectiveBegin;
4982  const char *AssemblerDirectiveEnd;
4983  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4984      isHsaAbiVersion3Or4(&getSTI())
4985          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4986                            HSAMD::V3::AssemblerDirectiveEnd)
4987          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4988                            HSAMD::AssemblerDirectiveEnd);
4989
4990  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4991    return Error(getLoc(),
4992                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4993                 "not available on non-amdhsa OSes")).str());
4994  }
4995
4996  std::string HSAMetadataString;
4997  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4998                          HSAMetadataString))
4999    return true;
5000
5001  if (isHsaAbiVersion3Or4(&getSTI())) {
5002    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5003      return Error(getLoc(), "invalid HSA metadata");
5004  } else {
5005    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5006      return Error(getLoc(), "invalid HSA metadata");
5007  }
5008
5009  return false;
5010}
5011
5012/// Common code to parse out a block of text (typically YAML) between start and
5013/// end directives.
5014bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5015                                          const char *AssemblerDirectiveEnd,
5016                                          std::string &CollectString) {
5017
5018  raw_string_ostream CollectStream(CollectString);
5019
5020  getLexer().setSkipSpace(false);
5021
5022  bool FoundEnd = false;
5023  while (!isToken(AsmToken::Eof)) {
5024    while (isToken(AsmToken::Space)) {
5025      CollectStream << getTokenStr();
5026      Lex();
5027    }
5028
5029    if (trySkipId(AssemblerDirectiveEnd)) {
5030      FoundEnd = true;
5031      break;
5032    }
5033
5034    CollectStream << Parser.parseStringToEndOfStatement()
5035                  << getContext().getAsmInfo()->getSeparatorString();
5036
5037    Parser.eatToEndOfStatement();
5038  }
5039
5040  getLexer().setSkipSpace(true);
5041
5042  if (isToken(AsmToken::Eof) && !FoundEnd) {
5043    return TokError(Twine("expected directive ") +
5044                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5045  }
5046
5047  CollectStream.flush();
5048  return false;
5049}
5050
5051/// Parse the assembler directive for new MsgPack-format PAL metadata.
5052bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5053  std::string String;
5054  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5055                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5056    return true;
5057
5058  auto PALMetadata = getTargetStreamer().getPALMetadata();
5059  if (!PALMetadata->setFromString(String))
5060    return Error(getLoc(), "invalid PAL metadata");
5061  return false;
5062}
5063
5064/// Parse the assembler directive for old linear-format PAL metadata.
5065bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5066  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5067    return Error(getLoc(),
5068                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5069                 "not available on non-amdpal OSes")).str());
5070  }
5071
5072  auto PALMetadata = getTargetStreamer().getPALMetadata();
5073  PALMetadata->setLegacy();
5074  for (;;) {
5075    uint32_t Key, Value;
5076    if (ParseAsAbsoluteExpression(Key)) {
5077      return TokError(Twine("invalid value in ") +
5078                      Twine(PALMD::AssemblerDirective));
5079    }
5080    if (!trySkipToken(AsmToken::Comma)) {
5081      return TokError(Twine("expected an even number of values in ") +
5082                      Twine(PALMD::AssemblerDirective));
5083    }
5084    if (ParseAsAbsoluteExpression(Value)) {
5085      return TokError(Twine("invalid value in ") +
5086                      Twine(PALMD::AssemblerDirective));
5087    }
5088    PALMetadata->setRegister(Key, Value);
5089    if (!trySkipToken(AsmToken::Comma))
5090      break;
5091  }
5092  return false;
5093}
5094
5095/// ParseDirectiveAMDGPULDS
5096///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5097bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5098  if (getParser().checkForValidSection())
5099    return true;
5100
5101  StringRef Name;
5102  SMLoc NameLoc = getLoc();
5103  if (getParser().parseIdentifier(Name))
5104    return TokError("expected identifier in directive");
5105
5106  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5107  if (parseToken(AsmToken::Comma, "expected ','"))
5108    return true;
5109
5110  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5111
5112  int64_t Size;
5113  SMLoc SizeLoc = getLoc();
5114  if (getParser().parseAbsoluteExpression(Size))
5115    return true;
5116  if (Size < 0)
5117    return Error(SizeLoc, "size must be non-negative");
5118  if (Size > LocalMemorySize)
5119    return Error(SizeLoc, "size is too large");
5120
5121  int64_t Alignment = 4;
5122  if (trySkipToken(AsmToken::Comma)) {
5123    SMLoc AlignLoc = getLoc();
5124    if (getParser().parseAbsoluteExpression(Alignment))
5125      return true;
5126    if (Alignment < 0 || !isPowerOf2_64(Alignment))
5127      return Error(AlignLoc, "alignment must be a power of two");
5128
5129    // Alignment larger than the size of LDS is possible in theory, as long
5130    // as the linker manages to place to symbol at address 0, but we do want
5131    // to make sure the alignment fits nicely into a 32-bit integer.
5132    if (Alignment >= 1u << 31)
5133      return Error(AlignLoc, "alignment is too large");
5134  }
5135
5136  if (parseToken(AsmToken::EndOfStatement,
5137                 "unexpected token in '.amdgpu_lds' directive"))
5138    return true;
5139
5140  Symbol->redefineIfPossible();
5141  if (!Symbol->isUndefined())
5142    return Error(NameLoc, "invalid symbol redefinition");
5143
5144  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5145  return false;
5146}
5147
5148bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5149  StringRef IDVal = DirectiveID.getString();
5150
5151  if (isHsaAbiVersion3Or4(&getSTI())) {
5152    if (IDVal == ".amdhsa_kernel")
5153     return ParseDirectiveAMDHSAKernel();
5154
5155    // TODO: Restructure/combine with PAL metadata directive.
5156    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5157      return ParseDirectiveHSAMetadata();
5158  } else {
5159    if (IDVal == ".hsa_code_object_version")
5160      return ParseDirectiveHSACodeObjectVersion();
5161
5162    if (IDVal == ".hsa_code_object_isa")
5163      return ParseDirectiveHSACodeObjectISA();
5164
5165    if (IDVal == ".amd_kernel_code_t")
5166      return ParseDirectiveAMDKernelCodeT();
5167
5168    if (IDVal == ".amdgpu_hsa_kernel")
5169      return ParseDirectiveAMDGPUHsaKernel();
5170
5171    if (IDVal == ".amd_amdgpu_isa")
5172      return ParseDirectiveISAVersion();
5173
5174    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5175      return ParseDirectiveHSAMetadata();
5176  }
5177
5178  if (IDVal == ".amdgcn_target")
5179    return ParseDirectiveAMDGCNTarget();
5180
5181  if (IDVal == ".amdgpu_lds")
5182    return ParseDirectiveAMDGPULDS();
5183
5184  if (IDVal == PALMD::AssemblerDirectiveBegin)
5185    return ParseDirectivePALMetadataBegin();
5186
5187  if (IDVal == PALMD::AssemblerDirective)
5188    return ParseDirectivePALMetadata();
5189
5190  return true;
5191}
5192
5193bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5194                                           unsigned RegNo) {
5195
5196  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
5197       R.isValid(); ++R) {
5198    if (*R == RegNo)
5199      return isGFX9Plus();
5200  }
5201
5202  // GFX10 has 2 more SGPRs 104 and 105.
5203  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
5204       R.isValid(); ++R) {
5205    if (*R == RegNo)
5206      return hasSGPR104_SGPR105();
5207  }
5208
5209  switch (RegNo) {
5210  case AMDGPU::SRC_SHARED_BASE:
5211  case AMDGPU::SRC_SHARED_LIMIT:
5212  case AMDGPU::SRC_PRIVATE_BASE:
5213  case AMDGPU::SRC_PRIVATE_LIMIT:
5214  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5215    return isGFX9Plus();
5216  case AMDGPU::TBA:
5217  case AMDGPU::TBA_LO:
5218  case AMDGPU::TBA_HI:
5219  case AMDGPU::TMA:
5220  case AMDGPU::TMA_LO:
5221  case AMDGPU::TMA_HI:
5222    return !isGFX9Plus();
5223  case AMDGPU::XNACK_MASK:
5224  case AMDGPU::XNACK_MASK_LO:
5225  case AMDGPU::XNACK_MASK_HI:
5226    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5227  case AMDGPU::SGPR_NULL:
5228    return isGFX10Plus();
5229  default:
5230    break;
5231  }
5232
5233  if (isCI())
5234    return true;
5235
5236  if (isSI() || isGFX10Plus()) {
5237    // No flat_scr on SI.
5238    // On GFX10 flat scratch is not a valid register operand and can only be
5239    // accessed with s_setreg/s_getreg.
5240    switch (RegNo) {
5241    case AMDGPU::FLAT_SCR:
5242    case AMDGPU::FLAT_SCR_LO:
5243    case AMDGPU::FLAT_SCR_HI:
5244      return false;
5245    default:
5246      return true;
5247    }
5248  }
5249
5250  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5251  // SI/CI have.
5252  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
5253       R.isValid(); ++R) {
5254    if (*R == RegNo)
5255      return hasSGPR102_SGPR103();
5256  }
5257
5258  return true;
5259}
5260
5261OperandMatchResultTy
5262AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5263                              OperandMode Mode) {
5264  // Try to parse with a custom parser
5265  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5266
5267  // If we successfully parsed the operand or if there as an error parsing,
5268  // we are done.
5269  //
5270  // If we are parsing after we reach EndOfStatement then this means we
5271  // are appending default values to the Operands list.  This is only done
5272  // by custom parser, so we shouldn't continue on to the generic parsing.
5273  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5274      isToken(AsmToken::EndOfStatement))
5275    return ResTy;
5276
5277  SMLoc RBraceLoc;
5278  SMLoc LBraceLoc = getLoc();
5279  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5280    unsigned Prefix = Operands.size();
5281
5282    for (;;) {
5283      auto Loc = getLoc();
5284      ResTy = parseReg(Operands);
5285      if (ResTy == MatchOperand_NoMatch)
5286        Error(Loc, "expected a register");
5287      if (ResTy != MatchOperand_Success)
5288        return MatchOperand_ParseFail;
5289
5290      RBraceLoc = getLoc();
5291      if (trySkipToken(AsmToken::RBrac))
5292        break;
5293
5294      if (!skipToken(AsmToken::Comma,
5295                     "expected a comma or a closing square bracket")) {
5296        return MatchOperand_ParseFail;
5297      }
5298    }
5299
5300    if (Operands.size() - Prefix > 1) {
5301      Operands.insert(Operands.begin() + Prefix,
5302                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5303      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5304    }
5305
5306    return MatchOperand_Success;
5307  }
5308
5309  return parseRegOrImm(Operands);
5310}
5311
5312StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5313  // Clear any forced encodings from the previous instruction.
5314  setForcedEncodingSize(0);
5315  setForcedDPP(false);
5316  setForcedSDWA(false);
5317
5318  if (Name.endswith("_e64")) {
5319    setForcedEncodingSize(64);
5320    return Name.substr(0, Name.size() - 4);
5321  } else if (Name.endswith("_e32")) {
5322    setForcedEncodingSize(32);
5323    return Name.substr(0, Name.size() - 4);
5324  } else if (Name.endswith("_dpp")) {
5325    setForcedDPP(true);
5326    return Name.substr(0, Name.size() - 4);
5327  } else if (Name.endswith("_sdwa")) {
5328    setForcedSDWA(true);
5329    return Name.substr(0, Name.size() - 5);
5330  }
5331  return Name;
5332}
5333
5334bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5335                                       StringRef Name,
5336                                       SMLoc NameLoc, OperandVector &Operands) {
5337  // Add the instruction mnemonic
5338  Name = parseMnemonicSuffix(Name);
5339  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5340
5341  bool IsMIMG = Name.startswith("image_");
5342
5343  while (!trySkipToken(AsmToken::EndOfStatement)) {
5344    OperandMode Mode = OperandMode_Default;
5345    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5346      Mode = OperandMode_NSA;
5347    CPolSeen = 0;
5348    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5349
5350    if (Res != MatchOperand_Success) {
5351      checkUnsupportedInstruction(Name, NameLoc);
5352      if (!Parser.hasPendingError()) {
5353        // FIXME: use real operand location rather than the current location.
5354        StringRef Msg =
5355          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5356                                            "not a valid operand.";
5357        Error(getLoc(), Msg);
5358      }
5359      while (!trySkipToken(AsmToken::EndOfStatement)) {
5360        lex();
5361      }
5362      return true;
5363    }
5364
5365    // Eat the comma or space if there is one.
5366    trySkipToken(AsmToken::Comma);
5367  }
5368
5369  return false;
5370}
5371
5372//===----------------------------------------------------------------------===//
5373// Utility functions
5374//===----------------------------------------------------------------------===//
5375
5376OperandMatchResultTy
5377AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5378
5379  if (!trySkipId(Prefix, AsmToken::Colon))
5380    return MatchOperand_NoMatch;
5381
5382  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5383}
5384
5385OperandMatchResultTy
5386AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5387                                    AMDGPUOperand::ImmTy ImmTy,
5388                                    bool (*ConvertResult)(int64_t&)) {
5389  SMLoc S = getLoc();
5390  int64_t Value = 0;
5391
5392  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5393  if (Res != MatchOperand_Success)
5394    return Res;
5395
5396  if (ConvertResult && !ConvertResult(Value)) {
5397    Error(S, "invalid " + StringRef(Prefix) + " value.");
5398  }
5399
5400  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5401  return MatchOperand_Success;
5402}
5403
5404OperandMatchResultTy
5405AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5406                                             OperandVector &Operands,
5407                                             AMDGPUOperand::ImmTy ImmTy,
5408                                             bool (*ConvertResult)(int64_t&)) {
5409  SMLoc S = getLoc();
5410  if (!trySkipId(Prefix, AsmToken::Colon))
5411    return MatchOperand_NoMatch;
5412
5413  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5414    return MatchOperand_ParseFail;
5415
5416  unsigned Val = 0;
5417  const unsigned MaxSize = 4;
5418
5419  // FIXME: How to verify the number of elements matches the number of src
5420  // operands?
5421  for (int I = 0; ; ++I) {
5422    int64_t Op;
5423    SMLoc Loc = getLoc();
5424    if (!parseExpr(Op))
5425      return MatchOperand_ParseFail;
5426
5427    if (Op != 0 && Op != 1) {
5428      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5429      return MatchOperand_ParseFail;
5430    }
5431
5432    Val |= (Op << I);
5433
5434    if (trySkipToken(AsmToken::RBrac))
5435      break;
5436
5437    if (I + 1 == MaxSize) {
5438      Error(getLoc(), "expected a closing square bracket");
5439      return MatchOperand_ParseFail;
5440    }
5441
5442    if (!skipToken(AsmToken::Comma, "expected a comma"))
5443      return MatchOperand_ParseFail;
5444  }
5445
5446  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5447  return MatchOperand_Success;
5448}
5449
5450OperandMatchResultTy
5451AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5452                               AMDGPUOperand::ImmTy ImmTy) {
5453  int64_t Bit;
5454  SMLoc S = getLoc();
5455
5456  if (trySkipId(Name)) {
5457    Bit = 1;
5458  } else if (trySkipId("no", Name)) {
5459    Bit = 0;
5460  } else {
5461    return MatchOperand_NoMatch;
5462  }
5463
5464  if (Name == "r128" && !hasMIMG_R128()) {
5465    Error(S, "r128 modifier is not supported on this GPU");
5466    return MatchOperand_ParseFail;
5467  }
5468  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5469    Error(S, "a16 modifier is not supported on this GPU");
5470    return MatchOperand_ParseFail;
5471  }
5472
5473  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5474    ImmTy = AMDGPUOperand::ImmTyR128A16;
5475
5476  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5477  return MatchOperand_Success;
5478}
5479
5480OperandMatchResultTy
5481AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5482  unsigned CPolOn = 0;
5483  unsigned CPolOff = 0;
5484  SMLoc S = getLoc();
5485
5486  if (trySkipId("glc"))
5487    CPolOn = AMDGPU::CPol::GLC;
5488  else if (trySkipId("noglc"))
5489    CPolOff = AMDGPU::CPol::GLC;
5490  else if (trySkipId("slc"))
5491    CPolOn = AMDGPU::CPol::SLC;
5492  else if (trySkipId("noslc"))
5493    CPolOff = AMDGPU::CPol::SLC;
5494  else if (trySkipId("dlc"))
5495    CPolOn = AMDGPU::CPol::DLC;
5496  else if (trySkipId("nodlc"))
5497    CPolOff = AMDGPU::CPol::DLC;
5498  else if (trySkipId("scc"))
5499    CPolOn = AMDGPU::CPol::SCC;
5500  else if (trySkipId("noscc"))
5501    CPolOff = AMDGPU::CPol::SCC;
5502  else
5503    return MatchOperand_NoMatch;
5504
5505  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5506    Error(S, "dlc modifier is not supported on this GPU");
5507    return MatchOperand_ParseFail;
5508  }
5509
5510  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5511    Error(S, "scc modifier is not supported on this GPU");
5512    return MatchOperand_ParseFail;
5513  }
5514
5515  if (CPolSeen & (CPolOn | CPolOff)) {
5516    Error(S, "duplicate cache policy modifier");
5517    return MatchOperand_ParseFail;
5518  }
5519
5520  CPolSeen |= (CPolOn | CPolOff);
5521
5522  for (unsigned I = 1; I != Operands.size(); ++I) {
5523    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5524    if (Op.isCPol()) {
5525      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5526      return MatchOperand_Success;
5527    }
5528  }
5529
5530  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5531                                              AMDGPUOperand::ImmTyCPol));
5532
5533  return MatchOperand_Success;
5534}
5535
5536static void addOptionalImmOperand(
5537  MCInst& Inst, const OperandVector& Operands,
5538  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5539  AMDGPUOperand::ImmTy ImmT,
5540  int64_t Default = 0) {
5541  auto i = OptionalIdx.find(ImmT);
5542  if (i != OptionalIdx.end()) {
5543    unsigned Idx = i->second;
5544    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5545  } else {
5546    Inst.addOperand(MCOperand::createImm(Default));
5547  }
5548}
5549
5550OperandMatchResultTy
5551AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5552                                       StringRef &Value,
5553                                       SMLoc &StringLoc) {
5554  if (!trySkipId(Prefix, AsmToken::Colon))
5555    return MatchOperand_NoMatch;
5556
5557  StringLoc = getLoc();
5558  return parseId(Value, "expected an identifier") ? MatchOperand_Success
5559                                                  : MatchOperand_ParseFail;
5560}
5561
5562//===----------------------------------------------------------------------===//
5563// MTBUF format
5564//===----------------------------------------------------------------------===//
5565
5566bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5567                                  int64_t MaxVal,
5568                                  int64_t &Fmt) {
5569  int64_t Val;
5570  SMLoc Loc = getLoc();
5571
5572  auto Res = parseIntWithPrefix(Pref, Val);
5573  if (Res == MatchOperand_ParseFail)
5574    return false;
5575  if (Res == MatchOperand_NoMatch)
5576    return true;
5577
5578  if (Val < 0 || Val > MaxVal) {
5579    Error(Loc, Twine("out of range ", StringRef(Pref)));
5580    return false;
5581  }
5582
5583  Fmt = Val;
5584  return true;
5585}
5586
5587// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5588// values to live in a joint format operand in the MCInst encoding.
5589OperandMatchResultTy
5590AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5591  using namespace llvm::AMDGPU::MTBUFFormat;
5592
5593  int64_t Dfmt = DFMT_UNDEF;
5594  int64_t Nfmt = NFMT_UNDEF;
5595
5596  // dfmt and nfmt can appear in either order, and each is optional.
5597  for (int I = 0; I < 2; ++I) {
5598    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5599      return MatchOperand_ParseFail;
5600
5601    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5602      return MatchOperand_ParseFail;
5603    }
5604    // Skip optional comma between dfmt/nfmt
5605    // but guard against 2 commas following each other.
5606    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5607        !peekToken().is(AsmToken::Comma)) {
5608      trySkipToken(AsmToken::Comma);
5609    }
5610  }
5611
5612  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5613    return MatchOperand_NoMatch;
5614
5615  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5616  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5617
5618  Format = encodeDfmtNfmt(Dfmt, Nfmt);
5619  return MatchOperand_Success;
5620}
5621
5622OperandMatchResultTy
5623AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5624  using namespace llvm::AMDGPU::MTBUFFormat;
5625
5626  int64_t Fmt = UFMT_UNDEF;
5627
5628  if (!tryParseFmt("format", UFMT_MAX, Fmt))
5629    return MatchOperand_ParseFail;
5630
5631  if (Fmt == UFMT_UNDEF)
5632    return MatchOperand_NoMatch;
5633
5634  Format = Fmt;
5635  return MatchOperand_Success;
5636}
5637
5638bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5639                                    int64_t &Nfmt,
5640                                    StringRef FormatStr,
5641                                    SMLoc Loc) {
5642  using namespace llvm::AMDGPU::MTBUFFormat;
5643  int64_t Format;
5644
5645  Format = getDfmt(FormatStr);
5646  if (Format != DFMT_UNDEF) {
5647    Dfmt = Format;
5648    return true;
5649  }
5650
5651  Format = getNfmt(FormatStr, getSTI());
5652  if (Format != NFMT_UNDEF) {
5653    Nfmt = Format;
5654    return true;
5655  }
5656
5657  Error(Loc, "unsupported format");
5658  return false;
5659}
5660
5661OperandMatchResultTy
5662AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5663                                          SMLoc FormatLoc,
5664                                          int64_t &Format) {
5665  using namespace llvm::AMDGPU::MTBUFFormat;
5666
5667  int64_t Dfmt = DFMT_UNDEF;
5668  int64_t Nfmt = NFMT_UNDEF;
5669  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5670    return MatchOperand_ParseFail;
5671
5672  if (trySkipToken(AsmToken::Comma)) {
5673    StringRef Str;
5674    SMLoc Loc = getLoc();
5675    if (!parseId(Str, "expected a format string") ||
5676        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5677      return MatchOperand_ParseFail;
5678    }
5679    if (Dfmt == DFMT_UNDEF) {
5680      Error(Loc, "duplicate numeric format");
5681      return MatchOperand_ParseFail;
5682    } else if (Nfmt == NFMT_UNDEF) {
5683      Error(Loc, "duplicate data format");
5684      return MatchOperand_ParseFail;
5685    }
5686  }
5687
5688  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5689  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5690
5691  if (isGFX10Plus()) {
5692    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5693    if (Ufmt == UFMT_UNDEF) {
5694      Error(FormatLoc, "unsupported format");
5695      return MatchOperand_ParseFail;
5696    }
5697    Format = Ufmt;
5698  } else {
5699    Format = encodeDfmtNfmt(Dfmt, Nfmt);
5700  }
5701
5702  return MatchOperand_Success;
5703}
5704
5705OperandMatchResultTy
5706AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5707                                            SMLoc Loc,
5708                                            int64_t &Format) {
5709  using namespace llvm::AMDGPU::MTBUFFormat;
5710
5711  auto Id = getUnifiedFormat(FormatStr);
5712  if (Id == UFMT_UNDEF)
5713    return MatchOperand_NoMatch;
5714
5715  if (!isGFX10Plus()) {
5716    Error(Loc, "unified format is not supported on this GPU");
5717    return MatchOperand_ParseFail;
5718  }
5719
5720  Format = Id;
5721  return MatchOperand_Success;
5722}
5723
5724OperandMatchResultTy
5725AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5726  using namespace llvm::AMDGPU::MTBUFFormat;
5727  SMLoc Loc = getLoc();
5728
5729  if (!parseExpr(Format))
5730    return MatchOperand_ParseFail;
5731  if (!isValidFormatEncoding(Format, getSTI())) {
5732    Error(Loc, "out of range format");
5733    return MatchOperand_ParseFail;
5734  }
5735
5736  return MatchOperand_Success;
5737}
5738
5739OperandMatchResultTy
5740AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5741  using namespace llvm::AMDGPU::MTBUFFormat;
5742
5743  if (!trySkipId("format", AsmToken::Colon))
5744    return MatchOperand_NoMatch;
5745
5746  if (trySkipToken(AsmToken::LBrac)) {
5747    StringRef FormatStr;
5748    SMLoc Loc = getLoc();
5749    if (!parseId(FormatStr, "expected a format string"))
5750      return MatchOperand_ParseFail;
5751
5752    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5753    if (Res == MatchOperand_NoMatch)
5754      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5755    if (Res != MatchOperand_Success)
5756      return Res;
5757
5758    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5759      return MatchOperand_ParseFail;
5760
5761    return MatchOperand_Success;
5762  }
5763
5764  return parseNumericFormat(Format);
5765}
5766
5767OperandMatchResultTy
5768AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5769  using namespace llvm::AMDGPU::MTBUFFormat;
5770
5771  int64_t Format = getDefaultFormatEncoding(getSTI());
5772  OperandMatchResultTy Res;
5773  SMLoc Loc = getLoc();
5774
5775  // Parse legacy format syntax.
5776  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5777  if (Res == MatchOperand_ParseFail)
5778    return Res;
5779
5780  bool FormatFound = (Res == MatchOperand_Success);
5781
5782  Operands.push_back(
5783    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5784
5785  if (FormatFound)
5786    trySkipToken(AsmToken::Comma);
5787
5788  if (isToken(AsmToken::EndOfStatement)) {
5789    // We are expecting an soffset operand,
5790    // but let matcher handle the error.
5791    return MatchOperand_Success;
5792  }
5793
5794  // Parse soffset.
5795  Res = parseRegOrImm(Operands);
5796  if (Res != MatchOperand_Success)
5797    return Res;
5798
5799  trySkipToken(AsmToken::Comma);
5800
5801  if (!FormatFound) {
5802    Res = parseSymbolicOrNumericFormat(Format);
5803    if (Res == MatchOperand_ParseFail)
5804      return Res;
5805    if (Res == MatchOperand_Success) {
5806      auto Size = Operands.size();
5807      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5808      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5809      Op.setImm(Format);
5810    }
5811    return MatchOperand_Success;
5812  }
5813
5814  if (isId("format") && peekToken().is(AsmToken::Colon)) {
5815    Error(getLoc(), "duplicate format");
5816    return MatchOperand_ParseFail;
5817  }
5818  return MatchOperand_Success;
5819}
5820
5821//===----------------------------------------------------------------------===//
5822// ds
5823//===----------------------------------------------------------------------===//
5824
5825void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5826                                    const OperandVector &Operands) {
5827  OptionalImmIndexMap OptionalIdx;
5828
5829  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5830    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5831
5832    // Add the register arguments
5833    if (Op.isReg()) {
5834      Op.addRegOperands(Inst, 1);
5835      continue;
5836    }
5837
5838    // Handle optional arguments
5839    OptionalIdx[Op.getImmTy()] = i;
5840  }
5841
5842  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5843  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5844  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5845
5846  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5847}
5848
5849void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5850                                bool IsGdsHardcoded) {
5851  OptionalImmIndexMap OptionalIdx;
5852
5853  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5854    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5855
5856    // Add the register arguments
5857    if (Op.isReg()) {
5858      Op.addRegOperands(Inst, 1);
5859      continue;
5860    }
5861
5862    if (Op.isToken() && Op.getToken() == "gds") {
5863      IsGdsHardcoded = true;
5864      continue;
5865    }
5866
5867    // Handle optional arguments
5868    OptionalIdx[Op.getImmTy()] = i;
5869  }
5870
5871  AMDGPUOperand::ImmTy OffsetType =
5872    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5873     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5874     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5875                                                      AMDGPUOperand::ImmTyOffset;
5876
5877  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5878
5879  if (!IsGdsHardcoded) {
5880    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5881  }
5882  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5883}
5884
5885void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5886  OptionalImmIndexMap OptionalIdx;
5887
5888  unsigned OperandIdx[4];
5889  unsigned EnMask = 0;
5890  int SrcIdx = 0;
5891
5892  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5893    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5894
5895    // Add the register arguments
5896    if (Op.isReg()) {
5897      assert(SrcIdx < 4);
5898      OperandIdx[SrcIdx] = Inst.size();
5899      Op.addRegOperands(Inst, 1);
5900      ++SrcIdx;
5901      continue;
5902    }
5903
5904    if (Op.isOff()) {
5905      assert(SrcIdx < 4);
5906      OperandIdx[SrcIdx] = Inst.size();
5907      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5908      ++SrcIdx;
5909      continue;
5910    }
5911
5912    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5913      Op.addImmOperands(Inst, 1);
5914      continue;
5915    }
5916
5917    if (Op.isToken() && Op.getToken() == "done")
5918      continue;
5919
5920    // Handle optional arguments
5921    OptionalIdx[Op.getImmTy()] = i;
5922  }
5923
5924  assert(SrcIdx == 4);
5925
5926  bool Compr = false;
5927  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5928    Compr = true;
5929    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5930    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5931    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5932  }
5933
5934  for (auto i = 0; i < SrcIdx; ++i) {
5935    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5936      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5937    }
5938  }
5939
5940  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5941  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5942
5943  Inst.addOperand(MCOperand::createImm(EnMask));
5944}
5945
5946//===----------------------------------------------------------------------===//
5947// s_waitcnt
5948//===----------------------------------------------------------------------===//
5949
5950static bool
5951encodeCnt(
5952  const AMDGPU::IsaVersion ISA,
5953  int64_t &IntVal,
5954  int64_t CntVal,
5955  bool Saturate,
5956  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5957  unsigned (*decode)(const IsaVersion &Version, unsigned))
5958{
5959  bool Failed = false;
5960
5961  IntVal = encode(ISA, IntVal, CntVal);
5962  if (CntVal != decode(ISA, IntVal)) {
5963    if (Saturate) {
5964      IntVal = encode(ISA, IntVal, -1);
5965    } else {
5966      Failed = true;
5967    }
5968  }
5969  return Failed;
5970}
5971
5972bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5973
5974  SMLoc CntLoc = getLoc();
5975  StringRef CntName = getTokenStr();
5976
5977  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5978      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5979    return false;
5980
5981  int64_t CntVal;
5982  SMLoc ValLoc = getLoc();
5983  if (!parseExpr(CntVal))
5984    return false;
5985
5986  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5987
5988  bool Failed = true;
5989  bool Sat = CntName.endswith("_sat");
5990
5991  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5992    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5993  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5994    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5995  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5996    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5997  } else {
5998    Error(CntLoc, "invalid counter name " + CntName);
5999    return false;
6000  }
6001
6002  if (Failed) {
6003    Error(ValLoc, "too large value for " + CntName);
6004    return false;
6005  }
6006
6007  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6008    return false;
6009
6010  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6011    if (isToken(AsmToken::EndOfStatement)) {
6012      Error(getLoc(), "expected a counter name");
6013      return false;
6014    }
6015  }
6016
6017  return true;
6018}
6019
6020OperandMatchResultTy
6021AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6022  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6023  int64_t Waitcnt = getWaitcntBitMask(ISA);
6024  SMLoc S = getLoc();
6025
6026  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6027    while (!isToken(AsmToken::EndOfStatement)) {
6028      if (!parseCnt(Waitcnt))
6029        return MatchOperand_ParseFail;
6030    }
6031  } else {
6032    if (!parseExpr(Waitcnt))
6033      return MatchOperand_ParseFail;
6034  }
6035
6036  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6037  return MatchOperand_Success;
6038}
6039
6040bool
6041AMDGPUOperand::isSWaitCnt() const {
6042  return isImm();
6043}
6044
6045//===----------------------------------------------------------------------===//
6046// hwreg
6047//===----------------------------------------------------------------------===//
6048
6049bool
6050AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6051                                OperandInfoTy &Offset,
6052                                OperandInfoTy &Width) {
6053  using namespace llvm::AMDGPU::Hwreg;
6054
6055  // The register may be specified by name or using a numeric code
6056  HwReg.Loc = getLoc();
6057  if (isToken(AsmToken::Identifier) &&
6058      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
6059    HwReg.IsSymbolic = true;
6060    lex(); // skip register name
6061  } else if (!parseExpr(HwReg.Id, "a register name")) {
6062    return false;
6063  }
6064
6065  if (trySkipToken(AsmToken::RParen))
6066    return true;
6067
6068  // parse optional params
6069  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6070    return false;
6071
6072  Offset.Loc = getLoc();
6073  if (!parseExpr(Offset.Id))
6074    return false;
6075
6076  if (!skipToken(AsmToken::Comma, "expected a comma"))
6077    return false;
6078
6079  Width.Loc = getLoc();
6080  return parseExpr(Width.Id) &&
6081         skipToken(AsmToken::RParen, "expected a closing parenthesis");
6082}
6083
6084bool
6085AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6086                               const OperandInfoTy &Offset,
6087                               const OperandInfoTy &Width) {
6088
6089  using namespace llvm::AMDGPU::Hwreg;
6090
6091  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
6092    Error(HwReg.Loc,
6093          "specified hardware register is not supported on this GPU");
6094    return false;
6095  }
6096  if (!isValidHwreg(HwReg.Id)) {
6097    Error(HwReg.Loc,
6098          "invalid code of hardware register: only 6-bit values are legal");
6099    return false;
6100  }
6101  if (!isValidHwregOffset(Offset.Id)) {
6102    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6103    return false;
6104  }
6105  if (!isValidHwregWidth(Width.Id)) {
6106    Error(Width.Loc,
6107          "invalid bitfield width: only values from 1 to 32 are legal");
6108    return false;
6109  }
6110  return true;
6111}
6112
6113OperandMatchResultTy
6114AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6115  using namespace llvm::AMDGPU::Hwreg;
6116
6117  int64_t ImmVal = 0;
6118  SMLoc Loc = getLoc();
6119
6120  if (trySkipId("hwreg", AsmToken::LParen)) {
6121    OperandInfoTy HwReg(ID_UNKNOWN_);
6122    OperandInfoTy Offset(OFFSET_DEFAULT_);
6123    OperandInfoTy Width(WIDTH_DEFAULT_);
6124    if (parseHwregBody(HwReg, Offset, Width) &&
6125        validateHwreg(HwReg, Offset, Width)) {
6126      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6127    } else {
6128      return MatchOperand_ParseFail;
6129    }
6130  } else if (parseExpr(ImmVal, "a hwreg macro")) {
6131    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6132      Error(Loc, "invalid immediate: only 16-bit values are legal");
6133      return MatchOperand_ParseFail;
6134    }
6135  } else {
6136    return MatchOperand_ParseFail;
6137  }
6138
6139  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6140  return MatchOperand_Success;
6141}
6142
6143bool AMDGPUOperand::isHwreg() const {
6144  return isImmTy(ImmTyHwreg);
6145}
6146
6147//===----------------------------------------------------------------------===//
6148// sendmsg
6149//===----------------------------------------------------------------------===//
6150
6151bool
6152AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6153                                  OperandInfoTy &Op,
6154                                  OperandInfoTy &Stream) {
6155  using namespace llvm::AMDGPU::SendMsg;
6156
6157  Msg.Loc = getLoc();
6158  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
6159    Msg.IsSymbolic = true;
6160    lex(); // skip message name
6161  } else if (!parseExpr(Msg.Id, "a message name")) {
6162    return false;
6163  }
6164
6165  if (trySkipToken(AsmToken::Comma)) {
6166    Op.IsDefined = true;
6167    Op.Loc = getLoc();
6168    if (isToken(AsmToken::Identifier) &&
6169        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6170      lex(); // skip operation name
6171    } else if (!parseExpr(Op.Id, "an operation name")) {
6172      return false;
6173    }
6174
6175    if (trySkipToken(AsmToken::Comma)) {
6176      Stream.IsDefined = true;
6177      Stream.Loc = getLoc();
6178      if (!parseExpr(Stream.Id))
6179        return false;
6180    }
6181  }
6182
6183  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6184}
6185
6186bool
6187AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6188                                 const OperandInfoTy &Op,
6189                                 const OperandInfoTy &Stream) {
6190  using namespace llvm::AMDGPU::SendMsg;
6191
6192  // Validation strictness depends on whether message is specified
6193  // in a symbolc or in a numeric form. In the latter case
6194  // only encoding possibility is checked.
6195  bool Strict = Msg.IsSymbolic;
6196
6197  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6198    Error(Msg.Loc, "invalid message id");
6199    return false;
6200  }
6201  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6202    if (Op.IsDefined) {
6203      Error(Op.Loc, "message does not support operations");
6204    } else {
6205      Error(Msg.Loc, "missing message operation");
6206    }
6207    return false;
6208  }
6209  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6210    Error(Op.Loc, "invalid operation id");
6211    return false;
6212  }
6213  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6214    Error(Stream.Loc, "message operation does not support streams");
6215    return false;
6216  }
6217  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6218    Error(Stream.Loc, "invalid message stream id");
6219    return false;
6220  }
6221  return true;
6222}
6223
6224OperandMatchResultTy
6225AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6226  using namespace llvm::AMDGPU::SendMsg;
6227
6228  int64_t ImmVal = 0;
6229  SMLoc Loc = getLoc();
6230
6231  if (trySkipId("sendmsg", AsmToken::LParen)) {
6232    OperandInfoTy Msg(ID_UNKNOWN_);
6233    OperandInfoTy Op(OP_NONE_);
6234    OperandInfoTy Stream(STREAM_ID_NONE_);
6235    if (parseSendMsgBody(Msg, Op, Stream) &&
6236        validateSendMsg(Msg, Op, Stream)) {
6237      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6238    } else {
6239      return MatchOperand_ParseFail;
6240    }
6241  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6242    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6243      Error(Loc, "invalid immediate: only 16-bit values are legal");
6244      return MatchOperand_ParseFail;
6245    }
6246  } else {
6247    return MatchOperand_ParseFail;
6248  }
6249
6250  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6251  return MatchOperand_Success;
6252}
6253
6254bool AMDGPUOperand::isSendMsg() const {
6255  return isImmTy(ImmTySendMsg);
6256}
6257
6258//===----------------------------------------------------------------------===//
6259// v_interp
6260//===----------------------------------------------------------------------===//
6261
6262OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6263  StringRef Str;
6264  SMLoc S = getLoc();
6265
6266  if (!parseId(Str))
6267    return MatchOperand_NoMatch;
6268
6269  int Slot = StringSwitch<int>(Str)
6270    .Case("p10", 0)
6271    .Case("p20", 1)
6272    .Case("p0", 2)
6273    .Default(-1);
6274
6275  if (Slot == -1) {
6276    Error(S, "invalid interpolation slot");
6277    return MatchOperand_ParseFail;
6278  }
6279
6280  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6281                                              AMDGPUOperand::ImmTyInterpSlot));
6282  return MatchOperand_Success;
6283}
6284
6285OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6286  StringRef Str;
6287  SMLoc S = getLoc();
6288
6289  if (!parseId(Str))
6290    return MatchOperand_NoMatch;
6291
6292  if (!Str.startswith("attr")) {
6293    Error(S, "invalid interpolation attribute");
6294    return MatchOperand_ParseFail;
6295  }
6296
6297  StringRef Chan = Str.take_back(2);
6298  int AttrChan = StringSwitch<int>(Chan)
6299    .Case(".x", 0)
6300    .Case(".y", 1)
6301    .Case(".z", 2)
6302    .Case(".w", 3)
6303    .Default(-1);
6304  if (AttrChan == -1) {
6305    Error(S, "invalid or missing interpolation attribute channel");
6306    return MatchOperand_ParseFail;
6307  }
6308
6309  Str = Str.drop_back(2).drop_front(4);
6310
6311  uint8_t Attr;
6312  if (Str.getAsInteger(10, Attr)) {
6313    Error(S, "invalid or missing interpolation attribute number");
6314    return MatchOperand_ParseFail;
6315  }
6316
6317  if (Attr > 63) {
6318    Error(S, "out of bounds interpolation attribute number");
6319    return MatchOperand_ParseFail;
6320  }
6321
6322  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6323
6324  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6325                                              AMDGPUOperand::ImmTyInterpAttr));
6326  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6327                                              AMDGPUOperand::ImmTyAttrChan));
6328  return MatchOperand_Success;
6329}
6330
6331//===----------------------------------------------------------------------===//
6332// exp
6333//===----------------------------------------------------------------------===//
6334
6335OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6336  using namespace llvm::AMDGPU::Exp;
6337
6338  StringRef Str;
6339  SMLoc S = getLoc();
6340
6341  if (!parseId(Str))
6342    return MatchOperand_NoMatch;
6343
6344  unsigned Id = getTgtId(Str);
6345  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6346    Error(S, (Id == ET_INVALID) ?
6347                "invalid exp target" :
6348                "exp target is not supported on this GPU");
6349    return MatchOperand_ParseFail;
6350  }
6351
6352  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6353                                              AMDGPUOperand::ImmTyExpTgt));
6354  return MatchOperand_Success;
6355}
6356
6357//===----------------------------------------------------------------------===//
6358// parser helpers
6359//===----------------------------------------------------------------------===//
6360
6361bool
6362AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6363  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6364}
6365
6366bool
6367AMDGPUAsmParser::isId(const StringRef Id) const {
6368  return isId(getToken(), Id);
6369}
6370
6371bool
6372AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6373  return getTokenKind() == Kind;
6374}
6375
6376bool
6377AMDGPUAsmParser::trySkipId(const StringRef Id) {
6378  if (isId(Id)) {
6379    lex();
6380    return true;
6381  }
6382  return false;
6383}
6384
6385bool
6386AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6387  if (isToken(AsmToken::Identifier)) {
6388    StringRef Tok = getTokenStr();
6389    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6390      lex();
6391      return true;
6392    }
6393  }
6394  return false;
6395}
6396
6397bool
6398AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6399  if (isId(Id) && peekToken().is(Kind)) {
6400    lex();
6401    lex();
6402    return true;
6403  }
6404  return false;
6405}
6406
6407bool
6408AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6409  if (isToken(Kind)) {
6410    lex();
6411    return true;
6412  }
6413  return false;
6414}
6415
6416bool
6417AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6418                           const StringRef ErrMsg) {
6419  if (!trySkipToken(Kind)) {
6420    Error(getLoc(), ErrMsg);
6421    return false;
6422  }
6423  return true;
6424}
6425
6426bool
6427AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6428  SMLoc S = getLoc();
6429
6430  const MCExpr *Expr;
6431  if (Parser.parseExpression(Expr))
6432    return false;
6433
6434  if (Expr->evaluateAsAbsolute(Imm))
6435    return true;
6436
6437  if (Expected.empty()) {
6438    Error(S, "expected absolute expression");
6439  } else {
6440    Error(S, Twine("expected ", Expected) +
6441             Twine(" or an absolute expression"));
6442  }
6443  return false;
6444}
6445
6446bool
6447AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6448  SMLoc S = getLoc();
6449
6450  const MCExpr *Expr;
6451  if (Parser.parseExpression(Expr))
6452    return false;
6453
6454  int64_t IntVal;
6455  if (Expr->evaluateAsAbsolute(IntVal)) {
6456    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6457  } else {
6458    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6459  }
6460  return true;
6461}
6462
6463bool
6464AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6465  if (isToken(AsmToken::String)) {
6466    Val = getToken().getStringContents();
6467    lex();
6468    return true;
6469  } else {
6470    Error(getLoc(), ErrMsg);
6471    return false;
6472  }
6473}
6474
6475bool
6476AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6477  if (isToken(AsmToken::Identifier)) {
6478    Val = getTokenStr();
6479    lex();
6480    return true;
6481  } else {
6482    if (!ErrMsg.empty())
6483      Error(getLoc(), ErrMsg);
6484    return false;
6485  }
6486}
6487
6488AsmToken
6489AMDGPUAsmParser::getToken() const {
6490  return Parser.getTok();
6491}
6492
6493AsmToken
6494AMDGPUAsmParser::peekToken() {
6495  return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6496}
6497
6498void
6499AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6500  auto TokCount = getLexer().peekTokens(Tokens);
6501
6502  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6503    Tokens[Idx] = AsmToken(AsmToken::Error, "");
6504}
6505
6506AsmToken::TokenKind
6507AMDGPUAsmParser::getTokenKind() const {
6508  return getLexer().getKind();
6509}
6510
6511SMLoc
6512AMDGPUAsmParser::getLoc() const {
6513  return getToken().getLoc();
6514}
6515
6516StringRef
6517AMDGPUAsmParser::getTokenStr() const {
6518  return getToken().getString();
6519}
6520
6521void
6522AMDGPUAsmParser::lex() {
6523  Parser.Lex();
6524}
6525
6526SMLoc
6527AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6528                               const OperandVector &Operands) const {
6529  for (unsigned i = Operands.size() - 1; i > 0; --i) {
6530    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6531    if (Test(Op))
6532      return Op.getStartLoc();
6533  }
6534  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6535}
6536
6537SMLoc
6538AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6539                           const OperandVector &Operands) const {
6540  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6541  return getOperandLoc(Test, Operands);
6542}
6543
6544SMLoc
6545AMDGPUAsmParser::getRegLoc(unsigned Reg,
6546                           const OperandVector &Operands) const {
6547  auto Test = [=](const AMDGPUOperand& Op) {
6548    return Op.isRegKind() && Op.getReg() == Reg;
6549  };
6550  return getOperandLoc(Test, Operands);
6551}
6552
6553SMLoc
6554AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6555  auto Test = [](const AMDGPUOperand& Op) {
6556    return Op.IsImmKindLiteral() || Op.isExpr();
6557  };
6558  return getOperandLoc(Test, Operands);
6559}
6560
6561SMLoc
6562AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6563  auto Test = [](const AMDGPUOperand& Op) {
6564    return Op.isImmKindConst();
6565  };
6566  return getOperandLoc(Test, Operands);
6567}
6568
6569//===----------------------------------------------------------------------===//
6570// swizzle
6571//===----------------------------------------------------------------------===//
6572
6573LLVM_READNONE
6574static unsigned
6575encodeBitmaskPerm(const unsigned AndMask,
6576                  const unsigned OrMask,
6577                  const unsigned XorMask) {
6578  using namespace llvm::AMDGPU::Swizzle;
6579
6580  return BITMASK_PERM_ENC |
6581         (AndMask << BITMASK_AND_SHIFT) |
6582         (OrMask  << BITMASK_OR_SHIFT)  |
6583         (XorMask << BITMASK_XOR_SHIFT);
6584}
6585
6586bool
6587AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6588                                     const unsigned MinVal,
6589                                     const unsigned MaxVal,
6590                                     const StringRef ErrMsg,
6591                                     SMLoc &Loc) {
6592  if (!skipToken(AsmToken::Comma, "expected a comma")) {
6593    return false;
6594  }
6595  Loc = getLoc();
6596  if (!parseExpr(Op)) {
6597    return false;
6598  }
6599  if (Op < MinVal || Op > MaxVal) {
6600    Error(Loc, ErrMsg);
6601    return false;
6602  }
6603
6604  return true;
6605}
6606
6607bool
6608AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6609                                      const unsigned MinVal,
6610                                      const unsigned MaxVal,
6611                                      const StringRef ErrMsg) {
6612  SMLoc Loc;
6613  for (unsigned i = 0; i < OpNum; ++i) {
6614    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6615      return false;
6616  }
6617
6618  return true;
6619}
6620
6621bool
6622AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6623  using namespace llvm::AMDGPU::Swizzle;
6624
6625  int64_t Lane[LANE_NUM];
6626  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6627                           "expected a 2-bit lane id")) {
6628    Imm = QUAD_PERM_ENC;
6629    for (unsigned I = 0; I < LANE_NUM; ++I) {
6630      Imm |= Lane[I] << (LANE_SHIFT * I);
6631    }
6632    return true;
6633  }
6634  return false;
6635}
6636
6637bool
6638AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6639  using namespace llvm::AMDGPU::Swizzle;
6640
6641  SMLoc Loc;
6642  int64_t GroupSize;
6643  int64_t LaneIdx;
6644
6645  if (!parseSwizzleOperand(GroupSize,
6646                           2, 32,
6647                           "group size must be in the interval [2,32]",
6648                           Loc)) {
6649    return false;
6650  }
6651  if (!isPowerOf2_64(GroupSize)) {
6652    Error(Loc, "group size must be a power of two");
6653    return false;
6654  }
6655  if (parseSwizzleOperand(LaneIdx,
6656                          0, GroupSize - 1,
6657                          "lane id must be in the interval [0,group size - 1]",
6658                          Loc)) {
6659    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6660    return true;
6661  }
6662  return false;
6663}
6664
6665bool
6666AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6667  using namespace llvm::AMDGPU::Swizzle;
6668
6669  SMLoc Loc;
6670  int64_t GroupSize;
6671
6672  if (!parseSwizzleOperand(GroupSize,
6673                           2, 32,
6674                           "group size must be in the interval [2,32]",
6675                           Loc)) {
6676    return false;
6677  }
6678  if (!isPowerOf2_64(GroupSize)) {
6679    Error(Loc, "group size must be a power of two");
6680    return false;
6681  }
6682
6683  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6684  return true;
6685}
6686
6687bool
6688AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6689  using namespace llvm::AMDGPU::Swizzle;
6690
6691  SMLoc Loc;
6692  int64_t GroupSize;
6693
6694  if (!parseSwizzleOperand(GroupSize,
6695                           1, 16,
6696                           "group size must be in the interval [1,16]",
6697                           Loc)) {
6698    return false;
6699  }
6700  if (!isPowerOf2_64(GroupSize)) {
6701    Error(Loc, "group size must be a power of two");
6702    return false;
6703  }
6704
6705  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6706  return true;
6707}
6708
6709bool
6710AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6711  using namespace llvm::AMDGPU::Swizzle;
6712
6713  if (!skipToken(AsmToken::Comma, "expected a comma")) {
6714    return false;
6715  }
6716
6717  StringRef Ctl;
6718  SMLoc StrLoc = getLoc();
6719  if (!parseString(Ctl)) {
6720    return false;
6721  }
6722  if (Ctl.size() != BITMASK_WIDTH) {
6723    Error(StrLoc, "expected a 5-character mask");
6724    return false;
6725  }
6726
6727  unsigned AndMask = 0;
6728  unsigned OrMask = 0;
6729  unsigned XorMask = 0;
6730
6731  for (size_t i = 0; i < Ctl.size(); ++i) {
6732    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6733    switch(Ctl[i]) {
6734    default:
6735      Error(StrLoc, "invalid mask");
6736      return false;
6737    case '0':
6738      break;
6739    case '1':
6740      OrMask |= Mask;
6741      break;
6742    case 'p':
6743      AndMask |= Mask;
6744      break;
6745    case 'i':
6746      AndMask |= Mask;
6747      XorMask |= Mask;
6748      break;
6749    }
6750  }
6751
6752  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6753  return true;
6754}
6755
6756bool
6757AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6758
6759  SMLoc OffsetLoc = getLoc();
6760
6761  if (!parseExpr(Imm, "a swizzle macro")) {
6762    return false;
6763  }
6764  if (!isUInt<16>(Imm)) {
6765    Error(OffsetLoc, "expected a 16-bit offset");
6766    return false;
6767  }
6768  return true;
6769}
6770
6771bool
6772AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6773  using namespace llvm::AMDGPU::Swizzle;
6774
6775  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
6776
6777    SMLoc ModeLoc = getLoc();
6778    bool Ok = false;
6779
6780    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6781      Ok = parseSwizzleQuadPerm(Imm);
6782    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6783      Ok = parseSwizzleBitmaskPerm(Imm);
6784    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6785      Ok = parseSwizzleBroadcast(Imm);
6786    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6787      Ok = parseSwizzleSwap(Imm);
6788    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6789      Ok = parseSwizzleReverse(Imm);
6790    } else {
6791      Error(ModeLoc, "expected a swizzle mode");
6792    }
6793
6794    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
6795  }
6796
6797  return false;
6798}
6799
6800OperandMatchResultTy
6801AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6802  SMLoc S = getLoc();
6803  int64_t Imm = 0;
6804
6805  if (trySkipId("offset")) {
6806
6807    bool Ok = false;
6808    if (skipToken(AsmToken::Colon, "expected a colon")) {
6809      if (trySkipId("swizzle")) {
6810        Ok = parseSwizzleMacro(Imm);
6811      } else {
6812        Ok = parseSwizzleOffset(Imm);
6813      }
6814    }
6815
6816    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6817
6818    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6819  } else {
6820    // Swizzle "offset" operand is optional.
6821    // If it is omitted, try parsing other optional operands.
6822    return parseOptionalOpr(Operands);
6823  }
6824}
6825
6826bool
6827AMDGPUOperand::isSwizzle() const {
6828  return isImmTy(ImmTySwizzle);
6829}
6830
6831//===----------------------------------------------------------------------===//
6832// VGPR Index Mode
6833//===----------------------------------------------------------------------===//
6834
6835int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6836
6837  using namespace llvm::AMDGPU::VGPRIndexMode;
6838
6839  if (trySkipToken(AsmToken::RParen)) {
6840    return OFF;
6841  }
6842
6843  int64_t Imm = 0;
6844
6845  while (true) {
6846    unsigned Mode = 0;
6847    SMLoc S = getLoc();
6848
6849    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6850      if (trySkipId(IdSymbolic[ModeId])) {
6851        Mode = 1 << ModeId;
6852        break;
6853      }
6854    }
6855
6856    if (Mode == 0) {
6857      Error(S, (Imm == 0)?
6858               "expected a VGPR index mode or a closing parenthesis" :
6859               "expected a VGPR index mode");
6860      return UNDEF;
6861    }
6862
6863    if (Imm & Mode) {
6864      Error(S, "duplicate VGPR index mode");
6865      return UNDEF;
6866    }
6867    Imm |= Mode;
6868
6869    if (trySkipToken(AsmToken::RParen))
6870      break;
6871    if (!skipToken(AsmToken::Comma,
6872                   "expected a comma or a closing parenthesis"))
6873      return UNDEF;
6874  }
6875
6876  return Imm;
6877}
6878
6879OperandMatchResultTy
6880AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6881
6882  using namespace llvm::AMDGPU::VGPRIndexMode;
6883
6884  int64_t Imm = 0;
6885  SMLoc S = getLoc();
6886
6887  if (trySkipId("gpr_idx", AsmToken::LParen)) {
6888    Imm = parseGPRIdxMacro();
6889    if (Imm == UNDEF)
6890      return MatchOperand_ParseFail;
6891  } else {
6892    if (getParser().parseAbsoluteExpression(Imm))
6893      return MatchOperand_ParseFail;
6894    if (Imm < 0 || !isUInt<4>(Imm)) {
6895      Error(S, "invalid immediate: only 4-bit values are legal");
6896      return MatchOperand_ParseFail;
6897    }
6898  }
6899
6900  Operands.push_back(
6901      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6902  return MatchOperand_Success;
6903}
6904
6905bool AMDGPUOperand::isGPRIdxMode() const {
6906  return isImmTy(ImmTyGprIdxMode);
6907}
6908
6909//===----------------------------------------------------------------------===//
6910// sopp branch targets
6911//===----------------------------------------------------------------------===//
6912
6913OperandMatchResultTy
6914AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6915
6916  // Make sure we are not parsing something
6917  // that looks like a label or an expression but is not.
6918  // This will improve error messages.
6919  if (isRegister() || isModifier())
6920    return MatchOperand_NoMatch;
6921
6922  if (!parseExpr(Operands))
6923    return MatchOperand_ParseFail;
6924
6925  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6926  assert(Opr.isImm() || Opr.isExpr());
6927  SMLoc Loc = Opr.getStartLoc();
6928
6929  // Currently we do not support arbitrary expressions as branch targets.
6930  // Only labels and absolute expressions are accepted.
6931  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6932    Error(Loc, "expected an absolute expression or a label");
6933  } else if (Opr.isImm() && !Opr.isS16Imm()) {
6934    Error(Loc, "expected a 16-bit signed jump offset");
6935  }
6936
6937  return MatchOperand_Success;
6938}
6939
6940//===----------------------------------------------------------------------===//
6941// Boolean holding registers
6942//===----------------------------------------------------------------------===//
6943
6944OperandMatchResultTy
6945AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6946  return parseReg(Operands);
6947}
6948
6949//===----------------------------------------------------------------------===//
6950// mubuf
6951//===----------------------------------------------------------------------===//
6952
6953AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
6954  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
6955}
6956
6957void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6958                                   const OperandVector &Operands,
6959                                   bool IsAtomic,
6960                                   bool IsLds) {
6961  bool IsLdsOpcode = IsLds;
6962  bool HasLdsModifier = false;
6963  OptionalImmIndexMap OptionalIdx;
6964  unsigned FirstOperandIdx = 1;
6965  bool IsAtomicReturn = false;
6966
6967  if (IsAtomic) {
6968    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6969      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6970      if (!Op.isCPol())
6971        continue;
6972      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
6973      break;
6974    }
6975
6976    if (!IsAtomicReturn) {
6977      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
6978      if (NewOpc != -1)
6979        Inst.setOpcode(NewOpc);
6980    }
6981
6982    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
6983                      SIInstrFlags::IsAtomicRet;
6984  }
6985
6986  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6987    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6988
6989    // Add the register arguments
6990    if (Op.isReg()) {
6991      Op.addRegOperands(Inst, 1);
6992      // Insert a tied src for atomic return dst.
6993      // This cannot be postponed as subsequent calls to
6994      // addImmOperands rely on correct number of MC operands.
6995      if (IsAtomicReturn && i == FirstOperandIdx)
6996        Op.addRegOperands(Inst, 1);
6997      continue;
6998    }
6999
7000    // Handle the case where soffset is an immediate
7001    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7002      Op.addImmOperands(Inst, 1);
7003      continue;
7004    }
7005
7006    HasLdsModifier |= Op.isLDS();
7007
7008    // Handle tokens like 'offen' which are sometimes hard-coded into the
7009    // asm string.  There are no MCInst operands for these.
7010    if (Op.isToken()) {
7011      continue;
7012    }
7013    assert(Op.isImm());
7014
7015    // Handle optional arguments
7016    OptionalIdx[Op.getImmTy()] = i;
7017  }
7018
7019  // This is a workaround for an llvm quirk which may result in an
7020  // incorrect instruction selection. Lds and non-lds versions of
7021  // MUBUF instructions are identical except that lds versions
7022  // have mandatory 'lds' modifier. However this modifier follows
7023  // optional modifiers and llvm asm matcher regards this 'lds'
7024  // modifier as an optional one. As a result, an lds version
7025  // of opcode may be selected even if it has no 'lds' modifier.
7026  if (IsLdsOpcode && !HasLdsModifier) {
7027    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
7028    if (NoLdsOpcode != -1) { // Got lds version - correct it.
7029      Inst.setOpcode(NoLdsOpcode);
7030      IsLdsOpcode = false;
7031    }
7032  }
7033
7034  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7035  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7036
7037  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
7038    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7039  }
7040  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7041}
7042
7043void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7044  OptionalImmIndexMap OptionalIdx;
7045
7046  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7047    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7048
7049    // Add the register arguments
7050    if (Op.isReg()) {
7051      Op.addRegOperands(Inst, 1);
7052      continue;
7053    }
7054
7055    // Handle the case where soffset is an immediate
7056    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7057      Op.addImmOperands(Inst, 1);
7058      continue;
7059    }
7060
7061    // Handle tokens like 'offen' which are sometimes hard-coded into the
7062    // asm string.  There are no MCInst operands for these.
7063    if (Op.isToken()) {
7064      continue;
7065    }
7066    assert(Op.isImm());
7067
7068    // Handle optional arguments
7069    OptionalIdx[Op.getImmTy()] = i;
7070  }
7071
7072  addOptionalImmOperand(Inst, Operands, OptionalIdx,
7073                        AMDGPUOperand::ImmTyOffset);
7074  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7075  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7076  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7077  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7078}
7079
7080//===----------------------------------------------------------------------===//
7081// mimg
7082//===----------------------------------------------------------------------===//
7083
7084void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7085                              bool IsAtomic) {
7086  unsigned I = 1;
7087  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7088  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7089    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7090  }
7091
7092  if (IsAtomic) {
7093    // Add src, same as dst
7094    assert(Desc.getNumDefs() == 1);
7095    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7096  }
7097
7098  OptionalImmIndexMap OptionalIdx;
7099
7100  for (unsigned E = Operands.size(); I != E; ++I) {
7101    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7102
7103    // Add the register arguments
7104    if (Op.isReg()) {
7105      Op.addRegOperands(Inst, 1);
7106    } else if (Op.isImmModifier()) {
7107      OptionalIdx[Op.getImmTy()] = I;
7108    } else if (!Op.isToken()) {
7109      llvm_unreachable("unexpected operand type");
7110    }
7111  }
7112
7113  bool IsGFX10Plus = isGFX10Plus();
7114
7115  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7116  if (IsGFX10Plus)
7117    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7118  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7119  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7120  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7121  if (IsGFX10Plus)
7122    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7123  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7124    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7125  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7126  if (!IsGFX10Plus)
7127    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7128  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7129}
7130
7131void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7132  cvtMIMG(Inst, Operands, true);
7133}
7134
7135void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7136  OptionalImmIndexMap OptionalIdx;
7137  bool IsAtomicReturn = false;
7138
7139  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7140    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7141    if (!Op.isCPol())
7142      continue;
7143    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7144    break;
7145  }
7146
7147  if (!IsAtomicReturn) {
7148    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7149    if (NewOpc != -1)
7150      Inst.setOpcode(NewOpc);
7151  }
7152
7153  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7154                    SIInstrFlags::IsAtomicRet;
7155
7156  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7157    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7158
7159    // Add the register arguments
7160    if (Op.isReg()) {
7161      Op.addRegOperands(Inst, 1);
7162      if (IsAtomicReturn && i == 1)
7163        Op.addRegOperands(Inst, 1);
7164      continue;
7165    }
7166
7167    // Handle the case where soffset is an immediate
7168    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7169      Op.addImmOperands(Inst, 1);
7170      continue;
7171    }
7172
7173    // Handle tokens like 'offen' which are sometimes hard-coded into the
7174    // asm string.  There are no MCInst operands for these.
7175    if (Op.isToken()) {
7176      continue;
7177    }
7178    assert(Op.isImm());
7179
7180    // Handle optional arguments
7181    OptionalIdx[Op.getImmTy()] = i;
7182  }
7183
7184  if ((int)Inst.getNumOperands() <=
7185      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7186    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7187  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7188}
7189
7190void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7191                                      const OperandVector &Operands) {
7192  for (unsigned I = 1; I < Operands.size(); ++I) {
7193    auto &Operand = (AMDGPUOperand &)*Operands[I];
7194    if (Operand.isReg())
7195      Operand.addRegOperands(Inst, 1);
7196  }
7197
7198  Inst.addOperand(MCOperand::createImm(1)); // a16
7199}
7200
7201//===----------------------------------------------------------------------===//
7202// smrd
7203//===----------------------------------------------------------------------===//
7204
7205bool AMDGPUOperand::isSMRDOffset8() const {
7206  return isImm() && isUInt<8>(getImm());
7207}
7208
7209bool AMDGPUOperand::isSMEMOffset() const {
7210  return isImm(); // Offset range is checked later by validator.
7211}
7212
7213bool AMDGPUOperand::isSMRDLiteralOffset() const {
7214  // 32-bit literals are only supported on CI and we only want to use them
7215  // when the offset is > 8-bits.
7216  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7217}
7218
7219AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7220  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7221}
7222
7223AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7224  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7225}
7226
7227AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7228  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7229}
7230
7231AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7232  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7233}
7234
7235//===----------------------------------------------------------------------===//
7236// vop3
7237//===----------------------------------------------------------------------===//
7238
7239static bool ConvertOmodMul(int64_t &Mul) {
7240  if (Mul != 1 && Mul != 2 && Mul != 4)
7241    return false;
7242
7243  Mul >>= 1;
7244  return true;
7245}
7246
7247static bool ConvertOmodDiv(int64_t &Div) {
7248  if (Div == 1) {
7249    Div = 0;
7250    return true;
7251  }
7252
7253  if (Div == 2) {
7254    Div = 3;
7255    return true;
7256  }
7257
7258  return false;
7259}
7260
7261// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7262// This is intentional and ensures compatibility with sp3.
7263// See bug 35397 for details.
7264static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7265  if (BoundCtrl == 0 || BoundCtrl == 1) {
7266    BoundCtrl = 1;
7267    return true;
7268  }
7269  return false;
7270}
7271
7272// Note: the order in this table matches the order of operands in AsmString.
7273static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7274  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7275  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7276  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7277  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7278  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7279  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7280  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7281  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7282  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7283  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7284  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7285  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7286  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7287  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7288  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7289  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7290  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7291  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7292  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7293  {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7294  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7295  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7296  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7297  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7298  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7299  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7300  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7301  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
7302  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7303  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7304  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7305  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7306  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7307  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7308  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7309  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7310  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7311  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7312  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7313  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7314  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7315};
7316
7317void AMDGPUAsmParser::onBeginOfFile() {
7318  if (!getParser().getStreamer().getTargetStreamer() ||
7319      getSTI().getTargetTriple().getArch() == Triple::r600)
7320    return;
7321
7322  if (!getTargetStreamer().getTargetID())
7323    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7324
7325  if (isHsaAbiVersion3Or4(&getSTI()))
7326    getTargetStreamer().EmitDirectiveAMDGCNTarget();
7327}
7328
7329OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7330
7331  OperandMatchResultTy res = parseOptionalOpr(Operands);
7332
7333  // This is a hack to enable hardcoded mandatory operands which follow
7334  // optional operands.
7335  //
7336  // Current design assumes that all operands after the first optional operand
7337  // are also optional. However implementation of some instructions violates
7338  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7339  //
7340  // To alleviate this problem, we have to (implicitly) parse extra operands
7341  // to make sure autogenerated parser of custom operands never hit hardcoded
7342  // mandatory operands.
7343
7344  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7345    if (res != MatchOperand_Success ||
7346        isToken(AsmToken::EndOfStatement))
7347      break;
7348
7349    trySkipToken(AsmToken::Comma);
7350    res = parseOptionalOpr(Operands);
7351  }
7352
7353  return res;
7354}
7355
7356OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7357  OperandMatchResultTy res;
7358  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7359    // try to parse any optional operand here
7360    if (Op.IsBit) {
7361      res = parseNamedBit(Op.Name, Operands, Op.Type);
7362    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7363      res = parseOModOperand(Operands);
7364    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7365               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7366               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7367      res = parseSDWASel(Operands, Op.Name, Op.Type);
7368    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7369      res = parseSDWADstUnused(Operands);
7370    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7371               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7372               Op.Type == AMDGPUOperand::ImmTyNegLo ||
7373               Op.Type == AMDGPUOperand::ImmTyNegHi) {
7374      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7375                                        Op.ConvertResult);
7376    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7377      res = parseDim(Operands);
7378    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7379      res = parseCPol(Operands);
7380    } else {
7381      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7382    }
7383    if (res != MatchOperand_NoMatch) {
7384      return res;
7385    }
7386  }
7387  return MatchOperand_NoMatch;
7388}
7389
7390OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7391  StringRef Name = getTokenStr();
7392  if (Name == "mul") {
7393    return parseIntWithPrefix("mul", Operands,
7394                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7395  }
7396
7397  if (Name == "div") {
7398    return parseIntWithPrefix("div", Operands,
7399                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7400  }
7401
7402  return MatchOperand_NoMatch;
7403}
7404
7405void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
7406  cvtVOP3P(Inst, Operands);
7407
7408  int Opc = Inst.getOpcode();
7409
7410  int SrcNum;
7411  const int Ops[] = { AMDGPU::OpName::src0,
7412                      AMDGPU::OpName::src1,
7413                      AMDGPU::OpName::src2 };
7414  for (SrcNum = 0;
7415       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
7416       ++SrcNum);
7417  assert(SrcNum > 0);
7418
7419  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7420  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7421
7422  if ((OpSel & (1 << SrcNum)) != 0) {
7423    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7424    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7425    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7426  }
7427}
7428
7429static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7430      // 1. This operand is input modifiers
7431  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7432      // 2. This is not last operand
7433      && Desc.NumOperands > (OpNum + 1)
7434      // 3. Next operand is register class
7435      && Desc.OpInfo[OpNum + 1].RegClass != -1
7436      // 4. Next register is not tied to any other operand
7437      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
7438}
7439
7440void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7441{
7442  OptionalImmIndexMap OptionalIdx;
7443  unsigned Opc = Inst.getOpcode();
7444
7445  unsigned I = 1;
7446  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7447  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7448    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7449  }
7450
7451  for (unsigned E = Operands.size(); I != E; ++I) {
7452    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7453    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7454      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7455    } else if (Op.isInterpSlot() ||
7456               Op.isInterpAttr() ||
7457               Op.isAttrChan()) {
7458      Inst.addOperand(MCOperand::createImm(Op.getImm()));
7459    } else if (Op.isImmModifier()) {
7460      OptionalIdx[Op.getImmTy()] = I;
7461    } else {
7462      llvm_unreachable("unhandled operand type");
7463    }
7464  }
7465
7466  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
7467    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
7468  }
7469
7470  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7471    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7472  }
7473
7474  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7475    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7476  }
7477}
7478
7479void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7480                              OptionalImmIndexMap &OptionalIdx) {
7481  unsigned Opc = Inst.getOpcode();
7482
7483  unsigned I = 1;
7484  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7485  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7486    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7487  }
7488
7489  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
7490    // This instruction has src modifiers
7491    for (unsigned E = Operands.size(); I != E; ++I) {
7492      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7493      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7494        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7495      } else if (Op.isImmModifier()) {
7496        OptionalIdx[Op.getImmTy()] = I;
7497      } else if (Op.isRegOrImm()) {
7498        Op.addRegOrImmOperands(Inst, 1);
7499      } else {
7500        llvm_unreachable("unhandled operand type");
7501      }
7502    }
7503  } else {
7504    // No src modifiers
7505    for (unsigned E = Operands.size(); I != E; ++I) {
7506      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7507      if (Op.isMod()) {
7508        OptionalIdx[Op.getImmTy()] = I;
7509      } else {
7510        Op.addRegOrImmOperands(Inst, 1);
7511      }
7512    }
7513  }
7514
7515  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
7516    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7517  }
7518
7519  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7520    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7521  }
7522
7523  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7524  // it has src2 register operand that is tied to dst operand
7525  // we don't allow modifiers for this operand in assembler so src2_modifiers
7526  // should be 0.
7527  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7528      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7529      Opc == AMDGPU::V_MAC_F32_e64_vi ||
7530      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7531      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7532      Opc == AMDGPU::V_MAC_F16_e64_vi ||
7533      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
7534      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7535      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7536      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7537      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7538    auto it = Inst.begin();
7539    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7540    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7541    ++it;
7542    // Copy the operand to ensure it's not invalidated when Inst grows.
7543    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7544  }
7545}
7546
7547void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7548  OptionalImmIndexMap OptionalIdx;
7549  cvtVOP3(Inst, Operands, OptionalIdx);
7550}
7551
7552void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7553                               OptionalImmIndexMap &OptIdx) {
7554  const int Opc = Inst.getOpcode();
7555  const MCInstrDesc &Desc = MII.get(Opc);
7556
7557  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7558
7559  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7560    assert(!IsPacked);
7561    Inst.addOperand(Inst.getOperand(0));
7562  }
7563
7564  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7565  // instruction, and then figure out where to actually put the modifiers
7566
7567  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7568  if (OpSelIdx != -1) {
7569    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7570  }
7571
7572  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7573  if (OpSelHiIdx != -1) {
7574    int DefaultVal = IsPacked ? -1 : 0;
7575    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7576                          DefaultVal);
7577  }
7578
7579  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7580  if (NegLoIdx != -1) {
7581    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7582    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7583  }
7584
7585  const int Ops[] = { AMDGPU::OpName::src0,
7586                      AMDGPU::OpName::src1,
7587                      AMDGPU::OpName::src2 };
7588  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7589                         AMDGPU::OpName::src1_modifiers,
7590                         AMDGPU::OpName::src2_modifiers };
7591
7592  unsigned OpSel = 0;
7593  unsigned OpSelHi = 0;
7594  unsigned NegLo = 0;
7595  unsigned NegHi = 0;
7596
7597  if (OpSelIdx != -1)
7598    OpSel = Inst.getOperand(OpSelIdx).getImm();
7599
7600  if (OpSelHiIdx != -1)
7601    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7602
7603  if (NegLoIdx != -1) {
7604    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7605    NegLo = Inst.getOperand(NegLoIdx).getImm();
7606    NegHi = Inst.getOperand(NegHiIdx).getImm();
7607  }
7608
7609  for (int J = 0; J < 3; ++J) {
7610    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7611    if (OpIdx == -1)
7612      break;
7613
7614    uint32_t ModVal = 0;
7615
7616    if ((OpSel & (1 << J)) != 0)
7617      ModVal |= SISrcMods::OP_SEL_0;
7618
7619    if ((OpSelHi & (1 << J)) != 0)
7620      ModVal |= SISrcMods::OP_SEL_1;
7621
7622    if ((NegLo & (1 << J)) != 0)
7623      ModVal |= SISrcMods::NEG;
7624
7625    if ((NegHi & (1 << J)) != 0)
7626      ModVal |= SISrcMods::NEG_HI;
7627
7628    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7629
7630    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7631  }
7632}
7633
7634void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
7635  OptionalImmIndexMap OptIdx;
7636  cvtVOP3(Inst, Operands, OptIdx);
7637  cvtVOP3P(Inst, Operands, OptIdx);
7638}
7639
7640//===----------------------------------------------------------------------===//
7641// dpp
7642//===----------------------------------------------------------------------===//
7643
7644bool AMDGPUOperand::isDPP8() const {
7645  return isImmTy(ImmTyDPP8);
7646}
7647
7648bool AMDGPUOperand::isDPPCtrl() const {
7649  using namespace AMDGPU::DPP;
7650
7651  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7652  if (result) {
7653    int64_t Imm = getImm();
7654    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7655           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7656           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7657           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7658           (Imm == DppCtrl::WAVE_SHL1) ||
7659           (Imm == DppCtrl::WAVE_ROL1) ||
7660           (Imm == DppCtrl::WAVE_SHR1) ||
7661           (Imm == DppCtrl::WAVE_ROR1) ||
7662           (Imm == DppCtrl::ROW_MIRROR) ||
7663           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7664           (Imm == DppCtrl::BCAST15) ||
7665           (Imm == DppCtrl::BCAST31) ||
7666           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7667           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7668  }
7669  return false;
7670}
7671
7672//===----------------------------------------------------------------------===//
7673// mAI
7674//===----------------------------------------------------------------------===//
7675
7676bool AMDGPUOperand::isBLGP() const {
7677  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7678}
7679
7680bool AMDGPUOperand::isCBSZ() const {
7681  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7682}
7683
7684bool AMDGPUOperand::isABID() const {
7685  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7686}
7687
7688bool AMDGPUOperand::isS16Imm() const {
7689  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7690}
7691
7692bool AMDGPUOperand::isU16Imm() const {
7693  return isImm() && isUInt<16>(getImm());
7694}
7695
7696//===----------------------------------------------------------------------===//
7697// dim
7698//===----------------------------------------------------------------------===//
7699
7700bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
7701  // We want to allow "dim:1D" etc.,
7702  // but the initial 1 is tokenized as an integer.
7703  std::string Token;
7704  if (isToken(AsmToken::Integer)) {
7705    SMLoc Loc = getToken().getEndLoc();
7706    Token = std::string(getTokenStr());
7707    lex();
7708    if (getLoc() != Loc)
7709      return false;
7710  }
7711
7712  StringRef Suffix;
7713  if (!parseId(Suffix))
7714    return false;
7715  Token += Suffix;
7716
7717  StringRef DimId = Token;
7718  if (DimId.startswith("SQ_RSRC_IMG_"))
7719    DimId = DimId.drop_front(12);
7720
7721  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7722  if (!DimInfo)
7723    return false;
7724
7725  Encoding = DimInfo->Encoding;
7726  return true;
7727}
7728
7729OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7730  if (!isGFX10Plus())
7731    return MatchOperand_NoMatch;
7732
7733  SMLoc S = getLoc();
7734
7735  if (!trySkipId("dim", AsmToken::Colon))
7736    return MatchOperand_NoMatch;
7737
7738  unsigned Encoding;
7739  SMLoc Loc = getLoc();
7740  if (!parseDimId(Encoding)) {
7741    Error(Loc, "invalid dim value");
7742    return MatchOperand_ParseFail;
7743  }
7744
7745  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
7746                                              AMDGPUOperand::ImmTyDim));
7747  return MatchOperand_Success;
7748}
7749
7750//===----------------------------------------------------------------------===//
7751// dpp
7752//===----------------------------------------------------------------------===//
7753
7754OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7755  SMLoc S = getLoc();
7756
7757  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7758    return MatchOperand_NoMatch;
7759
7760  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
7761
7762  int64_t Sels[8];
7763
7764  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7765    return MatchOperand_ParseFail;
7766
7767  for (size_t i = 0; i < 8; ++i) {
7768    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7769      return MatchOperand_ParseFail;
7770
7771    SMLoc Loc = getLoc();
7772    if (getParser().parseAbsoluteExpression(Sels[i]))
7773      return MatchOperand_ParseFail;
7774    if (0 > Sels[i] || 7 < Sels[i]) {
7775      Error(Loc, "expected a 3-bit value");
7776      return MatchOperand_ParseFail;
7777    }
7778  }
7779
7780  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7781    return MatchOperand_ParseFail;
7782
7783  unsigned DPP8 = 0;
7784  for (size_t i = 0; i < 8; ++i)
7785    DPP8 |= (Sels[i] << (i * 3));
7786
7787  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7788  return MatchOperand_Success;
7789}
7790
7791bool
7792AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7793                                    const OperandVector &Operands) {
7794  if (Ctrl == "row_newbcast")
7795    return isGFX90A();
7796
7797  if (Ctrl == "row_share" ||
7798      Ctrl == "row_xmask")
7799    return isGFX10Plus();
7800
7801  if (Ctrl == "wave_shl" ||
7802      Ctrl == "wave_shr" ||
7803      Ctrl == "wave_rol" ||
7804      Ctrl == "wave_ror" ||
7805      Ctrl == "row_bcast")
7806    return isVI() || isGFX9();
7807
7808  return Ctrl == "row_mirror" ||
7809         Ctrl == "row_half_mirror" ||
7810         Ctrl == "quad_perm" ||
7811         Ctrl == "row_shl" ||
7812         Ctrl == "row_shr" ||
7813         Ctrl == "row_ror";
7814}
7815
7816int64_t
7817AMDGPUAsmParser::parseDPPCtrlPerm() {
7818  // quad_perm:[%d,%d,%d,%d]
7819
7820  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7821    return -1;
7822
7823  int64_t Val = 0;
7824  for (int i = 0; i < 4; ++i) {
7825    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7826      return -1;
7827
7828    int64_t Temp;
7829    SMLoc Loc = getLoc();
7830    if (getParser().parseAbsoluteExpression(Temp))
7831      return -1;
7832    if (Temp < 0 || Temp > 3) {
7833      Error(Loc, "expected a 2-bit value");
7834      return -1;
7835    }
7836
7837    Val += (Temp << i * 2);
7838  }
7839
7840  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7841    return -1;
7842
7843  return Val;
7844}
7845
7846int64_t
7847AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7848  using namespace AMDGPU::DPP;
7849
7850  // sel:%d
7851
7852  int64_t Val;
7853  SMLoc Loc = getLoc();
7854
7855  if (getParser().parseAbsoluteExpression(Val))
7856    return -1;
7857
7858  struct DppCtrlCheck {
7859    int64_t Ctrl;
7860    int Lo;
7861    int Hi;
7862  };
7863
7864  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7865    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
7866    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
7867    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
7868    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
7869    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
7870    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
7871    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
7872    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7873    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7874    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
7875    .Default({-1, 0, 0});
7876
7877  bool Valid;
7878  if (Check.Ctrl == -1) {
7879    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7880    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7881  } else {
7882    Valid = Check.Lo <= Val && Val <= Check.Hi;
7883    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7884  }
7885
7886  if (!Valid) {
7887    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7888    return -1;
7889  }
7890
7891  return Val;
7892}
7893
7894OperandMatchResultTy
7895AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7896  using namespace AMDGPU::DPP;
7897
7898  if (!isToken(AsmToken::Identifier) ||
7899      !isSupportedDPPCtrl(getTokenStr(), Operands))
7900    return MatchOperand_NoMatch;
7901
7902  SMLoc S = getLoc();
7903  int64_t Val = -1;
7904  StringRef Ctrl;
7905
7906  parseId(Ctrl);
7907
7908  if (Ctrl == "row_mirror") {
7909    Val = DppCtrl::ROW_MIRROR;
7910  } else if (Ctrl == "row_half_mirror") {
7911    Val = DppCtrl::ROW_HALF_MIRROR;
7912  } else {
7913    if (skipToken(AsmToken::Colon, "expected a colon")) {
7914      if (Ctrl == "quad_perm") {
7915        Val = parseDPPCtrlPerm();
7916      } else {
7917        Val = parseDPPCtrlSel(Ctrl);
7918      }
7919    }
7920  }
7921
7922  if (Val == -1)
7923    return MatchOperand_ParseFail;
7924
7925  Operands.push_back(
7926    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7927  return MatchOperand_Success;
7928}
7929
7930AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7931  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7932}
7933
7934AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7935  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7936}
7937
7938AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7939  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7940}
7941
7942AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7943  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7944}
7945
7946AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7947  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7948}
7949
7950void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7951  OptionalImmIndexMap OptionalIdx;
7952
7953  unsigned I = 1;
7954  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7955  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7956    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7957  }
7958
7959  int Fi = 0;
7960  for (unsigned E = Operands.size(); I != E; ++I) {
7961    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7962                                            MCOI::TIED_TO);
7963    if (TiedTo != -1) {
7964      assert((unsigned)TiedTo < Inst.getNumOperands());
7965      // handle tied old or src2 for MAC instructions
7966      Inst.addOperand(Inst.getOperand(TiedTo));
7967    }
7968    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7969    // Add the register arguments
7970    if (Op.isReg() && validateVccOperand(Op.getReg())) {
7971      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
7972      // Skip it.
7973      continue;
7974    }
7975
7976    if (IsDPP8) {
7977      if (Op.isDPP8()) {
7978        Op.addImmOperands(Inst, 1);
7979      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7980        Op.addRegWithFPInputModsOperands(Inst, 2);
7981      } else if (Op.isFI()) {
7982        Fi = Op.getImm();
7983      } else if (Op.isReg()) {
7984        Op.addRegOperands(Inst, 1);
7985      } else {
7986        llvm_unreachable("Invalid operand type");
7987      }
7988    } else {
7989      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7990        Op.addRegWithFPInputModsOperands(Inst, 2);
7991      } else if (Op.isDPPCtrl()) {
7992        Op.addImmOperands(Inst, 1);
7993      } else if (Op.isImm()) {
7994        // Handle optional arguments
7995        OptionalIdx[Op.getImmTy()] = I;
7996      } else {
7997        llvm_unreachable("Invalid operand type");
7998      }
7999    }
8000  }
8001
8002  if (IsDPP8) {
8003    using namespace llvm::AMDGPU::DPP;
8004    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8005  } else {
8006    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8007    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8008    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8009    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8010      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8011    }
8012  }
8013}
8014
8015//===----------------------------------------------------------------------===//
8016// sdwa
8017//===----------------------------------------------------------------------===//
8018
8019OperandMatchResultTy
8020AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8021                              AMDGPUOperand::ImmTy Type) {
8022  using namespace llvm::AMDGPU::SDWA;
8023
8024  SMLoc S = getLoc();
8025  StringRef Value;
8026  OperandMatchResultTy res;
8027
8028  SMLoc StringLoc;
8029  res = parseStringWithPrefix(Prefix, Value, StringLoc);
8030  if (res != MatchOperand_Success) {
8031    return res;
8032  }
8033
8034  int64_t Int;
8035  Int = StringSwitch<int64_t>(Value)
8036        .Case("BYTE_0", SdwaSel::BYTE_0)
8037        .Case("BYTE_1", SdwaSel::BYTE_1)
8038        .Case("BYTE_2", SdwaSel::BYTE_2)
8039        .Case("BYTE_3", SdwaSel::BYTE_3)
8040        .Case("WORD_0", SdwaSel::WORD_0)
8041        .Case("WORD_1", SdwaSel::WORD_1)
8042        .Case("DWORD", SdwaSel::DWORD)
8043        .Default(0xffffffff);
8044
8045  if (Int == 0xffffffff) {
8046    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8047    return MatchOperand_ParseFail;
8048  }
8049
8050  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8051  return MatchOperand_Success;
8052}
8053
8054OperandMatchResultTy
8055AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8056  using namespace llvm::AMDGPU::SDWA;
8057
8058  SMLoc S = getLoc();
8059  StringRef Value;
8060  OperandMatchResultTy res;
8061
8062  SMLoc StringLoc;
8063  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8064  if (res != MatchOperand_Success) {
8065    return res;
8066  }
8067
8068  int64_t Int;
8069  Int = StringSwitch<int64_t>(Value)
8070        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8071        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8072        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8073        .Default(0xffffffff);
8074
8075  if (Int == 0xffffffff) {
8076    Error(StringLoc, "invalid dst_unused value");
8077    return MatchOperand_ParseFail;
8078  }
8079
8080  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8081  return MatchOperand_Success;
8082}
8083
8084void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8085  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8086}
8087
8088void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8089  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8090}
8091
8092void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8093  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8094}
8095
8096void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8097  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8098}
8099
8100void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8101  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8102}
8103
8104void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8105                              uint64_t BasicInstType,
8106                              bool SkipDstVcc,
8107                              bool SkipSrcVcc) {
8108  using namespace llvm::AMDGPU::SDWA;
8109
8110  OptionalImmIndexMap OptionalIdx;
8111  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8112  bool SkippedVcc = false;
8113
8114  unsigned I = 1;
8115  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8116  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8117    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8118  }
8119
8120  for (unsigned E = Operands.size(); I != E; ++I) {
8121    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8122    if (SkipVcc && !SkippedVcc && Op.isReg() &&
8123        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8124      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8125      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8126      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8127      // Skip VCC only if we didn't skip it on previous iteration.
8128      // Note that src0 and src1 occupy 2 slots each because of modifiers.
8129      if (BasicInstType == SIInstrFlags::VOP2 &&
8130          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8131           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8132        SkippedVcc = true;
8133        continue;
8134      } else if (BasicInstType == SIInstrFlags::VOPC &&
8135                 Inst.getNumOperands() == 0) {
8136        SkippedVcc = true;
8137        continue;
8138      }
8139    }
8140    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8141      Op.addRegOrImmWithInputModsOperands(Inst, 2);
8142    } else if (Op.isImm()) {
8143      // Handle optional arguments
8144      OptionalIdx[Op.getImmTy()] = I;
8145    } else {
8146      llvm_unreachable("Invalid operand type");
8147    }
8148    SkippedVcc = false;
8149  }
8150
8151  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8152      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8153      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8154    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
8155    switch (BasicInstType) {
8156    case SIInstrFlags::VOP1:
8157      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8158      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8159        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8160      }
8161      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8162      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8163      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8164      break;
8165
8166    case SIInstrFlags::VOP2:
8167      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8168      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8169        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8170      }
8171      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8172      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8173      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8174      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8175      break;
8176
8177    case SIInstrFlags::VOPC:
8178      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8179        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8180      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8181      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8182      break;
8183
8184    default:
8185      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8186    }
8187  }
8188
8189  // special case v_mac_{f16, f32}:
8190  // it has src2 register operand that is tied to dst operand
8191  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8192      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8193    auto it = Inst.begin();
8194    std::advance(
8195      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8196    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8197  }
8198}
8199
8200//===----------------------------------------------------------------------===//
8201// mAI
8202//===----------------------------------------------------------------------===//
8203
8204AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8205  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8206}
8207
8208AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8209  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8210}
8211
8212AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8213  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8214}
8215
8216/// Force static initialization.
8217extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8218  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8219  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8220}
8221
8222#define GET_REGISTER_MATCHER
8223#define GET_MATCHER_IMPLEMENTATION
8224#define GET_MNEMONIC_SPELL_CHECKER
8225#define GET_MNEMONIC_CHECKER
8226#include "AMDGPUGenAsmMatcher.inc"
8227
8228// This fuction should be defined after auto-generated include so that we have
8229// MatchClassKind enum defined
8230unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8231                                                     unsigned Kind) {
8232  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8233  // But MatchInstructionImpl() expects to meet token and fails to validate
8234  // operand. This method checks if we are given immediate operand but expect to
8235  // get corresponding token.
8236  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8237  switch (Kind) {
8238  case MCK_addr64:
8239    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8240  case MCK_gds:
8241    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8242  case MCK_lds:
8243    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8244  case MCK_idxen:
8245    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8246  case MCK_offen:
8247    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8248  case MCK_SSrcB32:
8249    // When operands have expression values, they will return true for isToken,
8250    // because it is not possible to distinguish between a token and an
8251    // expression at parse time. MatchInstructionImpl() will always try to
8252    // match an operand as a token, when isToken returns true, and when the
8253    // name of the expression is not a valid token, the match will fail,
8254    // so we need to handle it here.
8255    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8256  case MCK_SSrcF32:
8257    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8258  case MCK_SoppBrTarget:
8259    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8260  case MCK_VReg32OrOff:
8261    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8262  case MCK_InterpSlot:
8263    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8264  case MCK_Attr:
8265    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8266  case MCK_AttrChan:
8267    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8268  case MCK_ImmSMEMOffset:
8269    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8270  case MCK_SReg_64:
8271  case MCK_SReg_64_XEXEC:
8272    // Null is defined as a 32-bit register but
8273    // it should also be enabled with 64-bit operands.
8274    // The following code enables it for SReg_64 operands
8275    // used as source and destination. Remaining source
8276    // operands are handled in isInlinableImm.
8277    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8278  default:
8279    return Match_InvalidOperand;
8280  }
8281}
8282
8283//===----------------------------------------------------------------------===//
8284// endpgm
8285//===----------------------------------------------------------------------===//
8286
8287OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8288  SMLoc S = getLoc();
8289  int64_t Imm = 0;
8290
8291  if (!parseExpr(Imm)) {
8292    // The operand is optional, if not present default to 0
8293    Imm = 0;
8294  }
8295
8296  if (!isUInt<16>(Imm)) {
8297    Error(S, "expected a 16-bit value");
8298    return MatchOperand_ParseFail;
8299  }
8300
8301  Operands.push_back(
8302      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8303  return MatchOperand_Success;
8304}
8305
8306bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8307