//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;
    bool Lit = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
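  // For illustration of the Modifiers struct above: a VOP3 source written as
  // "-|v0|" sets both Abs and Neg, so getFPModifiersOperand() yields
  // SISrcMods::ABS | SISrcMods::NEG, whereas "sext(v0)" sets only Sext and is
  // encoded by getIntModifiersOperand(). The assert in getModifiersOperand()
  // enforces that the two kinds are never combined on a single operand.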

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
  };

  // Immediate operand kind.
  // It helps to identify the location of an offending operand after an error.
  // Note that regular literals and mandatory literals (KImm) must be handled
  // differently. When looking for an offending operand, we should usually
  // ignore mandatory literals because they are part of the instruction and
  // cannot be changed. Report location of mandatory operands only for VOPD,
  // when both OpX and OpY have a KImm and there are no other literals.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyMandatoryLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindMandatoryLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyMandatoryLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool IsImmKindMandatoryLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithIntT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFPT16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }


  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcTB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }

  bool isVSrcTB16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16B16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }

  bool isVSrcTF16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16F16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;
  bool isWaitVAVDst() const;
  bool isWaitVMVSrc() const;

  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return std::bind(P, *this);
  }

  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    }
    // clang-format on
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};
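
// Usage sketch (based on the factory functions declared above, not a quote of
// the parser code further below): the parser materializes a parsed literal as
// AMDGPUOperand::CreateImm(this, Val, Loc) and a register such as v0 as
// AMDGPUOperand::CreateReg(this, RegNo, StartLoc, EndLoc), then appends the
// resulting operands to the OperandVector for the instruction being matched.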

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. the count of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
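
  // For example, a reference to v[4:7] (DwordRegIndex 4, RegWidth 128) ends up
  // calling usesVgprAt(4 + divideCeil(128, 32) - 1) == usesVgprAt(7), which
  // bumps VgprIndexUnusedMin to 8 and updates the .kernel.vgpr_count symbol
  // via getTotalNumVGPRs().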
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          std::optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSACodeObjectVersion();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
                         unsigned RegWidth, SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
               const MCInstrInfo &MII,
               const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
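
  // Illustrative note (assumed example, not taken from the code above): when
  // assembling for a gfx1030 target under the HSA ABI, IsaVersion is 10.3.0,
  // so the constructor defines .amdgcn.gfx_generation_number = 10,
  // .amdgcn.gfx_generation_minor = 3 and .amdgcn.gfx_generation_stepping = 0;
  // a non-HSA target gets the equivalent .option.machine_version_* symbols
  // instead.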

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
                           OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);

  ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);

  ParseStatus
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     std::function<bool(int64_t &)> ConvertResult = nullptr);

  ParseStatus parseOperandArrayWithPrefix(
      const char *Prefix, OperandVector &Operands,
      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
      bool (*ConvertResult)(int64_t &) = nullptr);

  ParseStatus
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
  ParseStatus parseCPol(OperandVector &Operands);
  ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
  ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
  ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
                                    SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
                       bool HasLit = false);
  ParseStatus parseReg(OperandVector &Operands);
  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
                            bool HasLit = false);
  ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                           bool AllowImm = true);
  ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
  ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
  ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
  ParseStatus parseVReg32OrOff(OperandVector &Operands);
  ParseStatus tryParseIndexKey(OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy);
  ParseStatus parseIndexKey8bit(OperandVector &Operands);
  ParseStatus parseIndexKey16bit(OperandVector &Operands);

  ParseStatus parseDfmtNfmt(int64_t &Format);
  ParseStatus parseUfmt(int64_t &Format);
  ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                       int64_t &Format);
  ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                         int64_t &Format);
  ParseStatus parseFORMAT(OperandVector &Operands);
  ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
  ParseStatus parseNumericFormat(int64_t &Format);
  ParseStatus parseFlatOffset(OperandVector &Operands);
  ParseStatus parseR128A16(OperandVector &Operands);
  ParseStatus parseBLGP(OperandVector &Operands);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  ParseStatus parseSWaitCnt(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  ParseStatus parseDepCtr(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  ParseStatus parseSDelayALU(OperandVector &Operands);

  ParseStatus parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands,
                  bool SearchMandatoryLiterals = false) const;
  SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
  SMLoc getInstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateVOPDRegBankConstraints(const MCInst &Inst,
                                      const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateNeg(const MCInst &Inst, int OpName);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
1719  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1720  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1721  bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1722  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1723  bool validateAGPRLdSt(const MCInst &Inst) const;
1724  bool validateVGPRAlign(const MCInst &Inst) const;
1725  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1726  bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1727  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1728  bool validateDivScale(const MCInst &Inst);
1729  bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1730  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1731                             const SMLoc &IDLoc);
1732  bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1733                              const unsigned CPol);
1734  bool validateExeczVcczOperands(const OperandVector &Operands);
1735  bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1736  std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1737  unsigned getConstantBusLimit(unsigned Opcode) const;
1738  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1739  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1740  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1741
1742  bool isSupportedMnemo(StringRef Mnemo,
1743                        const FeatureBitset &FBS);
1744  bool isSupportedMnemo(StringRef Mnemo,
1745                        const FeatureBitset &FBS,
1746                        ArrayRef<unsigned> Variants);
1747  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1748
1749  bool isId(const StringRef Id) const;
1750  bool isId(const AsmToken &Token, const StringRef Id) const;
1751  bool isToken(const AsmToken::TokenKind Kind) const;
1752  StringRef getId() const;
1753  bool trySkipId(const StringRef Id);
1754  bool trySkipId(const StringRef Pref, const StringRef Id);
1755  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1756  bool trySkipToken(const AsmToken::TokenKind Kind);
1757  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1758  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1759  bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1760
1761  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1762  AsmToken::TokenKind getTokenKind() const;
1763  bool parseExpr(int64_t &Imm, StringRef Expected = "");
1764  bool parseExpr(OperandVector &Operands);
1765  StringRef getTokenStr() const;
1766  AsmToken peekToken(bool ShouldSkipSpace = true);
1767  AsmToken getToken() const;
1768  SMLoc getLoc() const;
1769  void lex();
1770
1771public:
1772  void onBeginOfFile() override;
1773
1774  ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1775
1776  ParseStatus parseExpTgt(OperandVector &Operands);
1777  ParseStatus parseSendMsg(OperandVector &Operands);
1778  ParseStatus parseInterpSlot(OperandVector &Operands);
1779  ParseStatus parseInterpAttr(OperandVector &Operands);
1780  ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1781  ParseStatus parseBoolReg(OperandVector &Operands);
1782
1783  bool parseSwizzleOperand(int64_t &Op,
1784                           const unsigned MinVal,
1785                           const unsigned MaxVal,
1786                           const StringRef ErrMsg,
1787                           SMLoc &Loc);
1788  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1789                            const unsigned MinVal,
1790                            const unsigned MaxVal,
1791                            const StringRef ErrMsg);
1792  ParseStatus parseSwizzle(OperandVector &Operands);
1793  bool parseSwizzleOffset(int64_t &Imm);
1794  bool parseSwizzleMacro(int64_t &Imm);
1795  bool parseSwizzleQuadPerm(int64_t &Imm);
1796  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1797  bool parseSwizzleBroadcast(int64_t &Imm);
1798  bool parseSwizzleSwap(int64_t &Imm);
1799  bool parseSwizzleReverse(int64_t &Imm);
1800
1801  ParseStatus parseGPRIdxMode(OperandVector &Operands);
1802  int64_t parseGPRIdxMacro();
1803
1804  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1805  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1806
1807  ParseStatus parseOModSI(OperandVector &Operands);
1808
1809  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1810               OptionalImmIndexMap &OptionalIdx);
1811  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1812  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1813  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1814  void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1815
1816  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1817  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1818                    OptionalImmIndexMap &OptionalIdx);
1819  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1820                OptionalImmIndexMap &OptionalIdx);
1821
1822  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1823  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1824
1825  bool parseDimId(unsigned &Encoding);
1826  ParseStatus parseDim(OperandVector &Operands);
1827  bool convertDppBoundCtrl(int64_t &BoundCtrl);
1828  ParseStatus parseDPP8(OperandVector &Operands);
1829  ParseStatus parseDPPCtrl(OperandVector &Operands);
1830  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1831  int64_t parseDPPCtrlSel(StringRef Ctrl);
1832  int64_t parseDPPCtrlPerm();
1833  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1834  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1835    cvtDPP(Inst, Operands, true);
1836  }
1837  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1838                  bool IsDPP8 = false);
1839  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1840    cvtVOP3DPP(Inst, Operands, true);
1841  }
1842
1843  ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1844                           AMDGPUOperand::ImmTy Type);
1845  ParseStatus parseSDWADstUnused(OperandVector &Operands);
1846  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1847  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1848  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1849  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1850  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1851  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1852               uint64_t BasicInstType,
1853               bool SkipDstVcc = false,
1854               bool SkipSrcVcc = false);
1855
1856  ParseStatus parseEndpgm(OperandVector &Operands);
1857
1858  ParseStatus parseVOPD(OperandVector &Operands);
1859};
1860
1861} // end anonymous namespace
1862
// May be called with an integer type of equivalent bit width.
1864static const fltSemantics *getFltSemantics(unsigned Size) {
1865  switch (Size) {
1866  case 4:
1867    return &APFloat::IEEEsingle();
1868  case 8:
1869    return &APFloat::IEEEdouble();
1870  case 2:
1871    return &APFloat::IEEEhalf();
1872  default:
1873    llvm_unreachable("unsupported fp type");
1874  }
1875}
1876
1877static const fltSemantics *getFltSemantics(MVT VT) {
1878  return getFltSemantics(VT.getSizeInBits() / 8);
1879}
1880
1881static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1882  switch (OperandType) {
1883  case AMDGPU::OPERAND_REG_IMM_INT32:
1884  case AMDGPU::OPERAND_REG_IMM_FP32:
1885  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1886  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1887  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1888  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1889  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1890  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1891  case AMDGPU::OPERAND_REG_IMM_V2FP32:
1892  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1893  case AMDGPU::OPERAND_REG_IMM_V2INT32:
1894  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1895  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1896  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1897  case AMDGPU::OPERAND_KIMM32:
1898  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1899    return &APFloat::IEEEsingle();
1900  case AMDGPU::OPERAND_REG_IMM_INT64:
1901  case AMDGPU::OPERAND_REG_IMM_FP64:
1902  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1903  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1904  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1905    return &APFloat::IEEEdouble();
1906  case AMDGPU::OPERAND_REG_IMM_INT16:
1907  case AMDGPU::OPERAND_REG_IMM_FP16:
1908  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1909  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1910  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1911  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1912  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1913  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1914  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1915  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1916  case AMDGPU::OPERAND_KIMM16:
1917    return &APFloat::IEEEhalf();
1918  default:
1919    llvm_unreachable("unsupported fp type");
1920  }
1921}
1922
1923//===----------------------------------------------------------------------===//
1924// Operand
1925//===----------------------------------------------------------------------===//
1926
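// Check whether an fp literal (held as a double) can be converted to the
// floating-point format of VT without overflow or underflow. Precision loss
// is tolerated; e.g. 1.3 is accepted as an f16 literal even though it cannot
// be represented exactly in half precision.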
1927static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1928  bool Lost;
1929
  // Convert the literal to the floating-point type of VT.
1931  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1932                                               APFloat::rmNearestTiesToEven,
1933                                               &Lost);
  // We allow precision loss but not overflow or underflow.
1935  if (Status != APFloat::opOK &&
1936      Lost &&
1937      ((Status & APFloat::opOverflow)  != 0 ||
1938       (Status & APFloat::opUnderflow) != 0)) {
1939    return false;
1940  }
1941
1942  return true;
1943}
1944
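// Return true if Val can be truncated to Size bits without losing
// information, i.e. it fits as either an unsigned or a signed Size-bit
// integer. For Size == 16 both 0xFFFF and -1 are considered safe.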
1945static bool isSafeTruncation(int64_t Val, unsigned Size) {
1946  return isUIntN(Size, Val) || isIntN(Size, Val);
1947}
1948
1949static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1950  if (VT.getScalarType() == MVT::i16) {
1951    // FP immediate values are broken.
1952    return isInlinableIntLiteral(Val);
1953  }
1954
1955  // f16/v2f16 operands work correctly for all values.
1956  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1957}
1958
1959bool AMDGPUOperand::isInlinableImm(MVT type) const {
1960
1961  // This is a hack to enable named inline values like
1962  // shared_base with both 32-bit and 64-bit operands.
1963  // Note that these values are defined as
1964  // 32-bit operands only.
1965  if (isInlineValue()) {
1966    return true;
1967  }
1968
1969  if (!isImmTy(ImmTyNone)) {
1970    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1971    return false;
1972  }
1973  // TODO: We should avoid using host float here. It would be better to
1974  // check the float bit values which is what a few other places do.
1975  // We've had bot failures before due to weird NaN support on mips hosts.
1976
1977  APInt Literal(64, Imm.Val);
1978
1979  if (Imm.IsFPImm) { // We got fp literal token
1980    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1981      return AMDGPU::isInlinableLiteral64(Imm.Val,
1982                                          AsmParser->hasInv2PiInlineImm());
1983    }
1984
1985    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1986    if (!canLosslesslyConvertToFPType(FPLiteral, type))
1987      return false;
1988
1989    if (type.getScalarSizeInBits() == 16) {
1990      return isInlineableLiteralOp16(
1991        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1992        type, AsmParser->hasInv2PiInlineImm());
1993    }
1994
1995    // Check if single precision literal is inlinable
1996    return AMDGPU::isInlinableLiteral32(
1997      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1998      AsmParser->hasInv2PiInlineImm());
1999  }
2000
2001  // We got int literal token.
2002  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
2003    return AMDGPU::isInlinableLiteral64(Imm.Val,
2004                                        AsmParser->hasInv2PiInlineImm());
2005  }
2006
2007  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
2008    return false;
2009  }
2010
2011  if (type.getScalarSizeInBits() == 16) {
2012    return isInlineableLiteralOp16(
2013      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
2014      type, AsmParser->hasInv2PiInlineImm());
2015  }
2016
2017  return AMDGPU::isInlinableLiteral32(
2018    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
2019    AsmParser->hasInv2PiInlineImm());
2020}
2021
2022bool AMDGPUOperand::isLiteralImm(MVT type) const {
2023  // Check that this immediate can be added as literal
2024  if (!isImmTy(ImmTyNone)) {
2025    return false;
2026  }
2027
2028  if (!Imm.IsFPImm) {
2029    // We got int literal token.
2030
2031    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
2035      return false;
2036    }
2037
2038    unsigned Size = type.getSizeInBits();
2039    if (Size == 64)
2040      Size = 32;
2041
2042    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2043    // types.
2044    return isSafeTruncation(Imm.Val, Size);
2045  }
2046
2047  // We got fp literal token
2048  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal would be set to zero, but we accept
    // these literals anyway.
2050    return true;
2051  }
2052
2053  if (type == MVT::i64) { // Expected 64-bit int operand
2054    // We don't allow fp literals in 64-bit integer instructions. It is
2055    // unclear how we should encode them.
2056    return false;
2057  }
2058
2059  // We allow fp literals with f16x2 operands assuming that the specified
2060  // literal goes into the lower half and the upper half is zero. We also
2061  // require that the literal may be losslessly converted to f16.
2062  //
2063  // For i16x2 operands, we assume that the specified literal is encoded as a
2064  // single-precision float. This is pretty odd, but it matches SP3 and what
2065  // happens in hardware.
2066  MVT ExpectedType = (type == MVT::v2f16)   ? MVT::f16
2067                     : (type == MVT::v2i16) ? MVT::f32
2068                     : (type == MVT::v2f32) ? MVT::f32
2069                                            : type;
2070
2071  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2072  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2073}
2074
2075bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2076  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2077}
2078
2079bool AMDGPUOperand::isVRegWithInputMods() const {
2080  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2081         // GFX90A allows DPP on 64-bit operands.
2082         (isRegClass(AMDGPU::VReg_64RegClassID) &&
2083          AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2084}
2085
2086template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2087  return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2088                             : AMDGPU::VGPR_16_Lo128RegClassID);
2089}
2090
2091bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2092  if (AsmParser->isVI())
2093    return isVReg32();
2094  else if (AsmParser->isGFX9Plus())
2095    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2096  else
2097    return false;
2098}
2099
2100bool AMDGPUOperand::isSDWAFP16Operand() const {
2101  return isSDWAOperand(MVT::f16);
2102}
2103
2104bool AMDGPUOperand::isSDWAFP32Operand() const {
2105  return isSDWAOperand(MVT::f32);
2106}
2107
2108bool AMDGPUOperand::isSDWAInt16Operand() const {
2109  return isSDWAOperand(MVT::i16);
2110}
2111
2112bool AMDGPUOperand::isSDWAInt32Operand() const {
2113  return isSDWAOperand(MVT::i32);
2114}
2115
2116bool AMDGPUOperand::isBoolReg() const {
2117  auto FB = AsmParser->getFeatureBits();
2118  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2119                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2120}
2121
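// Apply the parsed 'abs'/'neg' input modifiers directly to the bit pattern of
// a 2-, 4- or 8-byte fp literal: 'abs' clears the sign bit and 'neg' flips it,
// so e.g. neg(abs(-1.0)) yields the encoding of -1.0.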
2122uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2123{
2124  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2125  assert(Size == 2 || Size == 4 || Size == 8);
2126
2127  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2128
2129  if (Imm.Mods.Abs) {
2130    Val &= ~FpSignMask;
2131  }
2132  if (Imm.Mods.Neg) {
2133    Val ^= FpSignMask;
2134  }
2135
2136  return Val;
2137}
2138
2139void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2140  if (isExpr()) {
2141    Inst.addOperand(MCOperand::createExpr(Expr));
2142    return;
2143  }
2144
2145  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2146                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2150  } else {
2151    assert(!isImmTy(ImmTyNone) || !hasModifiers());
2152    Inst.addOperand(MCOperand::createImm(Imm.Val));
2153    setImmKindNone();
2154  }
2155}
2156
2157void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2158  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2159  auto OpNum = Inst.getNumOperands();
2160  // Check that this operand accepts literals
2161  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2162
2163  if (ApplyModifiers) {
2164    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2165    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2166    Val = applyInputFPModifiers(Val, Size);
2167  }
2168
2169  APInt Literal(64, Val);
2170  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2171
2172  if (Imm.IsFPImm) { // We got fp literal token
2173    switch (OpTy) {
2174    case AMDGPU::OPERAND_REG_IMM_INT64:
2175    case AMDGPU::OPERAND_REG_IMM_FP64:
2176    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2177    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2178    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2179      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2180                                       AsmParser->hasInv2PiInlineImm())) {
2181        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2182        setImmKindConst();
2183        return;
2184      }
2185
2186      // Non-inlineable
2187      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check whether the low 32 bits are zero.
2189        if (Literal.getLoBits(32) != 0) {
2190          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2191          "Can't encode literal as exact 64-bit floating-point operand. "
2192          "Low 32-bits will be set to zero");
2193          Val &= 0xffffffff00000000u;
2194        }
2195
2196        Inst.addOperand(MCOperand::createImm(Val));
2197        setImmKindLiteral();
2198        return;
2199      }
2200
2201      // We don't allow fp literals in 64-bit integer instructions. It is
2202      // unclear how we should encode them. This case should be checked earlier
2203      // in predicate methods (isLiteralImm())
2204      llvm_unreachable("fp literal in 64-bit integer instruction.");
2205
2206    case AMDGPU::OPERAND_REG_IMM_INT32:
2207    case AMDGPU::OPERAND_REG_IMM_FP32:
2208    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2209    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2210    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2211    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2212    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2213    case AMDGPU::OPERAND_REG_IMM_INT16:
2214    case AMDGPU::OPERAND_REG_IMM_FP16:
2215    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2216    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2217    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2218    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2219    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2220    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2221    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2222    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2223    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2224    case AMDGPU::OPERAND_REG_IMM_V2INT16:
2225    case AMDGPU::OPERAND_REG_IMM_V2FP16:
2226    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2227    case AMDGPU::OPERAND_REG_IMM_V2FP32:
2228    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2229    case AMDGPU::OPERAND_REG_IMM_V2INT32:
2230    case AMDGPU::OPERAND_KIMM32:
2231    case AMDGPU::OPERAND_KIMM16:
2232    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2233      bool lost;
2234      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point type.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().
2240
2241      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2242      Inst.addOperand(MCOperand::createImm(ImmVal));
2243      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2244        setImmKindMandatoryLiteral();
2245      } else {
2246        setImmKindLiteral();
2247      }
2248      return;
2249    }
2250    default:
2251      llvm_unreachable("invalid operand size");
2252    }
2253
2254    return;
2255  }
2256
2257  // We got int literal token.
2258  // Only sign extend inline immediates.
2259  switch (OpTy) {
2260  case AMDGPU::OPERAND_REG_IMM_INT32:
2261  case AMDGPU::OPERAND_REG_IMM_FP32:
2262  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2263  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2264  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2265  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2266  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2267  case AMDGPU::OPERAND_REG_IMM_V2INT16:
2268  case AMDGPU::OPERAND_REG_IMM_V2FP16:
2269  case AMDGPU::OPERAND_REG_IMM_V2FP32:
2270  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2271  case AMDGPU::OPERAND_REG_IMM_V2INT32:
2272  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2273  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2274    if (isSafeTruncation(Val, 32) &&
2275        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2276                                     AsmParser->hasInv2PiInlineImm())) {
2277      Inst.addOperand(MCOperand::createImm(Val));
2278      setImmKindConst();
2279      return;
2280    }
2281
2282    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2283    setImmKindLiteral();
2284    return;
2285
2286  case AMDGPU::OPERAND_REG_IMM_INT64:
2287  case AMDGPU::OPERAND_REG_IMM_FP64:
2288  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2289  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2290  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2291    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2292      Inst.addOperand(MCOperand::createImm(Val));
2293      setImmKindConst();
2294      return;
2295    }
2296
2297    Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2298                                                    : Lo_32(Val);
2299
2300    Inst.addOperand(MCOperand::createImm(Val));
2301    setImmKindLiteral();
2302    return;
2303
2304  case AMDGPU::OPERAND_REG_IMM_INT16:
2305  case AMDGPU::OPERAND_REG_IMM_FP16:
2306  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2307  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2308  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2309  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2310  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2311    if (isSafeTruncation(Val, 16) &&
2312        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2313                                     AsmParser->hasInv2PiInlineImm())) {
2314      Inst.addOperand(MCOperand::createImm(Val));
2315      setImmKindConst();
2316      return;
2317    }
2318
2319    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2320    setImmKindLiteral();
2321    return;
2322
2323  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2324  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2325  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2326  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2327    assert(isSafeTruncation(Val, 16));
2328    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2329                                        AsmParser->hasInv2PiInlineImm()));
2330
2331    Inst.addOperand(MCOperand::createImm(Val));
2332    return;
2333  }
2334  case AMDGPU::OPERAND_KIMM32:
2335    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2336    setImmKindMandatoryLiteral();
2337    return;
2338  case AMDGPU::OPERAND_KIMM16:
2339    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2340    setImmKindMandatoryLiteral();
2341    return;
2342  default:
2343    llvm_unreachable("invalid operand size");
2344  }
2345}
2346
2347void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2348  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2349}
2350
2351bool AMDGPUOperand::isInlineValue() const {
2352  return isRegKind() && ::isInlineValue(getReg());
2353}
2354
2355//===----------------------------------------------------------------------===//
2356// AsmParser
2357//===----------------------------------------------------------------------===//
2358
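// Map a register kind and a width in bits to the corresponding register class
// ID, e.g. (IS_VGPR, 64) -> VReg_64RegClassID. Returns -1 for unsupported
// combinations such as 96-bit TTMP tuples.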
2359static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2360  if (Is == IS_VGPR) {
2361    switch (RegWidth) {
2362      default: return -1;
2363      case 32:
2364        return AMDGPU::VGPR_32RegClassID;
2365      case 64:
2366        return AMDGPU::VReg_64RegClassID;
2367      case 96:
2368        return AMDGPU::VReg_96RegClassID;
2369      case 128:
2370        return AMDGPU::VReg_128RegClassID;
2371      case 160:
2372        return AMDGPU::VReg_160RegClassID;
2373      case 192:
2374        return AMDGPU::VReg_192RegClassID;
2375      case 224:
2376        return AMDGPU::VReg_224RegClassID;
2377      case 256:
2378        return AMDGPU::VReg_256RegClassID;
2379      case 288:
2380        return AMDGPU::VReg_288RegClassID;
2381      case 320:
2382        return AMDGPU::VReg_320RegClassID;
2383      case 352:
2384        return AMDGPU::VReg_352RegClassID;
2385      case 384:
2386        return AMDGPU::VReg_384RegClassID;
2387      case 512:
2388        return AMDGPU::VReg_512RegClassID;
2389      case 1024:
2390        return AMDGPU::VReg_1024RegClassID;
2391    }
2392  } else if (Is == IS_TTMP) {
2393    switch (RegWidth) {
2394      default: return -1;
2395      case 32:
2396        return AMDGPU::TTMP_32RegClassID;
2397      case 64:
2398        return AMDGPU::TTMP_64RegClassID;
2399      case 128:
2400        return AMDGPU::TTMP_128RegClassID;
2401      case 256:
2402        return AMDGPU::TTMP_256RegClassID;
2403      case 512:
2404        return AMDGPU::TTMP_512RegClassID;
2405    }
2406  } else if (Is == IS_SGPR) {
2407    switch (RegWidth) {
2408      default: return -1;
2409      case 32:
2410        return AMDGPU::SGPR_32RegClassID;
2411      case 64:
2412        return AMDGPU::SGPR_64RegClassID;
2413      case 96:
2414        return AMDGPU::SGPR_96RegClassID;
2415      case 128:
2416        return AMDGPU::SGPR_128RegClassID;
2417      case 160:
2418        return AMDGPU::SGPR_160RegClassID;
2419      case 192:
2420        return AMDGPU::SGPR_192RegClassID;
2421      case 224:
2422        return AMDGPU::SGPR_224RegClassID;
2423      case 256:
2424        return AMDGPU::SGPR_256RegClassID;
2425      case 288:
2426        return AMDGPU::SGPR_288RegClassID;
2427      case 320:
2428        return AMDGPU::SGPR_320RegClassID;
2429      case 352:
2430        return AMDGPU::SGPR_352RegClassID;
2431      case 384:
2432        return AMDGPU::SGPR_384RegClassID;
2433      case 512:
2434        return AMDGPU::SGPR_512RegClassID;
2435    }
2436  } else if (Is == IS_AGPR) {
2437    switch (RegWidth) {
2438      default: return -1;
2439      case 32:
2440        return AMDGPU::AGPR_32RegClassID;
2441      case 64:
2442        return AMDGPU::AReg_64RegClassID;
2443      case 96:
2444        return AMDGPU::AReg_96RegClassID;
2445      case 128:
2446        return AMDGPU::AReg_128RegClassID;
2447      case 160:
2448        return AMDGPU::AReg_160RegClassID;
2449      case 192:
2450        return AMDGPU::AReg_192RegClassID;
2451      case 224:
2452        return AMDGPU::AReg_224RegClassID;
2453      case 256:
2454        return AMDGPU::AReg_256RegClassID;
2455      case 288:
2456        return AMDGPU::AReg_288RegClassID;
2457      case 320:
2458        return AMDGPU::AReg_320RegClassID;
2459      case 352:
2460        return AMDGPU::AReg_352RegClassID;
2461      case 384:
2462        return AMDGPU::AReg_384RegClassID;
2463      case 512:
2464        return AMDGPU::AReg_512RegClassID;
2465      case 1024:
2466        return AMDGPU::AReg_1024RegClassID;
2467    }
2468  }
2469  return -1;
2470}
2471
2472static unsigned getSpecialRegForName(StringRef RegName) {
2473  return StringSwitch<unsigned>(RegName)
2474    .Case("exec", AMDGPU::EXEC)
2475    .Case("vcc", AMDGPU::VCC)
2476    .Case("flat_scratch", AMDGPU::FLAT_SCR)
2477    .Case("xnack_mask", AMDGPU::XNACK_MASK)
2478    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2479    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2480    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2481    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2482    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2483    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2484    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2485    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2486    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2487    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2488    .Case("lds_direct", AMDGPU::LDS_DIRECT)
2489    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2490    .Case("m0", AMDGPU::M0)
2491    .Case("vccz", AMDGPU::SRC_VCCZ)
2492    .Case("src_vccz", AMDGPU::SRC_VCCZ)
2493    .Case("execz", AMDGPU::SRC_EXECZ)
2494    .Case("src_execz", AMDGPU::SRC_EXECZ)
2495    .Case("scc", AMDGPU::SRC_SCC)
2496    .Case("src_scc", AMDGPU::SRC_SCC)
2497    .Case("tba", AMDGPU::TBA)
2498    .Case("tma", AMDGPU::TMA)
2499    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2500    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2501    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2502    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2503    .Case("vcc_lo", AMDGPU::VCC_LO)
2504    .Case("vcc_hi", AMDGPU::VCC_HI)
2505    .Case("exec_lo", AMDGPU::EXEC_LO)
2506    .Case("exec_hi", AMDGPU::EXEC_HI)
2507    .Case("tma_lo", AMDGPU::TMA_LO)
2508    .Case("tma_hi", AMDGPU::TMA_HI)
2509    .Case("tba_lo", AMDGPU::TBA_LO)
2510    .Case("tba_hi", AMDGPU::TBA_HI)
2511    .Case("pc", AMDGPU::PC_REG)
2512    .Case("null", AMDGPU::SGPR_NULL)
2513    .Default(AMDGPU::NoRegister);
2514}
2515
2516bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2517                                    SMLoc &EndLoc, bool RestoreOnFailure) {
2518  auto R = parseRegister();
2519  if (!R) return true;
2520  assert(R->isReg());
2521  RegNo = R->getReg();
2522  StartLoc = R->getStartLoc();
2523  EndLoc = R->getEndLoc();
2524  return false;
2525}
2526
2527bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2528                                    SMLoc &EndLoc) {
2529  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2530}
2531
2532ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2533                                              SMLoc &EndLoc) {
2534  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2535  bool PendingErrors = getParser().hasPendingError();
2536  getParser().clearPendingErrors();
2537  if (PendingErrors)
2538    return ParseStatus::Failure;
2539  if (Result)
2540    return ParseStatus::NoMatch;
2541  return ParseStatus::Success;
2542}
2543
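// Extend a register list such as [s0,s1,s2,s3] by one more 32-bit register.
// Special register halves may only be combined into their canonical pairs
// (e.g. [exec_lo,exec_hi] -> exec); regular registers must have consecutive
// indices. Each accepted register grows RegWidth by 32 bits.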
2544bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2545                                            RegisterKind RegKind, unsigned Reg1,
2546                                            SMLoc Loc) {
2547  switch (RegKind) {
2548  case IS_SPECIAL:
2549    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2550      Reg = AMDGPU::EXEC;
2551      RegWidth = 64;
2552      return true;
2553    }
2554    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2555      Reg = AMDGPU::FLAT_SCR;
2556      RegWidth = 64;
2557      return true;
2558    }
2559    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2560      Reg = AMDGPU::XNACK_MASK;
2561      RegWidth = 64;
2562      return true;
2563    }
2564    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2565      Reg = AMDGPU::VCC;
2566      RegWidth = 64;
2567      return true;
2568    }
2569    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2570      Reg = AMDGPU::TBA;
2571      RegWidth = 64;
2572      return true;
2573    }
2574    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2575      Reg = AMDGPU::TMA;
2576      RegWidth = 64;
2577      return true;
2578    }
2579    Error(Loc, "register does not fit in the list");
2580    return false;
2581  case IS_VGPR:
2582  case IS_SGPR:
2583  case IS_AGPR:
2584  case IS_TTMP:
2585    if (Reg1 != Reg + RegWidth / 32) {
2586      Error(Loc, "registers in a list must have consecutive indices");
2587      return false;
2588    }
2589    RegWidth += 32;
2590    return true;
2591  default:
2592    llvm_unreachable("unexpected register kind");
2593  }
2594}
2595
2596struct RegInfo {
2597  StringLiteral Name;
2598  RegisterKind Kind;
2599};
2600
2601static constexpr RegInfo RegularRegisters[] = {
2602  {{"v"},    IS_VGPR},
2603  {{"s"},    IS_SGPR},
2604  {{"ttmp"}, IS_TTMP},
2605  {{"acc"},  IS_AGPR},
2606  {{"a"},    IS_AGPR},
2607};
2608
2609static bool isRegularReg(RegisterKind Kind) {
2610  return Kind == IS_VGPR ||
2611         Kind == IS_SGPR ||
2612         Kind == IS_TTMP ||
2613         Kind == IS_AGPR;
2614}
2615
2616static const RegInfo* getRegularRegInfo(StringRef Str) {
2617  for (const RegInfo &Reg : RegularRegisters)
2618    if (Str.starts_with(Reg.Name))
2619      return &Reg;
2620  return nullptr;
2621}
2622
2623static bool getRegNum(StringRef Str, unsigned& Num) {
2624  return !Str.getAsInteger(10, Num);
2625}
2626
2627bool
2628AMDGPUAsmParser::isRegister(const AsmToken &Token,
2629                            const AsmToken &NextToken) const {
2630
2631  // A list of consecutive registers: [s0,s1,s2,s3]
2632  if (Token.is(AsmToken::LBrac))
2633    return true;
2634
2635  if (!Token.is(AsmToken::Identifier))
2636    return false;
2637
2638  // A single register like s0 or a range of registers like s[0:1]
2639
2640  StringRef Str = Token.getString();
2641  const RegInfo *Reg = getRegularRegInfo(Str);
2642  if (Reg) {
2643    StringRef RegName = Reg->Name;
2644    StringRef RegSuffix = Str.substr(RegName.size());
2645    if (!RegSuffix.empty()) {
2646      RegSuffix.consume_back(".l");
2647      RegSuffix.consume_back(".h");
2648      unsigned Num;
2649      // A single register with an index: rXX
2650      if (getRegNum(RegSuffix, Num))
2651        return true;
2652    } else {
2653      // A range of registers: r[XX:YY].
2654      if (NextToken.is(AsmToken::LBrac))
2655        return true;
2656    }
2657  }
2658
2659  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2660}
2661
2662bool
2663AMDGPUAsmParser::isRegister()
2664{
2665  return isRegister(getToken(), peekToken());
2666}
2667
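// Return the physical register for a regular register reference. SGPR and
// TTMP tuples must start at an index aligned to the tuple size rounded up to
// a power of two (at most 4 dwords), so e.g. s[3:4] is rejected with
// "invalid register alignment" while s[2:3] is accepted.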
2668unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2669                                        unsigned SubReg, unsigned RegWidth,
2670                                        SMLoc Loc) {
2671  assert(isRegularReg(RegKind));
2672
2673  unsigned AlignSize = 1;
2674  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2675    // SGPR and TTMP registers must be aligned.
2676    // Max required alignment is 4 dwords.
2677    AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2678  }
2679
2680  if (RegNum % AlignSize != 0) {
2681    Error(Loc, "invalid register alignment");
2682    return AMDGPU::NoRegister;
2683  }
2684
2685  unsigned RegIdx = RegNum / AlignSize;
2686  int RCID = getRegClass(RegKind, RegWidth);
2687  if (RCID == -1) {
2688    Error(Loc, "invalid or unsupported register size");
2689    return AMDGPU::NoRegister;
2690  }
2691
2692  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2693  const MCRegisterClass RC = TRI->getRegClass(RCID);
2694  if (RegIdx >= RC.getNumRegs()) {
2695    Error(Loc, "register index is out of range");
2696    return AMDGPU::NoRegister;
2697  }
2698
2699  unsigned Reg = RC.getRegister(RegIdx);
2700
2701  if (SubReg) {
2702    Reg = TRI->getSubReg(Reg, SubReg);
2703
2704    // Currently all regular registers have their .l and .h subregisters, so
2705    // we should never need to generate an error here.
2706    assert(Reg && "Invalid subregister!");
2707  }
2708
2709  return Reg;
2710}
2711
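// Parse the bracketed index part of a register range such as "[0:3]" (or a
// single index "[2]"). On success Num holds the first index and RegWidth the
// total width in bits, e.g. "[0:3]" gives Num = 0 and RegWidth = 128.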
2712bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2713  int64_t RegLo, RegHi;
2714  if (!skipToken(AsmToken::LBrac, "missing register index"))
2715    return false;
2716
2717  SMLoc FirstIdxLoc = getLoc();
2718  SMLoc SecondIdxLoc;
2719
2720  if (!parseExpr(RegLo))
2721    return false;
2722
2723  if (trySkipToken(AsmToken::Colon)) {
2724    SecondIdxLoc = getLoc();
2725    if (!parseExpr(RegHi))
2726      return false;
2727  } else {
2728    RegHi = RegLo;
2729  }
2730
2731  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2732    return false;
2733
2734  if (!isUInt<32>(RegLo)) {
2735    Error(FirstIdxLoc, "invalid register index");
2736    return false;
2737  }
2738
2739  if (!isUInt<32>(RegHi)) {
2740    Error(SecondIdxLoc, "invalid register index");
2741    return false;
2742  }
2743
2744  if (RegLo > RegHi) {
2745    Error(FirstIdxLoc, "first register index should not exceed second index");
2746    return false;
2747  }
2748
2749  Num = static_cast<unsigned>(RegLo);
2750  RegWidth = 32 * ((RegHi - RegLo) + 1);
2751  return true;
2752}
2753
2754unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2755                                          unsigned &RegNum, unsigned &RegWidth,
2756                                          SmallVectorImpl<AsmToken> &Tokens) {
2757  assert(isToken(AsmToken::Identifier));
2758  unsigned Reg = getSpecialRegForName(getTokenStr());
2759  if (Reg) {
2760    RegNum = 0;
2761    RegWidth = 32;
2762    RegKind = IS_SPECIAL;
2763    Tokens.push_back(getToken());
2764    lex(); // skip register name
2765  }
2766  return Reg;
2767}
2768
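// Parse a regular register reference: a single register like "v0", an
// optional 16-bit half like "v1.l"/"v1.h", or a range like "s[4:7]".
// Returns AMDGPU::NoRegister and reports an error on malformed input.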
2769unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2770                                          unsigned &RegNum, unsigned &RegWidth,
2771                                          SmallVectorImpl<AsmToken> &Tokens) {
2772  assert(isToken(AsmToken::Identifier));
2773  StringRef RegName = getTokenStr();
2774  auto Loc = getLoc();
2775
2776  const RegInfo *RI = getRegularRegInfo(RegName);
2777  if (!RI) {
2778    Error(Loc, "invalid register name");
2779    return AMDGPU::NoRegister;
2780  }
2781
2782  Tokens.push_back(getToken());
2783  lex(); // skip register name
2784
2785  RegKind = RI->Kind;
2786  StringRef RegSuffix = RegName.substr(RI->Name.size());
2787  unsigned SubReg = NoSubRegister;
2788  if (!RegSuffix.empty()) {
    // We don't know the opcode until we are done parsing, so we don't know
    // whether registers should be 16 or 32 bit. It is therefore mandatory to
    // use .l or .h to correctly specify 16-bit registers. We also can't
    // determine the class (VGPR_16_Lo128 or VGPR_16), so always parse them as
    // VGPR_16.
2793    if (RegSuffix.consume_back(".l"))
2794      SubReg = AMDGPU::lo16;
2795    else if (RegSuffix.consume_back(".h"))
2796      SubReg = AMDGPU::hi16;
2797
2798    // Single 32-bit register: vXX.
2799    if (!getRegNum(RegSuffix, RegNum)) {
2800      Error(Loc, "invalid register index");
2801      return AMDGPU::NoRegister;
2802    }
2803    RegWidth = 32;
2804  } else {
2805    // Range of registers: v[XX:YY]. ":YY" is optional.
2806    if (!ParseRegRange(RegNum, RegWidth))
2807      return AMDGPU::NoRegister;
2808  }
2809
2810  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
2811}
2812
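// Parse a bracketed list of 32-bit registers of the same kind, e.g.
// [v0,v1,v2,v3], and fold it into the equivalent register tuple
// (here v[0:3]).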
2813unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2814                                       unsigned &RegWidth,
2815                                       SmallVectorImpl<AsmToken> &Tokens) {
2816  unsigned Reg = AMDGPU::NoRegister;
2817  auto ListLoc = getLoc();
2818
2819  if (!skipToken(AsmToken::LBrac,
2820                 "expected a register or a list of registers")) {
2821    return AMDGPU::NoRegister;
2822  }
2823
2824  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2825
2826  auto Loc = getLoc();
2827  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2828    return AMDGPU::NoRegister;
2829  if (RegWidth != 32) {
2830    Error(Loc, "expected a single 32-bit register");
2831    return AMDGPU::NoRegister;
2832  }
2833
2834  for (; trySkipToken(AsmToken::Comma); ) {
2835    RegisterKind NextRegKind;
2836    unsigned NextReg, NextRegNum, NextRegWidth;
2837    Loc = getLoc();
2838
2839    if (!ParseAMDGPURegister(NextRegKind, NextReg,
2840                             NextRegNum, NextRegWidth,
2841                             Tokens)) {
2842      return AMDGPU::NoRegister;
2843    }
2844    if (NextRegWidth != 32) {
2845      Error(Loc, "expected a single 32-bit register");
2846      return AMDGPU::NoRegister;
2847    }
2848    if (NextRegKind != RegKind) {
2849      Error(Loc, "registers in a list must be of the same kind");
2850      return AMDGPU::NoRegister;
2851    }
2852    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2853      return AMDGPU::NoRegister;
2854  }
2855
2856  if (!skipToken(AsmToken::RBrac,
2857                 "expected a comma or a closing square bracket")) {
2858    return AMDGPU::NoRegister;
2859  }
2860
2861  if (isRegularReg(RegKind))
2862    Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
2863
2864  return Reg;
2865}
2866
2867bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2868                                          unsigned &RegNum, unsigned &RegWidth,
2869                                          SmallVectorImpl<AsmToken> &Tokens) {
2870  auto Loc = getLoc();
2871  Reg = AMDGPU::NoRegister;
2872
2873  if (isToken(AsmToken::Identifier)) {
2874    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2875    if (Reg == AMDGPU::NoRegister)
2876      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2877  } else {
2878    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2879  }
2880
2881  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2882  if (Reg == AMDGPU::NoRegister) {
2883    assert(Parser.hasPendingError());
2884    return false;
2885  }
2886
2887  if (!subtargetHasRegister(*TRI, Reg)) {
2888    if (Reg == AMDGPU::SGPR_NULL) {
2889      Error(Loc, "'null' operand is not supported on this GPU");
2890    } else {
2891      Error(Loc, "register not available on this GPU");
2892    }
2893    return false;
2894  }
2895
2896  return true;
2897}
2898
2899bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2900                                          unsigned &RegNum, unsigned &RegWidth,
2901                                          bool RestoreOnFailure /*=false*/) {
2902  Reg = AMDGPU::NoRegister;
2903
2904  SmallVector<AsmToken, 1> Tokens;
2905  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2906    if (RestoreOnFailure) {
2907      while (!Tokens.empty()) {
2908        getLexer().UnLex(Tokens.pop_back_val());
2909      }
2910    }
2911    return true;
2912  }
2913  return false;
2914}
2915
2916std::optional<StringRef>
2917AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2918  switch (RegKind) {
2919  case IS_VGPR:
2920    return StringRef(".amdgcn.next_free_vgpr");
2921  case IS_SGPR:
2922    return StringRef(".amdgcn.next_free_sgpr");
2923  default:
2924    return std::nullopt;
2925  }
2926}
2927
2928void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2929  auto SymbolName = getGprCountSymbolName(RegKind);
2930  assert(SymbolName && "initializing invalid register kind");
2931  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2932  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2933}
2934
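// Record register usage in the .amdgcn.next_free_{v,s}gpr symbols: if a
// register access raises the highest used dword index, bump the symbol to
// that index + 1. E.g. a reference to v[6:7] sets .amdgcn.next_free_vgpr to 8
// unless it is already larger.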
2935bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2936                                            unsigned DwordRegIndex,
2937                                            unsigned RegWidth) {
2938  // Symbols are only defined for GCN targets
2939  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2940    return true;
2941
2942  auto SymbolName = getGprCountSymbolName(RegKind);
2943  if (!SymbolName)
2944    return true;
2945  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2946
2947  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2948  int64_t OldCount;
2949
2950  if (!Sym->isVariable())
2951    return !Error(getLoc(),
2952                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2953  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2954    return !Error(
2955        getLoc(),
2956        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2957
2958  if (OldCount <= NewMax)
2959    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2960
2961  return true;
2962}
2963
2964std::unique_ptr<AMDGPUOperand>
2965AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2966  const auto &Tok = getToken();
2967  SMLoc StartLoc = Tok.getLoc();
2968  SMLoc EndLoc = Tok.getEndLoc();
2969  RegisterKind RegKind;
2970  unsigned Reg, RegNum, RegWidth;
2971
2972  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2973    return nullptr;
2974  }
2975  if (isHsaAbi(getSTI())) {
2976    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2977      return nullptr;
2978  } else
2979    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2980  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2981}
2982
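// Parse an immediate operand: an optional "lit(...)" wrapper, an fp literal
// with an optional leading '-', or an integer/symbolic expression. E.g.
// "lit(1.0)", "-2.0", "0x7f" and "sym+1" are all handled here; absolute
// expressions become immediate operands, others are kept as MCExpr operands.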
2983ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2984                                      bool HasSP3AbsModifier, bool HasLit) {
2985  // TODO: add syntactic sugar for 1/(2*PI)
2986
2987  if (isRegister())
2988    return ParseStatus::NoMatch;
2989  assert(!isModifier());
2990
2991  if (!HasLit) {
2992    HasLit = trySkipId("lit");
2993    if (HasLit) {
2994      if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2995        return ParseStatus::Failure;
2996      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2997      if (S.isSuccess() &&
2998          !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999        return ParseStatus::Failure;
3000      return S;
3001    }
3002  }
3003
3004  const auto& Tok = getToken();
3005  const auto& NextTok = peekToken();
3006  bool IsReal = Tok.is(AsmToken::Real);
3007  SMLoc S = getLoc();
3008  bool Negate = false;
3009
3010  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3011    lex();
3012    IsReal = true;
3013    Negate = true;
3014  }
3015
3016  AMDGPUOperand::Modifiers Mods;
3017  Mods.Lit = HasLit;
3018
3019  if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an optional
    // sign are allowed.
3023
3024    StringRef Num = getTokenStr();
3025    lex();
3026
3027    APFloat RealVal(APFloat::IEEEdouble());
3028    auto roundMode = APFloat::rmNearestTiesToEven;
3029    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3030      return ParseStatus::Failure;
3031    if (Negate)
3032      RealVal.changeSign();
3033
3034    Operands.push_back(
3035      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3036                               AMDGPUOperand::ImmTyNone, true));
3037    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3038    Op.setModifiers(Mods);
3039
3040    return ParseStatus::Success;
3041
3042  } else {
3043    int64_t IntVal;
3044    const MCExpr *Expr;
3045    SMLoc S = getLoc();
3046
3047    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of the SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
3055      SMLoc EndLoc;
3056      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3057        return ParseStatus::Failure;
3058    } else {
3059      if (Parser.parseExpression(Expr))
3060        return ParseStatus::Failure;
3061    }
3062
3063    if (Expr->evaluateAsAbsolute(IntVal)) {
3064      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3065      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3066      Op.setModifiers(Mods);
3067    } else {
3068      if (HasLit)
3069        return ParseStatus::NoMatch;
3070      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3071    }
3072
3073    return ParseStatus::Success;
3074  }
3075
3076  return ParseStatus::NoMatch;
3077}
3078
3079ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3080  if (!isRegister())
3081    return ParseStatus::NoMatch;
3082
3083  if (auto R = parseRegister()) {
3084    assert(R->isReg());
3085    Operands.push_back(std::move(R));
3086    return ParseStatus::Success;
3087  }
3088  return ParseStatus::Failure;
3089}
3090
3091ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3092                                           bool HasSP3AbsMod, bool HasLit) {
3093  ParseStatus Res = parseReg(Operands);
3094  if (!Res.isNoMatch())
3095    return Res;
3096  if (isModifier())
3097    return ParseStatus::NoMatch;
3098  return parseImm(Operands, HasSP3AbsMod, HasLit);
3099}
3100
3101bool
3102AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3103  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3104    const auto &str = Token.getString();
3105    return str == "abs" || str == "neg" || str == "sext";
3106  }
3107  return false;
3108}
3109
3110bool
3111AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3112  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3113}
3114
3115bool
3116AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3117  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3118}
3119
3120bool
3121AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3122  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3123}
3124
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not one. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
3129//   |...|
3130//   abs(...)
3131//   neg(...)
3132//   sext(...)
3133//   -reg
3134//   -|...|
3135//   -abs(...)
3136//   name:...
3137//
3138bool
3139AMDGPUAsmParser::isModifier() {
3140
3141  AsmToken Tok = getToken();
3142  AsmToken NextToken[2];
3143  peekTokens(NextToken);
3144
3145  return isOperandModifier(Tok, NextToken[0]) ||
3146         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3147         isOpcodeModifierWithVal(Tok, NextToken[0]);
3148}
3149
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following contexts:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as part
// of the expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would result in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise, for uniformity.
3171//
3172bool
3173AMDGPUAsmParser::parseSP3NegModifier() {
3174
3175  AsmToken NextToken[2];
3176  peekTokens(NextToken);
3177
3178  if (isToken(AsmToken::Minus) &&
3179      (isRegister(NextToken[0], NextToken[1]) ||
3180       NextToken[0].is(AsmToken::Pipe) ||
3181       isId(NextToken[0], "abs"))) {
3182    lex();
3183    return true;
3184  }
3185
3186  return false;
3187}
3188
3189ParseStatus
3190AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3191                                              bool AllowImm) {
3192  bool Neg, SP3Neg;
3193  bool Abs, SP3Abs;
3194  bool Lit;
3195  SMLoc Loc;
3196
3197  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3198  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3199    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3200
3201  SP3Neg = parseSP3NegModifier();
3202
3203  Loc = getLoc();
3204  Neg = trySkipId("neg");
3205  if (Neg && SP3Neg)
3206    return Error(Loc, "expected register or immediate");
3207  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3208    return ParseStatus::Failure;
3209
3210  Abs = trySkipId("abs");
3211  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3212    return ParseStatus::Failure;
3213
3214  Lit = trySkipId("lit");
3215  if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3216    return ParseStatus::Failure;
3217
3218  Loc = getLoc();
3219  SP3Abs = trySkipToken(AsmToken::Pipe);
3220  if (Abs && SP3Abs)
3221    return Error(Loc, "expected register or immediate");
3222
3223  ParseStatus Res;
3224  if (AllowImm) {
3225    Res = parseRegOrImm(Operands, SP3Abs, Lit);
3226  } else {
3227    Res = parseReg(Operands);
3228  }
3229  if (!Res.isSuccess())
3230    return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3231
3232  if (Lit && !Operands.back()->isImm())
3233    Error(Loc, "expected immediate with lit modifier");
3234
3235  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3236    return ParseStatus::Failure;
3237  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3238    return ParseStatus::Failure;
3239  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3240    return ParseStatus::Failure;
3241  if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3242    return ParseStatus::Failure;
3243
3244  AMDGPUOperand::Modifiers Mods;
3245  Mods.Abs = Abs || SP3Abs;
3246  Mods.Neg = Neg || SP3Neg;
3247  Mods.Lit = Lit;
3248
3249  if (Mods.hasFPModifiers() || Lit) {
3250    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3251    if (Op.isExpr())
3252      return Error(Op.getStartLoc(), "expected an absolute expression");
3253    Op.setModifiers(Mods);
3254  }
3255  return ParseStatus::Success;
3256}
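
// Illustrative examples of operands accepted by the FP-modifier parser above
// (a non-exhaustive sketch of SP3-style and named modifier syntax):
//   v_add_f32 v0, -v1, |v2|          ; SP3-style neg and abs
//   v_add_f32 v0, neg(v1), abs(v2)   ; named modifiers
//   v_add_f32 v0, -|v1|, v2          ; SP3 neg applied to SP3 abs
// Ambiguous forms such as '--1' are rejected with a dedicated diagnostic; use
// 'neg(-1)' instead.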
3257
3258ParseStatus
3259AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3260                                               bool AllowImm) {
3261  bool Sext = trySkipId("sext");
3262  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3263    return ParseStatus::Failure;
3264
3265  ParseStatus Res;
3266  if (AllowImm) {
3267    Res = parseRegOrImm(Operands);
3268  } else {
3269    Res = parseReg(Operands);
3270  }
3271  if (!Res.isSuccess())
3272    return Sext ? ParseStatus::Failure : Res;
3273
3274  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3275    return ParseStatus::Failure;
3276
3277  AMDGPUOperand::Modifiers Mods;
3278  Mods.Sext = Sext;
3279
3280  if (Mods.hasIntModifiers()) {
3281    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3282    if (Op.isExpr())
3283      return Error(Op.getStartLoc(), "expected an absolute expression");
3284    Op.setModifiers(Mods);
3285  }
3286
3287  return ParseStatus::Success;
3288}
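
// Illustrative sketch (non-exhaustive): the integer input modifier parsed
// above wraps a single source operand, e.g. 'sext(v0)', and may also wrap an
// immediate when AllowImm is true.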
3289
3290ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3291  return parseRegOrImmWithFPInputMods(Operands, false);
3292}
3293
3294ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3295  return parseRegOrImmWithIntInputMods(Operands, false);
3296}
3297
3298ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3299  auto Loc = getLoc();
3300  if (trySkipId("off")) {
3301    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3302                                                AMDGPUOperand::ImmTyOff, false));
3303    return ParseStatus::Success;
3304  }
3305
3306  if (!isRegister())
3307    return ParseStatus::NoMatch;
3308
3309  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3310  if (Reg) {
3311    Operands.push_back(std::move(Reg));
3312    return ParseStatus::Success;
3313  }
3314
3315  return ParseStatus::Failure;
3316}
3317
3318unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3319  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3320
3321  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3322      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3323      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3324      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3325    return Match_InvalidOperand;
3326
3327  if ((TSFlags & SIInstrFlags::VOP3) &&
3328      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3329      getForcedEncodingSize() != 64)
3330    return Match_PreferE32;
3331
3332  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3333      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3334    // v_mac_f32/16 allow only dst_sel == DWORD;
3335    auto OpNum =
3336        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3337    const auto &Op = Inst.getOperand(OpNum);
3338    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3339      return Match_InvalidOperand;
3340    }
3341  }
3342
3343  return Match_Success;
3344}
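
// Illustrative sketch: an explicit encoding suffix on the mnemonic forces an
// encoding size, e.g. 'v_add_f32_e64 v0, v1, v2' forces the 64-bit (VOP3)
// form, so candidate opcodes without the VOP3 flag are rejected above, and
// 'v_add_f32_e32' likewise rules out VOP3 candidates.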
3345
3346static ArrayRef<unsigned> getAllVariants() {
3347  static const unsigned Variants[] = {
3348    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3349    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3350    AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3351  };
3352
3353  return ArrayRef(Variants);
3354}
3355
3356// What asm variants we should check
3357ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3358  if (isForcedDPP() && isForcedVOP3()) {
3359    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3360    return ArrayRef(Variants);
3361  }
3362  if (getForcedEncodingSize() == 32) {
3363    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3364    return ArrayRef(Variants);
3365  }
3366
3367  if (isForcedVOP3()) {
3368    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3369    return ArrayRef(Variants);
3370  }
3371
3372  if (isForcedSDWA()) {
3373    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3374                                        AMDGPUAsmVariants::SDWA9};
3375    return ArrayRef(Variants);
3376  }
3377
3378  if (isForcedDPP()) {
3379    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3380    return ArrayRef(Variants);
3381  }
3382
3383  return getAllVariants();
3384}
3385
3386StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3387  if (isForcedDPP() && isForcedVOP3())
3388    return "e64_dpp";
3389
3390  if (getForcedEncodingSize() == 32)
3391    return "e32";
3392
3393  if (isForcedVOP3())
3394    return "e64";
3395
3396  if (isForcedSDWA())
3397    return "sdwa";
3398
3399  if (isForcedDPP())
3400    return "dpp";
3401
3402  return "";
3403}
3404
3405unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3406  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3407  for (MCPhysReg Reg : Desc.implicit_uses()) {
3408    switch (Reg) {
3409    case AMDGPU::FLAT_SCR:
3410    case AMDGPU::VCC:
3411    case AMDGPU::VCC_LO:
3412    case AMDGPU::VCC_HI:
3413    case AMDGPU::M0:
3414      return Reg;
3415    default:
3416      break;
3417    }
3418  }
3419  return AMDGPU::NoRegister;
3420}
3421
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3426bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3427                                       unsigned OpIdx) const {
3428  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3429
3430  if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3431      AMDGPU::isKImmOperand(Desc, OpIdx)) {
3432    return false;
3433  }
3434
3435  const MCOperand &MO = Inst.getOperand(OpIdx);
3436
3437  int64_t Val = MO.getImm();
3438  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3439
3440  switch (OpSize) { // expected operand size
3441  case 8:
3442    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3443  case 4:
3444    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3445  case 2: {
3446    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3447    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3448        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3449        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3450      return AMDGPU::isInlinableIntLiteral(Val);
3451
3452    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3453        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3454        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3455      return AMDGPU::isInlinableLiteralV2I16(Val);
3456
3457    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3458        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3459        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3460      return AMDGPU::isInlinableLiteralV2F16(Val);
3461
3462    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3463  }
3464  default:
3465    llvm_unreachable("invalid operand size");
3466  }
3467}
3468
3469unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3470  if (!isGFX10Plus())
3471    return 1;
3472
3473  switch (Opcode) {
3474  // 64-bit shift instructions can use only one scalar value input
3475  case AMDGPU::V_LSHLREV_B64_e64:
3476  case AMDGPU::V_LSHLREV_B64_gfx10:
3477  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3478  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3479  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3480  case AMDGPU::V_LSHRREV_B64_e64:
3481  case AMDGPU::V_LSHRREV_B64_gfx10:
3482  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3483  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3484  case AMDGPU::V_ASHRREV_I64_e64:
3485  case AMDGPU::V_ASHRREV_I64_gfx10:
3486  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3487  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3488  case AMDGPU::V_LSHL_B64_e64:
3489  case AMDGPU::V_LSHR_B64_e64:
3490  case AMDGPU::V_ASHR_I64_e64:
3491    return 1;
3492  default:
3493    return 2;
3494  }
3495}
3496
3497constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3498using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3499
3500// Get regular operand indices in the same order as specified
3501// in the instruction (but append mandatory literals to the end).
3502static OperandIndices getSrcOperandIndices(unsigned Opcode,
3503                                           bool AddMandatoryLiterals = false) {
3504
3505  int16_t ImmIdx =
3506      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3507
3508  if (isVOPD(Opcode)) {
3509    int16_t ImmDeferredIdx =
3510        AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3511                             : -1;
3512
3513    return {getNamedOperandIdx(Opcode, OpName::src0X),
3514            getNamedOperandIdx(Opcode, OpName::vsrc1X),
3515            getNamedOperandIdx(Opcode, OpName::src0Y),
3516            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3517            ImmDeferredIdx,
3518            ImmIdx};
3519  }
3520
3521  return {getNamedOperandIdx(Opcode, OpName::src0),
3522          getNamedOperandIdx(Opcode, OpName::src1),
3523          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3524}
3525
3526bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3527  const MCOperand &MO = Inst.getOperand(OpIdx);
3528  if (MO.isImm()) {
3529    return !isInlineConstant(Inst, OpIdx);
3530  } else if (MO.isReg()) {
3531    auto Reg = MO.getReg();
3532    if (!Reg) {
3533      return false;
3534    }
3535    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3536    auto PReg = mc2PseudoReg(Reg);
3537    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3538  } else {
3539    return true;
3540  }
3541}
3542
3543bool AMDGPUAsmParser::validateConstantBusLimitations(
3544    const MCInst &Inst, const OperandVector &Operands) {
3545  const unsigned Opcode = Inst.getOpcode();
3546  const MCInstrDesc &Desc = MII.get(Opcode);
3547  unsigned LastSGPR = AMDGPU::NoRegister;
3548  unsigned ConstantBusUseCount = 0;
3549  unsigned NumLiterals = 0;
3550  unsigned LiteralSize;
3551
3552  if (!(Desc.TSFlags &
3553        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3554         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3555      !isVOPD(Opcode))
3556    return true;
3557
3558  // Check special imm operands (used by madmk, etc)
3559  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3560    ++NumLiterals;
3561    LiteralSize = 4;
3562  }
3563
3564  SmallDenseSet<unsigned> SGPRsUsed;
3565  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3566  if (SGPRUsed != AMDGPU::NoRegister) {
3567    SGPRsUsed.insert(SGPRUsed);
3568    ++ConstantBusUseCount;
3569  }
3570
3571  OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3572
3573  for (int OpIdx : OpIndices) {
3574    if (OpIdx == -1)
3575      continue;
3576
3577    const MCOperand &MO = Inst.getOperand(OpIdx);
3578    if (usesConstantBus(Inst, OpIdx)) {
3579      if (MO.isReg()) {
3580        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersection like these:
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
3587        if (SGPRsUsed.insert(LastSGPR).second) {
3588          ++ConstantBusUseCount;
3589        }
3590      } else { // Expression or a literal
3591
3592        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3593          continue; // special operand like VINTERP attr_chan
3594
3595        // An instruction may use only one literal.
        // This has been validated in a previous step.
3597        // See validateVOPLiteral.
3598        // This literal may be used as more than one operand.
3599        // If all these operands are of the same size,
3600        // this literal counts as one scalar value.
3601        // Otherwise it counts as 2 scalar values.
3602        // See "GFX10 Shader Programming", section 3.6.2.3.
3603
3604        unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3605        if (Size < 4)
3606          Size = 4;
3607
3608        if (NumLiterals == 0) {
3609          NumLiterals = 1;
3610          LiteralSize = Size;
3611        } else if (LiteralSize != Size) {
3612          NumLiterals = 2;
3613        }
3614      }
3615    }
3616  }
3617  ConstantBusUseCount += NumLiterals;
3618
3619  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3620    return true;
3621
3622  SMLoc LitLoc = getLitLoc(Operands);
3623  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3624  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3625  Error(Loc, "invalid operand (violates constant bus restrictions)");
3626  return false;
3627}
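
// Illustrative examples for the constant bus check above (non-exhaustive):
//   v_add_f32 v0, s0, v1          ; one SGPR read              -> always OK
//   v_add_f32_e64 v0, s0, s1      ; two SGPR reads             -> OK on GFX10+ only
//   v_cndmask_b32 v0, s0, v1, vcc ; s0 plus the implicit VCC read counts as
//                                 ; two constant bus uses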
3628
3629bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3630    const MCInst &Inst, const OperandVector &Operands) {
3631
3632  const unsigned Opcode = Inst.getOpcode();
3633  if (!isVOPD(Opcode))
3634    return true;
3635
3636  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3637
3638  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3639    const MCOperand &Opr = Inst.getOperand(OperandIdx);
3640    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3641               ? Opr.getReg()
3642               : MCRegister::NoRegister;
3643  };
3644
  // On GFX12, if both OpX and OpY are V_MOV_B32, then OpY uses the SRC2
  // source cache.
3646  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3647
3648  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3649  auto InvalidCompOprIdx =
3650      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3651  if (!InvalidCompOprIdx)
3652    return true;
3653
3654  auto CompOprIdx = *InvalidCompOprIdx;
3655  auto ParsedIdx =
3656      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3657               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3658  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3659
3660  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3661  if (CompOprIdx == VOPD::Component::DST) {
3662    Error(Loc, "one dst register must be even and the other odd");
3663  } else {
3664    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3665    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3666                   " operands must use different VGPR banks");
3667  }
3668
3669  return false;
3670}
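
// Illustrative sketch of the VOPD bank rule checked above: VGPR banks are
// derived from the low bits of the register number, so e.g.
//   v_dual_add_f32 v0, v4, v2 :: v_dual_mul_f32 v1, v8, v3
// is rejected because the two src0 operands (v4 and v8) fall into the same
// bank, while the dst pair v0/v1 (one even, one odd) is fine.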
3671
3672bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3673
3674  const unsigned Opc = Inst.getOpcode();
3675  const MCInstrDesc &Desc = MII.get(Opc);
3676
3677  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3678    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3679    assert(ClampIdx != -1);
3680    return Inst.getOperand(ClampIdx).getImm() == 0;
3681  }
3682
3683  return true;
3684}
3685
3686constexpr uint64_t MIMGFlags =
3687    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3688
3689bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3690                                           const SMLoc &IDLoc) {
3691
3692  const unsigned Opc = Inst.getOpcode();
3693  const MCInstrDesc &Desc = MII.get(Opc);
3694
3695  if ((Desc.TSFlags & MIMGFlags) == 0)
3696    return true;
3697
3698  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3699  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3700  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3701
3702  assert(VDataIdx != -1);
3703
3704  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3705    return true;
3706
3707  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3708  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3709  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3710  if (DMask == 0)
3711    DMask = 1;
3712
3713  bool IsPackedD16 = false;
3714  unsigned DataSize =
3715      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3716  if (hasPackedD16()) {
3717    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3718    IsPackedD16 = D16Idx >= 0;
3719    if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3720      DataSize = (DataSize + 1) / 2;
3721  }
3722
3723  if ((VDataSize / 4) == DataSize + TFESize)
3724    return true;
3725
3726  StringRef Modifiers;
3727  if (isGFX90A())
3728    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3729  else
3730    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3731
3732  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3733  return false;
3734}
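
// Worked example for the check above (illustrative): with dmask:0x7 the
// loaded data has popcount(0x7) = 3 components, so vdata must be a 3-VGPR
// tuple, or 4 VGPRs if tfe is also set; with packed d16 the component count
// is halved and rounded up, i.e. 2 VGPRs.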
3735
3736bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3737                                           const SMLoc &IDLoc) {
3738  const unsigned Opc = Inst.getOpcode();
3739  const MCInstrDesc &Desc = MII.get(Opc);
3740
3741  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3742    return true;
3743
3744  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3745
3746  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3747      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3748  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3749  int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3750                                                     : AMDGPU::OpName::rsrc;
3751  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3752  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3753  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3754
3755  assert(VAddr0Idx != -1);
3756  assert(SrsrcIdx != -1);
3757  assert(SrsrcIdx > VAddr0Idx);
3758
3759  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3760  if (BaseOpcode->BVH) {
3761    if (IsA16 == BaseOpcode->A16)
3762      return true;
3763    Error(IDLoc, "image address size does not match a16");
3764    return false;
3765  }
3766
3767  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3768  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3769  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3770  unsigned ActualAddrSize =
3771      IsNSA ? SrsrcIdx - VAddr0Idx
3772            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3773
3774  unsigned ExpectedAddrSize =
3775      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3776
3777  if (IsNSA) {
3778    if (hasPartialNSAEncoding() &&
3779        ExpectedAddrSize >
3780            getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3781      int VAddrLastIdx = SrsrcIdx - 1;
3782      unsigned VAddrLastSize =
3783          AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3784
3785      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3786    }
3787  } else {
3788    if (ExpectedAddrSize > 12)
3789      ExpectedAddrSize = 16;
3790
3791    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3792    // This provides backward compatibility for assembly created
3793    // before 160b/192b/224b types were directly supported.
3794    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3795      return true;
3796  }
3797
3798  if (ActualAddrSize == ExpectedAddrSize)
3799    return true;
3800
3801  Error(IDLoc, "image address size does not match dim and a16");
3802  return false;
3803}
3804
3805bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3806
3807  const unsigned Opc = Inst.getOpcode();
3808  const MCInstrDesc &Desc = MII.get(Opc);
3809
3810  if ((Desc.TSFlags & MIMGFlags) == 0)
3811    return true;
3812  if (!Desc.mayLoad() || !Desc.mayStore())
3813    return true; // Not atomic
3814
3815  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3816  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3817
  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified when we check that dmask matches dst size.
3822  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3823}
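
// For example (illustrative), a 32-bit image_atomic_add uses dmask:0x1 and a
// 64-bit one uses dmask:0x3, while image_atomic_cmpswap needs 0x3 or 0xf to
// cover the extra compare value; anything else is rejected above.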
3824
3825bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3826
3827  const unsigned Opc = Inst.getOpcode();
3828  const MCInstrDesc &Desc = MII.get(Opc);
3829
3830  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3831    return true;
3832
3833  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3834  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3835
3836  // GATHER4 instructions use dmask in a different fashion compared to
3837  // other MIMG instructions. The only useful DMASK values are
3838  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3839  // (red,red,red,red) etc.) The ISA document doesn't mention
3840  // this.
3841  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3842}
3843
3844bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3845  const unsigned Opc = Inst.getOpcode();
3846  const MCInstrDesc &Desc = MII.get(Opc);
3847
3848  if ((Desc.TSFlags & MIMGFlags) == 0)
3849    return true;
3850
3851  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3852  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3853      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3854
3855  if (!BaseOpcode->MSAA)
3856    return true;
3857
3858  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3859  assert(DimIdx != -1);
3860
3861  unsigned Dim = Inst.getOperand(DimIdx).getImm();
3862  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3863
3864  return DimInfo->MSAA;
3865}
3866
3867static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3868{
3869  switch (Opcode) {
3870  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3871  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3872  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3873    return true;
3874  default:
3875    return false;
3876  }
3877}
3878
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
3882bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3883                                      const OperandVector &Operands) {
3884
3885  const unsigned Opc = Inst.getOpcode();
3886  const MCInstrDesc &Desc = MII.get(Opc);
3887
3888  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3889    return true;
3890
3891  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3892  assert(Src0Idx != -1);
3893
3894  SMLoc ErrLoc;
3895  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3896  if (Src0.isReg()) {
3897    auto Reg = mc2PseudoReg(Src0.getReg());
3898    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3899    if (!isSGPR(Reg, TRI))
3900      return true;
3901    ErrLoc = getRegLoc(Reg, Operands);
3902  } else {
3903    ErrLoc = getConstLoc(Operands);
3904  }
3905
3906  Error(ErrLoc, "source operand must be a VGPR");
3907  return false;
3908}
3909
3910bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3911                                          const OperandVector &Operands) {
3912
3913  const unsigned Opc = Inst.getOpcode();
3914
3915  if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3916    return true;
3917
3918  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3919  assert(Src0Idx != -1);
3920
3921  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3922  if (!Src0.isReg())
3923    return true;
3924
3925  auto Reg = mc2PseudoReg(Src0.getReg());
3926  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3927  if (!isGFX90A() && isSGPR(Reg, TRI)) {
3928    Error(getRegLoc(Reg, Operands),
3929          "source operand must be either a VGPR or an inline constant");
3930    return false;
3931  }
3932
3933  return true;
3934}
3935
3936bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3937                                      const OperandVector &Operands) {
3938  unsigned Opcode = Inst.getOpcode();
3939  const MCInstrDesc &Desc = MII.get(Opcode);
3940
3941  if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3942      !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3943    return true;
3944
3945  const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3946  if (Src2Idx == -1)
3947    return true;
3948
3949  if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3950    Error(getConstLoc(Operands),
3951          "inline constants are not allowed for this operand");
3952    return false;
3953  }
3954
3955  return true;
3956}
3957
3958bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3959                                   const OperandVector &Operands) {
3960  const unsigned Opc = Inst.getOpcode();
3961  const MCInstrDesc &Desc = MII.get(Opc);
3962
3963  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3964    return true;
3965
3966  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3967  if (Src2Idx == -1)
3968    return true;
3969
3970  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3971  if (!Src2.isReg())
3972    return true;
3973
3974  MCRegister Src2Reg = Src2.getReg();
3975  MCRegister DstReg = Inst.getOperand(0).getReg();
3976  if (Src2Reg == DstReg)
3977    return true;
3978
3979  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3980  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3981    return true;
3982
3983  if (TRI->regsOverlap(Src2Reg, DstReg)) {
3984    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3985          "source 2 operand must not partially overlap with dst");
3986    return false;
3987  }
3988
3989  return true;
3990}
3991
3992bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3993  switch (Inst.getOpcode()) {
3994  default:
3995    return true;
3996  case V_DIV_SCALE_F32_gfx6_gfx7:
3997  case V_DIV_SCALE_F32_vi:
3998  case V_DIV_SCALE_F32_gfx10:
3999  case V_DIV_SCALE_F64_gfx6_gfx7:
4000  case V_DIV_SCALE_F64_vi:
4001  case V_DIV_SCALE_F64_gfx10:
4002    break;
4003  }
4004
4005  // TODO: Check that src0 = src1 or src2.
4006
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
4010    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4011            .getImm() &
4012        SISrcMods::ABS) {
4013      return false;
4014    }
4015  }
4016
4017  return true;
4018}
4019
4020bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4021
4022  const unsigned Opc = Inst.getOpcode();
4023  const MCInstrDesc &Desc = MII.get(Opc);
4024
4025  if ((Desc.TSFlags & MIMGFlags) == 0)
4026    return true;
4027
4028  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4029  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4030    if (isCI() || isSI())
4031      return false;
4032  }
4033
4034  return true;
4035}
4036
4037static bool IsRevOpcode(const unsigned Opcode)
4038{
4039  switch (Opcode) {
4040  case AMDGPU::V_SUBREV_F32_e32:
4041  case AMDGPU::V_SUBREV_F32_e64:
4042  case AMDGPU::V_SUBREV_F32_e32_gfx10:
4043  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
4044  case AMDGPU::V_SUBREV_F32_e32_vi:
4045  case AMDGPU::V_SUBREV_F32_e64_gfx10:
4046  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
4047  case AMDGPU::V_SUBREV_F32_e64_vi:
4048
4049  case AMDGPU::V_SUBREV_CO_U32_e32:
4050  case AMDGPU::V_SUBREV_CO_U32_e64:
4051  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
4052  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
4053
4054  case AMDGPU::V_SUBBREV_U32_e32:
4055  case AMDGPU::V_SUBBREV_U32_e64:
4056  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
4057  case AMDGPU::V_SUBBREV_U32_e32_vi:
4058  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
4059  case AMDGPU::V_SUBBREV_U32_e64_vi:
4060
4061  case AMDGPU::V_SUBREV_U32_e32:
4062  case AMDGPU::V_SUBREV_U32_e64:
4063  case AMDGPU::V_SUBREV_U32_e32_gfx9:
4064  case AMDGPU::V_SUBREV_U32_e32_vi:
4065  case AMDGPU::V_SUBREV_U32_e64_gfx9:
4066  case AMDGPU::V_SUBREV_U32_e64_vi:
4067
4068  case AMDGPU::V_SUBREV_F16_e32:
4069  case AMDGPU::V_SUBREV_F16_e64:
4070  case AMDGPU::V_SUBREV_F16_e32_gfx10:
4071  case AMDGPU::V_SUBREV_F16_e32_vi:
4072  case AMDGPU::V_SUBREV_F16_e64_gfx10:
4073  case AMDGPU::V_SUBREV_F16_e64_vi:
4074
4075  case AMDGPU::V_SUBREV_U16_e32:
4076  case AMDGPU::V_SUBREV_U16_e64:
4077  case AMDGPU::V_SUBREV_U16_e32_vi:
4078  case AMDGPU::V_SUBREV_U16_e64_vi:
4079
4080  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4081  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4082  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4083
4084  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4085  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4086
4087  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4088  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4089
4090  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4091  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4092
4093  case AMDGPU::V_LSHRREV_B32_e32:
4094  case AMDGPU::V_LSHRREV_B32_e64:
4095  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4096  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4097  case AMDGPU::V_LSHRREV_B32_e32_vi:
4098  case AMDGPU::V_LSHRREV_B32_e64_vi:
4099  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4100  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4101
4102  case AMDGPU::V_ASHRREV_I32_e32:
4103  case AMDGPU::V_ASHRREV_I32_e64:
4104  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4105  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4106  case AMDGPU::V_ASHRREV_I32_e32_vi:
4107  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4108  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4109  case AMDGPU::V_ASHRREV_I32_e64_vi:
4110
4111  case AMDGPU::V_LSHLREV_B32_e32:
4112  case AMDGPU::V_LSHLREV_B32_e64:
4113  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4114  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4115  case AMDGPU::V_LSHLREV_B32_e32_vi:
4116  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4117  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4118  case AMDGPU::V_LSHLREV_B32_e64_vi:
4119
4120  case AMDGPU::V_LSHLREV_B16_e32:
4121  case AMDGPU::V_LSHLREV_B16_e64:
4122  case AMDGPU::V_LSHLREV_B16_e32_vi:
4123  case AMDGPU::V_LSHLREV_B16_e64_vi:
4124  case AMDGPU::V_LSHLREV_B16_gfx10:
4125
4126  case AMDGPU::V_LSHRREV_B16_e32:
4127  case AMDGPU::V_LSHRREV_B16_e64:
4128  case AMDGPU::V_LSHRREV_B16_e32_vi:
4129  case AMDGPU::V_LSHRREV_B16_e64_vi:
4130  case AMDGPU::V_LSHRREV_B16_gfx10:
4131
4132  case AMDGPU::V_ASHRREV_I16_e32:
4133  case AMDGPU::V_ASHRREV_I16_e64:
4134  case AMDGPU::V_ASHRREV_I16_e32_vi:
4135  case AMDGPU::V_ASHRREV_I16_e64_vi:
4136  case AMDGPU::V_ASHRREV_I16_gfx10:
4137
4138  case AMDGPU::V_LSHLREV_B64_e64:
4139  case AMDGPU::V_LSHLREV_B64_gfx10:
4140  case AMDGPU::V_LSHLREV_B64_vi:
4141
4142  case AMDGPU::V_LSHRREV_B64_e64:
4143  case AMDGPU::V_LSHRREV_B64_gfx10:
4144  case AMDGPU::V_LSHRREV_B64_vi:
4145
4146  case AMDGPU::V_ASHRREV_I64_e64:
4147  case AMDGPU::V_ASHRREV_I64_gfx10:
4148  case AMDGPU::V_ASHRREV_I64_vi:
4149
4150  case AMDGPU::V_PK_LSHLREV_B16:
4151  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4152  case AMDGPU::V_PK_LSHLREV_B16_vi:
4153
4154  case AMDGPU::V_PK_LSHRREV_B16:
4155  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4156  case AMDGPU::V_PK_LSHRREV_B16_vi:
4157  case AMDGPU::V_PK_ASHRREV_I16:
4158  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4159  case AMDGPU::V_PK_ASHRREV_I16_vi:
4160    return true;
4161  default:
4162    return false;
4163  }
4164}
4165
4166std::optional<StringRef>
4167AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4168
4169  using namespace SIInstrFlags;
4170  const unsigned Opcode = Inst.getOpcode();
4171  const MCInstrDesc &Desc = MII.get(Opcode);
4172
4173  // lds_direct register is defined so that it can be used
4174  // with 9-bit operands only. Ignore encodings which do not accept these.
4175  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4176  if ((Desc.TSFlags & Enc) == 0)
4177    return std::nullopt;
4178
4179  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4180    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4181    if (SrcIdx == -1)
4182      break;
4183    const auto &Src = Inst.getOperand(SrcIdx);
4184    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4185
4186      if (isGFX90A() || isGFX11Plus())
4187        return StringRef("lds_direct is not supported on this GPU");
4188
4189      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4190        return StringRef("lds_direct cannot be used with this instruction");
4191
4192      if (SrcName != OpName::src0)
4193        return StringRef("lds_direct may be used as src0 only");
4194    }
4195  }
4196
4197  return std::nullopt;
4198}
4199
4200SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4201  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4202    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4203    if (Op.isFlatOffset())
4204      return Op.getStartLoc();
4205  }
4206  return getLoc();
4207}
4208
4209bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4210                                     const OperandVector &Operands) {
4211  auto Opcode = Inst.getOpcode();
4212  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4213  if (OpNum == -1)
4214    return true;
4215
4216  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4217  if ((TSFlags & SIInstrFlags::FLAT))
4218    return validateFlatOffset(Inst, Operands);
4219
4220  if ((TSFlags & SIInstrFlags::SMRD))
4221    return validateSMEMOffset(Inst, Operands);
4222
4223  const auto &Op = Inst.getOperand(OpNum);
4224  if (isGFX12Plus() &&
4225      (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4226    const unsigned OffsetSize = 24;
4227    if (!isIntN(OffsetSize, Op.getImm())) {
4228      Error(getFlatOffsetLoc(Operands),
4229            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4230      return false;
4231    }
4232  } else {
4233    const unsigned OffsetSize = 16;
4234    if (!isUIntN(OffsetSize, Op.getImm())) {
4235      Error(getFlatOffsetLoc(Operands),
4236            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4237      return false;
4238    }
4239  }
4240  return true;
4241}
4242
4243bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4244                                         const OperandVector &Operands) {
4245  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4246  if ((TSFlags & SIInstrFlags::FLAT) == 0)
4247    return true;
4248
4249  auto Opcode = Inst.getOpcode();
4250  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4251  assert(OpNum != -1);
4252
4253  const auto &Op = Inst.getOperand(OpNum);
4254  if (!hasFlatOffsets() && Op.getImm() != 0) {
4255    Error(getFlatOffsetLoc(Operands),
4256          "flat offset modifier is not supported on this GPU");
4257    return false;
4258  }
4259
  // For pre-GFX12 FLAT instructions the offset must be non-negative;
  // MSB is ignored and forced to zero.
4262  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4263  bool AllowNegative =
4264      (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4265      isGFX12Plus();
4266  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4267    Error(getFlatOffsetLoc(Operands),
4268          Twine("expected a ") +
4269              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4270                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4271    return false;
4272  }
4273
4274  return true;
4275}
4276
4277SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4278  // Start with second operand because SMEM Offset cannot be dst or src0.
4279  for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4280    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4281    if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4282      return Op.getStartLoc();
4283  }
4284  return getLoc();
4285}
4286
4287bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4288                                         const OperandVector &Operands) {
4289  if (isCI() || isSI())
4290    return true;
4291
4292  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4293  if ((TSFlags & SIInstrFlags::SMRD) == 0)
4294    return true;
4295
4296  auto Opcode = Inst.getOpcode();
4297  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4298  if (OpNum == -1)
4299    return true;
4300
4301  const auto &Op = Inst.getOperand(OpNum);
4302  if (!Op.isImm())
4303    return true;
4304
4305  uint64_t Offset = Op.getImm();
4306  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4307  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4308      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4309    return true;
4310
4311  Error(getSMEMOffsetLoc(Operands),
4312        isGFX12Plus()          ? "expected a 24-bit signed offset"
4313        : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4314                               : "expected a 21-bit signed offset");
4315
4316  return false;
4317}
4318
4319bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4320  unsigned Opcode = Inst.getOpcode();
4321  const MCInstrDesc &Desc = MII.get(Opcode);
4322  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4323    return true;
4324
4325  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4326  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4327
4328  const int OpIndices[] = { Src0Idx, Src1Idx };
4329
4330  unsigned NumExprs = 0;
4331  unsigned NumLiterals = 0;
4332  uint32_t LiteralValue;
4333
4334  for (int OpIdx : OpIndices) {
4335    if (OpIdx == -1) break;
4336
4337    const MCOperand &MO = Inst.getOperand(OpIdx);
4338    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4339    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4340      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4341        uint32_t Value = static_cast<uint32_t>(MO.getImm());
4342        if (NumLiterals == 0 || LiteralValue != Value) {
4343          LiteralValue = Value;
4344          ++NumLiterals;
4345        }
4346      } else if (MO.isExpr()) {
4347        ++NumExprs;
4348      }
4349    }
4350  }
4351
4352  return NumLiterals + NumExprs <= 1;
4353}
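
// For example (illustrative), 's_add_u32 s0, 0x12345678, 0x9abcdef0' needs
// two distinct 32-bit literals and fails the check above, while
// 's_add_u32 s0, 0x12345678, 0x12345678' reuses a single literal value and
// passes.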
4354
4355bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4356  const unsigned Opc = Inst.getOpcode();
4357  if (isPermlane16(Opc)) {
4358    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4359    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4360
4361    if (OpSel & ~3)
4362      return false;
4363  }
4364
4365  uint64_t TSFlags = MII.get(Opc).TSFlags;
4366
4367  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4368    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4369    if (OpSelIdx != -1) {
4370      if (Inst.getOperand(OpSelIdx).getImm() != 0)
4371        return false;
4372    }
4373    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4374    if (OpSelHiIdx != -1) {
4375      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4376        return false;
4377    }
4378  }
4379
4380  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4381  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4382      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4383    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4384    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4385    if (OpSel & 3)
4386      return false;
4387  }
4388
4389  return true;
4390}
4391
4392bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
4393  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
4394
4395  const unsigned Opc = Inst.getOpcode();
4396  uint64_t TSFlags = MII.get(Opc).TSFlags;
4397
4398  // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
4399  // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
4400  // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
4401  // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
4402  if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
4403      !(TSFlags & SIInstrFlags::IsSWMMAC))
4404    return true;
4405
4406  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
4407  if (NegIdx == -1)
4408    return true;
4409
4410  unsigned Neg = Inst.getOperand(NegIdx).getImm();
4411
  // Some instructions have a neg_lo or neg_hi operand, but the neg modifier
  // is allowed only on certain src operands and not on others.
  // Conveniently, such instructions don't have a src_modifiers operand
  // for the src operands that don't allow neg, because those operands
  // don't allow opsel either.
4416
4417  int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
4418                    AMDGPU::OpName::src1_modifiers,
4419                    AMDGPU::OpName::src2_modifiers};
4420
4421  for (unsigned i = 0; i < 3; ++i) {
4422    if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
4423      if (Neg & (1 << i))
4424        return false;
4425    }
4426  }
4427
4428  return true;
4429}
4430
4431bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4432                                  const OperandVector &Operands) {
4433  const unsigned Opc = Inst.getOpcode();
4434  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4435  if (DppCtrlIdx >= 0) {
4436    unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4437
4438    if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4439        AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4440      // DP ALU DPP is supported for row_newbcast only on GFX9*
4441      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4442      Error(S, "DP ALU dpp only supports row_newbcast");
4443      return false;
4444    }
4445  }
4446
4447  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4448  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4449
4450  if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4451    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4452    if (Src1Idx >= 0) {
4453      const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4454      const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4455      if (Src1.isImm() ||
4456          (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4457        AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4458        Error(Op.getStartLoc(), "invalid operand for instruction");
4459        return false;
4460      }
4461    }
4462  }
4463
4464  return true;
4465}
4466
4467// Check if VCC register matches wavefront size
4468bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4469  auto FB = getFeatureBits();
4470  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4471    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4472}
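
// For example (illustrative), in wave32 mode 'vcc_lo' is the form expected by
// the check above, while plain 'vcc' only matches when wave64 is enabled.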
4473
// Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4475bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4476                                         const OperandVector &Operands) {
4477  unsigned Opcode = Inst.getOpcode();
4478  const MCInstrDesc &Desc = MII.get(Opcode);
4479  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4480  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4481      !HasMandatoryLiteral && !isVOPD(Opcode))
4482    return true;
4483
4484  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4485
4486  unsigned NumExprs = 0;
4487  unsigned NumLiterals = 0;
4488  uint32_t LiteralValue;
4489
4490  for (int OpIdx : OpIndices) {
4491    if (OpIdx == -1)
4492      continue;
4493
4494    const MCOperand &MO = Inst.getOperand(OpIdx);
4495    if (!MO.isImm() && !MO.isExpr())
4496      continue;
4497    if (!isSISrcOperand(Desc, OpIdx))
4498      continue;
4499
4500    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4501      uint64_t Value = static_cast<uint64_t>(MO.getImm());
4502      bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4503                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4504      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4505
4506      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4507        Error(getLitLoc(Operands), "invalid operand for instruction");
4508        return false;
4509      }
4510
4511      if (IsFP64 && IsValid32Op)
4512        Value = Hi_32(Value);
4513
4514      if (NumLiterals == 0 || LiteralValue != Value) {
4515        LiteralValue = Value;
4516        ++NumLiterals;
4517      }
4518    } else if (MO.isExpr()) {
4519      ++NumExprs;
4520    }
4521  }
4522  NumLiterals += NumExprs;
4523
4524  if (!NumLiterals)
4525    return true;
4526
4527  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4528    Error(getLitLoc(Operands), "literal operands are not supported");
4529    return false;
4530  }
4531
4532  if (NumLiterals > 1) {
4533    Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4534    return false;
4535  }
4536
4537  return true;
4538}
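
// Illustrative examples for the check above (non-exhaustive), assuming a
// GFX10+ target with FeatureVOP3Literal:
//   v_fma_f32 v0, v1, 0x3e000000, 0x3e800000 ; two distinct literals -> error
//   v_fma_f32 v0, v1, 0x3e800000, 0x3e800000 ; one unique literal    -> OK
// On targets without VOP3 literals, any such literal operand is rejected.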
4539
4540// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4541static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4542                         const MCRegisterInfo *MRI) {
4543  int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4544  if (OpIdx < 0)
4545    return -1;
4546
4547  const MCOperand &Op = Inst.getOperand(OpIdx);
4548  if (!Op.isReg())
4549    return -1;
4550
4551  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4552  auto Reg = Sub ? Sub : Op.getReg();
4553  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4554  return AGPR32.contains(Reg) ? 1 : 0;
4555}
4556
4557bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4558  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4559  if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4560                  SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4561                  SIInstrFlags::DS)) == 0)
4562    return true;
4563
4564  uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4565                                                      : AMDGPU::OpName::vdata;
4566
4567  const MCRegisterInfo *MRI = getMRI();
4568  int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4569  int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4570
4571  if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4572    int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4573    if (Data2Areg >= 0 && Data2Areg != DataAreg)
4574      return false;
4575  }
4576
4577  auto FB = getFeatureBits();
4578  if (FB[AMDGPU::FeatureGFX90AInsts]) {
4579    if (DataAreg < 0 || DstAreg < 0)
4580      return true;
4581    return DstAreg == DataAreg;
4582  }
4583
4584  return DstAreg < 1 && DataAreg < 1;
4585}
4586
4587bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4588  auto FB = getFeatureBits();
4589  if (!FB[AMDGPU::FeatureGFX90AInsts])
4590    return true;
4591
4592  const MCRegisterInfo *MRI = getMRI();
4593  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4594  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4595  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4596    const MCOperand &Op = Inst.getOperand(I);
4597    if (!Op.isReg())
4598      continue;
4599
4600    unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4601    if (!Sub)
4602      continue;
4603
4604    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4605      return false;
4606    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4607      return false;
4608  }
4609
4610  return true;
4611}
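
// For example (illustrative), on gfx90a 'global_load_dwordx2 v[1:2], v[2:3], off'
// is rejected by the check above because the 64-bit destination tuple starts
// at an odd register; 'v[0:1]' or 'v[2:3]' would satisfy the even-alignment rule.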
4612
4613SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4614  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4615    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4616    if (Op.isBLGP())
4617      return Op.getStartLoc();
4618  }
4619  return SMLoc();
4620}
4621
4622bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4623                                   const OperandVector &Operands) {
4624  unsigned Opc = Inst.getOpcode();
4625  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4626  if (BlgpIdx == -1)
4627    return true;
4628  SMLoc BLGPLoc = getBLGPLoc(Operands);
4629  if (!BLGPLoc.isValid())
4630    return true;
4631  bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4632  auto FB = getFeatureBits();
4633  bool UsesNeg = false;
4634  if (FB[AMDGPU::FeatureGFX940Insts]) {
4635    switch (Opc) {
4636    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4637    case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4638    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4639    case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4640      UsesNeg = true;
4641    }
4642  }
4643
4644  if (IsNeg == UsesNeg)
4645    return true;
4646
4647  Error(BLGPLoc,
4648        UsesNeg ? "invalid modifier: blgp is not supported"
4649                : "invalid modifier: neg is not supported");
4650
4651  return false;
4652}
4653
4654bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4655                                      const OperandVector &Operands) {
4656  if (!isGFX11Plus())
4657    return true;
4658
4659  unsigned Opc = Inst.getOpcode();
4660  if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4661      Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4662      Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4663      Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4664    return true;
4665
4666  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4667  assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4668  auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4669  if (Reg == AMDGPU::SGPR_NULL)
4670    return true;
4671
4672  SMLoc RegLoc = getRegLoc(Reg, Operands);
4673  Error(RegLoc, "src0 must be null");
4674  return false;
4675}
4676
4677bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4678                                 const OperandVector &Operands) {
4679  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4680  if ((TSFlags & SIInstrFlags::DS) == 0)
4681    return true;
4682  if (TSFlags & SIInstrFlags::GWS)
4683    return validateGWS(Inst, Operands);
4684  // Only validate GDS for non-GWS instructions.
4685  if (hasGDS())
4686    return true;
4687  int GDSIdx =
4688      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4689  if (GDSIdx < 0)
4690    return true;
4691  unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4692  if (GDS) {
4693    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4694    Error(S, "gds modifier is not supported on this GPU");
4695    return false;
4696  }
4697  return true;
4698}
4699
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even-aligned registers.
4702bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4703                                  const OperandVector &Operands) {
4704  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4705    return true;
4706
4707  int Opc = Inst.getOpcode();
4708  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4709      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4710    return true;
4711
4712  const MCRegisterInfo *MRI = getMRI();
4713  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4714  int Data0Pos =
4715      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4716  assert(Data0Pos != -1);
4717  auto Reg = Inst.getOperand(Data0Pos).getReg();
4718  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4719  if (RegIdx & 1) {
4720    SMLoc RegLoc = getRegLoc(Reg, Operands);
4721    Error(RegLoc, "vgpr must be even aligned");
4722    return false;
4723  }
4724
4725  return true;
4726}
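
// For example (illustrative), on gfx90a 'ds_gws_init v1 gds' is rejected
// because data0 uses an odd-numbered VGPR, while 'ds_gws_init v2 gds'
// satisfies the even-alignment requirement.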
4727
4728bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4729                                            const OperandVector &Operands,
4730                                            const SMLoc &IDLoc) {
4731  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4732                                           AMDGPU::OpName::cpol);
4733  if (CPolPos == -1)
4734    return true;
4735
4736  unsigned CPol = Inst.getOperand(CPolPos).getImm();
4737
4738  if (isGFX12Plus())
4739    return validateTHAndScopeBits(Inst, Operands, CPol);
4740
4741  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742  if (TSFlags & SIInstrFlags::SMRD) {
4743    if (CPol && (isSI() || isCI())) {
4744      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4745      Error(S, "cache policy is not supported for SMRD instructions");
4746      return false;
4747    }
4748    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4749      Error(IDLoc, "invalid cache policy for SMEM instruction");
4750      return false;
4751    }
4752  }
4753
4754  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4755    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4756                                      SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4757                                      SIInstrFlags::FLAT;
4758    if (!(TSFlags & AllowSCCModifier)) {
4759      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4760      StringRef CStr(S.getPointer());
4761      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4762      Error(S,
4763            "scc modifier is not supported for this instruction on this GPU");
4764      return false;
4765    }
4766  }
4767
4768  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4769    return true;
4770
4771  if (TSFlags & SIInstrFlags::IsAtomicRet) {
4772    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4773      Error(IDLoc, isGFX940() ? "instruction must use sc0"
4774                              : "instruction must use glc");
4775      return false;
4776    }
4777  } else {
4778    if (CPol & CPol::GLC) {
4779      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4780      StringRef CStr(S.getPointer());
4781      S = SMLoc::getFromPointer(
4782          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4783      Error(S, isGFX940() ? "instruction must not use sc0"
4784                          : "instruction must not use glc");
4785      return false;
4786    }
4787  }
4788
4789  return true;
4790}
4791
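// GFX12 cache policy: check that the temporal hint (th) and scope bits form a
// legal combination for this instruction class (load, store or atomic).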
4792bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4793                                             const OperandVector &Operands,
4794                                             const unsigned CPol) {
4795  const unsigned TH = CPol & AMDGPU::CPol::TH;
4796  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4797
4798  const unsigned Opcode = Inst.getOpcode();
4799  const MCInstrDesc &TID = MII.get(Opcode);
4800
4801  auto PrintError = [&](StringRef Msg) {
4802    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4803    Error(S, Msg);
4804    return false;
4805  };
4806
4807  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4808      (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4809      (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4810    return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4811
4812  if (TH == 0)
4813    return true;
4814
4815  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4816      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4817       (TH == AMDGPU::CPol::TH_NT_HT)))
4818    return PrintError("invalid th value for SMEM instruction");
4819
4820  if (TH == AMDGPU::CPol::TH_BYPASS) {
4821    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4822         CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4823        (Scope == AMDGPU::CPol::SCOPE_SYS &&
4824         !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4825      return PrintError("scope and th combination is not valid");
4826  }
4827
4828  bool IsStore = TID.mayStore();
4829  bool IsAtomic =
4830      TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4831
4832  if (IsAtomic) {
4833    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4834      return PrintError("invalid th value for atomic instructions");
4835  } else if (IsStore) {
4836    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4837      return PrintError("invalid th value for store instructions");
4838  } else {
4839    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4840      return PrintError("invalid th value for load instructions");
4841  }
4842
4843  return true;
4844}
4845
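// The execz and vccz condition registers are not available as operands on
// GFX11+, so reject any explicit use of them.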
4846bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4847  if (!isGFX11Plus())
4848    return true;
4849  for (auto &Operand : Operands) {
4850    if (!Operand->isReg())
4851      continue;
4852    unsigned Reg = Operand->getReg();
4853    if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4854      Error(getRegLoc(Reg, Operands),
4855            "execz and vccz are not supported on this GPU");
4856      return false;
4857    }
4858  }
4859  return true;
4860}
4861
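// Diagnose the tfe modifier on buffer (MUBUF/MTBUF) stores, where it has no
// effect.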
4862bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4863                                  const OperandVector &Operands) {
4864  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4865  if (Desc.mayStore() &&
4866      (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4867    SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4868    if (Loc != getInstLoc(Operands)) {
4869      Error(Loc, "TFE modifier has no meaning for store instructions");
4870      return false;
4871    }
4872  }
4873
4874  return true;
4875}
4876
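// Top-level semantic validation, run after a successful instruction match.
// Each check either reports its own diagnostic or has one emitted here; the
// first failing check stops further validation.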
4877bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4878                                          const SMLoc &IDLoc,
4879                                          const OperandVector &Operands) {
4880  if (auto ErrMsg = validateLdsDirect(Inst)) {
4881    Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4882    return false;
4883  }
4884  if (!validateSOPLiteral(Inst)) {
4885    Error(getLitLoc(Operands),
4886      "only one unique literal operand is allowed");
4887    return false;
4888  }
4889  if (!validateVOPLiteral(Inst, Operands)) {
4890    return false;
4891  }
4892  if (!validateConstantBusLimitations(Inst, Operands)) {
4893    return false;
4894  }
4895  if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4896    return false;
4897  }
4898  if (!validateIntClampSupported(Inst)) {
4899    Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4900      "integer clamping is not supported on this GPU");
4901    return false;
4902  }
4903  if (!validateOpSel(Inst)) {
4904    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4905      "invalid op_sel operand");
4906    return false;
4907  }
4908  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
4909    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
4910          "invalid neg_lo operand");
4911    return false;
4912  }
4913  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
4914    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
4915          "invalid neg_hi operand");
4916    return false;
4917  }
4918  if (!validateDPP(Inst, Operands)) {
4919    return false;
4920  }
4921  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4922  if (!validateMIMGD16(Inst)) {
4923    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4924      "d16 modifier is not supported on this GPU");
4925    return false;
4926  }
4927  if (!validateMIMGMSAA(Inst)) {
4928    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4929          "invalid dim; must be MSAA type");
4930    return false;
4931  }
4932  if (!validateMIMGDataSize(Inst, IDLoc)) {
4933    return false;
4934  }
4935  if (!validateMIMGAddrSize(Inst, IDLoc))
4936    return false;
4937  if (!validateMIMGAtomicDMask(Inst)) {
4938    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4939      "invalid atomic image dmask");
4940    return false;
4941  }
4942  if (!validateMIMGGatherDMask(Inst)) {
4943    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4944      "invalid image_gather dmask: only one bit must be set");
4945    return false;
4946  }
4947  if (!validateMovrels(Inst, Operands)) {
4948    return false;
4949  }
4950  if (!validateOffset(Inst, Operands)) {
4951    return false;
4952  }
4953  if (!validateMAIAccWrite(Inst, Operands)) {
4954    return false;
4955  }
4956  if (!validateMAISrc2(Inst, Operands)) {
4957    return false;
4958  }
4959  if (!validateMFMA(Inst, Operands)) {
4960    return false;
4961  }
4962  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4963    return false;
4964  }
4965
4966  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc,
          getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
              ? "invalid register class: data and dst should be all VGPR or AGPR"
              : "invalid register class: agpr loads and stores not supported "
                "on this GPU");
4971    return false;
4972  }
4973  if (!validateVGPRAlign(Inst)) {
4974    Error(IDLoc,
4975      "invalid register class: vgpr tuples must be 64 bit aligned");
4976    return false;
4977  }
4978  if (!validateDS(Inst, Operands)) {
4979    return false;
4980  }
4981
4982  if (!validateBLGP(Inst, Operands)) {
4983    return false;
4984  }
4985
4986  if (!validateDivScale(Inst)) {
4987    Error(IDLoc, "ABS not allowed in VOP3B instructions");
4988    return false;
4989  }
4990  if (!validateWaitCnt(Inst, Operands)) {
4991    return false;
4992  }
4993  if (!validateExeczVcczOperands(Operands)) {
4994    return false;
4995  }
4996  if (!validateTFE(Inst, Operands)) {
4997    return false;
4998  }
4999
5000  return true;
5001}
5002
5003static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5004                                            const FeatureBitset &FBS,
5005                                            unsigned VariantID = 0);
5006
5007static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5008                                const FeatureBitset &AvailableFeatures,
5009                                unsigned VariantID);
5010
5011bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5012                                       const FeatureBitset &FBS) {
5013  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5014}
5015
5016bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5017                                       const FeatureBitset &FBS,
5018                                       ArrayRef<unsigned> Variants) {
5019  for (auto Variant : Variants) {
5020    if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5021      return true;
5022  }
5023
5024  return false;
5025}
5026
5027bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5028                                                  const SMLoc &IDLoc) {
5029  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5030
5031  // Check if requested instruction variant is supported.
5032  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5033    return false;
5034
5035  // This instruction is not supported.
5036  // Clear any other pending errors because they are no longer relevant.
5037  getParser().clearPendingErrors();
5038
5039  // Requested instruction variant is not supported.
5040  // Check if any other variants are supported.
5041  StringRef VariantName = getMatchedVariantName();
5042  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5043    return Error(IDLoc,
5044                 Twine(VariantName,
5045                       " variant of this instruction is not supported"));
5046  }
5047
5048  // Check if this instruction may be used with a different wavesize.
5049  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5050      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5051
5052    FeatureBitset FeaturesWS32 = getFeatureBits();
5053    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5054        .flip(AMDGPU::FeatureWavefrontSize32);
5055    FeatureBitset AvailableFeaturesWS32 =
5056        ComputeAvailableFeatures(FeaturesWS32);
5057
5058    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5059      return Error(IDLoc, "instruction requires wavesize=32");
5060  }
5061
5062  // Finally check if this instruction is supported on any other GPU.
5063  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5064    return Error(IDLoc, "instruction not supported on this GPU");
5065  }
5066
5067  // Instruction not supported on any GPU. Probably a typo.
5068  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5069  return Error(IDLoc, "invalid instruction" + Suggestion);
5070}
5071
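// Returns true if the operand flagged as invalid is the token immediately
// following the "::" VOPD separator, i.e. the mnemonic of the Y half, so the
// caller can report an invalid VOPDY instruction instead of a generic operand
// error.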
5072static bool isInvalidVOPDY(const OperandVector &Operands,
5073                           uint64_t InvalidOprIdx) {
5074  assert(InvalidOprIdx < Operands.size());
5075  const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5076  if (Op.isToken() && InvalidOprIdx > 1) {
5077    const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5078    return PrevOp.isToken() && PrevOp.getToken() == "::";
5079  }
5080  return false;
5081}
5082
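// Try to match the parsed operands against every available encoding variant
// (e.g. VOP3, SDWA, DPP), keep the most specific failure, and emit the first
// successful match once it passes validateInstruction.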
5083bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5084                                              OperandVector &Operands,
5085                                              MCStreamer &Out,
5086                                              uint64_t &ErrorInfo,
5087                                              bool MatchingInlineAsm) {
5088  MCInst Inst;
5089  unsigned Result = Match_Success;
5090  for (auto Variant : getMatchedVariants()) {
5091    uint64_t EI;
5092    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5093                                  Variant);
    // Match statuses are ordered from least to most specific; keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5097    if ((R == Match_Success) ||
5098        (R == Match_PreferE32) ||
5099        (R == Match_MissingFeature && Result != Match_PreferE32) ||
5100        (R == Match_InvalidOperand && Result != Match_MissingFeature
5101                                   && Result != Match_PreferE32) ||
5102        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
5103                                   && Result != Match_MissingFeature
5104                                   && Result != Match_PreferE32)) {
5105      Result = R;
5106      ErrorInfo = EI;
5107    }
5108    if (R == Match_Success)
5109      break;
5110  }
5111
5112  if (Result == Match_Success) {
5113    if (!validateInstruction(Inst, IDLoc, Operands)) {
5114      return true;
5115    }
5116    Inst.setLoc(IDLoc);
5117    Out.emitInstruction(Inst, getSTI());
5118    return false;
5119  }
5120
5121  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5122  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5123    return true;
5124  }
5125
5126  switch (Result) {
5127  default: break;
5128  case Match_MissingFeature:
5129    // It has been verified that the specified instruction
5130    // mnemonic is valid. A match was found but it requires
5131    // features which are not supported on this GPU.
5132    return Error(IDLoc, "operands are not valid for this GPU or mode");
5133
5134  case Match_InvalidOperand: {
5135    SMLoc ErrorLoc = IDLoc;
5136    if (ErrorInfo != ~0ULL) {
5137      if (ErrorInfo >= Operands.size()) {
5138        return Error(IDLoc, "too few operands for instruction");
5139      }
5140      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5141      if (ErrorLoc == SMLoc())
5142        ErrorLoc = IDLoc;
5143
5144      if (isInvalidVOPDY(Operands, ErrorInfo))
5145        return Error(ErrorLoc, "invalid VOPDY instruction");
5146    }
5147    return Error(ErrorLoc, "invalid operand for instruction");
5148  }
5149
5150  case Match_PreferE32:
5151    return Error(IDLoc, "internal error: instruction without _e64 suffix "
5152                        "should be encoded as e32");
5153  case Match_MnemonicFail:
5154    llvm_unreachable("Invalid instructions should have been handled already");
5155  }
5156  llvm_unreachable("Implement any new match types added!");
5157}
5158
5159bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5160  int64_t Tmp = -1;
5161  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5162    return true;
5163  }
5164  if (getParser().parseAbsoluteExpression(Tmp)) {
5165    return true;
5166  }
5167  Ret = static_cast<uint32_t>(Tmp);
5168  return false;
5169}
5170
5171bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5172  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5173    return TokError("directive only supported for amdgcn architecture");
5174
5175  std::string TargetIDDirective;
5176  SMLoc TargetStart = getTok().getLoc();
5177  if (getParser().parseEscapedString(TargetIDDirective))
5178    return true;
5179
5180  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5181  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5182    return getParser().Error(TargetRange.Start,
5183        (Twine(".amdgcn_target directive's target id ") +
5184         Twine(TargetIDDirective) +
5185         Twine(" does not match the specified target id ") +
5186         Twine(getTargetStreamer().getTargetID()->toString())).str());
5187
5188  return false;
5189}
5190
5191bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5192  return Error(Range.Start, "value out of range", Range);
5193}
5194
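// Convert raw VGPR/SGPR counts into the granulated "block" values encoded in
// compute_pgm_rsrc1. The SGPR count is padded with the extra registers implied
// by VCC, flat scratch and XNACK and range-checked against the target's
// addressable limit before encoding.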
5195bool AMDGPUAsmParser::calculateGPRBlocks(
5196    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5197    bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5198    unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5199    SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5200  // TODO(scott.linder): These calculations are duplicated from
5201  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5202  IsaVersion Version = getIsaVersion(getSTI().getCPU());
5203
5204  unsigned NumVGPRs = NextFreeVGPR;
5205  unsigned NumSGPRs = NextFreeSGPR;
5206
5207  if (Version.Major >= 10)
5208    NumSGPRs = 0;
5209  else {
5210    unsigned MaxAddressableNumSGPRs =
5211        IsaInfo::getAddressableNumSGPRs(&getSTI());
5212
5213    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5214        NumSGPRs > MaxAddressableNumSGPRs)
5215      return OutOfRangeError(SGPRRange);
5216
5217    NumSGPRs +=
5218        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5219
5220    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5221        NumSGPRs > MaxAddressableNumSGPRs)
5222      return OutOfRangeError(SGPRRange);
5223
5224    if (Features.test(FeatureSGPRInitBug))
5225      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5226  }
5227
5228  VGPRBlocks =
5229      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5230  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5231
5232  return false;
5233}
5234
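// Parse a .amdhsa_kernel ... .end_amdhsa_kernel block into a kernel
// descriptor. A minimal block looks like this (illustrative; most fields keep
// their defaults, and some targets require additional directives such as
// .amdhsa_accum_offset on gfx90a):
//
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
//
// The two .amdhsa_next_free_* directives are mandatory and are checked after
// the parse loop.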
5235bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5236  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5237    return TokError("directive only supported for amdgcn architecture");
5238
5239  if (!isHsaAbi(getSTI()))
5240    return TokError("directive only supported for amdhsa OS");
5241
5242  StringRef KernelName;
5243  if (getParser().parseIdentifier(KernelName))
5244    return true;
5245
5246  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5247
5248  StringSet<> Seen;
5249
5250  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5251
5252  SMRange VGPRRange;
5253  uint64_t NextFreeVGPR = 0;
5254  uint64_t AccumOffset = 0;
5255  uint64_t SharedVGPRCount = 0;
5256  uint64_t PreloadLength = 0;
5257  uint64_t PreloadOffset = 0;
5258  SMRange SGPRRange;
5259  uint64_t NextFreeSGPR = 0;
5260
5261  // Count the number of user SGPRs implied from the enabled feature bits.
5262  unsigned ImpliedUserSGPRCount = 0;
5263
5264  // Track if the asm explicitly contains the directive for the user SGPR
5265  // count.
5266  std::optional<unsigned> ExplicitUserSGPRCount;
5267  bool ReserveVCC = true;
5268  bool ReserveFlatScr = true;
5269  std::optional<bool> EnableWavefrontSize32;
5270
5271  while (true) {
5272    while (trySkipToken(AsmToken::EndOfStatement));
5273
5274    StringRef ID;
5275    SMRange IDRange = getTok().getLocRange();
5276    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5277      return true;
5278
5279    if (ID == ".end_amdhsa_kernel")
5280      break;
5281
5282    if (!Seen.insert(ID).second)
5283      return TokError(".amdhsa_ directives cannot be repeated");
5284
5285    SMLoc ValStart = getLoc();
5286    int64_t IVal;
5287    if (getParser().parseAbsoluteExpression(IVal))
5288      return true;
5289    SMLoc ValEnd = getLoc();
5290    SMRange ValRange = SMRange(ValStart, ValEnd);
5291
5292    if (IVal < 0)
5293      return OutOfRangeError(ValRange);
5294
5295    uint64_t Val = IVal;
5296
5297#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5298  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5299    return OutOfRangeError(RANGE);                                             \
5300  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
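// PARSE_BITS_ENTRY range-checks VALUE against the width of the target bit
// field (ENTRY##_WIDTH) and then packs it into FIELD via AMDHSA_BITS_SET.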
5301
5302    if (ID == ".amdhsa_group_segment_fixed_size") {
5303      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5304        return OutOfRangeError(ValRange);
5305      KD.group_segment_fixed_size = Val;
5306    } else if (ID == ".amdhsa_private_segment_fixed_size") {
5307      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5308        return OutOfRangeError(ValRange);
5309      KD.private_segment_fixed_size = Val;
5310    } else if (ID == ".amdhsa_kernarg_size") {
5311      if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5312        return OutOfRangeError(ValRange);
5313      KD.kernarg_size = Val;
5314    } else if (ID == ".amdhsa_user_sgpr_count") {
5315      ExplicitUserSGPRCount = Val;
5316    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5317      if (hasArchitectedFlatScratch())
5318        return Error(IDRange.Start,
5319                     "directive is not supported with architected flat scratch",
5320                     IDRange);
5321      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5322                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5323                       Val, ValRange);
5324      if (Val)
5325        ImpliedUserSGPRCount += 4;
5326    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5327      if (!hasKernargPreload())
5328        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5329
5330      if (Val > getMaxNumUserSGPRs())
5331        return OutOfRangeError(ValRange);
5332      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5333                       ValRange);
5334      if (Val) {
5335        ImpliedUserSGPRCount += Val;
5336        PreloadLength = Val;
5337      }
5338    } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5339      if (!hasKernargPreload())
5340        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5341
5342      if (Val >= 1024)
5343        return OutOfRangeError(ValRange);
5344      PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5345                       ValRange);
5346      if (Val)
5347        PreloadOffset = Val;
5348    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5349      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5350                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5351                       ValRange);
5352      if (Val)
5353        ImpliedUserSGPRCount += 2;
5354    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5355      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5356                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5357                       ValRange);
5358      if (Val)
5359        ImpliedUserSGPRCount += 2;
5360    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5361      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5362                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5363                       Val, ValRange);
5364      if (Val)
5365        ImpliedUserSGPRCount += 2;
5366    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5367      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5368                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5369                       ValRange);
5370      if (Val)
5371        ImpliedUserSGPRCount += 2;
5372    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5373      if (hasArchitectedFlatScratch())
5374        return Error(IDRange.Start,
5375                     "directive is not supported with architected flat scratch",
5376                     IDRange);
5377      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5378                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5379                       ValRange);
5380      if (Val)
5381        ImpliedUserSGPRCount += 2;
5382    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5383      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5384                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5385                       Val, ValRange);
5386      if (Val)
5387        ImpliedUserSGPRCount += 1;
5388    } else if (ID == ".amdhsa_wavefront_size32") {
5389      if (IVersion.Major < 10)
5390        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5391      EnableWavefrontSize32 = Val;
5392      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5393                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5394                       Val, ValRange);
5395    } else if (ID == ".amdhsa_uses_dynamic_stack") {
5396      PARSE_BITS_ENTRY(KD.kernel_code_properties,
5397                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5398    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5399      if (hasArchitectedFlatScratch())
5400        return Error(IDRange.Start,
5401                     "directive is not supported with architected flat scratch",
5402                     IDRange);
5403      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5404                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5405    } else if (ID == ".amdhsa_enable_private_segment") {
5406      if (!hasArchitectedFlatScratch())
5407        return Error(
5408            IDRange.Start,
5409            "directive is not supported without architected flat scratch",
5410            IDRange);
5411      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5412                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5413    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5414      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5415                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5416                       ValRange);
5417    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5418      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5419                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5420                       ValRange);
5421    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5422      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5423                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5424                       ValRange);
5425    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5426      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5427                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5428                       ValRange);
5429    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5430      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5431                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5432                       ValRange);
5433    } else if (ID == ".amdhsa_next_free_vgpr") {
5434      VGPRRange = ValRange;
5435      NextFreeVGPR = Val;
5436    } else if (ID == ".amdhsa_next_free_sgpr") {
5437      SGPRRange = ValRange;
5438      NextFreeSGPR = Val;
5439    } else if (ID == ".amdhsa_accum_offset") {
5440      if (!isGFX90A())
5441        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5442      AccumOffset = Val;
5443    } else if (ID == ".amdhsa_reserve_vcc") {
5444      if (!isUInt<1>(Val))
5445        return OutOfRangeError(ValRange);
5446      ReserveVCC = Val;
5447    } else if (ID == ".amdhsa_reserve_flat_scratch") {
5448      if (IVersion.Major < 7)
5449        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5450      if (hasArchitectedFlatScratch())
5451        return Error(IDRange.Start,
5452                     "directive is not supported with architected flat scratch",
5453                     IDRange);
5454      if (!isUInt<1>(Val))
5455        return OutOfRangeError(ValRange);
5456      ReserveFlatScr = Val;
5457    } else if (ID == ".amdhsa_reserve_xnack_mask") {
5458      if (IVersion.Major < 8)
5459        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5460      if (!isUInt<1>(Val))
5461        return OutOfRangeError(ValRange);
5462      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
                                 ".amdhsa_reserve_xnack_mask does not match "
                                 "target id",
                                 IDRange);
5465    } else if (ID == ".amdhsa_float_round_mode_32") {
5466      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5467                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5468    } else if (ID == ".amdhsa_float_round_mode_16_64") {
5469      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5470                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5471    } else if (ID == ".amdhsa_float_denorm_mode_32") {
5472      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5473                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5474    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5475      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5476                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5477                       ValRange);
5478    } else if (ID == ".amdhsa_dx10_clamp") {
5479      if (IVersion.Major >= 12)
5480        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5481      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5482                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5483                       ValRange);
5484    } else if (ID == ".amdhsa_ieee_mode") {
5485      if (IVersion.Major >= 12)
5486        return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5487      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5488                       COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5489                       ValRange);
5490    } else if (ID == ".amdhsa_fp16_overflow") {
5491      if (IVersion.Major < 9)
5492        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5493      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5494                       ValRange);
5495    } else if (ID == ".amdhsa_tg_split") {
5496      if (!isGFX90A())
5497        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5498      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5499                       ValRange);
5500    } else if (ID == ".amdhsa_workgroup_processor_mode") {
5501      if (IVersion.Major < 10)
5502        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5503      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5504                       ValRange);
5505    } else if (ID == ".amdhsa_memory_ordered") {
5506      if (IVersion.Major < 10)
5507        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5508      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5509                       ValRange);
5510    } else if (ID == ".amdhsa_forward_progress") {
5511      if (IVersion.Major < 10)
5512        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5513      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5514                       ValRange);
5515    } else if (ID == ".amdhsa_shared_vgpr_count") {
5516      if (IVersion.Major < 10 || IVersion.Major >= 12)
5517        return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5518                     IDRange);
5519      SharedVGPRCount = Val;
5520      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5521                       COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5522                       ValRange);
5523    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5524      PARSE_BITS_ENTRY(
5525          KD.compute_pgm_rsrc2,
5526          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5527          ValRange);
5528    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5529      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5530                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5531                       Val, ValRange);
5532    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5533      PARSE_BITS_ENTRY(
5534          KD.compute_pgm_rsrc2,
5535          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5536          ValRange);
5537    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5538      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5539                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5540                       Val, ValRange);
5541    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5542      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5543                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5544                       Val, ValRange);
5545    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5546      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5547                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5548                       Val, ValRange);
5549    } else if (ID == ".amdhsa_exception_int_div_zero") {
5550      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5551                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5552                       Val, ValRange);
5553    } else if (ID == ".amdhsa_round_robin_scheduling") {
5554      if (IVersion.Major < 12)
5555        return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5556      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5557                       COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5558                       ValRange);
5559    } else {
5560      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5561    }
5562
5563#undef PARSE_BITS_ENTRY
5564  }
5565
5566  if (!Seen.contains(".amdhsa_next_free_vgpr"))
5567    return TokError(".amdhsa_next_free_vgpr directive is required");
5568
5569  if (!Seen.contains(".amdhsa_next_free_sgpr"))
5570    return TokError(".amdhsa_next_free_sgpr directive is required");
5571
5572  unsigned VGPRBlocks;
5573  unsigned SGPRBlocks;
5574  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5575                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5576                         EnableWavefrontSize32, NextFreeVGPR,
5577                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5578                         SGPRBlocks))
5579    return true;
5580
5581  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5582          VGPRBlocks))
5583    return OutOfRangeError(VGPRRange);
5584  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5585                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5586
5587  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5588          SGPRBlocks))
5589    return OutOfRangeError(SGPRRange);
5590  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5591                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5592                  SGPRBlocks);
5593
5594  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5595    return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5596                    "enabled user SGPRs");
5597
5598  unsigned UserSGPRCount =
5599      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5600
5601  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5602    return TokError("too many user SGPRs enabled");
5603  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5604                  UserSGPRCount);
5605
5606  if (PreloadLength && KD.kernarg_size &&
5607      (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5608    return TokError("Kernarg preload length + offset is larger than the "
5609                    "kernarg segment size");
5610
5611  if (isGFX90A()) {
5612    if (!Seen.contains(".amdhsa_accum_offset"))
5613      return TokError(".amdhsa_accum_offset directive is required");
5614    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5615      return TokError("accum_offset should be in range [4..256] in "
5616                      "increments of 4");
5617    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5618      return TokError("accum_offset exceeds total VGPR allocation");
5619    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5620                    (AccumOffset / 4 - 1));
5621  }
5622
5623  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5625    if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5626      return TokError("shared_vgpr_count directive not valid on "
5627                      "wavefront size 32");
5628    }
5629    if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5630      return TokError("shared_vgpr_count*2 + "
5631                      "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5632                      "exceed 63\n");
5633    }
5634  }
5635
5636  getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5637                                                 NextFreeVGPR, NextFreeSGPR,
5638                                                 ReserveVCC, ReserveFlatScr);
5639  return false;
5640}
5641
5642bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5643  uint32_t Version;
5644  if (ParseAsAbsoluteExpression(Version))
5645    return true;
5646
5647  getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5648  return false;
5649}
5650
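// Parse the value of a single amd_kernel_code_t field (the key is already in
// ID) and apply target-specific consistency checks for selected keys such as
// wavefront_size and enable_wavefront_size32.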
5651bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5652                                               amd_kernel_code_t &Header) {
5653  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5654  // assembly for backwards compatibility.
5655  if (ID == "max_scratch_backing_memory_byte_size") {
5656    Parser.eatToEndOfStatement();
5657    return false;
5658  }
5659
5660  SmallString<40> ErrStr;
5661  raw_svector_ostream Err(ErrStr);
5662  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5663    return TokError(Err.str());
5664  }
5665  Lex();
5666
5667  if (ID == "enable_dx10_clamp") {
5668    if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5669        isGFX12Plus())
5670      return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5671  }
5672
5673  if (ID == "enable_ieee_mode") {
5674    if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5675        isGFX12Plus())
5676      return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5677  }
5678
5679  if (ID == "enable_wavefront_size32") {
5680    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5681      if (!isGFX10Plus())
5682        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5683      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5684        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5685    } else {
5686      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5687        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5688    }
5689  }
5690
5691  if (ID == "wavefront_size") {
5692    if (Header.wavefront_size == 5) {
5693      if (!isGFX10Plus())
5694        return TokError("wavefront_size=5 is only allowed on GFX10+");
5695      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5696        return TokError("wavefront_size=5 requires +WavefrontSize32");
5697    } else if (Header.wavefront_size == 6) {
5698      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5699        return TokError("wavefront_size=6 requires +WavefrontSize64");
5700    }
5701  }
5702
5703  if (ID == "enable_wgp_mode") {
5704    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5705        !isGFX10Plus())
5706      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5707  }
5708
5709  if (ID == "enable_mem_ordered") {
5710    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5711        !isGFX10Plus())
5712      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5713  }
5714
5715  if (ID == "enable_fwd_progress") {
5716    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5717        !isGFX10Plus())
5718      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5719  }
5720
5721  return false;
5722}
5723
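// Parse the legacy .amd_kernel_code_t ... .end_amd_kernel_code_t block used on
// the non-HSA path, e.g. (illustrative):
//
//   .amd_kernel_code_t
//     enable_ieee_mode = 1
//     wavefront_size = 6
//   .end_amd_kernel_code_t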
5724bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5725  amd_kernel_code_t Header;
5726  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5727
5728  while (true) {
5729    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5730    // will set the current token to EndOfStatement.
    while (trySkipToken(AsmToken::EndOfStatement));
5732
5733    StringRef ID;
5734    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5735      return true;
5736
5737    if (ID == ".end_amd_kernel_code_t")
5738      break;
5739
5740    if (ParseAMDKernelCodeTValue(ID, Header))
5741      return true;
5742  }
5743
5744  getTargetStreamer().EmitAMDKernelCodeT(Header);
5745
5746  return false;
5747}
5748
5749bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5750  StringRef KernelName;
5751  if (!parseId(KernelName, "expected symbol name"))
5752    return true;
5753
5754  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5755                                           ELF::STT_AMDGPU_HSA_KERNEL);
5756
5757  KernelScope.initialize(getContext());
5758  return false;
5759}
5760
5761bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5762  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5763    return Error(getLoc(),
5764                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5765                 "architectures");
5766  }
5767
5768  auto TargetIDDirective = getLexer().getTok().getStringContents();
5769  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5770    return Error(getParser().getTok().getLoc(), "target id must match options");
5771
5772  getTargetStreamer().EmitISAVersion();
5773  Lex();
5774
5775  return false;
5776}
5777
5778bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5779  assert(isHsaAbi(getSTI()));
5780
5781  std::string HSAMetadataString;
5782  if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5783                          HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5784    return true;
5785
5786  if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5787    return Error(getLoc(), "invalid HSA metadata");
5788
5789  return false;
5790}
5791
5792/// Common code to parse out a block of text (typically YAML) between start and
5793/// end directives.
5794bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5795                                          const char *AssemblerDirectiveEnd,
5796                                          std::string &CollectString) {
5797
5798  raw_string_ostream CollectStream(CollectString);
5799
5800  getLexer().setSkipSpace(false);
5801
5802  bool FoundEnd = false;
5803  while (!isToken(AsmToken::Eof)) {
5804    while (isToken(AsmToken::Space)) {
5805      CollectStream << getTokenStr();
5806      Lex();
5807    }
5808
5809    if (trySkipId(AssemblerDirectiveEnd)) {
5810      FoundEnd = true;
5811      break;
5812    }
5813
5814    CollectStream << Parser.parseStringToEndOfStatement()
5815                  << getContext().getAsmInfo()->getSeparatorString();
5816
5817    Parser.eatToEndOfStatement();
5818  }
5819
5820  getLexer().setSkipSpace(true);
5821
5822  if (isToken(AsmToken::Eof) && !FoundEnd) {
5823    return TokError(Twine("expected directive ") +
5824                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5825  }
5826
5827  CollectStream.flush();
5828  return false;
5829}
5830
5831/// Parse the assembler directive for new MsgPack-format PAL metadata.
5832bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5833  std::string String;
5834  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5835                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5836    return true;
5837
5838  auto PALMetadata = getTargetStreamer().getPALMetadata();
5839  if (!PALMetadata->setFromString(String))
5840    return Error(getLoc(), "invalid PAL metadata");
5841  return false;
5842}
5843
5844/// Parse the assembler directive for old linear-format PAL metadata.
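/// The payload is a comma-separated list with an even number of entries,
/// alternating register key and value (key0, value0, key1, value1, ...).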
5845bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5846  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5847    return Error(getLoc(),
5848                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5849                 "not available on non-amdpal OSes")).str());
5850  }
5851
5852  auto PALMetadata = getTargetStreamer().getPALMetadata();
5853  PALMetadata->setLegacy();
5854  for (;;) {
5855    uint32_t Key, Value;
5856    if (ParseAsAbsoluteExpression(Key)) {
5857      return TokError(Twine("invalid value in ") +
5858                      Twine(PALMD::AssemblerDirective));
5859    }
5860    if (!trySkipToken(AsmToken::Comma)) {
5861      return TokError(Twine("expected an even number of values in ") +
5862                      Twine(PALMD::AssemblerDirective));
5863    }
5864    if (ParseAsAbsoluteExpression(Value)) {
5865      return TokError(Twine("invalid value in ") +
5866                      Twine(PALMD::AssemblerDirective));
5867    }
5868    PALMetadata->setRegister(Key, Value);
5869    if (!trySkipToken(AsmToken::Comma))
5870      break;
5871  }
5872  return false;
5873}
5874
5875/// ParseDirectiveAMDGPULDS
5876///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5877bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5878  if (getParser().checkForValidSection())
5879    return true;
5880
5881  StringRef Name;
5882  SMLoc NameLoc = getLoc();
5883  if (getParser().parseIdentifier(Name))
5884    return TokError("expected identifier in directive");
5885
5886  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5887  if (getParser().parseComma())
5888    return true;
5889
5890  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5891
5892  int64_t Size;
5893  SMLoc SizeLoc = getLoc();
5894  if (getParser().parseAbsoluteExpression(Size))
5895    return true;
5896  if (Size < 0)
5897    return Error(SizeLoc, "size must be non-negative");
5898  if (Size > LocalMemorySize)
5899    return Error(SizeLoc, "size is too large");
5900
5901  int64_t Alignment = 4;
5902  if (trySkipToken(AsmToken::Comma)) {
5903    SMLoc AlignLoc = getLoc();
5904    if (getParser().parseAbsoluteExpression(Alignment))
5905      return true;
5906    if (Alignment < 0 || !isPowerOf2_64(Alignment))
5907      return Error(AlignLoc, "alignment must be a power of two");
5908
    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
5912    if (Alignment >= 1u << 31)
5913      return Error(AlignLoc, "alignment is too large");
5914  }
5915
5916  if (parseEOL())
5917    return true;
5918
5919  Symbol->redefineIfPossible();
5920  if (!Symbol->isUndefined())
5921    return Error(NameLoc, "invalid symbol redefinition");
5922
5923  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5924  return false;
5925}
5926
5927bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5928  StringRef IDVal = DirectiveID.getString();
5929
5930  if (isHsaAbi(getSTI())) {
5931    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();
5933
5934    if (IDVal == ".amdhsa_code_object_version")
5935      return ParseDirectiveAMDHSACodeObjectVersion();
5936
5937    // TODO: Restructure/combine with PAL metadata directive.
5938    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5939      return ParseDirectiveHSAMetadata();
5940  } else {
5941    if (IDVal == ".amd_kernel_code_t")
5942      return ParseDirectiveAMDKernelCodeT();
5943
5944    if (IDVal == ".amdgpu_hsa_kernel")
5945      return ParseDirectiveAMDGPUHsaKernel();
5946
5947    if (IDVal == ".amd_amdgpu_isa")
5948      return ParseDirectiveISAVersion();
5949
5950    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5951      return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5952                              Twine(" directive is "
5953                                    "not available on non-amdhsa OSes"))
5954                                 .str());
5955    }
5956  }
5957
5958  if (IDVal == ".amdgcn_target")
5959    return ParseDirectiveAMDGCNTarget();
5960
5961  if (IDVal == ".amdgpu_lds")
5962    return ParseDirectiveAMDGPULDS();
5963
5964  if (IDVal == PALMD::AssemblerDirectiveBegin)
5965    return ParseDirectivePALMetadataBegin();
5966
5967  if (IDVal == PALMD::AssemblerDirective)
5968    return ParseDirectivePALMetadata();
5969
5970  return true;
5971}
5972
5973bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5974                                           unsigned RegNo) {
5975
5976  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5977    return isGFX9Plus();
5978
5979  // GFX10+ has 2 more SGPRs 104 and 105.
5980  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5981    return hasSGPR104_SGPR105();
5982
5983  switch (RegNo) {
5984  case AMDGPU::SRC_SHARED_BASE_LO:
5985  case AMDGPU::SRC_SHARED_BASE:
5986  case AMDGPU::SRC_SHARED_LIMIT_LO:
5987  case AMDGPU::SRC_SHARED_LIMIT:
5988  case AMDGPU::SRC_PRIVATE_BASE_LO:
5989  case AMDGPU::SRC_PRIVATE_BASE:
5990  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5991  case AMDGPU::SRC_PRIVATE_LIMIT:
5992    return isGFX9Plus();
5993  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5994    return isGFX9Plus() && !isGFX11Plus();
5995  case AMDGPU::TBA:
5996  case AMDGPU::TBA_LO:
5997  case AMDGPU::TBA_HI:
5998  case AMDGPU::TMA:
5999  case AMDGPU::TMA_LO:
6000  case AMDGPU::TMA_HI:
6001    return !isGFX9Plus();
6002  case AMDGPU::XNACK_MASK:
6003  case AMDGPU::XNACK_MASK_LO:
6004  case AMDGPU::XNACK_MASK_HI:
6005    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
6006  case AMDGPU::SGPR_NULL:
6007    return isGFX10Plus();
6008  default:
6009    break;
6010  }
6011
6012  if (isCI())
6013    return true;
6014
6015  if (isSI() || isGFX10Plus()) {
6016    // No flat_scr on SI.
6017    // On GFX10Plus flat scratch is not a valid register operand and can only be
6018    // accessed with s_setreg/s_getreg.
6019    switch (RegNo) {
6020    case AMDGPU::FLAT_SCR:
6021    case AMDGPU::FLAT_SCR_LO:
6022    case AMDGPU::FLAT_SCR_HI:
6023      return false;
6024    default:
6025      return true;
6026    }
6027  }
6028
6029  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
6030  // SI/CI have.
6031  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
6032    return hasSGPR102_SGPR103();
6033
6034  return true;
6035}
6036
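// Parse one instruction operand. The VOPD form and the generated custom
// operand parsers are tried first; in NSA mode a bracketed register list such
// as "[v4, v5, v6]" is accepted for MIMG address operands, and anything else
// falls through to generic register/immediate parsing.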
6037ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6038                                          StringRef Mnemonic,
6039                                          OperandMode Mode) {
6040  ParseStatus Res = parseVOPD(Operands);
6041  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6042    return Res;
6043
6044  // Try to parse with a custom parser
6045  Res = MatchOperandParserImpl(Operands, Mnemonic);
6046
  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are still parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parsers, so we shouldn't continue on to the generic parsing.
6053  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6054    return Res;
6055
6056  SMLoc RBraceLoc;
6057  SMLoc LBraceLoc = getLoc();
6058  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6059    unsigned Prefix = Operands.size();
6060
6061    for (;;) {
6062      auto Loc = getLoc();
6063      Res = parseReg(Operands);
6064      if (Res.isNoMatch())
6065        Error(Loc, "expected a register");
6066      if (!Res.isSuccess())
6067        return ParseStatus::Failure;
6068
6069      RBraceLoc = getLoc();
6070      if (trySkipToken(AsmToken::RBrac))
6071        break;
6072
6073      if (!skipToken(AsmToken::Comma,
6074                     "expected a comma or a closing square bracket"))
6075        return ParseStatus::Failure;
6076    }
6077
6078    if (Operands.size() - Prefix > 1) {
6079      Operands.insert(Operands.begin() + Prefix,
6080                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6081      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6082    }
6083
6084    return ParseStatus::Success;
6085  }
6086
6087  return parseRegOrImm(Operands);
6088}
6089
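// Strip a trailing encoding-forcing suffix from the mnemonic and remember the
// requested encoding; for instance "v_add_f32_e64" forces the 64-bit (VOP3)
// encoding and is matched as "v_add_f32".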
6090StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6091  // Clear any forced encodings from the previous instruction.
6092  setForcedEncodingSize(0);
6093  setForcedDPP(false);
6094  setForcedSDWA(false);
6095
6096  if (Name.ends_with("_e64_dpp")) {
6097    setForcedDPP(true);
6098    setForcedEncodingSize(64);
6099    return Name.substr(0, Name.size() - 8);
6100  } else if (Name.ends_with("_e64")) {
6101    setForcedEncodingSize(64);
6102    return Name.substr(0, Name.size() - 4);
6103  } else if (Name.ends_with("_e32")) {
6104    setForcedEncodingSize(32);
6105    return Name.substr(0, Name.size() - 4);
6106  } else if (Name.ends_with("_dpp")) {
6107    setForcedDPP(true);
6108    return Name.substr(0, Name.size() - 4);
6109  } else if (Name.ends_with("_sdwa")) {
6110    setForcedSDWA(true);
6111    return Name.substr(0, Name.size() - 5);
6112  }
6113  return Name;
6114}
6115
6116static void applyMnemonicAliases(StringRef &Mnemonic,
6117                                 const FeatureBitset &Features,
6118                                 unsigned VariantID);
6119
6120bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6121                                       StringRef Name,
6122                                       SMLoc NameLoc, OperandVector &Operands) {
6123  // Add the instruction mnemonic
6124  Name = parseMnemonicSuffix(Name);
6125
6126  // If the target architecture uses MnemonicAlias, call it here to parse
6127  // operands correctly.
6128  applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6129
6130  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6131
6132  bool IsMIMG = Name.starts_with("image_");
6133
6134  while (!trySkipToken(AsmToken::EndOfStatement)) {
6135    OperandMode Mode = OperandMode_Default;
6136    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6137      Mode = OperandMode_NSA;
6138    ParseStatus Res = parseOperand(Operands, Name, Mode);
6139
6140    if (!Res.isSuccess()) {
6141      checkUnsupportedInstruction(Name, NameLoc);
6142      if (!Parser.hasPendingError()) {
6143        // FIXME: use real operand location rather than the current location.
6144        StringRef Msg = Res.isFailure() ? "failed parsing operand."
6145                                        : "not a valid operand.";
6146        Error(getLoc(), Msg);
6147      }
6148      while (!trySkipToken(AsmToken::EndOfStatement)) {
6149        lex();
6150      }
6151      return true;
6152    }
6153
6154    // Eat the comma or space if there is one.
6155    trySkipToken(AsmToken::Comma);
6156  }
6157
6158  return false;
6159}
6160
6161//===----------------------------------------------------------------------===//
6162// Utility functions
6163//===----------------------------------------------------------------------===//
6164
6165ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6166                                          OperandVector &Operands) {
6167  SMLoc S = getLoc();
6168  if (!trySkipId(Name))
6169    return ParseStatus::NoMatch;
6170
6171  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6172  return ParseStatus::Success;
6173}
6174
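// Parse an integer operand written as "<Prefix>:<expr>", e.g. "offset:16".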
6175ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6176                                                int64_t &IntVal) {
6177
6178  if (!trySkipId(Prefix, AsmToken::Colon))
6179    return ParseStatus::NoMatch;
6180
6181  return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6182}
6183
6184ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6185    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6186    std::function<bool(int64_t &)> ConvertResult) {
6187  SMLoc S = getLoc();
6188  int64_t Value = 0;
6189
6190  ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6191  if (!Res.isSuccess())
6192    return Res;
6193
6194  if (ConvertResult && !ConvertResult(Value)) {
6195    Error(S, "invalid " + StringRef(Prefix) + " value.");
6196  }
6197
6198  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6199  return ParseStatus::Success;
6200}
6201
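// Parse a bit-array operand written as "<Prefix>:[b0,b1,...]" with up to
// four 0/1 elements, e.g. "op_sel:[0,1]" or "neg:[1,0,0]"; the bits are
// packed into a single immediate.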
6202ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6203    const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6204    bool (*ConvertResult)(int64_t &)) {
6205  SMLoc S = getLoc();
6206  if (!trySkipId(Prefix, AsmToken::Colon))
6207    return ParseStatus::NoMatch;
6208
6209  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6210    return ParseStatus::Failure;
6211
6212  unsigned Val = 0;
6213  const unsigned MaxSize = 4;
6214
6215  // FIXME: How to verify the number of elements matches the number of src
6216  // operands?
6217  for (int I = 0; ; ++I) {
6218    int64_t Op;
6219    SMLoc Loc = getLoc();
6220    if (!parseExpr(Op))
6221      return ParseStatus::Failure;
6222
6223    if (Op != 0 && Op != 1)
6224      return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6225
6226    Val |= (Op << I);
6227
6228    if (trySkipToken(AsmToken::RBrac))
6229      break;
6230
6231    if (I + 1 == MaxSize)
6232      return Error(getLoc(), "expected a closing square bracket");
6233
6234    if (!skipToken(AsmToken::Comma, "expected a comma"))
6235      return ParseStatus::Failure;
6236  }
6237
6238  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6239  return ParseStatus::Success;
6240}
6241
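// Parse a named single-bit modifier: the bare name (e.g. "r128") sets the
// bit, and the "no"-prefixed form (e.g. "nor128") clears it.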
6242ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6243                                           OperandVector &Operands,
6244                                           AMDGPUOperand::ImmTy ImmTy) {
6245  int64_t Bit;
6246  SMLoc S = getLoc();
6247
6248  if (trySkipId(Name)) {
6249    Bit = 1;
6250  } else if (trySkipId("no", Name)) {
6251    Bit = 0;
6252  } else {
6253    return ParseStatus::NoMatch;
6254  }
6255
6256  if (Name == "r128" && !hasMIMG_R128())
6257    return Error(S, "r128 modifier is not supported on this GPU");
6258  if (Name == "a16" && !hasA16())
6259    return Error(S, "a16 modifier is not supported on this GPU");
6260
6261  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6262    ImmTy = AMDGPUOperand::ImmTyR128A16;
6263
6264  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6265  return ParseStatus::Success;
6266}
6267
6268unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6269                                      bool &Disabling) const {
6270  Disabling = Id.consume_front("no");
6271
6272  if (isGFX940() && !Mnemo.starts_with("s_")) {
6273    return StringSwitch<unsigned>(Id)
6274        .Case("nt", AMDGPU::CPol::NT)
6275        .Case("sc0", AMDGPU::CPol::SC0)
6276        .Case("sc1", AMDGPU::CPol::SC1)
6277        .Default(0);
6278  }
6279
6280  return StringSwitch<unsigned>(Id)
6281      .Case("dlc", AMDGPU::CPol::DLC)
6282      .Case("glc", AMDGPU::CPol::GLC)
6283      .Case("scc", AMDGPU::CPol::SCC)
6284      .Case("slc", AMDGPU::CPol::SLC)
6285      .Default(0);
6286}
6287
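// Parse cache-policy modifiers. On GFX12+ these are the "th:..." and
// "scope:..." forms, e.g. "th:TH_LOAD_NT scope:SCOPE_SYS"; on earlier
// targets they are bare keywords such as "glc", "slc", "dlc" and "scc",
// optionally negated with a "no" prefix, e.g. "noglc".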
6288ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6289  if (isGFX12Plus()) {
6290    SMLoc StringLoc = getLoc();
6291
6292    int64_t CPolVal = 0;
6293    ParseStatus ResTH = ParseStatus::NoMatch;
6294    ParseStatus ResScope = ParseStatus::NoMatch;
6295
6296    for (;;) {
6297      if (ResTH.isNoMatch()) {
6298        int64_t TH;
6299        ResTH = parseTH(Operands, TH);
6300        if (ResTH.isFailure())
6301          return ResTH;
6302        if (ResTH.isSuccess()) {
6303          CPolVal |= TH;
6304          continue;
6305        }
6306      }
6307
6308      if (ResScope.isNoMatch()) {
6309        int64_t Scope;
6310        ResScope = parseScope(Operands, Scope);
6311        if (ResScope.isFailure())
6312          return ResScope;
6313        if (ResScope.isSuccess()) {
6314          CPolVal |= Scope;
6315          continue;
6316        }
6317      }
6318
6319      break;
6320    }
6321
6322    if (ResTH.isNoMatch() && ResScope.isNoMatch())
6323      return ParseStatus::NoMatch;
6324
6325    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6326                                                AMDGPUOperand::ImmTyCPol));
6327    return ParseStatus::Success;
6328  }
6329
6330  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6331  SMLoc OpLoc = getLoc();
6332  unsigned Enabled = 0, Seen = 0;
6333  for (;;) {
6334    SMLoc S = getLoc();
6335    bool Disabling;
6336    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6337    if (!CPol)
6338      break;
6339
6340    lex();
6341
6342    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6343      return Error(S, "dlc modifier is not supported on this GPU");
6344
6345    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6346      return Error(S, "scc modifier is not supported on this GPU");
6347
6348    if (Seen & CPol)
6349      return Error(S, "duplicate cache policy modifier");
6350
6351    if (!Disabling)
6352      Enabled |= CPol;
6353
6354    Seen |= CPol;
6355  }
6356
6357  if (!Seen)
6358    return ParseStatus::NoMatch;
6359
6360  Operands.push_back(
6361      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6362  return ParseStatus::Success;
6363}
6364
6365ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6366                                        int64_t &Scope) {
6367  Scope = AMDGPU::CPol::SCOPE_CU; // default
6368
6369  StringRef Value;
6370  SMLoc StringLoc;
6371  ParseStatus Res;
6372
6373  Res = parseStringWithPrefix("scope", Value, StringLoc);
6374  if (!Res.isSuccess())
6375    return Res;
6376
6377  Scope = StringSwitch<int64_t>(Value)
6378              .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6379              .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6380              .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6381              .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6382              .Default(0xffffffff);
6383
6384  if (Scope == 0xffffffff)
6385    return Error(StringLoc, "invalid scope value");
6386
6387  return ParseStatus::Success;
6388}
6389
6390ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6391  TH = AMDGPU::CPol::TH_RT; // default
6392
6393  StringRef Value;
6394  SMLoc StringLoc;
6395  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6396  if (!Res.isSuccess())
6397    return Res;
6398
6399  if (Value == "TH_DEFAULT")
6400    TH = AMDGPU::CPol::TH_RT;
6401  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6402           Value == "TH_LOAD_NT_WB") {
6403    return Error(StringLoc, "invalid th value");
6404  } else if (Value.starts_with("TH_ATOMIC_")) {
6405    Value = Value.drop_front(10);
6406    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6407  } else if (Value.starts_with("TH_LOAD_")) {
6408    Value = Value.drop_front(8);
6409    TH = AMDGPU::CPol::TH_TYPE_LOAD;
6410  } else if (Value.starts_with("TH_STORE_")) {
6411    Value = Value.drop_front(9);
6412    TH = AMDGPU::CPol::TH_TYPE_STORE;
6413  } else {
6414    return Error(StringLoc, "invalid th value");
6415  }
6416
6417  if (Value == "BYPASS")
6418    TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6419
6420  if (TH != 0) {
6421    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6422      TH |= StringSwitch<int64_t>(Value)
6423                .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6424                .Case("RT", AMDGPU::CPol::TH_RT)
6425                .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6426                .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6427                .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6428                                       AMDGPU::CPol::TH_ATOMIC_RETURN)
6429                .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6430                .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6431                                        AMDGPU::CPol::TH_ATOMIC_NT)
6432                .Default(0xffffffff);
6433    else
6434      TH |= StringSwitch<int64_t>(Value)
6435                .Case("RT", AMDGPU::CPol::TH_RT)
6436                .Case("NT", AMDGPU::CPol::TH_NT)
6437                .Case("HT", AMDGPU::CPol::TH_HT)
6438                .Case("LU", AMDGPU::CPol::TH_LU)
6439                .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6440                .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6441                .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6442                .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6443                .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6444                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6445                .Default(0xffffffff);
6446  }
6447
6448  if (TH == 0xffffffff)
6449    return Error(StringLoc, "invalid th value");
6450
6451  return ParseStatus::Success;
6452}
6453
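// Append an immediate operand to Inst: use the operand recorded in
// OptionalIdx for ImmT if it was parsed, otherwise use Default.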
6454static void addOptionalImmOperand(
6455  MCInst& Inst, const OperandVector& Operands,
6456  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6457  AMDGPUOperand::ImmTy ImmT,
6458  int64_t Default = 0) {
6459  auto i = OptionalIdx.find(ImmT);
6460  if (i != OptionalIdx.end()) {
6461    unsigned Idx = i->second;
6462    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6463  } else {
6464    Inst.addOperand(MCOperand::createImm(Default));
6465  }
6466}
6467
6468ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6469                                                   StringRef &Value,
6470                                                   SMLoc &StringLoc) {
6471  if (!trySkipId(Prefix, AsmToken::Colon))
6472    return ParseStatus::NoMatch;
6473
6474  StringLoc = getLoc();
6475  return parseId(Value, "expected an identifier") ? ParseStatus::Success
6476                                                  : ParseStatus::Failure;
6477}
6478
6479//===----------------------------------------------------------------------===//
6480// MTBUF format
6481//===----------------------------------------------------------------------===//
6482
6483bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6484                                  int64_t MaxVal,
6485                                  int64_t &Fmt) {
6486  int64_t Val;
6487  SMLoc Loc = getLoc();
6488
6489  auto Res = parseIntWithPrefix(Pref, Val);
6490  if (Res.isFailure())
6491    return false;
6492  if (Res.isNoMatch())
6493    return true;
6494
6495  if (Val < 0 || Val > MaxVal) {
6496    Error(Loc, Twine("out of range ", StringRef(Pref)));
6497    return false;
6498  }
6499
6500  Fmt = Val;
6501  return true;
6502}
6503
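// Parse an "index_key:<n>" operand; the accepted range depends on the key
// width (0-1 for 16-bit keys, 0-3 for 8-bit keys).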
6504ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6505                                              AMDGPUOperand::ImmTy ImmTy) {
6506  const char *Pref = "index_key";
6507  int64_t ImmVal = 0;
6508  SMLoc Loc = getLoc();
6509  auto Res = parseIntWithPrefix(Pref, ImmVal);
6510  if (!Res.isSuccess())
6511    return Res;
6512
6513  if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6514    return Error(Loc, Twine("out of range ", StringRef(Pref)));
6515
6516  if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6517    return Error(Loc, Twine("out of range ", StringRef(Pref)));
6518
6519  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6520  return ParseStatus::Success;
6521}
6522
6523ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
6524  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
6525}
6526
6527ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
6528  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
6529}
6530
6531// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6532// values to live in a joint format operand in the MCInst encoding.
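// Either field may be omitted and they may appear in either order,
// e.g. "dfmt:1, nfmt:2" or "nfmt:2".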
6533ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6534  using namespace llvm::AMDGPU::MTBUFFormat;
6535
6536  int64_t Dfmt = DFMT_UNDEF;
6537  int64_t Nfmt = NFMT_UNDEF;
6538
6539  // dfmt and nfmt can appear in either order, and each is optional.
6540  for (int I = 0; I < 2; ++I) {
6541    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6542      return ParseStatus::Failure;
6543
6544    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6545      return ParseStatus::Failure;
6546
6547    // Skip optional comma between dfmt/nfmt
6548    // but guard against 2 commas following each other.
6549    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6550        !peekToken().is(AsmToken::Comma)) {
6551      trySkipToken(AsmToken::Comma);
6552    }
6553  }
6554
6555  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6556    return ParseStatus::NoMatch;
6557
6558  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6559  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6560
6561  Format = encodeDfmtNfmt(Dfmt, Nfmt);
6562  return ParseStatus::Success;
6563}
6564
6565ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6566  using namespace llvm::AMDGPU::MTBUFFormat;
6567
6568  int64_t Fmt = UFMT_UNDEF;
6569
6570  if (!tryParseFmt("format", UFMT_MAX, Fmt))
6571    return ParseStatus::Failure;
6572
6573  if (Fmt == UFMT_UNDEF)
6574    return ParseStatus::NoMatch;
6575
6576  Format = Fmt;
6577  return ParseStatus::Success;
6578}
6579
6580bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6581                                    int64_t &Nfmt,
6582                                    StringRef FormatStr,
6583                                    SMLoc Loc) {
6584  using namespace llvm::AMDGPU::MTBUFFormat;
6585  int64_t Format;
6586
6587  Format = getDfmt(FormatStr);
6588  if (Format != DFMT_UNDEF) {
6589    Dfmt = Format;
6590    return true;
6591  }
6592
6593  Format = getNfmt(FormatStr, getSTI());
6594  if (Format != NFMT_UNDEF) {
6595    Nfmt = Format;
6596    return true;
6597  }
6598
6599  Error(Loc, "unsupported format");
6600  return false;
6601}
6602
6603ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6604                                                      SMLoc FormatLoc,
6605                                                      int64_t &Format) {
6606  using namespace llvm::AMDGPU::MTBUFFormat;
6607
6608  int64_t Dfmt = DFMT_UNDEF;
6609  int64_t Nfmt = NFMT_UNDEF;
6610  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6611    return ParseStatus::Failure;
6612
6613  if (trySkipToken(AsmToken::Comma)) {
6614    StringRef Str;
6615    SMLoc Loc = getLoc();
6616    if (!parseId(Str, "expected a format string") ||
6617        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6618      return ParseStatus::Failure;
6619    if (Dfmt == DFMT_UNDEF)
6620      return Error(Loc, "duplicate numeric format");
6621    if (Nfmt == NFMT_UNDEF)
6622      return Error(Loc, "duplicate data format");
6623  }
6624
6625  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6626  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6627
6628  if (isGFX10Plus()) {
6629    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6630    if (Ufmt == UFMT_UNDEF)
6631      return Error(FormatLoc, "unsupported format");
6632    Format = Ufmt;
6633  } else {
6634    Format = encodeDfmtNfmt(Dfmt, Nfmt);
6635  }
6636
6637  return ParseStatus::Success;
6638}
6639
6640ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6641                                                        SMLoc Loc,
6642                                                        int64_t &Format) {
6643  using namespace llvm::AMDGPU::MTBUFFormat;
6644
6645  auto Id = getUnifiedFormat(FormatStr, getSTI());
6646  if (Id == UFMT_UNDEF)
6647    return ParseStatus::NoMatch;
6648
6649  if (!isGFX10Plus())
6650    return Error(Loc, "unified format is not supported on this GPU");
6651
6652  Format = Id;
6653  return ParseStatus::Success;
6654}
6655
6656ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6657  using namespace llvm::AMDGPU::MTBUFFormat;
6658  SMLoc Loc = getLoc();
6659
6660  if (!parseExpr(Format))
6661    return ParseStatus::Failure;
6662  if (!isValidFormatEncoding(Format, getSTI()))
6663    return Error(Loc, "out of range format");
6664
6665  return ParseStatus::Success;
6666}
6667
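// Parse a "format:..." operand, either symbolic, e.g.
// "format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]" (split form) or
// "format:[BUF_FMT_32_FLOAT]" (unified form, GFX10+), or numeric,
// e.g. "format:22".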
6668ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6669  using namespace llvm::AMDGPU::MTBUFFormat;
6670
6671  if (!trySkipId("format", AsmToken::Colon))
6672    return ParseStatus::NoMatch;
6673
6674  if (trySkipToken(AsmToken::LBrac)) {
6675    StringRef FormatStr;
6676    SMLoc Loc = getLoc();
6677    if (!parseId(FormatStr, "expected a format string"))
6678      return ParseStatus::Failure;
6679
6680    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6681    if (Res.isNoMatch())
6682      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6683    if (!Res.isSuccess())
6684      return Res;
6685
6686    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6687      return ParseStatus::Failure;
6688
6689    return ParseStatus::Success;
6690  }
6691
6692  return parseNumericFormat(Format);
6693}
6694
6695ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6696  using namespace llvm::AMDGPU::MTBUFFormat;
6697
6698  int64_t Format = getDefaultFormatEncoding(getSTI());
6699  ParseStatus Res;
6700  SMLoc Loc = getLoc();
6701
6702  // Parse legacy format syntax.
6703  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6704  if (Res.isFailure())
6705    return Res;
6706
6707  bool FormatFound = Res.isSuccess();
6708
6709  Operands.push_back(
6710    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6711
6712  if (FormatFound)
6713    trySkipToken(AsmToken::Comma);
6714
6715  if (isToken(AsmToken::EndOfStatement)) {
6716    // We are expecting an soffset operand,
6717    // but let the matcher handle the error.
6718    return ParseStatus::Success;
6719  }
6720
6721  // Parse soffset.
6722  Res = parseRegOrImm(Operands);
6723  if (!Res.isSuccess())
6724    return Res;
6725
6726  trySkipToken(AsmToken::Comma);
6727
6728  if (!FormatFound) {
6729    Res = parseSymbolicOrNumericFormat(Format);
6730    if (Res.isFailure())
6731      return Res;
6732    if (Res.isSuccess()) {
6733      auto Size = Operands.size();
6734      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6735      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6736      Op.setImm(Format);
6737    }
6738    return ParseStatus::Success;
6739  }
6740
6741  if (isId("format") && peekToken().is(AsmToken::Colon))
6742    return Error(getLoc(), "duplicate format");
6743  return ParseStatus::Success;
6744}
6745
6746ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6747  ParseStatus Res =
6748      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6749  if (Res.isNoMatch()) {
6750    Res = parseIntWithPrefix("inst_offset", Operands,
6751                             AMDGPUOperand::ImmTyInstOffset);
6752  }
6753  return Res;
6754}
6755
6756ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6757  ParseStatus Res =
6758      parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6759  if (Res.isNoMatch())
6760    Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6761  return Res;
6762}
6763
6764ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6765  ParseStatus Res =
6766      parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6767  if (Res.isNoMatch()) {
6768    Res =
6769        parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6770  }
6771  return Res;
6772}
6773
6774//===----------------------------------------------------------------------===//
6775// Exp
6776//===----------------------------------------------------------------------===//
6777
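// Convert the parsed operands of an exp instruction into an MCInst:
// register sources and "off" placeholders fill the four source slots, and
// the "en" mask is derived from which slots are enabled (pairs of bits in
// the compressed case).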
6778void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6779  OptionalImmIndexMap OptionalIdx;
6780
6781  unsigned OperandIdx[4];
6782  unsigned EnMask = 0;
6783  int SrcIdx = 0;
6784
6785  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6786    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6787
6788    // Add the register arguments
6789    if (Op.isReg()) {
6790      assert(SrcIdx < 4);
6791      OperandIdx[SrcIdx] = Inst.size();
6792      Op.addRegOperands(Inst, 1);
6793      ++SrcIdx;
6794      continue;
6795    }
6796
6797    if (Op.isOff()) {
6798      assert(SrcIdx < 4);
6799      OperandIdx[SrcIdx] = Inst.size();
6800      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6801      ++SrcIdx;
6802      continue;
6803    }
6804
6805    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6806      Op.addImmOperands(Inst, 1);
6807      continue;
6808    }
6809
6810    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6811      continue;
6812
6813    // Handle optional arguments
6814    OptionalIdx[Op.getImmTy()] = i;
6815  }
6816
6817  assert(SrcIdx == 4);
6818
6819  bool Compr = false;
6820  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6821    Compr = true;
6822    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6823    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6824    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6825  }
6826
6827  for (auto i = 0; i < SrcIdx; ++i) {
6828    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6829      EnMask |= Compr ? (0x3 << (i * 2)) : (0x1 << i);
6830    }
6831  }
6832
6833  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6834  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6835
6836  Inst.addOperand(MCOperand::createImm(EnMask));
6837}
6838
6839//===----------------------------------------------------------------------===//
6840// s_waitcnt
6841//===----------------------------------------------------------------------===//
6842
6843static bool
6844encodeCnt(
6845  const AMDGPU::IsaVersion ISA,
6846  int64_t &IntVal,
6847  int64_t CntVal,
6848  bool Saturate,
6849  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6850  unsigned (*decode)(const IsaVersion &Version, unsigned))
6851{
6852  bool Failed = false;
6853
6854  IntVal = encode(ISA, IntVal, CntVal);
6855  if (CntVal != decode(ISA, IntVal)) {
6856    if (Saturate) {
6857      IntVal = encode(ISA, IntVal, -1);
6858    } else {
6859      Failed = true;
6860    }
6861  }
6862  return Failed;
6863}
6864
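// Parse one counter term of an s_waitcnt operand, e.g. "vmcnt(0)".
// The "_sat" forms, e.g. "vmcnt_sat(99)", clamp out-of-range values
// instead of reporting an error.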
6865bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6866
6867  SMLoc CntLoc = getLoc();
6868  StringRef CntName = getTokenStr();
6869
6870  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6871      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6872    return false;
6873
6874  int64_t CntVal;
6875  SMLoc ValLoc = getLoc();
6876  if (!parseExpr(CntVal))
6877    return false;
6878
6879  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6880
6881  bool Failed = true;
6882  bool Sat = CntName.ends_with("_sat");
6883
6884  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6885    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6886  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6887    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6888  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6889    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6890  } else {
6891    Error(CntLoc, "invalid counter name " + CntName);
6892    return false;
6893  }
6894
6895  if (Failed) {
6896    Error(ValLoc, "too large value for " + CntName);
6897    return false;
6898  }
6899
6900  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6901    return false;
6902
6903  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6904    if (isToken(AsmToken::EndOfStatement)) {
6905      Error(getLoc(), "expected a counter name");
6906      return false;
6907    }
6908  }
6909
6910  return true;
6911}
6912
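// Parse an s_waitcnt operand, either as a list of counters,
// e.g. "vmcnt(0) & expcnt(0) & lgkmcnt(0)", or as a plain expression.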
6913ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6914  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6915  int64_t Waitcnt = getWaitcntBitMask(ISA);
6916  SMLoc S = getLoc();
6917
6918  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6919    while (!isToken(AsmToken::EndOfStatement)) {
6920      if (!parseCnt(Waitcnt))
6921        return ParseStatus::Failure;
6922    }
6923  } else {
6924    if (!parseExpr(Waitcnt))
6925      return ParseStatus::Failure;
6926  }
6927
6928  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6929  return ParseStatus::Success;
6930}
6931
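// Parse one field of an s_delay_alu operand, e.g. "instid0(VALU_DEP_1)" or
// "instskip(NEXT)"; fields are combined with '|'.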
6932bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6933  SMLoc FieldLoc = getLoc();
6934  StringRef FieldName = getTokenStr();
6935  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6936      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6937    return false;
6938
6939  SMLoc ValueLoc = getLoc();
6940  StringRef ValueName = getTokenStr();
6941  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6942      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6943    return false;
6944
6945  unsigned Shift;
6946  if (FieldName == "instid0") {
6947    Shift = 0;
6948  } else if (FieldName == "instskip") {
6949    Shift = 4;
6950  } else if (FieldName == "instid1") {
6951    Shift = 7;
6952  } else {
6953    Error(FieldLoc, "invalid field name " + FieldName);
6954    return false;
6955  }
6956
6957  int Value;
6958  if (Shift == 4) {
6959    // Parse values for instskip.
6960    Value = StringSwitch<int>(ValueName)
6961                .Case("SAME", 0)
6962                .Case("NEXT", 1)
6963                .Case("SKIP_1", 2)
6964                .Case("SKIP_2", 3)
6965                .Case("SKIP_3", 4)
6966                .Case("SKIP_4", 5)
6967                .Default(-1);
6968  } else {
6969    // Parse values for instid0 and instid1.
6970    Value = StringSwitch<int>(ValueName)
6971                .Case("NO_DEP", 0)
6972                .Case("VALU_DEP_1", 1)
6973                .Case("VALU_DEP_2", 2)
6974                .Case("VALU_DEP_3", 3)
6975                .Case("VALU_DEP_4", 4)
6976                .Case("TRANS32_DEP_1", 5)
6977                .Case("TRANS32_DEP_2", 6)
6978                .Case("TRANS32_DEP_3", 7)
6979                .Case("FMA_ACCUM_CYCLE_1", 8)
6980                .Case("SALU_CYCLE_1", 9)
6981                .Case("SALU_CYCLE_2", 10)
6982                .Case("SALU_CYCLE_3", 11)
6983                .Default(-1);
6984  }
6985  if (Value < 0) {
6986    Error(ValueLoc, "invalid value name " + ValueName);
6987    return false;
6988  }
6989
6990  Delay |= Value << Shift;
6991  return true;
6992}
6993
6994ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6995  int64_t Delay = 0;
6996  SMLoc S = getLoc();
6997
6998  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6999    do {
7000      if (!parseDelay(Delay))
7001        return ParseStatus::Failure;
7002    } while (trySkipToken(AsmToken::Pipe));
7003  } else {
7004    if (!parseExpr(Delay))
7005      return ParseStatus::Failure;
7006  }
7007
7008  Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7009  return ParseStatus::Success;
7010}
7011
7012bool
7013AMDGPUOperand::isSWaitCnt() const {
7014  return isImm();
7015}
7016
7017bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7018
7019//===----------------------------------------------------------------------===//
7020// DepCtr
7021//===----------------------------------------------------------------------===//
7022
7023void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7024                                  StringRef DepCtrName) {
7025  switch (ErrorId) {
7026  case OPR_ID_UNKNOWN:
7027    Error(Loc, Twine("invalid counter name ", DepCtrName));
7028    return;
7029  case OPR_ID_UNSUPPORTED:
7030    Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7031    return;
7032  case OPR_ID_DUPLICATE:
7033    Error(Loc, Twine("duplicate counter name ", DepCtrName));
7034    return;
7035  case OPR_VAL_INVALID:
7036    Error(Loc, Twine("invalid value for ", DepCtrName));
7037    return;
7038  default:
7039    assert(false);
7040  }
7041}
7042
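// Parse one counter term of an s_waitcnt_depctr operand, e.g.
// "depctr_va_vdst(0)". UsedOprMask tracks which counters have been seen so
// that duplicates can be diagnosed.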
7043bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
7044
7045  using namespace llvm::AMDGPU::DepCtr;
7046
7047  SMLoc DepCtrLoc = getLoc();
7048  StringRef DepCtrName = getTokenStr();
7049
7050  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7051      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7052    return false;
7053
7054  int64_t ExprVal;
7055  if (!parseExpr(ExprVal))
7056    return false;
7057
7058  unsigned PrevOprMask = UsedOprMask;
7059  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
7060
7061  if (CntVal < 0) {
7062    depCtrError(DepCtrLoc, CntVal, DepCtrName);
7063    return false;
7064  }
7065
7066  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7067    return false;
7068
7069  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7070    if (isToken(AsmToken::EndOfStatement)) {
7071      Error(getLoc(), "expected a counter name");
7072      return false;
7073    }
7074  }
7075
7076  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7077  DepCtr = (DepCtr & ~CntValMask) | CntVal;
7078  return true;
7079}
7080
7081ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7082  using namespace llvm::AMDGPU::DepCtr;
7083
7084  int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7085  SMLoc Loc = getLoc();
7086
7087  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7088    unsigned UsedOprMask = 0;
7089    while (!isToken(AsmToken::EndOfStatement)) {
7090      if (!parseDepCtr(DepCtr, UsedOprMask))
7091        return ParseStatus::Failure;
7092    }
7093  } else {
7094    if (!parseExpr(DepCtr))
7095      return ParseStatus::Failure;
7096  }
7097
7098  Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7099  return ParseStatus::Success;
7100}
7101
7102bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7103
7104//===----------------------------------------------------------------------===//
7105// hwreg
7106//===----------------------------------------------------------------------===//
7107
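// Parse the body of a "hwreg(...)" operand: a register name or numeric
// code, optionally followed by a bit offset and a bit-field width,
// e.g. "hwreg(HW_REG_MODE, 0, 4)".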
7108bool
7109AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
7110                                OperandInfoTy &Offset,
7111                                OperandInfoTy &Width) {
7112  using namespace llvm::AMDGPU::Hwreg;
7113
7114  // The register may be specified by name or using a numeric code
7115  HwReg.Loc = getLoc();
7116  if (isToken(AsmToken::Identifier) &&
7117      (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7118    HwReg.IsSymbolic = true;
7119    lex(); // skip register name
7120  } else if (!parseExpr(HwReg.Id, "a register name")) {
7121    return false;
7122  }
7123
7124  if (trySkipToken(AsmToken::RParen))
7125    return true;
7126
7127  // parse optional params
7128  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7129    return false;
7130
7131  Offset.Loc = getLoc();
7132  if (!parseExpr(Offset.Id))
7133    return false;
7134
7135  if (!skipToken(AsmToken::Comma, "expected a comma"))
7136    return false;
7137
7138  Width.Loc = getLoc();
7139  return parseExpr(Width.Id) &&
7140         skipToken(AsmToken::RParen, "expected a closing parenthesis");
7141}
7142
7143bool
7144AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7145                               const OperandInfoTy &Offset,
7146                               const OperandInfoTy &Width) {
7147
7148  using namespace llvm::AMDGPU::Hwreg;
7149
7150  if (HwReg.IsSymbolic) {
7151    if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7152      Error(HwReg.Loc,
7153            "specified hardware register is not supported on this GPU");
7154      return false;
7155    }
7156  } else {
7157    if (!isValidHwreg(HwReg.Id)) {
7158      Error(HwReg.Loc,
7159            "invalid code of hardware register: only 6-bit values are legal");
7160      return false;
7161    }
7162  }
7163  if (!isValidHwregOffset(Offset.Id)) {
7164    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7165    return false;
7166  }
7167  if (!isValidHwregWidth(Width.Id)) {
7168    Error(Width.Loc,
7169          "invalid bitfield width: only values from 1 to 32 are legal");
7170    return false;
7171  }
7172  return true;
7173}
7174
7175ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7176  using namespace llvm::AMDGPU::Hwreg;
7177
7178  int64_t ImmVal = 0;
7179  SMLoc Loc = getLoc();
7180
7181  if (trySkipId("hwreg", AsmToken::LParen)) {
7182    OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7183    OperandInfoTy Offset(OFFSET_DEFAULT_);
7184    OperandInfoTy Width(WIDTH_DEFAULT_);
7185    if (parseHwregBody(HwReg, Offset, Width) &&
7186        validateHwreg(HwReg, Offset, Width)) {
7187      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7188    } else {
7189      return ParseStatus::Failure;
7190    }
7191  } else if (parseExpr(ImmVal, "a hwreg macro")) {
7192    if (ImmVal < 0 || !isUInt<16>(ImmVal))
7193      return Error(Loc, "invalid immediate: only 16-bit values are legal");
7194  } else {
7195    return ParseStatus::Failure;
7196  }
7197
7198  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7199  return ParseStatus::Success;
7200}
7201
7202bool AMDGPUOperand::isHwreg() const {
7203  return isImmTy(ImmTyHwreg);
7204}
7205
7206//===----------------------------------------------------------------------===//
7207// sendmsg
7208//===----------------------------------------------------------------------===//
7209
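// Parse the body of a "sendmsg(...)" operand: a message name or numeric id,
// optionally followed by an operation and a stream id,
// e.g. "sendmsg(MSG_GS, GS_OP_EMIT, 0)".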
7210bool
7211AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7212                                  OperandInfoTy &Op,
7213                                  OperandInfoTy &Stream) {
7214  using namespace llvm::AMDGPU::SendMsg;
7215
7216  Msg.Loc = getLoc();
7217  if (isToken(AsmToken::Identifier) &&
7218      (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7219    Msg.IsSymbolic = true;
7220    lex(); // skip message name
7221  } else if (!parseExpr(Msg.Id, "a message name")) {
7222    return false;
7223  }
7224
7225  if (trySkipToken(AsmToken::Comma)) {
7226    Op.IsDefined = true;
7227    Op.Loc = getLoc();
7228    if (isToken(AsmToken::Identifier) &&
7229        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
7230      lex(); // skip operation name
7231    } else if (!parseExpr(Op.Id, "an operation name")) {
7232      return false;
7233    }
7234
7235    if (trySkipToken(AsmToken::Comma)) {
7236      Stream.IsDefined = true;
7237      Stream.Loc = getLoc();
7238      if (!parseExpr(Stream.Id))
7239        return false;
7240    }
7241  }
7242
7243  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7244}
7245
7246bool
7247AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7248                                 const OperandInfoTy &Op,
7249                                 const OperandInfoTy &Stream) {
7250  using namespace llvm::AMDGPU::SendMsg;
7251
7252  // Validation strictness depends on whether the message is specified
7253  // in a symbolic or in a numeric form. In the latter case,
7254  // only the encoding possibility is checked.
7255  bool Strict = Msg.IsSymbolic;
7256
7257  if (Strict) {
7258    if (Msg.Id == OPR_ID_UNSUPPORTED) {
7259      Error(Msg.Loc, "specified message id is not supported on this GPU");
7260      return false;
7261    }
7262  } else {
7263    if (!isValidMsgId(Msg.Id, getSTI())) {
7264      Error(Msg.Loc, "invalid message id");
7265      return false;
7266    }
7267  }
7268  if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7269    if (Op.IsDefined) {
7270      Error(Op.Loc, "message does not support operations");
7271    } else {
7272      Error(Msg.Loc, "missing message operation");
7273    }
7274    return false;
7275  }
7276  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7277    Error(Op.Loc, "invalid operation id");
7278    return false;
7279  }
7280  if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7281      Stream.IsDefined) {
7282    Error(Stream.Loc, "message operation does not support streams");
7283    return false;
7284  }
7285  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7286    Error(Stream.Loc, "invalid message stream id");
7287    return false;
7288  }
7289  return true;
7290}
7291
7292ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7293  using namespace llvm::AMDGPU::SendMsg;
7294
7295  int64_t ImmVal = 0;
7296  SMLoc Loc = getLoc();
7297
7298  if (trySkipId("sendmsg", AsmToken::LParen)) {
7299    OperandInfoTy Msg(OPR_ID_UNKNOWN);
7300    OperandInfoTy Op(OP_NONE_);
7301    OperandInfoTy Stream(STREAM_ID_NONE_);
7302    if (parseSendMsgBody(Msg, Op, Stream) &&
7303        validateSendMsg(Msg, Op, Stream)) {
7304      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7305    } else {
7306      return ParseStatus::Failure;
7307    }
7308  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7309    if (ImmVal < 0 || !isUInt<16>(ImmVal))
7310      return Error(Loc, "invalid immediate: only 16-bit values are legal");
7311  } else {
7312    return ParseStatus::Failure;
7313  }
7314
7315  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7316  return ParseStatus::Success;
7317}
7318
7319bool AMDGPUOperand::isSendMsg() const {
7320  return isImmTy(ImmTySendMsg);
7321}
7322
7323//===----------------------------------------------------------------------===//
7324// v_interp
7325//===----------------------------------------------------------------------===//
7326
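// Parse a v_interp slot ("p10", "p20" or "p0") or an attribute such as
// "attr0.x" (attribute number 0-32, channel .x/.y/.z/.w).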
7327ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7328  StringRef Str;
7329  SMLoc S = getLoc();
7330
7331  if (!parseId(Str))
7332    return ParseStatus::NoMatch;
7333
7334  int Slot = StringSwitch<int>(Str)
7335    .Case("p10", 0)
7336    .Case("p20", 1)
7337    .Case("p0", 2)
7338    .Default(-1);
7339
7340  if (Slot == -1)
7341    return Error(S, "invalid interpolation slot");
7342
7343  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7344                                              AMDGPUOperand::ImmTyInterpSlot));
7345  return ParseStatus::Success;
7346}
7347
7348ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7349  StringRef Str;
7350  SMLoc S = getLoc();
7351
7352  if (!parseId(Str))
7353    return ParseStatus::NoMatch;
7354
7355  if (!Str.starts_with("attr"))
7356    return Error(S, "invalid interpolation attribute");
7357
7358  StringRef Chan = Str.take_back(2);
7359  int AttrChan = StringSwitch<int>(Chan)
7360    .Case(".x", 0)
7361    .Case(".y", 1)
7362    .Case(".z", 2)
7363    .Case(".w", 3)
7364    .Default(-1);
7365  if (AttrChan == -1)
7366    return Error(S, "invalid or missing interpolation attribute channel");
7367
7368  Str = Str.drop_back(2).drop_front(4);
7369
7370  uint8_t Attr;
7371  if (Str.getAsInteger(10, Attr))
7372    return Error(S, "invalid or missing interpolation attribute number");
7373
7374  if (Attr > 32)
7375    return Error(S, "out of bounds interpolation attribute number");
7376
7377  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7378
7379  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7380                                              AMDGPUOperand::ImmTyInterpAttr));
7381  Operands.push_back(AMDGPUOperand::CreateImm(
7382      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7383  return ParseStatus::Success;
7384}
7385
7386//===----------------------------------------------------------------------===//
7387// exp
7388//===----------------------------------------------------------------------===//
7389
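// Parse an export target, e.g. "mrt0", "pos0" or "param0"; which targets
// are valid depends on the subtarget.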
7390ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7391  using namespace llvm::AMDGPU::Exp;
7392
7393  StringRef Str;
7394  SMLoc S = getLoc();
7395
7396  if (!parseId(Str))
7397    return ParseStatus::NoMatch;
7398
7399  unsigned Id = getTgtId(Str);
7400  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7401    return Error(S, (Id == ET_INVALID)
7402                        ? "invalid exp target"
7403                        : "exp target is not supported on this GPU");
7404
7405  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7406                                              AMDGPUOperand::ImmTyExpTgt));
7407  return ParseStatus::Success;
7408}
7409
7410//===----------------------------------------------------------------------===//
7411// parser helpers
7412//===----------------------------------------------------------------------===//
7413
7414bool
7415AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7416  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7417}
7418
7419bool
7420AMDGPUAsmParser::isId(const StringRef Id) const {
7421  return isId(getToken(), Id);
7422}
7423
7424bool
7425AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7426  return getTokenKind() == Kind;
7427}
7428
7429StringRef AMDGPUAsmParser::getId() const {
7430  return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7431}
7432
7433bool
7434AMDGPUAsmParser::trySkipId(const StringRef Id) {
7435  if (isId(Id)) {
7436    lex();
7437    return true;
7438  }
7439  return false;
7440}
7441
7442bool
7443AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7444  if (isToken(AsmToken::Identifier)) {
7445    StringRef Tok = getTokenStr();
7446    if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7447      lex();
7448      return true;
7449    }
7450  }
7451  return false;
7452}
7453
7454bool
7455AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7456  if (isId(Id) && peekToken().is(Kind)) {
7457    lex();
7458    lex();
7459    return true;
7460  }
7461  return false;
7462}
7463
7464bool
7465AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7466  if (isToken(Kind)) {
7467    lex();
7468    return true;
7469  }
7470  return false;
7471}
7472
7473bool
7474AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7475                           const StringRef ErrMsg) {
7476  if (!trySkipToken(Kind)) {
7477    Error(getLoc(), ErrMsg);
7478    return false;
7479  }
7480  return true;
7481}
7482
7483bool
7484AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7485  SMLoc S = getLoc();
7486
7487  const MCExpr *Expr;
7488  if (Parser.parseExpression(Expr))
7489    return false;
7490
7491  if (Expr->evaluateAsAbsolute(Imm))
7492    return true;
7493
7494  if (Expected.empty()) {
7495    Error(S, "expected absolute expression");
7496  } else {
7497    Error(S, Twine("expected ", Expected) +
7498             Twine(" or an absolute expression"));
7499  }
7500  return false;
7501}
7502
7503bool
7504AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7505  SMLoc S = getLoc();
7506
7507  const MCExpr *Expr;
7508  if (Parser.parseExpression(Expr))
7509    return false;
7510
7511  int64_t IntVal;
7512  if (Expr->evaluateAsAbsolute(IntVal)) {
7513    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7514  } else {
7515    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7516  }
7517  return true;
7518}
7519
7520bool
7521AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7522  if (isToken(AsmToken::String)) {
7523    Val = getToken().getStringContents();
7524    lex();
7525    return true;
7526  } else {
7527    Error(getLoc(), ErrMsg);
7528    return false;
7529  }
7530}
7531
7532bool
7533AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7534  if (isToken(AsmToken::Identifier)) {
7535    Val = getTokenStr();
7536    lex();
7537    return true;
7538  } else {
7539    if (!ErrMsg.empty())
7540      Error(getLoc(), ErrMsg);
7541    return false;
7542  }
7543}
7544
7545AsmToken
7546AMDGPUAsmParser::getToken() const {
7547  return Parser.getTok();
7548}
7549
7550AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7551  return isToken(AsmToken::EndOfStatement)
7552             ? getToken()
7553             : getLexer().peekTok(ShouldSkipSpace);
7554}
7555
7556void
7557AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7558  auto TokCount = getLexer().peekTokens(Tokens);
7559
7560  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7561    Tokens[Idx] = AsmToken(AsmToken::Error, "");
7562}
7563
7564AsmToken::TokenKind
7565AMDGPUAsmParser::getTokenKind() const {
7566  return getLexer().getKind();
7567}
7568
7569SMLoc
7570AMDGPUAsmParser::getLoc() const {
7571  return getToken().getLoc();
7572}
7573
7574StringRef
7575AMDGPUAsmParser::getTokenStr() const {
7576  return getToken().getString();
7577}
7578
7579void
7580AMDGPUAsmParser::lex() {
7581  Parser.Lex();
7582}
7583
7584SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7585  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7586}
7587
7588SMLoc
7589AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7590                               const OperandVector &Operands) const {
7591  for (unsigned i = Operands.size() - 1; i > 0; --i) {
7592    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7593    if (Test(Op))
7594      return Op.getStartLoc();
7595  }
7596  return getInstLoc(Operands);
7597}
7598
7599SMLoc
7600AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7601                           const OperandVector &Operands) const {
7602  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7603  return getOperandLoc(Test, Operands);
7604}
7605
7606SMLoc
7607AMDGPUAsmParser::getRegLoc(unsigned Reg,
7608                           const OperandVector &Operands) const {
7609  auto Test = [=](const AMDGPUOperand& Op) {
7610    return Op.isRegKind() && Op.getReg() == Reg;
7611  };
7612  return getOperandLoc(Test, Operands);
7613}
7614
7615SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7616                                 bool SearchMandatoryLiterals) const {
7617  auto Test = [](const AMDGPUOperand& Op) {
7618    return Op.IsImmKindLiteral() || Op.isExpr();
7619  };
7620  SMLoc Loc = getOperandLoc(Test, Operands);
7621  if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7622    Loc = getMandatoryLitLoc(Operands);
7623  return Loc;
7624}
7625
7626SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7627  auto Test = [](const AMDGPUOperand &Op) {
7628    return Op.IsImmKindMandatoryLiteral();
7629  };
7630  return getOperandLoc(Test, Operands);
7631}
7632
7633SMLoc
7634AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7635  auto Test = [](const AMDGPUOperand& Op) {
7636    return Op.isImmKindConst();
7637  };
7638  return getOperandLoc(Test, Operands);
7639}
7640
7641//===----------------------------------------------------------------------===//
7642// swizzle
7643//===----------------------------------------------------------------------===//
7644
7645LLVM_READNONE
7646static unsigned
7647encodeBitmaskPerm(const unsigned AndMask,
7648                  const unsigned OrMask,
7649                  const unsigned XorMask) {
7650  using namespace llvm::AMDGPU::Swizzle;
7651
7652  return BITMASK_PERM_ENC |
7653         (AndMask << BITMASK_AND_SHIFT) |
7654         (OrMask  << BITMASK_OR_SHIFT)  |
7655         (XorMask << BITMASK_XOR_SHIFT);
7656}
7657
7658bool
7659AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7660                                     const unsigned MinVal,
7661                                     const unsigned MaxVal,
7662                                     const StringRef ErrMsg,
7663                                     SMLoc &Loc) {
7664  if (!skipToken(AsmToken::Comma, "expected a comma")) {
7665    return false;
7666  }
7667  Loc = getLoc();
7668  if (!parseExpr(Op)) {
7669    return false;
7670  }
7671  if (Op < MinVal || Op > MaxVal) {
7672    Error(Loc, ErrMsg);
7673    return false;
7674  }
7675
7676  return true;
7677}
7678
7679bool
7680AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7681                                      const unsigned MinVal,
7682                                      const unsigned MaxVal,
7683                                      const StringRef ErrMsg) {
7684  SMLoc Loc;
7685  for (unsigned i = 0; i < OpNum; ++i) {
7686    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7687      return false;
7688  }
7689
7690  return true;
7691}
7692
7693bool
7694AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7695  using namespace llvm::AMDGPU::Swizzle;
7696
7697  int64_t Lane[LANE_NUM];
7698  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7699                           "expected a 2-bit lane id")) {
7700    Imm = QUAD_PERM_ENC;
7701    for (unsigned I = 0; I < LANE_NUM; ++I) {
7702      Imm |= Lane[I] << (LANE_SHIFT * I);
7703    }
7704    return true;
7705  }
7706  return false;
7707}
7708
7709bool
7710AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7711  using namespace llvm::AMDGPU::Swizzle;
7712
7713  SMLoc Loc;
7714  int64_t GroupSize;
7715  int64_t LaneIdx;
7716
7717  if (!parseSwizzleOperand(GroupSize,
7718                           2, 32,
7719                           "group size must be in the interval [2,32]",
7720                           Loc)) {
7721    return false;
7722  }
7723  if (!isPowerOf2_64(GroupSize)) {
7724    Error(Loc, "group size must be a power of two");
7725    return false;
7726  }
7727  if (parseSwizzleOperand(LaneIdx,
7728                          0, GroupSize - 1,
7729                          "lane id must be in the interval [0,group size - 1]",
7730                          Loc)) {
7731    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7732    return true;
7733  }
7734  return false;
7735}
7736
7737bool
7738AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7739  using namespace llvm::AMDGPU::Swizzle;
7740
7741  SMLoc Loc;
7742  int64_t GroupSize;
7743
7744  if (!parseSwizzleOperand(GroupSize,
7745                           2, 32,
7746                           "group size must be in the interval [2,32]",
7747                           Loc)) {
7748    return false;
7749  }
7750  if (!isPowerOf2_64(GroupSize)) {
7751    Error(Loc, "group size must be a power of two");
7752    return false;
7753  }
7754
7755  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7756  return true;
7757}
7758
7759bool
7760AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7761  using namespace llvm::AMDGPU::Swizzle;
7762
7763  SMLoc Loc;
7764  int64_t GroupSize;
7765
7766  if (!parseSwizzleOperand(GroupSize,
7767                           1, 16,
7768                           "group size must be in the interval [1,16]",
7769                           Loc)) {
7770    return false;
7771  }
7772  if (!isPowerOf2_64(GroupSize)) {
7773    Error(Loc, "group size must be a power of two");
7774    return false;
7775  }
7776
7777  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7778  return true;
7779}
7780
7781bool
7782AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7783  using namespace llvm::AMDGPU::Swizzle;
7784
7785  if (!skipToken(AsmToken::Comma, "expected a comma")) {
7786    return false;
7787  }
7788
7789  StringRef Ctl;
7790  SMLoc StrLoc = getLoc();
7791  if (!parseString(Ctl)) {
7792    return false;
7793  }
7794  if (Ctl.size() != BITMASK_WIDTH) {
7795    Error(StrLoc, "expected a 5-character mask");
7796    return false;
7797  }
7798
7799  unsigned AndMask = 0;
7800  unsigned OrMask = 0;
7801  unsigned XorMask = 0;
7802
7803  for (size_t i = 0; i < Ctl.size(); ++i) {
7804    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7805    switch(Ctl[i]) {
7806    default:
7807      Error(StrLoc, "invalid mask");
7808      return false;
7809    case '0':
7810      break;
7811    case '1':
7812      OrMask |= Mask;
7813      break;
7814    case 'p':
7815      AndMask |= Mask;
7816      break;
7817    case 'i':
7818      AndMask |= Mask;
7819      XorMask |= Mask;
7820      break;
7821    }
7822  }
7823
7824  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7825  return true;
7826}
7827
7828bool
7829AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7830
7831  SMLoc OffsetLoc = getLoc();
7832
7833  if (!parseExpr(Imm, "a swizzle macro")) {
7834    return false;
7835  }
7836  if (!isUInt<16>(Imm)) {
7837    Error(OffsetLoc, "expected a 16-bit offset");
7838    return false;
7839  }
7840  return true;
7841}
7842
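// Parse a "swizzle(...)" macro following "offset:", e.g.
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" or "offset:swizzle(SWAP, 16)";
// each mode is encoded into the 16-bit ds_swizzle offset.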
7843bool
7844AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7845  using namespace llvm::AMDGPU::Swizzle;
7846
7847  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7848
7849    SMLoc ModeLoc = getLoc();
7850    bool Ok = false;
7851
7852    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7853      Ok = parseSwizzleQuadPerm(Imm);
7854    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7855      Ok = parseSwizzleBitmaskPerm(Imm);
7856    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7857      Ok = parseSwizzleBroadcast(Imm);
7858    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7859      Ok = parseSwizzleSwap(Imm);
7860    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7861      Ok = parseSwizzleReverse(Imm);
7862    } else {
7863      Error(ModeLoc, "expected a swizzle mode");
7864    }
7865
7866    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7867  }
7868
7869  return false;
7870}
7871
7872ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7873  SMLoc S = getLoc();
7874  int64_t Imm = 0;
7875
7876  if (trySkipId("offset")) {
7877
7878    bool Ok = false;
7879    if (skipToken(AsmToken::Colon, "expected a colon")) {
7880      if (trySkipId("swizzle")) {
7881        Ok = parseSwizzleMacro(Imm);
7882      } else {
7883        Ok = parseSwizzleOffset(Imm);
7884      }
7885    }
7886
7887    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7888
7889    return Ok ? ParseStatus::Success : ParseStatus::Failure;
7890  }
7891  return ParseStatus::NoMatch;
7892}
7893
7894bool
7895AMDGPUOperand::isSwizzle() const {
7896  return isImmTy(ImmTySwizzle);
7897}
7898
7899//===----------------------------------------------------------------------===//
7900// VGPR Index Mode
7901//===----------------------------------------------------------------------===//
7902
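// Parse the body of a "gpr_idx(...)" macro, e.g. "gpr_idx(SRC0,DST)";
// the result is a bitmask of the named VGPR index modes.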
7903int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7904
7905  using namespace llvm::AMDGPU::VGPRIndexMode;
7906
7907  if (trySkipToken(AsmToken::RParen)) {
7908    return OFF;
7909  }
7910
7911  int64_t Imm = 0;
7912
7913  while (true) {
7914    unsigned Mode = 0;
7915    SMLoc S = getLoc();
7916
7917    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7918      if (trySkipId(IdSymbolic[ModeId])) {
7919        Mode = 1 << ModeId;
7920        break;
7921      }
7922    }
7923
7924    if (Mode == 0) {
7925      Error(S, (Imm == 0) ?
7926               "expected a VGPR index mode or a closing parenthesis" :
7927               "expected a VGPR index mode");
7928      return UNDEF;
7929    }
7930
7931    if (Imm & Mode) {
7932      Error(S, "duplicate VGPR index mode");
7933      return UNDEF;
7934    }
7935    Imm |= Mode;
7936
7937    if (trySkipToken(AsmToken::RParen))
7938      break;
7939    if (!skipToken(AsmToken::Comma,
7940                   "expected a comma or a closing parenthesis"))
7941      return UNDEF;
7942  }
7943
7944  return Imm;
7945}
7946
7947ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7948
7949  using namespace llvm::AMDGPU::VGPRIndexMode;
7950
7951  int64_t Imm = 0;
7952  SMLoc S = getLoc();
7953
7954  if (trySkipId("gpr_idx", AsmToken::LParen)) {
7955    Imm = parseGPRIdxMacro();
7956    if (Imm == UNDEF)
7957      return ParseStatus::Failure;
7958  } else {
7959    if (getParser().parseAbsoluteExpression(Imm))
7960      return ParseStatus::Failure;
7961    if (Imm < 0 || !isUInt<4>(Imm))
7962      return Error(S, "invalid immediate: only 4-bit values are legal");
7963  }
7964
7965  Operands.push_back(
7966      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7967  return ParseStatus::Success;
7968}
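
// Illustrative only: assuming the VGPRIndexMode IdSymbolic table spells the
// modes SRC0/SRC1/SRC2/DST, both forms below reach parseGPRIdxMode, the
// first through parseGPRIdxMacro and the second through
// parseAbsoluteExpression (9 == SRC0 | DST with the 1 << ModeId encoding):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
//   s_set_gpr_idx_on s0, 9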
7969
7970bool AMDGPUOperand::isGPRIdxMode() const {
7971  return isImmTy(ImmTyGprIdxMode);
7972}
7973
7974//===----------------------------------------------------------------------===//
7975// sopp branch targets
7976//===----------------------------------------------------------------------===//
7977
7978ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7979
7980  // Make sure we are not parsing something
7981  // that looks like a label or an expression but is not.
7982  // This will improve error messages.
7983  if (isRegister() || isModifier())
7984    return ParseStatus::NoMatch;
7985
7986  if (!parseExpr(Operands))
7987    return ParseStatus::Failure;
7988
7989  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7990  assert(Opr.isImm() || Opr.isExpr());
7991  SMLoc Loc = Opr.getStartLoc();
7992
7993  // Currently we do not support arbitrary expressions as branch targets.
7994  // Only labels and absolute expressions are accepted.
7995  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7996    Error(Loc, "expected an absolute expression or a label");
7997  } else if (Opr.isImm() && !Opr.isS16Imm()) {
7998    Error(Loc, "expected a 16-bit signed jump offset");
7999  }
8000
8001  return ParseStatus::Success;
8002}
8003
8004//===----------------------------------------------------------------------===//
8005// Boolean holding registers
8006//===----------------------------------------------------------------------===//
8007
8008ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
8009  return parseReg(Operands);
8010}
8011
8012//===----------------------------------------------------------------------===//
8013// mubuf
8014//===----------------------------------------------------------------------===//
8015
8016void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
8017                                   const OperandVector &Operands,
8018                                   bool IsAtomic) {
8019  OptionalImmIndexMap OptionalIdx;
8020  unsigned FirstOperandIdx = 1;
8021  bool IsAtomicReturn = false;
8022
8023  if (IsAtomic) {
8024    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8025      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8026      if (!Op.isCPol())
8027        continue;
8028      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
8029      break;
8030    }
8031
8032    if (!IsAtomicReturn) {
8033      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
8034      if (NewOpc != -1)
8035        Inst.setOpcode(NewOpc);
8036    }
8037
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
8040  }
8041
8042  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
8043    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8044
8045    // Add the register arguments
8046    if (Op.isReg()) {
8047      Op.addRegOperands(Inst, 1);
8048      // Insert a tied src for atomic return dst.
8049      // This cannot be postponed as subsequent calls to
8050      // addImmOperands rely on correct number of MC operands.
8051      if (IsAtomicReturn && i == FirstOperandIdx)
8052        Op.addRegOperands(Inst, 1);
8053      continue;
8054    }
8055
8056    // Handle the case where soffset is an immediate
8057    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
8058      Op.addImmOperands(Inst, 1);
8059      continue;
8060    }
8061
8062    // Handle tokens like 'offen' which are sometimes hard-coded into the
8063    // asm string.  There are no MCInst operands for these.
8064    if (Op.isToken()) {
8065      continue;
8066    }
8067    assert(Op.isImm());
8068
8069    // Handle optional arguments
8070    OptionalIdx[Op.getImmTy()] = i;
8071  }
8072
8073  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
8074  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
8075}
8076
8077//===----------------------------------------------------------------------===//
8078// smrd
8079//===----------------------------------------------------------------------===//
8080
8081bool AMDGPUOperand::isSMRDOffset8() const {
8082  return isImmLiteral() && isUInt<8>(getImm());
8083}
8084
8085bool AMDGPUOperand::isSMEMOffset() const {
8086  // Offset range is checked later by validator.
8087  return isImmLiteral();
8088}
8089
8090bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI, and we only want to use them
  // when the offset does not fit in 8 bits.
8093  return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8094}
8095
8096//===----------------------------------------------------------------------===//
8097// vop3
8098//===----------------------------------------------------------------------===//
8099
8100static bool ConvertOmodMul(int64_t &Mul) {
8101  if (Mul != 1 && Mul != 2 && Mul != 4)
8102    return false;
8103
8104  Mul >>= 1;
8105  return true;
8106}
8107
8108static bool ConvertOmodDiv(int64_t &Div) {
8109  if (Div == 1) {
8110    Div = 0;
8111    return true;
8112  }
8113
8114  if (Div == 2) {
8115    Div = 3;
8116    return true;
8117  }
8118
8119  return false;
8120}
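
// Worked example of the two conversions above (derived only from
// ConvertOmodMul/ConvertOmodDiv): the parsed value maps onto the 2-bit OMOD
// field as
//   mul:1 -> 0,  mul:2 -> 1,  mul:4 -> 2,  div:1 -> 0,  div:2 -> 3
// so mul:1 and div:1 both select the identity output modifier.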
8121
8122// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8123// This is intentional and ensures compatibility with sp3.
8124// See bug 35397 for details.
8125bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8126  if (BoundCtrl == 0 || BoundCtrl == 1) {
8127    if (!isGFX11Plus())
8128      BoundCtrl = 1;
8129    return true;
8130  }
8131  return false;
8132}
8133
8134void AMDGPUAsmParser::onBeginOfFile() {
8135  if (!getParser().getStreamer().getTargetStreamer() ||
8136      getSTI().getTargetTriple().getArch() == Triple::r600)
8137    return;
8138
8139  if (!getTargetStreamer().getTargetID())
8140    getTargetStreamer().initializeTargetID(getSTI(),
8141                                           getSTI().getFeatureString());
8142
8143  if (isHsaAbi(getSTI()))
8144    getTargetStreamer().EmitDirectiveAMDGCNTarget();
8145}
8146
8147ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8148  StringRef Name = getTokenStr();
8149  if (Name == "mul") {
8150    return parseIntWithPrefix("mul", Operands,
8151                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8152  }
8153
8154  if (Name == "div") {
8155    return parseIntWithPrefix("div", Operands,
8156                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8157  }
8158
8159  return ParseStatus::NoMatch;
8160}
8161
8162// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8163// the number of src operands present, then copies that bit into src0_modifiers.
8164void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8165  int Opc = Inst.getOpcode();
8166  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8167  if (OpSelIdx == -1)
8168    return;
8169
8170  int SrcNum;
8171  const int Ops[] = { AMDGPU::OpName::src0,
8172                      AMDGPU::OpName::src1,
8173                      AMDGPU::OpName::src2 };
8174  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8175       ++SrcNum)
8176    ;
8177  assert(SrcNum > 0);
8178
8179  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8180
8181  if ((OpSel & (1 << SrcNum)) != 0) {
8182    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8183    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8184    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8185  }
8186}
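
// Illustrative example of the logic above, assuming the parsed op_sel:[...]
// array maps entry i to bit i of the op_sel immediate: for an opcode with
// two sources, SrcNum is 2, so op_sel:[0,0,1] sets bit 2 and DST_OP_SEL is
// copied into src0_modifiers.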
8187
8188void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8189                                   const OperandVector &Operands) {
8190  cvtVOP3P(Inst, Operands);
8191  cvtVOP3DstOpSelOnly(Inst);
8192}
8193
8194void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8195                                   OptionalImmIndexMap &OptionalIdx) {
8196  cvtVOP3P(Inst, Operands, OptionalIdx);
8197  cvtVOP3DstOpSelOnly(Inst);
8198}
8199
8200static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8201  return
      // 1. This operand is an input-modifiers operand
      Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand is a register class
      && Desc.operands()[OpNum + 1].RegClass != -1
      // 4. The next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1,
8210                                   MCOI::OperandConstraint::TIED_TO) == -1;
8211}
8212
8213void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8214{
8215  OptionalImmIndexMap OptionalIdx;
8216  unsigned Opc = Inst.getOpcode();
8217
8218  unsigned I = 1;
8219  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8220  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8221    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8222  }
8223
8224  for (unsigned E = Operands.size(); I != E; ++I) {
8225    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8226    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8227      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8228    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8229               Op.isInterpAttrChan()) {
8230      Inst.addOperand(MCOperand::createImm(Op.getImm()));
8231    } else if (Op.isImmModifier()) {
8232      OptionalIdx[Op.getImmTy()] = I;
8233    } else {
8234      llvm_unreachable("unhandled operand type");
8235    }
8236  }
8237
8238  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8239    addOptionalImmOperand(Inst, Operands, OptionalIdx,
8240                          AMDGPUOperand::ImmTyHigh);
8241
8242  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8243    addOptionalImmOperand(Inst, Operands, OptionalIdx,
8244                          AMDGPUOperand::ImmTyClampSI);
8245
8246  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8247    addOptionalImmOperand(Inst, Operands, OptionalIdx,
8248                          AMDGPUOperand::ImmTyOModSI);
8249}
8250
8251void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8252{
8253  OptionalImmIndexMap OptionalIdx;
8254  unsigned Opc = Inst.getOpcode();
8255
8256  unsigned I = 1;
8257  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8258  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8259    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8260  }
8261
8262  for (unsigned E = Operands.size(); I != E; ++I) {
8263    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8264    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8265      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8266    } else if (Op.isImmModifier()) {
8267      OptionalIdx[Op.getImmTy()] = I;
8268    } else {
8269      llvm_unreachable("unhandled operand type");
8270    }
8271  }
8272
8273  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8274
8275  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8276  if (OpSelIdx != -1)
8277    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8278
8279  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8280
8281  if (OpSelIdx == -1)
8282    return;
8283
8284  const int Ops[] = { AMDGPU::OpName::src0,
8285                      AMDGPU::OpName::src1,
8286                      AMDGPU::OpName::src2 };
8287  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8288                         AMDGPU::OpName::src1_modifiers,
8289                         AMDGPU::OpName::src2_modifiers };
8290
8291  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8292
8293  for (int J = 0; J < 3; ++J) {
8294    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8295    if (OpIdx == -1)
8296      break;
8297
8298    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8299    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8300
8301    if ((OpSel & (1 << J)) != 0)
8302      ModVal |= SISrcMods::OP_SEL_0;
8303    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8304        (OpSel & (1 << 3)) != 0)
8305      ModVal |= SISrcMods::DST_OP_SEL;
8306
8307    Inst.getOperand(ModIdx).setImm(ModVal);
8308  }
8309}
8310
8311void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8312                              OptionalImmIndexMap &OptionalIdx) {
8313  unsigned Opc = Inst.getOpcode();
8314
8315  unsigned I = 1;
8316  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8317  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8318    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8319  }
8320
8321  for (unsigned E = Operands.size(); I != E; ++I) {
8322    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8323    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8324      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8325    } else if (Op.isImmModifier()) {
8326      OptionalIdx[Op.getImmTy()] = I;
8327    } else if (Op.isRegOrImm()) {
8328      Op.addRegOrImmOperands(Inst, 1);
8329    } else {
8330      llvm_unreachable("unhandled operand type");
8331    }
8332  }
8333
8334  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8335    addOptionalImmOperand(Inst, Operands, OptionalIdx,
8336                          AMDGPUOperand::ImmTyClampSI);
8337
8338  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8339    addOptionalImmOperand(Inst, Operands, OptionalIdx,
8340                          AMDGPUOperand::ImmTyOModSI);
8341
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8346  if (isMAC(Opc)) {
8347    auto it = Inst.begin();
8348    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8349    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8350    ++it;
8351    // Copy the operand to ensure it's not invalidated when Inst grows.
8352    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8353  }
8354}
8355
8356void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8357  OptionalImmIndexMap OptionalIdx;
8358  cvtVOP3(Inst, Operands, OptionalIdx);
8359}
8360
8361void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8362                               OptionalImmIndexMap &OptIdx) {
8363  const int Opc = Inst.getOpcode();
8364  const MCInstrDesc &Desc = MII.get(Opc);
8365
8366  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8367
8368  if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8369      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
8370      Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
8371      Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
8372    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8373    Inst.addOperand(Inst.getOperand(0));
8374  }
8375
8376  // Adding vdst_in operand is already covered for these DPP instructions in
8377  // cvtVOP3DPP.
8378  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
8379      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
8380        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
8381        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
8382        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
8383    assert(!IsPacked);
8384    Inst.addOperand(Inst.getOperand(0));
8385  }
8386
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8389
8390  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8391  if (OpSelIdx != -1) {
8392    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8393  }
8394
8395  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8396  if (OpSelHiIdx != -1) {
8397    int DefaultVal = IsPacked ? -1 : 0;
8398    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8399                          DefaultVal);
8400  }
8401
8402  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8403  if (NegLoIdx != -1)
8404    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8405
8406  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8407  if (NegHiIdx != -1)
8408    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8409
8410  const int Ops[] = { AMDGPU::OpName::src0,
8411                      AMDGPU::OpName::src1,
8412                      AMDGPU::OpName::src2 };
8413  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8414                         AMDGPU::OpName::src1_modifiers,
8415                         AMDGPU::OpName::src2_modifiers };
8416
8417  unsigned OpSel = 0;
8418  unsigned OpSelHi = 0;
8419  unsigned NegLo = 0;
8420  unsigned NegHi = 0;
8421
8422  if (OpSelIdx != -1)
8423    OpSel = Inst.getOperand(OpSelIdx).getImm();
8424
8425  if (OpSelHiIdx != -1)
8426    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8427
8428  if (NegLoIdx != -1)
8429    NegLo = Inst.getOperand(NegLoIdx).getImm();
8430
8431  if (NegHiIdx != -1)
8432    NegHi = Inst.getOperand(NegHiIdx).getImm();
8433
8434  for (int J = 0; J < 3; ++J) {
8435    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8436    if (OpIdx == -1)
8437      break;
8438
8439    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8440
8441    if (ModIdx == -1)
8442      continue;
8443
8444    uint32_t ModVal = 0;
8445
8446    if ((OpSel & (1 << J)) != 0)
8447      ModVal |= SISrcMods::OP_SEL_0;
8448
8449    if ((OpSelHi & (1 << J)) != 0)
8450      ModVal |= SISrcMods::OP_SEL_1;
8451
8452    if ((NegLo & (1 << J)) != 0)
8453      ModVal |= SISrcMods::NEG;
8454
8455    if ((NegHi & (1 << J)) != 0)
8456      ModVal |= SISrcMods::NEG_HI;
8457
8458    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8459  }
8460}
8461
8462void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8463  OptionalImmIndexMap OptIdx;
8464  cvtVOP3(Inst, Operands, OptIdx);
8465  cvtVOP3P(Inst, Operands, OptIdx);
8466}
8467
8468static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8469                                  unsigned i, unsigned Opc, unsigned OpName) {
8470  if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8471    ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8472  else
8473    ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8474}
8475
8476void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
8477  unsigned Opc = Inst.getOpcode();
8478
8479  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
8480  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
8481  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
8482  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
8483  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2
8484
8485  OptionalImmIndexMap OptIdx;
8486  for (unsigned i = 5; i < Operands.size(); ++i) {
8487    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8488    OptIdx[Op.getImmTy()] = i;
8489  }
8490
8491  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
8492    addOptionalImmOperand(Inst, Operands, OptIdx,
8493                          AMDGPUOperand::ImmTyIndexKey8bit);
8494
8495  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
8496    addOptionalImmOperand(Inst, Operands, OptIdx,
8497                          AMDGPUOperand::ImmTyIndexKey16bit);
8498
8499  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8500    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
8501
8502  cvtVOP3P(Inst, Operands, OptIdx);
8503}
8504
8505//===----------------------------------------------------------------------===//
8506// VOPD
8507//===----------------------------------------------------------------------===//
8508
8509ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8510  if (!hasVOPD(getSTI()))
8511    return ParseStatus::NoMatch;
8512
8513  if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8514    SMLoc S = getLoc();
8515    lex();
8516    lex();
8517    Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8518    SMLoc OpYLoc = getLoc();
8519    StringRef OpYName;
8520    if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8521      Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8522      return ParseStatus::Success;
8523    }
8524    return Error(OpYLoc, "expected a VOPDY instruction after ::");
8525  }
8526  return ParseStatus::NoMatch;
8527}
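
// Illustrative of the "::" syntax only (operand legality such as the VOPD
// register-bank rules is validated elsewhere); assuming gfx11+ dual-issue
// mnemonics, a VOPD line looks like:
//   v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v3, v2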
8528
8529// Create VOPD MCInst operands using parsed assembler operands.
8530void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  auto addOp = [&](uint16_t ParsedOprIdx) {
8532    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8533    if (Op.isReg()) {
8534      Op.addRegOperands(Inst, 1);
8535      return;
8536    }
8537    if (Op.isImm()) {
8538      Op.addImmOperands(Inst, 1);
8539      return;
8540    }
8541    llvm_unreachable("Unhandled operand type in cvtVOPD");
8542  };
8543
8544  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8545
8546  // MCInst operands are ordered as follows:
8547  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8548
8549  for (auto CompIdx : VOPD::COMPONENTS) {
8550    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8551  }
8552
8553  for (auto CompIdx : VOPD::COMPONENTS) {
8554    const auto &CInfo = InstInfo[CompIdx];
8555    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8556    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8557      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8558    if (CInfo.hasSrc2Acc())
8559      addOp(CInfo.getIndexOfDstInParsedOperands());
8560  }
8561}
8562
8563//===----------------------------------------------------------------------===//
8564// dpp
8565//===----------------------------------------------------------------------===//
8566
8567bool AMDGPUOperand::isDPP8() const {
8568  return isImmTy(ImmTyDPP8);
8569}
8570
8571bool AMDGPUOperand::isDPPCtrl() const {
8572  using namespace AMDGPU::DPP;
8573
8574  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8575  if (result) {
8576    int64_t Imm = getImm();
8577    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8578           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8579           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8580           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8581           (Imm == DppCtrl::WAVE_SHL1) ||
8582           (Imm == DppCtrl::WAVE_ROL1) ||
8583           (Imm == DppCtrl::WAVE_SHR1) ||
8584           (Imm == DppCtrl::WAVE_ROR1) ||
8585           (Imm == DppCtrl::ROW_MIRROR) ||
8586           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8587           (Imm == DppCtrl::BCAST15) ||
8588           (Imm == DppCtrl::BCAST31) ||
8589           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8590           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8591  }
8592  return false;
8593}
8594
8595//===----------------------------------------------------------------------===//
8596// mAI
8597//===----------------------------------------------------------------------===//
8598
8599bool AMDGPUOperand::isBLGP() const {
8600  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8601}
8602
8603bool AMDGPUOperand::isCBSZ() const {
8604  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8605}
8606
8607bool AMDGPUOperand::isABID() const {
8608  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8609}
8610
8611bool AMDGPUOperand::isS16Imm() const {
8612  return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8613}
8614
8615bool AMDGPUOperand::isU16Imm() const {
8616  return isImmLiteral() && isUInt<16>(getImm());
8617}
8618
8619//===----------------------------------------------------------------------===//
8620// dim
8621//===----------------------------------------------------------------------===//
8622
8623bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8624  // We want to allow "dim:1D" etc.,
8625  // but the initial 1 is tokenized as an integer.
8626  std::string Token;
8627  if (isToken(AsmToken::Integer)) {
8628    SMLoc Loc = getToken().getEndLoc();
8629    Token = std::string(getTokenStr());
8630    lex();
8631    if (getLoc() != Loc)
8632      return false;
8633  }
8634
8635  StringRef Suffix;
8636  if (!parseId(Suffix))
8637    return false;
8638  Token += Suffix;
8639
8640  StringRef DimId = Token;
8641  if (DimId.starts_with("SQ_RSRC_IMG_"))
8642    DimId = DimId.drop_front(12);
8643
8644  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8645  if (!DimInfo)
8646    return false;
8647
8648  Encoding = DimInfo->Encoding;
8649  return true;
8650}
8651
8652ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8653  if (!isGFX10Plus())
8654    return ParseStatus::NoMatch;
8655
8656  SMLoc S = getLoc();
8657
8658  if (!trySkipId("dim", AsmToken::Colon))
8659    return ParseStatus::NoMatch;
8660
8661  unsigned Encoding;
8662  SMLoc Loc = getLoc();
8663  if (!parseDimId(Encoding))
8664    return Error(Loc, "invalid dim value");
8665
8666  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8667                                              AMDGPUOperand::ImmTyDim));
8668  return ParseStatus::Success;
8669}
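
// Illustrative only: parseDimId above accepts both the short suffix and the
// full SQ_RSRC_IMG_ spelling (the prefix is stripped before the lookup),
// e.g. "dim:2D" or, assuming the usual gfx10 MIMG syntax:
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D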
8670
8671//===----------------------------------------------------------------------===//
8672// dpp
8673//===----------------------------------------------------------------------===//
8674
8675ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8676  SMLoc S = getLoc();
8677
8678  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8679    return ParseStatus::NoMatch;
8680
8681  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8682
8683  int64_t Sels[8];
8684
8685  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8686    return ParseStatus::Failure;
8687
8688  for (size_t i = 0; i < 8; ++i) {
8689    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8690      return ParseStatus::Failure;
8691
8692    SMLoc Loc = getLoc();
8693    if (getParser().parseAbsoluteExpression(Sels[i]))
8694      return ParseStatus::Failure;
8695    if (0 > Sels[i] || 7 < Sels[i])
8696      return Error(Loc, "expected a 3-bit value");
8697  }
8698
8699  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8700    return ParseStatus::Failure;
8701
8702  unsigned DPP8 = 0;
8703  for (size_t i = 0; i < 8; ++i)
8704    DPP8 |= (Sels[i] << (i * 3));
8705
8706  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8707  return ParseStatus::Success;
8708}
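
// Worked example of the packing above: each of the eight selects occupies
// 3 bits, i.e. DPP8 = Sels[0] | Sels[1] << 3 | ... | Sels[7] << 21, so the
// lane-reversal pattern below (assuming the usual gfx10+ spelling) encodes
// to a dpp8 immediate of 0x53977:
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]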
8709
8710bool
8711AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8712                                    const OperandVector &Operands) {
8713  if (Ctrl == "row_newbcast")
8714    return isGFX90A();
8715
8716  if (Ctrl == "row_share" ||
8717      Ctrl == "row_xmask")
8718    return isGFX10Plus();
8719
8720  if (Ctrl == "wave_shl" ||
8721      Ctrl == "wave_shr" ||
8722      Ctrl == "wave_rol" ||
8723      Ctrl == "wave_ror" ||
8724      Ctrl == "row_bcast")
8725    return isVI() || isGFX9();
8726
8727  return Ctrl == "row_mirror" ||
8728         Ctrl == "row_half_mirror" ||
8729         Ctrl == "quad_perm" ||
8730         Ctrl == "row_shl" ||
8731         Ctrl == "row_shr" ||
8732         Ctrl == "row_ror";
8733}
8734
8735int64_t
8736AMDGPUAsmParser::parseDPPCtrlPerm() {
8737  // quad_perm:[%d,%d,%d,%d]
8738
8739  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8740    return -1;
8741
8742  int64_t Val = 0;
8743  for (int i = 0; i < 4; ++i) {
8744    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8745      return -1;
8746
8747    int64_t Temp;
8748    SMLoc Loc = getLoc();
8749    if (getParser().parseAbsoluteExpression(Temp))
8750      return -1;
8751    if (Temp < 0 || Temp > 3) {
8752      Error(Loc, "expected a 2-bit value");
8753      return -1;
8754    }
8755
8756    Val += (Temp << i * 2);
8757  }
8758
8759  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8760    return -1;
8761
8762  return Val;
8763}
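
// Worked example, derived from the loop above: each lane select occupies
// 2 bits, so quad_perm:[0,1,2,3] packs to 0 | 1 << 2 | 2 << 4 | 3 << 6 =
// 0xe4, the identity permutation, which is also the default dpp_ctrl value
// supplied in cvtVOP3DPP below.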
8764
8765int64_t
8766AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8767  using namespace AMDGPU::DPP;
8768
8769  // sel:%d
8770
8771  int64_t Val;
8772  SMLoc Loc = getLoc();
8773
8774  if (getParser().parseAbsoluteExpression(Val))
8775    return -1;
8776
8777  struct DppCtrlCheck {
8778    int64_t Ctrl;
8779    int Lo;
8780    int Hi;
8781  };
8782
8783  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8784    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8785    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8786    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8787    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8788    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8789    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8790    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8791    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8792    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8793    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8794    .Default({-1, 0, 0});
8795
8796  bool Valid;
8797  if (Check.Ctrl == -1) {
8798    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8799    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8800  } else {
8801    Valid = Check.Lo <= Val && Val <= Check.Hi;
8802    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8803  }
8804
8805  if (!Valid) {
8806    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8807    return -1;
8808  }
8809
8810  return Val;
8811}
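
// Illustrative mapping implied by the table above: when a range is given
// the parsed value is OR'ed into the base control (e.g. row_shl:1 becomes
// ROW_SHL0 | 1), when Lo == Hi it collapses to the single control (e.g.
// wave_shl:1 becomes WAVE_SHL1), and row_bcast accepts only 15 or 31,
// selecting BCAST15 or BCAST31 respectively.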
8812
8813ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8814  using namespace AMDGPU::DPP;
8815
8816  if (!isToken(AsmToken::Identifier) ||
8817      !isSupportedDPPCtrl(getTokenStr(), Operands))
8818    return ParseStatus::NoMatch;
8819
8820  SMLoc S = getLoc();
8821  int64_t Val = -1;
8822  StringRef Ctrl;
8823
8824  parseId(Ctrl);
8825
8826  if (Ctrl == "row_mirror") {
8827    Val = DppCtrl::ROW_MIRROR;
8828  } else if (Ctrl == "row_half_mirror") {
8829    Val = DppCtrl::ROW_HALF_MIRROR;
8830  } else {
8831    if (skipToken(AsmToken::Colon, "expected a colon")) {
8832      if (Ctrl == "quad_perm") {
8833        Val = parseDPPCtrlPerm();
8834      } else {
8835        Val = parseDPPCtrlSel(Ctrl);
8836      }
8837    }
8838  }
8839
8840  if (Val == -1)
8841    return ParseStatus::Failure;
8842
8843  Operands.push_back(
8844    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8845  return ParseStatus::Success;
8846}
8847
8848void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8849                                 bool IsDPP8) {
8850  OptionalImmIndexMap OptionalIdx;
8851  unsigned Opc = Inst.getOpcode();
8852  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8853
  // MAC instructions are special because they have an 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have a dummy, unused src2_modifiers operand.
8857  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8858  int Src2ModIdx =
8859      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8860  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8861               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8862
8863  unsigned I = 1;
8864  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8865    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8866  }
8867
8868  int Fi = 0;
8869  for (unsigned E = Operands.size(); I != E; ++I) {
8870
8871    if (IsMAC) {
8872      int NumOperands = Inst.getNumOperands();
8873      if (OldIdx == NumOperands) {
8874        // Handle old operand
8875        constexpr int DST_IDX = 0;
8876        Inst.addOperand(Inst.getOperand(DST_IDX));
8877      } else if (Src2ModIdx == NumOperands) {
8878        // Add unused dummy src2_modifiers
8879        Inst.addOperand(MCOperand::createImm(0));
8880      }
8881    }
8882
8883    int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
8884    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
8885      Inst.addOperand(Inst.getOperand(0));
8886    }
8887
8888    bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
8889                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
8890                          Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
8891                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
8892    if (IsVOP3CvtSrDpp) {
8893      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
8894        Inst.addOperand(MCOperand::createImm(0));
8895        Inst.addOperand(MCOperand::createReg(0));
8896      }
8897    }
8898
8899    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8900                                            MCOI::TIED_TO);
8901    if (TiedTo != -1) {
8902      assert((unsigned)TiedTo < Inst.getNumOperands());
8903      // handle tied old or src2 for MAC instructions
8904      Inst.addOperand(Inst.getOperand(TiedTo));
8905    }
8906    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8907    // Add the register arguments
8908    if (IsDPP8 && Op.isDppFI()) {
8909      Fi = Op.getImm();
8910    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8911      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8912    } else if (Op.isReg()) {
8913      Op.addRegOperands(Inst, 1);
8914    } else if (Op.isImm() &&
8915               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8916      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8917      Op.addImmOperands(Inst, 1);
8918    } else if (Op.isImm()) {
8919      OptionalIdx[Op.getImmTy()] = I;
8920    } else {
8921      llvm_unreachable("unhandled operand type");
8922    }
8923  }
8924  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8925    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8926
8927  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8928    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8929
8930  if (Desc.TSFlags & SIInstrFlags::VOP3P)
8931    cvtVOP3P(Inst, Operands, OptionalIdx);
8932  else if (Desc.TSFlags & SIInstrFlags::VOP3)
8933    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8934  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8935    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8936  }
8937
8938  if (IsDPP8) {
8939    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8940    using namespace llvm::AMDGPU::DPP;
8941    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8942  } else {
8943    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8944    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8945    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8946    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8947
8948    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8949      addOptionalImmOperand(Inst, Operands, OptionalIdx,
8950                            AMDGPUOperand::ImmTyDppFI);
8951  }
8952}
8953
8954void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8955  OptionalImmIndexMap OptionalIdx;
8956
8957  unsigned I = 1;
8958  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8959  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8960    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8961  }
8962
8963  int Fi = 0;
8964  for (unsigned E = Operands.size(); I != E; ++I) {
8965    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8966                                            MCOI::TIED_TO);
8967    if (TiedTo != -1) {
8968      assert((unsigned)TiedTo < Inst.getNumOperands());
8969      // handle tied old or src2 for MAC instructions
8970      Inst.addOperand(Inst.getOperand(TiedTo));
8971    }
8972    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8973    // Add the register arguments
8974    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp instructions use the "vcc" token.
      // Skip it.
8977      continue;
8978    }
8979
8980    if (IsDPP8) {
8981      if (Op.isDPP8()) {
8982        Op.addImmOperands(Inst, 1);
8983      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8984        Op.addRegWithFPInputModsOperands(Inst, 2);
8985      } else if (Op.isDppFI()) {
8986        Fi = Op.getImm();
8987      } else if (Op.isReg()) {
8988        Op.addRegOperands(Inst, 1);
8989      } else {
8990        llvm_unreachable("Invalid operand type");
8991      }
8992    } else {
8993      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8994        Op.addRegWithFPInputModsOperands(Inst, 2);
8995      } else if (Op.isReg()) {
8996        Op.addRegOperands(Inst, 1);
8997      } else if (Op.isDPPCtrl()) {
8998        Op.addImmOperands(Inst, 1);
8999      } else if (Op.isImm()) {
9000        // Handle optional arguments
9001        OptionalIdx[Op.getImmTy()] = I;
9002      } else {
9003        llvm_unreachable("Invalid operand type");
9004      }
9005    }
9006  }
9007
9008  if (IsDPP8) {
9009    using namespace llvm::AMDGPU::DPP;
9010    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
9011  } else {
9012    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
9013    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
9014    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
9015    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
9016      addOptionalImmOperand(Inst, Operands, OptionalIdx,
9017                            AMDGPUOperand::ImmTyDppFI);
9018    }
9019  }
9020}
9021
9022//===----------------------------------------------------------------------===//
9023// sdwa
9024//===----------------------------------------------------------------------===//
9025
9026ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9027                                          StringRef Prefix,
9028                                          AMDGPUOperand::ImmTy Type) {
9029  using namespace llvm::AMDGPU::SDWA;
9030
9031  SMLoc S = getLoc();
9032  StringRef Value;
9033
9034  SMLoc StringLoc;
9035  ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9036  if (!Res.isSuccess())
9037    return Res;
9038
9039  int64_t Int;
9040  Int = StringSwitch<int64_t>(Value)
9041        .Case("BYTE_0", SdwaSel::BYTE_0)
9042        .Case("BYTE_1", SdwaSel::BYTE_1)
9043        .Case("BYTE_2", SdwaSel::BYTE_2)
9044        .Case("BYTE_3", SdwaSel::BYTE_3)
9045        .Case("WORD_0", SdwaSel::WORD_0)
9046        .Case("WORD_1", SdwaSel::WORD_1)
9047        .Case("DWORD", SdwaSel::DWORD)
9048        .Default(0xffffffff);
9049
9050  if (Int == 0xffffffff)
9051    return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9052
9053  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9054  return ParseStatus::Success;
9055}
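
// Illustrative only: the callers are assumed to pass the dst_sel, src0_sel
// and src1_sel prefixes here, so a full SDWA operand list looks like
// (assuming the usual VI/gfx9 spelling):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD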
9056
9057ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9058  using namespace llvm::AMDGPU::SDWA;
9059
9060  SMLoc S = getLoc();
9061  StringRef Value;
9062
9063  SMLoc StringLoc;
9064  ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9065  if (!Res.isSuccess())
9066    return Res;
9067
9068  int64_t Int;
9069  Int = StringSwitch<int64_t>(Value)
9070        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9071        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9072        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9073        .Default(0xffffffff);
9074
9075  if (Int == 0xffffffff)
9076    return Error(StringLoc, "invalid dst_unused value");
9077
9078  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9079  return ParseStatus::Success;
9080}
9081
9082void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
9083  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
9084}
9085
9086void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
9087  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
9088}
9089
9090void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
9091  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
9092}
9093
9094void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
9095  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
9096}
9097
9098void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
9099  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
9100}
9101
9102void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
9103                              uint64_t BasicInstType,
9104                              bool SkipDstVcc,
9105                              bool SkipSrcVcc) {
9106  using namespace llvm::AMDGPU::SDWA;
9107
9108  OptionalImmIndexMap OptionalIdx;
9109  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9110  bool SkippedVcc = false;
9111
9112  unsigned I = 1;
9113  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9114  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9115    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9116  }
9117
9118  for (unsigned E = Operands.size(); I != E; ++I) {
9119    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9120    if (SkipVcc && !SkippedVcc && Op.isReg() &&
9121        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa instructions use the "vcc"
      // token as dst. Skip it if it is the 2nd operand (e.g.
      // v_add_i32_sdwa v1, vcc, v2, v3) or the 4th operand
      // (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
9127      if (BasicInstType == SIInstrFlags::VOP2 &&
9128          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9129           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9130        SkippedVcc = true;
9131        continue;
9132      } else if (BasicInstType == SIInstrFlags::VOPC &&
9133                 Inst.getNumOperands() == 0) {
9134        SkippedVcc = true;
9135        continue;
9136      }
9137    }
9138    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9139      Op.addRegOrImmWithInputModsOperands(Inst, 2);
9140    } else if (Op.isImm()) {
9141      // Handle optional arguments
9142      OptionalIdx[Op.getImmTy()] = I;
9143    } else {
9144      llvm_unreachable("Invalid operand type");
9145    }
9146    SkippedVcc = false;
9147  }
9148
9149  const unsigned Opc = Inst.getOpcode();
9150  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9151      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
9153    switch (BasicInstType) {
9154    case SIInstrFlags::VOP1:
9155      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9156        addOptionalImmOperand(Inst, Operands, OptionalIdx,
9157                              AMDGPUOperand::ImmTyClampSI, 0);
9158
9159      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9160        addOptionalImmOperand(Inst, Operands, OptionalIdx,
9161                              AMDGPUOperand::ImmTyOModSI, 0);
9162
9163      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9164        addOptionalImmOperand(Inst, Operands, OptionalIdx,
9165                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9166
9167      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9168        addOptionalImmOperand(Inst, Operands, OptionalIdx,
9169                              AMDGPUOperand::ImmTySDWADstUnused,
9170                              DstUnused::UNUSED_PRESERVE);
9171
9172      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9173      break;
9174
9175    case SIInstrFlags::VOP2:
9176      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9177
9178      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9179        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9180
9181      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9182      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9183      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9184      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9185      break;
9186
9187    case SIInstrFlags::VOPC:
9188      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9189        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9190      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9191      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9192      break;
9193
9194    default:
9195      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9196    }
9197  }
9198
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
9201  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9202      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9203    auto it = Inst.begin();
9204    std::advance(
9205      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9206    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9207  }
9208}
9209
9210/// Force static initialization.
9211extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9212  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9213  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9214}
9215
9216#define GET_REGISTER_MATCHER
9217#define GET_MATCHER_IMPLEMENTATION
9218#define GET_MNEMONIC_SPELL_CHECKER
9219#define GET_MNEMONIC_CHECKER
9220#include "AMDGPUGenAsmMatcher.inc"
9221
9222ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9223                                                unsigned MCK) {
9224  switch (MCK) {
9225  case MCK_addr64:
9226    return parseTokenOp("addr64", Operands);
9227  case MCK_done:
9228    return parseTokenOp("done", Operands);
9229  case MCK_idxen:
9230    return parseTokenOp("idxen", Operands);
9231  case MCK_lds:
9232    return parseTokenOp("lds", Operands);
9233  case MCK_offen:
9234    return parseTokenOp("offen", Operands);
9235  case MCK_off:
9236    return parseTokenOp("off", Operands);
9237  case MCK_row_95_en:
9238    return parseTokenOp("row_en", Operands);
9239  case MCK_gds:
9240    return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9241  case MCK_tfe:
9242    return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9243  }
9244  return tryCustomParseOperand(Operands, MCK);
9245}
9246
9247// This function should be defined after auto-generated include so that we have
9248// MatchClassKind enum defined
9249unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9250                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks whether we were given an immediate operand
  // when the corresponding token was expected.
9255  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9256  switch (Kind) {
9257  case MCK_addr64:
9258    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9259  case MCK_gds:
9260    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9261  case MCK_lds:
9262    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9263  case MCK_idxen:
9264    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9265  case MCK_offen:
9266    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9267  case MCK_tfe:
9268    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9269  case MCK_SSrcB32:
9270    // When operands have expression values, they will return true for isToken,
9271    // because it is not possible to distinguish between a token and an
9272    // expression at parse time. MatchInstructionImpl() will always try to
9273    // match an operand as a token, when isToken returns true, and when the
9274    // name of the expression is not a valid token, the match will fail,
9275    // so we need to handle it here.
9276    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9277  case MCK_SSrcF32:
9278    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9279  case MCK_SOPPBrTarget:
9280    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9281  case MCK_VReg32OrOff:
9282    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9283  case MCK_InterpSlot:
9284    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9285  case MCK_InterpAttr:
9286    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9287  case MCK_InterpAttrChan:
9288    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9289  case MCK_SReg_64:
9290  case MCK_SReg_64_XEXEC:
9291    // Null is defined as a 32-bit register but
9292    // it should also be enabled with 64-bit operands.
9293    // The following code enables it for SReg_64 operands
9294    // used as source and destination. Remaining source
9295    // operands are handled in isInlinableImm.
9296    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9297  default:
9298    return Match_InvalidOperand;
9299  }
9300}
9301
9302//===----------------------------------------------------------------------===//
9303// endpgm
9304//===----------------------------------------------------------------------===//
9305
9306ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9307  SMLoc S = getLoc();
9308  int64_t Imm = 0;
9309
9310  if (!parseExpr(Imm)) {
9311    // The operand is optional, if not present default to 0
9312    Imm = 0;
9313  }
9314
9315  if (!isUInt<16>(Imm))
9316    return Error(S, "expected a 16-bit value");
9317
9318  Operands.push_back(
9319      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9320  return ParseStatus::Success;
9321}
9322
9323bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9324
9325//===----------------------------------------------------------------------===//
9326// LDSDIR
9327//===----------------------------------------------------------------------===//
9328
9329bool AMDGPUOperand::isWaitVDST() const {
9330  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9331}
9332
9333bool AMDGPUOperand::isWaitVAVDst() const {
9334  return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9335}
9336
9337bool AMDGPUOperand::isWaitVMVSrc() const {
9338  return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9339}
9340
9341//===----------------------------------------------------------------------===//
9342// VINTERP
9343//===----------------------------------------------------------------------===//
9344
9345bool AMDGPUOperand::isWaitEXP() const {
9346  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9347}
9348
9349//===----------------------------------------------------------------------===//
9350// Split Barrier
9351//===----------------------------------------------------------------------===//
9352
9353bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9354