1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPU.h"
10#include "AMDKernelCodeT.h"
11#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12#include "MCTargetDesc/AMDGPUTargetStreamer.h"
13#include "SIDefines.h"
14#include "SIInstrInfo.h"
15#include "TargetInfo/AMDGPUTargetInfo.h"
16#include "Utils/AMDGPUAsmUtils.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/ADT/APFloat.h"
20#include "llvm/ADT/APInt.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallBitVector.h"
24#include "llvm/ADT/SmallString.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/ADT/StringSwitch.h"
27#include "llvm/ADT/Twine.h"
28#include "llvm/BinaryFormat/ELF.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCExpr.h"
32#include "llvm/MC/MCInst.h"
33#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/MC/MCInstrInfo.h"
35#include "llvm/MC/MCParser/MCAsmLexer.h"
36#include "llvm/MC/MCParser/MCAsmParser.h"
37#include "llvm/MC/MCParser/MCAsmParserExtension.h"
38#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39#include "llvm/MC/MCParser/MCTargetAsmParser.h"
40#include "llvm/MC/MCRegisterInfo.h"
41#include "llvm/MC/MCStreamer.h"
42#include "llvm/MC/MCSubtargetInfo.h"
43#include "llvm/MC/MCSymbol.h"
44#include "llvm/Support/AMDGPUMetadata.h"
45#include "llvm/Support/AMDHSAKernelDescriptor.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/Compiler.h"
48#include "llvm/Support/Error.h"
49#include "llvm/Support/MachineValueType.h"
50#include "llvm/Support/MathExtras.h"
51#include "llvm/Support/SMLoc.h"
52#include "llvm/Support/TargetParser.h"
53#include "llvm/Support/TargetRegistry.h"
54#include "llvm/Support/raw_ostream.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <cstring>
59#include <iterator>
60#include <map>
61#include <memory>
62#include <string>
63
64using namespace llvm;
65using namespace llvm::AMDGPU;
66using namespace llvm::amdhsa;
67
68namespace {
69
70class AMDGPUAsmParser;
71
72enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73
74//===----------------------------------------------------------------------===//
75// Operand
76//===----------------------------------------------------------------------===//
77
78class AMDGPUOperand : public MCParsedAsmOperand {
79  enum KindTy {
80    Token,
81    Immediate,
82    Register,
83    Expression
84  } Kind;
85
86  SMLoc StartLoc, EndLoc;
87  const AMDGPUAsmParser *AsmParser;
88
89public:
90  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92
93  using Ptr = std::unique_ptr<AMDGPUOperand>;
94
95  struct Modifiers {
96    bool Abs = false;
97    bool Neg = false;
98    bool Sext = false;
99
100    bool hasFPModifiers() const { return Abs || Neg; }
101    bool hasIntModifiers() const { return Sext; }
102    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103
104    int64_t getFPModifiersOperand() const {
105      int64_t Operand = 0;
106      Operand |= Abs ? SISrcMods::ABS : 0u;
107      Operand |= Neg ? SISrcMods::NEG : 0u;
108      return Operand;
109    }
110
111    int64_t getIntModifiersOperand() const {
112      int64_t Operand = 0;
113      Operand |= Sext ? SISrcMods::SEXT : 0u;
114      return Operand;
115    }
116
117    int64_t getModifiersOperand() const {
118      assert(!(hasFPModifiers() && hasIntModifiers())
119           && "fp and int modifiers should not be used simultaneously");
120      if (hasFPModifiers()) {
121        return getFPModifiersOperand();
122      } else if (hasIntModifiers()) {
123        return getIntModifiersOperand();
124      } else {
125        return 0;
126      }
127    }
128
129    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130  };
131
132  enum ImmTy {
133    ImmTyNone,
134    ImmTyGDS,
135    ImmTyLDS,
136    ImmTyOffen,
137    ImmTyIdxen,
138    ImmTyAddr64,
139    ImmTyOffset,
140    ImmTyInstOffset,
141    ImmTyOffset0,
142    ImmTyOffset1,
143    ImmTyDLC,
144    ImmTyGLC,
145    ImmTySLC,
146    ImmTySWZ,
147    ImmTyTFE,
148    ImmTyD16,
149    ImmTyClampSI,
150    ImmTyOModSI,
151    ImmTyDPP8,
152    ImmTyDppCtrl,
153    ImmTyDppRowMask,
154    ImmTyDppBankMask,
155    ImmTyDppBoundCtrl,
156    ImmTyDppFi,
157    ImmTySdwaDstSel,
158    ImmTySdwaSrc0Sel,
159    ImmTySdwaSrc1Sel,
160    ImmTySdwaDstUnused,
161    ImmTyDMask,
162    ImmTyDim,
163    ImmTyUNorm,
164    ImmTyDA,
165    ImmTyR128A16,
166    ImmTyLWE,
167    ImmTyExpTgt,
168    ImmTyExpCompr,
169    ImmTyExpVM,
170    ImmTyFORMAT,
171    ImmTyHwreg,
172    ImmTyOff,
173    ImmTySendMsg,
174    ImmTyInterpSlot,
175    ImmTyInterpAttr,
176    ImmTyAttrChan,
177    ImmTyOpSel,
178    ImmTyOpSelHi,
179    ImmTyNegLo,
180    ImmTyNegHi,
181    ImmTySwizzle,
182    ImmTyGprIdxMode,
183    ImmTyHigh,
184    ImmTyBLGP,
185    ImmTyCBSZ,
186    ImmTyABID,
187    ImmTyEndpgm,
188  };
189
190private:
191  struct TokOp {
192    const char *Data;
193    unsigned Length;
194  };
195
196  struct ImmOp {
197    int64_t Val;
198    ImmTy Type;
199    bool IsFPImm;
200    Modifiers Mods;
201  };
202
203  struct RegOp {
204    unsigned RegNo;
205    Modifiers Mods;
206  };
207
208  union {
209    TokOp Tok;
210    ImmOp Imm;
211    RegOp Reg;
212    const MCExpr *Expr;
213  };
214
215public:
216  bool isToken() const override {
217    if (Kind == Token)
218      return true;
219
220    // When parsing operands, we can't always tell if something was meant to be
221    // a token, like 'gds', or an expression that references a global variable.
222    // In this case, we assume the string is an expression, and if we need to
223    // interpret is a token, then we treat the symbol name as the token.
224    return isSymbolRefExpr();
225  }
226
227  bool isSymbolRefExpr() const {
228    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
229  }
230
231  bool isImm() const override {
232    return Kind == Immediate;
233  }
234
235  bool isInlinableImm(MVT type) const;
236  bool isLiteralImm(MVT type) const;
237
238  bool isRegKind() const {
239    return Kind == Register;
240  }
241
242  bool isReg() const override {
243    return isRegKind() && !hasModifiers();
244  }
245
246  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
247    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
248  }
249
250  bool isRegOrImmWithInt16InputMods() const {
251    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
252  }
253
254  bool isRegOrImmWithInt32InputMods() const {
255    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
256  }
257
258  bool isRegOrImmWithInt64InputMods() const {
259    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
260  }
261
262  bool isRegOrImmWithFP16InputMods() const {
263    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
264  }
265
266  bool isRegOrImmWithFP32InputMods() const {
267    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
268  }
269
270  bool isRegOrImmWithFP64InputMods() const {
271    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
272  }
273
274  bool isVReg() const {
275    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
276           isRegClass(AMDGPU::VReg_64RegClassID) ||
277           isRegClass(AMDGPU::VReg_96RegClassID) ||
278           isRegClass(AMDGPU::VReg_128RegClassID) ||
279           isRegClass(AMDGPU::VReg_160RegClassID) ||
280           isRegClass(AMDGPU::VReg_256RegClassID) ||
281           isRegClass(AMDGPU::VReg_512RegClassID) ||
282           isRegClass(AMDGPU::VReg_1024RegClassID);
283  }
284
285  bool isVReg32() const {
286    return isRegClass(AMDGPU::VGPR_32RegClassID);
287  }
288
289  bool isVReg32OrOff() const {
290    return isOff() || isVReg32();
291  }
292
293  bool isNull() const {
294    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
295  }
296
297  bool isSDWAOperand(MVT type) const;
298  bool isSDWAFP16Operand() const;
299  bool isSDWAFP32Operand() const;
300  bool isSDWAInt16Operand() const;
301  bool isSDWAInt32Operand() const;
302
303  bool isImmTy(ImmTy ImmT) const {
304    return isImm() && Imm.Type == ImmT;
305  }
306
307  bool isImmModifier() const {
308    return isImm() && Imm.Type != ImmTyNone;
309  }
310
311  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
312  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
313  bool isDMask() const { return isImmTy(ImmTyDMask); }
314  bool isDim() const { return isImmTy(ImmTyDim); }
315  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
316  bool isDA() const { return isImmTy(ImmTyDA); }
317  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
318  bool isLWE() const { return isImmTy(ImmTyLWE); }
319  bool isOff() const { return isImmTy(ImmTyOff); }
320  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
321  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
322  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
323  bool isOffen() const { return isImmTy(ImmTyOffen); }
324  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
325  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
326  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
327  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
328  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
329
330  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
331  bool isGDS() const { return isImmTy(ImmTyGDS); }
332  bool isLDS() const { return isImmTy(ImmTyLDS); }
333  bool isDLC() const { return isImmTy(ImmTyDLC); }
334  bool isGLC() const { return isImmTy(ImmTyGLC); }
335  bool isSLC() const { return isImmTy(ImmTySLC); }
336  bool isSWZ() const { return isImmTy(ImmTySWZ); }
337  bool isTFE() const { return isImmTy(ImmTyTFE); }
338  bool isD16() const { return isImmTy(ImmTyD16); }
339  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
340  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
341  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
342  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
343  bool isFI() const { return isImmTy(ImmTyDppFi); }
344  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
345  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
346  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
347  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
348  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
349  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
350  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
351  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
352  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
353  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
354  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
355  bool isHigh() const { return isImmTy(ImmTyHigh); }
356
357  bool isMod() const {
358    return isClampSI() || isOModSI();
359  }
360
361  bool isRegOrImm() const {
362    return isReg() || isImm();
363  }
364
365  bool isRegClass(unsigned RCID) const;
366
367  bool isInlineValue() const;
368
369  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
370    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
371  }
372
373  bool isSCSrcB16() const {
374    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
375  }
376
377  bool isSCSrcV2B16() const {
378    return isSCSrcB16();
379  }
380
381  bool isSCSrcB32() const {
382    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
383  }
384
385  bool isSCSrcB64() const {
386    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
387  }
388
389  bool isBoolReg() const;
390
391  bool isSCSrcF16() const {
392    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
393  }
394
395  bool isSCSrcV2F16() const {
396    return isSCSrcF16();
397  }
398
399  bool isSCSrcF32() const {
400    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
401  }
402
403  bool isSCSrcF64() const {
404    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
405  }
406
407  bool isSSrcB32() const {
408    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
409  }
410
411  bool isSSrcB16() const {
412    return isSCSrcB16() || isLiteralImm(MVT::i16);
413  }
414
415  bool isSSrcV2B16() const {
416    llvm_unreachable("cannot happen");
417    return isSSrcB16();
418  }
419
420  bool isSSrcB64() const {
421    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
422    // See isVSrc64().
423    return isSCSrcB64() || isLiteralImm(MVT::i64);
424  }
425
426  bool isSSrcF32() const {
427    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
428  }
429
430  bool isSSrcF64() const {
431    return isSCSrcB64() || isLiteralImm(MVT::f64);
432  }
433
434  bool isSSrcF16() const {
435    return isSCSrcB16() || isLiteralImm(MVT::f16);
436  }
437
438  bool isSSrcV2F16() const {
439    llvm_unreachable("cannot happen");
440    return isSSrcF16();
441  }
442
443  bool isSSrcOrLdsB32() const {
444    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
445           isLiteralImm(MVT::i32) || isExpr();
446  }
447
448  bool isVCSrcB32() const {
449    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
450  }
451
452  bool isVCSrcB64() const {
453    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
454  }
455
456  bool isVCSrcB16() const {
457    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
458  }
459
460  bool isVCSrcV2B16() const {
461    return isVCSrcB16();
462  }
463
464  bool isVCSrcF32() const {
465    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
466  }
467
468  bool isVCSrcF64() const {
469    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
470  }
471
472  bool isVCSrcF16() const {
473    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
474  }
475
476  bool isVCSrcV2F16() const {
477    return isVCSrcF16();
478  }
479
480  bool isVSrcB32() const {
481    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
482  }
483
484  bool isVSrcB64() const {
485    return isVCSrcF64() || isLiteralImm(MVT::i64);
486  }
487
488  bool isVSrcB16() const {
489    return isVCSrcF16() || isLiteralImm(MVT::i16);
490  }
491
492  bool isVSrcV2B16() const {
493    return isVSrcB16() || isLiteralImm(MVT::v2i16);
494  }
495
496  bool isVSrcF32() const {
497    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
498  }
499
500  bool isVSrcF64() const {
501    return isVCSrcF64() || isLiteralImm(MVT::f64);
502  }
503
504  bool isVSrcF16() const {
505    return isVCSrcF16() || isLiteralImm(MVT::f16);
506  }
507
508  bool isVSrcV2F16() const {
509    return isVSrcF16() || isLiteralImm(MVT::v2f16);
510  }
511
512  bool isVISrcB32() const {
513    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
514  }
515
516  bool isVISrcB16() const {
517    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
518  }
519
520  bool isVISrcV2B16() const {
521    return isVISrcB16();
522  }
523
524  bool isVISrcF32() const {
525    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
526  }
527
528  bool isVISrcF16() const {
529    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
530  }
531
532  bool isVISrcV2F16() const {
533    return isVISrcF16() || isVISrcB32();
534  }
535
536  bool isAISrcB32() const {
537    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
538  }
539
540  bool isAISrcB16() const {
541    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
542  }
543
544  bool isAISrcV2B16() const {
545    return isAISrcB16();
546  }
547
548  bool isAISrcF32() const {
549    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
550  }
551
552  bool isAISrcF16() const {
553    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
554  }
555
556  bool isAISrcV2F16() const {
557    return isAISrcF16() || isAISrcB32();
558  }
559
560  bool isAISrc_128B32() const {
561    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
562  }
563
564  bool isAISrc_128B16() const {
565    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
566  }
567
568  bool isAISrc_128V2B16() const {
569    return isAISrc_128B16();
570  }
571
572  bool isAISrc_128F32() const {
573    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
574  }
575
576  bool isAISrc_128F16() const {
577    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
578  }
579
580  bool isAISrc_128V2F16() const {
581    return isAISrc_128F16() || isAISrc_128B32();
582  }
583
584  bool isAISrc_512B32() const {
585    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
586  }
587
588  bool isAISrc_512B16() const {
589    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
590  }
591
592  bool isAISrc_512V2B16() const {
593    return isAISrc_512B16();
594  }
595
596  bool isAISrc_512F32() const {
597    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
598  }
599
600  bool isAISrc_512F16() const {
601    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
602  }
603
604  bool isAISrc_512V2F16() const {
605    return isAISrc_512F16() || isAISrc_512B32();
606  }
607
608  bool isAISrc_1024B32() const {
609    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
610  }
611
612  bool isAISrc_1024B16() const {
613    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
614  }
615
616  bool isAISrc_1024V2B16() const {
617    return isAISrc_1024B16();
618  }
619
620  bool isAISrc_1024F32() const {
621    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
622  }
623
624  bool isAISrc_1024F16() const {
625    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
626  }
627
628  bool isAISrc_1024V2F16() const {
629    return isAISrc_1024F16() || isAISrc_1024B32();
630  }
631
632  bool isKImmFP32() const {
633    return isLiteralImm(MVT::f32);
634  }
635
636  bool isKImmFP16() const {
637    return isLiteralImm(MVT::f16);
638  }
639
640  bool isMem() const override {
641    return false;
642  }
643
644  bool isExpr() const {
645    return Kind == Expression;
646  }
647
648  bool isSoppBrTarget() const {
649    return isExpr() || isImm();
650  }
651
652  bool isSWaitCnt() const;
653  bool isHwreg() const;
654  bool isSendMsg() const;
655  bool isSwizzle() const;
656  bool isSMRDOffset8() const;
657  bool isSMRDOffset20() const;
658  bool isSMRDLiteralOffset() const;
659  bool isDPP8() const;
660  bool isDPPCtrl() const;
661  bool isBLGP() const;
662  bool isCBSZ() const;
663  bool isABID() const;
664  bool isGPRIdxMode() const;
665  bool isS16Imm() const;
666  bool isU16Imm() const;
667  bool isEndpgm() const;
668
669  StringRef getExpressionAsToken() const {
670    assert(isExpr());
671    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
672    return S->getSymbol().getName();
673  }
674
675  StringRef getToken() const {
676    assert(isToken());
677
678    if (Kind == Expression)
679      return getExpressionAsToken();
680
681    return StringRef(Tok.Data, Tok.Length);
682  }
683
684  int64_t getImm() const {
685    assert(isImm());
686    return Imm.Val;
687  }
688
689  ImmTy getImmTy() const {
690    assert(isImm());
691    return Imm.Type;
692  }
693
694  unsigned getReg() const override {
695    assert(isRegKind());
696    return Reg.RegNo;
697  }
698
699  SMLoc getStartLoc() const override {
700    return StartLoc;
701  }
702
703  SMLoc getEndLoc() const override {
704    return EndLoc;
705  }
706
707  SMRange getLocRange() const {
708    return SMRange(StartLoc, EndLoc);
709  }
710
711  Modifiers getModifiers() const {
712    assert(isRegKind() || isImmTy(ImmTyNone));
713    return isRegKind() ? Reg.Mods : Imm.Mods;
714  }
715
716  void setModifiers(Modifiers Mods) {
717    assert(isRegKind() || isImmTy(ImmTyNone));
718    if (isRegKind())
719      Reg.Mods = Mods;
720    else
721      Imm.Mods = Mods;
722  }
723
724  bool hasModifiers() const {
725    return getModifiers().hasModifiers();
726  }
727
728  bool hasFPModifiers() const {
729    return getModifiers().hasFPModifiers();
730  }
731
732  bool hasIntModifiers() const {
733    return getModifiers().hasIntModifiers();
734  }
735
736  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
737
738  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
739
740  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
741
742  template <unsigned Bitwidth>
743  void addKImmFPOperands(MCInst &Inst, unsigned N) const;
744
745  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
746    addKImmFPOperands<16>(Inst, N);
747  }
748
749  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
750    addKImmFPOperands<32>(Inst, N);
751  }
752
753  void addRegOperands(MCInst &Inst, unsigned N) const;
754
755  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
756    addRegOperands(Inst, N);
757  }
758
759  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
760    if (isRegKind())
761      addRegOperands(Inst, N);
762    else if (isExpr())
763      Inst.addOperand(MCOperand::createExpr(Expr));
764    else
765      addImmOperands(Inst, N);
766  }
767
768  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
769    Modifiers Mods = getModifiers();
770    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
771    if (isRegKind()) {
772      addRegOperands(Inst, N);
773    } else {
774      addImmOperands(Inst, N, false);
775    }
776  }
777
778  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
779    assert(!hasIntModifiers());
780    addRegOrImmWithInputModsOperands(Inst, N);
781  }
782
783  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
784    assert(!hasFPModifiers());
785    addRegOrImmWithInputModsOperands(Inst, N);
786  }
787
788  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
789    Modifiers Mods = getModifiers();
790    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
791    assert(isRegKind());
792    addRegOperands(Inst, N);
793  }
794
795  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
796    assert(!hasIntModifiers());
797    addRegWithInputModsOperands(Inst, N);
798  }
799
800  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
801    assert(!hasFPModifiers());
802    addRegWithInputModsOperands(Inst, N);
803  }
804
805  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
806    if (isImm())
807      addImmOperands(Inst, N);
808    else {
809      assert(isExpr());
810      Inst.addOperand(MCOperand::createExpr(Expr));
811    }
812  }
813
814  static void printImmTy(raw_ostream& OS, ImmTy Type) {
815    switch (Type) {
816    case ImmTyNone: OS << "None"; break;
817    case ImmTyGDS: OS << "GDS"; break;
818    case ImmTyLDS: OS << "LDS"; break;
819    case ImmTyOffen: OS << "Offen"; break;
820    case ImmTyIdxen: OS << "Idxen"; break;
821    case ImmTyAddr64: OS << "Addr64"; break;
822    case ImmTyOffset: OS << "Offset"; break;
823    case ImmTyInstOffset: OS << "InstOffset"; break;
824    case ImmTyOffset0: OS << "Offset0"; break;
825    case ImmTyOffset1: OS << "Offset1"; break;
826    case ImmTyDLC: OS << "DLC"; break;
827    case ImmTyGLC: OS << "GLC"; break;
828    case ImmTySLC: OS << "SLC"; break;
829    case ImmTySWZ: OS << "SWZ"; break;
830    case ImmTyTFE: OS << "TFE"; break;
831    case ImmTyD16: OS << "D16"; break;
832    case ImmTyFORMAT: OS << "FORMAT"; break;
833    case ImmTyClampSI: OS << "ClampSI"; break;
834    case ImmTyOModSI: OS << "OModSI"; break;
835    case ImmTyDPP8: OS << "DPP8"; break;
836    case ImmTyDppCtrl: OS << "DppCtrl"; break;
837    case ImmTyDppRowMask: OS << "DppRowMask"; break;
838    case ImmTyDppBankMask: OS << "DppBankMask"; break;
839    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
840    case ImmTyDppFi: OS << "FI"; break;
841    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
842    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
843    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
844    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
845    case ImmTyDMask: OS << "DMask"; break;
846    case ImmTyDim: OS << "Dim"; break;
847    case ImmTyUNorm: OS << "UNorm"; break;
848    case ImmTyDA: OS << "DA"; break;
849    case ImmTyR128A16: OS << "R128A16"; break;
850    case ImmTyLWE: OS << "LWE"; break;
851    case ImmTyOff: OS << "Off"; break;
852    case ImmTyExpTgt: OS << "ExpTgt"; break;
853    case ImmTyExpCompr: OS << "ExpCompr"; break;
854    case ImmTyExpVM: OS << "ExpVM"; break;
855    case ImmTyHwreg: OS << "Hwreg"; break;
856    case ImmTySendMsg: OS << "SendMsg"; break;
857    case ImmTyInterpSlot: OS << "InterpSlot"; break;
858    case ImmTyInterpAttr: OS << "InterpAttr"; break;
859    case ImmTyAttrChan: OS << "AttrChan"; break;
860    case ImmTyOpSel: OS << "OpSel"; break;
861    case ImmTyOpSelHi: OS << "OpSelHi"; break;
862    case ImmTyNegLo: OS << "NegLo"; break;
863    case ImmTyNegHi: OS << "NegHi"; break;
864    case ImmTySwizzle: OS << "Swizzle"; break;
865    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
866    case ImmTyHigh: OS << "High"; break;
867    case ImmTyBLGP: OS << "BLGP"; break;
868    case ImmTyCBSZ: OS << "CBSZ"; break;
869    case ImmTyABID: OS << "ABID"; break;
870    case ImmTyEndpgm: OS << "Endpgm"; break;
871    }
872  }
873
874  void print(raw_ostream &OS) const override {
875    switch (Kind) {
876    case Register:
877      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
878      break;
879    case Immediate:
880      OS << '<' << getImm();
881      if (getImmTy() != ImmTyNone) {
882        OS << " type: "; printImmTy(OS, getImmTy());
883      }
884      OS << " mods: " << Imm.Mods << '>';
885      break;
886    case Token:
887      OS << '\'' << getToken() << '\'';
888      break;
889    case Expression:
890      OS << "<expr " << *Expr << '>';
891      break;
892    }
893  }
894
895  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
896                                      int64_t Val, SMLoc Loc,
897                                      ImmTy Type = ImmTyNone,
898                                      bool IsFPImm = false) {
899    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
900    Op->Imm.Val = Val;
901    Op->Imm.IsFPImm = IsFPImm;
902    Op->Imm.Type = Type;
903    Op->Imm.Mods = Modifiers();
904    Op->StartLoc = Loc;
905    Op->EndLoc = Loc;
906    return Op;
907  }
908
909  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
910                                        StringRef Str, SMLoc Loc,
911                                        bool HasExplicitEncodingSize = true) {
912    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
913    Res->Tok.Data = Str.data();
914    Res->Tok.Length = Str.size();
915    Res->StartLoc = Loc;
916    Res->EndLoc = Loc;
917    return Res;
918  }
919
920  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
921                                      unsigned RegNo, SMLoc S,
922                                      SMLoc E) {
923    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
924    Op->Reg.RegNo = RegNo;
925    Op->Reg.Mods = Modifiers();
926    Op->StartLoc = S;
927    Op->EndLoc = E;
928    return Op;
929  }
930
931  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
932                                       const class MCExpr *Expr, SMLoc S) {
933    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
934    Op->Expr = Expr;
935    Op->StartLoc = S;
936    Op->EndLoc = S;
937    return Op;
938  }
939};
940
941raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
942  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
943  return OS;
944}
945
946//===----------------------------------------------------------------------===//
947// AsmParser
948//===----------------------------------------------------------------------===//
949
950// Holds info related to the current kernel, e.g. count of SGPRs used.
951// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
952// .amdgpu_hsa_kernel or at EOF.
953class KernelScopeInfo {
954  int SgprIndexUnusedMin = -1;
955  int VgprIndexUnusedMin = -1;
956  MCContext *Ctx = nullptr;
957
958  void usesSgprAt(int i) {
959    if (i >= SgprIndexUnusedMin) {
960      SgprIndexUnusedMin = ++i;
961      if (Ctx) {
962        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
963        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
964      }
965    }
966  }
967
968  void usesVgprAt(int i) {
969    if (i >= VgprIndexUnusedMin) {
970      VgprIndexUnusedMin = ++i;
971      if (Ctx) {
972        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
973        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
974      }
975    }
976  }
977
978public:
979  KernelScopeInfo() = default;
980
981  void initialize(MCContext &Context) {
982    Ctx = &Context;
983    usesSgprAt(SgprIndexUnusedMin = -1);
984    usesVgprAt(VgprIndexUnusedMin = -1);
985  }
986
987  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
988    switch (RegKind) {
989      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
990      case IS_AGPR: // fall through
991      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
992      default: break;
993    }
994  }
995};
996
997class AMDGPUAsmParser : public MCTargetAsmParser {
998  MCAsmParser &Parser;
999
1000  // Number of extra operands parsed after the first optional operand.
1001  // This may be necessary to skip hardcoded mandatory operands.
1002  static const unsigned MAX_OPR_LOOKAHEAD = 8;
1003
1004  unsigned ForcedEncodingSize = 0;
1005  bool ForcedDPP = false;
1006  bool ForcedSDWA = false;
1007  KernelScopeInfo KernelScope;
1008
1009  /// @name Auto-generated Match Functions
1010  /// {
1011
1012#define GET_ASSEMBLER_HEADER
1013#include "AMDGPUGenAsmMatcher.inc"
1014
1015  /// }
1016
1017private:
1018  bool ParseAsAbsoluteExpression(uint32_t &Ret);
1019  bool OutOfRangeError(SMRange Range);
1020  /// Calculate VGPR/SGPR blocks required for given target, reserved
1021  /// registers, and user-specified NextFreeXGPR values.
1022  ///
1023  /// \param Features [in] Target features, used for bug corrections.
1024  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1025  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1026  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1027  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1028  /// descriptor field, if valid.
1029  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1030  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1031  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1032  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1033  /// \param VGPRBlocks [out] Result VGPR block count.
1034  /// \param SGPRBlocks [out] Result SGPR block count.
1035  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1036                          bool FlatScrUsed, bool XNACKUsed,
1037                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1038                          SMRange VGPRRange, unsigned NextFreeSGPR,
1039                          SMRange SGPRRange, unsigned &VGPRBlocks,
1040                          unsigned &SGPRBlocks);
1041  bool ParseDirectiveAMDGCNTarget();
1042  bool ParseDirectiveAMDHSAKernel();
1043  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1044  bool ParseDirectiveHSACodeObjectVersion();
1045  bool ParseDirectiveHSACodeObjectISA();
1046  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1047  bool ParseDirectiveAMDKernelCodeT();
1048  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1049  bool ParseDirectiveAMDGPUHsaKernel();
1050
1051  bool ParseDirectiveISAVersion();
1052  bool ParseDirectiveHSAMetadata();
1053  bool ParseDirectivePALMetadataBegin();
1054  bool ParseDirectivePALMetadata();
1055  bool ParseDirectiveAMDGPULDS();
1056
1057  /// Common code to parse out a block of text (typically YAML) between start and
1058  /// end directives.
1059  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1060                           const char *AssemblerDirectiveEnd,
1061                           std::string &CollectString);
1062
1063  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1064                             RegisterKind RegKind, unsigned Reg1);
1065  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1066                           unsigned& RegNum, unsigned& RegWidth);
1067  unsigned ParseRegularReg(RegisterKind &RegKind,
1068                           unsigned &RegNum,
1069                           unsigned &RegWidth);
1070  unsigned ParseSpecialReg(RegisterKind &RegKind,
1071                           unsigned &RegNum,
1072                           unsigned &RegWidth);
1073  unsigned ParseRegList(RegisterKind &RegKind,
1074                        unsigned &RegNum,
1075                        unsigned &RegWidth);
1076  bool ParseRegRange(unsigned& Num, unsigned& Width);
1077  unsigned getRegularReg(RegisterKind RegKind,
1078                         unsigned RegNum,
1079                         unsigned RegWidth);
1080
1081  bool isRegister();
1082  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1083  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1084  void initializeGprCountSymbol(RegisterKind RegKind);
1085  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1086                             unsigned RegWidth);
1087  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1088                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1089  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1090                 bool IsGdsHardcoded);
1091
1092public:
1093  enum AMDGPUMatchResultTy {
1094    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1095  };
1096  enum OperandMode {
1097    OperandMode_Default,
1098    OperandMode_NSA,
1099  };
1100
1101  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1102
1103  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1104               const MCInstrInfo &MII,
1105               const MCTargetOptions &Options)
1106      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1107    MCAsmParserExtension::Initialize(Parser);
1108
1109    if (getFeatureBits().none()) {
1110      // Set default features.
1111      copySTI().ToggleFeature("southern-islands");
1112    }
1113
1114    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1115
1116    {
1117      // TODO: make those pre-defined variables read-only.
1118      // Currently there is none suitable machinery in the core llvm-mc for this.
1119      // MCSymbol::isRedefinable is intended for another purpose, and
1120      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1121      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1122      MCContext &Ctx = getContext();
1123      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1124        MCSymbol *Sym =
1125            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1126        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1127        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1128        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1129        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1130        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1131      } else {
1132        MCSymbol *Sym =
1133            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1134        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1135        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1136        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1137        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1138        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1139      }
1140      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1141        initializeGprCountSymbol(IS_VGPR);
1142        initializeGprCountSymbol(IS_SGPR);
1143      } else
1144        KernelScope.initialize(getContext());
1145    }
1146  }
1147
1148  bool hasXNACK() const {
1149    return AMDGPU::hasXNACK(getSTI());
1150  }
1151
1152  bool hasMIMG_R128() const {
1153    return AMDGPU::hasMIMG_R128(getSTI());
1154  }
1155
1156  bool hasPackedD16() const {
1157    return AMDGPU::hasPackedD16(getSTI());
1158  }
1159
1160  bool isSI() const {
1161    return AMDGPU::isSI(getSTI());
1162  }
1163
1164  bool isCI() const {
1165    return AMDGPU::isCI(getSTI());
1166  }
1167
1168  bool isVI() const {
1169    return AMDGPU::isVI(getSTI());
1170  }
1171
1172  bool isGFX9() const {
1173    return AMDGPU::isGFX9(getSTI());
1174  }
1175
1176  bool isGFX10() const {
1177    return AMDGPU::isGFX10(getSTI());
1178  }
1179
1180  bool hasInv2PiInlineImm() const {
1181    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1182  }
1183
1184  bool hasFlatOffsets() const {
1185    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1186  }
1187
1188  bool hasSGPR102_SGPR103() const {
1189    return !isVI() && !isGFX9();
1190  }
1191
1192  bool hasSGPR104_SGPR105() const {
1193    return isGFX10();
1194  }
1195
1196  bool hasIntClamp() const {
1197    return getFeatureBits()[AMDGPU::FeatureIntClamp];
1198  }
1199
1200  AMDGPUTargetStreamer &getTargetStreamer() {
1201    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1202    return static_cast<AMDGPUTargetStreamer &>(TS);
1203  }
1204
1205  const MCRegisterInfo *getMRI() const {
1206    // We need this const_cast because for some reason getContext() is not const
1207    // in MCAsmParser.
1208    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1209  }
1210
1211  const MCInstrInfo *getMII() const {
1212    return &MII;
1213  }
1214
1215  const FeatureBitset &getFeatureBits() const {
1216    return getSTI().getFeatureBits();
1217  }
1218
1219  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1220  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1221  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1222
1223  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1224  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1225  bool isForcedDPP() const { return ForcedDPP; }
1226  bool isForcedSDWA() const { return ForcedSDWA; }
1227  ArrayRef<unsigned> getMatchedVariants() const;
1228
1229  std::unique_ptr<AMDGPUOperand> parseRegister();
1230  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1231  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1232  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1233                                      unsigned Kind) override;
1234  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1235                               OperandVector &Operands, MCStreamer &Out,
1236                               uint64_t &ErrorInfo,
1237                               bool MatchingInlineAsm) override;
1238  bool ParseDirective(AsmToken DirectiveID) override;
1239  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1240                                    OperandMode Mode = OperandMode_Default);
1241  StringRef parseMnemonicSuffix(StringRef Name);
1242  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1243                        SMLoc NameLoc, OperandVector &Operands) override;
1244  //bool ProcessInstruction(MCInst &Inst);
1245
1246  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1247
1248  OperandMatchResultTy
1249  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1250                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1251                     bool (*ConvertResult)(int64_t &) = nullptr);
1252
1253  OperandMatchResultTy
1254  parseOperandArrayWithPrefix(const char *Prefix,
1255                              OperandVector &Operands,
1256                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1257                              bool (*ConvertResult)(int64_t&) = nullptr);
1258
1259  OperandMatchResultTy
1260  parseNamedBit(const char *Name, OperandVector &Operands,
1261                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1262  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1263                                             StringRef &Value);
1264
1265  bool isModifier();
1266  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1267  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1268  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1269  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1270  bool parseSP3NegModifier();
1271  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1272  OperandMatchResultTy parseReg(OperandVector &Operands);
1273  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1274  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1275  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1276  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1277  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1278  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1279  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1280
1281  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1282  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1283  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1284  void cvtExp(MCInst &Inst, const OperandVector &Operands);
1285
1286  bool parseCnt(int64_t &IntVal);
1287  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1288  OperandMatchResultTy parseHwreg(OperandVector &Operands);
1289
1290private:
1291  struct OperandInfoTy {
1292    int64_t Id;
1293    bool IsSymbolic = false;
1294    bool IsDefined = false;
1295
1296    OperandInfoTy(int64_t Id_) : Id(Id_) {}
1297  };
1298
1299  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1300  bool validateSendMsg(const OperandInfoTy &Msg,
1301                       const OperandInfoTy &Op,
1302                       const OperandInfoTy &Stream,
1303                       const SMLoc Loc);
1304
1305  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1306  bool validateHwreg(const OperandInfoTy &HwReg,
1307                     const int64_t Offset,
1308                     const int64_t Width,
1309                     const SMLoc Loc);
1310
1311  void errorExpTgt();
1312  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1313  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1314
1315  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1316  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1317  bool validateSOPLiteral(const MCInst &Inst) const;
1318  bool validateConstantBusLimitations(const MCInst &Inst);
1319  bool validateEarlyClobberLimitations(const MCInst &Inst);
1320  bool validateIntClampSupported(const MCInst &Inst);
1321  bool validateMIMGAtomicDMask(const MCInst &Inst);
1322  bool validateMIMGGatherDMask(const MCInst &Inst);
1323  bool validateMovrels(const MCInst &Inst);
1324  bool validateMIMGDataSize(const MCInst &Inst);
1325  bool validateMIMGAddrSize(const MCInst &Inst);
1326  bool validateMIMGD16(const MCInst &Inst);
1327  bool validateMIMGDim(const MCInst &Inst);
1328  bool validateLdsDirect(const MCInst &Inst);
1329  bool validateOpSel(const MCInst &Inst);
1330  bool validateVccOperand(unsigned Reg) const;
1331  bool validateVOP3Literal(const MCInst &Inst) const;
1332  unsigned getConstantBusLimit(unsigned Opcode) const;
1333  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1334  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1335  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1336
1337  bool isId(const StringRef Id) const;
1338  bool isId(const AsmToken &Token, const StringRef Id) const;
1339  bool isToken(const AsmToken::TokenKind Kind) const;
1340  bool trySkipId(const StringRef Id);
1341  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1342  bool trySkipToken(const AsmToken::TokenKind Kind);
1343  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1344  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1345  void peekTokens(MutableArrayRef<AsmToken> Tokens);
1346  AsmToken::TokenKind getTokenKind() const;
1347  bool parseExpr(int64_t &Imm);
1348  bool parseExpr(OperandVector &Operands);
1349  StringRef getTokenStr() const;
1350  AsmToken peekToken();
1351  AsmToken getToken() const;
1352  SMLoc getLoc() const;
1353  void lex();
1354
1355public:
1356  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1357  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1358
1359  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1360  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1361  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1362  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1363  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1364  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1365
1366  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1367                            const unsigned MinVal,
1368                            const unsigned MaxVal,
1369                            const StringRef ErrMsg);
1370  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1371  bool parseSwizzleOffset(int64_t &Imm);
1372  bool parseSwizzleMacro(int64_t &Imm);
1373  bool parseSwizzleQuadPerm(int64_t &Imm);
1374  bool parseSwizzleBitmaskPerm(int64_t &Imm);
1375  bool parseSwizzleBroadcast(int64_t &Imm);
1376  bool parseSwizzleSwap(int64_t &Imm);
1377  bool parseSwizzleReverse(int64_t &Imm);
1378
1379  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1380  int64_t parseGPRIdxMacro();
1381
1382  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1383  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1384  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1385  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1386  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1387
1388  AMDGPUOperand::Ptr defaultDLC() const;
1389  AMDGPUOperand::Ptr defaultGLC() const;
1390  AMDGPUOperand::Ptr defaultSLC() const;
1391
1392  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1393  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1394  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1395  AMDGPUOperand::Ptr defaultFlatOffset() const;
1396
1397  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1398
1399  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1400               OptionalImmIndexMap &OptionalIdx);
1401  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1402  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1403  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1404
1405  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1406
1407  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1408               bool IsAtomic = false);
1409  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1410
1411  OperandMatchResultTy parseDim(OperandVector &Operands);
1412  OperandMatchResultTy parseDPP8(OperandVector &Operands);
1413  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1414  AMDGPUOperand::Ptr defaultRowMask() const;
1415  AMDGPUOperand::Ptr defaultBankMask() const;
1416  AMDGPUOperand::Ptr defaultBoundCtrl() const;
1417  AMDGPUOperand::Ptr defaultFI() const;
1418  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1419  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1420
1421  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1422                                    AMDGPUOperand::ImmTy Type);
1423  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1424  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1425  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1426  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1427  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1428  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1429  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1430               uint64_t BasicInstType,
1431               bool SkipDstVcc = false,
1432               bool SkipSrcVcc = false);
1433
1434  AMDGPUOperand::Ptr defaultBLGP() const;
1435  AMDGPUOperand::Ptr defaultCBSZ() const;
1436  AMDGPUOperand::Ptr defaultABID() const;
1437
1438  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1439  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1440};
1441
1442struct OptionalOperand {
1443  const char *Name;
1444  AMDGPUOperand::ImmTy Type;
1445  bool IsBit;
1446  bool (*ConvertResult)(int64_t&);
1447};
1448
1449} // end anonymous namespace
1450
1451// May be called with integer type with equivalent bitwidth.
1452static const fltSemantics *getFltSemantics(unsigned Size) {
1453  switch (Size) {
1454  case 4:
1455    return &APFloat::IEEEsingle();
1456  case 8:
1457    return &APFloat::IEEEdouble();
1458  case 2:
1459    return &APFloat::IEEEhalf();
1460  default:
1461    llvm_unreachable("unsupported fp type");
1462  }
1463}
1464
1465static const fltSemantics *getFltSemantics(MVT VT) {
1466  return getFltSemantics(VT.getSizeInBits() / 8);
1467}
1468
1469static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1470  switch (OperandType) {
1471  case AMDGPU::OPERAND_REG_IMM_INT32:
1472  case AMDGPU::OPERAND_REG_IMM_FP32:
1473  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1474  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1475  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1476  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1477    return &APFloat::IEEEsingle();
1478  case AMDGPU::OPERAND_REG_IMM_INT64:
1479  case AMDGPU::OPERAND_REG_IMM_FP64:
1480  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1481  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1482    return &APFloat::IEEEdouble();
1483  case AMDGPU::OPERAND_REG_IMM_INT16:
1484  case AMDGPU::OPERAND_REG_IMM_FP16:
1485  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1486  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1487  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1488  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1489  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1490  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1491  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1492  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1493  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1494  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1495    return &APFloat::IEEEhalf();
1496  default:
1497    llvm_unreachable("unsupported fp type");
1498  }
1499}
1500
1501//===----------------------------------------------------------------------===//
1502// Operand
1503//===----------------------------------------------------------------------===//
1504
1505static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1506  bool Lost;
1507
1508  // Convert literal to single precision
1509  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1510                                               APFloat::rmNearestTiesToEven,
1511                                               &Lost);
1512  // We allow precision lost but not overflow or underflow
1513  if (Status != APFloat::opOK &&
1514      Lost &&
1515      ((Status & APFloat::opOverflow)  != 0 ||
1516       (Status & APFloat::opUnderflow) != 0)) {
1517    return false;
1518  }
1519
1520  return true;
1521}
1522
1523static bool isSafeTruncation(int64_t Val, unsigned Size) {
1524  return isUIntN(Size, Val) || isIntN(Size, Val);
1525}
1526
1527bool AMDGPUOperand::isInlinableImm(MVT type) const {
1528
1529  // This is a hack to enable named inline values like
1530  // shared_base with both 32-bit and 64-bit operands.
1531  // Note that these values are defined as
1532  // 32-bit operands only.
1533  if (isInlineValue()) {
1534    return true;
1535  }
1536
1537  if (!isImmTy(ImmTyNone)) {
1538    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1539    return false;
1540  }
1541  // TODO: We should avoid using host float here. It would be better to
1542  // check the float bit values which is what a few other places do.
1543  // We've had bot failures before due to weird NaN support on mips hosts.
1544
1545  APInt Literal(64, Imm.Val);
1546
1547  if (Imm.IsFPImm) { // We got fp literal token
1548    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1549      return AMDGPU::isInlinableLiteral64(Imm.Val,
1550                                          AsmParser->hasInv2PiInlineImm());
1551    }
1552
1553    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1554    if (!canLosslesslyConvertToFPType(FPLiteral, type))
1555      return false;
1556
1557    if (type.getScalarSizeInBits() == 16) {
1558      return AMDGPU::isInlinableLiteral16(
1559        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1560        AsmParser->hasInv2PiInlineImm());
1561    }
1562
1563    // Check if single precision literal is inlinable
1564    return AMDGPU::isInlinableLiteral32(
1565      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1566      AsmParser->hasInv2PiInlineImm());
1567  }
1568
1569  // We got int literal token.
1570  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1571    return AMDGPU::isInlinableLiteral64(Imm.Val,
1572                                        AsmParser->hasInv2PiInlineImm());
1573  }
1574
1575  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1576    return false;
1577  }
1578
1579  if (type.getScalarSizeInBits() == 16) {
1580    return AMDGPU::isInlinableLiteral16(
1581      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1582      AsmParser->hasInv2PiInlineImm());
1583  }
1584
1585  return AMDGPU::isInlinableLiteral32(
1586    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1587    AsmParser->hasInv2PiInlineImm());
1588}
1589
1590bool AMDGPUOperand::isLiteralImm(MVT type) const {
1591  // Check that this immediate can be added as literal
1592  if (!isImmTy(ImmTyNone)) {
1593    return false;
1594  }
1595
1596  if (!Imm.IsFPImm) {
1597    // We got int literal token.
1598
1599    if (type == MVT::f64 && hasFPModifiers()) {
1600      // Cannot apply fp modifiers to int literals preserving the same semantics
1601      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1602      // disable these cases.
1603      return false;
1604    }
1605
1606    unsigned Size = type.getSizeInBits();
1607    if (Size == 64)
1608      Size = 32;
1609
1610    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1611    // types.
1612    return isSafeTruncation(Imm.Val, Size);
1613  }
1614
1615  // We got fp literal token
1616  if (type == MVT::f64) { // Expected 64-bit fp operand
1617    // We would set low 64-bits of literal to zeroes but we accept this literals
1618    return true;
1619  }
1620
1621  if (type == MVT::i64) { // Expected 64-bit int operand
1622    // We don't allow fp literals in 64-bit integer instructions. It is
1623    // unclear how we should encode them.
1624    return false;
1625  }
1626
1627  // We allow fp literals with f16x2 operands assuming that the specified
1628  // literal goes into the lower half and the upper half is zero. We also
1629  // require that the literal may be losslesly converted to f16.
1630  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1631                     (type == MVT::v2i16)? MVT::i16 : type;
1632
1633  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1634  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1635}
1636
1637bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1638  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1639}
1640
1641bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1642  if (AsmParser->isVI())
1643    return isVReg32();
1644  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1645    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1646  else
1647    return false;
1648}
1649
1650bool AMDGPUOperand::isSDWAFP16Operand() const {
1651  return isSDWAOperand(MVT::f16);
1652}
1653
1654bool AMDGPUOperand::isSDWAFP32Operand() const {
1655  return isSDWAOperand(MVT::f32);
1656}
1657
1658bool AMDGPUOperand::isSDWAInt16Operand() const {
1659  return isSDWAOperand(MVT::i16);
1660}
1661
1662bool AMDGPUOperand::isSDWAInt32Operand() const {
1663  return isSDWAOperand(MVT::i32);
1664}
1665
1666bool AMDGPUOperand::isBoolReg() const {
1667  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1668         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1669}
1670
1671uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1672{
1673  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1674  assert(Size == 2 || Size == 4 || Size == 8);
1675
1676  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1677
1678  if (Imm.Mods.Abs) {
1679    Val &= ~FpSignMask;
1680  }
1681  if (Imm.Mods.Neg) {
1682    Val ^= FpSignMask;
1683  }
1684
1685  return Val;
1686}
1687
1688void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1689  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1690                             Inst.getNumOperands())) {
1691    addLiteralImmOperand(Inst, Imm.Val,
1692                         ApplyModifiers &
1693                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1694  } else {
1695    assert(!isImmTy(ImmTyNone) || !hasModifiers());
1696    Inst.addOperand(MCOperand::createImm(Imm.Val));
1697  }
1698}
1699
1700void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1701  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1702  auto OpNum = Inst.getNumOperands();
1703  // Check that this operand accepts literals
1704  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1705
1706  if (ApplyModifiers) {
1707    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1708    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1709    Val = applyInputFPModifiers(Val, Size);
1710  }
1711
1712  APInt Literal(64, Val);
1713  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1714
1715  if (Imm.IsFPImm) { // We got fp literal token
1716    switch (OpTy) {
1717    case AMDGPU::OPERAND_REG_IMM_INT64:
1718    case AMDGPU::OPERAND_REG_IMM_FP64:
1719    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1720    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1721      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1722                                       AsmParser->hasInv2PiInlineImm())) {
1723        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1724        return;
1725      }
1726
1727      // Non-inlineable
1728      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1729        // For fp operands we check if low 32 bits are zeros
1730        if (Literal.getLoBits(32) != 0) {
1731          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1732          "Can't encode literal as exact 64-bit floating-point operand. "
1733          "Low 32-bits will be set to zero");
1734        }
1735
1736        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1737        return;
1738      }
1739
1740      // We don't allow fp literals in 64-bit integer instructions. It is
1741      // unclear how we should encode them. This case should be checked earlier
1742      // in predicate methods (isLiteralImm())
1743      llvm_unreachable("fp literal in 64-bit integer instruction.");
1744
1745    case AMDGPU::OPERAND_REG_IMM_INT32:
1746    case AMDGPU::OPERAND_REG_IMM_FP32:
1747    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1748    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1749    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1750    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1751    case AMDGPU::OPERAND_REG_IMM_INT16:
1752    case AMDGPU::OPERAND_REG_IMM_FP16:
1753    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1754    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1755    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1756    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1757    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1758    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1759    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1760    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1761    case AMDGPU::OPERAND_REG_IMM_V2INT16:
1762    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1763      bool lost;
1764      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1765      // Convert literal to single precision
1766      FPLiteral.convert(*getOpFltSemantics(OpTy),
1767                        APFloat::rmNearestTiesToEven, &lost);
1768      // We allow precision lost but not overflow or underflow. This should be
1769      // checked earlier in isLiteralImm()
1770
1771      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1772      Inst.addOperand(MCOperand::createImm(ImmVal));
1773      return;
1774    }
1775    default:
1776      llvm_unreachable("invalid operand size");
1777    }
1778
1779    return;
1780  }
1781
1782  // We got int literal token.
1783  // Only sign extend inline immediates.
1784  switch (OpTy) {
1785  case AMDGPU::OPERAND_REG_IMM_INT32:
1786  case AMDGPU::OPERAND_REG_IMM_FP32:
1787  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1788  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1789  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1790  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1791  case AMDGPU::OPERAND_REG_IMM_V2INT16:
1792  case AMDGPU::OPERAND_REG_IMM_V2FP16:
1793    if (isSafeTruncation(Val, 32) &&
1794        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1795                                     AsmParser->hasInv2PiInlineImm())) {
1796      Inst.addOperand(MCOperand::createImm(Val));
1797      return;
1798    }
1799
1800    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1801    return;
1802
1803  case AMDGPU::OPERAND_REG_IMM_INT64:
1804  case AMDGPU::OPERAND_REG_IMM_FP64:
1805  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1806  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1807    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1808      Inst.addOperand(MCOperand::createImm(Val));
1809      return;
1810    }
1811
1812    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1813    return;
1814
1815  case AMDGPU::OPERAND_REG_IMM_INT16:
1816  case AMDGPU::OPERAND_REG_IMM_FP16:
1817  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1818  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1819  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1820  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1821    if (isSafeTruncation(Val, 16) &&
1822        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1823                                     AsmParser->hasInv2PiInlineImm())) {
1824      Inst.addOperand(MCOperand::createImm(Val));
1825      return;
1826    }
1827
1828    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1829    return;
1830
1831  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1832  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1833  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1834  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1835    assert(isSafeTruncation(Val, 16));
1836    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1837                                        AsmParser->hasInv2PiInlineImm()));
1838
1839    Inst.addOperand(MCOperand::createImm(Val));
1840    return;
1841  }
1842  default:
1843    llvm_unreachable("invalid operand size");
1844  }
1845}
1846
1847template <unsigned Bitwidth>
1848void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1849  APInt Literal(64, Imm.Val);
1850
1851  if (!Imm.IsFPImm) {
1852    // We got int literal token.
1853    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1854    return;
1855  }
1856
1857  bool Lost;
1858  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1859  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1860                    APFloat::rmNearestTiesToEven, &Lost);
1861  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1862}
1863
1864void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1865  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1866}
1867
1868static bool isInlineValue(unsigned Reg) {
1869  switch (Reg) {
1870  case AMDGPU::SRC_SHARED_BASE:
1871  case AMDGPU::SRC_SHARED_LIMIT:
1872  case AMDGPU::SRC_PRIVATE_BASE:
1873  case AMDGPU::SRC_PRIVATE_LIMIT:
1874  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1875    return true;
1876  case AMDGPU::SRC_VCCZ:
1877  case AMDGPU::SRC_EXECZ:
1878  case AMDGPU::SRC_SCC:
1879    return true;
1880  case AMDGPU::SGPR_NULL:
1881    return true;
1882  default:
1883    return false;
1884  }
1885}
1886
1887bool AMDGPUOperand::isInlineValue() const {
1888  return isRegKind() && ::isInlineValue(getReg());
1889}
1890
1891//===----------------------------------------------------------------------===//
1892// AsmParser
1893//===----------------------------------------------------------------------===//
1894
1895static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1896  if (Is == IS_VGPR) {
1897    switch (RegWidth) {
1898      default: return -1;
1899      case 1: return AMDGPU::VGPR_32RegClassID;
1900      case 2: return AMDGPU::VReg_64RegClassID;
1901      case 3: return AMDGPU::VReg_96RegClassID;
1902      case 4: return AMDGPU::VReg_128RegClassID;
1903      case 5: return AMDGPU::VReg_160RegClassID;
1904      case 8: return AMDGPU::VReg_256RegClassID;
1905      case 16: return AMDGPU::VReg_512RegClassID;
1906      case 32: return AMDGPU::VReg_1024RegClassID;
1907    }
1908  } else if (Is == IS_TTMP) {
1909    switch (RegWidth) {
1910      default: return -1;
1911      case 1: return AMDGPU::TTMP_32RegClassID;
1912      case 2: return AMDGPU::TTMP_64RegClassID;
1913      case 4: return AMDGPU::TTMP_128RegClassID;
1914      case 8: return AMDGPU::TTMP_256RegClassID;
1915      case 16: return AMDGPU::TTMP_512RegClassID;
1916    }
1917  } else if (Is == IS_SGPR) {
1918    switch (RegWidth) {
1919      default: return -1;
1920      case 1: return AMDGPU::SGPR_32RegClassID;
1921      case 2: return AMDGPU::SGPR_64RegClassID;
1922      case 4: return AMDGPU::SGPR_128RegClassID;
1923      case 8: return AMDGPU::SGPR_256RegClassID;
1924      case 16: return AMDGPU::SGPR_512RegClassID;
1925    }
1926  } else if (Is == IS_AGPR) {
1927    switch (RegWidth) {
1928      default: return -1;
1929      case 1: return AMDGPU::AGPR_32RegClassID;
1930      case 2: return AMDGPU::AReg_64RegClassID;
1931      case 4: return AMDGPU::AReg_128RegClassID;
1932      case 16: return AMDGPU::AReg_512RegClassID;
1933      case 32: return AMDGPU::AReg_1024RegClassID;
1934    }
1935  }
1936  return -1;
1937}
1938
1939static unsigned getSpecialRegForName(StringRef RegName) {
1940  return StringSwitch<unsigned>(RegName)
1941    .Case("exec", AMDGPU::EXEC)
1942    .Case("vcc", AMDGPU::VCC)
1943    .Case("flat_scratch", AMDGPU::FLAT_SCR)
1944    .Case("xnack_mask", AMDGPU::XNACK_MASK)
1945    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1946    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1947    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1948    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1949    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1950    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1951    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1952    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1953    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1954    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1955    .Case("lds_direct", AMDGPU::LDS_DIRECT)
1956    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1957    .Case("m0", AMDGPU::M0)
1958    .Case("vccz", AMDGPU::SRC_VCCZ)
1959    .Case("src_vccz", AMDGPU::SRC_VCCZ)
1960    .Case("execz", AMDGPU::SRC_EXECZ)
1961    .Case("src_execz", AMDGPU::SRC_EXECZ)
1962    .Case("scc", AMDGPU::SRC_SCC)
1963    .Case("src_scc", AMDGPU::SRC_SCC)
1964    .Case("tba", AMDGPU::TBA)
1965    .Case("tma", AMDGPU::TMA)
1966    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1967    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1968    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1969    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1970    .Case("vcc_lo", AMDGPU::VCC_LO)
1971    .Case("vcc_hi", AMDGPU::VCC_HI)
1972    .Case("exec_lo", AMDGPU::EXEC_LO)
1973    .Case("exec_hi", AMDGPU::EXEC_HI)
1974    .Case("tma_lo", AMDGPU::TMA_LO)
1975    .Case("tma_hi", AMDGPU::TMA_HI)
1976    .Case("tba_lo", AMDGPU::TBA_LO)
1977    .Case("tba_hi", AMDGPU::TBA_HI)
1978    .Case("null", AMDGPU::SGPR_NULL)
1979    .Default(AMDGPU::NoRegister);
1980}
1981
1982bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1983                                    SMLoc &EndLoc) {
1984  auto R = parseRegister();
1985  if (!R) return true;
1986  assert(R->isReg());
1987  RegNo = R->getReg();
1988  StartLoc = R->getStartLoc();
1989  EndLoc = R->getEndLoc();
1990  return false;
1991}
1992
1993bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1994                                            RegisterKind RegKind, unsigned Reg1) {
1995  switch (RegKind) {
1996  case IS_SPECIAL:
1997    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1998      Reg = AMDGPU::EXEC;
1999      RegWidth = 2;
2000      return true;
2001    }
2002    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2003      Reg = AMDGPU::FLAT_SCR;
2004      RegWidth = 2;
2005      return true;
2006    }
2007    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2008      Reg = AMDGPU::XNACK_MASK;
2009      RegWidth = 2;
2010      return true;
2011    }
2012    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2013      Reg = AMDGPU::VCC;
2014      RegWidth = 2;
2015      return true;
2016    }
2017    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2018      Reg = AMDGPU::TBA;
2019      RegWidth = 2;
2020      return true;
2021    }
2022    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2023      Reg = AMDGPU::TMA;
2024      RegWidth = 2;
2025      return true;
2026    }
2027    return false;
2028  case IS_VGPR:
2029  case IS_SGPR:
2030  case IS_AGPR:
2031  case IS_TTMP:
2032    if (Reg1 != Reg + RegWidth) {
2033      return false;
2034    }
2035    RegWidth++;
2036    return true;
2037  default:
2038    llvm_unreachable("unexpected register kind");
2039  }
2040}
2041
2042struct RegInfo {
2043  StringLiteral Name;
2044  RegisterKind Kind;
2045};
2046
2047static constexpr RegInfo RegularRegisters[] = {
2048  {{"v"},    IS_VGPR},
2049  {{"s"},    IS_SGPR},
2050  {{"ttmp"}, IS_TTMP},
2051  {{"acc"},  IS_AGPR},
2052  {{"a"},    IS_AGPR},
2053};
2054
2055static bool isRegularReg(RegisterKind Kind) {
2056  return Kind == IS_VGPR ||
2057         Kind == IS_SGPR ||
2058         Kind == IS_TTMP ||
2059         Kind == IS_AGPR;
2060}
2061
2062static const RegInfo* getRegularRegInfo(StringRef Str) {
2063  for (const RegInfo &Reg : RegularRegisters)
2064    if (Str.startswith(Reg.Name))
2065      return &Reg;
2066  return nullptr;
2067}
2068
2069static bool getRegNum(StringRef Str, unsigned& Num) {
2070  return !Str.getAsInteger(10, Num);
2071}
2072
2073bool
2074AMDGPUAsmParser::isRegister(const AsmToken &Token,
2075                            const AsmToken &NextToken) const {
2076
2077  // A list of consecutive registers: [s0,s1,s2,s3]
2078  if (Token.is(AsmToken::LBrac))
2079    return true;
2080
2081  if (!Token.is(AsmToken::Identifier))
2082    return false;
2083
2084  // A single register like s0 or a range of registers like s[0:1]
2085
2086  StringRef Str = Token.getString();
2087  const RegInfo *Reg = getRegularRegInfo(Str);
2088  if (Reg) {
2089    StringRef RegName = Reg->Name;
2090    StringRef RegSuffix = Str.substr(RegName.size());
2091    if (!RegSuffix.empty()) {
2092      unsigned Num;
2093      // A single register with an index: rXX
2094      if (getRegNum(RegSuffix, Num))
2095        return true;
2096    } else {
2097      // A range of registers: r[XX:YY].
2098      if (NextToken.is(AsmToken::LBrac))
2099        return true;
2100    }
2101  }
2102
2103  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2104}
2105
2106bool
2107AMDGPUAsmParser::isRegister()
2108{
2109  return isRegister(getToken(), peekToken());
2110}
2111
2112unsigned
2113AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2114                               unsigned RegNum,
2115                               unsigned RegWidth) {
2116
2117  assert(isRegularReg(RegKind));
2118
2119  unsigned AlignSize = 1;
2120  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2121    // SGPR and TTMP registers must be aligned.
2122    // Max required alignment is 4 dwords.
2123    AlignSize = std::min(RegWidth, 4u);
2124  }
2125
2126  if (RegNum % AlignSize != 0)
2127    return AMDGPU::NoRegister;
2128
2129  unsigned RegIdx = RegNum / AlignSize;
2130  int RCID = getRegClass(RegKind, RegWidth);
2131  if (RCID == -1)
2132    return AMDGPU::NoRegister;
2133
2134  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2135  const MCRegisterClass RC = TRI->getRegClass(RCID);
2136  if (RegIdx >= RC.getNumRegs())
2137    return AMDGPU::NoRegister;
2138
2139  return RC.getRegister(RegIdx);
2140}
2141
2142bool
2143AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2144  int64_t RegLo, RegHi;
2145  if (!trySkipToken(AsmToken::LBrac))
2146    return false;
2147
2148  if (!parseExpr(RegLo))
2149    return false;
2150
2151  if (trySkipToken(AsmToken::Colon)) {
2152    if (!parseExpr(RegHi))
2153      return false;
2154  } else {
2155    RegHi = RegLo;
2156  }
2157
2158  if (!trySkipToken(AsmToken::RBrac))
2159    return false;
2160
2161  if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2162    return false;
2163
2164  Num = static_cast<unsigned>(RegLo);
2165  Width = (RegHi - RegLo) + 1;
2166  return true;
2167}
2168
2169unsigned
2170AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2171                                 unsigned &RegNum,
2172                                 unsigned &RegWidth) {
2173  assert(isToken(AsmToken::Identifier));
2174  unsigned Reg = getSpecialRegForName(getTokenStr());
2175  if (Reg) {
2176    RegNum = 0;
2177    RegWidth = 1;
2178    RegKind = IS_SPECIAL;
2179    lex(); // skip register name
2180  }
2181  return Reg;
2182}
2183
2184unsigned
2185AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2186                                 unsigned &RegNum,
2187                                 unsigned &RegWidth) {
2188  assert(isToken(AsmToken::Identifier));
2189  StringRef RegName = getTokenStr();
2190
2191  const RegInfo *RI = getRegularRegInfo(RegName);
2192  if (!RI)
2193    return AMDGPU::NoRegister;
2194  lex(); // skip register name
2195
2196  RegKind = RI->Kind;
2197  StringRef RegSuffix = RegName.substr(RI->Name.size());
2198  if (!RegSuffix.empty()) {
2199    // Single 32-bit register: vXX.
2200    if (!getRegNum(RegSuffix, RegNum))
2201      return AMDGPU::NoRegister;
2202    RegWidth = 1;
2203  } else {
2204    // Range of registers: v[XX:YY]. ":YY" is optional.
2205    if (!ParseRegRange(RegNum, RegWidth))
2206      return AMDGPU::NoRegister;
2207  }
2208
2209  return getRegularReg(RegKind, RegNum, RegWidth);
2210}
2211
2212unsigned
2213AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2214                              unsigned &RegNum,
2215                              unsigned &RegWidth) {
2216  unsigned Reg = AMDGPU::NoRegister;
2217
2218  if (!trySkipToken(AsmToken::LBrac))
2219    return AMDGPU::NoRegister;
2220
2221  // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2222
2223  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2224    return AMDGPU::NoRegister;
2225  if (RegWidth != 1)
2226    return AMDGPU::NoRegister;
2227
2228  for (; trySkipToken(AsmToken::Comma); ) {
2229    RegisterKind NextRegKind;
2230    unsigned NextReg, NextRegNum, NextRegWidth;
2231
2232    if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2233      return AMDGPU::NoRegister;
2234    if (NextRegWidth != 1)
2235      return AMDGPU::NoRegister;
2236    if (NextRegKind != RegKind)
2237      return AMDGPU::NoRegister;
2238    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2239      return AMDGPU::NoRegister;
2240  }
2241
2242  if (!trySkipToken(AsmToken::RBrac))
2243    return AMDGPU::NoRegister;
2244
2245  if (isRegularReg(RegKind))
2246    Reg = getRegularReg(RegKind, RegNum, RegWidth);
2247
2248  return Reg;
2249}
2250
2251bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2252                                          unsigned &Reg,
2253                                          unsigned &RegNum,
2254                                          unsigned &RegWidth) {
2255  Reg = AMDGPU::NoRegister;
2256
2257  if (isToken(AsmToken::Identifier)) {
2258    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2259    if (Reg == AMDGPU::NoRegister)
2260      Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2261  } else {
2262    Reg = ParseRegList(RegKind, RegNum, RegWidth);
2263  }
2264
2265  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2266  return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2267}
2268
2269Optional<StringRef>
2270AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2271  switch (RegKind) {
2272  case IS_VGPR:
2273    return StringRef(".amdgcn.next_free_vgpr");
2274  case IS_SGPR:
2275    return StringRef(".amdgcn.next_free_sgpr");
2276  default:
2277    return None;
2278  }
2279}
2280
2281void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2282  auto SymbolName = getGprCountSymbolName(RegKind);
2283  assert(SymbolName && "initializing invalid register kind");
2284  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2285  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2286}
2287
2288bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2289                                            unsigned DwordRegIndex,
2290                                            unsigned RegWidth) {
2291  // Symbols are only defined for GCN targets
2292  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2293    return true;
2294
2295  auto SymbolName = getGprCountSymbolName(RegKind);
2296  if (!SymbolName)
2297    return true;
2298  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2299
2300  int64_t NewMax = DwordRegIndex + RegWidth - 1;
2301  int64_t OldCount;
2302
2303  if (!Sym->isVariable())
2304    return !Error(getParser().getTok().getLoc(),
2305                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2306  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2307    return !Error(
2308        getParser().getTok().getLoc(),
2309        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2310
2311  if (OldCount <= NewMax)
2312    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2313
2314  return true;
2315}
2316
2317std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2318  const auto &Tok = Parser.getTok();
2319  SMLoc StartLoc = Tok.getLoc();
2320  SMLoc EndLoc = Tok.getEndLoc();
2321  RegisterKind RegKind;
2322  unsigned Reg, RegNum, RegWidth;
2323
2324  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2325    //FIXME: improve error messages (bug 41303).
2326    Error(StartLoc, "not a valid operand.");
2327    return nullptr;
2328  }
2329  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2330    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2331      return nullptr;
2332  } else
2333    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2334  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2335}
2336
2337OperandMatchResultTy
2338AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2339  // TODO: add syntactic sugar for 1/(2*PI)
2340
2341  assert(!isRegister());
2342  assert(!isModifier());
2343
2344  const auto& Tok = getToken();
2345  const auto& NextTok = peekToken();
2346  bool IsReal = Tok.is(AsmToken::Real);
2347  SMLoc S = getLoc();
2348  bool Negate = false;
2349
2350  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2351    lex();
2352    IsReal = true;
2353    Negate = true;
2354  }
2355
2356  if (IsReal) {
2357    // Floating-point expressions are not supported.
2358    // Can only allow floating-point literals with an
2359    // optional sign.
2360
2361    StringRef Num = getTokenStr();
2362    lex();
2363
2364    APFloat RealVal(APFloat::IEEEdouble());
2365    auto roundMode = APFloat::rmNearestTiesToEven;
2366    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2367      return MatchOperand_ParseFail;
2368    }
2369    if (Negate)
2370      RealVal.changeSign();
2371
2372    Operands.push_back(
2373      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2374                               AMDGPUOperand::ImmTyNone, true));
2375
2376    return MatchOperand_Success;
2377
2378  } else {
2379    int64_t IntVal;
2380    const MCExpr *Expr;
2381    SMLoc S = getLoc();
2382
2383    if (HasSP3AbsModifier) {
2384      // This is a workaround for handling expressions
2385      // as arguments of SP3 'abs' modifier, for example:
2386      //     |1.0|
2387      //     |-1|
2388      //     |1+x|
2389      // This syntax is not compatible with syntax of standard
2390      // MC expressions (due to the trailing '|').
2391      SMLoc EndLoc;
2392      if (getParser().parsePrimaryExpr(Expr, EndLoc))
2393        return MatchOperand_ParseFail;
2394    } else {
2395      if (Parser.parseExpression(Expr))
2396        return MatchOperand_ParseFail;
2397    }
2398
2399    if (Expr->evaluateAsAbsolute(IntVal)) {
2400      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2401    } else {
2402      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2403    }
2404
2405    return MatchOperand_Success;
2406  }
2407
2408  return MatchOperand_NoMatch;
2409}
2410
2411OperandMatchResultTy
2412AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2413  if (!isRegister())
2414    return MatchOperand_NoMatch;
2415
2416  if (auto R = parseRegister()) {
2417    assert(R->isReg());
2418    Operands.push_back(std::move(R));
2419    return MatchOperand_Success;
2420  }
2421  return MatchOperand_ParseFail;
2422}
2423
2424OperandMatchResultTy
2425AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2426  auto res = parseReg(Operands);
2427  if (res != MatchOperand_NoMatch) {
2428    return res;
2429  } else if (isModifier()) {
2430    return MatchOperand_NoMatch;
2431  } else {
2432    return parseImm(Operands, HasSP3AbsMod);
2433  }
2434}
2435
2436bool
2437AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2438  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2439    const auto &str = Token.getString();
2440    return str == "abs" || str == "neg" || str == "sext";
2441  }
2442  return false;
2443}
2444
2445bool
2446AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2447  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2448}
2449
2450bool
2451AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2452  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2453}
2454
2455bool
2456AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2457  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2458}
2459
2460// Check if this is an operand modifier or an opcode modifier
2461// which may look like an expression but it is not. We should
2462// avoid parsing these modifiers as expressions. Currently
2463// recognized sequences are:
2464//   |...|
2465//   abs(...)
2466//   neg(...)
2467//   sext(...)
2468//   -reg
2469//   -|...|
2470//   -abs(...)
2471//   name:...
2472// Note that simple opcode modifiers like 'gds' may be parsed as
2473// expressions; this is a special case. See getExpressionAsToken.
2474//
2475bool
2476AMDGPUAsmParser::isModifier() {
2477
2478  AsmToken Tok = getToken();
2479  AsmToken NextToken[2];
2480  peekTokens(NextToken);
2481
2482  return isOperandModifier(Tok, NextToken[0]) ||
2483         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2484         isOpcodeModifierWithVal(Tok, NextToken[0]);
2485}
2486
2487// Check if the current token is an SP3 'neg' modifier.
2488// Currently this modifier is allowed in the following context:
2489//
2490// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2491// 2. Before an 'abs' modifier: -abs(...)
2492// 3. Before an SP3 'abs' modifier: -|...|
2493//
2494// In all other cases "-" is handled as a part
2495// of an expression that follows the sign.
2496//
2497// Note: When "-" is followed by an integer literal,
2498// this is interpreted as integer negation rather
2499// than a floating-point NEG modifier applied to N.
2500// Beside being contr-intuitive, such use of floating-point
2501// NEG modifier would have resulted in different meaning
2502// of integer literals used with VOP1/2/C and VOP3,
2503// for example:
2504//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2505//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2506// Negative fp literals with preceding "-" are
2507// handled likewise for unifomtity
2508//
2509bool
2510AMDGPUAsmParser::parseSP3NegModifier() {
2511
2512  AsmToken NextToken[2];
2513  peekTokens(NextToken);
2514
2515  if (isToken(AsmToken::Minus) &&
2516      (isRegister(NextToken[0], NextToken[1]) ||
2517       NextToken[0].is(AsmToken::Pipe) ||
2518       isId(NextToken[0], "abs"))) {
2519    lex();
2520    return true;
2521  }
2522
2523  return false;
2524}
2525
2526OperandMatchResultTy
2527AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2528                                              bool AllowImm) {
2529  bool Neg, SP3Neg;
2530  bool Abs, SP3Abs;
2531  SMLoc Loc;
2532
2533  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2534  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2535    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2536    return MatchOperand_ParseFail;
2537  }
2538
2539  SP3Neg = parseSP3NegModifier();
2540
2541  Loc = getLoc();
2542  Neg = trySkipId("neg");
2543  if (Neg && SP3Neg) {
2544    Error(Loc, "expected register or immediate");
2545    return MatchOperand_ParseFail;
2546  }
2547  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2548    return MatchOperand_ParseFail;
2549
2550  Abs = trySkipId("abs");
2551  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2552    return MatchOperand_ParseFail;
2553
2554  Loc = getLoc();
2555  SP3Abs = trySkipToken(AsmToken::Pipe);
2556  if (Abs && SP3Abs) {
2557    Error(Loc, "expected register or immediate");
2558    return MatchOperand_ParseFail;
2559  }
2560
2561  OperandMatchResultTy Res;
2562  if (AllowImm) {
2563    Res = parseRegOrImm(Operands, SP3Abs);
2564  } else {
2565    Res = parseReg(Operands);
2566  }
2567  if (Res != MatchOperand_Success) {
2568    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2569  }
2570
2571  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2572    return MatchOperand_ParseFail;
2573  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2574    return MatchOperand_ParseFail;
2575  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2576    return MatchOperand_ParseFail;
2577
2578  AMDGPUOperand::Modifiers Mods;
2579  Mods.Abs = Abs || SP3Abs;
2580  Mods.Neg = Neg || SP3Neg;
2581
2582  if (Mods.hasFPModifiers()) {
2583    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2584    if (Op.isExpr()) {
2585      Error(Op.getStartLoc(), "expected an absolute expression");
2586      return MatchOperand_ParseFail;
2587    }
2588    Op.setModifiers(Mods);
2589  }
2590  return MatchOperand_Success;
2591}
2592
2593OperandMatchResultTy
2594AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2595                                               bool AllowImm) {
2596  bool Sext = trySkipId("sext");
2597  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2598    return MatchOperand_ParseFail;
2599
2600  OperandMatchResultTy Res;
2601  if (AllowImm) {
2602    Res = parseRegOrImm(Operands);
2603  } else {
2604    Res = parseReg(Operands);
2605  }
2606  if (Res != MatchOperand_Success) {
2607    return Sext? MatchOperand_ParseFail : Res;
2608  }
2609
2610  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2611    return MatchOperand_ParseFail;
2612
2613  AMDGPUOperand::Modifiers Mods;
2614  Mods.Sext = Sext;
2615
2616  if (Mods.hasIntModifiers()) {
2617    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2618    if (Op.isExpr()) {
2619      Error(Op.getStartLoc(), "expected an absolute expression");
2620      return MatchOperand_ParseFail;
2621    }
2622    Op.setModifiers(Mods);
2623  }
2624
2625  return MatchOperand_Success;
2626}
2627
2628OperandMatchResultTy
2629AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2630  return parseRegOrImmWithFPInputMods(Operands, false);
2631}
2632
2633OperandMatchResultTy
2634AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2635  return parseRegOrImmWithIntInputMods(Operands, false);
2636}
2637
2638OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2639  auto Loc = getLoc();
2640  if (trySkipId("off")) {
2641    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2642                                                AMDGPUOperand::ImmTyOff, false));
2643    return MatchOperand_Success;
2644  }
2645
2646  if (!isRegister())
2647    return MatchOperand_NoMatch;
2648
2649  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2650  if (Reg) {
2651    Operands.push_back(std::move(Reg));
2652    return MatchOperand_Success;
2653  }
2654
2655  return MatchOperand_ParseFail;
2656
2657}
2658
2659unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2660  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2661
2662  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2663      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2664      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2665      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2666    return Match_InvalidOperand;
2667
2668  if ((TSFlags & SIInstrFlags::VOP3) &&
2669      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2670      getForcedEncodingSize() != 64)
2671    return Match_PreferE32;
2672
2673  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2674      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
2675    // v_mac_f32/16 allow only dst_sel == DWORD;
2676    auto OpNum =
2677        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2678    const auto &Op = Inst.getOperand(OpNum);
2679    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2680      return Match_InvalidOperand;
2681    }
2682  }
2683
2684  return Match_Success;
2685}
2686
2687// What asm variants we should check
2688ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2689  if (getForcedEncodingSize() == 32) {
2690    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2691    return makeArrayRef(Variants);
2692  }
2693
2694  if (isForcedVOP3()) {
2695    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2696    return makeArrayRef(Variants);
2697  }
2698
2699  if (isForcedSDWA()) {
2700    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2701                                        AMDGPUAsmVariants::SDWA9};
2702    return makeArrayRef(Variants);
2703  }
2704
2705  if (isForcedDPP()) {
2706    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2707    return makeArrayRef(Variants);
2708  }
2709
2710  static const unsigned Variants[] = {
2711    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2712    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2713  };
2714
2715  return makeArrayRef(Variants);
2716}
2717
2718unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2719  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2720  const unsigned Num = Desc.getNumImplicitUses();
2721  for (unsigned i = 0; i < Num; ++i) {
2722    unsigned Reg = Desc.ImplicitUses[i];
2723    switch (Reg) {
2724    case AMDGPU::FLAT_SCR:
2725    case AMDGPU::VCC:
2726    case AMDGPU::VCC_LO:
2727    case AMDGPU::VCC_HI:
2728    case AMDGPU::M0:
2729      return Reg;
2730    default:
2731      break;
2732    }
2733  }
2734  return AMDGPU::NoRegister;
2735}
2736
2737// NB: This code is correct only when used to check constant
2738// bus limitations because GFX7 support no f16 inline constants.
2739// Note that there are no cases when a GFX7 opcode violates
2740// constant bus limitations due to the use of an f16 constant.
2741bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2742                                       unsigned OpIdx) const {
2743  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2744
2745  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2746    return false;
2747  }
2748
2749  const MCOperand &MO = Inst.getOperand(OpIdx);
2750
2751  int64_t Val = MO.getImm();
2752  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2753
2754  switch (OpSize) { // expected operand size
2755  case 8:
2756    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2757  case 4:
2758    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2759  case 2: {
2760    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2761    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2762        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2763        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2764        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2765        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2766        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2767      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2768    } else {
2769      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2770    }
2771  }
2772  default:
2773    llvm_unreachable("invalid operand size");
2774  }
2775}
2776
2777unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2778  if (!isGFX10())
2779    return 1;
2780
2781  switch (Opcode) {
2782  // 64-bit shift instructions can use only one scalar value input
2783  case AMDGPU::V_LSHLREV_B64:
2784  case AMDGPU::V_LSHLREV_B64_gfx10:
2785  case AMDGPU::V_LSHL_B64:
2786  case AMDGPU::V_LSHRREV_B64:
2787  case AMDGPU::V_LSHRREV_B64_gfx10:
2788  case AMDGPU::V_LSHR_B64:
2789  case AMDGPU::V_ASHRREV_I64:
2790  case AMDGPU::V_ASHRREV_I64_gfx10:
2791  case AMDGPU::V_ASHR_I64:
2792    return 1;
2793  default:
2794    return 2;
2795  }
2796}
2797
2798bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2799  const MCOperand &MO = Inst.getOperand(OpIdx);
2800  if (MO.isImm()) {
2801    return !isInlineConstant(Inst, OpIdx);
2802  } else if (MO.isReg()) {
2803    auto Reg = MO.getReg();
2804    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2805    return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2806  } else {
2807    return true;
2808  }
2809}
2810
2811bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2812  const unsigned Opcode = Inst.getOpcode();
2813  const MCInstrDesc &Desc = MII.get(Opcode);
2814  unsigned ConstantBusUseCount = 0;
2815  unsigned NumLiterals = 0;
2816  unsigned LiteralSize;
2817
2818  if (Desc.TSFlags &
2819      (SIInstrFlags::VOPC |
2820       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2821       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2822       SIInstrFlags::SDWA)) {
2823    // Check special imm operands (used by madmk, etc)
2824    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2825      ++ConstantBusUseCount;
2826    }
2827
2828    SmallDenseSet<unsigned> SGPRsUsed;
2829    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2830    if (SGPRUsed != AMDGPU::NoRegister) {
2831      SGPRsUsed.insert(SGPRUsed);
2832      ++ConstantBusUseCount;
2833    }
2834
2835    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2836    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2837    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2838
2839    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2840
2841    for (int OpIdx : OpIndices) {
2842      if (OpIdx == -1) break;
2843
2844      const MCOperand &MO = Inst.getOperand(OpIdx);
2845      if (usesConstantBus(Inst, OpIdx)) {
2846        if (MO.isReg()) {
2847          const unsigned Reg = mc2PseudoReg(MO.getReg());
2848          // Pairs of registers with a partial intersections like these
2849          //   s0, s[0:1]
2850          //   flat_scratch_lo, flat_scratch
2851          //   flat_scratch_lo, flat_scratch_hi
2852          // are theoretically valid but they are disabled anyway.
2853          // Note that this code mimics SIInstrInfo::verifyInstruction
2854          if (!SGPRsUsed.count(Reg)) {
2855            SGPRsUsed.insert(Reg);
2856            ++ConstantBusUseCount;
2857          }
2858        } else { // Expression or a literal
2859
2860          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2861            continue; // special operand like VINTERP attr_chan
2862
2863          // An instruction may use only one literal.
2864          // This has been validated on the previous step.
2865          // See validateVOP3Literal.
2866          // This literal may be used as more than one operand.
2867          // If all these operands are of the same size,
2868          // this literal counts as one scalar value.
2869          // Otherwise it counts as 2 scalar values.
2870          // See "GFX10 Shader Programming", section 3.6.2.3.
2871
2872          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2873          if (Size < 4) Size = 4;
2874
2875          if (NumLiterals == 0) {
2876            NumLiterals = 1;
2877            LiteralSize = Size;
2878          } else if (LiteralSize != Size) {
2879            NumLiterals = 2;
2880          }
2881        }
2882      }
2883    }
2884  }
2885  ConstantBusUseCount += NumLiterals;
2886
2887  return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2888}
2889
2890bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2891  const unsigned Opcode = Inst.getOpcode();
2892  const MCInstrDesc &Desc = MII.get(Opcode);
2893
2894  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2895  if (DstIdx == -1 ||
2896      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2897    return true;
2898  }
2899
2900  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2901
2902  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2903  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2904  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2905
2906  assert(DstIdx != -1);
2907  const MCOperand &Dst = Inst.getOperand(DstIdx);
2908  assert(Dst.isReg());
2909  const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2910
2911  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2912
2913  for (int SrcIdx : SrcIndices) {
2914    if (SrcIdx == -1) break;
2915    const MCOperand &Src = Inst.getOperand(SrcIdx);
2916    if (Src.isReg()) {
2917      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2918      if (isRegIntersect(DstReg, SrcReg, TRI)) {
2919        return false;
2920      }
2921    }
2922  }
2923
2924  return true;
2925}
2926
2927bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2928
2929  const unsigned Opc = Inst.getOpcode();
2930  const MCInstrDesc &Desc = MII.get(Opc);
2931
2932  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2933    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2934    assert(ClampIdx != -1);
2935    return Inst.getOperand(ClampIdx).getImm() == 0;
2936  }
2937
2938  return true;
2939}
2940
2941bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2942
2943  const unsigned Opc = Inst.getOpcode();
2944  const MCInstrDesc &Desc = MII.get(Opc);
2945
2946  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2947    return true;
2948
2949  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2950  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2951  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2952
2953  assert(VDataIdx != -1);
2954  assert(DMaskIdx != -1);
2955  assert(TFEIdx != -1);
2956
2957  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2958  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2959  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2960  if (DMask == 0)
2961    DMask = 1;
2962
2963  unsigned DataSize =
2964    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2965  if (hasPackedD16()) {
2966    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2967    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2968      DataSize = (DataSize + 1) / 2;
2969  }
2970
2971  return (VDataSize / 4) == DataSize + TFESize;
2972}
2973
2974bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2975  const unsigned Opc = Inst.getOpcode();
2976  const MCInstrDesc &Desc = MII.get(Opc);
2977
2978  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2979    return true;
2980
2981  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2982  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2983      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2984  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2985  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2986  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2987
2988  assert(VAddr0Idx != -1);
2989  assert(SrsrcIdx != -1);
2990  assert(DimIdx != -1);
2991  assert(SrsrcIdx > VAddr0Idx);
2992
2993  unsigned Dim = Inst.getOperand(DimIdx).getImm();
2994  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2995  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2996  unsigned VAddrSize =
2997      IsNSA ? SrsrcIdx - VAddr0Idx
2998            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2999
3000  unsigned AddrSize = BaseOpcode->NumExtraArgs +
3001                      (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3002                      (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3003                      (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3004  if (!IsNSA) {
3005    if (AddrSize > 8)
3006      AddrSize = 16;
3007    else if (AddrSize > 4)
3008      AddrSize = 8;
3009  }
3010
3011  return VAddrSize == AddrSize;
3012}
3013
3014bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3015
3016  const unsigned Opc = Inst.getOpcode();
3017  const MCInstrDesc &Desc = MII.get(Opc);
3018
3019  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3020    return true;
3021  if (!Desc.mayLoad() || !Desc.mayStore())
3022    return true; // Not atomic
3023
3024  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3025  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3026
3027  // This is an incomplete check because image_atomic_cmpswap
3028  // may only use 0x3 and 0xf while other atomic operations
3029  // may use 0x1 and 0x3. However these limitations are
3030  // verified when we check that dmask matches dst size.
3031  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3032}
3033
3034bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3035
3036  const unsigned Opc = Inst.getOpcode();
3037  const MCInstrDesc &Desc = MII.get(Opc);
3038
3039  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3040    return true;
3041
3042  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3043  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3044
3045  // GATHER4 instructions use dmask in a different fashion compared to
3046  // other MIMG instructions. The only useful DMASK values are
3047  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3048  // (red,red,red,red) etc.) The ISA document doesn't mention
3049  // this.
3050  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3051}
3052
3053static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3054{
3055  switch (Opcode) {
3056  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3057  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3058  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3059    return true;
3060  default:
3061    return false;
3062  }
3063}
3064
3065// movrels* opcodes should only allow VGPRS as src0.
3066// This is specified in .td description for vop1/vop3,
3067// but sdwa is handled differently. See isSDWAOperand.
3068bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3069
3070  const unsigned Opc = Inst.getOpcode();
3071  const MCInstrDesc &Desc = MII.get(Opc);
3072
3073  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3074    return true;
3075
3076  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3077  assert(Src0Idx != -1);
3078
3079  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3080  if (!Src0.isReg())
3081    return false;
3082
3083  auto Reg = Src0.getReg();
3084  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3085  return !isSGPR(mc2PseudoReg(Reg), TRI);
3086}
3087
3088bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3089
3090  const unsigned Opc = Inst.getOpcode();
3091  const MCInstrDesc &Desc = MII.get(Opc);
3092
3093  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3094    return true;
3095
3096  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3097  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3098    if (isCI() || isSI())
3099      return false;
3100  }
3101
3102  return true;
3103}
3104
3105bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3106  const unsigned Opc = Inst.getOpcode();
3107  const MCInstrDesc &Desc = MII.get(Opc);
3108
3109  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3110    return true;
3111
3112  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3113  if (DimIdx < 0)
3114    return true;
3115
3116  long Imm = Inst.getOperand(DimIdx).getImm();
3117  if (Imm < 0 || Imm >= 8)
3118    return false;
3119
3120  return true;
3121}
3122
3123static bool IsRevOpcode(const unsigned Opcode)
3124{
3125  switch (Opcode) {
3126  case AMDGPU::V_SUBREV_F32_e32:
3127  case AMDGPU::V_SUBREV_F32_e64:
3128  case AMDGPU::V_SUBREV_F32_e32_gfx10:
3129  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3130  case AMDGPU::V_SUBREV_F32_e32_vi:
3131  case AMDGPU::V_SUBREV_F32_e64_gfx10:
3132  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3133  case AMDGPU::V_SUBREV_F32_e64_vi:
3134
3135  case AMDGPU::V_SUBREV_I32_e32:
3136  case AMDGPU::V_SUBREV_I32_e64:
3137  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3138  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3139
3140  case AMDGPU::V_SUBBREV_U32_e32:
3141  case AMDGPU::V_SUBBREV_U32_e64:
3142  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3143  case AMDGPU::V_SUBBREV_U32_e32_vi:
3144  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3145  case AMDGPU::V_SUBBREV_U32_e64_vi:
3146
3147  case AMDGPU::V_SUBREV_U32_e32:
3148  case AMDGPU::V_SUBREV_U32_e64:
3149  case AMDGPU::V_SUBREV_U32_e32_gfx9:
3150  case AMDGPU::V_SUBREV_U32_e32_vi:
3151  case AMDGPU::V_SUBREV_U32_e64_gfx9:
3152  case AMDGPU::V_SUBREV_U32_e64_vi:
3153
3154  case AMDGPU::V_SUBREV_F16_e32:
3155  case AMDGPU::V_SUBREV_F16_e64:
3156  case AMDGPU::V_SUBREV_F16_e32_gfx10:
3157  case AMDGPU::V_SUBREV_F16_e32_vi:
3158  case AMDGPU::V_SUBREV_F16_e64_gfx10:
3159  case AMDGPU::V_SUBREV_F16_e64_vi:
3160
3161  case AMDGPU::V_SUBREV_U16_e32:
3162  case AMDGPU::V_SUBREV_U16_e64:
3163  case AMDGPU::V_SUBREV_U16_e32_vi:
3164  case AMDGPU::V_SUBREV_U16_e64_vi:
3165
3166  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3167  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3168  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3169
3170  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3171  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3172
3173  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3174  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3175
3176  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3177  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3178
3179  case AMDGPU::V_LSHRREV_B32_e32:
3180  case AMDGPU::V_LSHRREV_B32_e64:
3181  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3182  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3183  case AMDGPU::V_LSHRREV_B32_e32_vi:
3184  case AMDGPU::V_LSHRREV_B32_e64_vi:
3185  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3186  case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3187
3188  case AMDGPU::V_ASHRREV_I32_e32:
3189  case AMDGPU::V_ASHRREV_I32_e64:
3190  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3191  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3192  case AMDGPU::V_ASHRREV_I32_e32_vi:
3193  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3194  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3195  case AMDGPU::V_ASHRREV_I32_e64_vi:
3196
3197  case AMDGPU::V_LSHLREV_B32_e32:
3198  case AMDGPU::V_LSHLREV_B32_e64:
3199  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3200  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3201  case AMDGPU::V_LSHLREV_B32_e32_vi:
3202  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3203  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3204  case AMDGPU::V_LSHLREV_B32_e64_vi:
3205
3206  case AMDGPU::V_LSHLREV_B16_e32:
3207  case AMDGPU::V_LSHLREV_B16_e64:
3208  case AMDGPU::V_LSHLREV_B16_e32_vi:
3209  case AMDGPU::V_LSHLREV_B16_e64_vi:
3210  case AMDGPU::V_LSHLREV_B16_gfx10:
3211
3212  case AMDGPU::V_LSHRREV_B16_e32:
3213  case AMDGPU::V_LSHRREV_B16_e64:
3214  case AMDGPU::V_LSHRREV_B16_e32_vi:
3215  case AMDGPU::V_LSHRREV_B16_e64_vi:
3216  case AMDGPU::V_LSHRREV_B16_gfx10:
3217
3218  case AMDGPU::V_ASHRREV_I16_e32:
3219  case AMDGPU::V_ASHRREV_I16_e64:
3220  case AMDGPU::V_ASHRREV_I16_e32_vi:
3221  case AMDGPU::V_ASHRREV_I16_e64_vi:
3222  case AMDGPU::V_ASHRREV_I16_gfx10:
3223
3224  case AMDGPU::V_LSHLREV_B64:
3225  case AMDGPU::V_LSHLREV_B64_gfx10:
3226  case AMDGPU::V_LSHLREV_B64_vi:
3227
3228  case AMDGPU::V_LSHRREV_B64:
3229  case AMDGPU::V_LSHRREV_B64_gfx10:
3230  case AMDGPU::V_LSHRREV_B64_vi:
3231
3232  case AMDGPU::V_ASHRREV_I64:
3233  case AMDGPU::V_ASHRREV_I64_gfx10:
3234  case AMDGPU::V_ASHRREV_I64_vi:
3235
3236  case AMDGPU::V_PK_LSHLREV_B16:
3237  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3238  case AMDGPU::V_PK_LSHLREV_B16_vi:
3239
3240  case AMDGPU::V_PK_LSHRREV_B16:
3241  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3242  case AMDGPU::V_PK_LSHRREV_B16_vi:
3243  case AMDGPU::V_PK_ASHRREV_I16:
3244  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3245  case AMDGPU::V_PK_ASHRREV_I16_vi:
3246    return true;
3247  default:
3248    return false;
3249  }
3250}
3251
3252bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3253
3254  using namespace SIInstrFlags;
3255  const unsigned Opcode = Inst.getOpcode();
3256  const MCInstrDesc &Desc = MII.get(Opcode);
3257
3258  // lds_direct register is defined so that it can be used
3259  // with 9-bit operands only. Ignore encodings which do not accept these.
3260  if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3261    return true;
3262
3263  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266
3267  const int SrcIndices[] = { Src1Idx, Src2Idx };
3268
3269  // lds_direct cannot be specified as either src1 or src2.
3270  for (int SrcIdx : SrcIndices) {
3271    if (SrcIdx == -1) break;
3272    const MCOperand &Src = Inst.getOperand(SrcIdx);
3273    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3274      return false;
3275    }
3276  }
3277
3278  if (Src0Idx == -1)
3279    return true;
3280
3281  const MCOperand &Src = Inst.getOperand(Src0Idx);
3282  if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3283    return true;
3284
3285  // lds_direct is specified as src0. Check additional limitations.
3286  return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3287}
3288
3289SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3290  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3291    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3292    if (Op.isFlatOffset())
3293      return Op.getStartLoc();
3294  }
3295  return getLoc();
3296}
3297
3298bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3299                                         const OperandVector &Operands) {
3300  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3301  if ((TSFlags & SIInstrFlags::FLAT) == 0)
3302    return true;
3303
3304  auto Opcode = Inst.getOpcode();
3305  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3306  assert(OpNum != -1);
3307
3308  const auto &Op = Inst.getOperand(OpNum);
3309  if (!hasFlatOffsets() && Op.getImm() != 0) {
3310    Error(getFlatOffsetLoc(Operands),
3311          "flat offset modifier is not supported on this GPU");
3312    return false;
3313  }
3314
3315  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3316  // For FLAT segment the offset must be positive;
3317  // MSB is ignored and forced to zero.
3318  unsigned OffsetSize = isGFX9() ? 13 : 12;
3319  if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3320    if (!isIntN(OffsetSize, Op.getImm())) {
3321      Error(getFlatOffsetLoc(Operands),
3322            isGFX9() ? "expected a 13-bit signed offset" :
3323                       "expected a 12-bit signed offset");
3324      return false;
3325    }
3326  } else {
3327    if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3328      Error(getFlatOffsetLoc(Operands),
3329            isGFX9() ? "expected a 12-bit unsigned offset" :
3330                       "expected an 11-bit unsigned offset");
3331      return false;
3332    }
3333  }
3334
3335  return true;
3336}
3337
3338bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3339  unsigned Opcode = Inst.getOpcode();
3340  const MCInstrDesc &Desc = MII.get(Opcode);
3341  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3342    return true;
3343
3344  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3345  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3346
3347  const int OpIndices[] = { Src0Idx, Src1Idx };
3348
3349  unsigned NumExprs = 0;
3350  unsigned NumLiterals = 0;
3351  uint32_t LiteralValue;
3352
3353  for (int OpIdx : OpIndices) {
3354    if (OpIdx == -1) break;
3355
3356    const MCOperand &MO = Inst.getOperand(OpIdx);
3357    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3358    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3359      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3360        uint32_t Value = static_cast<uint32_t>(MO.getImm());
3361        if (NumLiterals == 0 || LiteralValue != Value) {
3362          LiteralValue = Value;
3363          ++NumLiterals;
3364        }
3365      } else if (MO.isExpr()) {
3366        ++NumExprs;
3367      }
3368    }
3369  }
3370
3371  return NumLiterals + NumExprs <= 1;
3372}
3373
3374bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3375  const unsigned Opc = Inst.getOpcode();
3376  if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3377      Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3378    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3379    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3380
3381    if (OpSel & ~3)
3382      return false;
3383  }
3384  return true;
3385}
3386
3387// Check if VCC register matches wavefront size
3388bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3389  auto FB = getFeatureBits();
3390  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3391    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3392}
3393
3394// VOP3 literal is only allowed in GFX10+ and only one can be used
3395bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3396  unsigned Opcode = Inst.getOpcode();
3397  const MCInstrDesc &Desc = MII.get(Opcode);
3398  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3399    return true;
3400
3401  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3402  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3403  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3404
3405  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3406
3407  unsigned NumExprs = 0;
3408  unsigned NumLiterals = 0;
3409  uint32_t LiteralValue;
3410
3411  for (int OpIdx : OpIndices) {
3412    if (OpIdx == -1) break;
3413
3414    const MCOperand &MO = Inst.getOperand(OpIdx);
3415    if (!MO.isImm() && !MO.isExpr())
3416      continue;
3417    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3418      continue;
3419
3420    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3421        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3422      return false;
3423
3424    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3425      uint32_t Value = static_cast<uint32_t>(MO.getImm());
3426      if (NumLiterals == 0 || LiteralValue != Value) {
3427        LiteralValue = Value;
3428        ++NumLiterals;
3429      }
3430    } else if (MO.isExpr()) {
3431      ++NumExprs;
3432    }
3433  }
3434  NumLiterals += NumExprs;
3435
3436  return !NumLiterals ||
3437         (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3438}
3439
3440bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3441                                          const SMLoc &IDLoc,
3442                                          const OperandVector &Operands) {
3443  if (!validateLdsDirect(Inst)) {
3444    Error(IDLoc,
3445      "invalid use of lds_direct");
3446    return false;
3447  }
3448  if (!validateSOPLiteral(Inst)) {
3449    Error(IDLoc,
3450      "only one literal operand is allowed");
3451    return false;
3452  }
3453  if (!validateVOP3Literal(Inst)) {
3454    Error(IDLoc,
3455      "invalid literal operand");
3456    return false;
3457  }
3458  if (!validateConstantBusLimitations(Inst)) {
3459    Error(IDLoc,
3460      "invalid operand (violates constant bus restrictions)");
3461    return false;
3462  }
3463  if (!validateEarlyClobberLimitations(Inst)) {
3464    Error(IDLoc,
3465      "destination must be different than all sources");
3466    return false;
3467  }
3468  if (!validateIntClampSupported(Inst)) {
3469    Error(IDLoc,
3470      "integer clamping is not supported on this GPU");
3471    return false;
3472  }
3473  if (!validateOpSel(Inst)) {
3474    Error(IDLoc,
3475      "invalid op_sel operand");
3476    return false;
3477  }
3478  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
3479  if (!validateMIMGD16(Inst)) {
3480    Error(IDLoc,
3481      "d16 modifier is not supported on this GPU");
3482    return false;
3483  }
3484  if (!validateMIMGDim(Inst)) {
3485    Error(IDLoc, "dim modifier is required on this GPU");
3486    return false;
3487  }
3488  if (!validateMIMGDataSize(Inst)) {
3489    Error(IDLoc,
3490      "image data size does not match dmask and tfe");
3491    return false;
3492  }
3493  if (!validateMIMGAddrSize(Inst)) {
3494    Error(IDLoc,
3495      "image address size does not match dim and a16");
3496    return false;
3497  }
3498  if (!validateMIMGAtomicDMask(Inst)) {
3499    Error(IDLoc,
3500      "invalid atomic image dmask");
3501    return false;
3502  }
3503  if (!validateMIMGGatherDMask(Inst)) {
3504    Error(IDLoc,
3505      "invalid image_gather dmask: only one bit must be set");
3506    return false;
3507  }
3508  if (!validateMovrels(Inst)) {
3509    Error(IDLoc, "source operand must be a VGPR");
3510    return false;
3511  }
3512  if (!validateFlatOffset(Inst, Operands)) {
3513    return false;
3514  }
3515
3516  return true;
3517}
3518
3519static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3520                                            const FeatureBitset &FBS,
3521                                            unsigned VariantID = 0);
3522
3523bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3524                                              OperandVector &Operands,
3525                                              MCStreamer &Out,
3526                                              uint64_t &ErrorInfo,
3527                                              bool MatchingInlineAsm) {
3528  MCInst Inst;
3529  unsigned Result = Match_Success;
3530  for (auto Variant : getMatchedVariants()) {
3531    uint64_t EI;
3532    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3533                                  Variant);
3534    // We order match statuses from least to most specific. We use most specific
3535    // status as resulting
3536    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3537    if ((R == Match_Success) ||
3538        (R == Match_PreferE32) ||
3539        (R == Match_MissingFeature && Result != Match_PreferE32) ||
3540        (R == Match_InvalidOperand && Result != Match_MissingFeature
3541                                   && Result != Match_PreferE32) ||
3542        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3543                                   && Result != Match_MissingFeature
3544                                   && Result != Match_PreferE32)) {
3545      Result = R;
3546      ErrorInfo = EI;
3547    }
3548    if (R == Match_Success)
3549      break;
3550  }
3551
3552  switch (Result) {
3553  default: break;
3554  case Match_Success:
3555    if (!validateInstruction(Inst, IDLoc, Operands)) {
3556      return true;
3557    }
3558    Inst.setLoc(IDLoc);
3559    Out.EmitInstruction(Inst, getSTI());
3560    return false;
3561
3562  case Match_MissingFeature:
3563    return Error(IDLoc, "instruction not supported on this GPU");
3564
3565  case Match_MnemonicFail: {
3566    FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3567    std::string Suggestion = AMDGPUMnemonicSpellCheck(
3568        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3569    return Error(IDLoc, "invalid instruction" + Suggestion,
3570                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
3571  }
3572
3573  case Match_InvalidOperand: {
3574    SMLoc ErrorLoc = IDLoc;
3575    if (ErrorInfo != ~0ULL) {
3576      if (ErrorInfo >= Operands.size()) {
3577        return Error(IDLoc, "too few operands for instruction");
3578      }
3579      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3580      if (ErrorLoc == SMLoc())
3581        ErrorLoc = IDLoc;
3582    }
3583    return Error(ErrorLoc, "invalid operand for instruction");
3584  }
3585
3586  case Match_PreferE32:
3587    return Error(IDLoc, "internal error: instruction without _e64 suffix "
3588                        "should be encoded as e32");
3589  }
3590  llvm_unreachable("Implement any new match types added!");
3591}
3592
3593bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3594  int64_t Tmp = -1;
3595  if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3596    return true;
3597  }
3598  if (getParser().parseAbsoluteExpression(Tmp)) {
3599    return true;
3600  }
3601  Ret = static_cast<uint32_t>(Tmp);
3602  return false;
3603}
3604
3605bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3606                                               uint32_t &Minor) {
3607  if (ParseAsAbsoluteExpression(Major))
3608    return TokError("invalid major version");
3609
3610  if (getLexer().isNot(AsmToken::Comma))
3611    return TokError("minor version number required, comma expected");
3612  Lex();
3613
3614  if (ParseAsAbsoluteExpression(Minor))
3615    return TokError("invalid minor version");
3616
3617  return false;
3618}
3619
3620bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3621  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3622    return TokError("directive only supported for amdgcn architecture");
3623
3624  std::string Target;
3625
3626  SMLoc TargetStart = getTok().getLoc();
3627  if (getParser().parseEscapedString(Target))
3628    return true;
3629  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3630
3631  std::string ExpectedTarget;
3632  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3633  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3634
3635  if (Target != ExpectedTargetOS.str())
3636    return getParser().Error(TargetRange.Start, "target must match options",
3637                             TargetRange);
3638
3639  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3640  return false;
3641}
3642
3643bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3644  return getParser().Error(Range.Start, "value out of range", Range);
3645}
3646
3647bool AMDGPUAsmParser::calculateGPRBlocks(
3648    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3649    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3650    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3651    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3652  // TODO(scott.linder): These calculations are duplicated from
3653  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3654  IsaVersion Version = getIsaVersion(getSTI().getCPU());
3655
3656  unsigned NumVGPRs = NextFreeVGPR;
3657  unsigned NumSGPRs = NextFreeSGPR;
3658
3659  if (Version.Major >= 10)
3660    NumSGPRs = 0;
3661  else {
3662    unsigned MaxAddressableNumSGPRs =
3663        IsaInfo::getAddressableNumSGPRs(&getSTI());
3664
3665    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3666        NumSGPRs > MaxAddressableNumSGPRs)
3667      return OutOfRangeError(SGPRRange);
3668
3669    NumSGPRs +=
3670        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3671
3672    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3673        NumSGPRs > MaxAddressableNumSGPRs)
3674      return OutOfRangeError(SGPRRange);
3675
3676    if (Features.test(FeatureSGPRInitBug))
3677      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3678  }
3679
3680  VGPRBlocks =
3681      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3682  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3683
3684  return false;
3685}
3686
3687bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3688  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3689    return TokError("directive only supported for amdgcn architecture");
3690
3691  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3692    return TokError("directive only supported for amdhsa OS");
3693
3694  StringRef KernelName;
3695  if (getParser().parseIdentifier(KernelName))
3696    return true;
3697
3698  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3699
3700  StringSet<> Seen;
3701
3702  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3703
3704  SMRange VGPRRange;
3705  uint64_t NextFreeVGPR = 0;
3706  SMRange SGPRRange;
3707  uint64_t NextFreeSGPR = 0;
3708  unsigned UserSGPRCount = 0;
3709  bool ReserveVCC = true;
3710  bool ReserveFlatScr = true;
3711  bool ReserveXNACK = hasXNACK();
3712  Optional<bool> EnableWavefrontSize32;
3713
3714  while (true) {
3715    while (getLexer().is(AsmToken::EndOfStatement))
3716      Lex();
3717
3718    if (getLexer().isNot(AsmToken::Identifier))
3719      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3720
3721    StringRef ID = getTok().getIdentifier();
3722    SMRange IDRange = getTok().getLocRange();
3723    Lex();
3724
3725    if (ID == ".end_amdhsa_kernel")
3726      break;
3727
3728    if (Seen.find(ID) != Seen.end())
3729      return TokError(".amdhsa_ directives cannot be repeated");
3730    Seen.insert(ID);
3731
3732    SMLoc ValStart = getTok().getLoc();
3733    int64_t IVal;
3734    if (getParser().parseAbsoluteExpression(IVal))
3735      return true;
3736    SMLoc ValEnd = getTok().getLoc();
3737    SMRange ValRange = SMRange(ValStart, ValEnd);
3738
3739    if (IVal < 0)
3740      return OutOfRangeError(ValRange);
3741
3742    uint64_t Val = IVal;
3743
3744#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3745  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3746    return OutOfRangeError(RANGE);                                             \
3747  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3748
3749    if (ID == ".amdhsa_group_segment_fixed_size") {
3750      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3751        return OutOfRangeError(ValRange);
3752      KD.group_segment_fixed_size = Val;
3753    } else if (ID == ".amdhsa_private_segment_fixed_size") {
3754      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3755        return OutOfRangeError(ValRange);
3756      KD.private_segment_fixed_size = Val;
3757    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3758      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3759                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3760                       Val, ValRange);
3761      if (Val)
3762        UserSGPRCount += 4;
3763    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3764      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3765                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3766                       ValRange);
3767      if (Val)
3768        UserSGPRCount += 2;
3769    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3770      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3771                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3772                       ValRange);
3773      if (Val)
3774        UserSGPRCount += 2;
3775    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3776      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3777                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3778                       Val, ValRange);
3779      if (Val)
3780        UserSGPRCount += 2;
3781    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3782      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3783                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3784                       ValRange);
3785      if (Val)
3786        UserSGPRCount += 2;
3787    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3788      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3789                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3790                       ValRange);
3791      if (Val)
3792        UserSGPRCount += 2;
3793    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3794      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3795                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3796                       Val, ValRange);
3797      if (Val)
3798        UserSGPRCount += 1;
3799    } else if (ID == ".amdhsa_wavefront_size32") {
3800      if (IVersion.Major < 10)
3801        return getParser().Error(IDRange.Start, "directive requires gfx10+",
3802                                 IDRange);
3803      EnableWavefrontSize32 = Val;
3804      PARSE_BITS_ENTRY(KD.kernel_code_properties,
3805                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3806                       Val, ValRange);
3807    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3808      PARSE_BITS_ENTRY(
3809          KD.compute_pgm_rsrc2,
3810          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3811          ValRange);
3812    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3813      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3814                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3815                       ValRange);
3816    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3817      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3818                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3819                       ValRange);
3820    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3821      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3822                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3823                       ValRange);
3824    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3825      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3826                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3827                       ValRange);
3828    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3829      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3830                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3831                       ValRange);
3832    } else if (ID == ".amdhsa_next_free_vgpr") {
3833      VGPRRange = ValRange;
3834      NextFreeVGPR = Val;
3835    } else if (ID == ".amdhsa_next_free_sgpr") {
3836      SGPRRange = ValRange;
3837      NextFreeSGPR = Val;
3838    } else if (ID == ".amdhsa_reserve_vcc") {
3839      if (!isUInt<1>(Val))
3840        return OutOfRangeError(ValRange);
3841      ReserveVCC = Val;
3842    } else if (ID == ".amdhsa_reserve_flat_scratch") {
3843      if (IVersion.Major < 7)
3844        return getParser().Error(IDRange.Start, "directive requires gfx7+",
3845                                 IDRange);
3846      if (!isUInt<1>(Val))
3847        return OutOfRangeError(ValRange);
3848      ReserveFlatScr = Val;
3849    } else if (ID == ".amdhsa_reserve_xnack_mask") {
3850      if (IVersion.Major < 8)
3851        return getParser().Error(IDRange.Start, "directive requires gfx8+",
3852                                 IDRange);
3853      if (!isUInt<1>(Val))
3854        return OutOfRangeError(ValRange);
3855      ReserveXNACK = Val;
3856    } else if (ID == ".amdhsa_float_round_mode_32") {
3857      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3858                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3859    } else if (ID == ".amdhsa_float_round_mode_16_64") {
3860      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3861                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3862    } else if (ID == ".amdhsa_float_denorm_mode_32") {
3863      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3864                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3865    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3866      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3867                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3868                       ValRange);
3869    } else if (ID == ".amdhsa_dx10_clamp") {
3870      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3871                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3872    } else if (ID == ".amdhsa_ieee_mode") {
3873      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3874                       Val, ValRange);
3875    } else if (ID == ".amdhsa_fp16_overflow") {
3876      if (IVersion.Major < 9)
3877        return getParser().Error(IDRange.Start, "directive requires gfx9+",
3878                                 IDRange);
3879      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3880                       ValRange);
3881    } else if (ID == ".amdhsa_workgroup_processor_mode") {
3882      if (IVersion.Major < 10)
3883        return getParser().Error(IDRange.Start, "directive requires gfx10+",
3884                                 IDRange);
3885      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3886                       ValRange);
3887    } else if (ID == ".amdhsa_memory_ordered") {
3888      if (IVersion.Major < 10)
3889        return getParser().Error(IDRange.Start, "directive requires gfx10+",
3890                                 IDRange);
3891      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3892                       ValRange);
3893    } else if (ID == ".amdhsa_forward_progress") {
3894      if (IVersion.Major < 10)
3895        return getParser().Error(IDRange.Start, "directive requires gfx10+",
3896                                 IDRange);
3897      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3898                       ValRange);
3899    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3900      PARSE_BITS_ENTRY(
3901          KD.compute_pgm_rsrc2,
3902          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3903          ValRange);
3904    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3905      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3906                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3907                       Val, ValRange);
3908    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3909      PARSE_BITS_ENTRY(
3910          KD.compute_pgm_rsrc2,
3911          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3912          ValRange);
3913    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3914      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3915                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3916                       Val, ValRange);
3917    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3918      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3919                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3920                       Val, ValRange);
3921    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3922      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3923                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3924                       Val, ValRange);
3925    } else if (ID == ".amdhsa_exception_int_div_zero") {
3926      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3927                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3928                       Val, ValRange);
3929    } else {
3930      return getParser().Error(IDRange.Start,
3931                               "unknown .amdhsa_kernel directive", IDRange);
3932    }
3933
3934#undef PARSE_BITS_ENTRY
3935  }
3936
3937  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3938    return TokError(".amdhsa_next_free_vgpr directive is required");
3939
3940  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3941    return TokError(".amdhsa_next_free_sgpr directive is required");
3942
3943  unsigned VGPRBlocks;
3944  unsigned SGPRBlocks;
3945  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3946                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3947                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3948                         SGPRBlocks))
3949    return true;
3950
3951  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3952          VGPRBlocks))
3953    return OutOfRangeError(VGPRRange);
3954  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3955                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3956
3957  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3958          SGPRBlocks))
3959    return OutOfRangeError(SGPRRange);
3960  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3961                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3962                  SGPRBlocks);
3963
3964  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3965    return TokError("too many user SGPRs enabled");
3966  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3967                  UserSGPRCount);
3968
3969  getTargetStreamer().EmitAmdhsaKernelDescriptor(
3970      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3971      ReserveFlatScr, ReserveXNACK);
3972  return false;
3973}
3974
3975bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3976  uint32_t Major;
3977  uint32_t Minor;
3978
3979  if (ParseDirectiveMajorMinor(Major, Minor))
3980    return true;
3981
3982  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3983  return false;
3984}
3985
3986bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3987  uint32_t Major;
3988  uint32_t Minor;
3989  uint32_t Stepping;
3990  StringRef VendorName;
3991  StringRef ArchName;
3992
3993  // If this directive has no arguments, then use the ISA version for the
3994  // targeted GPU.
3995  if (getLexer().is(AsmToken::EndOfStatement)) {
3996    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3997    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3998                                                      ISA.Stepping,
3999                                                      "AMD", "AMDGPU");
4000    return false;
4001  }
4002
4003  if (ParseDirectiveMajorMinor(Major, Minor))
4004    return true;
4005
4006  if (getLexer().isNot(AsmToken::Comma))
4007    return TokError("stepping version number required, comma expected");
4008  Lex();
4009
4010  if (ParseAsAbsoluteExpression(Stepping))
4011    return TokError("invalid stepping version");
4012
4013  if (getLexer().isNot(AsmToken::Comma))
4014    return TokError("vendor name required, comma expected");
4015  Lex();
4016
4017  if (getLexer().isNot(AsmToken::String))
4018    return TokError("invalid vendor name");
4019
4020  VendorName = getLexer().getTok().getStringContents();
4021  Lex();
4022
4023  if (getLexer().isNot(AsmToken::Comma))
4024    return TokError("arch name required, comma expected");
4025  Lex();
4026
4027  if (getLexer().isNot(AsmToken::String))
4028    return TokError("invalid arch name");
4029
4030  ArchName = getLexer().getTok().getStringContents();
4031  Lex();
4032
4033  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4034                                                    VendorName, ArchName);
4035  return false;
4036}
4037
4038bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4039                                               amd_kernel_code_t &Header) {
4040  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4041  // assembly for backwards compatibility.
4042  if (ID == "max_scratch_backing_memory_byte_size") {
4043    Parser.eatToEndOfStatement();
4044    return false;
4045  }
4046
4047  SmallString<40> ErrStr;
4048  raw_svector_ostream Err(ErrStr);
4049  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4050    return TokError(Err.str());
4051  }
4052  Lex();
4053
4054  if (ID == "enable_wavefront_size32") {
4055    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4056      if (!isGFX10())
4057        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4058      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4059        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4060    } else {
4061      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4062        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4063    }
4064  }
4065
4066  if (ID == "wavefront_size") {
4067    if (Header.wavefront_size == 5) {
4068      if (!isGFX10())
4069        return TokError("wavefront_size=5 is only allowed on GFX10+");
4070      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4071        return TokError("wavefront_size=5 requires +WavefrontSize32");
4072    } else if (Header.wavefront_size == 6) {
4073      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4074        return TokError("wavefront_size=6 requires +WavefrontSize64");
4075    }
4076  }
4077
4078  if (ID == "enable_wgp_mode") {
4079    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4080      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4081  }
4082
4083  if (ID == "enable_mem_ordered") {
4084    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4085      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4086  }
4087
4088  if (ID == "enable_fwd_progress") {
4089    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4090      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4091  }
4092
4093  return false;
4094}
4095
4096bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4097  amd_kernel_code_t Header;
4098  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4099
4100  while (true) {
4101    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4102    // will set the current token to EndOfStatement.
4103    while(getLexer().is(AsmToken::EndOfStatement))
4104      Lex();
4105
4106    if (getLexer().isNot(AsmToken::Identifier))
4107      return TokError("expected value identifier or .end_amd_kernel_code_t");
4108
4109    StringRef ID = getLexer().getTok().getIdentifier();
4110    Lex();
4111
4112    if (ID == ".end_amd_kernel_code_t")
4113      break;
4114
4115    if (ParseAMDKernelCodeTValue(ID, Header))
4116      return true;
4117  }
4118
4119  getTargetStreamer().EmitAMDKernelCodeT(Header);
4120
4121  return false;
4122}
4123
4124bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4125  if (getLexer().isNot(AsmToken::Identifier))
4126    return TokError("expected symbol name");
4127
4128  StringRef KernelName = Parser.getTok().getString();
4129
4130  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4131                                           ELF::STT_AMDGPU_HSA_KERNEL);
4132  Lex();
4133  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4134    KernelScope.initialize(getContext());
4135  return false;
4136}
4137
4138bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4139  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4140    return Error(getParser().getTok().getLoc(),
4141                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4142                 "architectures");
4143  }
4144
4145  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4146
4147  std::string ISAVersionStringFromSTI;
4148  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4149  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4150
4151  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4152    return Error(getParser().getTok().getLoc(),
4153                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4154                 "arguments specified through the command line");
4155  }
4156
4157  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4158  Lex();
4159
4160  return false;
4161}
4162
4163bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4164  const char *AssemblerDirectiveBegin;
4165  const char *AssemblerDirectiveEnd;
4166  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4167      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4168          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4169                            HSAMD::V3::AssemblerDirectiveEnd)
4170          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4171                            HSAMD::AssemblerDirectiveEnd);
4172
4173  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4174    return Error(getParser().getTok().getLoc(),
4175                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4176                 "not available on non-amdhsa OSes")).str());
4177  }
4178
4179  std::string HSAMetadataString;
4180  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4181                          HSAMetadataString))
4182    return true;
4183
4184  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4185    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4186      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4187  } else {
4188    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4189      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4190  }
4191
4192  return false;
4193}
4194
4195/// Common code to parse out a block of text (typically YAML) between start and
4196/// end directives.
4197bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4198                                          const char *AssemblerDirectiveEnd,
4199                                          std::string &CollectString) {
4200
4201  raw_string_ostream CollectStream(CollectString);
4202
4203  getLexer().setSkipSpace(false);
4204
4205  bool FoundEnd = false;
4206  while (!getLexer().is(AsmToken::Eof)) {
4207    while (getLexer().is(AsmToken::Space)) {
4208      CollectStream << getLexer().getTok().getString();
4209      Lex();
4210    }
4211
4212    if (getLexer().is(AsmToken::Identifier)) {
4213      StringRef ID = getLexer().getTok().getIdentifier();
4214      if (ID == AssemblerDirectiveEnd) {
4215        Lex();
4216        FoundEnd = true;
4217        break;
4218      }
4219    }
4220
4221    CollectStream << Parser.parseStringToEndOfStatement()
4222                  << getContext().getAsmInfo()->getSeparatorString();
4223
4224    Parser.eatToEndOfStatement();
4225  }
4226
4227  getLexer().setSkipSpace(true);
4228
4229  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4230    return TokError(Twine("expected directive ") +
4231                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4232  }
4233
4234  CollectStream.flush();
4235  return false;
4236}
4237
4238/// Parse the assembler directive for new MsgPack-format PAL metadata.
4239bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4240  std::string String;
4241  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4242                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4243    return true;
4244
4245  auto PALMetadata = getTargetStreamer().getPALMetadata();
4246  if (!PALMetadata->setFromString(String))
4247    return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4248  return false;
4249}
4250
4251/// Parse the assembler directive for old linear-format PAL metadata.
4252bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4253  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4254    return Error(getParser().getTok().getLoc(),
4255                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4256                 "not available on non-amdpal OSes")).str());
4257  }
4258
4259  auto PALMetadata = getTargetStreamer().getPALMetadata();
4260  PALMetadata->setLegacy();
4261  for (;;) {
4262    uint32_t Key, Value;
4263    if (ParseAsAbsoluteExpression(Key)) {
4264      return TokError(Twine("invalid value in ") +
4265                      Twine(PALMD::AssemblerDirective));
4266    }
4267    if (getLexer().isNot(AsmToken::Comma)) {
4268      return TokError(Twine("expected an even number of values in ") +
4269                      Twine(PALMD::AssemblerDirective));
4270    }
4271    Lex();
4272    if (ParseAsAbsoluteExpression(Value)) {
4273      return TokError(Twine("invalid value in ") +
4274                      Twine(PALMD::AssemblerDirective));
4275    }
4276    PALMetadata->setRegister(Key, Value);
4277    if (getLexer().isNot(AsmToken::Comma))
4278      break;
4279    Lex();
4280  }
4281  return false;
4282}
4283
4284/// ParseDirectiveAMDGPULDS
4285///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4286bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4287  if (getParser().checkForValidSection())
4288    return true;
4289
4290  StringRef Name;
4291  SMLoc NameLoc = getLexer().getLoc();
4292  if (getParser().parseIdentifier(Name))
4293    return TokError("expected identifier in directive");
4294
4295  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4296  if (parseToken(AsmToken::Comma, "expected ','"))
4297    return true;
4298
4299  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4300
4301  int64_t Size;
4302  SMLoc SizeLoc = getLexer().getLoc();
4303  if (getParser().parseAbsoluteExpression(Size))
4304    return true;
4305  if (Size < 0)
4306    return Error(SizeLoc, "size must be non-negative");
4307  if (Size > LocalMemorySize)
4308    return Error(SizeLoc, "size is too large");
4309
4310  int64_t Align = 4;
4311  if (getLexer().is(AsmToken::Comma)) {
4312    Lex();
4313    SMLoc AlignLoc = getLexer().getLoc();
4314    if (getParser().parseAbsoluteExpression(Align))
4315      return true;
4316    if (Align < 0 || !isPowerOf2_64(Align))
4317      return Error(AlignLoc, "alignment must be a power of two");
4318
4319    // Alignment larger than the size of LDS is possible in theory, as long
4320    // as the linker manages to place to symbol at address 0, but we do want
4321    // to make sure the alignment fits nicely into a 32-bit integer.
4322    if (Align >= 1u << 31)
4323      return Error(AlignLoc, "alignment is too large");
4324  }
4325
4326  if (parseToken(AsmToken::EndOfStatement,
4327                 "unexpected token in '.amdgpu_lds' directive"))
4328    return true;
4329
4330  Symbol->redefineIfPossible();
4331  if (!Symbol->isUndefined())
4332    return Error(NameLoc, "invalid symbol redefinition");
4333
4334  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4335  return false;
4336}
4337
4338bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4339  StringRef IDVal = DirectiveID.getString();
4340
4341  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4342    if (IDVal == ".amdgcn_target")
4343      return ParseDirectiveAMDGCNTarget();
4344
4345    if (IDVal == ".amdhsa_kernel")
4346      return ParseDirectiveAMDHSAKernel();
4347
4348    // TODO: Restructure/combine with PAL metadata directive.
4349    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4350      return ParseDirectiveHSAMetadata();
4351  } else {
4352    if (IDVal == ".hsa_code_object_version")
4353      return ParseDirectiveHSACodeObjectVersion();
4354
4355    if (IDVal == ".hsa_code_object_isa")
4356      return ParseDirectiveHSACodeObjectISA();
4357
4358    if (IDVal == ".amd_kernel_code_t")
4359      return ParseDirectiveAMDKernelCodeT();
4360
4361    if (IDVal == ".amdgpu_hsa_kernel")
4362      return ParseDirectiveAMDGPUHsaKernel();
4363
4364    if (IDVal == ".amd_amdgpu_isa")
4365      return ParseDirectiveISAVersion();
4366
4367    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4368      return ParseDirectiveHSAMetadata();
4369  }
4370
4371  if (IDVal == ".amdgpu_lds")
4372    return ParseDirectiveAMDGPULDS();
4373
4374  if (IDVal == PALMD::AssemblerDirectiveBegin)
4375    return ParseDirectivePALMetadataBegin();
4376
4377  if (IDVal == PALMD::AssemblerDirective)
4378    return ParseDirectivePALMetadata();
4379
4380  return true;
4381}
4382
4383bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4384                                           unsigned RegNo) const {
4385
4386  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4387       R.isValid(); ++R) {
4388    if (*R == RegNo)
4389      return isGFX9() || isGFX10();
4390  }
4391
4392  // GFX10 has 2 more SGPRs 104 and 105.
4393  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4394       R.isValid(); ++R) {
4395    if (*R == RegNo)
4396      return hasSGPR104_SGPR105();
4397  }
4398
4399  switch (RegNo) {
4400  case AMDGPU::SRC_SHARED_BASE:
4401  case AMDGPU::SRC_SHARED_LIMIT:
4402  case AMDGPU::SRC_PRIVATE_BASE:
4403  case AMDGPU::SRC_PRIVATE_LIMIT:
4404  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4405    return !isCI() && !isSI() && !isVI();
4406  case AMDGPU::TBA:
4407  case AMDGPU::TBA_LO:
4408  case AMDGPU::TBA_HI:
4409  case AMDGPU::TMA:
4410  case AMDGPU::TMA_LO:
4411  case AMDGPU::TMA_HI:
4412    return !isGFX9() && !isGFX10();
4413  case AMDGPU::XNACK_MASK:
4414  case AMDGPU::XNACK_MASK_LO:
4415  case AMDGPU::XNACK_MASK_HI:
4416    return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4417  case AMDGPU::SGPR_NULL:
4418    return isGFX10();
4419  default:
4420    break;
4421  }
4422
4423  if (isCI())
4424    return true;
4425
4426  if (isSI() || isGFX10()) {
4427    // No flat_scr on SI.
4428    // On GFX10 flat scratch is not a valid register operand and can only be
4429    // accessed with s_setreg/s_getreg.
4430    switch (RegNo) {
4431    case AMDGPU::FLAT_SCR:
4432    case AMDGPU::FLAT_SCR_LO:
4433    case AMDGPU::FLAT_SCR_HI:
4434      return false;
4435    default:
4436      return true;
4437    }
4438  }
4439
4440  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4441  // SI/CI have.
4442  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4443       R.isValid(); ++R) {
4444    if (*R == RegNo)
4445      return hasSGPR102_SGPR103();
4446  }
4447
4448  return true;
4449}
4450
4451OperandMatchResultTy
4452AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4453                              OperandMode Mode) {
4454  // Try to parse with a custom parser
4455  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4456
4457  // If we successfully parsed the operand or if there as an error parsing,
4458  // we are done.
4459  //
4460  // If we are parsing after we reach EndOfStatement then this means we
4461  // are appending default values to the Operands list.  This is only done
4462  // by custom parser, so we shouldn't continue on to the generic parsing.
4463  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4464      getLexer().is(AsmToken::EndOfStatement))
4465    return ResTy;
4466
4467  if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4468    unsigned Prefix = Operands.size();
4469    SMLoc LBraceLoc = getTok().getLoc();
4470    Parser.Lex(); // eat the '['
4471
4472    for (;;) {
4473      ResTy = parseReg(Operands);
4474      if (ResTy != MatchOperand_Success)
4475        return ResTy;
4476
4477      if (getLexer().is(AsmToken::RBrac))
4478        break;
4479
4480      if (getLexer().isNot(AsmToken::Comma))
4481        return MatchOperand_ParseFail;
4482      Parser.Lex();
4483    }
4484
4485    if (Operands.size() - Prefix > 1) {
4486      Operands.insert(Operands.begin() + Prefix,
4487                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4488      Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4489                                                    getTok().getLoc()));
4490    }
4491
4492    Parser.Lex(); // eat the ']'
4493    return MatchOperand_Success;
4494  }
4495
4496  return parseRegOrImm(Operands);
4497}
4498
4499StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4500  // Clear any forced encodings from the previous instruction.
4501  setForcedEncodingSize(0);
4502  setForcedDPP(false);
4503  setForcedSDWA(false);
4504
4505  if (Name.endswith("_e64")) {
4506    setForcedEncodingSize(64);
4507    return Name.substr(0, Name.size() - 4);
4508  } else if (Name.endswith("_e32")) {
4509    setForcedEncodingSize(32);
4510    return Name.substr(0, Name.size() - 4);
4511  } else if (Name.endswith("_dpp")) {
4512    setForcedDPP(true);
4513    return Name.substr(0, Name.size() - 4);
4514  } else if (Name.endswith("_sdwa")) {
4515    setForcedSDWA(true);
4516    return Name.substr(0, Name.size() - 5);
4517  }
4518  return Name;
4519}
4520
4521bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4522                                       StringRef Name,
4523                                       SMLoc NameLoc, OperandVector &Operands) {
4524  // Add the instruction mnemonic
4525  Name = parseMnemonicSuffix(Name);
4526  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4527
4528  bool IsMIMG = Name.startswith("image_");
4529
4530  while (!getLexer().is(AsmToken::EndOfStatement)) {
4531    OperandMode Mode = OperandMode_Default;
4532    if (IsMIMG && isGFX10() && Operands.size() == 2)
4533      Mode = OperandMode_NSA;
4534    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4535
4536    // Eat the comma or space if there is one.
4537    if (getLexer().is(AsmToken::Comma))
4538      Parser.Lex();
4539
4540    switch (Res) {
4541      case MatchOperand_Success: break;
4542      case MatchOperand_ParseFail:
4543        // FIXME: use real operand location rather than the current location.
4544        Error(getLexer().getLoc(), "failed parsing operand.");
4545        while (!getLexer().is(AsmToken::EndOfStatement)) {
4546          Parser.Lex();
4547        }
4548        return true;
4549      case MatchOperand_NoMatch:
4550        // FIXME: use real operand location rather than the current location.
4551        Error(getLexer().getLoc(), "not a valid operand.");
4552        while (!getLexer().is(AsmToken::EndOfStatement)) {
4553          Parser.Lex();
4554        }
4555        return true;
4556    }
4557  }
4558
4559  return false;
4560}
4561
4562//===----------------------------------------------------------------------===//
4563// Utility functions
4564//===----------------------------------------------------------------------===//
4565
4566OperandMatchResultTy
4567AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4568
4569  if (!trySkipId(Prefix, AsmToken::Colon))
4570    return MatchOperand_NoMatch;
4571
4572  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4573}
4574
4575OperandMatchResultTy
4576AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4577                                    AMDGPUOperand::ImmTy ImmTy,
4578                                    bool (*ConvertResult)(int64_t&)) {
4579  SMLoc S = getLoc();
4580  int64_t Value = 0;
4581
4582  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4583  if (Res != MatchOperand_Success)
4584    return Res;
4585
4586  if (ConvertResult && !ConvertResult(Value)) {
4587    Error(S, "invalid " + StringRef(Prefix) + " value.");
4588  }
4589
4590  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4591  return MatchOperand_Success;
4592}
4593
4594OperandMatchResultTy
4595AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4596                                             OperandVector &Operands,
4597                                             AMDGPUOperand::ImmTy ImmTy,
4598                                             bool (*ConvertResult)(int64_t&)) {
4599  SMLoc S = getLoc();
4600  if (!trySkipId(Prefix, AsmToken::Colon))
4601    return MatchOperand_NoMatch;
4602
4603  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4604    return MatchOperand_ParseFail;
4605
4606  unsigned Val = 0;
4607  const unsigned MaxSize = 4;
4608
4609  // FIXME: How to verify the number of elements matches the number of src
4610  // operands?
4611  for (int I = 0; ; ++I) {
4612    int64_t Op;
4613    SMLoc Loc = getLoc();
4614    if (!parseExpr(Op))
4615      return MatchOperand_ParseFail;
4616
4617    if (Op != 0 && Op != 1) {
4618      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4619      return MatchOperand_ParseFail;
4620    }
4621
4622    Val |= (Op << I);
4623
4624    if (trySkipToken(AsmToken::RBrac))
4625      break;
4626
4627    if (I + 1 == MaxSize) {
4628      Error(getLoc(), "expected a closing square bracket");
4629      return MatchOperand_ParseFail;
4630    }
4631
4632    if (!skipToken(AsmToken::Comma, "expected a comma"))
4633      return MatchOperand_ParseFail;
4634  }
4635
4636  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4637  return MatchOperand_Success;
4638}
4639
4640OperandMatchResultTy
4641AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4642                               AMDGPUOperand::ImmTy ImmTy) {
4643  int64_t Bit = 0;
4644  SMLoc S = Parser.getTok().getLoc();
4645
4646  // We are at the end of the statement, and this is a default argument, so
4647  // use a default value.
4648  if (getLexer().isNot(AsmToken::EndOfStatement)) {
4649    switch(getLexer().getKind()) {
4650      case AsmToken::Identifier: {
4651        StringRef Tok = Parser.getTok().getString();
4652        if (Tok == Name) {
4653          if (Tok == "r128" && isGFX9())
4654            Error(S, "r128 modifier is not supported on this GPU");
4655          if (Tok == "a16" && !isGFX9() && !isGFX10())
4656            Error(S, "a16 modifier is not supported on this GPU");
4657          Bit = 1;
4658          Parser.Lex();
4659        } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4660          Bit = 0;
4661          Parser.Lex();
4662        } else {
4663          return MatchOperand_NoMatch;
4664        }
4665        break;
4666      }
4667      default:
4668        return MatchOperand_NoMatch;
4669    }
4670  }
4671
4672  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4673    return MatchOperand_ParseFail;
4674
4675  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4676  return MatchOperand_Success;
4677}
4678
4679static void addOptionalImmOperand(
4680  MCInst& Inst, const OperandVector& Operands,
4681  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4682  AMDGPUOperand::ImmTy ImmT,
4683  int64_t Default = 0) {
4684  auto i = OptionalIdx.find(ImmT);
4685  if (i != OptionalIdx.end()) {
4686    unsigned Idx = i->second;
4687    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4688  } else {
4689    Inst.addOperand(MCOperand::createImm(Default));
4690  }
4691}
4692
4693OperandMatchResultTy
4694AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4695  if (getLexer().isNot(AsmToken::Identifier)) {
4696    return MatchOperand_NoMatch;
4697  }
4698  StringRef Tok = Parser.getTok().getString();
4699  if (Tok != Prefix) {
4700    return MatchOperand_NoMatch;
4701  }
4702
4703  Parser.Lex();
4704  if (getLexer().isNot(AsmToken::Colon)) {
4705    return MatchOperand_ParseFail;
4706  }
4707
4708  Parser.Lex();
4709  if (getLexer().isNot(AsmToken::Identifier)) {
4710    return MatchOperand_ParseFail;
4711  }
4712
4713  Value = Parser.getTok().getString();
4714  return MatchOperand_Success;
4715}
4716
4717// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4718// values to live in a joint format operand in the MCInst encoding.
4719OperandMatchResultTy
4720AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4721  SMLoc S = Parser.getTok().getLoc();
4722  int64_t Dfmt = 0, Nfmt = 0;
4723  // dfmt and nfmt can appear in either order, and each is optional.
4724  bool GotDfmt = false, GotNfmt = false;
4725  while (!GotDfmt || !GotNfmt) {
4726    if (!GotDfmt) {
4727      auto Res = parseIntWithPrefix("dfmt", Dfmt);
4728      if (Res != MatchOperand_NoMatch) {
4729        if (Res != MatchOperand_Success)
4730          return Res;
4731        if (Dfmt >= 16) {
4732          Error(Parser.getTok().getLoc(), "out of range dfmt");
4733          return MatchOperand_ParseFail;
4734        }
4735        GotDfmt = true;
4736        Parser.Lex();
4737        continue;
4738      }
4739    }
4740    if (!GotNfmt) {
4741      auto Res = parseIntWithPrefix("nfmt", Nfmt);
4742      if (Res != MatchOperand_NoMatch) {
4743        if (Res != MatchOperand_Success)
4744          return Res;
4745        if (Nfmt >= 8) {
4746          Error(Parser.getTok().getLoc(), "out of range nfmt");
4747          return MatchOperand_ParseFail;
4748        }
4749        GotNfmt = true;
4750        Parser.Lex();
4751        continue;
4752      }
4753    }
4754    break;
4755  }
4756  if (!GotDfmt && !GotNfmt)
4757    return MatchOperand_NoMatch;
4758  auto Format = Dfmt | Nfmt << 4;
4759  Operands.push_back(
4760      AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4761  return MatchOperand_Success;
4762}
4763
4764//===----------------------------------------------------------------------===//
4765// ds
4766//===----------------------------------------------------------------------===//
4767
4768void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4769                                    const OperandVector &Operands) {
4770  OptionalImmIndexMap OptionalIdx;
4771
4772  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4773    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4774
4775    // Add the register arguments
4776    if (Op.isReg()) {
4777      Op.addRegOperands(Inst, 1);
4778      continue;
4779    }
4780
4781    // Handle optional arguments
4782    OptionalIdx[Op.getImmTy()] = i;
4783  }
4784
4785  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4786  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4787  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4788
4789  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4790}
4791
4792void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4793                                bool IsGdsHardcoded) {
4794  OptionalImmIndexMap OptionalIdx;
4795
4796  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4797    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4798
4799    // Add the register arguments
4800    if (Op.isReg()) {
4801      Op.addRegOperands(Inst, 1);
4802      continue;
4803    }
4804
4805    if (Op.isToken() && Op.getToken() == "gds") {
4806      IsGdsHardcoded = true;
4807      continue;
4808    }
4809
4810    // Handle optional arguments
4811    OptionalIdx[Op.getImmTy()] = i;
4812  }
4813
4814  AMDGPUOperand::ImmTy OffsetType =
4815    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4816     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4817     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4818                                                      AMDGPUOperand::ImmTyOffset;
4819
4820  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4821
4822  if (!IsGdsHardcoded) {
4823    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4824  }
4825  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4826}
4827
4828void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4829  OptionalImmIndexMap OptionalIdx;
4830
4831  unsigned OperandIdx[4];
4832  unsigned EnMask = 0;
4833  int SrcIdx = 0;
4834
4835  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4836    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4837
4838    // Add the register arguments
4839    if (Op.isReg()) {
4840      assert(SrcIdx < 4);
4841      OperandIdx[SrcIdx] = Inst.size();
4842      Op.addRegOperands(Inst, 1);
4843      ++SrcIdx;
4844      continue;
4845    }
4846
4847    if (Op.isOff()) {
4848      assert(SrcIdx < 4);
4849      OperandIdx[SrcIdx] = Inst.size();
4850      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4851      ++SrcIdx;
4852      continue;
4853    }
4854
4855    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4856      Op.addImmOperands(Inst, 1);
4857      continue;
4858    }
4859
4860    if (Op.isToken() && Op.getToken() == "done")
4861      continue;
4862
4863    // Handle optional arguments
4864    OptionalIdx[Op.getImmTy()] = i;
4865  }
4866
4867  assert(SrcIdx == 4);
4868
4869  bool Compr = false;
4870  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4871    Compr = true;
4872    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4873    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4874    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4875  }
4876
4877  for (auto i = 0; i < SrcIdx; ++i) {
4878    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4879      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4880    }
4881  }
4882
4883  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4884  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4885
4886  Inst.addOperand(MCOperand::createImm(EnMask));
4887}
4888
4889//===----------------------------------------------------------------------===//
4890// s_waitcnt
4891//===----------------------------------------------------------------------===//
4892
4893static bool
4894encodeCnt(
4895  const AMDGPU::IsaVersion ISA,
4896  int64_t &IntVal,
4897  int64_t CntVal,
4898  bool Saturate,
4899  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4900  unsigned (*decode)(const IsaVersion &Version, unsigned))
4901{
4902  bool Failed = false;
4903
4904  IntVal = encode(ISA, IntVal, CntVal);
4905  if (CntVal != decode(ISA, IntVal)) {
4906    if (Saturate) {
4907      IntVal = encode(ISA, IntVal, -1);
4908    } else {
4909      Failed = true;
4910    }
4911  }
4912  return Failed;
4913}
4914
4915bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4916
4917  SMLoc CntLoc = getLoc();
4918  StringRef CntName = getTokenStr();
4919
4920  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4921      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4922    return false;
4923
4924  int64_t CntVal;
4925  SMLoc ValLoc = getLoc();
4926  if (!parseExpr(CntVal))
4927    return false;
4928
4929  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4930
4931  bool Failed = true;
4932  bool Sat = CntName.endswith("_sat");
4933
4934  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4935    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4936  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4937    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4938  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4939    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4940  } else {
4941    Error(CntLoc, "invalid counter name " + CntName);
4942    return false;
4943  }
4944
4945  if (Failed) {
4946    Error(ValLoc, "too large value for " + CntName);
4947    return false;
4948  }
4949
4950  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4951    return false;
4952
4953  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4954    if (isToken(AsmToken::EndOfStatement)) {
4955      Error(getLoc(), "expected a counter name");
4956      return false;
4957    }
4958  }
4959
4960  return true;
4961}
4962
4963OperandMatchResultTy
4964AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4965  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4966  int64_t Waitcnt = getWaitcntBitMask(ISA);
4967  SMLoc S = getLoc();
4968
4969  // If parse failed, do not return error code
4970  // to avoid excessive error messages.
4971  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4972    while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4973  } else {
4974    parseExpr(Waitcnt);
4975  }
4976
4977  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4978  return MatchOperand_Success;
4979}
4980
4981bool
4982AMDGPUOperand::isSWaitCnt() const {
4983  return isImm();
4984}
4985
4986//===----------------------------------------------------------------------===//
4987// hwreg
4988//===----------------------------------------------------------------------===//
4989
4990bool
4991AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4992                                int64_t &Offset,
4993                                int64_t &Width) {
4994  using namespace llvm::AMDGPU::Hwreg;
4995
4996  // The register may be specified by name or using a numeric code
4997  if (isToken(AsmToken::Identifier) &&
4998      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4999    HwReg.IsSymbolic = true;
5000    lex(); // skip message name
5001  } else if (!parseExpr(HwReg.Id)) {
5002    return false;
5003  }
5004
5005  if (trySkipToken(AsmToken::RParen))
5006    return true;
5007
5008  // parse optional params
5009  return
5010    skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5011    parseExpr(Offset) &&
5012    skipToken(AsmToken::Comma, "expected a comma") &&
5013    parseExpr(Width) &&
5014    skipToken(AsmToken::RParen, "expected a closing parenthesis");
5015}
5016
5017bool
5018AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5019                               const int64_t Offset,
5020                               const int64_t Width,
5021                               const SMLoc Loc) {
5022
5023  using namespace llvm::AMDGPU::Hwreg;
5024
5025  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5026    Error(Loc, "specified hardware register is not supported on this GPU");
5027    return false;
5028  } else if (!isValidHwreg(HwReg.Id)) {
5029    Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5030    return false;
5031  } else if (!isValidHwregOffset(Offset)) {
5032    Error(Loc, "invalid bit offset: only 5-bit values are legal");
5033    return false;
5034  } else if (!isValidHwregWidth(Width)) {
5035    Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5036    return false;
5037  }
5038  return true;
5039}
5040
5041OperandMatchResultTy
5042AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5043  using namespace llvm::AMDGPU::Hwreg;
5044
5045  int64_t ImmVal = 0;
5046  SMLoc Loc = getLoc();
5047
5048  // If parse failed, do not return error code
5049  // to avoid excessive error messages.
5050  if (trySkipId("hwreg", AsmToken::LParen)) {
5051    OperandInfoTy HwReg(ID_UNKNOWN_);
5052    int64_t Offset = OFFSET_DEFAULT_;
5053    int64_t Width = WIDTH_DEFAULT_;
5054    if (parseHwregBody(HwReg, Offset, Width) &&
5055        validateHwreg(HwReg, Offset, Width, Loc)) {
5056      ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5057    }
5058  } else if (parseExpr(ImmVal)) {
5059    if (ImmVal < 0 || !isUInt<16>(ImmVal))
5060      Error(Loc, "invalid immediate: only 16-bit values are legal");
5061  }
5062
5063  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5064  return MatchOperand_Success;
5065}
5066
5067bool AMDGPUOperand::isHwreg() const {
5068  return isImmTy(ImmTyHwreg);
5069}
5070
5071//===----------------------------------------------------------------------===//
5072// sendmsg
5073//===----------------------------------------------------------------------===//
5074
5075bool
5076AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5077                                  OperandInfoTy &Op,
5078                                  OperandInfoTy &Stream) {
5079  using namespace llvm::AMDGPU::SendMsg;
5080
5081  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5082    Msg.IsSymbolic = true;
5083    lex(); // skip message name
5084  } else if (!parseExpr(Msg.Id)) {
5085    return false;
5086  }
5087
5088  if (trySkipToken(AsmToken::Comma)) {
5089    Op.IsDefined = true;
5090    if (isToken(AsmToken::Identifier) &&
5091        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5092      lex(); // skip operation name
5093    } else if (!parseExpr(Op.Id)) {
5094      return false;
5095    }
5096
5097    if (trySkipToken(AsmToken::Comma)) {
5098      Stream.IsDefined = true;
5099      if (!parseExpr(Stream.Id))
5100        return false;
5101    }
5102  }
5103
5104  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5105}
5106
5107bool
5108AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5109                                 const OperandInfoTy &Op,
5110                                 const OperandInfoTy &Stream,
5111                                 const SMLoc S) {
5112  using namespace llvm::AMDGPU::SendMsg;
5113
5114  // Validation strictness depends on whether message is specified
5115  // in a symbolc or in a numeric form. In the latter case
5116  // only encoding possibility is checked.
5117  bool Strict = Msg.IsSymbolic;
5118
5119  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5120    Error(S, "invalid message id");
5121    return false;
5122  } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5123    Error(S, Op.IsDefined ?
5124             "message does not support operations" :
5125             "missing message operation");
5126    return false;
5127  } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5128    Error(S, "invalid operation id");
5129    return false;
5130  } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5131    Error(S, "message operation does not support streams");
5132    return false;
5133  } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5134    Error(S, "invalid message stream id");
5135    return false;
5136  }
5137  return true;
5138}
5139
5140OperandMatchResultTy
5141AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5142  using namespace llvm::AMDGPU::SendMsg;
5143
5144  int64_t ImmVal = 0;
5145  SMLoc Loc = getLoc();
5146
5147  // If parse failed, do not return error code
5148  // to avoid excessive error messages.
5149  if (trySkipId("sendmsg", AsmToken::LParen)) {
5150    OperandInfoTy Msg(ID_UNKNOWN_);
5151    OperandInfoTy Op(OP_NONE_);
5152    OperandInfoTy Stream(STREAM_ID_NONE_);
5153    if (parseSendMsgBody(Msg, Op, Stream) &&
5154        validateSendMsg(Msg, Op, Stream, Loc)) {
5155      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5156    }
5157  } else if (parseExpr(ImmVal)) {
5158    if (ImmVal < 0 || !isUInt<16>(ImmVal))
5159      Error(Loc, "invalid immediate: only 16-bit values are legal");
5160  }
5161
5162  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5163  return MatchOperand_Success;
5164}
5165
5166bool AMDGPUOperand::isSendMsg() const {
5167  return isImmTy(ImmTySendMsg);
5168}
5169
5170//===----------------------------------------------------------------------===//
5171// v_interp
5172//===----------------------------------------------------------------------===//
5173
5174OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5175  if (getLexer().getKind() != AsmToken::Identifier)
5176    return MatchOperand_NoMatch;
5177
5178  StringRef Str = Parser.getTok().getString();
5179  int Slot = StringSwitch<int>(Str)
5180    .Case("p10", 0)
5181    .Case("p20", 1)
5182    .Case("p0", 2)
5183    .Default(-1);
5184
5185  SMLoc S = Parser.getTok().getLoc();
5186  if (Slot == -1)
5187    return MatchOperand_ParseFail;
5188
5189  Parser.Lex();
5190  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5191                                              AMDGPUOperand::ImmTyInterpSlot));
5192  return MatchOperand_Success;
5193}
5194
5195OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5196  if (getLexer().getKind() != AsmToken::Identifier)
5197    return MatchOperand_NoMatch;
5198
5199  StringRef Str = Parser.getTok().getString();
5200  if (!Str.startswith("attr"))
5201    return MatchOperand_NoMatch;
5202
5203  StringRef Chan = Str.take_back(2);
5204  int AttrChan = StringSwitch<int>(Chan)
5205    .Case(".x", 0)
5206    .Case(".y", 1)
5207    .Case(".z", 2)
5208    .Case(".w", 3)
5209    .Default(-1);
5210  if (AttrChan == -1)
5211    return MatchOperand_ParseFail;
5212
5213  Str = Str.drop_back(2).drop_front(4);
5214
5215  uint8_t Attr;
5216  if (Str.getAsInteger(10, Attr))
5217    return MatchOperand_ParseFail;
5218
5219  SMLoc S = Parser.getTok().getLoc();
5220  Parser.Lex();
5221  if (Attr > 63) {
5222    Error(S, "out of bounds attr");
5223    return MatchOperand_Success;
5224  }
5225
5226  SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5227
5228  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5229                                              AMDGPUOperand::ImmTyInterpAttr));
5230  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5231                                              AMDGPUOperand::ImmTyAttrChan));
5232  return MatchOperand_Success;
5233}
5234
5235//===----------------------------------------------------------------------===//
5236// exp
5237//===----------------------------------------------------------------------===//
5238
5239void AMDGPUAsmParser::errorExpTgt() {
5240  Error(Parser.getTok().getLoc(), "invalid exp target");
5241}
5242
5243OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5244                                                      uint8_t &Val) {
5245  if (Str == "null") {
5246    Val = 9;
5247    return MatchOperand_Success;
5248  }
5249
5250  if (Str.startswith("mrt")) {
5251    Str = Str.drop_front(3);
5252    if (Str == "z") { // == mrtz
5253      Val = 8;
5254      return MatchOperand_Success;
5255    }
5256
5257    if (Str.getAsInteger(10, Val))
5258      return MatchOperand_ParseFail;
5259
5260    if (Val > 7)
5261      errorExpTgt();
5262
5263    return MatchOperand_Success;
5264  }
5265
5266  if (Str.startswith("pos")) {
5267    Str = Str.drop_front(3);
5268    if (Str.getAsInteger(10, Val))
5269      return MatchOperand_ParseFail;
5270
5271    if (Val > 4 || (Val == 4 && !isGFX10()))
5272      errorExpTgt();
5273
5274    Val += 12;
5275    return MatchOperand_Success;
5276  }
5277
5278  if (isGFX10() && Str == "prim") {
5279    Val = 20;
5280    return MatchOperand_Success;
5281  }
5282
5283  if (Str.startswith("param")) {
5284    Str = Str.drop_front(5);
5285    if (Str.getAsInteger(10, Val))
5286      return MatchOperand_ParseFail;
5287
5288    if (Val >= 32)
5289      errorExpTgt();
5290
5291    Val += 32;
5292    return MatchOperand_Success;
5293  }
5294
5295  if (Str.startswith("invalid_target_")) {
5296    Str = Str.drop_front(15);
5297    if (Str.getAsInteger(10, Val))
5298      return MatchOperand_ParseFail;
5299
5300    errorExpTgt();
5301    return MatchOperand_Success;
5302  }
5303
5304  return MatchOperand_NoMatch;
5305}
5306
5307OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5308  uint8_t Val;
5309  StringRef Str = Parser.getTok().getString();
5310
5311  auto Res = parseExpTgtImpl(Str, Val);
5312  if (Res != MatchOperand_Success)
5313    return Res;
5314
5315  SMLoc S = Parser.getTok().getLoc();
5316  Parser.Lex();
5317
5318  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5319                                              AMDGPUOperand::ImmTyExpTgt));
5320  return MatchOperand_Success;
5321}
5322
5323//===----------------------------------------------------------------------===//
5324// parser helpers
5325//===----------------------------------------------------------------------===//
5326
5327bool
5328AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5329  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5330}
5331
5332bool
5333AMDGPUAsmParser::isId(const StringRef Id) const {
5334  return isId(getToken(), Id);
5335}
5336
5337bool
5338AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5339  return getTokenKind() == Kind;
5340}
5341
5342bool
5343AMDGPUAsmParser::trySkipId(const StringRef Id) {
5344  if (isId(Id)) {
5345    lex();
5346    return true;
5347  }
5348  return false;
5349}
5350
5351bool
5352AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5353  if (isId(Id) && peekToken().is(Kind)) {
5354    lex();
5355    lex();
5356    return true;
5357  }
5358  return false;
5359}
5360
5361bool
5362AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5363  if (isToken(Kind)) {
5364    lex();
5365    return true;
5366  }
5367  return false;
5368}
5369
5370bool
5371AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5372                           const StringRef ErrMsg) {
5373  if (!trySkipToken(Kind)) {
5374    Error(getLoc(), ErrMsg);
5375    return false;
5376  }
5377  return true;
5378}
5379
5380bool
5381AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5382  return !getParser().parseAbsoluteExpression(Imm);
5383}
5384
5385bool
5386AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5387  SMLoc S = getLoc();
5388
5389  const MCExpr *Expr;
5390  if (Parser.parseExpression(Expr))
5391    return false;
5392
5393  int64_t IntVal;
5394  if (Expr->evaluateAsAbsolute(IntVal)) {
5395    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5396  } else {
5397    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5398  }
5399  return true;
5400}
5401
5402bool
5403AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5404  if (isToken(AsmToken::String)) {
5405    Val = getToken().getStringContents();
5406    lex();
5407    return true;
5408  } else {
5409    Error(getLoc(), ErrMsg);
5410    return false;
5411  }
5412}
5413
5414AsmToken
5415AMDGPUAsmParser::getToken() const {
5416  return Parser.getTok();
5417}
5418
5419AsmToken
5420AMDGPUAsmParser::peekToken() {
5421  return getLexer().peekTok();
5422}
5423
5424void
5425AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5426  auto TokCount = getLexer().peekTokens(Tokens);
5427
5428  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5429    Tokens[Idx] = AsmToken(AsmToken::Error, "");
5430}
5431
5432AsmToken::TokenKind
5433AMDGPUAsmParser::getTokenKind() const {
5434  return getLexer().getKind();
5435}
5436
5437SMLoc
5438AMDGPUAsmParser::getLoc() const {
5439  return getToken().getLoc();
5440}
5441
5442StringRef
5443AMDGPUAsmParser::getTokenStr() const {
5444  return getToken().getString();
5445}
5446
5447void
5448AMDGPUAsmParser::lex() {
5449  Parser.Lex();
5450}
5451
5452//===----------------------------------------------------------------------===//
5453// swizzle
5454//===----------------------------------------------------------------------===//
5455
5456LLVM_READNONE
5457static unsigned
5458encodeBitmaskPerm(const unsigned AndMask,
5459                  const unsigned OrMask,
5460                  const unsigned XorMask) {
5461  using namespace llvm::AMDGPU::Swizzle;
5462
5463  return BITMASK_PERM_ENC |
5464         (AndMask << BITMASK_AND_SHIFT) |
5465         (OrMask  << BITMASK_OR_SHIFT)  |
5466         (XorMask << BITMASK_XOR_SHIFT);
5467}
5468
5469bool
5470AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5471                                      const unsigned MinVal,
5472                                      const unsigned MaxVal,
5473                                      const StringRef ErrMsg) {
5474  for (unsigned i = 0; i < OpNum; ++i) {
5475    if (!skipToken(AsmToken::Comma, "expected a comma")){
5476      return false;
5477    }
5478    SMLoc ExprLoc = Parser.getTok().getLoc();
5479    if (!parseExpr(Op[i])) {
5480      return false;
5481    }
5482    if (Op[i] < MinVal || Op[i] > MaxVal) {
5483      Error(ExprLoc, ErrMsg);
5484      return false;
5485    }
5486  }
5487
5488  return true;
5489}
5490
5491bool
5492AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5493  using namespace llvm::AMDGPU::Swizzle;
5494
5495  int64_t Lane[LANE_NUM];
5496  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5497                           "expected a 2-bit lane id")) {
5498    Imm = QUAD_PERM_ENC;
5499    for (unsigned I = 0; I < LANE_NUM; ++I) {
5500      Imm |= Lane[I] << (LANE_SHIFT * I);
5501    }
5502    return true;
5503  }
5504  return false;
5505}
5506
5507bool
5508AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5509  using namespace llvm::AMDGPU::Swizzle;
5510
5511  SMLoc S = Parser.getTok().getLoc();
5512  int64_t GroupSize;
5513  int64_t LaneIdx;
5514
5515  if (!parseSwizzleOperands(1, &GroupSize,
5516                            2, 32,
5517                            "group size must be in the interval [2,32]")) {
5518    return false;
5519  }
5520  if (!isPowerOf2_64(GroupSize)) {
5521    Error(S, "group size must be a power of two");
5522    return false;
5523  }
5524  if (parseSwizzleOperands(1, &LaneIdx,
5525                           0, GroupSize - 1,
5526                           "lane id must be in the interval [0,group size - 1]")) {
5527    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5528    return true;
5529  }
5530  return false;
5531}
5532
5533bool
5534AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5535  using namespace llvm::AMDGPU::Swizzle;
5536
5537  SMLoc S = Parser.getTok().getLoc();
5538  int64_t GroupSize;
5539
5540  if (!parseSwizzleOperands(1, &GroupSize,
5541      2, 32, "group size must be in the interval [2,32]")) {
5542    return false;
5543  }
5544  if (!isPowerOf2_64(GroupSize)) {
5545    Error(S, "group size must be a power of two");
5546    return false;
5547  }
5548
5549  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5550  return true;
5551}
5552
5553bool
5554AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5555  using namespace llvm::AMDGPU::Swizzle;
5556
5557  SMLoc S = Parser.getTok().getLoc();
5558  int64_t GroupSize;
5559
5560  if (!parseSwizzleOperands(1, &GroupSize,
5561      1, 16, "group size must be in the interval [1,16]")) {
5562    return false;
5563  }
5564  if (!isPowerOf2_64(GroupSize)) {
5565    Error(S, "group size must be a power of two");
5566    return false;
5567  }
5568
5569  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5570  return true;
5571}
5572
5573bool
5574AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5575  using namespace llvm::AMDGPU::Swizzle;
5576
5577  if (!skipToken(AsmToken::Comma, "expected a comma")) {
5578    return false;
5579  }
5580
5581  StringRef Ctl;
5582  SMLoc StrLoc = Parser.getTok().getLoc();
5583  if (!parseString(Ctl)) {
5584    return false;
5585  }
5586  if (Ctl.size() != BITMASK_WIDTH) {
5587    Error(StrLoc, "expected a 5-character mask");
5588    return false;
5589  }
5590
5591  unsigned AndMask = 0;
5592  unsigned OrMask = 0;
5593  unsigned XorMask = 0;
5594
5595  for (size_t i = 0; i < Ctl.size(); ++i) {
5596    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5597    switch(Ctl[i]) {
5598    default:
5599      Error(StrLoc, "invalid mask");
5600      return false;
5601    case '0':
5602      break;
5603    case '1':
5604      OrMask |= Mask;
5605      break;
5606    case 'p':
5607      AndMask |= Mask;
5608      break;
5609    case 'i':
5610      AndMask |= Mask;
5611      XorMask |= Mask;
5612      break;
5613    }
5614  }
5615
5616  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5617  return true;
5618}
5619
5620bool
5621AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5622
5623  SMLoc OffsetLoc = Parser.getTok().getLoc();
5624
5625  if (!parseExpr(Imm)) {
5626    return false;
5627  }
5628  if (!isUInt<16>(Imm)) {
5629    Error(OffsetLoc, "expected a 16-bit offset");
5630    return false;
5631  }
5632  return true;
5633}
5634
5635bool
5636AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5637  using namespace llvm::AMDGPU::Swizzle;
5638
5639  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
5640
5641    SMLoc ModeLoc = Parser.getTok().getLoc();
5642    bool Ok = false;
5643
5644    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5645      Ok = parseSwizzleQuadPerm(Imm);
5646    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5647      Ok = parseSwizzleBitmaskPerm(Imm);
5648    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5649      Ok = parseSwizzleBroadcast(Imm);
5650    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5651      Ok = parseSwizzleSwap(Imm);
5652    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5653      Ok = parseSwizzleReverse(Imm);
5654    } else {
5655      Error(ModeLoc, "expected a swizzle mode");
5656    }
5657
5658    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
5659  }
5660
5661  return false;
5662}
5663
5664OperandMatchResultTy
5665AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5666  SMLoc S = Parser.getTok().getLoc();
5667  int64_t Imm = 0;
5668
5669  if (trySkipId("offset")) {
5670
5671    bool Ok = false;
5672    if (skipToken(AsmToken::Colon, "expected a colon")) {
5673      if (trySkipId("swizzle")) {
5674        Ok = parseSwizzleMacro(Imm);
5675      } else {
5676        Ok = parseSwizzleOffset(Imm);
5677      }
5678    }
5679
5680    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5681
5682    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5683  } else {
5684    // Swizzle "offset" operand is optional.
5685    // If it is omitted, try parsing other optional operands.
5686    return parseOptionalOpr(Operands);
5687  }
5688}
5689
5690bool
5691AMDGPUOperand::isSwizzle() const {
5692  return isImmTy(ImmTySwizzle);
5693}
5694
5695//===----------------------------------------------------------------------===//
5696// VGPR Index Mode
5697//===----------------------------------------------------------------------===//
5698
5699int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5700
5701  using namespace llvm::AMDGPU::VGPRIndexMode;
5702
5703  if (trySkipToken(AsmToken::RParen)) {
5704    return OFF;
5705  }
5706
5707  int64_t Imm = 0;
5708
5709  while (true) {
5710    unsigned Mode = 0;
5711    SMLoc S = Parser.getTok().getLoc();
5712
5713    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5714      if (trySkipId(IdSymbolic[ModeId])) {
5715        Mode = 1 << ModeId;
5716        break;
5717      }
5718    }
5719
5720    if (Mode == 0) {
5721      Error(S, (Imm == 0)?
5722               "expected a VGPR index mode or a closing parenthesis" :
5723               "expected a VGPR index mode");
5724      break;
5725    }
5726
5727    if (Imm & Mode) {
5728      Error(S, "duplicate VGPR index mode");
5729      break;
5730    }
5731    Imm |= Mode;
5732
5733    if (trySkipToken(AsmToken::RParen))
5734      break;
5735    if (!skipToken(AsmToken::Comma,
5736                   "expected a comma or a closing parenthesis"))
5737      break;
5738  }
5739
5740  return Imm;
5741}
5742
5743OperandMatchResultTy
5744AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5745
5746  int64_t Imm = 0;
5747  SMLoc S = Parser.getTok().getLoc();
5748
5749  if (getLexer().getKind() == AsmToken::Identifier &&
5750      Parser.getTok().getString() == "gpr_idx" &&
5751      getLexer().peekTok().is(AsmToken::LParen)) {
5752
5753    Parser.Lex();
5754    Parser.Lex();
5755
5756    // If parse failed, trigger an error but do not return error code
5757    // to avoid excessive error messages.
5758    Imm = parseGPRIdxMacro();
5759
5760  } else {
5761    if (getParser().parseAbsoluteExpression(Imm))
5762      return MatchOperand_NoMatch;
5763    if (Imm < 0 || !isUInt<4>(Imm)) {
5764      Error(S, "invalid immediate: only 4-bit values are legal");
5765    }
5766  }
5767
5768  Operands.push_back(
5769      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5770  return MatchOperand_Success;
5771}
5772
5773bool AMDGPUOperand::isGPRIdxMode() const {
5774  return isImmTy(ImmTyGprIdxMode);
5775}
5776
5777//===----------------------------------------------------------------------===//
5778// sopp branch targets
5779//===----------------------------------------------------------------------===//
5780
5781OperandMatchResultTy
5782AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5783
5784  // Make sure we are not parsing something
5785  // that looks like a label or an expression but is not.
5786  // This will improve error messages.
5787  if (isRegister() || isModifier())
5788    return MatchOperand_NoMatch;
5789
5790  if (parseExpr(Operands)) {
5791
5792    AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5793    assert(Opr.isImm() || Opr.isExpr());
5794    SMLoc Loc = Opr.getStartLoc();
5795
5796    // Currently we do not support arbitrary expressions as branch targets.
5797    // Only labels and absolute expressions are accepted.
5798    if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5799      Error(Loc, "expected an absolute expression or a label");
5800    } else if (Opr.isImm() && !Opr.isS16Imm()) {
5801      Error(Loc, "expected a 16-bit signed jump offset");
5802    }
5803  }
5804
5805  return MatchOperand_Success; // avoid excessive error messages
5806}
5807
5808//===----------------------------------------------------------------------===//
5809// Boolean holding registers
5810//===----------------------------------------------------------------------===//
5811
5812OperandMatchResultTy
5813AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5814  return parseReg(Operands);
5815}
5816
5817//===----------------------------------------------------------------------===//
5818// mubuf
5819//===----------------------------------------------------------------------===//
5820
5821AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5822  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5823}
5824
5825AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5826  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5827}
5828
5829AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5830  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5831}
5832
5833void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5834                               const OperandVector &Operands,
5835                               bool IsAtomic,
5836                               bool IsAtomicReturn,
5837                               bool IsLds) {
5838  bool IsLdsOpcode = IsLds;
5839  bool HasLdsModifier = false;
5840  OptionalImmIndexMap OptionalIdx;
5841  assert(IsAtomicReturn ? IsAtomic : true);
5842  unsigned FirstOperandIdx = 1;
5843
5844  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5845    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5846
5847    // Add the register arguments
5848    if (Op.isReg()) {
5849      Op.addRegOperands(Inst, 1);
5850      // Insert a tied src for atomic return dst.
5851      // This cannot be postponed as subsequent calls to
5852      // addImmOperands rely on correct number of MC operands.
5853      if (IsAtomicReturn && i == FirstOperandIdx)
5854        Op.addRegOperands(Inst, 1);
5855      continue;
5856    }
5857
5858    // Handle the case where soffset is an immediate
5859    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5860      Op.addImmOperands(Inst, 1);
5861      continue;
5862    }
5863
5864    HasLdsModifier |= Op.isLDS();
5865
5866    // Handle tokens like 'offen' which are sometimes hard-coded into the
5867    // asm string.  There are no MCInst operands for these.
5868    if (Op.isToken()) {
5869      continue;
5870    }
5871    assert(Op.isImm());
5872
5873    // Handle optional arguments
5874    OptionalIdx[Op.getImmTy()] = i;
5875  }
5876
5877  // This is a workaround for an llvm quirk which may result in an
5878  // incorrect instruction selection. Lds and non-lds versions of
5879  // MUBUF instructions are identical except that lds versions
5880  // have mandatory 'lds' modifier. However this modifier follows
5881  // optional modifiers and llvm asm matcher regards this 'lds'
5882  // modifier as an optional one. As a result, an lds version
5883  // of opcode may be selected even if it has no 'lds' modifier.
5884  if (IsLdsOpcode && !HasLdsModifier) {
5885    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5886    if (NoLdsOpcode != -1) { // Got lds version - correct it.
5887      Inst.setOpcode(NoLdsOpcode);
5888      IsLdsOpcode = false;
5889    }
5890  }
5891
5892  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5893  if (!IsAtomic) { // glc is hard-coded.
5894    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5895  }
5896  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5897
5898  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5899    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5900  }
5901
5902  if (isGFX10())
5903    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5904}
5905
5906void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5907  OptionalImmIndexMap OptionalIdx;
5908
5909  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5910    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5911
5912    // Add the register arguments
5913    if (Op.isReg()) {
5914      Op.addRegOperands(Inst, 1);
5915      continue;
5916    }
5917
5918    // Handle the case where soffset is an immediate
5919    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5920      Op.addImmOperands(Inst, 1);
5921      continue;
5922    }
5923
5924    // Handle tokens like 'offen' which are sometimes hard-coded into the
5925    // asm string.  There are no MCInst operands for these.
5926    if (Op.isToken()) {
5927      continue;
5928    }
5929    assert(Op.isImm());
5930
5931    // Handle optional arguments
5932    OptionalIdx[Op.getImmTy()] = i;
5933  }
5934
5935  addOptionalImmOperand(Inst, Operands, OptionalIdx,
5936                        AMDGPUOperand::ImmTyOffset);
5937  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5938  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5939  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5940  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5941
5942  if (isGFX10())
5943    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5944}
5945
5946//===----------------------------------------------------------------------===//
5947// mimg
5948//===----------------------------------------------------------------------===//
5949
5950void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5951                              bool IsAtomic) {
5952  unsigned I = 1;
5953  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5954  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5955    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5956  }
5957
5958  if (IsAtomic) {
5959    // Add src, same as dst
5960    assert(Desc.getNumDefs() == 1);
5961    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5962  }
5963
5964  OptionalImmIndexMap OptionalIdx;
5965
5966  for (unsigned E = Operands.size(); I != E; ++I) {
5967    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5968
5969    // Add the register arguments
5970    if (Op.isReg()) {
5971      Op.addRegOperands(Inst, 1);
5972    } else if (Op.isImmModifier()) {
5973      OptionalIdx[Op.getImmTy()] = I;
5974    } else if (!Op.isToken()) {
5975      llvm_unreachable("unexpected operand type");
5976    }
5977  }
5978
5979  bool IsGFX10 = isGFX10();
5980
5981  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5982  if (IsGFX10)
5983    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5984  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5985  if (IsGFX10)
5986    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5987  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5988  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5989  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5990  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5991  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5992  if (!IsGFX10)
5993    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5994  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5995}
5996
5997void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5998  cvtMIMG(Inst, Operands, true);
5999}
6000
6001//===----------------------------------------------------------------------===//
6002// smrd
6003//===----------------------------------------------------------------------===//
6004
6005bool AMDGPUOperand::isSMRDOffset8() const {
6006  return isImm() && isUInt<8>(getImm());
6007}
6008
6009bool AMDGPUOperand::isSMRDOffset20() const {
6010  return isImm() && isUInt<20>(getImm());
6011}
6012
6013bool AMDGPUOperand::isSMRDLiteralOffset() const {
6014  // 32-bit literals are only supported on CI and we only want to use them
6015  // when the offset is > 8-bits.
6016  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6017}
6018
6019AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6020  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6021}
6022
6023AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6024  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6025}
6026
6027AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6028  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6029}
6030
6031AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6032  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6033}
6034
6035//===----------------------------------------------------------------------===//
6036// vop3
6037//===----------------------------------------------------------------------===//
6038
6039static bool ConvertOmodMul(int64_t &Mul) {
6040  if (Mul != 1 && Mul != 2 && Mul != 4)
6041    return false;
6042
6043  Mul >>= 1;
6044  return true;
6045}
6046
6047static bool ConvertOmodDiv(int64_t &Div) {
6048  if (Div == 1) {
6049    Div = 0;
6050    return true;
6051  }
6052
6053  if (Div == 2) {
6054    Div = 3;
6055    return true;
6056  }
6057
6058  return false;
6059}
6060
6061static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6062  if (BoundCtrl == 0) {
6063    BoundCtrl = 1;
6064    return true;
6065  }
6066
6067  if (BoundCtrl == -1) {
6068    BoundCtrl = 0;
6069    return true;
6070  }
6071
6072  return false;
6073}
6074
6075// Note: the order in this table matches the order of operands in AsmString.
6076static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6077  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6078  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6079  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6080  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6081  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6082  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6083  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6084  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6085  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6086  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6087  {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6088  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6089  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6090  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6091  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6092  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6093  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6094  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6095  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6096  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6097  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6098  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6099  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6100  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6101  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6102  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6103  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6104  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6105  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6106  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6107  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6108  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6109  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6110  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6111  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6112  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6113  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6114  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6115  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6116  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6117  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6118  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6119  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6120  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6121};
6122
6123OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6124
6125  OperandMatchResultTy res = parseOptionalOpr(Operands);
6126
6127  // This is a hack to enable hardcoded mandatory operands which follow
6128  // optional operands.
6129  //
6130  // Current design assumes that all operands after the first optional operand
6131  // are also optional. However implementation of some instructions violates
6132  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
6133  //
6134  // To alleviate this problem, we have to (implicitly) parse extra operands
6135  // to make sure autogenerated parser of custom operands never hit hardcoded
6136  // mandatory operands.
6137
6138  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6139    if (res != MatchOperand_Success ||
6140        isToken(AsmToken::EndOfStatement))
6141      break;
6142
6143    trySkipToken(AsmToken::Comma);
6144    res = parseOptionalOpr(Operands);
6145  }
6146
6147  return res;
6148}
6149
6150OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6151  OperandMatchResultTy res;
6152  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6153    // try to parse any optional operand here
6154    if (Op.IsBit) {
6155      res = parseNamedBit(Op.Name, Operands, Op.Type);
6156    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6157      res = parseOModOperand(Operands);
6158    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6159               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6160               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6161      res = parseSDWASel(Operands, Op.Name, Op.Type);
6162    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6163      res = parseSDWADstUnused(Operands);
6164    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6165               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6166               Op.Type == AMDGPUOperand::ImmTyNegLo ||
6167               Op.Type == AMDGPUOperand::ImmTyNegHi) {
6168      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6169                                        Op.ConvertResult);
6170    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6171      res = parseDim(Operands);
6172    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6173      res = parseDfmtNfmt(Operands);
6174    } else {
6175      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6176    }
6177    if (res != MatchOperand_NoMatch) {
6178      return res;
6179    }
6180  }
6181  return MatchOperand_NoMatch;
6182}
6183
6184OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6185  StringRef Name = Parser.getTok().getString();
6186  if (Name == "mul") {
6187    return parseIntWithPrefix("mul", Operands,
6188                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6189  }
6190
6191  if (Name == "div") {
6192    return parseIntWithPrefix("div", Operands,
6193                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6194  }
6195
6196  return MatchOperand_NoMatch;
6197}
6198
6199void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6200  cvtVOP3P(Inst, Operands);
6201
6202  int Opc = Inst.getOpcode();
6203
6204  int SrcNum;
6205  const int Ops[] = { AMDGPU::OpName::src0,
6206                      AMDGPU::OpName::src1,
6207                      AMDGPU::OpName::src2 };
6208  for (SrcNum = 0;
6209       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6210       ++SrcNum);
6211  assert(SrcNum > 0);
6212
6213  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6214  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6215
6216  if ((OpSel & (1 << SrcNum)) != 0) {
6217    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6218    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6219    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6220  }
6221}
6222
6223static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6224      // 1. This operand is input modifiers
6225  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6226      // 2. This is not last operand
6227      && Desc.NumOperands > (OpNum + 1)
6228      // 3. Next operand is register class
6229      && Desc.OpInfo[OpNum + 1].RegClass != -1
6230      // 4. Next register is not tied to any other operand
6231      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6232}
6233
6234void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6235{
6236  OptionalImmIndexMap OptionalIdx;
6237  unsigned Opc = Inst.getOpcode();
6238
6239  unsigned I = 1;
6240  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6241  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6242    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6243  }
6244
6245  for (unsigned E = Operands.size(); I != E; ++I) {
6246    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6247    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6248      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6249    } else if (Op.isInterpSlot() ||
6250               Op.isInterpAttr() ||
6251               Op.isAttrChan()) {
6252      Inst.addOperand(MCOperand::createImm(Op.getImm()));
6253    } else if (Op.isImmModifier()) {
6254      OptionalIdx[Op.getImmTy()] = I;
6255    } else {
6256      llvm_unreachable("unhandled operand type");
6257    }
6258  }
6259
6260  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6261    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6262  }
6263
6264  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6265    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6266  }
6267
6268  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6269    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6270  }
6271}
6272
6273void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6274                              OptionalImmIndexMap &OptionalIdx) {
6275  unsigned Opc = Inst.getOpcode();
6276
6277  unsigned I = 1;
6278  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6279  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6280    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6281  }
6282
6283  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6284    // This instruction has src modifiers
6285    for (unsigned E = Operands.size(); I != E; ++I) {
6286      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6287      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6288        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6289      } else if (Op.isImmModifier()) {
6290        OptionalIdx[Op.getImmTy()] = I;
6291      } else if (Op.isRegOrImm()) {
6292        Op.addRegOrImmOperands(Inst, 1);
6293      } else {
6294        llvm_unreachable("unhandled operand type");
6295      }
6296    }
6297  } else {
6298    // No src modifiers
6299    for (unsigned E = Operands.size(); I != E; ++I) {
6300      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6301      if (Op.isMod()) {
6302        OptionalIdx[Op.getImmTy()] = I;
6303      } else {
6304        Op.addRegOrImmOperands(Inst, 1);
6305      }
6306    }
6307  }
6308
6309  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6310    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6311  }
6312
6313  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6314    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6315  }
6316
6317  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6318  // it has src2 register operand that is tied to dst operand
6319  // we don't allow modifiers for this operand in assembler so src2_modifiers
6320  // should be 0.
6321  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6322      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6323      Opc == AMDGPU::V_MAC_F32_e64_vi ||
6324      Opc == AMDGPU::V_MAC_F16_e64_vi ||
6325      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6326      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6327      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6328    auto it = Inst.begin();
6329    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6330    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6331    ++it;
6332    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6333  }
6334}
6335
6336void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6337  OptionalImmIndexMap OptionalIdx;
6338  cvtVOP3(Inst, Operands, OptionalIdx);
6339}
6340
6341void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6342                               const OperandVector &Operands) {
6343  OptionalImmIndexMap OptIdx;
6344  const int Opc = Inst.getOpcode();
6345  const MCInstrDesc &Desc = MII.get(Opc);
6346
6347  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6348
6349  cvtVOP3(Inst, Operands, OptIdx);
6350
6351  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6352    assert(!IsPacked);
6353    Inst.addOperand(Inst.getOperand(0));
6354  }
6355
6356  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
6357  // instruction, and then figure out where to actually put the modifiers
6358
6359  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6360
6361  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6362  if (OpSelHiIdx != -1) {
6363    int DefaultVal = IsPacked ? -1 : 0;
6364    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6365                          DefaultVal);
6366  }
6367
6368  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6369  if (NegLoIdx != -1) {
6370    assert(IsPacked);
6371    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6372    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6373  }
6374
6375  const int Ops[] = { AMDGPU::OpName::src0,
6376                      AMDGPU::OpName::src1,
6377                      AMDGPU::OpName::src2 };
6378  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6379                         AMDGPU::OpName::src1_modifiers,
6380                         AMDGPU::OpName::src2_modifiers };
6381
6382  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6383
6384  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6385  unsigned OpSelHi = 0;
6386  unsigned NegLo = 0;
6387  unsigned NegHi = 0;
6388
6389  if (OpSelHiIdx != -1) {
6390    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6391  }
6392
6393  if (NegLoIdx != -1) {
6394    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6395    NegLo = Inst.getOperand(NegLoIdx).getImm();
6396    NegHi = Inst.getOperand(NegHiIdx).getImm();
6397  }
6398
6399  for (int J = 0; J < 3; ++J) {
6400    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6401    if (OpIdx == -1)
6402      break;
6403
6404    uint32_t ModVal = 0;
6405
6406    if ((OpSel & (1 << J)) != 0)
6407      ModVal |= SISrcMods::OP_SEL_0;
6408
6409    if ((OpSelHi & (1 << J)) != 0)
6410      ModVal |= SISrcMods::OP_SEL_1;
6411
6412    if ((NegLo & (1 << J)) != 0)
6413      ModVal |= SISrcMods::NEG;
6414
6415    if ((NegHi & (1 << J)) != 0)
6416      ModVal |= SISrcMods::NEG_HI;
6417
6418    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6419
6420    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6421  }
6422}
6423
6424//===----------------------------------------------------------------------===//
6425// dpp
6426//===----------------------------------------------------------------------===//
6427
6428bool AMDGPUOperand::isDPP8() const {
6429  return isImmTy(ImmTyDPP8);
6430}
6431
6432bool AMDGPUOperand::isDPPCtrl() const {
6433  using namespace AMDGPU::DPP;
6434
6435  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6436  if (result) {
6437    int64_t Imm = getImm();
6438    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6439           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6440           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6441           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6442           (Imm == DppCtrl::WAVE_SHL1) ||
6443           (Imm == DppCtrl::WAVE_ROL1) ||
6444           (Imm == DppCtrl::WAVE_SHR1) ||
6445           (Imm == DppCtrl::WAVE_ROR1) ||
6446           (Imm == DppCtrl::ROW_MIRROR) ||
6447           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6448           (Imm == DppCtrl::BCAST15) ||
6449           (Imm == DppCtrl::BCAST31) ||
6450           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6451           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6452  }
6453  return false;
6454}
6455
6456//===----------------------------------------------------------------------===//
6457// mAI
6458//===----------------------------------------------------------------------===//
6459
6460bool AMDGPUOperand::isBLGP() const {
6461  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6462}
6463
6464bool AMDGPUOperand::isCBSZ() const {
6465  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6466}
6467
6468bool AMDGPUOperand::isABID() const {
6469  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6470}
6471
6472bool AMDGPUOperand::isS16Imm() const {
6473  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6474}
6475
6476bool AMDGPUOperand::isU16Imm() const {
6477  return isImm() && isUInt<16>(getImm());
6478}
6479
6480OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6481  if (!isGFX10())
6482    return MatchOperand_NoMatch;
6483
6484  SMLoc S = Parser.getTok().getLoc();
6485
6486  if (getLexer().isNot(AsmToken::Identifier))
6487    return MatchOperand_NoMatch;
6488  if (getLexer().getTok().getString() != "dim")
6489    return MatchOperand_NoMatch;
6490
6491  Parser.Lex();
6492  if (getLexer().isNot(AsmToken::Colon))
6493    return MatchOperand_ParseFail;
6494
6495  Parser.Lex();
6496
6497  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6498  // integer.
6499  std::string Token;
6500  if (getLexer().is(AsmToken::Integer)) {
6501    SMLoc Loc = getLexer().getTok().getEndLoc();
6502    Token = getLexer().getTok().getString();
6503    Parser.Lex();
6504    if (getLexer().getTok().getLoc() != Loc)
6505      return MatchOperand_ParseFail;
6506  }
6507  if (getLexer().isNot(AsmToken::Identifier))
6508    return MatchOperand_ParseFail;
6509  Token += getLexer().getTok().getString();
6510
6511  StringRef DimId = Token;
6512  if (DimId.startswith("SQ_RSRC_IMG_"))
6513    DimId = DimId.substr(12);
6514
6515  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6516  if (!DimInfo)
6517    return MatchOperand_ParseFail;
6518
6519  Parser.Lex();
6520
6521  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6522                                              AMDGPUOperand::ImmTyDim));
6523  return MatchOperand_Success;
6524}
6525
6526OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6527  SMLoc S = Parser.getTok().getLoc();
6528  StringRef Prefix;
6529
6530  if (getLexer().getKind() == AsmToken::Identifier) {
6531    Prefix = Parser.getTok().getString();
6532  } else {
6533    return MatchOperand_NoMatch;
6534  }
6535
6536  if (Prefix != "dpp8")
6537    return parseDPPCtrl(Operands);
6538  if (!isGFX10())
6539    return MatchOperand_NoMatch;
6540
6541  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
6542
6543  int64_t Sels[8];
6544
6545  Parser.Lex();
6546  if (getLexer().isNot(AsmToken::Colon))
6547    return MatchOperand_ParseFail;
6548
6549  Parser.Lex();
6550  if (getLexer().isNot(AsmToken::LBrac))
6551    return MatchOperand_ParseFail;
6552
6553  Parser.Lex();
6554  if (getParser().parseAbsoluteExpression(Sels[0]))
6555    return MatchOperand_ParseFail;
6556  if (0 > Sels[0] || 7 < Sels[0])
6557    return MatchOperand_ParseFail;
6558
6559  for (size_t i = 1; i < 8; ++i) {
6560    if (getLexer().isNot(AsmToken::Comma))
6561      return MatchOperand_ParseFail;
6562
6563    Parser.Lex();
6564    if (getParser().parseAbsoluteExpression(Sels[i]))
6565      return MatchOperand_ParseFail;
6566    if (0 > Sels[i] || 7 < Sels[i])
6567      return MatchOperand_ParseFail;
6568  }
6569
6570  if (getLexer().isNot(AsmToken::RBrac))
6571    return MatchOperand_ParseFail;
6572  Parser.Lex();
6573
6574  unsigned DPP8 = 0;
6575  for (size_t i = 0; i < 8; ++i)
6576    DPP8 |= (Sels[i] << (i * 3));
6577
6578  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6579  return MatchOperand_Success;
6580}
6581
6582OperandMatchResultTy
6583AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6584  using namespace AMDGPU::DPP;
6585
6586  SMLoc S = Parser.getTok().getLoc();
6587  StringRef Prefix;
6588  int64_t Int;
6589
6590  if (getLexer().getKind() == AsmToken::Identifier) {
6591    Prefix = Parser.getTok().getString();
6592  } else {
6593    return MatchOperand_NoMatch;
6594  }
6595
6596  if (Prefix == "row_mirror") {
6597    Int = DppCtrl::ROW_MIRROR;
6598    Parser.Lex();
6599  } else if (Prefix == "row_half_mirror") {
6600    Int = DppCtrl::ROW_HALF_MIRROR;
6601    Parser.Lex();
6602  } else {
6603    // Check to prevent parseDPPCtrlOps from eating invalid tokens
6604    if (Prefix != "quad_perm"
6605        && Prefix != "row_shl"
6606        && Prefix != "row_shr"
6607        && Prefix != "row_ror"
6608        && Prefix != "wave_shl"
6609        && Prefix != "wave_rol"
6610        && Prefix != "wave_shr"
6611        && Prefix != "wave_ror"
6612        && Prefix != "row_bcast"
6613        && Prefix != "row_share"
6614        && Prefix != "row_xmask") {
6615      return MatchOperand_NoMatch;
6616    }
6617
6618    if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6619      return MatchOperand_NoMatch;
6620
6621    if (!isVI() && !isGFX9() &&
6622        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6623         Prefix == "wave_rol" || Prefix == "wave_ror" ||
6624         Prefix == "row_bcast"))
6625      return MatchOperand_NoMatch;
6626
6627    Parser.Lex();
6628    if (getLexer().isNot(AsmToken::Colon))
6629      return MatchOperand_ParseFail;
6630
6631    if (Prefix == "quad_perm") {
6632      // quad_perm:[%d,%d,%d,%d]
6633      Parser.Lex();
6634      if (getLexer().isNot(AsmToken::LBrac))
6635        return MatchOperand_ParseFail;
6636      Parser.Lex();
6637
6638      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6639        return MatchOperand_ParseFail;
6640
6641      for (int i = 0; i < 3; ++i) {
6642        if (getLexer().isNot(AsmToken::Comma))
6643          return MatchOperand_ParseFail;
6644        Parser.Lex();
6645
6646        int64_t Temp;
6647        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6648          return MatchOperand_ParseFail;
6649        const int shift = i*2 + 2;
6650        Int += (Temp << shift);
6651      }
6652
6653      if (getLexer().isNot(AsmToken::RBrac))
6654        return MatchOperand_ParseFail;
6655      Parser.Lex();
6656    } else {
6657      // sel:%d
6658      Parser.Lex();
6659      if (getParser().parseAbsoluteExpression(Int))
6660        return MatchOperand_ParseFail;
6661
6662      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6663        Int |= DppCtrl::ROW_SHL0;
6664      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6665        Int |= DppCtrl::ROW_SHR0;
6666      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6667        Int |= DppCtrl::ROW_ROR0;
6668      } else if (Prefix == "wave_shl" && 1 == Int) {
6669        Int = DppCtrl::WAVE_SHL1;
6670      } else if (Prefix == "wave_rol" && 1 == Int) {
6671        Int = DppCtrl::WAVE_ROL1;
6672      } else if (Prefix == "wave_shr" && 1 == Int) {
6673        Int = DppCtrl::WAVE_SHR1;
6674      } else if (Prefix == "wave_ror" && 1 == Int) {
6675        Int = DppCtrl::WAVE_ROR1;
6676      } else if (Prefix == "row_bcast") {
6677        if (Int == 15) {
6678          Int = DppCtrl::BCAST15;
6679        } else if (Int == 31) {
6680          Int = DppCtrl::BCAST31;
6681        } else {
6682          return MatchOperand_ParseFail;
6683        }
6684      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6685        Int |= DppCtrl::ROW_SHARE_FIRST;
6686      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6687        Int |= DppCtrl::ROW_XMASK_FIRST;
6688      } else {
6689        return MatchOperand_ParseFail;
6690      }
6691    }
6692  }
6693
6694  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6695  return MatchOperand_Success;
6696}
6697
6698AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6699  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6700}
6701
6702AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6703  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6704}
6705
6706AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6707  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6708}
6709
6710AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6711  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6712}
6713
6714AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6715  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6716}
6717
6718void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6719  OptionalImmIndexMap OptionalIdx;
6720
6721  unsigned I = 1;
6722  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6723  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6724    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6725  }
6726
6727  int Fi = 0;
6728  for (unsigned E = Operands.size(); I != E; ++I) {
6729    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6730                                            MCOI::TIED_TO);
6731    if (TiedTo != -1) {
6732      assert((unsigned)TiedTo < Inst.getNumOperands());
6733      // handle tied old or src2 for MAC instructions
6734      Inst.addOperand(Inst.getOperand(TiedTo));
6735    }
6736    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6737    // Add the register arguments
6738    if (Op.isReg() && validateVccOperand(Op.getReg())) {
6739      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
6740      // Skip it.
6741      continue;
6742    }
6743
6744    if (IsDPP8) {
6745      if (Op.isDPP8()) {
6746        Op.addImmOperands(Inst, 1);
6747      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6748        Op.addRegWithFPInputModsOperands(Inst, 2);
6749      } else if (Op.isFI()) {
6750        Fi = Op.getImm();
6751      } else if (Op.isReg()) {
6752        Op.addRegOperands(Inst, 1);
6753      } else {
6754        llvm_unreachable("Invalid operand type");
6755      }
6756    } else {
6757      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6758        Op.addRegWithFPInputModsOperands(Inst, 2);
6759      } else if (Op.isDPPCtrl()) {
6760        Op.addImmOperands(Inst, 1);
6761      } else if (Op.isImm()) {
6762        // Handle optional arguments
6763        OptionalIdx[Op.getImmTy()] = I;
6764      } else {
6765        llvm_unreachable("Invalid operand type");
6766      }
6767    }
6768  }
6769
6770  if (IsDPP8) {
6771    using namespace llvm::AMDGPU::DPP;
6772    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6773  } else {
6774    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6775    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6776    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6777    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6778      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6779    }
6780  }
6781}
6782
6783//===----------------------------------------------------------------------===//
6784// sdwa
6785//===----------------------------------------------------------------------===//
6786
6787OperandMatchResultTy
6788AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6789                              AMDGPUOperand::ImmTy Type) {
6790  using namespace llvm::AMDGPU::SDWA;
6791
6792  SMLoc S = Parser.getTok().getLoc();
6793  StringRef Value;
6794  OperandMatchResultTy res;
6795
6796  res = parseStringWithPrefix(Prefix, Value);
6797  if (res != MatchOperand_Success) {
6798    return res;
6799  }
6800
6801  int64_t Int;
6802  Int = StringSwitch<int64_t>(Value)
6803        .Case("BYTE_0", SdwaSel::BYTE_0)
6804        .Case("BYTE_1", SdwaSel::BYTE_1)
6805        .Case("BYTE_2", SdwaSel::BYTE_2)
6806        .Case("BYTE_3", SdwaSel::BYTE_3)
6807        .Case("WORD_0", SdwaSel::WORD_0)
6808        .Case("WORD_1", SdwaSel::WORD_1)
6809        .Case("DWORD", SdwaSel::DWORD)
6810        .Default(0xffffffff);
6811  Parser.Lex(); // eat last token
6812
6813  if (Int == 0xffffffff) {
6814    return MatchOperand_ParseFail;
6815  }
6816
6817  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6818  return MatchOperand_Success;
6819}
6820
6821OperandMatchResultTy
6822AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6823  using namespace llvm::AMDGPU::SDWA;
6824
6825  SMLoc S = Parser.getTok().getLoc();
6826  StringRef Value;
6827  OperandMatchResultTy res;
6828
6829  res = parseStringWithPrefix("dst_unused", Value);
6830  if (res != MatchOperand_Success) {
6831    return res;
6832  }
6833
6834  int64_t Int;
6835  Int = StringSwitch<int64_t>(Value)
6836        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6837        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6838        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6839        .Default(0xffffffff);
6840  Parser.Lex(); // eat last token
6841
6842  if (Int == 0xffffffff) {
6843    return MatchOperand_ParseFail;
6844  }
6845
6846  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6847  return MatchOperand_Success;
6848}
6849
6850void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6851  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6852}
6853
6854void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6855  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6856}
6857
6858void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6859  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6860}
6861
6862void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6863  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6864}
6865
6866void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6867  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6868}
6869
6870void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6871                              uint64_t BasicInstType,
6872                              bool SkipDstVcc,
6873                              bool SkipSrcVcc) {
6874  using namespace llvm::AMDGPU::SDWA;
6875
6876  OptionalImmIndexMap OptionalIdx;
6877  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6878  bool SkippedVcc = false;
6879
6880  unsigned I = 1;
6881  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6882  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6883    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6884  }
6885
6886  for (unsigned E = Operands.size(); I != E; ++I) {
6887    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6888    if (SkipVcc && !SkippedVcc && Op.isReg() &&
6889        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6890      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
6891      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6892      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6893      // Skip VCC only if we didn't skip it on previous iteration.
6894      // Note that src0 and src1 occupy 2 slots each because of modifiers.
6895      if (BasicInstType == SIInstrFlags::VOP2 &&
6896          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6897           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6898        SkippedVcc = true;
6899        continue;
6900      } else if (BasicInstType == SIInstrFlags::VOPC &&
6901                 Inst.getNumOperands() == 0) {
6902        SkippedVcc = true;
6903        continue;
6904      }
6905    }
6906    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6907      Op.addRegOrImmWithInputModsOperands(Inst, 2);
6908    } else if (Op.isImm()) {
6909      // Handle optional arguments
6910      OptionalIdx[Op.getImmTy()] = I;
6911    } else {
6912      llvm_unreachable("Invalid operand type");
6913    }
6914    SkippedVcc = false;
6915  }
6916
6917  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6918      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6919      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6920    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
6921    switch (BasicInstType) {
6922    case SIInstrFlags::VOP1:
6923      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6924      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6925        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6926      }
6927      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6928      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6929      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6930      break;
6931
6932    case SIInstrFlags::VOP2:
6933      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6934      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6935        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6936      }
6937      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6938      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6939      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6940      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6941      break;
6942
6943    case SIInstrFlags::VOPC:
6944      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6945        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6946      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6947      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6948      break;
6949
6950    default:
6951      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6952    }
6953  }
6954
6955  // special case v_mac_{f16, f32}:
6956  // it has src2 register operand that is tied to dst operand
6957  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6958      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6959    auto it = Inst.begin();
6960    std::advance(
6961      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6962    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6963  }
6964}
6965
6966//===----------------------------------------------------------------------===//
6967// mAI
6968//===----------------------------------------------------------------------===//
6969
6970AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6971  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6972}
6973
6974AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6975  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6976}
6977
6978AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6979  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6980}
6981
6982/// Force static initialization.
6983extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
6984  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6985  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6986}
6987
6988#define GET_REGISTER_MATCHER
6989#define GET_MATCHER_IMPLEMENTATION
6990#define GET_MNEMONIC_SPELL_CHECKER
6991#include "AMDGPUGenAsmMatcher.inc"
6992
6993// This fuction should be defined after auto-generated include so that we have
6994// MatchClassKind enum defined
6995unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6996                                                     unsigned Kind) {
6997  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6998  // But MatchInstructionImpl() expects to meet token and fails to validate
6999  // operand. This method checks if we are given immediate operand but expect to
7000  // get corresponding token.
7001  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7002  switch (Kind) {
7003  case MCK_addr64:
7004    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7005  case MCK_gds:
7006    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7007  case MCK_lds:
7008    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7009  case MCK_glc:
7010    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7011  case MCK_idxen:
7012    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7013  case MCK_offen:
7014    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7015  case MCK_SSrcB32:
7016    // When operands have expression values, they will return true for isToken,
7017    // because it is not possible to distinguish between a token and an
7018    // expression at parse time. MatchInstructionImpl() will always try to
7019    // match an operand as a token, when isToken returns true, and when the
7020    // name of the expression is not a valid token, the match will fail,
7021    // so we need to handle it here.
7022    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7023  case MCK_SSrcF32:
7024    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7025  case MCK_SoppBrTarget:
7026    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7027  case MCK_VReg32OrOff:
7028    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7029  case MCK_InterpSlot:
7030    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7031  case MCK_Attr:
7032    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7033  case MCK_AttrChan:
7034    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7035  case MCK_SReg_64:
7036  case MCK_SReg_64_XEXEC:
7037    // Null is defined as a 32-bit register but
7038    // it should also be enabled with 64-bit operands.
7039    // The following code enables it for SReg_64 operands
7040    // used as source and destination. Remaining source
7041    // operands are handled in isInlinableImm.
7042    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7043  default:
7044    return Match_InvalidOperand;
7045  }
7046}
7047
7048//===----------------------------------------------------------------------===//
7049// endpgm
7050//===----------------------------------------------------------------------===//
7051
7052OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7053  SMLoc S = Parser.getTok().getLoc();
7054  int64_t Imm = 0;
7055
7056  if (!parseExpr(Imm)) {
7057    // The operand is optional, if not present default to 0
7058    Imm = 0;
7059  }
7060
7061  if (!isUInt<16>(Imm)) {
7062    Error(S, "expected a 16-bit value");
7063    return MatchOperand_ParseFail;
7064  }
7065
7066  Operands.push_back(
7067      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7068  return MatchOperand_Success;
7069}
7070
7071bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7072