// AMDGPUAsmParser.cpp revision 344779
//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Broad classification of a parsed register: vector GPR, scalar GPR,
// trap-temporary, or a named special register.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand. An operand is one of: a raw
/// token, an immediate (with an ImmTy describing which named operand it
/// fills), a register, or an MCExpr. Register and plain immediates may also
/// carry source modifiers (abs/neg/sext).
class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source modifiers attached to an operand. FP modifiers (abs/neg) and the
  /// int modifier (sext) are mutually exclusive when encoded.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode abs/neg into the SISrcMods bit layout.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0;
      Operand |= Neg ? SISrcMods::NEG : 0;
      return Operand;
    }

    // Encode sext into the SISrcMods bit layout.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Identifies which named instruction operand an Immediate fills
  // (e.g. offset, glc, dmask); ImmTyNone is a plain immediate value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyHigh
  };

  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    bool IsForcedVOP3;
    Modifiers Mods;
  };

  // Payload storage; the active member is selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(MVT type) const {
    return isRegKind() || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named immediate operand; several also range-check the
  // value against the field width the instruction encoding provides.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: scalar register or inline constant, no modifiers.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc or a literal constant (or expression where allowed).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  // VCSrc*: vector or scalar register, or inline constant, no modifiers.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc or a literal constant (or expression where allowed).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;

  // See the comment in isToken(): a symbol-ref expression can stand in for a
  // token, in which case the symbol name is the token text.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers word first, then the source operand itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Debug helper: human-readable name for an ImmTy value.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyHigh: OS << "High"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers; each produces a fully-initialized operand of one kind.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E,
                                      bool ForceVOP3) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->Reg.IsForcedVOP3 = ForceVOP3;
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the highest SGPR/VGPR index used within the current kernel scope
/// and mirrors the counts into the .kernel.sgpr_count / .kernel.vgpr_count
/// assembler symbols.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    // Reset counts to zero (usesXgprAt increments the -1 sentinel to 0).
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

/// Target assembly parser for AMDGPU: parses instructions and operands into
/// MCInsts and handles the AMDGPU-specific assembler directives
/// (.amdhsa_kernel, .amd_kernel_code_t, HSA metadata, etc.).
class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadata();

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is none suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  // Subtarget feature queries.
  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  // Mnemonic suffixes (_e64, _dpp, _sdwa) force a particular encoding;
  // these record/query that choice for the matcher.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
  OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Id plus a flag recording whether the value was given symbolically
  // (by name) rather than as a number; used by hwreg/sendmsg parsing.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Post-match semantic validation of a matched MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Small lexer conveniences used by the structured-operand parsers.
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseExpr(int64_t &Imm);

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // default*() create the implicit operand used when an optional operand
  // was not written in the source.
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);
};

// Describes one optional operand: its name, the ImmTy it produces, whether
// it is a bare bit (no value), and an optional value-conversion callback.
struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
1183static const fltSemantics *getFltSemantics(unsigned Size) { 1184 switch (Size) { 1185 case 4: 1186 return &APFloat::IEEEsingle(); 1187 case 8: 1188 return &APFloat::IEEEdouble(); 1189 case 2: 1190 return &APFloat::IEEEhalf(); 1191 default: 1192 llvm_unreachable("unsupported fp type"); 1193 } 1194} 1195 1196static const fltSemantics *getFltSemantics(MVT VT) { 1197 return getFltSemantics(VT.getSizeInBits() / 8); 1198} 1199 1200static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1201 switch (OperandType) { 1202 case AMDGPU::OPERAND_REG_IMM_INT32: 1203 case AMDGPU::OPERAND_REG_IMM_FP32: 1204 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1205 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1206 return &APFloat::IEEEsingle(); 1207 case AMDGPU::OPERAND_REG_IMM_INT64: 1208 case AMDGPU::OPERAND_REG_IMM_FP64: 1209 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1210 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1211 return &APFloat::IEEEdouble(); 1212 case AMDGPU::OPERAND_REG_IMM_INT16: 1213 case AMDGPU::OPERAND_REG_IMM_FP16: 1214 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1215 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1216 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1217 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1218 return &APFloat::IEEEhalf(); 1219 default: 1220 llvm_unreachable("unsupported fp type"); 1221 } 1222} 1223 1224//===----------------------------------------------------------------------===// 1225// Operand 1226//===----------------------------------------------------------------------===// 1227 1228static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1229 bool Lost; 1230 1231 // Convert literal to single precision 1232 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1233 APFloat::rmNearestTiesToEven, 1234 &Lost); 1235 // We allow precision lost but not overflow or underflow 1236 if (Status != APFloat::opOK && 1237 Lost && 1238 ((Status & APFloat::opOverflow) != 0 || 1239 (Status & APFloat::opUnderflow) != 0)) { 1240 return false; 1241 
} 1242 1243 return true; 1244} 1245 1246bool AMDGPUOperand::isInlinableImm(MVT type) const { 1247 if (!isImmTy(ImmTyNone)) { 1248 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1249 return false; 1250 } 1251 // TODO: We should avoid using host float here. It would be better to 1252 // check the float bit values which is what a few other places do. 1253 // We've had bot failures before due to weird NaN support on mips hosts. 1254 1255 APInt Literal(64, Imm.Val); 1256 1257 if (Imm.IsFPImm) { // We got fp literal token 1258 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1259 return AMDGPU::isInlinableLiteral64(Imm.Val, 1260 AsmParser->hasInv2PiInlineImm()); 1261 } 1262 1263 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1264 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1265 return false; 1266 1267 if (type.getScalarSizeInBits() == 16) { 1268 return AMDGPU::isInlinableLiteral16( 1269 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1270 AsmParser->hasInv2PiInlineImm()); 1271 } 1272 1273 // Check if single precision literal is inlinable 1274 return AMDGPU::isInlinableLiteral32( 1275 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1276 AsmParser->hasInv2PiInlineImm()); 1277 } 1278 1279 // We got int literal token. 
1280 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1281 return AMDGPU::isInlinableLiteral64(Imm.Val, 1282 AsmParser->hasInv2PiInlineImm()); 1283 } 1284 1285 if (type.getScalarSizeInBits() == 16) { 1286 return AMDGPU::isInlinableLiteral16( 1287 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1288 AsmParser->hasInv2PiInlineImm()); 1289 } 1290 1291 return AMDGPU::isInlinableLiteral32( 1292 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1293 AsmParser->hasInv2PiInlineImm()); 1294} 1295 1296bool AMDGPUOperand::isLiteralImm(MVT type) const { 1297 // Check that this immediate can be added as literal 1298 if (!isImmTy(ImmTyNone)) { 1299 return false; 1300 } 1301 1302 if (!Imm.IsFPImm) { 1303 // We got int literal token. 1304 1305 if (type == MVT::f64 && hasFPModifiers()) { 1306 // Cannot apply fp modifiers to int literals preserving the same semantics 1307 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1308 // disable these cases. 1309 return false; 1310 } 1311 1312 unsigned Size = type.getSizeInBits(); 1313 if (Size == 64) 1314 Size = 32; 1315 1316 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1317 // types. 1318 return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val); 1319 } 1320 1321 // We got fp literal token 1322 if (type == MVT::f64) { // Expected 64-bit fp operand 1323 // We would set low 64-bits of literal to zeroes but we accept this literals 1324 return true; 1325 } 1326 1327 if (type == MVT::i64) { // Expected 64-bit int operand 1328 // We don't allow fp literals in 64-bit integer instructions. It is 1329 // unclear how we should encode them. 
1330 return false; 1331 } 1332 1333 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1334 return canLosslesslyConvertToFPType(FPLiteral, type); 1335} 1336 1337bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1338 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1339} 1340 1341bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1342 if (AsmParser->isVI()) 1343 return isVReg(); 1344 else if (AsmParser->isGFX9()) 1345 return isRegKind() || isInlinableImm(type); 1346 else 1347 return false; 1348} 1349 1350bool AMDGPUOperand::isSDWAFP16Operand() const { 1351 return isSDWAOperand(MVT::f16); 1352} 1353 1354bool AMDGPUOperand::isSDWAFP32Operand() const { 1355 return isSDWAOperand(MVT::f32); 1356} 1357 1358bool AMDGPUOperand::isSDWAInt16Operand() const { 1359 return isSDWAOperand(MVT::i16); 1360} 1361 1362bool AMDGPUOperand::isSDWAInt32Operand() const { 1363 return isSDWAOperand(MVT::i32); 1364} 1365 1366uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1367{ 1368 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1369 assert(Size == 2 || Size == 4 || Size == 8); 1370 1371 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1372 1373 if (Imm.Mods.Abs) { 1374 Val &= ~FpSignMask; 1375 } 1376 if (Imm.Mods.Neg) { 1377 Val ^= FpSignMask; 1378 } 1379 1380 return Val; 1381} 1382 1383void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1384 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1385 Inst.getNumOperands())) { 1386 addLiteralImmOperand(Inst, Imm.Val, 1387 ApplyModifiers & 1388 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1389 } else { 1390 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1391 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1392 } 1393} 1394 1395void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1396 const auto& InstDesc = 
      AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP literal tokens are always stored as a 64-bit double pattern, so
    // modifiers must be applied to the full 8-byte value in that case.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the double pattern are encoded; hardware
        // pads the low 32 bits with zeroes.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
          OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
        // Packed 16-bit operands: replicate the half into both lanes.
        ImmVal |= (ImmVal << 16);
      }

      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  // FIXME: No errors on truncation
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isInt<32>(Val) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    // Not an inline constant: truncate to the low 32 bits.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isInt<16>(Val) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed types only accept inline constants here; replicate into lanes.
    auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
    assert(AMDGPU::isInlinableLiteral16(LiteralVal,
                                        AsmParser->hasInv2PiInlineImm()));

    uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
                      static_cast<uint32_t>(LiteralVal);
    Inst.addOperand(MCOperand::createImm(ImmVal));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Encode a KIMM (fixed-width literal) operand of the given bit width.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned
N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  // FP literals arrive as a double bit-pattern; round to the target width.
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  // Translate the parsed register to the subtarget-specific MC register.
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Map a register kind and width (in dwords) to a register class ID, or -1
// if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

// Look up a named special register; returns 0 (no register) on no match.
static
unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

// MCTargetAsmParser hook; returns true on failure (no register parsed).
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Extend the register list [..., Reg] by Reg1: special register pairs fuse
// into their 64-bit super-register; VGPR/SGPR/TTMP must be consecutive.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Parse any register syntax: special names, vN/sN/ttmpN, v[LO:HI] ranges and
// [r0,r1,...] lists. Returns false on failure; outputs are only meaningful
// on success.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
        Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        // v[XX] (no colon) means a single-register range.
        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // Each list element must be a single register of the same kind,
        // consecutive with the registers accumulated so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  // Resolve (kind, number, width) into a concrete MC register.
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
      Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

// Name of the GPR-count bookkeeping symbol for a register kind, or None for
// kinds that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

// Define the bookkeeping symbol with an initial value of 0.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the .amdgcn.next_free_{v,s}gpr symbol to cover a register use.
// Returns false (after emitting an error) if the symbol was redefined in an
// incompatible way; true otherwise (including kinds that are not tracked).
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getParser().getTok().getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

// Parse a register and wrap it into an AMDGPUOperand, updating GPR-count
// bookkeeping (code-object-v3) or the legacy kernel scope tracker.
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    return nullptr;
  }
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

// Parse an absolute integer expression; returns true on failure. With AbsMod
// set, a literal followed by '|' is parsed as a primary expression so the
// trailing '|' is left for the caller.
bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
  if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
      (getLexer().getKind() == AsmToken::Integer ||
       getLexer().getKind() == AsmToken::Real)) {
    // This is a workaround for handling operands like these:
    //     |1.0|
    //     |-1|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').

    SMLoc EndLoc;
    const MCExpr *Expr;

    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    return !Expr->evaluateAsAbsolute(Val);
  }

  return getParser().parseAbsoluteExpression(Val);
}

// Parse an integer or floating-point immediate (with optional leading '-')
// into an AMDGPUOperand. FP values are stored as a double bit-pattern with
// IsFPImm set.
OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
  // TODO: add syntactic sugar for 1/(2*PI)
  bool Minus = false;
  if (getLexer().getKind() == AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();
    // Only consume '-' when it directly precedes a numeric literal.
    if (!NextToken.is(AsmToken::Integer) &&
        !NextToken.is(AsmToken::Real)) {
      return MatchOperand_NoMatch;
    }
    Minus = true;
    Parser.Lex();
  }

  SMLoc S = Parser.getTok().getLoc();
  switch(getLexer().getKind()) {
  case AsmToken::Integer: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;
    if (Minus)
      IntVal *= -1;
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }
  case AsmToken::Real: {
    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, AbsMod))
      return MatchOperand_ParseFail;

    APFloat F(BitsToDouble(IntVal));
    if (Minus)
      F.changeSign();
    Operands.push_back(
        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    return MatchOperand_Success;
  }
  default:
    return MatchOperand_NoMatch;
  }
}

// Parse a register operand, tagging it with the forced-VOP3 state.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (auto R = parseRegister()) {
    assert(R->isReg());
    R->Reg.IsForcedVOP3 = isForcedVOP3();
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_NoMatch;
}

// Try an immediate first, falling back to a register.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
  auto res = parseImm(Operands, AbsMod);
  if (res != MatchOperand_NoMatch) {
    return res;
  }

  return parseReg(Operands);
}

// Parse a register or immediate with optional FP input modifiers in any of
// the accepted spellings: leading '-', neg(...), abs(...), or |...|.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;

  if (getLexer().getKind()== AsmToken::Minus) {
    const AsmToken NextToken = getLexer().peekTok();

    // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
    if (NextToken.is(AsmToken::Minus)) {
      Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
      return MatchOperand_ParseFail;
    }

    // '-' followed by an integer literal N should be interpreted as integer
    // negation rather than a floating-point NEG modifier applied to N.
    // Besides being counter-intuitive, such use of floating-point NEG modifier
    // results in different meaning of integer literals used with VOP1/2/C
    // and VOP3, for example:
    //    v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
    //    v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
    // Negative fp literals should be handled likewise for uniformity
    if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
      Parser.Lex();
      Negate = true;
    }
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "neg") {
    if (Negate) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Negate2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after neg");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "abs") {
    Parser.Lex();
    Abs2 = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after abs");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
  }

  if (getLexer().getKind() == AsmToken::Pipe) {
    // '|x|' and 'abs(x)' are mutually exclusive.
    if (Abs2) {
      Error(Parser.getTok().getLoc(), "expected register or immediate");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Abs = true;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  // Consume the closing delimiters and record the modifiers on the operand.
  AMDGPUOperand::Modifiers Mods;
  if (Abs) {
    if (getLexer().getKind() != AsmToken::Pipe) {
      Error(Parser.getTok().getLoc(), "expected vertical bar");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }
  if (Abs2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Abs = true;
  }

  if (Negate) {
    Mods.Neg = true;
  } else if (Negate2) {
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Neg = true;
  }

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register or immediate with an optional sext(...) integer modifier.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = false;

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "sext") {
    Parser.Lex();
    Sext = true;
    if (getLexer().isNot(AsmToken::LParen)) {
      Error(Parser.getTok().getLoc(), "expected left paren after sext");
      return MatchOperand_ParseFail;
    }
Parser.Lex();
  }

  // Parse the inner operand: a register, or (when AllowImm) an immediate.
  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Res;
  }

  AMDGPUOperand::Modifiers Mods;
  if (Sext) {
    // A "sext(" prefix was consumed earlier; require the matching ")".
    if (getLexer().isNot(AsmToken::RParen)) {
      Error(Parser.getTok().getLoc(), "expected closing parentheses");
      return MatchOperand_ParseFail;
    }
    Parser.Lex();
    Mods.Sext = true;
  }

  // Attach the collected integer modifiers to the operand just parsed.
  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Parse a register operand with optional floating-point input modifiers;
// immediates are rejected (AllowImm == false).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

// Parse a register operand with optional integer input modifiers (sext);
// immediates are rejected (AllowImm == false).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either a register operand or the keyword "off", which is encoded
// as an ImmTyOff immediate with value 0.
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  const AsmToken &Tok = Parser.getTok();
  if (Tok.getString() == "off") {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
                                                AMDGPUOperand::ImmTyOff, false));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Reject matches that contradict a mnemonic-forced encoding (_e32/_e64,
// DPP, SDWA) and enforce a few per-opcode operand restrictions.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // A forced 32-bit encoding must not match a VOP3 opcode (and vice versa);
  // forced DPP/SDWA must match the corresponding instruction class.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 opcodes prefer their e32 form unless _e64 was forced.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  // Subtargets without flat-instruction offsets only accept offset == 0.
  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
    // FIXME: Produces error without correct column reported.
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
    const auto &Op = Inst.getOperand(OpNum);
    if (Op.getImm() != 0)
      return Match_InvalidOperand;
  }

  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  // A forced _e32 suffix restricts matching to the default table.
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  // No encoding was forced: try every variant.
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

unsigned
AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  // Return the first implicitly-read special SGPR (FLAT_SCR, VCC or M0)
  // of this instruction, or NoRegister if there is none.  Such a read
  // counts toward the constant bus limit.
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Only SI src operands can take inline constants at all.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // Packed 16-bit vector operands use the V216 inlining rules.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
    } else {
      return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
    }
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// An operand occupies the constant bus if it is a non-inline immediate,
// an expression, or an SGPR.
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  }
  return !MO.isReg() ||
         isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo());
}

// VALU-class instructions may read at most one value over the constant bus
// (one SGPR, or one literal/expression).  Count all such reads.
bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned ConstantBusUseCount = 0;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Implicit special-SGPR reads (VCC/M0/FLAT_SCR) also use the bus.
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          // Repeated reads of the same SGPR count once.
          const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          // s0, s[0:1]
          // flat_scratch_lo, flat_scratch
          // flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (Reg != SGPRUsed) {
            ++ConstantBusUseCount;
          }
          SGPRUsed = Reg;
        } else { // Expression or a literal
          ++ConstantBusUseCount;
        }
      }
    }
  }

  return ConstantBusUseCount <= 1;
}

// For instructions whose vdst is marked EARLY_CLOBBER, the destination
// register must not overlap any source register.
bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        return false;
      }
    }
  }

  return true;
}

// On subtargets without integer clamping, reject any instruction that
// carries an IntClamp flag with a non-zero clamp operand.
bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
    int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
    assert(ClampIdx != -1);
    return Inst.getOperand(ClampIdx).getImm() == 0;
  }

  return true;
}

// Check that the vdata register width of a MIMG instruction matches the
// number of components implied by dmask (plus tfe, halved for packed d16).
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    DMask = 1;

  unsigned DataSize =
    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
  if (hasPackedD16()) {
    // Packed d16 stores two components per 32-bit register.
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  return (VDataSize / 4) == DataSize + TFESize;
}

// Image atomics (identified as MIMG instructions that both load and store)
// only accept specific dmask patterns.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}

// GATHER4 instructions require exactly one dmask bit to be set.
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}

// The MIMG d16 modifier is not available on SI/CI.
bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

// Run all target-specific validators on a successfully matched
// instruction, reporting the first failure at IDLoc.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
                                          const SMLoc &IDLoc) {
  if (!validateConstantBusLimitations(Inst)) {
    Error(IDLoc,
      "invalid operand (violates constant bus restrictions)");
    return false;
  }
  if (!validateEarlyClobberLimitations(Inst)) {
    Error(IDLoc,
      "destination must be different than all sources");
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(IDLoc,
      "integer clamping is not supported on this GPU");
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
if (!validateMIMGD16(Inst)) {
    Error(IDLoc,
      "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDataSize(Inst)) {
    Error(IDLoc,
      "image data size does not match dmask and tfe");
    return false;
  }
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(IDLoc,
      "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(IDLoc,
      "invalid image_gather dmask: only one bit must be set");
    return false;
  }

  return true;
}

static std::string AMDGPUMnemonicSpellCheck(StringRef S, uint64_t FBS,
                                            unsigned VariantID = 0);

// Try every applicable asm variant, keep the most specific match status,
// then either emit the (validated) instruction or report a diagnostic.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  switch (Result) {
  default: break;
  case Match_Success:
    // Matched instructions must still pass target-specific validation.
    if (!validateInstruction(Inst, IDLoc)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.EmitInstruction(Inst, getSTI());
    return false;

  case Match_MissingFeature:
    return Error(IDLoc, "instruction not supported on this GPU");

  case Match_MnemonicFail: {
    // Suggest a close mnemonic spelling, if one exists.
    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
    std::string Suggestion = AMDGPUMnemonicSpellCheck(
        ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
    return Error(IDLoc, "invalid instruction" + Suggestion,
                 ((AMDGPUOperand &)*Operands[0]).getLocRange());
  }

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute expression into Ret (truncated to 32 bits).
// Returns true on failure, leaving Ret untouched.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t
Tmp = -1; 2581 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2582 return true; 2583 } 2584 if (getParser().parseAbsoluteExpression(Tmp)) { 2585 return true; 2586 } 2587 Ret = static_cast<uint32_t>(Tmp); 2588 return false; 2589} 2590 2591bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2592 uint32_t &Minor) { 2593 if (ParseAsAbsoluteExpression(Major)) 2594 return TokError("invalid major version"); 2595 2596 if (getLexer().isNot(AsmToken::Comma)) 2597 return TokError("minor version number required, comma expected"); 2598 Lex(); 2599 2600 if (ParseAsAbsoluteExpression(Minor)) 2601 return TokError("invalid minor version"); 2602 2603 return false; 2604} 2605 2606bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2607 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2608 return TokError("directive only supported for amdgcn architecture"); 2609 2610 std::string Target; 2611 2612 SMLoc TargetStart = getTok().getLoc(); 2613 if (getParser().parseEscapedString(Target)) 2614 return true; 2615 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2616 2617 std::string ExpectedTarget; 2618 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2619 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2620 2621 if (Target != ExpectedTargetOS.str()) 2622 return getParser().Error(TargetRange.Start, "target must match options", 2623 TargetRange); 2624 2625 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2626 return false; 2627} 2628 2629bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2630 return getParser().Error(Range.Start, "value out of range", Range); 2631} 2632 2633bool AMDGPUAsmParser::calculateGPRBlocks( 2634 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2635 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2636 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2637 unsigned &SGPRBlocks) { 2638 // TODO(scott.linder): These calculations are 
// duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;
  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());

  // Without the SGPR-init bug, gfx8+ checks the raw count before adding
  // the extra (VCC/flat_scratch/XNACK) SGPRs.
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  NumSGPRs +=
      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Parse the .amdhsa_kernel directive: a sequence of .amdhsa_* sub-directives
// terminated by .end_amdhsa_kernel, building a kernel descriptor that is
// handed to the target streamer.
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Each sub-directive may appear at most once.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the field's bit width, then store it.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount++;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

// Parse .hsa_code_object_version "<major>, <minor>".
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Parse .hsa_code_object_isa, optionally with explicit
// "<major>, <minor>, <stepping>, <vendor>, <arch>" arguments.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

// Parse a single "<field> = <value>" entry of an amd_kernel_code_t block.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

// Parse .amd_kernel_code_t: field assignments up to .end_amd_kernel_code_t,
// starting from the subtarget's default header values.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Parse .amdgpu_hsa_kernel <name>: mark the symbol as an HSA kernel and,
// for pre-V3 code objects, (re)initialize the kernel scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

// Parse .amd_amdgpu_isa: the quoted ISA string must match the one derived
// from the subtarget (i.e. the triple/mcpu the assembler was invoked with).
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3053 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3054 3055 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3056 return Error(getParser().getTok().getLoc(), 3057 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3058 "arguments specified through the command line"); 3059 } 3060 3061 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3062 Lex(); 3063 3064 return false; 3065} 3066 3067bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3068 const char *AssemblerDirectiveBegin; 3069 const char *AssemblerDirectiveEnd; 3070 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3071 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3072 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3073 HSAMD::V3::AssemblerDirectiveEnd) 3074 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3075 HSAMD::AssemblerDirectiveEnd); 3076 3077 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3078 return Error(getParser().getTok().getLoc(), 3079 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3080 "not available on non-amdhsa OSes")).str()); 3081 } 3082 3083 std::string HSAMetadataString; 3084 raw_string_ostream YamlStream(HSAMetadataString); 3085 3086 getLexer().setSkipSpace(false); 3087 3088 bool FoundEnd = false; 3089 while (!getLexer().is(AsmToken::Eof)) { 3090 while (getLexer().is(AsmToken::Space)) { 3091 YamlStream << getLexer().getTok().getString(); 3092 Lex(); 3093 } 3094 3095 if (getLexer().is(AsmToken::Identifier)) { 3096 StringRef ID = getLexer().getTok().getIdentifier(); 3097 if (ID == AssemblerDirectiveEnd) { 3098 Lex(); 3099 FoundEnd = true; 3100 break; 3101 } 3102 } 3103 3104 YamlStream << Parser.parseStringToEndOfStatement() 3105 << getContext().getAsmInfo()->getSeparatorString(); 3106 3107 Parser.eatToEndOfStatement(); 3108 } 3109 3110 getLexer().setSkipSpace(true); 3111 3112 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 
3113 return TokError(Twine("expected directive ") + 3114 Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); 3115 } 3116 3117 YamlStream.flush(); 3118 3119 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3120 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3121 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3122 } else { 3123 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3124 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3125 } 3126 3127 return false; 3128} 3129 3130bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3131 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3132 return Error(getParser().getTok().getLoc(), 3133 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3134 "not available on non-amdpal OSes")).str()); 3135 } 3136 3137 PALMD::Metadata PALMetadata; 3138 for (;;) { 3139 uint32_t Value; 3140 if (ParseAsAbsoluteExpression(Value)) { 3141 return TokError(Twine("invalid value in ") + 3142 Twine(PALMD::AssemblerDirective)); 3143 } 3144 PALMetadata.push_back(Value); 3145 if (getLexer().isNot(AsmToken::Comma)) 3146 break; 3147 Lex(); 3148 } 3149 getTargetStreamer().EmitPALMetadata(PALMetadata); 3150 return false; 3151} 3152 3153bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3154 StringRef IDVal = DirectiveID.getString(); 3155 3156 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3157 if (IDVal == ".amdgcn_target") 3158 return ParseDirectiveAMDGCNTarget(); 3159 3160 if (IDVal == ".amdhsa_kernel") 3161 return ParseDirectiveAMDHSAKernel(); 3162 3163 // TODO: Restructure/combine with PAL metadata directive. 
3164 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3165 return ParseDirectiveHSAMetadata(); 3166 } else { 3167 if (IDVal == ".hsa_code_object_version") 3168 return ParseDirectiveHSACodeObjectVersion(); 3169 3170 if (IDVal == ".hsa_code_object_isa") 3171 return ParseDirectiveHSACodeObjectISA(); 3172 3173 if (IDVal == ".amd_kernel_code_t") 3174 return ParseDirectiveAMDKernelCodeT(); 3175 3176 if (IDVal == ".amdgpu_hsa_kernel") 3177 return ParseDirectiveAMDGPUHsaKernel(); 3178 3179 if (IDVal == ".amd_amdgpu_isa") 3180 return ParseDirectiveISAVersion(); 3181 3182 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3183 return ParseDirectiveHSAMetadata(); 3184 } 3185 3186 if (IDVal == PALMD::AssemblerDirective) 3187 return ParseDirectivePALMetadata(); 3188 3189 return true; 3190} 3191 3192bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3193 unsigned RegNo) const { 3194 3195 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3196 R.isValid(); ++R) { 3197 if (*R == RegNo) 3198 return isGFX9(); 3199 } 3200 3201 switch (RegNo) { 3202 case AMDGPU::TBA: 3203 case AMDGPU::TBA_LO: 3204 case AMDGPU::TBA_HI: 3205 case AMDGPU::TMA: 3206 case AMDGPU::TMA_LO: 3207 case AMDGPU::TMA_HI: 3208 return !isGFX9(); 3209 case AMDGPU::XNACK_MASK: 3210 case AMDGPU::XNACK_MASK_LO: 3211 case AMDGPU::XNACK_MASK_HI: 3212 return !isCI() && !isSI() && hasXNACK(); 3213 default: 3214 break; 3215 } 3216 3217 if (isCI()) 3218 return true; 3219 3220 if (isSI()) { 3221 // No flat_scr 3222 switch (RegNo) { 3223 case AMDGPU::FLAT_SCR: 3224 case AMDGPU::FLAT_SCR_LO: 3225 case AMDGPU::FLAT_SCR_HI: 3226 return false; 3227 default: 3228 return true; 3229 } 3230 } 3231 3232 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3233 // SI/CI have. 
3234 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3235 R.isValid(); ++R) { 3236 if (*R == RegNo) 3237 return false; 3238 } 3239 3240 return true; 3241} 3242 3243OperandMatchResultTy 3244AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3245 // Try to parse with a custom parser 3246 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3247 3248 // If we successfully parsed the operand or if there as an error parsing, 3249 // we are done. 3250 // 3251 // If we are parsing after we reach EndOfStatement then this means we 3252 // are appending default values to the Operands list. This is only done 3253 // by custom parser, so we shouldn't continue on to the generic parsing. 3254 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3255 getLexer().is(AsmToken::EndOfStatement)) 3256 return ResTy; 3257 3258 ResTy = parseRegOrImm(Operands); 3259 3260 if (ResTy == MatchOperand_Success) 3261 return ResTy; 3262 3263 const auto &Tok = Parser.getTok(); 3264 SMLoc S = Tok.getLoc(); 3265 3266 const MCExpr *Expr = nullptr; 3267 if (!Parser.parseExpression(Expr)) { 3268 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3269 return MatchOperand_Success; 3270 } 3271 3272 // Possibly this is an instruction flag like 'gds'. 3273 if (Tok.getKind() == AsmToken::Identifier) { 3274 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3275 Parser.Lex(); 3276 return MatchOperand_Success; 3277 } 3278 3279 return MatchOperand_NoMatch; 3280} 3281 3282StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3283 // Clear any forced encodings from the previous instruction. 
3284 setForcedEncodingSize(0); 3285 setForcedDPP(false); 3286 setForcedSDWA(false); 3287 3288 if (Name.endswith("_e64")) { 3289 setForcedEncodingSize(64); 3290 return Name.substr(0, Name.size() - 4); 3291 } else if (Name.endswith("_e32")) { 3292 setForcedEncodingSize(32); 3293 return Name.substr(0, Name.size() - 4); 3294 } else if (Name.endswith("_dpp")) { 3295 setForcedDPP(true); 3296 return Name.substr(0, Name.size() - 4); 3297 } else if (Name.endswith("_sdwa")) { 3298 setForcedSDWA(true); 3299 return Name.substr(0, Name.size() - 5); 3300 } 3301 return Name; 3302} 3303 3304bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3305 StringRef Name, 3306 SMLoc NameLoc, OperandVector &Operands) { 3307 // Add the instruction mnemonic 3308 Name = parseMnemonicSuffix(Name); 3309 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3310 3311 while (!getLexer().is(AsmToken::EndOfStatement)) { 3312 OperandMatchResultTy Res = parseOperand(Operands, Name); 3313 3314 // Eat the comma or space if there is one. 
3315 if (getLexer().is(AsmToken::Comma)) 3316 Parser.Lex(); 3317 3318 switch (Res) { 3319 case MatchOperand_Success: break; 3320 case MatchOperand_ParseFail: 3321 Error(getLexer().getLoc(), "failed parsing operand."); 3322 while (!getLexer().is(AsmToken::EndOfStatement)) { 3323 Parser.Lex(); 3324 } 3325 return true; 3326 case MatchOperand_NoMatch: 3327 Error(getLexer().getLoc(), "not a valid operand."); 3328 while (!getLexer().is(AsmToken::EndOfStatement)) { 3329 Parser.Lex(); 3330 } 3331 return true; 3332 } 3333 } 3334 3335 return false; 3336} 3337 3338//===----------------------------------------------------------------------===// 3339// Utility functions 3340//===----------------------------------------------------------------------===// 3341 3342OperandMatchResultTy 3343AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3344 switch(getLexer().getKind()) { 3345 default: return MatchOperand_NoMatch; 3346 case AsmToken::Identifier: { 3347 StringRef Name = Parser.getTok().getString(); 3348 if (!Name.equals(Prefix)) { 3349 return MatchOperand_NoMatch; 3350 } 3351 3352 Parser.Lex(); 3353 if (getLexer().isNot(AsmToken::Colon)) 3354 return MatchOperand_ParseFail; 3355 3356 Parser.Lex(); 3357 3358 bool IsMinus = false; 3359 if (getLexer().getKind() == AsmToken::Minus) { 3360 Parser.Lex(); 3361 IsMinus = true; 3362 } 3363 3364 if (getLexer().isNot(AsmToken::Integer)) 3365 return MatchOperand_ParseFail; 3366 3367 if (getParser().parseAbsoluteExpression(Int)) 3368 return MatchOperand_ParseFail; 3369 3370 if (IsMinus) 3371 Int = -Int; 3372 break; 3373 } 3374 } 3375 return MatchOperand_Success; 3376} 3377 3378OperandMatchResultTy 3379AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3380 AMDGPUOperand::ImmTy ImmTy, 3381 bool (*ConvertResult)(int64_t&)) { 3382 SMLoc S = Parser.getTok().getLoc(); 3383 int64_t Value = 0; 3384 3385 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3386 if (Res != 
MatchOperand_Success) 3387 return Res; 3388 3389 if (ConvertResult && !ConvertResult(Value)) { 3390 return MatchOperand_ParseFail; 3391 } 3392 3393 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3394 return MatchOperand_Success; 3395} 3396 3397OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3398 const char *Prefix, 3399 OperandVector &Operands, 3400 AMDGPUOperand::ImmTy ImmTy, 3401 bool (*ConvertResult)(int64_t&)) { 3402 StringRef Name = Parser.getTok().getString(); 3403 if (!Name.equals(Prefix)) 3404 return MatchOperand_NoMatch; 3405 3406 Parser.Lex(); 3407 if (getLexer().isNot(AsmToken::Colon)) 3408 return MatchOperand_ParseFail; 3409 3410 Parser.Lex(); 3411 if (getLexer().isNot(AsmToken::LBrac)) 3412 return MatchOperand_ParseFail; 3413 Parser.Lex(); 3414 3415 unsigned Val = 0; 3416 SMLoc S = Parser.getTok().getLoc(); 3417 3418 // FIXME: How to verify the number of elements matches the number of src 3419 // operands? 3420 for (int I = 0; I < 4; ++I) { 3421 if (I != 0) { 3422 if (getLexer().is(AsmToken::RBrac)) 3423 break; 3424 3425 if (getLexer().isNot(AsmToken::Comma)) 3426 return MatchOperand_ParseFail; 3427 Parser.Lex(); 3428 } 3429 3430 if (getLexer().isNot(AsmToken::Integer)) 3431 return MatchOperand_ParseFail; 3432 3433 int64_t Op; 3434 if (getParser().parseAbsoluteExpression(Op)) 3435 return MatchOperand_ParseFail; 3436 3437 if (Op != 0 && Op != 1) 3438 return MatchOperand_ParseFail; 3439 Val |= (Op << I); 3440 } 3441 3442 Parser.Lex(); 3443 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3444 return MatchOperand_Success; 3445} 3446 3447OperandMatchResultTy 3448AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3449 AMDGPUOperand::ImmTy ImmTy) { 3450 int64_t Bit = 0; 3451 SMLoc S = Parser.getTok().getLoc(); 3452 3453 // We are at the end of the statement, and this is a default argument, so 3454 // use a default value. 
3455 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3456 switch(getLexer().getKind()) { 3457 case AsmToken::Identifier: { 3458 StringRef Tok = Parser.getTok().getString(); 3459 if (Tok == Name) { 3460 if (Tok == "r128" && isGFX9()) 3461 Error(S, "r128 modifier is not supported on this GPU"); 3462 if (Tok == "a16" && !isGFX9()) 3463 Error(S, "a16 modifier is not supported on this GPU"); 3464 Bit = 1; 3465 Parser.Lex(); 3466 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3467 Bit = 0; 3468 Parser.Lex(); 3469 } else { 3470 return MatchOperand_NoMatch; 3471 } 3472 break; 3473 } 3474 default: 3475 return MatchOperand_NoMatch; 3476 } 3477 } 3478 3479 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3480 return MatchOperand_Success; 3481} 3482 3483static void addOptionalImmOperand( 3484 MCInst& Inst, const OperandVector& Operands, 3485 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3486 AMDGPUOperand::ImmTy ImmT, 3487 int64_t Default = 0) { 3488 auto i = OptionalIdx.find(ImmT); 3489 if (i != OptionalIdx.end()) { 3490 unsigned Idx = i->second; 3491 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3492 } else { 3493 Inst.addOperand(MCOperand::createImm(Default)); 3494 } 3495} 3496 3497OperandMatchResultTy 3498AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3499 if (getLexer().isNot(AsmToken::Identifier)) { 3500 return MatchOperand_NoMatch; 3501 } 3502 StringRef Tok = Parser.getTok().getString(); 3503 if (Tok != Prefix) { 3504 return MatchOperand_NoMatch; 3505 } 3506 3507 Parser.Lex(); 3508 if (getLexer().isNot(AsmToken::Colon)) { 3509 return MatchOperand_ParseFail; 3510 } 3511 3512 Parser.Lex(); 3513 if (getLexer().isNot(AsmToken::Identifier)) { 3514 return MatchOperand_ParseFail; 3515 } 3516 3517 Value = Parser.getTok().getString(); 3518 return MatchOperand_Success; 3519} 3520 3521// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3522// values to live in a joint format 
operand in the MCInst encoding. 3523OperandMatchResultTy 3524AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3525 SMLoc S = Parser.getTok().getLoc(); 3526 int64_t Dfmt = 0, Nfmt = 0; 3527 // dfmt and nfmt can appear in either order, and each is optional. 3528 bool GotDfmt = false, GotNfmt = false; 3529 while (!GotDfmt || !GotNfmt) { 3530 if (!GotDfmt) { 3531 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3532 if (Res != MatchOperand_NoMatch) { 3533 if (Res != MatchOperand_Success) 3534 return Res; 3535 if (Dfmt >= 16) { 3536 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3537 return MatchOperand_ParseFail; 3538 } 3539 GotDfmt = true; 3540 Parser.Lex(); 3541 continue; 3542 } 3543 } 3544 if (!GotNfmt) { 3545 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3546 if (Res != MatchOperand_NoMatch) { 3547 if (Res != MatchOperand_Success) 3548 return Res; 3549 if (Nfmt >= 8) { 3550 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3551 return MatchOperand_ParseFail; 3552 } 3553 GotNfmt = true; 3554 Parser.Lex(); 3555 continue; 3556 } 3557 } 3558 break; 3559 } 3560 if (!GotDfmt && !GotNfmt) 3561 return MatchOperand_NoMatch; 3562 auto Format = Dfmt | Nfmt << 4; 3563 Operands.push_back( 3564 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3565 return MatchOperand_Success; 3566} 3567 3568//===----------------------------------------------------------------------===// 3569// ds 3570//===----------------------------------------------------------------------===// 3571 3572void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3573 const OperandVector &Operands) { 3574 OptionalImmIndexMap OptionalIdx; 3575 3576 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3577 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3578 3579 // Add the register arguments 3580 if (Op.isReg()) { 3581 Op.addRegOperands(Inst, 1); 3582 continue; 3583 } 3584 3585 // Handle optional arguments 3586 OptionalIdx[Op.getImmTy()] = i; 3587 } 3588 3589 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3592 3593 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3594} 3595 3596void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3597 bool IsGdsHardcoded) { 3598 OptionalImmIndexMap OptionalIdx; 3599 3600 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3601 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3602 3603 // Add the register arguments 3604 if (Op.isReg()) { 3605 Op.addRegOperands(Inst, 1); 3606 continue; 3607 } 3608 3609 if (Op.isToken() && Op.getToken() == "gds") { 3610 IsGdsHardcoded = true; 3611 continue; 3612 } 3613 3614 // Handle optional arguments 3615 OptionalIdx[Op.getImmTy()] = i; 3616 } 3617 3618 AMDGPUOperand::ImmTy OffsetType = 3619 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3620 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3621 AMDGPUOperand::ImmTyOffset; 3622 3623 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3624 3625 if (!IsGdsHardcoded) { 3626 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3627 } 3628 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3629} 3630 3631void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3632 OptionalImmIndexMap OptionalIdx; 3633 3634 unsigned OperandIdx[4]; 3635 unsigned EnMask = 0; 3636 int SrcIdx = 0; 3637 3638 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3639 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3640 3641 // Add the register arguments 3642 if (Op.isReg()) { 3643 assert(SrcIdx < 4); 3644 OperandIdx[SrcIdx] = Inst.size(); 3645 Op.addRegOperands(Inst, 1); 3646 ++SrcIdx; 3647 continue; 3648 } 3649 3650 if (Op.isOff()) { 3651 assert(SrcIdx < 4); 3652 OperandIdx[SrcIdx] = Inst.size(); 3653 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3654 ++SrcIdx; 3655 continue; 3656 } 3657 3658 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3659 Op.addImmOperands(Inst, 1); 3660 continue; 3661 } 3662 3663 if (Op.isToken() && Op.getToken() == "done") 3664 continue; 3665 3666 // Handle optional arguments 3667 OptionalIdx[Op.getImmTy()] = i; 3668 } 3669 3670 assert(SrcIdx == 4); 3671 3672 bool Compr = false; 3673 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3674 Compr = true; 3675 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3676 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3677 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3678 } 3679 3680 for (auto i = 0; i < SrcIdx; ++i) { 3681 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 3682 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 3683 } 3684 } 3685 3686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 3687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 3688 3689 Inst.addOperand(MCOperand::createImm(EnMask)); 3690} 3691 3692//===----------------------------------------------------------------------===// 3693// s_waitcnt 3694//===----------------------------------------------------------------------===// 3695 3696static bool 3697encodeCnt( 3698 const AMDGPU::IsaVersion ISA, 3699 int64_t &IntVal, 3700 int64_t CntVal, 3701 bool Saturate, 3702 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 3703 unsigned (*decode)(const IsaVersion &Version, unsigned)) 3704{ 3705 bool Failed = false; 3706 3707 IntVal = encode(ISA, IntVal, CntVal); 3708 if (CntVal != decode(ISA, IntVal)) { 3709 if (Saturate) { 3710 IntVal = encode(ISA, IntVal, -1); 3711 } else { 3712 Failed = true; 3713 } 3714 } 3715 return Failed; 3716} 3717 3718bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 3719 StringRef CntName = Parser.getTok().getString(); 3720 int64_t CntVal; 3721 3722 Parser.Lex(); 3723 if (getLexer().isNot(AsmToken::LParen)) 3724 return true; 3725 3726 Parser.Lex(); 3727 if (getLexer().isNot(AsmToken::Integer)) 3728 return true; 3729 3730 SMLoc ValLoc = Parser.getTok().getLoc(); 3731 if (getParser().parseAbsoluteExpression(CntVal)) 3732 return true; 3733 3734 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3735 3736 bool Failed = true; 3737 bool Sat = CntName.endswith("_sat"); 3738 3739 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 3740 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 3741 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 3742 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 3743 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 3744 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, 
decodeLgkmcnt); 3745 } 3746 3747 if (Failed) { 3748 Error(ValLoc, "too large value for " + CntName); 3749 return true; 3750 } 3751 3752 if (getLexer().isNot(AsmToken::RParen)) { 3753 return true; 3754 } 3755 3756 Parser.Lex(); 3757 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 3758 const AsmToken NextToken = getLexer().peekTok(); 3759 if (NextToken.is(AsmToken::Identifier)) { 3760 Parser.Lex(); 3761 } 3762 } 3763 3764 return false; 3765} 3766 3767OperandMatchResultTy 3768AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 3769 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3770 int64_t Waitcnt = getWaitcntBitMask(ISA); 3771 SMLoc S = Parser.getTok().getLoc(); 3772 3773 switch(getLexer().getKind()) { 3774 default: return MatchOperand_ParseFail; 3775 case AsmToken::Integer: 3776 // The operand can be an integer value. 3777 if (getParser().parseAbsoluteExpression(Waitcnt)) 3778 return MatchOperand_ParseFail; 3779 break; 3780 3781 case AsmToken::Identifier: 3782 do { 3783 if (parseCnt(Waitcnt)) 3784 return MatchOperand_ParseFail; 3785 } while(getLexer().isNot(AsmToken::EndOfStatement)); 3786 break; 3787 } 3788 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 3789 return MatchOperand_Success; 3790} 3791 3792bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 3793 int64_t &Width) { 3794 using namespace llvm::AMDGPU::Hwreg; 3795 3796 if (Parser.getTok().getString() != "hwreg") 3797 return true; 3798 Parser.Lex(); 3799 3800 if (getLexer().isNot(AsmToken::LParen)) 3801 return true; 3802 Parser.Lex(); 3803 3804 if (getLexer().is(AsmToken::Identifier)) { 3805 HwReg.IsSymbolic = true; 3806 HwReg.Id = ID_UNKNOWN_; 3807 const StringRef tok = Parser.getTok().getString(); 3808 int Last = ID_SYMBOLIC_LAST_; 3809 if (isSI() || isCI() || isVI()) 3810 Last = ID_SYMBOLIC_FIRST_GFX9_; 3811 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 3812 if (tok == IdSymbolic[i]) { 3813 HwReg.Id = i; 
3814 break; 3815 } 3816 } 3817 Parser.Lex(); 3818 } else { 3819 HwReg.IsSymbolic = false; 3820 if (getLexer().isNot(AsmToken::Integer)) 3821 return true; 3822 if (getParser().parseAbsoluteExpression(HwReg.Id)) 3823 return true; 3824 } 3825 3826 if (getLexer().is(AsmToken::RParen)) { 3827 Parser.Lex(); 3828 return false; 3829 } 3830 3831 // optional params 3832 if (getLexer().isNot(AsmToken::Comma)) 3833 return true; 3834 Parser.Lex(); 3835 3836 if (getLexer().isNot(AsmToken::Integer)) 3837 return true; 3838 if (getParser().parseAbsoluteExpression(Offset)) 3839 return true; 3840 3841 if (getLexer().isNot(AsmToken::Comma)) 3842 return true; 3843 Parser.Lex(); 3844 3845 if (getLexer().isNot(AsmToken::Integer)) 3846 return true; 3847 if (getParser().parseAbsoluteExpression(Width)) 3848 return true; 3849 3850 if (getLexer().isNot(AsmToken::RParen)) 3851 return true; 3852 Parser.Lex(); 3853 3854 return false; 3855} 3856 3857OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 3858 using namespace llvm::AMDGPU::Hwreg; 3859 3860 int64_t Imm16Val = 0; 3861 SMLoc S = Parser.getTok().getLoc(); 3862 3863 switch(getLexer().getKind()) { 3864 default: return MatchOperand_NoMatch; 3865 case AsmToken::Integer: 3866 // The operand can be an integer value. 3867 if (getParser().parseAbsoluteExpression(Imm16Val)) 3868 return MatchOperand_NoMatch; 3869 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 3870 Error(S, "invalid immediate: only 16-bit values are legal"); 3871 // Do not return error code, but create an imm operand anyway and proceed 3872 // to the next operand, if any. That avoids unneccessary error messages. 
3873 } 3874 break; 3875 3876 case AsmToken::Identifier: { 3877 OperandInfoTy HwReg(ID_UNKNOWN_); 3878 int64_t Offset = OFFSET_DEFAULT_; 3879 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 3880 if (parseHwregConstruct(HwReg, Offset, Width)) 3881 return MatchOperand_ParseFail; 3882 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 3883 if (HwReg.IsSymbolic) 3884 Error(S, "invalid symbolic name of hardware register"); 3885 else 3886 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 3887 } 3888 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 3889 Error(S, "invalid bit offset: only 5-bit values are legal"); 3890 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 3891 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 3892 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 3893 } 3894 break; 3895 } 3896 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 3897 return MatchOperand_Success; 3898} 3899 3900bool AMDGPUOperand::isSWaitCnt() const { 3901 return isImm(); 3902} 3903 3904bool AMDGPUOperand::isHwreg() const { 3905 return isImmTy(ImmTyHwreg); 3906} 3907 3908bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 3909 using namespace llvm::AMDGPU::SendMsg; 3910 3911 if (Parser.getTok().getString() != "sendmsg") 3912 return true; 3913 Parser.Lex(); 3914 3915 if (getLexer().isNot(AsmToken::LParen)) 3916 return true; 3917 Parser.Lex(); 3918 3919 if (getLexer().is(AsmToken::Identifier)) { 3920 Msg.IsSymbolic = true; 3921 Msg.Id = ID_UNKNOWN_; 3922 const std::string tok = Parser.getTok().getString(); 3923 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 3924 switch(i) { 3925 default: continue; // Omit gaps. 
3926 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 3927 } 3928 if (tok == IdSymbolic[i]) { 3929 Msg.Id = i; 3930 break; 3931 } 3932 } 3933 Parser.Lex(); 3934 } else { 3935 Msg.IsSymbolic = false; 3936 if (getLexer().isNot(AsmToken::Integer)) 3937 return true; 3938 if (getParser().parseAbsoluteExpression(Msg.Id)) 3939 return true; 3940 if (getLexer().is(AsmToken::Integer)) 3941 if (getParser().parseAbsoluteExpression(Msg.Id)) 3942 Msg.Id = ID_UNKNOWN_; 3943 } 3944 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 3945 return false; 3946 3947 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 3948 if (getLexer().isNot(AsmToken::RParen)) 3949 return true; 3950 Parser.Lex(); 3951 return false; 3952 } 3953 3954 if (getLexer().isNot(AsmToken::Comma)) 3955 return true; 3956 Parser.Lex(); 3957 3958 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 3959 Operation.Id = ID_UNKNOWN_; 3960 if (getLexer().is(AsmToken::Identifier)) { 3961 Operation.IsSymbolic = true; 3962 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 3963 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 3964 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 3965 const StringRef Tok = Parser.getTok().getString(); 3966 for (int i = F; i < L; ++i) { 3967 if (Tok == S[i]) { 3968 Operation.Id = i; 3969 break; 3970 } 3971 } 3972 Parser.Lex(); 3973 } else { 3974 Operation.IsSymbolic = false; 3975 if (getLexer().isNot(AsmToken::Integer)) 3976 return true; 3977 if (getParser().parseAbsoluteExpression(Operation.Id)) 3978 return true; 3979 } 3980 3981 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 3982 // Stream id is optional. 
3983 if (getLexer().is(AsmToken::RParen)) { 3984 Parser.Lex(); 3985 return false; 3986 } 3987 3988 if (getLexer().isNot(AsmToken::Comma)) 3989 return true; 3990 Parser.Lex(); 3991 3992 if (getLexer().isNot(AsmToken::Integer)) 3993 return true; 3994 if (getParser().parseAbsoluteExpression(StreamId)) 3995 return true; 3996 } 3997 3998 if (getLexer().isNot(AsmToken::RParen)) 3999 return true; 4000 Parser.Lex(); 4001 return false; 4002} 4003 4004OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4005 if (getLexer().getKind() != AsmToken::Identifier) 4006 return MatchOperand_NoMatch; 4007 4008 StringRef Str = Parser.getTok().getString(); 4009 int Slot = StringSwitch<int>(Str) 4010 .Case("p10", 0) 4011 .Case("p20", 1) 4012 .Case("p0", 2) 4013 .Default(-1); 4014 4015 SMLoc S = Parser.getTok().getLoc(); 4016 if (Slot == -1) 4017 return MatchOperand_ParseFail; 4018 4019 Parser.Lex(); 4020 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4021 AMDGPUOperand::ImmTyInterpSlot)); 4022 return MatchOperand_Success; 4023} 4024 4025OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4026 if (getLexer().getKind() != AsmToken::Identifier) 4027 return MatchOperand_NoMatch; 4028 4029 StringRef Str = Parser.getTok().getString(); 4030 if (!Str.startswith("attr")) 4031 return MatchOperand_NoMatch; 4032 4033 StringRef Chan = Str.take_back(2); 4034 int AttrChan = StringSwitch<int>(Chan) 4035 .Case(".x", 0) 4036 .Case(".y", 1) 4037 .Case(".z", 2) 4038 .Case(".w", 3) 4039 .Default(-1); 4040 if (AttrChan == -1) 4041 return MatchOperand_ParseFail; 4042 4043 Str = Str.drop_back(2).drop_front(4); 4044 4045 uint8_t Attr; 4046 if (Str.getAsInteger(10, Attr)) 4047 return MatchOperand_ParseFail; 4048 4049 SMLoc S = Parser.getTok().getLoc(); 4050 Parser.Lex(); 4051 if (Attr > 63) { 4052 Error(S, "out of bounds attr"); 4053 return MatchOperand_Success; 4054 } 4055 4056 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4057 4058 
Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4059 AMDGPUOperand::ImmTyInterpAttr)); 4060 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4061 AMDGPUOperand::ImmTyAttrChan)); 4062 return MatchOperand_Success; 4063} 4064 4065void AMDGPUAsmParser::errorExpTgt() { 4066 Error(Parser.getTok().getLoc(), "invalid exp target"); 4067} 4068 4069OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4070 uint8_t &Val) { 4071 if (Str == "null") { 4072 Val = 9; 4073 return MatchOperand_Success; 4074 } 4075 4076 if (Str.startswith("mrt")) { 4077 Str = Str.drop_front(3); 4078 if (Str == "z") { // == mrtz 4079 Val = 8; 4080 return MatchOperand_Success; 4081 } 4082 4083 if (Str.getAsInteger(10, Val)) 4084 return MatchOperand_ParseFail; 4085 4086 if (Val > 7) 4087 errorExpTgt(); 4088 4089 return MatchOperand_Success; 4090 } 4091 4092 if (Str.startswith("pos")) { 4093 Str = Str.drop_front(3); 4094 if (Str.getAsInteger(10, Val)) 4095 return MatchOperand_ParseFail; 4096 4097 if (Val > 3) 4098 errorExpTgt(); 4099 4100 Val += 12; 4101 return MatchOperand_Success; 4102 } 4103 4104 if (Str.startswith("param")) { 4105 Str = Str.drop_front(5); 4106 if (Str.getAsInteger(10, Val)) 4107 return MatchOperand_ParseFail; 4108 4109 if (Val >= 32) 4110 errorExpTgt(); 4111 4112 Val += 32; 4113 return MatchOperand_Success; 4114 } 4115 4116 if (Str.startswith("invalid_target_")) { 4117 Str = Str.drop_front(15); 4118 if (Str.getAsInteger(10, Val)) 4119 return MatchOperand_ParseFail; 4120 4121 errorExpTgt(); 4122 return MatchOperand_Success; 4123 } 4124 4125 return MatchOperand_NoMatch; 4126} 4127 4128OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4129 uint8_t Val; 4130 StringRef Str = Parser.getTok().getString(); 4131 4132 auto Res = parseExpTgtImpl(Str, Val); 4133 if (Res != MatchOperand_Success) 4134 return Res; 4135 4136 SMLoc S = Parser.getTok().getLoc(); 4137 Parser.Lex(); 4138 4139 
Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4140 AMDGPUOperand::ImmTyExpTgt)); 4141 return MatchOperand_Success; 4142} 4143 4144OperandMatchResultTy 4145AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4146 using namespace llvm::AMDGPU::SendMsg; 4147 4148 int64_t Imm16Val = 0; 4149 SMLoc S = Parser.getTok().getLoc(); 4150 4151 switch(getLexer().getKind()) { 4152 default: 4153 return MatchOperand_NoMatch; 4154 case AsmToken::Integer: 4155 // The operand can be an integer value. 4156 if (getParser().parseAbsoluteExpression(Imm16Val)) 4157 return MatchOperand_NoMatch; 4158 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4159 Error(S, "invalid immediate: only 16-bit values are legal"); 4160 // Do not return error code, but create an imm operand anyway and proceed 4161 // to the next operand, if any. That avoids unneccessary error messages. 4162 } 4163 break; 4164 case AsmToken::Identifier: { 4165 OperandInfoTy Msg(ID_UNKNOWN_); 4166 OperandInfoTy Operation(OP_UNKNOWN_); 4167 int64_t StreamId = STREAM_ID_DEFAULT_; 4168 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4169 return MatchOperand_ParseFail; 4170 do { 4171 // Validate and encode message ID. 4172 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4173 || Msg.Id == ID_SYSMSG)) { 4174 if (Msg.IsSymbolic) 4175 Error(S, "invalid/unsupported symbolic name of message"); 4176 else 4177 Error(S, "invalid/unsupported code of message"); 4178 break; 4179 } 4180 Imm16Val = (Msg.Id << ID_SHIFT_); 4181 // Validate and encode operation ID. 4182 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4183 if (! 
(OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4184 if (Operation.IsSymbolic) 4185 Error(S, "invalid symbolic name of GS_OP"); 4186 else 4187 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4188 break; 4189 } 4190 if (Operation.Id == OP_GS_NOP 4191 && Msg.Id != ID_GS_DONE) { 4192 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4193 break; 4194 } 4195 Imm16Val |= (Operation.Id << OP_SHIFT_); 4196 } 4197 if (Msg.Id == ID_SYSMSG) { 4198 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4199 if (Operation.IsSymbolic) 4200 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4201 else 4202 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4203 break; 4204 } 4205 Imm16Val |= (Operation.Id << OP_SHIFT_); 4206 } 4207 // Validate and encode stream ID. 4208 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4209 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4210 Error(S, "invalid stream id: only 2-bit values are legal"); 4211 break; 4212 } 4213 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4214 } 4215 } while (false); 4216 } 4217 break; 4218 } 4219 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4220 return MatchOperand_Success; 4221} 4222 4223bool AMDGPUOperand::isSendMsg() const { 4224 return isImmTy(ImmTySendMsg); 4225} 4226 4227//===----------------------------------------------------------------------===// 4228// parser helpers 4229//===----------------------------------------------------------------------===// 4230 4231bool 4232AMDGPUAsmParser::trySkipId(const StringRef Id) { 4233 if (getLexer().getKind() == AsmToken::Identifier && 4234 Parser.getTok().getString() == Id) { 4235 Parser.Lex(); 4236 return true; 4237 } 4238 return false; 4239} 4240 4241bool 4242AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4243 if (getLexer().getKind() == Kind) { 4244 Parser.Lex(); 4245 return true; 4246 } 4247 
  return false;
}

// Like trySkipToken, but emits ErrMsg at the current location when the
// expected token is not found.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(Parser.getTok().getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute (assembly-time constant) expression into Imm.
// Returns true on success.
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Parse a quoted string token; Val receives its contents without quotes.
// Emits ErrMsg and returns false when the current token is not a string.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  SMLoc S = Parser.getTok().getLoc();
  if (getLexer().getKind() == AsmToken::String) {
    Val = Parser.getTok().getStringContents();
    Parser.Lex();
    return true;
  } else {
    Error(S, ErrMsg);
    return false;
  }
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack the AND/OR/XOR masks into the BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse OpNum integer operands into Op[0..OpNum-1], each preceded by a comma
// and range-checked against [MinVal, MaxVal]; ErrMsg is reported at the
// offending expression on a range violation.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

// swizzle(QUAD_PERM, a, b, c, d): four 2-bit lane selectors, packed
// LANE_SHIFT bits apart.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0,
LANE_MAX, 4323 "expected a 2-bit lane id")) { 4324 Imm = QUAD_PERM_ENC; 4325 for (auto i = 0; i < LANE_NUM; ++i) { 4326 Imm |= Lane[i] << (LANE_SHIFT * i); 4327 } 4328 return true; 4329 } 4330 return false; 4331} 4332 4333bool 4334AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4335 using namespace llvm::AMDGPU::Swizzle; 4336 4337 SMLoc S = Parser.getTok().getLoc(); 4338 int64_t GroupSize; 4339 int64_t LaneIdx; 4340 4341 if (!parseSwizzleOperands(1, &GroupSize, 4342 2, 32, 4343 "group size must be in the interval [2,32]")) { 4344 return false; 4345 } 4346 if (!isPowerOf2_64(GroupSize)) { 4347 Error(S, "group size must be a power of two"); 4348 return false; 4349 } 4350 if (parseSwizzleOperands(1, &LaneIdx, 4351 0, GroupSize - 1, 4352 "lane id must be in the interval [0,group size - 1]")) { 4353 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4354 return true; 4355 } 4356 return false; 4357} 4358 4359bool 4360AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4361 using namespace llvm::AMDGPU::Swizzle; 4362 4363 SMLoc S = Parser.getTok().getLoc(); 4364 int64_t GroupSize; 4365 4366 if (!parseSwizzleOperands(1, &GroupSize, 4367 2, 32, "group size must be in the interval [2,32]")) { 4368 return false; 4369 } 4370 if (!isPowerOf2_64(GroupSize)) { 4371 Error(S, "group size must be a power of two"); 4372 return false; 4373 } 4374 4375 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4376 return true; 4377} 4378 4379bool 4380AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4381 using namespace llvm::AMDGPU::Swizzle; 4382 4383 SMLoc S = Parser.getTok().getLoc(); 4384 int64_t GroupSize; 4385 4386 if (!parseSwizzleOperands(1, &GroupSize, 4387 1, 16, "group size must be in the interval [1,16]")) { 4388 return false; 4389 } 4390 if (!isPowerOf2_64(GroupSize)) { 4391 Error(S, "group size must be a power of two"); 4392 return false; 4393 } 4394 4395 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4396 return true; 4397} 4398 4399bool 
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  // swizzle(BITMASK_PERM, "mask"): a 5-character control string, MSB first.
  // Per character: '0' -> force 0, '1' -> force 1,
  //                'p' -> pass the lane-id bit, 'i' -> invert the lane-id bit.
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    // Leftmost character controls the most significant mask bit.
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// Plain numeric swizzle offset: a 16-bit unsigned immediate.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// swizzle(<mode>, ...): dispatch on the symbolic mode identifier.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if
              (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse the swizzle offset operand: either offset:swizzle(<mode>, ...)
// or a plain 16-bit offset:<imm>.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    // An operand is pushed even on failure; the result code carries the error.
    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

// Branch target for SOPP instructions: either an absolute integer expression
// or a (possibly forward-referenced) symbol.
OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();

  switch (getLexer().getKind()) {
    default: return MatchOperand_ParseFail;
    case AsmToken::Integer: {
      int64_t Imm;
      if (getParser().parseAbsoluteExpression(Imm))
        return MatchOperand_ParseFail;
      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
      return MatchOperand_Success;
    }

    case AsmToken::Identifier:
      Operands.push_back(AMDGPUOperand::CreateExpr(this,
          MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                  Parser.getTok().getString()), getContext()), S));
Parser.Lex(); 4544 return MatchOperand_Success; 4545 } 4546} 4547 4548//===----------------------------------------------------------------------===// 4549// mubuf 4550//===----------------------------------------------------------------------===// 4551 4552AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 4553 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 4554} 4555 4556AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 4557 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 4558} 4559 4560void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 4561 const OperandVector &Operands, 4562 bool IsAtomic, 4563 bool IsAtomicReturn, 4564 bool IsLds) { 4565 bool IsLdsOpcode = IsLds; 4566 bool HasLdsModifier = false; 4567 OptionalImmIndexMap OptionalIdx; 4568 assert(IsAtomicReturn ? IsAtomic : true); 4569 4570 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4572 4573 // Add the register arguments 4574 if (Op.isReg()) { 4575 Op.addRegOperands(Inst, 1); 4576 continue; 4577 } 4578 4579 // Handle the case where soffset is an immediate 4580 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4581 Op.addImmOperands(Inst, 1); 4582 continue; 4583 } 4584 4585 HasLdsModifier = Op.isLDS(); 4586 4587 // Handle tokens like 'offen' which are sometimes hard-coded into the 4588 // asm string. There are no MCInst operands for these. 4589 if (Op.isToken()) { 4590 continue; 4591 } 4592 assert(Op.isImm()); 4593 4594 // Handle optional arguments 4595 OptionalIdx[Op.getImmTy()] = i; 4596 } 4597 4598 // This is a workaround for an llvm quirk which may result in an 4599 // incorrect instruction selection. Lds and non-lds versions of 4600 // MUBUF instructions are identical except that lds versions 4601 // have mandatory 'lds' modifier. 
However this modifier follows 4602 // optional modifiers and llvm asm matcher regards this 'lds' 4603 // modifier as an optional one. As a result, an lds version 4604 // of opcode may be selected even if it has no 'lds' modifier. 4605 if (IsLdsOpcode && !HasLdsModifier) { 4606 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 4607 if (NoLdsOpcode != -1) { // Got lds version - correct it. 4608 Inst.setOpcode(NoLdsOpcode); 4609 IsLdsOpcode = false; 4610 } 4611 } 4612 4613 // Copy $vdata_in operand and insert as $vdata for MUBUF_Atomic RTN insns. 4614 if (IsAtomicReturn) { 4615 MCInst::iterator I = Inst.begin(); // $vdata_in is always at the beginning. 4616 Inst.insert(I, *I); 4617 } 4618 4619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 4620 if (!IsAtomic) { // glc is hard-coded. 4621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 4622 } 4623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 4624 4625 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 4626 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 4627 } 4628} 4629 4630void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 4631 OptionalImmIndexMap OptionalIdx; 4632 4633 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4634 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4635 4636 // Add the register arguments 4637 if (Op.isReg()) { 4638 Op.addRegOperands(Inst, 1); 4639 continue; 4640 } 4641 4642 // Handle the case where soffset is an immediate 4643 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 4644 Op.addImmOperands(Inst, 1); 4645 continue; 4646 } 4647 4648 // Handle tokens like 'offen' which are sometimes hard-coded into the 4649 // asm string. There are no MCInst operands for these. 
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Append optional modifiers in the order the encoding expects.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into an MCInst. For atomics the single
// destination register is appended again as a source operand.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  // Append optional modifiers in encoding order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

// Default (zero) offsets supplied when the optional operand was omitted.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

// omod 'mul' syntax: accepts 1, 2 or 4 and maps them to the hardware
// encoding via >>1 (mul:1 -> 0, mul:2 -> 1, mul:4 -> 2).
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

// omod 'div' syntax: div:1 encodes as 0 (no modifier), div:2 as 3.
static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// bound_ctrl:0 encodes as 1; bound_ctrl:-1 encodes as 0.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
// Each entry: {asm name, immediate operand type, is-bit-flag (no value after
// the name), optional converter applied to the parsed value}.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  // "r128" and "a16" are alternative spellings for the same modifier.
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // NOTE(review): duplicate of the "d16" entry above. parseOptionalOpr
  // returns on the first match, so this entry appears redundant — confirm
  // against the AsmString ordering before removing it.
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    // Bounded lookahead: keep consuming optional operands (and separating
    // commas) until a parse failure or end of statement.
    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}

// Try each entry of AMDGPUOptionalOperandTable in order and dispatch to the
// specialized parser for its operand class; the first non-NoMatch result wins.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

// omod may be written as either mul:N or div:N; both map to ImmTyOModSI
// with the appropriate value converter.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI,
                              ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

// After the common VOP3P conversion, move the destination op_sel bit (stored
// one position past the last source in the op_sel immediate) into
// DST_OP_SEL of src0_modifiers, where the encoding keeps it.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many src0..src2 operands this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

// True when operand OpNum is an input-modifiers operand that is immediately
// followed by a register-class operand which is not tied to another operand.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4.
      // Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Convert VOP3 interpolation instructions: handles the interp slot / attr /
// attr_chan token operands plus the optional high/clamp/omod modifiers.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits two MCInst operands: the modifiers immediate and the value.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.Imm.Val));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Common VOP3 conversion. OptionalIdx records where each optional modifier
// appeared in Operands so callers (e.g. cvtVOP3P) can append them later.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc,
                                  AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Emits two MCInst operands: the modifiers immediate and the value.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// Convert packed-math VOP3P: after the common VOP3 conversion, append the
// op_sel / op_sel_hi / neg_lo / neg_hi immediates and then fold their
// per-source bits into the corresponding srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // Duplicate the destination as the vdst_in source operand.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Distribute bit J of each packed immediate into source J's modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// Returns true iff this operand is an ImmTyDppCtrl immediate whose value is
// one of the dpp_ctrl encodings accepted by DPP instructions: a quad_perm
// pattern, a row shift/rotate (by 1..15), a wave-wide shift/rotate by 1, a
// row (half-)mirror, or a row broadcast of lane 15/31.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  // dpp_ctrl is a 9-bit field; anything wider cannot be a valid encoding.
  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

// GPR index mode is a 4-bit immediate (see gpr_idx operands).
bool AMDGPUOperand::isGPRIdxMode() const {
  return isImm() && isUInt<4>(getImm());
}

// Accepts either signed or unsigned 16-bit immediates: assembly writers use
// both -1 and 0xffff style spellings for the same 16-bit pattern.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

// Parses a dpp_ctrl operand and pushes it as an ImmTyDppCtrl immediate.
// Accepted forms:
//   row_mirror / row_half_mirror                 (bare keywords)
//   quad_perm:[a,b,c,d]  with each value in 0..3 (2 bits per lane select)
//   row_shl:n / row_shr:n / row_ror:n            with n in 1..15
//   wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1
//   row_bcast:15 / row_bcast:31
// Returns MatchOperand_NoMatch when the token is not a DPP prefix at all
// (so other parsers may try it), and MatchOperand_ParseFail once a DPP
// prefix has been committed but the rest is malformed.
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens:
    // only commit to DPP parsing for a known prefix.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      // First lane select occupies bits [1:0] of the encoding.
      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        // Each subsequent lane select is 2 bits wide: bits [3:2], [5:4], [7:6].
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      // Row shifts/rotates OR the 1..15 amount into the base encoding;
      // wave ops and broadcasts replace the value with a fixed encoding.
      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        // Known prefix but out-of-range value (e.g. row_shl:0).
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default row_mask is 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

// Default bank_mask is 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

// Default bound_ctrl is 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

// Converts parsed operands of a DPP instruction into MCInst operands:
// defs first, then sources with FP input modifiers, the dpp_ctrl immediate,
// and finally the optional row_mask/bank_mask/bound_ctrl immediates (with
// defaults filled in when they were not written).
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    // Query the constraint of the MCInst slot we are about to fill; a
    // TIED_TO slot must duplicate an earlier operand rather than consume
    // a parsed one.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // NOTE: the missing `else` above is harmless -- the previous branch
      // always ends in `continue`, so this behaves as an else-if.
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parses an SDWA select operand of the form <Prefix>:<VALUE>, where VALUE is
// one of BYTE_0..BYTE_3, WORD_0, WORD_1 or DWORD, and pushes it as an
// immediate of the given Type (dst_sel/src0_sel/src1_sel).
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);   // sentinel: not a valid select name
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parses dst_unused:<VALUE> (UNUSED_PAD / UNUSED_SEXT / UNUSED_PRESERVE)
// and pushes it as an ImmTySdwaDstUnused immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);   // sentinel: not a valid dst_unused name
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-encoding wrappers that dispatch to cvtSDWA with the matching
// basic instruction class (and whether a parsed "vcc" token must be skipped).
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  // On VI the parsed "vcc" token is skipped; presumably VI's VOPC sdwa
  // encoding carries vcc implicitly -- see the VOPC skip case below.
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Converts parsed operands of an SDWA instruction into MCInst operands.
// BasicInstType selects which set of optional sdwa immediates (clamp, omod,
// dst_sel, dst_unused, src0_sel, src1_sel) gets appended with defaults.
// When skipVcc is set, a parsed "vcc" register token at specific positions
// is dropped instead of being emitted (it is implicit in the encoding).
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    // Reset so that two consecutive vcc tokens are not both skipped.
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      // Not every VOP1 sdwa opcode has an omod operand (gfx9 only).
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the destination is vcc.
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  // Register the asm parser for both the R600 and GCN targets.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}