//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }
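
    // These getters produce the extra "src modifiers" immediate that travels
    // with a VOP3/SDWA source operand in the MCInst. For example, a source
    // written as "-|v0|" sets both Neg and Abs (SISrcMods::NEG | SISrcMods::ABS),
    // while "sext(v0)" sets SISrcMods::SEXT. FP and integer modifiers are
    // mutually exclusive, as asserted in getModifiersOperand() below.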

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }
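
  // Naming scheme for the source-operand predicates below, as derived from
  // their definitions: "SCSrc" accepts an SGPR or an inline constant, "SSrc"
  // additionally accepts a literal, "VCSrc" accepts a VGPR/SGPR or inline
  // constant, "VSrc" adds literals on top of that, "VISrc" accepts a VGPR or
  // inline constant, and "AISrc" accepts an AGPR or inline constant. The B/F
  // suffix gives the integer/floating-point element type, and V2 marks
  // packed two-element operands.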

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }
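
  // Several of the V2 scalar-source predicates below (isSSrcV2B16,
  // isSSrcV2F16, isSSrcV2FP32, isSCSrcV2FP32, isSSrcV2INT32, isSCSrcV2INT32)
  // are marked llvm_unreachable("cannot happen"): they appear to exist only
  // to satisfy the auto-generated matcher tables and are presumably never
  // queried during matching.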

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
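
  // The AISrc* predicates below accept an accumulator register (AGPR) or an
  // inline constant. AGPRs were introduced with the gfx908 MAI/MFMA (matrix
  // fused-multiply-add) instructions, which is where these operand classes
  // are used.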

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }
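
  // Note: KImm operands are mandatory literal constants baked into the
  // encoding (e.g. the K constant of v_madmk_f32/v_madak_f32), not optional
  // literals, which is why they match on the literal predicate alone.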

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};
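
// The .kernel.sgpr_count and .kernel.vgpr_count symbols maintained above are
// ordinary assembler symbols, so assembly source can, for example, reference
// the running register counts when filling in kernel metadata fields.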
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
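
  // Like the GPR-count symbols, the version symbols set up above are plain
  // assembler symbols, so assembly source can guard target-specific code with
  // something like (a sketch of intended use, not a directive defined here):
  //   .if .amdgcn.gfx_generation_number >= 9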

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);
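
  // The swizzle parsers below accept either a raw 16-bit offset or one of
  // the named swizzle macros, e.g. (syntax per the AMDGPU assembler):
  //   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
  //   ds_swizzle_b32 v5, v1 offset:swizzle(SWAP, 2)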

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
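
// For example, 0xFFFF and -1 are both safe 16-bit truncations (valid as
// uint16 and int16 respectively), while 0x12345 is not representable in
// either 16-bit interpretation.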
1792 if (isInlineValue()) { 1793 return true; 1794 } 1795 1796 if (!isImmTy(ImmTyNone)) { 1797 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1798 return false; 1799 } 1800 // TODO: We should avoid using host float here. It would be better to 1801 // check the float bit values, which is what a few other places do. 1802 // We've had bot failures before due to weird NaN support on mips hosts. 1803 1804 APInt Literal(64, Imm.Val); 1805 1806 if (Imm.IsFPImm) { // We got fp literal token 1807 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1808 return AMDGPU::isInlinableLiteral64(Imm.Val, 1809 AsmParser->hasInv2PiInlineImm()); 1810 } 1811 1812 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1813 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1814 return false; 1815 1816 if (type.getScalarSizeInBits() == 16) { 1817 return isInlineableLiteralOp16( 1818 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1819 type, AsmParser->hasInv2PiInlineImm()); 1820 } 1821 1822 // Check if single precision literal is inlinable 1823 return AMDGPU::isInlinableLiteral32( 1824 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1825 AsmParser->hasInv2PiInlineImm()); 1826 } 1827 1828 // We got int literal token. 1829 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1830 return AMDGPU::isInlinableLiteral64(Imm.Val, 1831 AsmParser->hasInv2PiInlineImm()); 1832 } 1833 1834 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1835 return false; 1836 } 1837 1838 if (type.getScalarSizeInBits() == 16) { 1839 return isInlineableLiteralOp16( 1840 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1841 type, AsmParser->hasInv2PiInlineImm()); 1842 } 1843 1844 return AMDGPU::isInlinableLiteral32( 1845 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1846 AsmParser->hasInv2PiInlineImm()); 1847} 1848 1849bool AMDGPUOperand::isLiteralImm(MVT type) const { 1850 // Check that this immediate can be added as a literal 1851 if (!isImmTy(ImmTyNone)) { 1852 return false; 1853 } 1854 1855 if (!Imm.IsFPImm) { 1856 // We got int literal token. 1857 1858 if (type == MVT::f64 && hasFPModifiers()) { 1859 // Cannot apply fp modifiers to int literals preserving the same semantics 1860 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1861 // disable these cases. 1862 return false; 1863 } 1864 1865 unsigned Size = type.getSizeInBits(); 1866 if (Size == 64) 1867 Size = 32; 1868 1869 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1870 // types. 1871 return isSafeTruncation(Imm.Val, Size); 1872 } 1873 1874 // We got fp literal token 1875 if (type == MVT::f64) { // Expected 64-bit fp operand 1876 // The low 32 bits of the literal will be set to zero, but we accept such literals 1877 return true; 1878 } 1879 1880 if (type == MVT::i64) { // Expected 64-bit int operand 1881 // We don't allow fp literals in 64-bit integer instructions. It is 1882 // unclear how we should encode them. 1883 return false; 1884 } 1885 1886 // We allow fp literals with v2f16 operands assuming that the specified 1887 // literal goes into the lower half and the upper half is zero. We also 1888 // require that the literal be losslessly convertible to f16. 1889 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1890 (type == MVT::v2i16)? MVT::i16 : 1891 (type == MVT::v2f32)?
MVT::f32 : type; 1892 1893 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1894 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1895} 1896 1897bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1898 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1899} 1900 1901bool AMDGPUOperand::isVRegWithInputMods() const { 1902 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1903 // GFX90A allows DPP on 64-bit operands. 1904 (isRegClass(AMDGPU::VReg_64RegClassID) && 1905 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1906} 1907 1908bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1909 if (AsmParser->isVI()) 1910 return isVReg32(); 1911 else if (AsmParser->isGFX9Plus()) 1912 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1913 else 1914 return false; 1915} 1916 1917bool AMDGPUOperand::isSDWAFP16Operand() const { 1918 return isSDWAOperand(MVT::f16); 1919} 1920 1921bool AMDGPUOperand::isSDWAFP32Operand() const { 1922 return isSDWAOperand(MVT::f32); 1923} 1924 1925bool AMDGPUOperand::isSDWAInt16Operand() const { 1926 return isSDWAOperand(MVT::i16); 1927} 1928 1929bool AMDGPUOperand::isSDWAInt32Operand() const { 1930 return isSDWAOperand(MVT::i32); 1931} 1932 1933bool AMDGPUOperand::isBoolReg() const { 1934 auto FB = AsmParser->getFeatureBits(); 1935 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1936 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1937} 1938 1939uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1940{ 1941 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1942 assert(Size == 2 || Size == 4 || Size == 8); 1943 1944 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1945 1946 if (Imm.Mods.Abs) { 1947 Val &= ~FpSignMask; 1948 } 1949 if (Imm.Mods.Neg) { 1950 Val ^= FpSignMask; 1951 } 1952 1953 return Val; 1954} 1955 1956void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1957 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1958 Inst.getNumOperands())) { 1959 addLiteralImmOperand(Inst, Imm.Val, 1960 ApplyModifiers & 1961 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1962 } else { 1963 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1964 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1965 setImmKindNone(); 1966 } 1967} 1968 1969void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1970 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1971 auto OpNum = Inst.getNumOperands(); 1972 // Check that this operand accepts literals 1973 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1974 1975 if (ApplyModifiers) { 1976 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1977 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1978 Val = applyInputFPModifiers(Val, Size); 1979 } 1980 1981 APInt Literal(64, Val); 1982 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1983 1984 if (Imm.IsFPImm) { // We got fp literal token 1985 switch (OpTy) { 1986 case AMDGPU::OPERAND_REG_IMM_INT64: 1987 case AMDGPU::OPERAND_REG_IMM_FP64: 1988 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1989 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1990 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1991 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1992 AsmParser->hasInv2PiInlineImm())) { 1993 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1994 setImmKindConst(); 1995 return; 1996 } 1997 1998 // Non-inlineable 1999 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2000 // For fp operands we check whether the low 32 bits are zero 2001 if (Literal.getLoBits(32) != 0) { 2002 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2003 "Can't encode literal as exact 64-bit floating-point operand. " 2004 "Low 32-bits will be set to zero"); 2005 } 2006 2007 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2008 setImmKindLiteral(); 2009 return; 2010 } 2011 2012 // We don't allow fp literals in 64-bit integer instructions. It is 2013 // unclear how we should encode them. This case should be checked earlier 2014 // in predicate methods (isLiteralImm()) 2015 llvm_unreachable("fp literal in 64-bit integer instruction."); 2016 2017 case AMDGPU::OPERAND_REG_IMM_INT32: 2018 case AMDGPU::OPERAND_REG_IMM_FP32: 2019 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2020 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2021 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2023 case AMDGPU::OPERAND_REG_IMM_INT16: 2024 case AMDGPU::OPERAND_REG_IMM_FP16: 2025 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2027 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2029 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2030 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2033 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2034 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2035 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2036 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2037 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2038 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2039 bool lost; 2040 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2041 // Convert the literal to the semantics of the expected operand type 2042 FPLiteral.convert(*getOpFltSemantics(OpTy), 2043 APFloat::rmNearestTiesToEven, &lost); 2044 // We allow precision loss but not overflow or underflow. This should 2045 // have been checked earlier in isLiteralImm() 2046 2047 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2048 Inst.addOperand(MCOperand::createImm(ImmVal)); 2049 setImmKindLiteral(); 2050 return; 2051 } 2052 default: 2053 llvm_unreachable("invalid operand size"); 2054 } 2055 2056 return; 2057 } 2058 2059 // We got int literal token. 2060 // Only sign extend inline immediates.
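  // For example, with a 32-bit operand, -1 matches an inline constant and
  // is kept as-is (setImmKindConst), while 0x12345678 matches no inline
  // constant, so it is truncated to its low 32 bits and emitted as a
  // literal (setImmKindLiteral).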
2061 switch (OpTy) { 2062 case AMDGPU::OPERAND_REG_IMM_INT32: 2063 case AMDGPU::OPERAND_REG_IMM_FP32: 2064 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2066 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2068 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2069 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2070 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2071 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2072 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2073 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2074 if (isSafeTruncation(Val, 32) && 2075 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2076 AsmParser->hasInv2PiInlineImm())) { 2077 Inst.addOperand(MCOperand::createImm(Val)); 2078 setImmKindConst(); 2079 return; 2080 } 2081 2082 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2083 setImmKindLiteral(); 2084 return; 2085 2086 case AMDGPU::OPERAND_REG_IMM_INT64: 2087 case AMDGPU::OPERAND_REG_IMM_FP64: 2088 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2089 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2091 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2092 Inst.addOperand(MCOperand::createImm(Val)); 2093 setImmKindConst(); 2094 return; 2095 } 2096 2097 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2098 setImmKindLiteral(); 2099 return; 2100 2101 case AMDGPU::OPERAND_REG_IMM_INT16: 2102 case AMDGPU::OPERAND_REG_IMM_FP16: 2103 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2105 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2106 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2107 if (isSafeTruncation(Val, 16) && 2108 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2109 AsmParser->hasInv2PiInlineImm())) { 2110 Inst.addOperand(MCOperand::createImm(Val)); 2111 setImmKindConst(); 2112 return; 2113 } 2114 2115 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2116 setImmKindLiteral(); 2117 return; 2118 2119 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2123 assert(isSafeTruncation(Val, 16)); 2124 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2125 AsmParser->hasInv2PiInlineImm())); 2126 2127 Inst.addOperand(MCOperand::createImm(Val)); 2128 return; 2129 } 2130 default: 2131 llvm_unreachable("invalid operand size"); 2132 } 2133} 2134 2135template <unsigned Bitwidth> 2136void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2137 APInt Literal(64, Imm.Val); 2138 setImmKindNone(); 2139 2140 if (!Imm.IsFPImm) { 2141 // We got int literal token. 
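    // Integer tokens are passed through unconverted; only the low
    // 'Bitwidth' bits are kept, e.g. 0x12345 becomes 0x2345 for a
    // 16-bit k-imm operand.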
2142 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2143 return; 2144 } 2145 2146 bool Lost; 2147 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2148 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2149 APFloat::rmNearestTiesToEven, &Lost); 2150 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2151} 2152 2153void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2154 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2155} 2156 2157static bool isInlineValue(unsigned Reg) { 2158 switch (Reg) { 2159 case AMDGPU::SRC_SHARED_BASE: 2160 case AMDGPU::SRC_SHARED_LIMIT: 2161 case AMDGPU::SRC_PRIVATE_BASE: 2162 case AMDGPU::SRC_PRIVATE_LIMIT: 2163 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2164 return true; 2165 case AMDGPU::SRC_VCCZ: 2166 case AMDGPU::SRC_EXECZ: 2167 case AMDGPU::SRC_SCC: 2168 return true; 2169 case AMDGPU::SGPR_NULL: 2170 return true; 2171 default: 2172 return false; 2173 } 2174} 2175 2176bool AMDGPUOperand::isInlineValue() const { 2177 return isRegKind() && ::isInlineValue(getReg()); 2178} 2179 2180//===----------------------------------------------------------------------===// 2181// AsmParser 2182//===----------------------------------------------------------------------===// 2183 2184static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2185 if (Is == IS_VGPR) { 2186 switch (RegWidth) { 2187 default: return -1; 2188 case 1: return AMDGPU::VGPR_32RegClassID; 2189 case 2: return AMDGPU::VReg_64RegClassID; 2190 case 3: return AMDGPU::VReg_96RegClassID; 2191 case 4: return AMDGPU::VReg_128RegClassID; 2192 case 5: return AMDGPU::VReg_160RegClassID; 2193 case 6: return AMDGPU::VReg_192RegClassID; 2194 case 8: return AMDGPU::VReg_256RegClassID; 2195 case 16: return AMDGPU::VReg_512RegClassID; 2196 case 32: return AMDGPU::VReg_1024RegClassID; 2197 } 2198 } else if (Is == IS_TTMP) { 2199 switch (RegWidth) { 2200 default: return -1; 2201 case 1: return AMDGPU::TTMP_32RegClassID; 2202 case 2: return AMDGPU::TTMP_64RegClassID; 2203 case 4: return AMDGPU::TTMP_128RegClassID; 2204 case 8: return AMDGPU::TTMP_256RegClassID; 2205 case 16: return AMDGPU::TTMP_512RegClassID; 2206 } 2207 } else if (Is == IS_SGPR) { 2208 switch (RegWidth) { 2209 default: return -1; 2210 case 1: return AMDGPU::SGPR_32RegClassID; 2211 case 2: return AMDGPU::SGPR_64RegClassID; 2212 case 3: return AMDGPU::SGPR_96RegClassID; 2213 case 4: return AMDGPU::SGPR_128RegClassID; 2214 case 5: return AMDGPU::SGPR_160RegClassID; 2215 case 6: return AMDGPU::SGPR_192RegClassID; 2216 case 8: return AMDGPU::SGPR_256RegClassID; 2217 case 16: return AMDGPU::SGPR_512RegClassID; 2218 } 2219 } else if (Is == IS_AGPR) { 2220 switch (RegWidth) { 2221 default: return -1; 2222 case 1: return AMDGPU::AGPR_32RegClassID; 2223 case 2: return AMDGPU::AReg_64RegClassID; 2224 case 3: return AMDGPU::AReg_96RegClassID; 2225 case 4: return AMDGPU::AReg_128RegClassID; 2226 case 5: return AMDGPU::AReg_160RegClassID; 2227 case 6: return AMDGPU::AReg_192RegClassID; 2228 case 8: return AMDGPU::AReg_256RegClassID; 2229 case 16: return AMDGPU::AReg_512RegClassID; 2230 case 32: return AMDGPU::AReg_1024RegClassID; 2231 } 2232 } 2233 return -1; 2234} 2235 2236static unsigned getSpecialRegForName(StringRef RegName) { 2237 return StringSwitch<unsigned>(RegName) 2238 .Case("exec", AMDGPU::EXEC) 2239 .Case("vcc", AMDGPU::VCC) 2240 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2241 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2242 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2243 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2244 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2245 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2246 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2247 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2248 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2249 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2250 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2251 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2252 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2253 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2254 .Case("m0", AMDGPU::M0) 2255 .Case("vccz", AMDGPU::SRC_VCCZ) 2256 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2257 .Case("execz", AMDGPU::SRC_EXECZ) 2258 .Case("src_execz", AMDGPU::SRC_EXECZ) 2259 .Case("scc", AMDGPU::SRC_SCC) 2260 .Case("src_scc", AMDGPU::SRC_SCC) 2261 .Case("tba", AMDGPU::TBA) 2262 .Case("tma", AMDGPU::TMA) 2263 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2264 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2265 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2266 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2267 .Case("vcc_lo", AMDGPU::VCC_LO) 2268 .Case("vcc_hi", AMDGPU::VCC_HI) 2269 .Case("exec_lo", AMDGPU::EXEC_LO) 2270 .Case("exec_hi", AMDGPU::EXEC_HI) 2271 .Case("tma_lo", AMDGPU::TMA_LO) 2272 .Case("tma_hi", AMDGPU::TMA_HI) 2273 .Case("tba_lo", AMDGPU::TBA_LO) 2274 .Case("tba_hi", AMDGPU::TBA_HI) 2275 .Case("pc", AMDGPU::PC_REG) 2276 .Case("null", AMDGPU::SGPR_NULL) 2277 .Default(AMDGPU::NoRegister); 2278} 2279 2280bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2281 SMLoc &EndLoc, bool RestoreOnFailure) { 2282 auto R = parseRegister(); 2283 if (!R) return true; 2284 assert(R->isReg()); 2285 RegNo = R->getReg(); 2286 StartLoc = R->getStartLoc(); 2287 EndLoc = R->getEndLoc(); 2288 return false; 2289} 2290 2291bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2292 SMLoc &EndLoc) { 2293 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2294} 2295 2296OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2297 SMLoc &StartLoc, 2298 SMLoc &EndLoc) { 2299 bool Result = 2300 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2301 bool PendingErrors = getParser().hasPendingError(); 2302 getParser().clearPendingErrors(); 2303 if (PendingErrors) 2304 return MatchOperand_ParseFail; 2305 if (Result) 2306 return MatchOperand_NoMatch; 2307 return MatchOperand_Success; 2308} 2309 2310bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2311 RegisterKind RegKind, unsigned Reg1, 2312 SMLoc Loc) { 2313 switch (RegKind) { 2314 case IS_SPECIAL: 2315 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2316 Reg = AMDGPU::EXEC; 2317 RegWidth = 2; 2318 return true; 2319 } 2320 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2321 Reg = AMDGPU::FLAT_SCR; 2322 RegWidth = 2; 2323 return true; 2324 } 2325 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2326 Reg = AMDGPU::XNACK_MASK; 2327 RegWidth = 2; 2328 return true; 2329 } 2330 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2331 Reg = AMDGPU::VCC; 2332 RegWidth = 2; 2333 return true; 2334 } 2335 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2336 Reg = AMDGPU::TBA; 2337 RegWidth = 2; 2338 return true; 2339 } 2340 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2341 Reg = AMDGPU::TMA; 2342 RegWidth 
= 2; 2343 return true; 2344 } 2345 Error(Loc, "register does not fit in the list"); 2346 return false; 2347 case IS_VGPR: 2348 case IS_SGPR: 2349 case IS_AGPR: 2350 case IS_TTMP: 2351 if (Reg1 != Reg + RegWidth) { 2352 Error(Loc, "registers in a list must have consecutive indices"); 2353 return false; 2354 } 2355 RegWidth++; 2356 return true; 2357 default: 2358 llvm_unreachable("unexpected register kind"); 2359 } 2360} 2361 2362struct RegInfo { 2363 StringLiteral Name; 2364 RegisterKind Kind; 2365}; 2366 2367static constexpr RegInfo RegularRegisters[] = { 2368 {{"v"}, IS_VGPR}, 2369 {{"s"}, IS_SGPR}, 2370 {{"ttmp"}, IS_TTMP}, 2371 {{"acc"}, IS_AGPR}, 2372 {{"a"}, IS_AGPR}, 2373}; 2374 2375static bool isRegularReg(RegisterKind Kind) { 2376 return Kind == IS_VGPR || 2377 Kind == IS_SGPR || 2378 Kind == IS_TTMP || 2379 Kind == IS_AGPR; 2380} 2381 2382static const RegInfo* getRegularRegInfo(StringRef Str) { 2383 for (const RegInfo &Reg : RegularRegisters) 2384 if (Str.startswith(Reg.Name)) 2385 return &Reg; 2386 return nullptr; 2387} 2388 2389static bool getRegNum(StringRef Str, unsigned& Num) { 2390 return !Str.getAsInteger(10, Num); 2391} 2392 2393bool 2394AMDGPUAsmParser::isRegister(const AsmToken &Token, 2395 const AsmToken &NextToken) const { 2396 2397 // A list of consecutive registers: [s0,s1,s2,s3] 2398 if (Token.is(AsmToken::LBrac)) 2399 return true; 2400 2401 if (!Token.is(AsmToken::Identifier)) 2402 return false; 2403 2404 // A single register like s0 or a range of registers like s[0:1] 2405 2406 StringRef Str = Token.getString(); 2407 const RegInfo *Reg = getRegularRegInfo(Str); 2408 if (Reg) { 2409 StringRef RegName = Reg->Name; 2410 StringRef RegSuffix = Str.substr(RegName.size()); 2411 if (!RegSuffix.empty()) { 2412 unsigned Num; 2413 // A single register with an index: rXX 2414 if (getRegNum(RegSuffix, Num)) 2415 return true; 2416 } else { 2417 // A range of registers: r[XX:YY]. 2418 if (NextToken.is(AsmToken::LBrac)) 2419 return true; 2420 } 2421 } 2422 2423 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2424} 2425 2426bool 2427AMDGPUAsmParser::isRegister() 2428{ 2429 return isRegister(getToken(), peekToken()); 2430} 2431 2432unsigned 2433AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2434 unsigned RegNum, 2435 unsigned RegWidth, 2436 SMLoc Loc) { 2437 2438 assert(isRegularReg(RegKind)); 2439 2440 unsigned AlignSize = 1; 2441 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2442 // SGPR and TTMP registers must be aligned. 2443 // Max required alignment is 4 dwords. 
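  // For example, s[4:7] is accepted (the first index is a multiple of the
  // required 4-dword alignment), while s[2:5] triggers the alignment
  // error below.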
2444 AlignSize = std::min(RegWidth, 4u); 2445 } 2446 2447 if (RegNum % AlignSize != 0) { 2448 Error(Loc, "invalid register alignment"); 2449 return AMDGPU::NoRegister; 2450 } 2451 2452 unsigned RegIdx = RegNum / AlignSize; 2453 int RCID = getRegClass(RegKind, RegWidth); 2454 if (RCID == -1) { 2455 Error(Loc, "invalid or unsupported register size"); 2456 return AMDGPU::NoRegister; 2457 } 2458 2459 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2460 const MCRegisterClass RC = TRI->getRegClass(RCID); 2461 if (RegIdx >= RC.getNumRegs()) { 2462 Error(Loc, "register index is out of range"); 2463 return AMDGPU::NoRegister; 2464 } 2465 2466 return RC.getRegister(RegIdx); 2467} 2468 2469bool 2470AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2471 int64_t RegLo, RegHi; 2472 if (!skipToken(AsmToken::LBrac, "missing register index")) 2473 return false; 2474 2475 SMLoc FirstIdxLoc = getLoc(); 2476 SMLoc SecondIdxLoc; 2477 2478 if (!parseExpr(RegLo)) 2479 return false; 2480 2481 if (trySkipToken(AsmToken::Colon)) { 2482 SecondIdxLoc = getLoc(); 2483 if (!parseExpr(RegHi)) 2484 return false; 2485 } else { 2486 RegHi = RegLo; 2487 } 2488 2489 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2490 return false; 2491 2492 if (!isUInt<32>(RegLo)) { 2493 Error(FirstIdxLoc, "invalid register index"); 2494 return false; 2495 } 2496 2497 if (!isUInt<32>(RegHi)) { 2498 Error(SecondIdxLoc, "invalid register index"); 2499 return false; 2500 } 2501 2502 if (RegLo > RegHi) { 2503 Error(FirstIdxLoc, "first register index should not exceed second index"); 2504 return false; 2505 } 2506 2507 Num = static_cast<unsigned>(RegLo); 2508 Width = (RegHi - RegLo) + 1; 2509 return true; 2510} 2511 2512unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2513 unsigned &RegNum, unsigned &RegWidth, 2514 SmallVectorImpl<AsmToken> &Tokens) { 2515 assert(isToken(AsmToken::Identifier)); 2516 unsigned Reg = getSpecialRegForName(getTokenStr()); 2517 if (Reg) { 2518 RegNum = 0; 2519 RegWidth = 1; 2520 RegKind = IS_SPECIAL; 2521 Tokens.push_back(getToken()); 2522 lex(); // skip register name 2523 } 2524 return Reg; 2525} 2526 2527unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2528 unsigned &RegNum, unsigned &RegWidth, 2529 SmallVectorImpl<AsmToken> &Tokens) { 2530 assert(isToken(AsmToken::Identifier)); 2531 StringRef RegName = getTokenStr(); 2532 auto Loc = getLoc(); 2533 2534 const RegInfo *RI = getRegularRegInfo(RegName); 2535 if (!RI) { 2536 Error(Loc, "invalid register name"); 2537 return AMDGPU::NoRegister; 2538 } 2539 2540 Tokens.push_back(getToken()); 2541 lex(); // skip register name 2542 2543 RegKind = RI->Kind; 2544 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2545 if (!RegSuffix.empty()) { 2546 // Single 32-bit register: vXX. 2547 if (!getRegNum(RegSuffix, RegNum)) { 2548 Error(Loc, "invalid register index"); 2549 return AMDGPU::NoRegister; 2550 } 2551 RegWidth = 1; 2552 } else { 2553 // Range of registers: v[XX:YY]. ":YY" is optional. 
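    // e.g. 'v[8:11]' yields RegNum == 8 and RegWidth == 4, while 'v[8]'
    // yields RegNum == 8 and RegWidth == 1.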
2554 if (!ParseRegRange(RegNum, RegWidth)) 2555 return AMDGPU::NoRegister; 2556 } 2557 2558 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2559} 2560 2561unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2562 unsigned &RegWidth, 2563 SmallVectorImpl<AsmToken> &Tokens) { 2564 unsigned Reg = AMDGPU::NoRegister; 2565 auto ListLoc = getLoc(); 2566 2567 if (!skipToken(AsmToken::LBrac, 2568 "expected a register or a list of registers")) { 2569 return AMDGPU::NoRegister; 2570 } 2571 2572 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2573 2574 auto Loc = getLoc(); 2575 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2576 return AMDGPU::NoRegister; 2577 if (RegWidth != 1) { 2578 Error(Loc, "expected a single 32-bit register"); 2579 return AMDGPU::NoRegister; 2580 } 2581 2582 for (; trySkipToken(AsmToken::Comma); ) { 2583 RegisterKind NextRegKind; 2584 unsigned NextReg, NextRegNum, NextRegWidth; 2585 Loc = getLoc(); 2586 2587 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2588 NextRegNum, NextRegWidth, 2589 Tokens)) { 2590 return AMDGPU::NoRegister; 2591 } 2592 if (NextRegWidth != 1) { 2593 Error(Loc, "expected a single 32-bit register"); 2594 return AMDGPU::NoRegister; 2595 } 2596 if (NextRegKind != RegKind) { 2597 Error(Loc, "registers in a list must be of the same kind"); 2598 return AMDGPU::NoRegister; 2599 } 2600 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2601 return AMDGPU::NoRegister; 2602 } 2603 2604 if (!skipToken(AsmToken::RBrac, 2605 "expected a comma or a closing square bracket")) { 2606 return AMDGPU::NoRegister; 2607 } 2608 2609 if (isRegularReg(RegKind)) 2610 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2611 2612 return Reg; 2613} 2614 2615bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2616 unsigned &RegNum, unsigned &RegWidth, 2617 SmallVectorImpl<AsmToken> &Tokens) { 2618 auto Loc = getLoc(); 2619 Reg = AMDGPU::NoRegister; 2620 2621 if (isToken(AsmToken::Identifier)) { 2622 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2623 if (Reg == AMDGPU::NoRegister) 2624 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2625 } else { 2626 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2627 } 2628 2629 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2630 if (Reg == AMDGPU::NoRegister) { 2631 assert(Parser.hasPendingError()); 2632 return false; 2633 } 2634 2635 if (!subtargetHasRegister(*TRI, Reg)) { 2636 if (Reg == AMDGPU::SGPR_NULL) { 2637 Error(Loc, "'null' operand is not supported on this GPU"); 2638 } else { 2639 Error(Loc, "register not available on this GPU"); 2640 } 2641 return false; 2642 } 2643 2644 return true; 2645} 2646 2647bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2648 unsigned &RegNum, unsigned &RegWidth, 2649 bool RestoreOnFailure /*=false*/) { 2650 Reg = AMDGPU::NoRegister; 2651 2652 SmallVector<AsmToken, 1> Tokens; 2653 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2654 if (RestoreOnFailure) { 2655 while (!Tokens.empty()) { 2656 getLexer().UnLex(Tokens.pop_back_val()); 2657 } 2658 } 2659 return true; 2660 } 2661 return false; 2662} 2663 2664Optional<StringRef> 2665AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2666 switch (RegKind) { 2667 case IS_VGPR: 2668 return StringRef(".amdgcn.next_free_vgpr"); 2669 case IS_SGPR: 2670 return StringRef(".amdgcn.next_free_sgpr"); 2671 default: 2672 return None; 2673 } 2674} 2675 2676void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2677 auto SymbolName = getGprCountSymbolName(RegKind); 2678 assert(SymbolName && "initializing invalid register kind"); 2679 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2680 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2681} 2682 2683bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2684 unsigned DwordRegIndex, 2685 unsigned RegWidth) { 2686 // Symbols are only defined for GCN targets 2687 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2688 return true; 2689 2690 auto SymbolName = getGprCountSymbolName(RegKind); 2691 if (!SymbolName) 2692 return true; 2693 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2694 2695 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2696 int64_t OldCount; 2697 2698 if (!Sym->isVariable()) 2699 return !Error(getLoc(), 2700 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2701 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2702 return !Error( 2703 getLoc(), 2704 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2705 2706 if (OldCount <= NewMax) 2707 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2708 2709 return true; 2710} 2711 2712std::unique_ptr<AMDGPUOperand> 2713AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2714 const auto &Tok = getToken(); 2715 SMLoc StartLoc = Tok.getLoc(); 2716 SMLoc EndLoc = Tok.getEndLoc(); 2717 RegisterKind RegKind; 2718 unsigned Reg, RegNum, RegWidth; 2719 2720 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2721 return nullptr; 2722 } 2723 if (isHsaAbiVersion3Or4(&getSTI())) { 2724 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2725 return nullptr; 2726 } else 2727 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2728 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2729} 2730 2731OperandMatchResultTy 2732AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2733 // TODO: add syntactic sugar for 1/(2*PI) 2734 2735 assert(!isRegister()); 2736 assert(!isModifier()); 2737 2738 const auto& Tok = getToken(); 2739 const auto& NextTok = peekToken(); 2740 bool IsReal = Tok.is(AsmToken::Real); 2741 SMLoc S = getLoc(); 2742 bool Negate = false; 2743 2744 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2745 lex(); 2746 IsReal = true; 2747 Negate = true; 2748 } 2749 2750 if (IsReal) { 2751 // Floating-point expressions are not supported. 2752 // Can only allow floating-point literals with an 2753 // optional sign. 2754 2755 StringRef Num = getTokenStr(); 2756 lex(); 2757 2758 APFloat RealVal(APFloat::IEEEdouble()); 2759 auto roundMode = APFloat::rmNearestTiesToEven; 2760 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2761 return MatchOperand_ParseFail; 2762 } 2763 if (Negate) 2764 RealVal.changeSign(); 2765 2766 Operands.push_back( 2767 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2768 AMDGPUOperand::ImmTyNone, true)); 2769 2770 return MatchOperand_Success; 2771 2772 } else { 2773 int64_t IntVal; 2774 const MCExpr *Expr; 2775 SMLoc S = getLoc(); 2776 2777 if (HasSP3AbsModifier) { 2778 // This is a workaround for handling expressions 2779 // as arguments of SP3 'abs' modifier, for example: 2780 // |1.0| 2781 // |-1| 2782 // |1+x| 2783 // This syntax is not compatible with syntax of standard 2784 // MC expressions (due to the trailing '|'). 
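      // parsePrimaryExpr() stops before the trailing '|', whereas
      // parseExpression() would try to consume it as a binary operator.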
2785 SMLoc EndLoc; 2786 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2787 return MatchOperand_ParseFail; 2788 } else { 2789 if (Parser.parseExpression(Expr)) 2790 return MatchOperand_ParseFail; 2791 } 2792 2793 if (Expr->evaluateAsAbsolute(IntVal)) { 2794 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2795 } else { 2796 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2797 } 2798 2799 return MatchOperand_Success; 2800 } 2801 2802 return MatchOperand_NoMatch; 2803} 2804 2805OperandMatchResultTy 2806AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2807 if (!isRegister()) 2808 return MatchOperand_NoMatch; 2809 2810 if (auto R = parseRegister()) { 2811 assert(R->isReg()); 2812 Operands.push_back(std::move(R)); 2813 return MatchOperand_Success; 2814 } 2815 return MatchOperand_ParseFail; 2816} 2817 2818OperandMatchResultTy 2819AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2820 auto res = parseReg(Operands); 2821 if (res != MatchOperand_NoMatch) { 2822 return res; 2823 } else if (isModifier()) { 2824 return MatchOperand_NoMatch; 2825 } else { 2826 return parseImm(Operands, HasSP3AbsMod); 2827 } 2828} 2829 2830bool 2831AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2832 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2833 const auto &str = Token.getString(); 2834 return str == "abs" || str == "neg" || str == "sext"; 2835 } 2836 return false; 2837} 2838 2839bool 2840AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2841 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2842} 2843 2844bool 2845AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2846 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2847} 2848 2849bool 2850AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2851 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2852} 2853 2854// Check if this is an operand modifier or an opcode modifier 2855// which may look like an expression but it is not. We should 2856// avoid parsing these modifiers as expressions. Currently 2857// recognized sequences are: 2858// |...| 2859// abs(...) 2860// neg(...) 2861// sext(...) 2862// -reg 2863// -|...| 2864// -abs(...) 2865// name:... 2866// Note that simple opcode modifiers like 'gds' may be parsed as 2867// expressions; this is a special case. See getExpressionAsToken. 2868// 2869bool 2870AMDGPUAsmParser::isModifier() { 2871 2872 AsmToken Tok = getToken(); 2873 AsmToken NextToken[2]; 2874 peekTokens(NextToken); 2875 2876 return isOperandModifier(Tok, NextToken[0]) || 2877 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2878 isOpcodeModifierWithVal(Tok, NextToken[0]); 2879} 2880 2881// Check if the current token is an SP3 'neg' modifier. 2882// Currently this modifier is allowed in the following context: 2883// 2884// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2885// 2. Before an 'abs' modifier: -abs(...) 2886// 3. Before an SP3 'abs' modifier: -|...| 2887// 2888// In all other cases "-" is handled as a part 2889// of an expression that follows the sign. 2890// 2891// Note: When "-" is followed by an integer literal, 2892// this is interpreted as integer negation rather 2893// than a floating-point NEG modifier applied to N. 
2894// Besides being counter-intuitive, such a use of the floating-point 2895// NEG modifier would result in different meanings 2896// of integer literals used with VOP1/2/C and VOP3, 2897// for example: 2898// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2899// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2900// Negative fp literals with a preceding "-" are 2901// handled likewise for uniformity 2902// 2903bool 2904AMDGPUAsmParser::parseSP3NegModifier() { 2905 2906 AsmToken NextToken[2]; 2907 peekTokens(NextToken); 2908 2909 if (isToken(AsmToken::Minus) && 2910 (isRegister(NextToken[0], NextToken[1]) || 2911 NextToken[0].is(AsmToken::Pipe) || 2912 isId(NextToken[0], "abs"))) { 2913 lex(); 2914 return true; 2915 } 2916 2917 return false; 2918} 2919 2920OperandMatchResultTy 2921AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2922 bool AllowImm) { 2923 bool Neg, SP3Neg; 2924 bool Abs, SP3Abs; 2925 SMLoc Loc; 2926 2927 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2928 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2929 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2930 return MatchOperand_ParseFail; 2931 } 2932 2933 SP3Neg = parseSP3NegModifier(); 2934 2935 Loc = getLoc(); 2936 Neg = trySkipId("neg"); 2937 if (Neg && SP3Neg) { 2938 Error(Loc, "expected register or immediate"); 2939 return MatchOperand_ParseFail; 2940 } 2941 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2942 return MatchOperand_ParseFail; 2943 2944 Abs = trySkipId("abs"); 2945 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2946 return MatchOperand_ParseFail; 2947 2948 Loc = getLoc(); 2949 SP3Abs = trySkipToken(AsmToken::Pipe); 2950 if (Abs && SP3Abs) { 2951 Error(Loc, "expected register or immediate"); 2952 return MatchOperand_ParseFail; 2953 } 2954 2955 OperandMatchResultTy Res; 2956 if (AllowImm) { 2957 Res = parseRegOrImm(Operands, SP3Abs); 2958 } else { 2959 Res = parseReg(Operands); 2960 } 2961 if (Res != MatchOperand_Success) { 2962 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res; 2963 } 2964 2965 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2966 return MatchOperand_ParseFail; 2967 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2968 return MatchOperand_ParseFail; 2969 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2970 return MatchOperand_ParseFail; 2971 2972 AMDGPUOperand::Modifiers Mods; 2973 Mods.Abs = Abs || SP3Abs; 2974 Mods.Neg = Neg || SP3Neg; 2975 2976 if (Mods.hasFPModifiers()) { 2977 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2978 if (Op.isExpr()) { 2979 Error(Op.getStartLoc(), "expected an absolute expression"); 2980 return MatchOperand_ParseFail; 2981 } 2982 Op.setModifiers(Mods); 2983 } 2984 return MatchOperand_Success; 2985} 2986 2987OperandMatchResultTy 2988AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2989 bool AllowImm) { 2990 bool Sext = trySkipId("sext"); 2991 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2992 return MatchOperand_ParseFail; 2993 2994 OperandMatchResultTy Res; 2995 if (AllowImm) { 2996 Res = parseRegOrImm(Operands); 2997 } else { 2998 Res = parseReg(Operands); 2999 } 3000 if (Res != MatchOperand_Success) { 3001 return Sext?
MatchOperand_ParseFail : Res; 3002 } 3003 3004 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3005 return MatchOperand_ParseFail; 3006 3007 AMDGPUOperand::Modifiers Mods; 3008 Mods.Sext = Sext; 3009 3010 if (Mods.hasIntModifiers()) { 3011 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3012 if (Op.isExpr()) { 3013 Error(Op.getStartLoc(), "expected an absolute expression"); 3014 return MatchOperand_ParseFail; 3015 } 3016 Op.setModifiers(Mods); 3017 } 3018 3019 return MatchOperand_Success; 3020} 3021 3022OperandMatchResultTy 3023AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3024 return parseRegOrImmWithFPInputMods(Operands, false); 3025} 3026 3027OperandMatchResultTy 3028AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3029 return parseRegOrImmWithIntInputMods(Operands, false); 3030} 3031 3032OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3033 auto Loc = getLoc(); 3034 if (trySkipId("off")) { 3035 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3036 AMDGPUOperand::ImmTyOff, false)); 3037 return MatchOperand_Success; 3038 } 3039 3040 if (!isRegister()) 3041 return MatchOperand_NoMatch; 3042 3043 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3044 if (Reg) { 3045 Operands.push_back(std::move(Reg)); 3046 return MatchOperand_Success; 3047 } 3048 3049 return MatchOperand_ParseFail; 3050 3051} 3052 3053unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3054 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3055 3056 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3057 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3058 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3059 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3060 return Match_InvalidOperand; 3061 3062 if ((TSFlags & SIInstrFlags::VOP3) && 3063 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3064 getForcedEncodingSize() != 64) 3065 return Match_PreferE32; 3066 3067 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3068 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3069 // v_mac_f32/16 allow only dst_sel == DWORD; 3070 auto OpNum = 3071 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3072 const auto &Op = Inst.getOperand(OpNum); 3073 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3074 return Match_InvalidOperand; 3075 } 3076 } 3077 3078 return Match_Success; 3079} 3080 3081static ArrayRef<unsigned> getAllVariants() { 3082 static const unsigned Variants[] = { 3083 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3084 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3085 }; 3086 3087 return makeArrayRef(Variants); 3088} 3089 3090// What asm variants we should check 3091ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3092 if (getForcedEncodingSize() == 32) { 3093 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3094 return makeArrayRef(Variants); 3095 } 3096 3097 if (isForcedVOP3()) { 3098 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3099 return makeArrayRef(Variants); 3100 } 3101 3102 if (isForcedSDWA()) { 3103 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3104 AMDGPUAsmVariants::SDWA9}; 3105 return makeArrayRef(Variants); 3106 } 3107 3108 if (isForcedDPP()) { 3109 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3110 return makeArrayRef(Variants); 3111 } 3112 3113 return 
getAllVariants(); 3114} 3115 3116StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3117 if (getForcedEncodingSize() == 32) 3118 return "e32"; 3119 3120 if (isForcedVOP3()) 3121 return "e64"; 3122 3123 if (isForcedSDWA()) 3124 return "sdwa"; 3125 3126 if (isForcedDPP()) 3127 return "dpp"; 3128 3129 return ""; 3130} 3131 3132unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3133 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3134 const unsigned Num = Desc.getNumImplicitUses(); 3135 for (unsigned i = 0; i < Num; ++i) { 3136 unsigned Reg = Desc.ImplicitUses[i]; 3137 switch (Reg) { 3138 case AMDGPU::FLAT_SCR: 3139 case AMDGPU::VCC: 3140 case AMDGPU::VCC_LO: 3141 case AMDGPU::VCC_HI: 3142 case AMDGPU::M0: 3143 return Reg; 3144 default: 3145 break; 3146 } 3147 } 3148 return AMDGPU::NoRegister; 3149} 3150 3151// NB: This code is correct only when used to check constant 3152// bus limitations, because GFX7 supports no f16 inline constants. 3153// Note that there are no cases where a GFX7 opcode violates 3154// constant bus limitations due to the use of an f16 constant. 3155bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3156 unsigned OpIdx) const { 3157 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3158 3159 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3160 return false; 3161 } 3162 3163 const MCOperand &MO = Inst.getOperand(OpIdx); 3164 3165 int64_t Val = MO.getImm(); 3166 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3167 3168 switch (OpSize) { // expected operand size 3169 case 8: 3170 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3171 case 4: 3172 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3173 case 2: { 3174 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3175 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3176 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3177 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3178 return AMDGPU::isInlinableIntLiteral(Val); 3179 3180 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3182 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3183 return AMDGPU::isInlinableIntLiteralV216(Val); 3184 3185 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3186 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3187 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3188 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3189 3190 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3191 } 3192 default: 3193 llvm_unreachable("invalid operand size"); 3194 } 3195} 3196 3197unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3198 if (!isGFX10Plus()) 3199 return 1; 3200 3201 switch (Opcode) { 3202 // 64-bit shift instructions can use only one scalar value input 3203 case AMDGPU::V_LSHLREV_B64_e64: 3204 case AMDGPU::V_LSHLREV_B64_gfx10: 3205 case AMDGPU::V_LSHRREV_B64_e64: 3206 case AMDGPU::V_LSHRREV_B64_gfx10: 3207 case AMDGPU::V_ASHRREV_I64_e64: 3208 case AMDGPU::V_ASHRREV_I64_gfx10: 3209 case AMDGPU::V_LSHL_B64_e64: 3210 case AMDGPU::V_LSHR_B64_e64: 3211 case AMDGPU::V_ASHR_I64_e64: 3212 return 1; 3213 default: 3214 return 2; 3215 } 3216} 3217 3218bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3219 const MCOperand &MO = Inst.getOperand(OpIdx); 3220 if (MO.isImm()) { 3221 return !isInlineConstant(Inst, OpIdx); 3222 } else if (MO.isReg()) { 3223 auto Reg = MO.getReg(); 3224 const MCRegisterInfo *TRI
= getContext().getRegisterInfo(); 3225 auto PReg = mc2PseudoReg(Reg); 3226 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3227 } else { 3228 return true; 3229 } 3230} 3231 3232bool 3233AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3234 const OperandVector &Operands) { 3235 const unsigned Opcode = Inst.getOpcode(); 3236 const MCInstrDesc &Desc = MII.get(Opcode); 3237 unsigned LastSGPR = AMDGPU::NoRegister; 3238 unsigned ConstantBusUseCount = 0; 3239 unsigned NumLiterals = 0; 3240 unsigned LiteralSize; 3241 3242 if (Desc.TSFlags & 3243 (SIInstrFlags::VOPC | 3244 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3245 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3246 SIInstrFlags::SDWA)) { 3247 // Check special imm operands (used by madmk, etc) 3248 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3249 ++ConstantBusUseCount; 3250 } 3251 3252 SmallDenseSet<unsigned> SGPRsUsed; 3253 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3254 if (SGPRUsed != AMDGPU::NoRegister) { 3255 SGPRsUsed.insert(SGPRUsed); 3256 ++ConstantBusUseCount; 3257 } 3258 3259 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3260 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3261 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3262 3263 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3264 3265 for (int OpIdx : OpIndices) { 3266 if (OpIdx == -1) break; 3267 3268 const MCOperand &MO = Inst.getOperand(OpIdx); 3269 if (usesConstantBus(Inst, OpIdx)) { 3270 if (MO.isReg()) { 3271 LastSGPR = mc2PseudoReg(MO.getReg()); 3272 // Pairs of registers with a partial intersection like these 3273 // s0, s[0:1] 3274 // flat_scratch_lo, flat_scratch 3275 // flat_scratch_lo, flat_scratch_hi 3276 // are theoretically valid but they are disabled anyway. 3277 // Note that this code mimics SIInstrInfo::verifyInstruction 3278 if (!SGPRsUsed.count(LastSGPR)) { 3279 SGPRsUsed.insert(LastSGPR); 3280 ++ConstantBusUseCount; 3281 } 3282 } else { // Expression or a literal 3283 3284 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3285 continue; // special operand like VINTERP attr_chan 3286 3287 // An instruction may use only one literal. 3288 // This has been validated in a previous step. 3289 // See validateVOP3Literal. 3290 // This literal may be used as more than one operand. 3291 // If all these operands are of the same size, 3292 // this literal counts as one scalar value. 3293 // Otherwise it counts as 2 scalar values. 3294 // See "GFX10 Shader Programming", section 3.6.2.3. 3295 3296 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3297 if (Size < 4) Size = 4; 3298 3299 if (NumLiterals == 0) { 3300 NumLiterals = 1; 3301 LiteralSize = Size; 3302 } else if (LiteralSize != Size) { 3303 NumLiterals = 2; 3304 } 3305 } 3306 } 3307 } 3308 } 3309 ConstantBusUseCount += NumLiterals; 3310 3311 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3312 return true; 3313 3314 SMLoc LitLoc = getLitLoc(Operands); 3315 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3316 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3317 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3318 return false; 3319} 3320 3321bool 3322AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3323 const OperandVector &Operands) { 3324 const unsigned Opcode = Inst.getOpcode(); 3325 const MCInstrDesc &Desc = MII.get(Opcode); 3326 3327 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3328 if (DstIdx == -1 || 3329 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3330 return true; 3331 } 3332 3333 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3334 3335 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3336 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3337 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3338 3339 assert(DstIdx != -1); 3340 const MCOperand &Dst = Inst.getOperand(DstIdx); 3341 assert(Dst.isReg()); 3342 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3343 3344 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3345 3346 for (int SrcIdx : SrcIndices) { 3347 if (SrcIdx == -1) break; 3348 const MCOperand &Src = Inst.getOperand(SrcIdx); 3349 if (Src.isReg()) { 3350 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3351 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3352 Error(getRegLoc(SrcReg, Operands), 3353 "destination must be different than all sources"); 3354 return false; 3355 } 3356 } 3357 } 3358 3359 return true; 3360} 3361 3362bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3363 3364 const unsigned Opc = Inst.getOpcode(); 3365 const MCInstrDesc &Desc = MII.get(Opc); 3366 3367 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3368 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3369 assert(ClampIdx != -1); 3370 return Inst.getOperand(ClampIdx).getImm() == 0; 3371 } 3372 3373 return true; 3374} 3375 3376bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3377 3378 const unsigned Opc = Inst.getOpcode(); 3379 const MCInstrDesc &Desc = MII.get(Opc); 3380 3381 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3382 return true; 3383 3384 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3385 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3386 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3387 3388 assert(VDataIdx != -1); 3389 3390 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3391 return true; 3392 3393 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3394 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3395 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3396 if (DMask == 0) 3397 DMask = 1; 3398 3399 unsigned DataSize = 3400 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3401 if (hasPackedD16()) { 3402 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3403 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3404 DataSize = (DataSize + 1) / 2; 3405 } 3406 3407 return (VDataSize / 4) == DataSize + TFESize; 3408} 3409 3410bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3411 const unsigned Opc = Inst.getOpcode(); 3412 const MCInstrDesc &Desc = MII.get(Opc); 3413 3414 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3415 return true; 3416 3417 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3418 3419 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3420 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3421 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3422 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3423 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3424 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3425 3426 assert(VAddr0Idx != -1); 3427 assert(SrsrcIdx != -1); 3428 assert(SrsrcIdx > VAddr0Idx); 3429 3430 if (DimIdx == -1) 3431 return true; // intersect_ray 3432 3433 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3434 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3435 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3436 unsigned VAddrSize = 3437 IsNSA ? SrsrcIdx - VAddr0Idx 3438 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3439 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3440 3441 unsigned AddrSize = 3442 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3443 3444 if (!IsNSA) { 3445 if (AddrSize > 8) 3446 AddrSize = 16; 3447 else if (AddrSize > 4) 3448 AddrSize = 8; 3449 } 3450 3451 return VAddrSize == AddrSize; 3452} 3453 3454bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3455 3456 const unsigned Opc = Inst.getOpcode(); 3457 const MCInstrDesc &Desc = MII.get(Opc); 3458 3459 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3460 return true; 3461 if (!Desc.mayLoad() || !Desc.mayStore()) 3462 return true; // Not atomic 3463 3464 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3465 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3466 3467 // This is an incomplete check because image_atomic_cmpswap 3468 // may only use 0x3 and 0xf while other atomic operations 3469 // may use 0x1 and 0x3. However these limitations are 3470 // verified when we check that dmask matches dst size. 3471 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3472} 3473 3474bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3475 3476 const unsigned Opc = Inst.getOpcode(); 3477 const MCInstrDesc &Desc = MII.get(Opc); 3478 3479 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3480 return true; 3481 3482 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3483 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3484 3485 // GATHER4 instructions use dmask in a different fashion compared to 3486 // other MIMG instructions. The only useful DMASK values are 3487 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3488 // (red,red,red,red) etc.) The ISA document doesn't mention 3489 // this. 
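  // For example, a gather with dmask:0x2 returns the green component of
  // each of the four sampled texels.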
3490 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3491} 3492 3493bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3494 const unsigned Opc = Inst.getOpcode(); 3495 const MCInstrDesc &Desc = MII.get(Opc); 3496 3497 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3498 return true; 3499 3500 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3501 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3502 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3503 3504 if (!BaseOpcode->MSAA) 3505 return true; 3506 3507 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3508 assert(DimIdx != -1); 3509 3510 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3511 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3512 3513 return DimInfo->MSAA; 3514} 3515 3516static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3517{ 3518 switch (Opcode) { 3519 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3520 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3521 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3522 return true; 3523 default: 3524 return false; 3525 } 3526} 3527 3528// movrels* opcodes should only allow VGPRs as src0. 3529// This is specified in the .td descriptions for VOP1/VOP3, 3530// but SDWA is handled differently. See isSDWAOperand. 3531bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3532 const OperandVector &Operands) { 3533 3534 const unsigned Opc = Inst.getOpcode(); 3535 const MCInstrDesc &Desc = MII.get(Opc); 3536 3537 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3538 return true; 3539 3540 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3541 assert(Src0Idx != -1); 3542 3543 SMLoc ErrLoc; 3544 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3545 if (Src0.isReg()) { 3546 auto Reg = mc2PseudoReg(Src0.getReg()); 3547 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3548 if (!isSGPR(Reg, TRI)) 3549 return true; 3550 ErrLoc = getRegLoc(Reg, Operands); 3551 } else { 3552 ErrLoc = getConstLoc(Operands); 3553 } 3554 3555 Error(ErrLoc, "source operand must be a VGPR"); 3556 return false; 3557} 3558 3559bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3560 const OperandVector &Operands) { 3561 3562 const unsigned Opc = Inst.getOpcode(); 3563 3564 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3565 return true; 3566 3567 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3568 assert(Src0Idx != -1); 3569 3570 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3571 if (!Src0.isReg()) 3572 return true; 3573 3574 auto Reg = mc2PseudoReg(Src0.getReg()); 3575 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3576 if (isSGPR(Reg, TRI)) { 3577 Error(getRegLoc(Reg, Operands), 3578 "source operand must be either a VGPR or an inline constant"); 3579 return false; 3580 } 3581 3582 return true; 3583} 3584 3585bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3586 switch (Inst.getOpcode()) { 3587 default: 3588 return true; 3589 case V_DIV_SCALE_F32_gfx6_gfx7: 3590 case V_DIV_SCALE_F32_vi: 3591 case V_DIV_SCALE_F32_gfx10: 3592 case V_DIV_SCALE_F64_gfx6_gfx7: 3593 case V_DIV_SCALE_F64_vi: 3594 case V_DIV_SCALE_F64_gfx10: 3595 break; 3596 } 3597 3598 // TODO: Check that src0 = src1 or src2.
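  // v_div_scale_* takes no ABS source modifiers, so reject the instruction
  // if any of its sources has the ABS bit set, e.g.
  //   v_div_scale_f32 v0, vcc, |v1|, v2, v3  // invalid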
3599 3600 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3601 AMDGPU::OpName::src1_modifiers, 3602 AMDGPU::OpName::src2_modifiers}) { 3603 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3604 .getImm() & 3605 SISrcMods::ABS) { 3606 return false; 3607 } 3608 } 3609 3610 return true; 3611 } 3612 3613 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3614 3615 const unsigned Opc = Inst.getOpcode(); 3616 const MCInstrDesc &Desc = MII.get(Opc); 3617 3618 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3619 return true; 3620 3621 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3622 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3623 if (isCI() || isSI()) 3624 return false; 3625 } 3626 3627 return true; 3628 } 3629 3630 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3631 const unsigned Opc = Inst.getOpcode(); 3632 const MCInstrDesc &Desc = MII.get(Opc); 3633 3634 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3635 return true; 3636 3637 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3638 if (DimIdx < 0) 3639 return true; 3640 3641 int64_t Imm = Inst.getOperand(DimIdx).getImm(); 3642 if (Imm < 0 || Imm >= 8) 3643 return false; 3644 3645 return true; 3646 } 3647 3648 static bool IsRevOpcode(const unsigned Opcode) 3649 { 3650 switch (Opcode) { 3651 case AMDGPU::V_SUBREV_F32_e32: 3652 case AMDGPU::V_SUBREV_F32_e64: 3653 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3654 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3655 case AMDGPU::V_SUBREV_F32_e32_vi: 3656 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3657 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3658 case AMDGPU::V_SUBREV_F32_e64_vi: 3659 3660 case AMDGPU::V_SUBREV_CO_U32_e32: 3661 case AMDGPU::V_SUBREV_CO_U32_e64: 3662 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3663 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3664 3665 case AMDGPU::V_SUBBREV_U32_e32: 3666 case AMDGPU::V_SUBBREV_U32_e64: 3667 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3668 case AMDGPU::V_SUBBREV_U32_e32_vi: 3669 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3670 case AMDGPU::V_SUBBREV_U32_e64_vi: 3671 3672 case AMDGPU::V_SUBREV_U32_e32: 3673 case AMDGPU::V_SUBREV_U32_e64: 3674 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3675 case AMDGPU::V_SUBREV_U32_e32_vi: 3676 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3677 case AMDGPU::V_SUBREV_U32_e64_vi: 3678 3679 case AMDGPU::V_SUBREV_F16_e32: 3680 case AMDGPU::V_SUBREV_F16_e64: 3681 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3682 case AMDGPU::V_SUBREV_F16_e32_vi: 3683 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3684 case AMDGPU::V_SUBREV_F16_e64_vi: 3685 3686 case AMDGPU::V_SUBREV_U16_e32: 3687 case AMDGPU::V_SUBREV_U16_e64: 3688 case AMDGPU::V_SUBREV_U16_e32_vi: 3689 case AMDGPU::V_SUBREV_U16_e64_vi: 3690 3691 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3692 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3693 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3694 3695 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3696 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3697 3698 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3699 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3700 3701 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3702 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3703 3704 case AMDGPU::V_LSHRREV_B32_e32: 3705 case AMDGPU::V_LSHRREV_B32_e64: 3706 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3707 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3708 case AMDGPU::V_LSHRREV_B32_e32_vi: 3709 case AMDGPU::V_LSHRREV_B32_e64_vi: 3710 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3711 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3712 3713 case AMDGPU::V_ASHRREV_I32_e32: 3714 case
AMDGPU::V_ASHRREV_I32_e64: 3715 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3716 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3717 case AMDGPU::V_ASHRREV_I32_e32_vi: 3718 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3719 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3720 case AMDGPU::V_ASHRREV_I32_e64_vi: 3721 3722 case AMDGPU::V_LSHLREV_B32_e32: 3723 case AMDGPU::V_LSHLREV_B32_e64: 3724 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3725 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3726 case AMDGPU::V_LSHLREV_B32_e32_vi: 3727 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3728 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3729 case AMDGPU::V_LSHLREV_B32_e64_vi: 3730 3731 case AMDGPU::V_LSHLREV_B16_e32: 3732 case AMDGPU::V_LSHLREV_B16_e64: 3733 case AMDGPU::V_LSHLREV_B16_e32_vi: 3734 case AMDGPU::V_LSHLREV_B16_e64_vi: 3735 case AMDGPU::V_LSHLREV_B16_gfx10: 3736 3737 case AMDGPU::V_LSHRREV_B16_e32: 3738 case AMDGPU::V_LSHRREV_B16_e64: 3739 case AMDGPU::V_LSHRREV_B16_e32_vi: 3740 case AMDGPU::V_LSHRREV_B16_e64_vi: 3741 case AMDGPU::V_LSHRREV_B16_gfx10: 3742 3743 case AMDGPU::V_ASHRREV_I16_e32: 3744 case AMDGPU::V_ASHRREV_I16_e64: 3745 case AMDGPU::V_ASHRREV_I16_e32_vi: 3746 case AMDGPU::V_ASHRREV_I16_e64_vi: 3747 case AMDGPU::V_ASHRREV_I16_gfx10: 3748 3749 case AMDGPU::V_LSHLREV_B64_e64: 3750 case AMDGPU::V_LSHLREV_B64_gfx10: 3751 case AMDGPU::V_LSHLREV_B64_vi: 3752 3753 case AMDGPU::V_LSHRREV_B64_e64: 3754 case AMDGPU::V_LSHRREV_B64_gfx10: 3755 case AMDGPU::V_LSHRREV_B64_vi: 3756 3757 case AMDGPU::V_ASHRREV_I64_e64: 3758 case AMDGPU::V_ASHRREV_I64_gfx10: 3759 case AMDGPU::V_ASHRREV_I64_vi: 3760 3761 case AMDGPU::V_PK_LSHLREV_B16: 3762 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3763 case AMDGPU::V_PK_LSHLREV_B16_vi: 3764 3765 case AMDGPU::V_PK_LSHRREV_B16: 3766 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3767 case AMDGPU::V_PK_LSHRREV_B16_vi: 3768 case AMDGPU::V_PK_ASHRREV_I16: 3769 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3770 case AMDGPU::V_PK_ASHRREV_I16_vi: 3771 return true; 3772 default: 3773 return false; 3774 } 3775} 3776 3777Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3778 3779 using namespace SIInstrFlags; 3780 const unsigned Opcode = Inst.getOpcode(); 3781 const MCInstrDesc &Desc = MII.get(Opcode); 3782 3783 // lds_direct register is defined so that it can be used 3784 // with 9-bit operands only. Ignore encodings which do not accept these. 
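  // The per-operand scan below rejects the remaining illegal cases:
  // lds_direct is unavailable on gfx90a, is incompatible with reversed
  // and SDWA instructions, and may appear only as src0.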
3785 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3786 if ((Desc.TSFlags & Enc) == 0) 3787 return None; 3788 3789 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3790 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3791 if (SrcIdx == -1) 3792 break; 3793 const auto &Src = Inst.getOperand(SrcIdx); 3794 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3795 3796 if (isGFX90A()) 3797 return StringRef("lds_direct is not supported on this GPU"); 3798 3799 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3800 return StringRef("lds_direct cannot be used with this instruction"); 3801 3802 if (SrcName != OpName::src0) 3803 return StringRef("lds_direct may be used as src0 only"); 3804 } 3805 } 3806 3807 return None; 3808} 3809 3810SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3811 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3812 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3813 if (Op.isFlatOffset()) 3814 return Op.getStartLoc(); 3815 } 3816 return getLoc(); 3817} 3818 3819bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3820 const OperandVector &Operands) { 3821 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3822 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3823 return true; 3824 3825 auto Opcode = Inst.getOpcode(); 3826 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3827 assert(OpNum != -1); 3828 3829 const auto &Op = Inst.getOperand(OpNum); 3830 if (!hasFlatOffsets() && Op.getImm() != 0) { 3831 Error(getFlatOffsetLoc(Operands), 3832 "flat offset modifier is not supported on this GPU"); 3833 return false; 3834 } 3835 3836 // For FLAT segment the offset must be positive; 3837 // MSB is ignored and forced to zero. 3838 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3839 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3840 if (!isIntN(OffsetSize, Op.getImm())) { 3841 Error(getFlatOffsetLoc(Operands), 3842 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3843 return false; 3844 } 3845 } else { 3846 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3847 if (!isUIntN(OffsetSize, Op.getImm())) { 3848 Error(getFlatOffsetLoc(Operands), 3849 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3850 return false; 3851 } 3852 } 3853 3854 return true; 3855} 3856 3857SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3858 // Start with second operand because SMEM Offset cannot be dst or src0. 
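  // Note that Operands[0] is the mnemonic token, so index 2 is the second
  // explicit operand of the instruction.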
3859 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3860 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3861 if (Op.isSMEMOffset()) 3862 return Op.getStartLoc(); 3863 } 3864 return getLoc(); 3865} 3866 3867bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3868 const OperandVector &Operands) { 3869 if (isCI() || isSI()) 3870 return true; 3871 3872 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3873 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3874 return true; 3875 3876 auto Opcode = Inst.getOpcode(); 3877 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3878 if (OpNum == -1) 3879 return true; 3880 3881 const auto &Op = Inst.getOperand(OpNum); 3882 if (!Op.isImm()) 3883 return true; 3884 3885 uint64_t Offset = Op.getImm(); 3886 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3887 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3888 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3889 return true; 3890 3891 Error(getSMEMOffsetLoc(Operands), 3892 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3893 "expected a 21-bit signed offset"); 3894 3895 return false; 3896} 3897 3898bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3899 unsigned Opcode = Inst.getOpcode(); 3900 const MCInstrDesc &Desc = MII.get(Opcode); 3901 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3902 return true; 3903 3904 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3905 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3906 3907 const int OpIndices[] = { Src0Idx, Src1Idx }; 3908 3909 unsigned NumExprs = 0; 3910 unsigned NumLiterals = 0; 3911 uint32_t LiteralValue; 3912 3913 for (int OpIdx : OpIndices) { 3914 if (OpIdx == -1) break; 3915 3916 const MCOperand &MO = Inst.getOperand(OpIdx); 3917 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3918 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3919 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3920 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3921 if (NumLiterals == 0 || LiteralValue != Value) { 3922 LiteralValue = Value; 3923 ++NumLiterals; 3924 } 3925 } else if (MO.isExpr()) { 3926 ++NumExprs; 3927 } 3928 } 3929 } 3930 3931 return NumLiterals + NumExprs <= 1; 3932} 3933 3934bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3935 const unsigned Opc = Inst.getOpcode(); 3936 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3937 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3938 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3939 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3940 3941 if (OpSel & ~3) 3942 return false; 3943 } 3944 return true; 3945} 3946 3947bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3948 const OperandVector &Operands) { 3949 const unsigned Opc = Inst.getOpcode(); 3950 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3951 if (DppCtrlIdx < 0) 3952 return true; 3953 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3954 3955 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3956 // DPP64 is supported for row_newbcast only. 
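    // A 64-bit operand is detected by src0 having a sub1 subregister; such
    // instructions must not use any other dpp_ctrl value.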
3957 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3958 if (Src0Idx >= 0 && 3959 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3960 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3961 Error(S, "64 bit dpp only supports row_newbcast"); 3962 return false; 3963 } 3964 } 3965 3966 return true; 3967} 3968 3969// Check if VCC register matches wavefront size 3970bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3971 auto FB = getFeatureBits(); 3972 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3973 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3974} 3975 3976// VOP3 literal is only allowed in GFX10+ and only one can be used 3977bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3978 const OperandVector &Operands) { 3979 unsigned Opcode = Inst.getOpcode(); 3980 const MCInstrDesc &Desc = MII.get(Opcode); 3981 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3982 return true; 3983 3984 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3985 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3986 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3987 3988 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3989 3990 unsigned NumExprs = 0; 3991 unsigned NumLiterals = 0; 3992 uint32_t LiteralValue; 3993 3994 for (int OpIdx : OpIndices) { 3995 if (OpIdx == -1) break; 3996 3997 const MCOperand &MO = Inst.getOperand(OpIdx); 3998 if (!MO.isImm() && !MO.isExpr()) 3999 continue; 4000 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4001 continue; 4002 4003 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4004 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4005 Error(getConstLoc(Operands), 4006 "inline constants are not allowed for this operand"); 4007 return false; 4008 } 4009 4010 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4011 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4012 if (NumLiterals == 0 || LiteralValue != Value) { 4013 LiteralValue = Value; 4014 ++NumLiterals; 4015 } 4016 } else if (MO.isExpr()) { 4017 ++NumExprs; 4018 } 4019 } 4020 NumLiterals += NumExprs; 4021 4022 if (!NumLiterals) 4023 return true; 4024 4025 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4026 Error(getLitLoc(Operands), "literal operands are not supported"); 4027 return false; 4028 } 4029 4030 if (NumLiterals > 1) { 4031 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4032 return false; 4033 } 4034 4035 return true; 4036} 4037 4038// Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4039static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4040 const MCRegisterInfo *MRI) { 4041 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4042 if (OpIdx < 0) 4043 return -1; 4044 4045 const MCOperand &Op = Inst.getOperand(OpIdx); 4046 if (!Op.isReg()) 4047 return -1; 4048 4049 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4050 auto Reg = Sub ? Sub : Op.getReg(); 4051 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4052 return AGRP32.contains(Reg) ? 
1 : 0; 4053} 4054 4055bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4056 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4057 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4058 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4059 SIInstrFlags::DS)) == 0) 4060 return true; 4061 4062 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4063 : AMDGPU::OpName::vdata; 4064 4065 const MCRegisterInfo *MRI = getMRI(); 4066 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4067 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4068 4069 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4070 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4071 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4072 return false; 4073 } 4074 4075 auto FB = getFeatureBits(); 4076 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4077 if (DataAreg < 0 || DstAreg < 0) 4078 return true; 4079 return DstAreg == DataAreg; 4080 } 4081 4082 return DstAreg < 1 && DataAreg < 1; 4083} 4084 4085bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4086 auto FB = getFeatureBits(); 4087 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4088 return true; 4089 4090 const MCRegisterInfo *MRI = getMRI(); 4091 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4092 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4093 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4094 const MCOperand &Op = Inst.getOperand(I); 4095 if (!Op.isReg()) 4096 continue; 4097 4098 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4099 if (!Sub) 4100 continue; 4101 4102 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4103 return false; 4104 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4105 return false; 4106 } 4107 4108 return true; 4109} 4110 4111bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4112 const OperandVector &Operands, 4113 const SMLoc &IDLoc) { 4114 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4115 AMDGPU::OpName::cpol); 4116 if (CPolPos == -1) 4117 return true; 4118 4119 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4120 4121 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4122 if ((TSFlags & (SIInstrFlags::SMRD)) && 4123 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4124 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4125 return false; 4126 } 4127 4128 if (isGFX90A() && (CPol & CPol::SCC)) { 4129 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4130 StringRef CStr(S.getPointer()); 4131 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4132 Error(S, "scc is not supported on this GPU"); 4133 return false; 4134 } 4135 4136 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4137 return true; 4138 4139 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4140 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4141 Error(IDLoc, "instruction must use glc"); 4142 return false; 4143 } 4144 } else { 4145 if (CPol & CPol::GLC) { 4146 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4147 StringRef CStr(S.getPointer()); 4148 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4149 Error(S, "instruction must not use glc"); 4150 return false; 4151 } 4152 } 4153 4154 return true; 4155} 4156 4157bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4158 const SMLoc &IDLoc, 4159 const OperandVector &Operands) { 4160 if (auto ErrMsg = validateLdsDirect(Inst)) { 4161 
Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4162 return false; 4163 } 4164 if (!validateSOPLiteral(Inst)) { 4165 Error(getLitLoc(Operands), 4166 "only one literal operand is allowed"); 4167 return false; 4168 } 4169 if (!validateVOP3Literal(Inst, Operands)) { 4170 return false; 4171 } 4172 if (!validateConstantBusLimitations(Inst, Operands)) { 4173 return false; 4174 } 4175 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4176 return false; 4177 } 4178 if (!validateIntClampSupported(Inst)) { 4179 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4180 "integer clamping is not supported on this GPU"); 4181 return false; 4182 } 4183 if (!validateOpSel(Inst)) { 4184 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4185 "invalid op_sel operand"); 4186 return false; 4187 } 4188 if (!validateDPP(Inst, Operands)) { 4189 return false; 4190 } 4191 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4192 if (!validateMIMGD16(Inst)) { 4193 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4194 "d16 modifier is not supported on this GPU"); 4195 return false; 4196 } 4197 if (!validateMIMGDim(Inst)) { 4198 Error(IDLoc, "dim modifier is required on this GPU"); 4199 return false; 4200 } 4201 if (!validateMIMGMSAA(Inst)) { 4202 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4203 "invalid dim; must be MSAA type"); 4204 return false; 4205 } 4206 if (!validateMIMGDataSize(Inst)) { 4207 Error(IDLoc, 4208 "image data size does not match dmask and tfe"); 4209 return false; 4210 } 4211 if (!validateMIMGAddrSize(Inst)) { 4212 Error(IDLoc, 4213 "image address size does not match dim and a16"); 4214 return false; 4215 } 4216 if (!validateMIMGAtomicDMask(Inst)) { 4217 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4218 "invalid atomic image dmask"); 4219 return false; 4220 } 4221 if (!validateMIMGGatherDMask(Inst)) { 4222 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4223 "invalid image_gather dmask: only one bit must be set"); 4224 return false; 4225 } 4226 if (!validateMovrels(Inst, Operands)) { 4227 return false; 4228 } 4229 if (!validateFlatOffset(Inst, Operands)) { 4230 return false; 4231 } 4232 if (!validateSMEMOffset(Inst, Operands)) { 4233 return false; 4234 } 4235 if (!validateMAIAccWrite(Inst, Operands)) { 4236 return false; 4237 } 4238 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4239 return false; 4240 } 4241 4242 if (!validateAGPRLdSt(Inst)) { 4243 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4244 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4245 : "invalid register class: agpr loads and stores not supported on this GPU" 4246 ); 4247 return false; 4248 } 4249 if (!validateVGPRAlign(Inst)) { 4250 Error(IDLoc, 4251 "invalid register class: vgpr tuples must be 64 bit aligned"); 4252 return false; 4253 } 4254 4255 if (!validateDivScale(Inst)) { 4256 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4257 return false; 4258 } 4259 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4260 return false; 4261 } 4262 4263 return true; 4264} 4265 4266static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4267 const FeatureBitset &FBS, 4268 unsigned VariantID = 0); 4269 4270static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4271 const FeatureBitset &AvailableFeatures, 4272 unsigned VariantID); 4273 4274bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4275 const FeatureBitset &FBS) { 4276 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4277} 4278 4279bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4280 const FeatureBitset &FBS, 4281 ArrayRef<unsigned> Variants) { 4282 for (auto Variant : Variants) { 4283 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4284 return true; 4285 } 4286 4287 return false; 4288} 4289 4290bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4291 const SMLoc &IDLoc) { 4292 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4293 4294 // Check if requested instruction variant is supported. 4295 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4296 return false; 4297 4298 // This instruction is not supported. 4299 // Clear any other pending errors because they are no longer relevant. 4300 getParser().clearPendingErrors(); 4301 4302 // Requested instruction variant is not supported. 4303 // Check if any other variants are supported. 4304 StringRef VariantName = getMatchedVariantName(); 4305 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4306 return Error(IDLoc, 4307 Twine(VariantName, 4308 " variant of this instruction is not supported")); 4309 } 4310 4311 // Finally check if this instruction is supported on any other GPU. 4312 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4313 return Error(IDLoc, "instruction not supported on this GPU"); 4314 } 4315 4316 // Instruction not supported on any GPU. Probably a typo. 4317 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4318 return Error(IDLoc, "invalid instruction" + Suggestion); 4319} 4320 4321bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4322 OperandVector &Operands, 4323 MCStreamer &Out, 4324 uint64_t &ErrorInfo, 4325 bool MatchingInlineAsm) { 4326 MCInst Inst; 4327 unsigned Result = Match_Success; 4328 for (auto Variant : getMatchedVariants()) { 4329 uint64_t EI; 4330 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4331 Variant); 4332 // We order match statuses from least to most specific. 
specific 4333 // status as the result: 4334 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4335 if ((R == Match_Success) || 4336 (R == Match_PreferE32) || 4337 (R == Match_MissingFeature && Result != Match_PreferE32) || 4338 (R == Match_InvalidOperand && Result != Match_MissingFeature 4339 && Result != Match_PreferE32) || 4340 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4341 && Result != Match_MissingFeature 4342 && Result != Match_PreferE32)) { 4343 Result = R; 4344 ErrorInfo = EI; 4345 } 4346 if (R == Match_Success) 4347 break; 4348 } 4349 4350 if (Result == Match_Success) { 4351 if (!validateInstruction(Inst, IDLoc, Operands)) { 4352 return true; 4353 } 4354 Inst.setLoc(IDLoc); 4355 Out.emitInstruction(Inst, getSTI()); 4356 return false; 4357 } 4358 4359 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4360 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4361 return true; 4362 } 4363 4364 switch (Result) { 4365 default: break; 4366 case Match_MissingFeature: 4367 // It has been verified that the specified instruction 4368 // mnemonic is valid. A match was found but it requires 4369 // features which are not supported on this GPU. 4370 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4371 4372 case Match_InvalidOperand: { 4373 SMLoc ErrorLoc = IDLoc; 4374 if (ErrorInfo != ~0ULL) { 4375 if (ErrorInfo >= Operands.size()) { 4376 return Error(IDLoc, "too few operands for instruction"); 4377 } 4378 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4379 if (ErrorLoc == SMLoc()) 4380 ErrorLoc = IDLoc; 4381 } 4382 return Error(ErrorLoc, "invalid operand for instruction"); 4383 } 4384 4385 case Match_PreferE32: 4386 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4387 "should be encoded as e32"); 4388 case Match_MnemonicFail: 4389 llvm_unreachable("Invalid instructions should have been handled already"); 4390 } 4391 llvm_unreachable("Implement any new match types added!"); 4392 } 4393 4394 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4395 int64_t Tmp = -1; 4396 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4397 return true; 4398 } 4399 if (getParser().parseAbsoluteExpression(Tmp)) { 4400 return true; 4401 } 4402 Ret = static_cast<uint32_t>(Tmp); 4403 return false; 4404 } 4405 4406 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4407 uint32_t &Minor) { 4408 if (ParseAsAbsoluteExpression(Major)) 4409 return TokError("invalid major version"); 4410 4411 if (!trySkipToken(AsmToken::Comma)) 4412 return TokError("minor version number required, comma expected"); 4413 4414 if (ParseAsAbsoluteExpression(Minor)) 4415 return TokError("invalid minor version"); 4416 4417 return false; 4418 } 4419 4420 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4421 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4422 return TokError("directive only supported for amdgcn architecture"); 4423 4424 std::string TargetIDDirective; 4425 SMLoc TargetStart = getTok().getLoc(); 4426 if (getParser().parseEscapedString(TargetIDDirective)) 4427 return true; 4428 4429 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4430 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4431 return getParser().Error(TargetRange.Start, 4432 (Twine(".amdgcn_target directive's target id ") + 4433 Twine(TargetIDDirective) + 4434 Twine(" does not match the specified target id ") + 4435
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4436 4437 return false; 4438} 4439 4440bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4441 return Error(Range.Start, "value out of range", Range); 4442} 4443 4444bool AMDGPUAsmParser::calculateGPRBlocks( 4445 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4446 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4447 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4448 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4449 // TODO(scott.linder): These calculations are duplicated from 4450 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4451 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4452 4453 unsigned NumVGPRs = NextFreeVGPR; 4454 unsigned NumSGPRs = NextFreeSGPR; 4455 4456 if (Version.Major >= 10) 4457 NumSGPRs = 0; 4458 else { 4459 unsigned MaxAddressableNumSGPRs = 4460 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4461 4462 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4463 NumSGPRs > MaxAddressableNumSGPRs) 4464 return OutOfRangeError(SGPRRange); 4465 4466 NumSGPRs += 4467 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4468 4469 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4470 NumSGPRs > MaxAddressableNumSGPRs) 4471 return OutOfRangeError(SGPRRange); 4472 4473 if (Features.test(FeatureSGPRInitBug)) 4474 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4475 } 4476 4477 VGPRBlocks = 4478 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4479 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4480 4481 return false; 4482} 4483 4484bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4485 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4486 return TokError("directive only supported for amdgcn architecture"); 4487 4488 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4489 return TokError("directive only supported for amdhsa OS"); 4490 4491 StringRef KernelName; 4492 if (getParser().parseIdentifier(KernelName)) 4493 return true; 4494 4495 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4496 4497 StringSet<> Seen; 4498 4499 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4500 4501 SMRange VGPRRange; 4502 uint64_t NextFreeVGPR = 0; 4503 uint64_t AccumOffset = 0; 4504 SMRange SGPRRange; 4505 uint64_t NextFreeSGPR = 0; 4506 unsigned UserSGPRCount = 0; 4507 bool ReserveVCC = true; 4508 bool ReserveFlatScr = true; 4509 Optional<bool> EnableWavefrontSize32; 4510 4511 while (true) { 4512 while (trySkipToken(AsmToken::EndOfStatement)); 4513 4514 StringRef ID; 4515 SMRange IDRange = getTok().getLocRange(); 4516 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4517 return true; 4518 4519 if (ID == ".end_amdhsa_kernel") 4520 break; 4521 4522 if (Seen.find(ID) != Seen.end()) 4523 return TokError(".amdhsa_ directives cannot be repeated"); 4524 Seen.insert(ID); 4525 4526 SMLoc ValStart = getLoc(); 4527 int64_t IVal; 4528 if (getParser().parseAbsoluteExpression(IVal)) 4529 return true; 4530 SMLoc ValEnd = getLoc(); 4531 SMRange ValRange = SMRange(ValStart, ValEnd); 4532 4533 if (IVal < 0) 4534 return OutOfRangeError(ValRange); 4535 4536 uint64_t Val = IVal; 4537 4538#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4539 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4540 return OutOfRangeError(RANGE); \ 4541 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4542 4543 if (ID == ".amdhsa_group_segment_fixed_size") { 4544 
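      // The value is validated against the width of the descriptor member
      // itself (a full 32-bit field) rather than a *_WIDTH macro; e.g. a
      // directive such as ".amdhsa_group_segment_fixed_size 1024" (value
      // illustrative) records 1024 bytes of group-segment (LDS) usage.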
if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4545 return OutOfRangeError(ValRange); 4546 KD.group_segment_fixed_size = Val; 4547 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4548 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4549 return OutOfRangeError(ValRange); 4550 KD.private_segment_fixed_size = Val; 4551 } else if (ID == ".amdhsa_kernarg_size") { 4552 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4553 return OutOfRangeError(ValRange); 4554 KD.kernarg_size = Val; 4555 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4556 if (hasArchitectedFlatScratch()) 4557 return Error(IDRange.Start, 4558 "directive is not supported with architected flat scratch", 4559 IDRange); 4560 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4561 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4562 Val, ValRange); 4563 if (Val) 4564 UserSGPRCount += 4; 4565 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4566 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4567 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4568 ValRange); 4569 if (Val) 4570 UserSGPRCount += 2; 4571 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4572 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4573 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4574 ValRange); 4575 if (Val) 4576 UserSGPRCount += 2; 4577 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4578 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4579 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4580 Val, ValRange); 4581 if (Val) 4582 UserSGPRCount += 2; 4583 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4584 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4585 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4586 ValRange); 4587 if (Val) 4588 UserSGPRCount += 2; 4589 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4590 if (hasArchitectedFlatScratch()) 4591 return Error(IDRange.Start, 4592 "directive is not supported with architected flat scratch", 4593 IDRange); 4594 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4595 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4596 ValRange); 4597 if (Val) 4598 UserSGPRCount += 2; 4599 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4600 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4601 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4602 Val, ValRange); 4603 if (Val) 4604 UserSGPRCount += 1; 4605 } else if (ID == ".amdhsa_wavefront_size32") { 4606 if (IVersion.Major < 10) 4607 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4608 EnableWavefrontSize32 = Val; 4609 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4610 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4611 Val, ValRange); 4612 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4613 if (hasArchitectedFlatScratch()) 4614 return Error(IDRange.Start, 4615 "directive is not supported with architected flat scratch", 4616 IDRange); 4617 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4618 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4619 } else if (ID == ".amdhsa_enable_private_segment") { 4620 if (!hasArchitectedFlatScratch()) 4621 return Error( 4622 IDRange.Start, 4623 "directive is not supported without architected flat scratch", 4624 IDRange); 4625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4626 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4627 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4628 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4629 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 
Val, 4630 ValRange); 4631 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4632 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4633 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4634 ValRange); 4635 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4636 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4637 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4638 ValRange); 4639 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4641 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4642 ValRange); 4643 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4644 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4645 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4646 ValRange); 4647 } else if (ID == ".amdhsa_next_free_vgpr") { 4648 VGPRRange = ValRange; 4649 NextFreeVGPR = Val; 4650 } else if (ID == ".amdhsa_next_free_sgpr") { 4651 SGPRRange = ValRange; 4652 NextFreeSGPR = Val; 4653 } else if (ID == ".amdhsa_accum_offset") { 4654 if (!isGFX90A()) 4655 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4656 AccumOffset = Val; 4657 } else if (ID == ".amdhsa_reserve_vcc") { 4658 if (!isUInt<1>(Val)) 4659 return OutOfRangeError(ValRange); 4660 ReserveVCC = Val; 4661 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4662 if (IVersion.Major < 7) 4663 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4664 if (hasArchitectedFlatScratch()) 4665 return Error(IDRange.Start, 4666 "directive is not supported with architected flat scratch", 4667 IDRange); 4668 if (!isUInt<1>(Val)) 4669 return OutOfRangeError(ValRange); 4670 ReserveFlatScr = Val; 4671 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4672 if (IVersion.Major < 8) 4673 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4674 if (!isUInt<1>(Val)) 4675 return OutOfRangeError(ValRange); 4676 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4677 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4678 IDRange); 4679 } else if (ID == ".amdhsa_float_round_mode_32") { 4680 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4681 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4682 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4683 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4684 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4685 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4687 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4688 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4689 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4690 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4691 ValRange); 4692 } else if (ID == ".amdhsa_dx10_clamp") { 4693 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4694 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4695 } else if (ID == ".amdhsa_ieee_mode") { 4696 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4697 Val, ValRange); 4698 } else if (ID == ".amdhsa_fp16_overflow") { 4699 if (IVersion.Major < 9) 4700 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4702 ValRange); 4703 } else if (ID == ".amdhsa_tg_split") { 4704 if (!isGFX90A()) 4705 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4706 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4707 ValRange); 4708 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4709 if (IVersion.Major < 
10) 4710 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4711 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4712 ValRange); 4713 } else if (ID == ".amdhsa_memory_ordered") { 4714 if (IVersion.Major < 10) 4715 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4716 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4717 ValRange); 4718 } else if (ID == ".amdhsa_forward_progress") { 4719 if (IVersion.Major < 10) 4720 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4722 ValRange); 4723 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4724 PARSE_BITS_ENTRY( 4725 KD.compute_pgm_rsrc2, 4726 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4727 ValRange); 4728 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4729 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4730 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4731 Val, ValRange); 4732 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4733 PARSE_BITS_ENTRY( 4734 KD.compute_pgm_rsrc2, 4735 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4736 ValRange); 4737 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4738 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4739 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4740 Val, ValRange); 4741 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4743 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4744 Val, ValRange); 4745 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4747 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4748 Val, ValRange); 4749 } else if (ID == ".amdhsa_exception_int_div_zero") { 4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4751 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4752 Val, ValRange); 4753 } else { 4754 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4755 } 4756 4757#undef PARSE_BITS_ENTRY 4758 } 4759 4760 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4761 return TokError(".amdhsa_next_free_vgpr directive is required"); 4762 4763 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4764 return TokError(".amdhsa_next_free_sgpr directive is required"); 4765 4766 unsigned VGPRBlocks; 4767 unsigned SGPRBlocks; 4768 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4769 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4770 EnableWavefrontSize32, NextFreeVGPR, 4771 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4772 SGPRBlocks)) 4773 return true; 4774 4775 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4776 VGPRBlocks)) 4777 return OutOfRangeError(VGPRRange); 4778 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4779 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4780 4781 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4782 SGPRBlocks)) 4783 return OutOfRangeError(SGPRRange); 4784 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4785 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4786 SGPRBlocks); 4787 4788 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4789 return TokError("too many user SGPRs enabled"); 4790 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4791 UserSGPRCount); 4792 4793 if (isGFX90A()) { 4794 if (Seen.find(".amdhsa_accum_offset") == 
Seen.end()) 4795 return TokError(".amdhsa_accum_offset directive is required"); 4796 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4797 return TokError("accum_offset should be in range [4..256] in " 4798 "increments of 4"); 4799 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4800 return TokError("accum_offset exceeds total VGPR allocation"); 4801 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4802 (AccumOffset / 4 - 1)); 4803 } 4804 4805 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4806 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4807 ReserveFlatScr); 4808 return false; 4809} 4810 4811bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4812 uint32_t Major; 4813 uint32_t Minor; 4814 4815 if (ParseDirectiveMajorMinor(Major, Minor)) 4816 return true; 4817 4818 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4819 return false; 4820} 4821 4822bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4823 uint32_t Major; 4824 uint32_t Minor; 4825 uint32_t Stepping; 4826 StringRef VendorName; 4827 StringRef ArchName; 4828 4829 // If this directive has no arguments, then use the ISA version for the 4830 // targeted GPU. 4831 if (isToken(AsmToken::EndOfStatement)) { 4832 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4833 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4834 ISA.Stepping, 4835 "AMD", "AMDGPU"); 4836 return false; 4837 } 4838 4839 if (ParseDirectiveMajorMinor(Major, Minor)) 4840 return true; 4841 4842 if (!trySkipToken(AsmToken::Comma)) 4843 return TokError("stepping version number required, comma expected"); 4844 4845 if (ParseAsAbsoluteExpression(Stepping)) 4846 return TokError("invalid stepping version"); 4847 4848 if (!trySkipToken(AsmToken::Comma)) 4849 return TokError("vendor name required, comma expected"); 4850 4851 if (!parseString(VendorName, "invalid vendor name")) 4852 return true; 4853 4854 if (!trySkipToken(AsmToken::Comma)) 4855 return TokError("arch name required, comma expected"); 4856 4857 if (!parseString(ArchName, "invalid arch name")) 4858 return true; 4859 4860 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4861 VendorName, ArchName); 4862 return false; 4863} 4864 4865bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4866 amd_kernel_code_t &Header) { 4867 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4868 // assembly for backwards compatibility. 
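  // The value is consumed with eatToEndOfStatement() and discarded so that
  // older assembly still parses cleanly.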
4869 if (ID == "max_scratch_backing_memory_byte_size") { 4870 Parser.eatToEndOfStatement(); 4871 return false; 4872 } 4873 4874 SmallString<40> ErrStr; 4875 raw_svector_ostream Err(ErrStr); 4876 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4877 return TokError(Err.str()); 4878 } 4879 Lex(); 4880 4881 if (ID == "enable_wavefront_size32") { 4882 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4883 if (!isGFX10Plus()) 4884 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4885 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4886 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4887 } else { 4888 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4889 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4890 } 4891 } 4892 4893 if (ID == "wavefront_size") { 4894 if (Header.wavefront_size == 5) { 4895 if (!isGFX10Plus()) 4896 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4897 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4898 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4899 } else if (Header.wavefront_size == 6) { 4900 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4901 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4902 } 4903 } 4904 4905 if (ID == "enable_wgp_mode") { 4906 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4907 !isGFX10Plus()) 4908 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4909 } 4910 4911 if (ID == "enable_mem_ordered") { 4912 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4913 !isGFX10Plus()) 4914 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4915 } 4916 4917 if (ID == "enable_fwd_progress") { 4918 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4919 !isGFX10Plus()) 4920 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4921 } 4922 4923 return false; 4924} 4925 4926bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4927 amd_kernel_code_t Header; 4928 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4929 4930 while (true) { 4931 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4932 // will set the current token to EndOfStatement. 
4933 while(trySkipToken(AsmToken::EndOfStatement)); 4934 4935 StringRef ID; 4936 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4937 return true; 4938 4939 if (ID == ".end_amd_kernel_code_t") 4940 break; 4941 4942 if (ParseAMDKernelCodeTValue(ID, Header)) 4943 return true; 4944 } 4945 4946 getTargetStreamer().EmitAMDKernelCodeT(Header); 4947 4948 return false; 4949} 4950 4951bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4952 StringRef KernelName; 4953 if (!parseId(KernelName, "expected symbol name")) 4954 return true; 4955 4956 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4957 ELF::STT_AMDGPU_HSA_KERNEL); 4958 4959 KernelScope.initialize(getContext()); 4960 return false; 4961} 4962 4963bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4964 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4965 return Error(getLoc(), 4966 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4967 "architectures"); 4968 } 4969 4970 auto TargetIDDirective = getLexer().getTok().getStringContents(); 4971 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4972 return Error(getParser().getTok().getLoc(), "target id must match options"); 4973 4974 getTargetStreamer().EmitISAVersion(); 4975 Lex(); 4976 4977 return false; 4978} 4979 4980bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4981 const char *AssemblerDirectiveBegin; 4982 const char *AssemblerDirectiveEnd; 4983 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4984 isHsaAbiVersion3Or4(&getSTI()) 4985 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4986 HSAMD::V3::AssemblerDirectiveEnd) 4987 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4988 HSAMD::AssemblerDirectiveEnd); 4989 4990 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4991 return Error(getLoc(), 4992 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4993 "not available on non-amdhsa OSes")).str()); 4994 } 4995 4996 std::string HSAMetadataString; 4997 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4998 HSAMetadataString)) 4999 return true; 5000 5001 if (isHsaAbiVersion3Or4(&getSTI())) { 5002 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5003 return Error(getLoc(), "invalid HSA metadata"); 5004 } else { 5005 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5006 return Error(getLoc(), "invalid HSA metadata"); 5007 } 5008 5009 return false; 5010} 5011 5012/// Common code to parse out a block of text (typically YAML) between start and 5013/// end directives. 
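/// For example, for HSA metadata on code object v3+ the begin/end pair is
/// ".amdgpu_metadata" / ".end_amdgpu_metadata"; everything between the two
/// directives is collected verbatim, with statements joined by the
/// assembler's separator string.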
5014bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5015 const char *AssemblerDirectiveEnd, 5016 std::string &CollectString) { 5017 5018 raw_string_ostream CollectStream(CollectString); 5019 5020 getLexer().setSkipSpace(false); 5021 5022 bool FoundEnd = false; 5023 while (!isToken(AsmToken::Eof)) { 5024 while (isToken(AsmToken::Space)) { 5025 CollectStream << getTokenStr(); 5026 Lex(); 5027 } 5028 5029 if (trySkipId(AssemblerDirectiveEnd)) { 5030 FoundEnd = true; 5031 break; 5032 } 5033 5034 CollectStream << Parser.parseStringToEndOfStatement() 5035 << getContext().getAsmInfo()->getSeparatorString(); 5036 5037 Parser.eatToEndOfStatement(); 5038 } 5039 5040 getLexer().setSkipSpace(true); 5041 5042 if (isToken(AsmToken::Eof) && !FoundEnd) { 5043 return TokError(Twine("expected directive ") + 5044 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5045 } 5046 5047 CollectStream.flush(); 5048 return false; 5049} 5050 5051/// Parse the assembler directive for new MsgPack-format PAL metadata. 5052bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5053 std::string String; 5054 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5055 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5056 return true; 5057 5058 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5059 if (!PALMetadata->setFromString(String)) 5060 return Error(getLoc(), "invalid PAL metadata"); 5061 return false; 5062} 5063 5064/// Parse the assembler directive for old linear-format PAL metadata. 5065bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5066 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5067 return Error(getLoc(), 5068 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5069 "not available on non-amdpal OSes")).str()); 5070 } 5071 5072 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5073 PALMetadata->setLegacy(); 5074 for (;;) { 5075 uint32_t Key, Value; 5076 if (ParseAsAbsoluteExpression(Key)) { 5077 return TokError(Twine("invalid value in ") + 5078 Twine(PALMD::AssemblerDirective)); 5079 } 5080 if (!trySkipToken(AsmToken::Comma)) { 5081 return TokError(Twine("expected an even number of values in ") + 5082 Twine(PALMD::AssemblerDirective)); 5083 } 5084 if (ParseAsAbsoluteExpression(Value)) { 5085 return TokError(Twine("invalid value in ") + 5086 Twine(PALMD::AssemblerDirective)); 5087 } 5088 PALMetadata->setRegister(Key, Value); 5089 if (!trySkipToken(AsmToken::Comma)) 5090 break; 5091 } 5092 return false; 5093} 5094 5095/// ParseDirectiveAMDGPULDS 5096/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5097bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5098 if (getParser().checkForValidSection()) 5099 return true; 5100 5101 StringRef Name; 5102 SMLoc NameLoc = getLoc(); 5103 if (getParser().parseIdentifier(Name)) 5104 return TokError("expected identifier in directive"); 5105 5106 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5107 if (parseToken(AsmToken::Comma, "expected ','")) 5108 return true; 5109 5110 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5111 5112 int64_t Size; 5113 SMLoc SizeLoc = getLoc(); 5114 if (getParser().parseAbsoluteExpression(Size)) 5115 return true; 5116 if (Size < 0) 5117 return Error(SizeLoc, "size must be non-negative"); 5118 if (Size > LocalMemorySize) 5119 return Error(SizeLoc, "size is too large"); 5120 5121 int64_t Alignment = 4; 5122 if (trySkipToken(AsmToken::Comma)) { 5123 SMLoc AlignLoc = getLoc(); 5124 if 
(getParser().parseAbsoluteExpression(Alignment)) 5125 return true; 5126 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5127 return Error(AlignLoc, "alignment must be a power of two"); 5128 5129 // Alignment larger than the size of LDS is possible in theory, as long 5130 // as the linker manages to place the symbol at address 0, but we do want 5131 // to make sure the alignment fits nicely into a 32-bit integer. 5132 if (Alignment >= 1u << 31) 5133 return Error(AlignLoc, "alignment is too large"); 5134 } 5135 5136 if (parseToken(AsmToken::EndOfStatement, 5137 "unexpected token in '.amdgpu_lds' directive")) 5138 return true; 5139 5140 Symbol->redefineIfPossible(); 5141 if (!Symbol->isUndefined()) 5142 return Error(NameLoc, "invalid symbol redefinition"); 5143 5144 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5145 return false; 5146 } 5147 5148 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5149 StringRef IDVal = DirectiveID.getString(); 5150 5151 if (isHsaAbiVersion3Or4(&getSTI())) { 5152 if (IDVal == ".amdhsa_kernel") 5153 return ParseDirectiveAMDHSAKernel(); 5154 5155 // TODO: Restructure/combine with PAL metadata directive. 5156 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5157 return ParseDirectiveHSAMetadata(); 5158 } else { 5159 if (IDVal == ".hsa_code_object_version") 5160 return ParseDirectiveHSACodeObjectVersion(); 5161 5162 if (IDVal == ".hsa_code_object_isa") 5163 return ParseDirectiveHSACodeObjectISA(); 5164 5165 if (IDVal == ".amd_kernel_code_t") 5166 return ParseDirectiveAMDKernelCodeT(); 5167 5168 if (IDVal == ".amdgpu_hsa_kernel") 5169 return ParseDirectiveAMDGPUHsaKernel(); 5170 5171 if (IDVal == ".amd_amdgpu_isa") 5172 return ParseDirectiveISAVersion(); 5173 5174 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5175 return ParseDirectiveHSAMetadata(); 5176 } 5177 5178 if (IDVal == ".amdgcn_target") 5179 return ParseDirectiveAMDGCNTarget(); 5180 5181 if (IDVal == ".amdgpu_lds") 5182 return ParseDirectiveAMDGPULDS(); 5183 5184 if (IDVal == PALMD::AssemblerDirectiveBegin) 5185 return ParseDirectivePALMetadataBegin(); 5186 5187 if (IDVal == PALMD::AssemblerDirective) 5188 return ParseDirectivePALMetadata(); 5189 5190 return true; 5191 } 5192 5193 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5194 unsigned RegNo) { 5195 5196 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5197 R.isValid(); ++R) { 5198 if (*R == RegNo) 5199 return isGFX9Plus(); 5200 } 5201 5202 // GFX10 has 2 more SGPRs 104 and 105. 5203 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5204 R.isValid(); ++R) { 5205 if (*R == RegNo) 5206 return hasSGPR104_SGPR105(); 5207 } 5208 5209 switch (RegNo) { 5210 case AMDGPU::SRC_SHARED_BASE: 5211 case AMDGPU::SRC_SHARED_LIMIT: 5212 case AMDGPU::SRC_PRIVATE_BASE: 5213 case AMDGPU::SRC_PRIVATE_LIMIT: 5214 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5215 return isGFX9Plus(); 5216 case AMDGPU::TBA: 5217 case AMDGPU::TBA_LO: 5218 case AMDGPU::TBA_HI: 5219 case AMDGPU::TMA: 5220 case AMDGPU::TMA_LO: 5221 case AMDGPU::TMA_HI: 5222 return !isGFX9Plus(); 5223 case AMDGPU::XNACK_MASK: 5224 case AMDGPU::XNACK_MASK_LO: 5225 case AMDGPU::XNACK_MASK_HI: 5226 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5227 case AMDGPU::SGPR_NULL: 5228 return isGFX10Plus(); 5229 default: 5230 break; 5231 } 5232 5233 if (isCI()) 5234 return true; 5235 5236 if (isSI() || isGFX10Plus()) { 5237 // No flat_scr on SI.
5238 // On GFX10 flat scratch is not a valid register operand and can only be 5239 // accessed with s_setreg/s_getreg. 5240 switch (RegNo) { 5241 case AMDGPU::FLAT_SCR: 5242 case AMDGPU::FLAT_SCR_LO: 5243 case AMDGPU::FLAT_SCR_HI: 5244 return false; 5245 default: 5246 return true; 5247 } 5248 } 5249 5250 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5251 // SI/CI have. 5252 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5253 R.isValid(); ++R) { 5254 if (*R == RegNo) 5255 return hasSGPR102_SGPR103(); 5256 } 5257 5258 return true; 5259 } 5260 5261 OperandMatchResultTy 5262 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5263 OperandMode Mode) { 5264 // Try to parse with a custom parser 5265 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5266 5267 // If we successfully parsed the operand or if there was an error parsing, 5268 // we are done. 5269 // 5270 // If we are parsing after we reach EndOfStatement then this means we 5271 // are appending default values to the Operands list. This is only done 5272 // by a custom parser, so we shouldn't continue on to the generic parsing. 5273 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5274 isToken(AsmToken::EndOfStatement)) 5275 return ResTy; 5276 5277 SMLoc RBraceLoc; 5278 SMLoc LBraceLoc = getLoc(); 5279 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5280 unsigned Prefix = Operands.size(); 5281 5282 for (;;) { 5283 auto Loc = getLoc(); 5284 ResTy = parseReg(Operands); 5285 if (ResTy == MatchOperand_NoMatch) 5286 Error(Loc, "expected a register"); 5287 if (ResTy != MatchOperand_Success) 5288 return MatchOperand_ParseFail; 5289 5290 RBraceLoc = getLoc(); 5291 if (trySkipToken(AsmToken::RBrac)) 5292 break; 5293 5294 if (!skipToken(AsmToken::Comma, 5295 "expected a comma or a closing square bracket")) { 5296 return MatchOperand_ParseFail; 5297 } 5298 } 5299 5300 if (Operands.size() - Prefix > 1) { 5301 Operands.insert(Operands.begin() + Prefix, 5302 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5303 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5304 } 5305 5306 return MatchOperand_Success; 5307 } 5308 5309 return parseRegOrImm(Operands); 5310 } 5311 5312 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5313 // Clear any forced encodings from the previous instruction.
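  // For example, "v_add_f32_e64" forces the 64-bit (VOP3) encoding and is
  // matched as "v_add_f32"; the "_e32", "_dpp" and "_sdwa" suffixes behave
  // analogously.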

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
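
// Hedged examples of the suffix handling above (mnemonics are illustrative):
//   "v_add_f32_e64"  -> "v_add_f32" with a forced 64-bit (VOP3) encoding
//   "v_mov_b32_sdwa" -> "v_mov_b32" with the SDWA variant forced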

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
            (Res == MatchOperand_ParseFail) ? "failed parsing operand."
                                            : "not a valid operand.";
        Error(getLoc(), Msg);
      }
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
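
// Illustrative only: this is the parser behind bit-array modifiers such as
// op_sel/neg_lo, e.g. "op_sel:[0,1,0]" yields Val = 0b010 (bit I is set per
// element I), stored as a single immediate operand.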

OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return MatchOperand_NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128()) {
    Error(S, "r128 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }
  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  SMLoc S = getLoc();

  if (trySkipId("glc"))
    CPolOn = AMDGPU::CPol::GLC;
  else if (trySkipId("noglc"))
    CPolOff = AMDGPU::CPol::GLC;
  else if (trySkipId("slc"))
    CPolOn = AMDGPU::CPol::SLC;
  else if (trySkipId("noslc"))
    CPolOff = AMDGPU::CPol::SLC;
  else if (trySkipId("dlc"))
    CPolOn = AMDGPU::CPol::DLC;
  else if (trySkipId("nodlc"))
    CPolOff = AMDGPU::CPol::DLC;
  else if (trySkipId("scc"))
    CPolOn = AMDGPU::CPol::SCC;
  else if (trySkipId("noscc"))
    CPolOff = AMDGPU::CPol::SCC;
  else
    return MatchOperand_NoMatch;

  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
    Error(S, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
    Error(S, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }

  CPolSeen |= (CPolOn | CPolOff);

  for (unsigned I = 1; I != Operands.size(); ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isCPol()) {
      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
      return MatchOperand_Success;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));

  return MatchOperand_Success;
}
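
// Illustrative only: each cache-policy token parsed above is merged into one
// CPol immediate, so "glc slc" sets both bits and "noglc" clears GLC, e.g.
//   buffer_load_dword v0, v1, s[4:7], 0 offen glc slc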

static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                       StringRef &Value,
                                       SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? MatchOperand_Success
                                                  : MatchOperand_ParseFail;
}

//===----------------------------------------------------------------------===//
// MTBUF format
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res == MatchOperand_ParseFail)
    return false;
  if (Res == MatchOperand_NoMatch)
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
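
// Illustrative only: legacy tbuffer syntax accepted by parseDfmtNfmt(), with
// either field optional and in either order, e.g.
//   tbuffer_load_format_xyzw v[4:7], off, s[8:11], dfmt:7, nfmt:4, 0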

OperandMatchResultTy
AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return MatchOperand_ParseFail;

  if (Fmt == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  Format = Fmt;
  return MatchOperand_Success;
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    int64_t &Nfmt,
                                    StringRef FormatStr,
                                    SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                            SMLoc Loc,
                                            int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr);
  if (Id == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  if (!isGFX10Plus()) {
    Error(Loc, "unified format is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Format = Id;
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return MatchOperand_ParseFail;
  if (!isValidFormatEncoding(Format, getSTI())) {
    Error(Loc, "out of range format");
    return MatchOperand_ParseFail;
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return MatchOperand_ParseFail;

    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res == MatchOperand_NoMatch)
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (Res != MatchOperand_Success)
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return MatchOperand_ParseFail;

    return MatchOperand_Success;
  }

  return parseNumericFormat(Format);
}

OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}
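
// Illustrative only (exact symbolic format names vary by target): the
// bracketed symbolic form handled above follows the soffset operand, e.g.
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]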

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
      (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi)
          ? AMDGPUOperand::ImmTySwizzle
          : AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
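
// Illustrative only: cvtDSOffset01() serves the two-offset DS forms, e.g.
//   ds_write2_b32 v1, v2, v3 offset0:4 offset1:8 gds
// where both offsets default to 0 when omitted.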

void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
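
// Illustrative only: sources written as "off" contribute no bits to the
// computed 'en' mask, e.g.
//   exp mrt0 v0, v1, off, off done vm   // en = 0b0011
// while compressed exports set two bits per live register pair.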

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
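
// Illustrative only: counters may be combined with '&' or ',', and the
// "_sat" forms clamp an oversized value instead of reporting an error, e.g.
//   s_waitcnt vmcnt(0) & lgkmcnt(0)
//   s_waitcnt vmcnt_sat(1000)   // clamps to the field's maximum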

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
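
// Illustrative only: both forms accepted by parseHwreg(), the symbolic macro
// and a fully numeric one, e.g.
//   s_getreg_b32 s0, hwreg(HW_REG_STATUS)
//   s_getreg_b32 s0, hwreg(6, 0, 32)   // id, bit offset, field width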

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}
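
// Illustrative only: the sendmsg macro takes a message name plus optional
// operation and stream id, all validated above, e.g.
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg sendmsg(MSG_INTERRUPT)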

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
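
// Illustrative only: an attribute operand such as "attr3.x" splits into the
// attribute number (3) and channel (.x -> 0) parsed above, e.g.
//   v_interp_p1_f32 v0, v1, attr3.x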

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
             "invalid exp target" :
             "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  if (Expr->evaluateAsAbsolute(Imm))
    return true;

  if (Expected.empty()) {
    Error(S, "expected absolute expression");
  } else {
    Error(S, Twine("expected ", Expected) +
             Twine(" or an absolute expression"));
  }
  return false;
}

bool
AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
  SMLoc S = getLoc();

  const MCExpr *Expr;
  if (Parser.parseExpression(Expr))
    return false;

  int64_t IntVal;
  if (Expr->evaluateAsAbsolute(IntVal)) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
  } else {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
  }
  return true;
}

bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

bool
AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::Identifier)) {
    Val = getTokenStr();
    lex();
    return true;
  } else {
    if (!ErrMsg.empty())
      Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
}

void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

SMLoc
AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                               const OperandVector &Operands) const {
  for (unsigned i = Operands.size() - 1; i > 0; --i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Test(Op))
      return Op.getStartLoc();
  }
  return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
}

SMLoc
AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getRegLoc(unsigned Reg,
                           const OperandVector &Operands) const {
  auto Test = [=](const AMDGPUOperand& Op) {
    return Op.isRegKind() && Op.getReg() == Reg;
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.IsImmKindLiteral() || Op.isExpr();
  };
  return getOperandLoc(Test, Operands);
}

SMLoc
AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
  auto Test = [](const AMDGPUOperand& Op) {
    return Op.isImmKindConst();
  };
  return getOperandLoc(Test, Operands);
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}
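
// Worked example (illustrative only): a broadcast of lane 3 within groups of
// 8 lanes is encoded via the helper above as
//   encodeBitmaskPerm(/*AndMask=*/0x18, /*OrMask=*/3, /*XorMask=*/0)
// i.e. the and-mask keeps the group base and the or-mask selects the lane.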

bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(
        AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}
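
// Illustrative only: swizzle offsets accepted by parseSwizzleOp(), either a
// raw 16-bit value or one of the macros dispatched above, e.g.
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v5, v1 offset:0x8000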

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}
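
// Illustrative only: the gpr_idx macro lists the mode names accepted by
// parseGPRIdxMacro(), each usable at most once, e.g.
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)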

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
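
// Illustrative only: for buffer atomics the 'glc' bit selects the returning
// variant handled above, which also gets a tied source for the destination:
//   buffer_atomic_add v0, v1, s[4:7], 0 offen glc   // returns the old value
//   buffer_atomic_add v0, v1, s[4:7], 0 offen       // no return value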

void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}
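
// Illustrative only: a MIMG instruction whose optional modifiers flow through
// OptionalIdx above, e.g.
//   image_load v[0:3], v[4:5], s[0:7] dmask:0xf unorm glc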

void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);
}

void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                      const OperandVector &Operands) {
  for (unsigned I = 1; I < Operands.size(); ++I) {
    auto &Operand = (AMDGPUOperand &)*Operands[I];
    if (Operand.isReg())
      Operand.addRegOperands(Inst, 1);
  }

  Inst.addOperand(MCOperand::createImm(1)); // a16
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1;
    return true;
  }
  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
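
// Illustrative only: ConvertOmodMul/ConvertOmodDiv above fold both spellings
// of the VOP3 output modifier into one 2-bit field, e.g.
//   v_add_f32 v0, v1, v2 mul:2   // omod = 1
//   v_add_f32 v0, v1, v2 div:2   // omod = 3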
// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1;
    return true;
  }
  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However, the implementation of some instructions
  // violates this rule (see e.g. flat/global atomics, which have hardcoded
  // 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
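  //
  // Example (illustrative, not from the original source): in
  //   global_atomic_add v0, v[1:2], v3, off glc
  // the trailing 'glc' is hardcoded in the AsmString after optional
  // operands, so the lookahead below must consume up to MAX_OPR_LOOKAHEAD
  // optional operands before the matcher reaches it.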

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
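
// Illustration (not from the original source): for a two-source op-sel
// instruction, SrcNum is 2, so op_sel:[x,y,z] places the destination bit z
// at position 2. When z is set, the bit has no home in the dst operand
// encoding, so cvtVOP3OpSel folds it into src0_modifiers as DST_OP_SEL.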
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
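
// Illustration (not from the original source): for an interpolation
// instruction such as
//   v_interp_p1_f32 v0, v1, attr0.x
// the attribute (attr0) and channel (.x) are parsed as immediates and
// emitted directly by the loop above, while modifiers like 'high', 'clamp'
// and 'omod' are collected into OptionalIdx and appended last.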
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
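
// Illustration (not from the original source): for
//   v_mac_f32_e64 v0, v1, v2
// the matcher only sees dst/src0/src1, so the special case above inserts
// src2_modifiers = 0 at the right index and then inserts src2 as a copy of
// the dst operand (v0), satisfying the tied-operand constraint.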
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
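
// Worked example (illustrative) for cvtVOP3P: with op_sel:[1,0,0],
// op_sel_hi:[0,1,1] and neg_lo:[0,0,1], the final loop ORs OP_SEL_0 into
// src0_modifiers, OP_SEL_1 into src1_modifiers, and OP_SEL_1 | NEG into
// src2_modifiers; the standalone op_sel/op_sel_hi/neg_lo/neg_hi operands
// only stage these per-source modifier bits.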
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
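
// Worked example (illustrative): each lane selector occupies 3 bits, so the
// identity selection dpp8:[0,1,2,3,4,5,6,7] packs to
//   0 | (1 << 3) | (2 << 6) | ... | (7 << 21) = 0xFAC688.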
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
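
// Worked example (illustrative): quad_perm:[3,2,1,0] (reverse the four lanes
// of each quad) packs two bits per lane:
//   3 | (2 << 2) | (1 << 4) | (0 << 6) = 27.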
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
    .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
    .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
    .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
    .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
    .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
    .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
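
// Illustration (not from the original source): row_shl:1 passes the range
// check (1..15) and yields DppCtrl::ROW_SHL0 | 1, so a full operand such as
//   v_mov_b32_dpp v0, v1 row_shl:1 row_mask:0xf bank_mask:0xf
// encodes the shift amount directly in the dpp_ctrl field.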
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token; skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
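
// Illustration (not from the original source): in
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1
// parseSDWASel maps BYTE_0/WORD_1 to their SdwaSel encodings and
// parseSDWADstUnused maps UNUSED_PRESERVE to its DstUnused encoding.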
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
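
// Illustration (not from the original source): VOP2b SDWA forms such as
//   v_add_u32_sdwa v1, vcc, v2, v3
// spell out the implicit carry output 'vcc' in the assembly text, but the
// MCInst has no slot for it; cvtSDWA below detects and skips these vcc
// operands based on how many MCInst operands have been emitted so far.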
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
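
// Illustration (not from the original source): MFMA modifiers default to 0
// when omitted, so an instruction such as
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[32:63] cbsz:1 abid:2 blgp:3
// differs from the unsuffixed form only in these three immediates
// (3-bit cbsz/blgp, 4-bit abid, as checked by isCBSZ/isBLGP/isABID earlier).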
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand &)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }