1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#include "MCTargetDesc/X86BaseInfo.h" 11#include "llvm/MC/MCTargetAsmParser.h" 12#include "llvm/MC/MCStreamer.h" 13#include "llvm/MC/MCExpr.h" 14#include "llvm/MC/MCSymbol.h" 15#include "llvm/MC/MCInst.h" 16#include "llvm/MC/MCRegisterInfo.h" 17#include "llvm/MC/MCSubtargetInfo.h" 18#include "llvm/MC/MCParser/MCAsmLexer.h" 19#include "llvm/MC/MCParser/MCAsmParser.h" 20#include "llvm/MC/MCParser/MCParsedAsmOperand.h" 21#include "llvm/ADT/APFloat.h" 22#include "llvm/ADT/SmallString.h" 23#include "llvm/ADT/SmallVector.h" 24#include "llvm/ADT/StringSwitch.h" 25#include "llvm/ADT/Twine.h" 26#include "llvm/Support/SourceMgr.h" 27#include "llvm/Support/TargetRegistry.h" 28#include "llvm/Support/raw_ostream.h" 29 30using namespace llvm; 31 32namespace { 33struct X86Operand; 34 35class X86AsmParser : public MCTargetAsmParser { 36 MCSubtargetInfo &STI; 37 MCAsmParser &Parser; 38 ParseInstructionInfo *InstInfo; 39private: 40 MCAsmParser &getParser() const { return Parser; } 41 42 MCAsmLexer &getLexer() const { return Parser.getLexer(); } 43 44 bool Error(SMLoc L, const Twine &Msg, 45 ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), 46 bool MatchingInlineAsm = false) { 47 if (MatchingInlineAsm) return true; 48 return Parser.Error(L, Msg, Ranges); 49 } 50 51 X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { 52 Error(Loc, Msg); 53 return 0; 54 } 55 56 X86Operand *ParseOperand(); 57 X86Operand *ParseATTOperand(); 58 X86Operand *ParseIntelOperand(); 59 X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); 60 X86Operand *ParseIntelTypeOperator(SMLoc StartLoc); 61 X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); 62 X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); 63 X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); 64 65 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, 66 SmallString<64> &Err); 67 68 bool ParseDirectiveWord(unsigned Size, SMLoc L); 69 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 70 71 bool processInstruction(MCInst &Inst, 72 const SmallVectorImpl<MCParsedAsmOperand*> &Ops); 73 74 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 75 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 76 MCStreamer &Out, unsigned &ErrorInfo, 77 bool MatchingInlineAsm); 78 79 /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) 80 /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. 81 bool isSrcOp(X86Operand &Op); 82 83 /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) 84 /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. 85 bool isDstOp(X86Operand &Op); 86 87 bool is64BitMode() const { 88 // FIXME: Can tablegen auto-generate this? 89 return (STI.getFeatureBits() & X86::Mode64Bit) != 0; 90 } 91 void SwitchMode() { 92 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit)); 93 setAvailableFeatures(FB); 94 } 95 96 /// @name Auto-generated Matcher Functions 97 /// { 98 99#define GET_ASSEMBLER_HEADER 100#include "X86GenAsmMatcher.inc" 101 102 /// } 103 104public: 105 X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) 106 : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { 107 108 // Initialize the set of available features. 109 setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); 110 } 111 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); 112 113 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 114 SMLoc NameLoc, 115 SmallVectorImpl<MCParsedAsmOperand*> &Operands); 116 117 virtual bool ParseDirective(AsmToken DirectiveID); 118 119 bool isParsingIntelSyntax() { 120 return getParser().getAssemblerDialect(); 121 } 122}; 123} // end anonymous namespace 124 125/// @name Auto-generated Match Functions 126/// { 127 128static unsigned MatchRegisterName(StringRef Name); 129 130/// } 131 132static bool isImmSExti16i8Value(uint64_t Value) { 133 return (( Value <= 0x000000000000007FULL)|| 134 (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| 135 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 136} 137 138static bool isImmSExti32i8Value(uint64_t Value) { 139 return (( Value <= 0x000000000000007FULL)|| 140 (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| 141 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 142} 143 144static bool isImmZExtu32u8Value(uint64_t Value) { 145 return (Value <= 0x00000000000000FFULL); 146} 147 148static bool isImmSExti64i8Value(uint64_t Value) { 149 return (( Value <= 0x000000000000007FULL)|| 150 (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 151} 152 153static bool isImmSExti64i32Value(uint64_t Value) { 154 return (( Value <= 0x000000007FFFFFFFULL)|| 155 (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); 156} 157namespace { 158 159/// X86Operand - Instances of this class represent a parsed X86 machine 160/// instruction. 161struct X86Operand : public MCParsedAsmOperand { 162 enum KindTy { 163 Token, 164 Register, 165 Immediate, 166 Memory 167 } Kind; 168 169 SMLoc StartLoc, EndLoc; 170 SMLoc OffsetOfLoc; 171 172 union { 173 struct { 174 const char *Data; 175 unsigned Length; 176 } Tok; 177 178 struct { 179 unsigned RegNo; 180 } Reg; 181 182 struct { 183 const MCExpr *Val; 184 bool NeedAsmRewrite; 185 } Imm; 186 187 struct { 188 unsigned SegReg; 189 const MCExpr *Disp; 190 unsigned BaseReg; 191 unsigned IndexReg; 192 unsigned Scale; 193 unsigned Size; 194 bool NeedSizeDir; 195 } Mem; 196 }; 197 198 X86Operand(KindTy K, SMLoc Start, SMLoc End) 199 : Kind(K), StartLoc(Start), EndLoc(End) {} 200 201 /// getStartLoc - Get the location of the first token of this operand. 202 SMLoc getStartLoc() const { return StartLoc; } 203 /// getEndLoc - Get the location of the last token of this operand. 204 SMLoc getEndLoc() const { return EndLoc; } 205 /// getLocRange - Get the range between the first and last token of this 206 /// operand. 207 SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } 208 /// getOffsetOfLoc - Get the location of the offset operator. 209 SMLoc getOffsetOfLoc() const { return OffsetOfLoc; } 210 211 virtual void print(raw_ostream &OS) const {} 212 213 StringRef getToken() const { 214 assert(Kind == Token && "Invalid access!"); 215 return StringRef(Tok.Data, Tok.Length); 216 } 217 void setTokenValue(StringRef Value) { 218 assert(Kind == Token && "Invalid access!"); 219 Tok.Data = Value.data(); 220 Tok.Length = Value.size(); 221 } 222 223 unsigned getReg() const { 224 assert(Kind == Register && "Invalid access!"); 225 return Reg.RegNo; 226 } 227 228 const MCExpr *getImm() const { 229 assert(Kind == Immediate && "Invalid access!"); 230 return Imm.Val; 231 } 232 233 bool needAsmRewrite() const { 234 assert(Kind == Immediate && "Invalid access!"); 235 return Imm.NeedAsmRewrite; 236 } 237 238 const MCExpr *getMemDisp() const { 239 assert(Kind == Memory && "Invalid access!"); 240 return Mem.Disp; 241 } 242 unsigned getMemSegReg() const { 243 assert(Kind == Memory && "Invalid access!"); 244 return Mem.SegReg; 245 } 246 unsigned getMemBaseReg() const { 247 assert(Kind == Memory && "Invalid access!"); 248 return Mem.BaseReg; 249 } 250 unsigned getMemIndexReg() const { 251 assert(Kind == Memory && "Invalid access!"); 252 return Mem.IndexReg; 253 } 254 unsigned getMemScale() const { 255 assert(Kind == Memory && "Invalid access!"); 256 return Mem.Scale; 257 } 258 259 bool isToken() const {return Kind == Token; } 260 261 bool isImm() const { return Kind == Immediate; } 262 263 bool isImmSExti16i8() const { 264 if (!isImm()) 265 return false; 266 267 // If this isn't a constant expr, just assume it fits and let relaxation 268 // handle it. 269 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 270 if (!CE) 271 return true; 272 273 // Otherwise, check the value is in a range that makes sense for this 274 // extension. 275 return isImmSExti16i8Value(CE->getValue()); 276 } 277 bool isImmSExti32i8() const { 278 if (!isImm()) 279 return false; 280 281 // If this isn't a constant expr, just assume it fits and let relaxation 282 // handle it. 283 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 284 if (!CE) 285 return true; 286 287 // Otherwise, check the value is in a range that makes sense for this 288 // extension. 289 return isImmSExti32i8Value(CE->getValue()); 290 } 291 bool isImmZExtu32u8() const { 292 if (!isImm()) 293 return false; 294 295 // If this isn't a constant expr, just assume it fits and let relaxation 296 // handle it. 297 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 298 if (!CE) 299 return true; 300 301 // Otherwise, check the value is in a range that makes sense for this 302 // extension. 303 return isImmZExtu32u8Value(CE->getValue()); 304 } 305 bool isImmSExti64i8() const { 306 if (!isImm()) 307 return false; 308 309 // If this isn't a constant expr, just assume it fits and let relaxation 310 // handle it. 311 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 312 if (!CE) 313 return true; 314 315 // Otherwise, check the value is in a range that makes sense for this 316 // extension. 317 return isImmSExti64i8Value(CE->getValue()); 318 } 319 bool isImmSExti64i32() const { 320 if (!isImm()) 321 return false; 322 323 // If this isn't a constant expr, just assume it fits and let relaxation 324 // handle it. 325 const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); 326 if (!CE) 327 return true; 328 329 // Otherwise, check the value is in a range that makes sense for this 330 // extension. 331 return isImmSExti64i32Value(CE->getValue()); 332 } 333 334 unsigned getMemSize() const { 335 assert(Kind == Memory && "Invalid access!"); 336 return Mem.Size; 337 } 338 339 bool isOffsetOf() const { 340 return OffsetOfLoc.getPointer(); 341 } 342 343 bool needSizeDirective() const { 344 assert(Kind == Memory && "Invalid access!"); 345 return Mem.NeedSizeDir; 346 } 347 348 bool isMem() const { return Kind == Memory; } 349 bool isMem8() const { 350 return Kind == Memory && (!Mem.Size || Mem.Size == 8); 351 } 352 bool isMem16() const { 353 return Kind == Memory && (!Mem.Size || Mem.Size == 16); 354 } 355 bool isMem32() const { 356 return Kind == Memory && (!Mem.Size || Mem.Size == 32); 357 } 358 bool isMem64() const { 359 return Kind == Memory && (!Mem.Size || Mem.Size == 64); 360 } 361 bool isMem80() const { 362 return Kind == Memory && (!Mem.Size || Mem.Size == 80); 363 } 364 bool isMem128() const { 365 return Kind == Memory && (!Mem.Size || Mem.Size == 128); 366 } 367 bool isMem256() const { 368 return Kind == Memory && (!Mem.Size || Mem.Size == 256); 369 } 370 371 bool isMemVX32() const { 372 return Kind == Memory && (!Mem.Size || Mem.Size == 32) && 373 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; 374 } 375 bool isMemVY32() const { 376 return Kind == Memory && (!Mem.Size || Mem.Size == 32) && 377 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; 378 } 379 bool isMemVX64() const { 380 return Kind == Memory && (!Mem.Size || Mem.Size == 64) && 381 getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; 382 } 383 bool isMemVY64() const { 384 return Kind == Memory && (!Mem.Size || Mem.Size == 64) && 385 getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; 386 } 387 388 bool isAbsMem() const { 389 return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && 390 !getMemIndexReg() && getMemScale() == 1; 391 } 392 393 bool isReg() const { return Kind == Register; } 394 395 void addExpr(MCInst &Inst, const MCExpr *Expr) const { 396 // Add as immediates when possible. 397 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) 398 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 399 else 400 Inst.addOperand(MCOperand::CreateExpr(Expr)); 401 } 402 403 void addRegOperands(MCInst &Inst, unsigned N) const { 404 assert(N == 1 && "Invalid number of operands!"); 405 Inst.addOperand(MCOperand::CreateReg(getReg())); 406 } 407 408 void addImmOperands(MCInst &Inst, unsigned N) const { 409 assert(N == 1 && "Invalid number of operands!"); 410 addExpr(Inst, getImm()); 411 } 412 413 void addMem8Operands(MCInst &Inst, unsigned N) const { 414 addMemOperands(Inst, N); 415 } 416 void addMem16Operands(MCInst &Inst, unsigned N) const { 417 addMemOperands(Inst, N); 418 } 419 void addMem32Operands(MCInst &Inst, unsigned N) const { 420 addMemOperands(Inst, N); 421 } 422 void addMem64Operands(MCInst &Inst, unsigned N) const { 423 addMemOperands(Inst, N); 424 } 425 void addMem80Operands(MCInst &Inst, unsigned N) const { 426 addMemOperands(Inst, N); 427 } 428 void addMem128Operands(MCInst &Inst, unsigned N) const { 429 addMemOperands(Inst, N); 430 } 431 void addMem256Operands(MCInst &Inst, unsigned N) const { 432 addMemOperands(Inst, N); 433 } 434 void addMemVX32Operands(MCInst &Inst, unsigned N) const { 435 addMemOperands(Inst, N); 436 } 437 void addMemVY32Operands(MCInst &Inst, unsigned N) const { 438 addMemOperands(Inst, N); 439 } 440 void addMemVX64Operands(MCInst &Inst, unsigned N) const { 441 addMemOperands(Inst, N); 442 } 443 void addMemVY64Operands(MCInst &Inst, unsigned N) const { 444 addMemOperands(Inst, N); 445 } 446 447 void addMemOperands(MCInst &Inst, unsigned N) const { 448 assert((N == 5) && "Invalid number of operands!"); 449 Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); 450 Inst.addOperand(MCOperand::CreateImm(getMemScale())); 451 Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); 452 addExpr(Inst, getMemDisp()); 453 Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); 454 } 455 456 void addAbsMemOperands(MCInst &Inst, unsigned N) const { 457 assert((N == 1) && "Invalid number of operands!"); 458 // Add as immediates when possible. 459 if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp())) 460 Inst.addOperand(MCOperand::CreateImm(CE->getValue())); 461 else 462 Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); 463 } 464 465 static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { 466 SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1); 467 X86Operand *Res = new X86Operand(Token, Loc, EndLoc); 468 Res->Tok.Data = Str.data(); 469 Res->Tok.Length = Str.size(); 470 return Res; 471 } 472 473 static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, 474 SMLoc OffsetOfLoc = SMLoc()) { 475 X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); 476 Res->Reg.RegNo = RegNo; 477 Res->OffsetOfLoc = OffsetOfLoc; 478 return Res; 479 } 480 481 static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, 482 bool NeedRewrite = true){ 483 X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); 484 Res->Imm.Val = Val; 485 Res->Imm.NeedAsmRewrite = NeedRewrite; 486 return Res; 487 } 488 489 /// Create an absolute memory operand. 490 static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, 491 unsigned Size = 0, bool NeedSizeDir = false){ 492 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 493 Res->Mem.SegReg = 0; 494 Res->Mem.Disp = Disp; 495 Res->Mem.BaseReg = 0; 496 Res->Mem.IndexReg = 0; 497 Res->Mem.Scale = 1; 498 Res->Mem.Size = Size; 499 Res->Mem.NeedSizeDir = NeedSizeDir; 500 return Res; 501 } 502 503 /// Create a generalized memory operand. 504 static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, 505 unsigned BaseReg, unsigned IndexReg, 506 unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, 507 unsigned Size = 0, bool NeedSizeDir = false) { 508 // We should never just have a displacement, that should be parsed as an 509 // absolute memory operand. 510 assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); 511 512 // The scale should always be one of {1,2,4,8}. 513 assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && 514 "Invalid scale!"); 515 X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); 516 Res->Mem.SegReg = SegReg; 517 Res->Mem.Disp = Disp; 518 Res->Mem.BaseReg = BaseReg; 519 Res->Mem.IndexReg = IndexReg; 520 Res->Mem.Scale = Scale; 521 Res->Mem.Size = Size; 522 Res->Mem.NeedSizeDir = NeedSizeDir; 523 return Res; 524 } 525}; 526 527} // end anonymous namespace. 528 529bool X86AsmParser::isSrcOp(X86Operand &Op) { 530 unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI; 531 532 return (Op.isMem() && 533 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && 534 isa<MCConstantExpr>(Op.Mem.Disp) && 535 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 536 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); 537} 538 539bool X86AsmParser::isDstOp(X86Operand &Op) { 540 unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; 541 542 return Op.isMem() && 543 (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && 544 isa<MCConstantExpr>(Op.Mem.Disp) && 545 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 546 Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; 547} 548 549bool X86AsmParser::ParseRegister(unsigned &RegNo, 550 SMLoc &StartLoc, SMLoc &EndLoc) { 551 RegNo = 0; 552 const AsmToken &PercentTok = Parser.getTok(); 553 StartLoc = PercentTok.getLoc(); 554 555 // If we encounter a %, ignore it. This code handles registers with and 556 // without the prefix, unprefixed registers can occur in cfi directives. 557 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) 558 Parser.Lex(); // Eat percent token. 559 560 const AsmToken &Tok = Parser.getTok(); 561 if (Tok.isNot(AsmToken::Identifier)) { 562 if (isParsingIntelSyntax()) return true; 563 return Error(StartLoc, "invalid register name", 564 SMRange(StartLoc, Tok.getEndLoc())); 565 } 566 567 RegNo = MatchRegisterName(Tok.getString()); 568 569 // If the match failed, try the register name as lowercase. 570 if (RegNo == 0) 571 RegNo = MatchRegisterName(Tok.getString().lower()); 572 573 if (!is64BitMode()) { 574 // FIXME: This should be done using Requires<In32BitMode> and 575 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 576 // checked. 577 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a 578 // REX prefix. 579 if (RegNo == X86::RIZ || 580 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 581 X86II::isX86_64NonExtLowByteReg(RegNo) || 582 X86II::isX86_64ExtendedReg(RegNo)) 583 return Error(StartLoc, "register %" 584 + Tok.getString() + " is only available in 64-bit mode", 585 SMRange(StartLoc, Tok.getEndLoc())); 586 } 587 588 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 589 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { 590 RegNo = X86::ST0; 591 EndLoc = Tok.getLoc(); 592 Parser.Lex(); // Eat 'st' 593 594 // Check to see if we have '(4)' after %st. 595 if (getLexer().isNot(AsmToken::LParen)) 596 return false; 597 // Lex the paren. 598 getParser().Lex(); 599 600 const AsmToken &IntTok = Parser.getTok(); 601 if (IntTok.isNot(AsmToken::Integer)) 602 return Error(IntTok.getLoc(), "expected stack index"); 603 switch (IntTok.getIntVal()) { 604 case 0: RegNo = X86::ST0; break; 605 case 1: RegNo = X86::ST1; break; 606 case 2: RegNo = X86::ST2; break; 607 case 3: RegNo = X86::ST3; break; 608 case 4: RegNo = X86::ST4; break; 609 case 5: RegNo = X86::ST5; break; 610 case 6: RegNo = X86::ST6; break; 611 case 7: RegNo = X86::ST7; break; 612 default: return Error(IntTok.getLoc(), "invalid stack index"); 613 } 614 615 if (getParser().Lex().isNot(AsmToken::RParen)) 616 return Error(Parser.getTok().getLoc(), "expected ')'"); 617 618 EndLoc = Tok.getLoc(); 619 Parser.Lex(); // Eat ')' 620 return false; 621 } 622 623 // If this is "db[0-7]", match it as an alias 624 // for dr[0-7]. 625 if (RegNo == 0 && Tok.getString().size() == 3 && 626 Tok.getString().startswith("db")) { 627 switch (Tok.getString()[2]) { 628 case '0': RegNo = X86::DR0; break; 629 case '1': RegNo = X86::DR1; break; 630 case '2': RegNo = X86::DR2; break; 631 case '3': RegNo = X86::DR3; break; 632 case '4': RegNo = X86::DR4; break; 633 case '5': RegNo = X86::DR5; break; 634 case '6': RegNo = X86::DR6; break; 635 case '7': RegNo = X86::DR7; break; 636 } 637 638 if (RegNo != 0) { 639 EndLoc = Tok.getLoc(); 640 Parser.Lex(); // Eat it. 641 return false; 642 } 643 } 644 645 if (RegNo == 0) { 646 if (isParsingIntelSyntax()) return true; 647 return Error(StartLoc, "invalid register name", 648 SMRange(StartLoc, Tok.getEndLoc())); 649 } 650 651 EndLoc = Tok.getEndLoc(); 652 Parser.Lex(); // Eat identifier token. 653 return false; 654} 655 656X86Operand *X86AsmParser::ParseOperand() { 657 if (isParsingIntelSyntax()) 658 return ParseIntelOperand(); 659 return ParseATTOperand(); 660} 661 662/// getIntelMemOperandSize - Return intel memory operand size. 663static unsigned getIntelMemOperandSize(StringRef OpStr) { 664 unsigned Size = StringSwitch<unsigned>(OpStr) 665 .Cases("BYTE", "byte", 8) 666 .Cases("WORD", "word", 16) 667 .Cases("DWORD", "dword", 32) 668 .Cases("QWORD", "qword", 64) 669 .Cases("XWORD", "xword", 80) 670 .Cases("XMMWORD", "xmmword", 128) 671 .Cases("YMMWORD", "ymmword", 256) 672 .Default(0); 673 return Size; 674} 675 676X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, 677 unsigned Size) { 678 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 679 const AsmToken &Tok = Parser.getTok(); 680 SMLoc Start = Tok.getLoc(), End; 681 682 const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); 683 // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] 684 685 // Eat '[' 686 if (getLexer().isNot(AsmToken::LBrac)) 687 return ErrorOperand(Start, "Expected '[' token!"); 688 Parser.Lex(); 689 690 if (getLexer().is(AsmToken::Identifier)) { 691 // Parse BaseReg 692 if (ParseRegister(BaseReg, Start, End)) { 693 // Handle '[' 'symbol' ']' 694 if (getParser().ParseExpression(Disp, End)) return 0; 695 if (getLexer().isNot(AsmToken::RBrac)) 696 return ErrorOperand(Start, "Expected ']' token!"); 697 Parser.Lex(); 698 End = Tok.getLoc(); 699 return X86Operand::CreateMem(Disp, Start, End, Size); 700 } 701 } else if (getLexer().is(AsmToken::Integer)) { 702 int64_t Val = Tok.getIntVal(); 703 Parser.Lex(); 704 SMLoc Loc = Tok.getLoc(); 705 if (getLexer().is(AsmToken::RBrac)) { 706 // Handle '[' number ']' 707 Parser.Lex(); 708 End = Tok.getLoc(); 709 const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); 710 if (SegReg) 711 return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, 712 Start, End, Size); 713 return X86Operand::CreateMem(Disp, Start, End, Size); 714 } else if (getLexer().is(AsmToken::Star)) { 715 // Handle '[' Scale*IndexReg ']' 716 Parser.Lex(); 717 SMLoc IdxRegLoc = Tok.getLoc(); 718 if (ParseRegister(IndexReg, IdxRegLoc, End)) 719 return ErrorOperand(IdxRegLoc, "Expected register"); 720 Scale = Val; 721 } else 722 return ErrorOperand(Loc, "Unexpected token"); 723 } 724 725 // Parse ][ as a plus. 726 bool ExpectRBrac = true; 727 if (getLexer().is(AsmToken::RBrac)) { 728 ExpectRBrac = false; 729 Parser.Lex(); 730 End = Tok.getLoc(); 731 } 732 733 if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || 734 getLexer().is(AsmToken::LBrac)) { 735 ExpectRBrac = true; 736 bool isPlus = getLexer().is(AsmToken::Plus) || 737 getLexer().is(AsmToken::LBrac); 738 Parser.Lex(); 739 SMLoc PlusLoc = Tok.getLoc(); 740 if (getLexer().is(AsmToken::Integer)) { 741 int64_t Val = Tok.getIntVal(); 742 Parser.Lex(); 743 if (getLexer().is(AsmToken::Star)) { 744 Parser.Lex(); 745 SMLoc IdxRegLoc = Tok.getLoc(); 746 if (ParseRegister(IndexReg, IdxRegLoc, End)) 747 return ErrorOperand(IdxRegLoc, "Expected register"); 748 Scale = Val; 749 } else if (getLexer().is(AsmToken::RBrac)) { 750 const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); 751 Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext()); 752 } else 753 return ErrorOperand(PlusLoc, "unexpected token after +"); 754 } else if (getLexer().is(AsmToken::Identifier)) { 755 // This could be an index register or a displacement expression. 756 End = Tok.getLoc(); 757 if (!IndexReg) 758 ParseRegister(IndexReg, Start, End); 759 else if (getParser().ParseExpression(Disp, End)) return 0; 760 } 761 } 762 763 // Parse ][ as a plus. 764 if (getLexer().is(AsmToken::RBrac)) { 765 ExpectRBrac = false; 766 Parser.Lex(); 767 End = Tok.getLoc(); 768 if (getLexer().is(AsmToken::LBrac)) { 769 ExpectRBrac = true; 770 Parser.Lex(); 771 if (getParser().ParseExpression(Disp, End)) 772 return 0; 773 } 774 } else if (ExpectRBrac) { 775 if (getParser().ParseExpression(Disp, End)) 776 return 0; 777 } 778 779 if (ExpectRBrac) { 780 if (getLexer().isNot(AsmToken::RBrac)) 781 return ErrorOperand(End, "expected ']' token!"); 782 Parser.Lex(); 783 End = Tok.getLoc(); 784 } 785 786 // Parse the dot operator (e.g., [ebx].foo.bar). 787 if (Tok.getString().startswith(".")) { 788 SmallString<64> Err; 789 const MCExpr *NewDisp; 790 if (ParseIntelDotOperator(Disp, &NewDisp, Err)) 791 return ErrorOperand(Tok.getLoc(), Err); 792 793 Parser.Lex(); // Eat the field. 794 Disp = NewDisp; 795 } 796 797 End = Tok.getLoc(); 798 799 // handle [-42] 800 if (!BaseReg && !IndexReg) 801 return X86Operand::CreateMem(Disp, Start, End, Size); 802 803 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 804 Start, End, Size); 805} 806 807/// ParseIntelMemOperand - Parse intel style memory operand. 808X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { 809 const AsmToken &Tok = Parser.getTok(); 810 SMLoc End; 811 812 unsigned Size = getIntelMemOperandSize(Tok.getString()); 813 if (Size) { 814 Parser.Lex(); 815 assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && 816 "Unexpected token!"); 817 Parser.Lex(); 818 } 819 820 if (getLexer().is(AsmToken::LBrac)) 821 return ParseIntelBracExpression(SegReg, Size); 822 823 if (!ParseRegister(SegReg, Start, End)) { 824 // Handel SegReg : [ ... ] 825 if (getLexer().isNot(AsmToken::Colon)) 826 return ErrorOperand(Start, "Expected ':' token!"); 827 Parser.Lex(); // Eat : 828 if (getLexer().isNot(AsmToken::LBrac)) 829 return ErrorOperand(Start, "Expected '[' token!"); 830 return ParseIntelBracExpression(SegReg, Size); 831 } 832 833 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 834 if (getParser().ParseExpression(Disp, End)) return 0; 835 End = Parser.getTok().getLoc(); 836 837 bool NeedSizeDir = false; 838 if (!Size && isParsingInlineAsm()) { 839 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { 840 const MCSymbol &Sym = SymRef->getSymbol(); 841 // FIXME: The SemaLookup will fail if the name is anything other then an 842 // identifier. 843 // FIXME: Pass a valid SMLoc. 844 SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); 845 NeedSizeDir = Size > 0; 846 } 847 } 848 if (!isParsingInlineAsm()) 849 return X86Operand::CreateMem(Disp, Start, End, Size); 850 else 851 // When parsing inline assembly we set the base register to a non-zero value 852 // as we don't know the actual value at this time. This is necessary to 853 // get the matching correct in some cases. 854 return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, 855 /*Scale*/1, Start, End, Size, NeedSizeDir); 856} 857 858/// Parse the '.' operator. 859bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, 860 const MCExpr **NewDisp, 861 SmallString<64> &Err) { 862 AsmToken Tok = *&Parser.getTok(); 863 uint64_t OrigDispVal, DotDispVal; 864 865 // FIXME: Handle non-constant expressions. 866 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { 867 OrigDispVal = OrigDisp->getValue(); 868 } else { 869 Err = "Non-constant offsets are not supported!"; 870 return true; 871 } 872 873 // Drop the '.'. 874 StringRef DotDispStr = Tok.getString().drop_front(1); 875 876 // .Imm gets lexed as a real. 877 if (Tok.is(AsmToken::Real)) { 878 APInt DotDisp; 879 DotDispStr.getAsInteger(10, DotDisp); 880 DotDispVal = DotDisp.getZExtValue(); 881 } else if (Tok.is(AsmToken::Identifier)) { 882 // We should only see an identifier when parsing the original inline asm. 883 // The front-end should rewrite this in terms of immediates. 884 assert (isParsingInlineAsm() && "Unexpected field name!"); 885 886 unsigned DotDisp; 887 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 888 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, 889 DotDisp)) { 890 Err = "Unable to lookup field reference!"; 891 return true; 892 } 893 DotDispVal = DotDisp; 894 } else { 895 Err = "Unexpected token type!"; 896 return true; 897 } 898 899 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { 900 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); 901 unsigned Len = DotDispStr.size(); 902 unsigned Val = OrigDispVal + DotDispVal; 903 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, 904 Val)); 905 } 906 907 *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); 908 return false; 909} 910 911/// Parse the 'offset' operator. This operator is used to specify the 912/// location rather then the content of a variable. 913X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { 914 SMLoc OffsetOfLoc = Start; 915 Parser.Lex(); // Eat offset. 916 Start = Parser.getTok().getLoc(); 917 assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); 918 919 SMLoc End; 920 const MCExpr *Val; 921 if (getParser().ParseExpression(Val, End)) 922 return ErrorOperand(Start, "Unable to parse expression!"); 923 924 End = Parser.getTok().getLoc(); 925 926 // Don't emit the offset operator. 927 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); 928 929 // The offset operator will have an 'r' constraint, thus we need to create 930 // register operand to ensure proper matching. Just pick a GPR based on 931 // the size of a pointer. 932 unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; 933 return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); 934} 935 936/// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or 937/// C++ type or variable. If the variable is an array, TYPE returns the size of 938/// a single element of the array. 939X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { 940 SMLoc TypeLoc = Start; 941 Parser.Lex(); // Eat offset. 942 Start = Parser.getTok().getLoc(); 943 assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); 944 945 SMLoc End; 946 const MCExpr *Val; 947 if (getParser().ParseExpression(Val, End)) 948 return 0; 949 950 End = Parser.getTok().getLoc(); 951 952 unsigned Size = 0; 953 if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { 954 const MCSymbol &Sym = SymRef->getSymbol(); 955 // FIXME: The SemaLookup will fail if the name is anything other then an 956 // identifier. 957 // FIXME: Pass a valid SMLoc. 958 if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) 959 return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); 960 961 Size /= 8; // Size is in terms of bits, but we want bytes in the context. 962 } 963 964 // Rewrite the type operator and the C or C++ type or variable in terms of an 965 // immediate. E.g. TYPE foo -> $$4 966 unsigned Len = End.getPointer() - TypeLoc.getPointer(); 967 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size)); 968 969 const MCExpr *Imm = MCConstantExpr::Create(Size, getContext()); 970 return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); 971} 972 973X86Operand *X86AsmParser::ParseIntelOperand() { 974 SMLoc Start = Parser.getTok().getLoc(), End; 975 976 // offset operator. 977 StringRef AsmTokStr = Parser.getTok().getString(); 978 if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") && 979 isParsingInlineAsm()) 980 return ParseIntelOffsetOfOperator(Start); 981 982 // Type directive. 983 if ((AsmTokStr == "type" || AsmTokStr == "TYPE") && 984 isParsingInlineAsm()) 985 return ParseIntelTypeOperator(Start); 986 987 // Unsupported directives. 988 if (isParsingIntelSyntax() && 989 (AsmTokStr == "size" || AsmTokStr == "SIZE" || 990 AsmTokStr == "length" || AsmTokStr == "LENGTH")) 991 return ErrorOperand(Start, "Unsupported directive!"); 992 993 // immediate. 994 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || 995 getLexer().is(AsmToken::Minus)) { 996 const MCExpr *Val; 997 if (!getParser().ParseExpression(Val, End)) { 998 End = Parser.getTok().getLoc(); 999 return X86Operand::CreateImm(Val, Start, End); 1000 } 1001 } 1002 1003 // register 1004 unsigned RegNo = 0; 1005 if (!ParseRegister(RegNo, Start, End)) { 1006 // If this is a segment register followed by a ':', then this is the start 1007 // of a memory reference, otherwise this is a normal register reference. 1008 if (getLexer().isNot(AsmToken::Colon)) 1009 return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc()); 1010 1011 getParser().Lex(); // Eat the colon. 1012 return ParseIntelMemOperand(RegNo, Start); 1013 } 1014 1015 // mem operand 1016 return ParseIntelMemOperand(0, Start); 1017} 1018 1019X86Operand *X86AsmParser::ParseATTOperand() { 1020 switch (getLexer().getKind()) { 1021 default: 1022 // Parse a memory operand with no segment register. 1023 return ParseMemOperand(0, Parser.getTok().getLoc()); 1024 case AsmToken::Percent: { 1025 // Read the register. 1026 unsigned RegNo; 1027 SMLoc Start, End; 1028 if (ParseRegister(RegNo, Start, End)) return 0; 1029 if (RegNo == X86::EIZ || RegNo == X86::RIZ) { 1030 Error(Start, "%eiz and %riz can only be used as index registers", 1031 SMRange(Start, End)); 1032 return 0; 1033 } 1034 1035 // If this is a segment register followed by a ':', then this is the start 1036 // of a memory reference, otherwise this is a normal register reference. 1037 if (getLexer().isNot(AsmToken::Colon)) 1038 return X86Operand::CreateReg(RegNo, Start, End); 1039 1040 1041 getParser().Lex(); // Eat the colon. 1042 return ParseMemOperand(RegNo, Start); 1043 } 1044 case AsmToken::Dollar: { 1045 // $42 -> immediate. 1046 SMLoc Start = Parser.getTok().getLoc(), End; 1047 Parser.Lex(); 1048 const MCExpr *Val; 1049 if (getParser().ParseExpression(Val, End)) 1050 return 0; 1051 return X86Operand::CreateImm(Val, Start, End); 1052 } 1053 } 1054} 1055 1056/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix 1057/// has already been parsed if present. 1058X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { 1059 1060 // We have to disambiguate a parenthesized expression "(4+5)" from the start 1061 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The 1062 // only way to do this without lookahead is to eat the '(' and see what is 1063 // after it. 1064 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); 1065 if (getLexer().isNot(AsmToken::LParen)) { 1066 SMLoc ExprEnd; 1067 if (getParser().ParseExpression(Disp, ExprEnd)) return 0; 1068 1069 // After parsing the base expression we could either have a parenthesized 1070 // memory address or not. If not, return now. If so, eat the (. 1071 if (getLexer().isNot(AsmToken::LParen)) { 1072 // Unless we have a segment register, treat this as an immediate. 1073 if (SegReg == 0) 1074 return X86Operand::CreateMem(Disp, MemStart, ExprEnd); 1075 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1076 } 1077 1078 // Eat the '('. 1079 Parser.Lex(); 1080 } else { 1081 // Okay, we have a '('. We don't know if this is an expression or not, but 1082 // so we have to eat the ( to see beyond it. 1083 SMLoc LParenLoc = Parser.getTok().getLoc(); 1084 Parser.Lex(); // Eat the '('. 1085 1086 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) { 1087 // Nothing to do here, fall into the code below with the '(' part of the 1088 // memory operand consumed. 1089 } else { 1090 SMLoc ExprEnd; 1091 1092 // It must be an parenthesized expression, parse it now. 1093 if (getParser().ParseParenExpression(Disp, ExprEnd)) 1094 return 0; 1095 1096 // After parsing the base expression we could either have a parenthesized 1097 // memory address or not. If not, return now. If so, eat the (. 1098 if (getLexer().isNot(AsmToken::LParen)) { 1099 // Unless we have a segment register, treat this as an immediate. 1100 if (SegReg == 0) 1101 return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); 1102 return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); 1103 } 1104 1105 // Eat the '('. 1106 Parser.Lex(); 1107 } 1108 } 1109 1110 // If we reached here, then we just ate the ( of the memory operand. Process 1111 // the rest of the memory operand. 1112 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 1113 SMLoc IndexLoc; 1114 1115 if (getLexer().is(AsmToken::Percent)) { 1116 SMLoc StartLoc, EndLoc; 1117 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0; 1118 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { 1119 Error(StartLoc, "eiz and riz can only be used as index registers", 1120 SMRange(StartLoc, EndLoc)); 1121 return 0; 1122 } 1123 } 1124 1125 if (getLexer().is(AsmToken::Comma)) { 1126 Parser.Lex(); // Eat the comma. 1127 IndexLoc = Parser.getTok().getLoc(); 1128 1129 // Following the comma we should have either an index register, or a scale 1130 // value. We don't support the later form, but we want to parse it 1131 // correctly. 1132 // 1133 // Not that even though it would be completely consistent to support syntax 1134 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 1135 if (getLexer().is(AsmToken::Percent)) { 1136 SMLoc L; 1137 if (ParseRegister(IndexReg, L, L)) return 0; 1138 1139 if (getLexer().isNot(AsmToken::RParen)) { 1140 // Parse the scale amount: 1141 // ::= ',' [scale-expression] 1142 if (getLexer().isNot(AsmToken::Comma)) { 1143 Error(Parser.getTok().getLoc(), 1144 "expected comma in scale expression"); 1145 return 0; 1146 } 1147 Parser.Lex(); // Eat the comma. 1148 1149 if (getLexer().isNot(AsmToken::RParen)) { 1150 SMLoc Loc = Parser.getTok().getLoc(); 1151 1152 int64_t ScaleVal; 1153 if (getParser().ParseAbsoluteExpression(ScaleVal)){ 1154 Error(Loc, "expected scale expression"); 1155 return 0; 1156 } 1157 1158 // Validate the scale amount. 1159 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ 1160 Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); 1161 return 0; 1162 } 1163 Scale = (unsigned)ScaleVal; 1164 } 1165 } 1166 } else if (getLexer().isNot(AsmToken::RParen)) { 1167 // A scale amount without an index is ignored. 1168 // index. 1169 SMLoc Loc = Parser.getTok().getLoc(); 1170 1171 int64_t Value; 1172 if (getParser().ParseAbsoluteExpression(Value)) 1173 return 0; 1174 1175 if (Value != 1) 1176 Warning(Loc, "scale factor without index register is ignored"); 1177 Scale = 1; 1178 } 1179 } 1180 1181 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 1182 if (getLexer().isNot(AsmToken::RParen)) { 1183 Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); 1184 return 0; 1185 } 1186 SMLoc MemEnd = Parser.getTok().getLoc(); 1187 Parser.Lex(); // Eat the ')'. 1188 1189 // If we have both a base register and an index register make sure they are 1190 // both 64-bit or 32-bit registers. 1191 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 1192 if (BaseReg != 0 && IndexReg != 0) { 1193 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1194 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1195 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && 1196 IndexReg != X86::RIZ) { 1197 Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); 1198 return 0; 1199 } 1200 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1201 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1202 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && 1203 IndexReg != X86::EIZ){ 1204 Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); 1205 return 0; 1206 } 1207 } 1208 1209 return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, 1210 MemStart, MemEnd); 1211} 1212 1213bool X86AsmParser:: 1214ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, 1215 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { 1216 InstInfo = &Info; 1217 StringRef PatchedName = Name; 1218 1219 // FIXME: Hack to recognize setneb as setne. 1220 if (PatchedName.startswith("set") && PatchedName.endswith("b") && 1221 PatchedName != "setb" && PatchedName != "setnb") 1222 PatchedName = PatchedName.substr(0, Name.size()-1); 1223 1224 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 1225 const MCExpr *ExtraImmOp = 0; 1226 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && 1227 (PatchedName.endswith("ss") || PatchedName.endswith("sd") || 1228 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { 1229 bool IsVCMP = PatchedName[0] == 'v'; 1230 unsigned SSECCIdx = IsVCMP ? 4 : 3; 1231 unsigned SSEComparisonCode = StringSwitch<unsigned>( 1232 PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) 1233 .Case("eq", 0x00) 1234 .Case("lt", 0x01) 1235 .Case("le", 0x02) 1236 .Case("unord", 0x03) 1237 .Case("neq", 0x04) 1238 .Case("nlt", 0x05) 1239 .Case("nle", 0x06) 1240 .Case("ord", 0x07) 1241 /* AVX only from here */ 1242 .Case("eq_uq", 0x08) 1243 .Case("nge", 0x09) 1244 .Case("ngt", 0x0A) 1245 .Case("false", 0x0B) 1246 .Case("neq_oq", 0x0C) 1247 .Case("ge", 0x0D) 1248 .Case("gt", 0x0E) 1249 .Case("true", 0x0F) 1250 .Case("eq_os", 0x10) 1251 .Case("lt_oq", 0x11) 1252 .Case("le_oq", 0x12) 1253 .Case("unord_s", 0x13) 1254 .Case("neq_us", 0x14) 1255 .Case("nlt_uq", 0x15) 1256 .Case("nle_uq", 0x16) 1257 .Case("ord_s", 0x17) 1258 .Case("eq_us", 0x18) 1259 .Case("nge_uq", 0x19) 1260 .Case("ngt_uq", 0x1A) 1261 .Case("false_os", 0x1B) 1262 .Case("neq_os", 0x1C) 1263 .Case("ge_oq", 0x1D) 1264 .Case("gt_oq", 0x1E) 1265 .Case("true_us", 0x1F) 1266 .Default(~0U); 1267 if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { 1268 ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, 1269 getParser().getContext()); 1270 if (PatchedName.endswith("ss")) { 1271 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 1272 } else if (PatchedName.endswith("sd")) { 1273 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 1274 } else if (PatchedName.endswith("ps")) { 1275 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 1276 } else { 1277 assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); 1278 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 1279 } 1280 } 1281 } 1282 1283 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); 1284 1285 if (ExtraImmOp && !isParsingIntelSyntax()) 1286 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1287 1288 // Determine whether this is an instruction prefix. 1289 bool isPrefix = 1290 Name == "lock" || Name == "rep" || 1291 Name == "repe" || Name == "repz" || 1292 Name == "repne" || Name == "repnz" || 1293 Name == "rex64" || Name == "data16"; 1294 1295 1296 // This does the actual operand parsing. Don't parse any more if we have a 1297 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we 1298 // just want to parse the "lock" as the first instruction and the "incl" as 1299 // the next one. 1300 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { 1301 1302 // Parse '*' modifier. 1303 if (getLexer().is(AsmToken::Star)) { 1304 SMLoc Loc = Parser.getTok().getLoc(); 1305 Operands.push_back(X86Operand::CreateToken("*", Loc)); 1306 Parser.Lex(); // Eat the star. 1307 } 1308 1309 // Read the first operand. 1310 if (X86Operand *Op = ParseOperand()) 1311 Operands.push_back(Op); 1312 else { 1313 Parser.EatToEndOfStatement(); 1314 return true; 1315 } 1316 1317 while (getLexer().is(AsmToken::Comma)) { 1318 Parser.Lex(); // Eat the comma. 1319 1320 // Parse and remember the operand. 1321 if (X86Operand *Op = ParseOperand()) 1322 Operands.push_back(Op); 1323 else { 1324 Parser.EatToEndOfStatement(); 1325 return true; 1326 } 1327 } 1328 1329 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1330 SMLoc Loc = getLexer().getLoc(); 1331 Parser.EatToEndOfStatement(); 1332 return Error(Loc, "unexpected token in argument list"); 1333 } 1334 } 1335 1336 if (getLexer().is(AsmToken::EndOfStatement)) 1337 Parser.Lex(); // Consume the EndOfStatement 1338 else if (isPrefix && getLexer().is(AsmToken::Slash)) 1339 Parser.Lex(); // Consume the prefix separator Slash 1340 1341 if (ExtraImmOp && isParsingIntelSyntax()) 1342 Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); 1343 1344 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> 1345 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely 1346 // documented form in various unofficial manuals, so a lot of code uses it. 1347 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && 1348 Operands.size() == 3) { 1349 X86Operand &Op = *(X86Operand*)Operands.back(); 1350 if (Op.isMem() && Op.Mem.SegReg == 0 && 1351 isa<MCConstantExpr>(Op.Mem.Disp) && 1352 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1353 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1354 SMLoc Loc = Op.getEndLoc(); 1355 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1356 delete &Op; 1357 } 1358 } 1359 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 1360 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && 1361 Operands.size() == 3) { 1362 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1363 if (Op.isMem() && Op.Mem.SegReg == 0 && 1364 isa<MCConstantExpr>(Op.Mem.Disp) && 1365 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && 1366 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { 1367 SMLoc Loc = Op.getEndLoc(); 1368 Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); 1369 delete &Op; 1370 } 1371 } 1372 // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" 1373 if (Name.startswith("ins") && Operands.size() == 3 && 1374 (Name == "insb" || Name == "insw" || Name == "insl")) { 1375 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1376 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1377 if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { 1378 Operands.pop_back(); 1379 Operands.pop_back(); 1380 delete &Op; 1381 delete &Op2; 1382 } 1383 } 1384 1385 // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" 1386 if (Name.startswith("outs") && Operands.size() == 3 && 1387 (Name == "outsb" || Name == "outsw" || Name == "outsl")) { 1388 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1389 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1390 if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { 1391 Operands.pop_back(); 1392 Operands.pop_back(); 1393 delete &Op; 1394 delete &Op2; 1395 } 1396 } 1397 1398 // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" 1399 if (Name.startswith("movs") && Operands.size() == 3 && 1400 (Name == "movsb" || Name == "movsw" || Name == "movsl" || 1401 (is64BitMode() && Name == "movsq"))) { 1402 X86Operand &Op = *(X86Operand*)Operands.begin()[1]; 1403 X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; 1404 if (isSrcOp(Op) && isDstOp(Op2)) { 1405 Operands.pop_back(); 1406 Operands.pop_back(); 1407 delete &Op; 1408 delete &Op2; 1409 } 1410 } 1411 // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" 1412 if (Name.startswith("lods") && Operands.size() == 3 && 1413 (Name == "lods" || Name == "lodsb" || Name == "lodsw" || 1414 Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) { 1415 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1416 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 1417 if (isSrcOp(*Op1) && Op2->isReg()) { 1418 const char *ins; 1419 unsigned reg = Op2->getReg(); 1420 bool isLods = Name == "lods"; 1421 if (reg == X86::AL && (isLods || Name == "lodsb")) 1422 ins = "lodsb"; 1423 else if (reg == X86::AX && (isLods || Name == "lodsw")) 1424 ins = "lodsw"; 1425 else if (reg == X86::EAX && (isLods || Name == "lodsl")) 1426 ins = "lodsl"; 1427 else if (reg == X86::RAX && (isLods || Name == "lodsq")) 1428 ins = "lodsq"; 1429 else 1430 ins = NULL; 1431 if (ins != NULL) { 1432 Operands.pop_back(); 1433 Operands.pop_back(); 1434 delete Op1; 1435 delete Op2; 1436 if (Name != ins) 1437 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 1438 } 1439 } 1440 } 1441 // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" 1442 if (Name.startswith("stos") && Operands.size() == 3 && 1443 (Name == "stos" || Name == "stosb" || Name == "stosw" || 1444 Name == "stosl" || (is64BitMode() && Name == "stosq"))) { 1445 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1446 X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]); 1447 if (isDstOp(*Op2) && Op1->isReg()) { 1448 const char *ins; 1449 unsigned reg = Op1->getReg(); 1450 bool isStos = Name == "stos"; 1451 if (reg == X86::AL && (isStos || Name == "stosb")) 1452 ins = "stosb"; 1453 else if (reg == X86::AX && (isStos || Name == "stosw")) 1454 ins = "stosw"; 1455 else if (reg == X86::EAX && (isStos || Name == "stosl")) 1456 ins = "stosl"; 1457 else if (reg == X86::RAX && (isStos || Name == "stosq")) 1458 ins = "stosq"; 1459 else 1460 ins = NULL; 1461 if (ins != NULL) { 1462 Operands.pop_back(); 1463 Operands.pop_back(); 1464 delete Op1; 1465 delete Op2; 1466 if (Name != ins) 1467 static_cast<X86Operand*>(Operands[0])->setTokenValue(ins); 1468 } 1469 } 1470 } 1471 1472 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to 1473 // "shift <op>". 1474 if ((Name.startswith("shr") || Name.startswith("sar") || 1475 Name.startswith("shl") || Name.startswith("sal") || 1476 Name.startswith("rcl") || Name.startswith("rcr") || 1477 Name.startswith("rol") || Name.startswith("ror")) && 1478 Operands.size() == 3) { 1479 if (isParsingIntelSyntax()) { 1480 // Intel syntax 1481 X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); 1482 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1483 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 1484 delete Operands[2]; 1485 Operands.pop_back(); 1486 } 1487 } else { 1488 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1489 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1490 cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { 1491 delete Operands[1]; 1492 Operands.erase(Operands.begin() + 1); 1493 } 1494 } 1495 } 1496 1497 // Transforms "int $3" into "int3" as a size optimization. We can't write an 1498 // instalias with an immediate operand yet. 1499 if (Name == "int" && Operands.size() == 2) { 1500 X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); 1501 if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && 1502 cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { 1503 delete Operands[1]; 1504 Operands.erase(Operands.begin() + 1); 1505 static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); 1506 } 1507 } 1508 1509 return false; 1510} 1511 1512bool X86AsmParser:: 1513processInstruction(MCInst &Inst, 1514 const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { 1515 switch (Inst.getOpcode()) { 1516 default: return false; 1517 case X86::AND16i16: { 1518 if (!Inst.getOperand(0).isImm() || 1519 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1520 return false; 1521 1522 MCInst TmpInst; 1523 TmpInst.setOpcode(X86::AND16ri8); 1524 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1525 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1526 TmpInst.addOperand(Inst.getOperand(0)); 1527 Inst = TmpInst; 1528 return true; 1529 } 1530 case X86::AND32i32: { 1531 if (!Inst.getOperand(0).isImm() || 1532 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1533 return false; 1534 1535 MCInst TmpInst; 1536 TmpInst.setOpcode(X86::AND32ri8); 1537 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1538 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1539 TmpInst.addOperand(Inst.getOperand(0)); 1540 Inst = TmpInst; 1541 return true; 1542 } 1543 case X86::AND64i32: { 1544 if (!Inst.getOperand(0).isImm() || 1545 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1546 return false; 1547 1548 MCInst TmpInst; 1549 TmpInst.setOpcode(X86::AND64ri8); 1550 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1551 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1552 TmpInst.addOperand(Inst.getOperand(0)); 1553 Inst = TmpInst; 1554 return true; 1555 } 1556 case X86::XOR16i16: { 1557 if (!Inst.getOperand(0).isImm() || 1558 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1559 return false; 1560 1561 MCInst TmpInst; 1562 TmpInst.setOpcode(X86::XOR16ri8); 1563 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1564 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1565 TmpInst.addOperand(Inst.getOperand(0)); 1566 Inst = TmpInst; 1567 return true; 1568 } 1569 case X86::XOR32i32: { 1570 if (!Inst.getOperand(0).isImm() || 1571 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1572 return false; 1573 1574 MCInst TmpInst; 1575 TmpInst.setOpcode(X86::XOR32ri8); 1576 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1577 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1578 TmpInst.addOperand(Inst.getOperand(0)); 1579 Inst = TmpInst; 1580 return true; 1581 } 1582 case X86::XOR64i32: { 1583 if (!Inst.getOperand(0).isImm() || 1584 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1585 return false; 1586 1587 MCInst TmpInst; 1588 TmpInst.setOpcode(X86::XOR64ri8); 1589 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1590 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1591 TmpInst.addOperand(Inst.getOperand(0)); 1592 Inst = TmpInst; 1593 return true; 1594 } 1595 case X86::OR16i16: { 1596 if (!Inst.getOperand(0).isImm() || 1597 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1598 return false; 1599 1600 MCInst TmpInst; 1601 TmpInst.setOpcode(X86::OR16ri8); 1602 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1603 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1604 TmpInst.addOperand(Inst.getOperand(0)); 1605 Inst = TmpInst; 1606 return true; 1607 } 1608 case X86::OR32i32: { 1609 if (!Inst.getOperand(0).isImm() || 1610 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1611 return false; 1612 1613 MCInst TmpInst; 1614 TmpInst.setOpcode(X86::OR32ri8); 1615 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1616 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1617 TmpInst.addOperand(Inst.getOperand(0)); 1618 Inst = TmpInst; 1619 return true; 1620 } 1621 case X86::OR64i32: { 1622 if (!Inst.getOperand(0).isImm() || 1623 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1624 return false; 1625 1626 MCInst TmpInst; 1627 TmpInst.setOpcode(X86::OR64ri8); 1628 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1629 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1630 TmpInst.addOperand(Inst.getOperand(0)); 1631 Inst = TmpInst; 1632 return true; 1633 } 1634 case X86::CMP16i16: { 1635 if (!Inst.getOperand(0).isImm() || 1636 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1637 return false; 1638 1639 MCInst TmpInst; 1640 TmpInst.setOpcode(X86::CMP16ri8); 1641 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1642 TmpInst.addOperand(Inst.getOperand(0)); 1643 Inst = TmpInst; 1644 return true; 1645 } 1646 case X86::CMP32i32: { 1647 if (!Inst.getOperand(0).isImm() || 1648 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1649 return false; 1650 1651 MCInst TmpInst; 1652 TmpInst.setOpcode(X86::CMP32ri8); 1653 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1654 TmpInst.addOperand(Inst.getOperand(0)); 1655 Inst = TmpInst; 1656 return true; 1657 } 1658 case X86::CMP64i32: { 1659 if (!Inst.getOperand(0).isImm() || 1660 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1661 return false; 1662 1663 MCInst TmpInst; 1664 TmpInst.setOpcode(X86::CMP64ri8); 1665 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1666 TmpInst.addOperand(Inst.getOperand(0)); 1667 Inst = TmpInst; 1668 return true; 1669 } 1670 case X86::ADD16i16: { 1671 if (!Inst.getOperand(0).isImm() || 1672 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1673 return false; 1674 1675 MCInst TmpInst; 1676 TmpInst.setOpcode(X86::ADD16ri8); 1677 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1678 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1679 TmpInst.addOperand(Inst.getOperand(0)); 1680 Inst = TmpInst; 1681 return true; 1682 } 1683 case X86::ADD32i32: { 1684 if (!Inst.getOperand(0).isImm() || 1685 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1686 return false; 1687 1688 MCInst TmpInst; 1689 TmpInst.setOpcode(X86::ADD32ri8); 1690 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1691 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1692 TmpInst.addOperand(Inst.getOperand(0)); 1693 Inst = TmpInst; 1694 return true; 1695 } 1696 case X86::ADD64i32: { 1697 if (!Inst.getOperand(0).isImm() || 1698 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1699 return false; 1700 1701 MCInst TmpInst; 1702 TmpInst.setOpcode(X86::ADD64ri8); 1703 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1704 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1705 TmpInst.addOperand(Inst.getOperand(0)); 1706 Inst = TmpInst; 1707 return true; 1708 } 1709 case X86::SUB16i16: { 1710 if (!Inst.getOperand(0).isImm() || 1711 !isImmSExti16i8Value(Inst.getOperand(0).getImm())) 1712 return false; 1713 1714 MCInst TmpInst; 1715 TmpInst.setOpcode(X86::SUB16ri8); 1716 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1717 TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); 1718 TmpInst.addOperand(Inst.getOperand(0)); 1719 Inst = TmpInst; 1720 return true; 1721 } 1722 case X86::SUB32i32: { 1723 if (!Inst.getOperand(0).isImm() || 1724 !isImmSExti32i8Value(Inst.getOperand(0).getImm())) 1725 return false; 1726 1727 MCInst TmpInst; 1728 TmpInst.setOpcode(X86::SUB32ri8); 1729 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1730 TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); 1731 TmpInst.addOperand(Inst.getOperand(0)); 1732 Inst = TmpInst; 1733 return true; 1734 } 1735 case X86::SUB64i32: { 1736 if (!Inst.getOperand(0).isImm() || 1737 !isImmSExti64i8Value(Inst.getOperand(0).getImm())) 1738 return false; 1739 1740 MCInst TmpInst; 1741 TmpInst.setOpcode(X86::SUB64ri8); 1742 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1743 TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); 1744 TmpInst.addOperand(Inst.getOperand(0)); 1745 Inst = TmpInst; 1746 return true; 1747 } 1748 } 1749} 1750 1751bool X86AsmParser:: 1752MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1753 SmallVectorImpl<MCParsedAsmOperand*> &Operands, 1754 MCStreamer &Out, unsigned &ErrorInfo, 1755 bool MatchingInlineAsm) { 1756 assert(!Operands.empty() && "Unexpect empty operand list!"); 1757 X86Operand *Op = static_cast<X86Operand*>(Operands[0]); 1758 assert(Op->isToken() && "Leading operand should always be a mnemonic!"); 1759 ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); 1760 1761 // First, handle aliases that expand to multiple instructions. 1762 // FIXME: This should be replaced with a real .td file alias mechanism. 1763 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 1764 // call. 1765 if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || 1766 Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || 1767 Op->getToken() == "finit" || Op->getToken() == "fsave" || 1768 Op->getToken() == "fstenv" || Op->getToken() == "fclex") { 1769 MCInst Inst; 1770 Inst.setOpcode(X86::WAIT); 1771 Inst.setLoc(IDLoc); 1772 if (!MatchingInlineAsm) 1773 Out.EmitInstruction(Inst); 1774 1775 const char *Repl = 1776 StringSwitch<const char*>(Op->getToken()) 1777 .Case("finit", "fninit") 1778 .Case("fsave", "fnsave") 1779 .Case("fstcw", "fnstcw") 1780 .Case("fstcww", "fnstcw") 1781 .Case("fstenv", "fnstenv") 1782 .Case("fstsw", "fnstsw") 1783 .Case("fstsww", "fnstsw") 1784 .Case("fclex", "fnclex") 1785 .Default(0); 1786 assert(Repl && "Unknown wait-prefixed instruction"); 1787 delete Operands[0]; 1788 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 1789 } 1790 1791 bool WasOriginallyInvalidOperand = false; 1792 MCInst Inst; 1793 1794 // First, try a direct match. 1795 switch (MatchInstructionImpl(Operands, Inst, 1796 ErrorInfo, MatchingInlineAsm, 1797 isParsingIntelSyntax())) { 1798 default: break; 1799 case Match_Success: 1800 // Some instructions need post-processing to, for example, tweak which 1801 // encoding is selected. Loop on it while changes happen so the 1802 // individual transformations can chain off each other. 1803 if (!MatchingInlineAsm) 1804 while (processInstruction(Inst, Operands)) 1805 ; 1806 1807 Inst.setLoc(IDLoc); 1808 if (!MatchingInlineAsm) 1809 Out.EmitInstruction(Inst); 1810 Opcode = Inst.getOpcode(); 1811 return false; 1812 case Match_MissingFeature: 1813 Error(IDLoc, "instruction requires a CPU feature not currently enabled", 1814 EmptyRanges, MatchingInlineAsm); 1815 return true; 1816 case Match_InvalidOperand: 1817 WasOriginallyInvalidOperand = true; 1818 break; 1819 case Match_MnemonicFail: 1820 break; 1821 } 1822 1823 // FIXME: Ideally, we would only attempt suffix matches for things which are 1824 // valid prefixes, and we could just infer the right unambiguous 1825 // type. However, that requires substantially more matcher support than the 1826 // following hack. 1827 1828 // Change the operand to point to a temporary token. 1829 StringRef Base = Op->getToken(); 1830 SmallString<16> Tmp; 1831 Tmp += Base; 1832 Tmp += ' '; 1833 Op->setTokenValue(Tmp.str()); 1834 1835 // If this instruction starts with an 'f', then it is a floating point stack 1836 // instruction. These come in up to three forms for 32-bit, 64-bit, and 1837 // 80-bit floating point, which use the suffixes s,l,t respectively. 1838 // 1839 // Otherwise, we assume that this may be an integer instruction, which comes 1840 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 1841 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 1842 1843 // Check for the various suffix matches. 1844 Tmp[Base.size()] = Suffixes[0]; 1845 unsigned ErrorInfoIgnore; 1846 unsigned Match1, Match2, Match3, Match4; 1847 1848 Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 1849 isParsingIntelSyntax()); 1850 Tmp[Base.size()] = Suffixes[1]; 1851 Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 1852 isParsingIntelSyntax()); 1853 Tmp[Base.size()] = Suffixes[2]; 1854 Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 1855 isParsingIntelSyntax()); 1856 Tmp[Base.size()] = Suffixes[3]; 1857 Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, 1858 isParsingIntelSyntax()); 1859 1860 // Restore the old token. 1861 Op->setTokenValue(Base); 1862 1863 // If exactly one matched, then we treat that as a successful match (and the 1864 // instruction will already have been filled in correctly, since the failing 1865 // matches won't have modified it). 1866 unsigned NumSuccessfulMatches = 1867 (Match1 == Match_Success) + (Match2 == Match_Success) + 1868 (Match3 == Match_Success) + (Match4 == Match_Success); 1869 if (NumSuccessfulMatches == 1) { 1870 Inst.setLoc(IDLoc); 1871 if (!MatchingInlineAsm) 1872 Out.EmitInstruction(Inst); 1873 Opcode = Inst.getOpcode(); 1874 return false; 1875 } 1876 1877 // Otherwise, the match failed, try to produce a decent error message. 1878 1879 // If we had multiple suffix matches, then identify this as an ambiguous 1880 // match. 1881 if (NumSuccessfulMatches > 1) { 1882 char MatchChars[4]; 1883 unsigned NumMatches = 0; 1884 if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; 1885 if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; 1886 if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; 1887 if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; 1888 1889 SmallString<126> Msg; 1890 raw_svector_ostream OS(Msg); 1891 OS << "ambiguous instructions require an explicit suffix (could be "; 1892 for (unsigned i = 0; i != NumMatches; ++i) { 1893 if (i != 0) 1894 OS << ", "; 1895 if (i + 1 == NumMatches) 1896 OS << "or "; 1897 OS << "'" << Base << MatchChars[i] << "'"; 1898 } 1899 OS << ")"; 1900 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm); 1901 return true; 1902 } 1903 1904 // Okay, we know that none of the variants matched successfully. 1905 1906 // If all of the instructions reported an invalid mnemonic, then the original 1907 // mnemonic was invalid. 1908 if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && 1909 (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { 1910 if (!WasOriginallyInvalidOperand) { 1911 ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : 1912 Op->getLocRange(); 1913 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 1914 Ranges, MatchingInlineAsm); 1915 } 1916 1917 // Recover location info for the operand if we know which was the problem. 1918 if (ErrorInfo != ~0U) { 1919 if (ErrorInfo >= Operands.size()) 1920 return Error(IDLoc, "too few operands for instruction", 1921 EmptyRanges, MatchingInlineAsm); 1922 1923 X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; 1924 if (Operand->getStartLoc().isValid()) { 1925 SMRange OperandRange = Operand->getLocRange(); 1926 return Error(Operand->getStartLoc(), "invalid operand for instruction", 1927 OperandRange, MatchingInlineAsm); 1928 } 1929 } 1930 1931 return Error(IDLoc, "invalid operand for instruction", EmptyRanges, 1932 MatchingInlineAsm); 1933 } 1934 1935 // If one instruction matched with a missing feature, report this as a 1936 // missing feature. 1937 if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + 1938 (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ 1939 Error(IDLoc, "instruction requires a CPU feature not currently enabled", 1940 EmptyRanges, MatchingInlineAsm); 1941 return true; 1942 } 1943 1944 // If one instruction matched with an invalid operand, report this as an 1945 // operand failure. 1946 if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + 1947 (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ 1948 Error(IDLoc, "invalid operand for instruction", EmptyRanges, 1949 MatchingInlineAsm); 1950 return true; 1951 } 1952 1953 // If all of these were an outright failure, report it in a useless way. 1954 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 1955 EmptyRanges, MatchingInlineAsm); 1956 return true; 1957} 1958 1959 1960bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 1961 StringRef IDVal = DirectiveID.getIdentifier(); 1962 if (IDVal == ".word") 1963 return ParseDirectiveWord(2, DirectiveID.getLoc()); 1964 else if (IDVal.startswith(".code")) 1965 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 1966 else if (IDVal.startswith(".att_syntax")) { 1967 getParser().setAssemblerDialect(0); 1968 return false; 1969 } else if (IDVal.startswith(".intel_syntax")) { 1970 getParser().setAssemblerDialect(1); 1971 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1972 if(Parser.getTok().getString() == "noprefix") { 1973 // FIXME : Handle noprefix 1974 Parser.Lex(); 1975 } else 1976 return true; 1977 } 1978 return false; 1979 } 1980 return true; 1981} 1982 1983/// ParseDirectiveWord 1984/// ::= .word [ expression (, expression)* ] 1985bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { 1986 if (getLexer().isNot(AsmToken::EndOfStatement)) { 1987 for (;;) { 1988 const MCExpr *Value; 1989 if (getParser().ParseExpression(Value)) 1990 return true; 1991 1992 getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); 1993 1994 if (getLexer().is(AsmToken::EndOfStatement)) 1995 break; 1996 1997 // FIXME: Improve diagnostic. 1998 if (getLexer().isNot(AsmToken::Comma)) 1999 return Error(L, "unexpected token in directive"); 2000 Parser.Lex(); 2001 } 2002 } 2003 2004 Parser.Lex(); 2005 return false; 2006} 2007 2008/// ParseDirectiveCode 2009/// ::= .code32 | .code64 2010bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 2011 if (IDVal == ".code32") { 2012 Parser.Lex(); 2013 if (is64BitMode()) { 2014 SwitchMode(); 2015 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); 2016 } 2017 } else if (IDVal == ".code64") { 2018 Parser.Lex(); 2019 if (!is64BitMode()) { 2020 SwitchMode(); 2021 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64); 2022 } 2023 } else { 2024 return Error(L, "unexpected directive " + IDVal); 2025 } 2026 2027 return false; 2028} 2029 2030 2031extern "C" void LLVMInitializeX86AsmLexer(); 2032 2033// Force static initialization. 2034extern "C" void LLVMInitializeX86AsmParser() { 2035 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); 2036 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); 2037 LLVMInitializeX86AsmLexer(); 2038} 2039 2040#define GET_REGISTER_MATCHER 2041#define GET_MATCHER_IMPLEMENTATION 2042#include "X86GenAsmMatcher.inc" 2043