1//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "MCTargetDesc/X86BaseInfo.h"
11#include "llvm/MC/MCTargetAsmParser.h"
12#include "llvm/MC/MCStreamer.h"
13#include "llvm/MC/MCExpr.h"
14#include "llvm/MC/MCSymbol.h"
15#include "llvm/MC/MCInst.h"
16#include "llvm/MC/MCRegisterInfo.h"
17#include "llvm/MC/MCSubtargetInfo.h"
18#include "llvm/MC/MCParser/MCAsmLexer.h"
19#include "llvm/MC/MCParser/MCAsmParser.h"
20#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21#include "llvm/ADT/APFloat.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/StringSwitch.h"
25#include "llvm/ADT/Twine.h"
26#include "llvm/Support/SourceMgr.h"
27#include "llvm/Support/TargetRegistry.h"
28#include "llvm/Support/raw_ostream.h"
29
30using namespace llvm;
31
32namespace {
33struct X86Operand;
34
35class X86AsmParser : public MCTargetAsmParser {
36  MCSubtargetInfo &STI;
37  MCAsmParser &Parser;
38  ParseInstructionInfo *InstInfo;
39private:
40  MCAsmParser &getParser() const { return Parser; }
41
42  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
43
44  bool Error(SMLoc L, const Twine &Msg,
45             ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
46             bool MatchingInlineAsm = false) {
47    if (MatchingInlineAsm) return true;
48    return Parser.Error(L, Msg, Ranges);
49  }
50
51  X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
52    Error(Loc, Msg);
53    return 0;
54  }
55
56  X86Operand *ParseOperand();
57  X86Operand *ParseATTOperand();
58  X86Operand *ParseIntelOperand();
59  X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
60  X86Operand *ParseIntelTypeOperator(SMLoc StartLoc);
61  X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc);
62  X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
63  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
64
65  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
66                             SmallString<64> &Err);
67
68  bool ParseDirectiveWord(unsigned Size, SMLoc L);
69  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
70
71  bool processInstruction(MCInst &Inst,
72                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
73
74  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
75                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
76                               MCStreamer &Out, unsigned &ErrorInfo,
77                               bool MatchingInlineAsm);
78
79  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
80  /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
81  bool isSrcOp(X86Operand &Op);
82
83  /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
84  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
85  bool isDstOp(X86Operand &Op);
86
87  bool is64BitMode() const {
88    // FIXME: Can tablegen auto-generate this?
89    return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
90  }
91  void SwitchMode() {
92    unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(X86::Mode64Bit));
93    setAvailableFeatures(FB);
94  }
95
96  /// @name Auto-generated Matcher Functions
97  /// {
98
99#define GET_ASSEMBLER_HEADER
100#include "X86GenAsmMatcher.inc"
101
102  /// }
103
104public:
105  X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
106    : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
107
108    // Initialize the set of available features.
109    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
110  }
111  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
112
113  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
114                                SMLoc NameLoc,
115                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
116
117  virtual bool ParseDirective(AsmToken DirectiveID);
118
119  bool isParsingIntelSyntax() {
120    return getParser().getAssemblerDialect();
121  }
122};
123} // end anonymous namespace
124
125/// @name Auto-generated Match Functions
126/// {
127
128static unsigned MatchRegisterName(StringRef Name);
129
130/// }
131
/// Returns true if \p Value is representable as an 8-bit immediate that is
/// sign-extended to 16 bits. Accepts the non-negative values 0..0x7F and the
/// negative values -128..-1 written either as a 16-bit or as a 64-bit
/// two's-complement pattern.
static bool isImmSExti16i8Value(uint64_t Value) {
  // 0 .. 127.
  if (Value <= 0x7FULL)
    return true;

  // -128 .. -1 expressed in 16 bits.
  if (Value >= 0xFF80ULL && Value <= 0xFFFFULL)
    return true;

  // -128 .. -1 expressed in 64 bits.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
137
/// Returns true if \p Value is representable as an 8-bit immediate that is
/// sign-extended to 32 bits. Accepts the non-negative values 0..0x7F and the
/// negative values -128..-1 written either as a 32-bit or as a 64-bit
/// two's-complement pattern.
static bool isImmSExti32i8Value(uint64_t Value) {
  // 0 .. 127.
  if (Value <= 0x7FULL)
    return true;

  // -128 .. -1 expressed in 32 bits.
  if (Value >= 0xFFFFFF80ULL && Value <= 0xFFFFFFFFULL)
    return true;

  // -128 .. -1 expressed in 64 bits.
  return Value >= 0xFFFFFFFFFFFFFF80ULL;
}
143
/// Returns true if \p Value fits in an unsigned 8-bit immediate, i.e. no bit
/// above the low byte is set.
static bool isImmZExtu32u8Value(uint64_t Value) {
  return (Value >> 8) == 0;
}
147
/// Returns true if \p Value is representable as an 8-bit immediate that is
/// sign-extended to 64 bits: 0..0x7F, or -128..-1 as a 64-bit
/// two's-complement pattern.
static bool isImmSExti64i8Value(uint64_t Value) {
  const bool FitsNonNegative = Value <= 0x7FULL;
  const bool FitsNegative = Value >= 0xFFFFFFFFFFFFFF80ULL;
  return FitsNonNegative || FitsNegative;
}
152
/// Returns true if \p Value is representable as a 32-bit immediate that is
/// sign-extended to 64 bits: 0..0x7FFFFFFF, or -2^31..-1 as a 64-bit
/// two's-complement pattern.
static bool isImmSExti64i32Value(uint64_t Value) {
  const bool FitsNonNegative = Value <= 0x7FFFFFFFULL;
  const bool FitsNegative = Value >= 0xFFFFFFFF80000000ULL;
  return FitsNonNegative || FitsNegative;
}
157namespace {
158
159/// X86Operand - Instances of this class represent a parsed X86 machine
160/// instruction.
161struct X86Operand : public MCParsedAsmOperand {
162  enum KindTy {
163    Token,
164    Register,
165    Immediate,
166    Memory
167  } Kind;
168
169  SMLoc StartLoc, EndLoc;
170  SMLoc OffsetOfLoc;
171
172  union {
173    struct {
174      const char *Data;
175      unsigned Length;
176    } Tok;
177
178    struct {
179      unsigned RegNo;
180    } Reg;
181
182    struct {
183      const MCExpr *Val;
184      bool NeedAsmRewrite;
185    } Imm;
186
187    struct {
188      unsigned SegReg;
189      const MCExpr *Disp;
190      unsigned BaseReg;
191      unsigned IndexReg;
192      unsigned Scale;
193      unsigned Size;
194      bool NeedSizeDir;
195    } Mem;
196  };
197
198  X86Operand(KindTy K, SMLoc Start, SMLoc End)
199    : Kind(K), StartLoc(Start), EndLoc(End) {}
200
201  /// getStartLoc - Get the location of the first token of this operand.
202  SMLoc getStartLoc() const { return StartLoc; }
203  /// getEndLoc - Get the location of the last token of this operand.
204  SMLoc getEndLoc() const { return EndLoc; }
205  /// getLocRange - Get the range between the first and last token of this
206  /// operand.
207  SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
208  /// getOffsetOfLoc - Get the location of the offset operator.
209  SMLoc getOffsetOfLoc() const { return OffsetOfLoc; }
210
211  virtual void print(raw_ostream &OS) const {}
212
213  StringRef getToken() const {
214    assert(Kind == Token && "Invalid access!");
215    return StringRef(Tok.Data, Tok.Length);
216  }
217  void setTokenValue(StringRef Value) {
218    assert(Kind == Token && "Invalid access!");
219    Tok.Data = Value.data();
220    Tok.Length = Value.size();
221  }
222
223  unsigned getReg() const {
224    assert(Kind == Register && "Invalid access!");
225    return Reg.RegNo;
226  }
227
228  const MCExpr *getImm() const {
229    assert(Kind == Immediate && "Invalid access!");
230    return Imm.Val;
231  }
232
233  bool needAsmRewrite() const {
234    assert(Kind == Immediate && "Invalid access!");
235    return Imm.NeedAsmRewrite;
236  }
237
238  const MCExpr *getMemDisp() const {
239    assert(Kind == Memory && "Invalid access!");
240    return Mem.Disp;
241  }
242  unsigned getMemSegReg() const {
243    assert(Kind == Memory && "Invalid access!");
244    return Mem.SegReg;
245  }
246  unsigned getMemBaseReg() const {
247    assert(Kind == Memory && "Invalid access!");
248    return Mem.BaseReg;
249  }
250  unsigned getMemIndexReg() const {
251    assert(Kind == Memory && "Invalid access!");
252    return Mem.IndexReg;
253  }
254  unsigned getMemScale() const {
255    assert(Kind == Memory && "Invalid access!");
256    return Mem.Scale;
257  }
258
259  bool isToken() const {return Kind == Token; }
260
261  bool isImm() const { return Kind == Immediate; }
262
263  bool isImmSExti16i8() const {
264    if (!isImm())
265      return false;
266
267    // If this isn't a constant expr, just assume it fits and let relaxation
268    // handle it.
269    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
270    if (!CE)
271      return true;
272
273    // Otherwise, check the value is in a range that makes sense for this
274    // extension.
275    return isImmSExti16i8Value(CE->getValue());
276  }
277  bool isImmSExti32i8() const {
278    if (!isImm())
279      return false;
280
281    // If this isn't a constant expr, just assume it fits and let relaxation
282    // handle it.
283    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
284    if (!CE)
285      return true;
286
287    // Otherwise, check the value is in a range that makes sense for this
288    // extension.
289    return isImmSExti32i8Value(CE->getValue());
290  }
291  bool isImmZExtu32u8() const {
292    if (!isImm())
293      return false;
294
295    // If this isn't a constant expr, just assume it fits and let relaxation
296    // handle it.
297    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
298    if (!CE)
299      return true;
300
301    // Otherwise, check the value is in a range that makes sense for this
302    // extension.
303    return isImmZExtu32u8Value(CE->getValue());
304  }
305  bool isImmSExti64i8() const {
306    if (!isImm())
307      return false;
308
309    // If this isn't a constant expr, just assume it fits and let relaxation
310    // handle it.
311    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
312    if (!CE)
313      return true;
314
315    // Otherwise, check the value is in a range that makes sense for this
316    // extension.
317    return isImmSExti64i8Value(CE->getValue());
318  }
319  bool isImmSExti64i32() const {
320    if (!isImm())
321      return false;
322
323    // If this isn't a constant expr, just assume it fits and let relaxation
324    // handle it.
325    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
326    if (!CE)
327      return true;
328
329    // Otherwise, check the value is in a range that makes sense for this
330    // extension.
331    return isImmSExti64i32Value(CE->getValue());
332  }
333
334  unsigned getMemSize() const {
335    assert(Kind == Memory && "Invalid access!");
336    return Mem.Size;
337  }
338
339  bool isOffsetOf() const {
340    return OffsetOfLoc.getPointer();
341  }
342
343  bool needSizeDirective() const {
344    assert(Kind == Memory && "Invalid access!");
345    return Mem.NeedSizeDir;
346  }
347
348  bool isMem() const { return Kind == Memory; }
349  bool isMem8() const {
350    return Kind == Memory && (!Mem.Size || Mem.Size == 8);
351  }
352  bool isMem16() const {
353    return Kind == Memory && (!Mem.Size || Mem.Size == 16);
354  }
355  bool isMem32() const {
356    return Kind == Memory && (!Mem.Size || Mem.Size == 32);
357  }
358  bool isMem64() const {
359    return Kind == Memory && (!Mem.Size || Mem.Size == 64);
360  }
361  bool isMem80() const {
362    return Kind == Memory && (!Mem.Size || Mem.Size == 80);
363  }
364  bool isMem128() const {
365    return Kind == Memory && (!Mem.Size || Mem.Size == 128);
366  }
367  bool isMem256() const {
368    return Kind == Memory && (!Mem.Size || Mem.Size == 256);
369  }
370
371  bool isMemVX32() const {
372    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
373      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
374  }
375  bool isMemVY32() const {
376    return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
377      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
378  }
379  bool isMemVX64() const {
380    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
381      getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15;
382  }
383  bool isMemVY64() const {
384    return Kind == Memory && (!Mem.Size || Mem.Size == 64) &&
385      getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15;
386  }
387
388  bool isAbsMem() const {
389    return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
390      !getMemIndexReg() && getMemScale() == 1;
391  }
392
393  bool isReg() const { return Kind == Register; }
394
395  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
396    // Add as immediates when possible.
397    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
398      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
399    else
400      Inst.addOperand(MCOperand::CreateExpr(Expr));
401  }
402
403  void addRegOperands(MCInst &Inst, unsigned N) const {
404    assert(N == 1 && "Invalid number of operands!");
405    Inst.addOperand(MCOperand::CreateReg(getReg()));
406  }
407
408  void addImmOperands(MCInst &Inst, unsigned N) const {
409    assert(N == 1 && "Invalid number of operands!");
410    addExpr(Inst, getImm());
411  }
412
413  void addMem8Operands(MCInst &Inst, unsigned N) const {
414    addMemOperands(Inst, N);
415  }
416  void addMem16Operands(MCInst &Inst, unsigned N) const {
417    addMemOperands(Inst, N);
418  }
419  void addMem32Operands(MCInst &Inst, unsigned N) const {
420    addMemOperands(Inst, N);
421  }
422  void addMem64Operands(MCInst &Inst, unsigned N) const {
423    addMemOperands(Inst, N);
424  }
425  void addMem80Operands(MCInst &Inst, unsigned N) const {
426    addMemOperands(Inst, N);
427  }
428  void addMem128Operands(MCInst &Inst, unsigned N) const {
429    addMemOperands(Inst, N);
430  }
431  void addMem256Operands(MCInst &Inst, unsigned N) const {
432    addMemOperands(Inst, N);
433  }
434  void addMemVX32Operands(MCInst &Inst, unsigned N) const {
435    addMemOperands(Inst, N);
436  }
437  void addMemVY32Operands(MCInst &Inst, unsigned N) const {
438    addMemOperands(Inst, N);
439  }
440  void addMemVX64Operands(MCInst &Inst, unsigned N) const {
441    addMemOperands(Inst, N);
442  }
443  void addMemVY64Operands(MCInst &Inst, unsigned N) const {
444    addMemOperands(Inst, N);
445  }
446
447  void addMemOperands(MCInst &Inst, unsigned N) const {
448    assert((N == 5) && "Invalid number of operands!");
449    Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
450    Inst.addOperand(MCOperand::CreateImm(getMemScale()));
451    Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
452    addExpr(Inst, getMemDisp());
453    Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
454  }
455
456  void addAbsMemOperands(MCInst &Inst, unsigned N) const {
457    assert((N == 1) && "Invalid number of operands!");
458    // Add as immediates when possible.
459    if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
460      Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
461    else
462      Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
463  }
464
465  static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
466    SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
467    X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
468    Res->Tok.Data = Str.data();
469    Res->Tok.Length = Str.size();
470    return Res;
471  }
472
473  static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
474                               SMLoc OffsetOfLoc = SMLoc()) {
475    X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
476    Res->Reg.RegNo = RegNo;
477    Res->OffsetOfLoc = OffsetOfLoc;
478    return Res;
479  }
480
481  static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
482                               bool NeedRewrite = true){
483    X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
484    Res->Imm.Val = Val;
485    Res->Imm.NeedAsmRewrite = NeedRewrite;
486    return Res;
487  }
488
489  /// Create an absolute memory operand.
490  static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
491                               unsigned Size = 0, bool NeedSizeDir = false){
492    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
493    Res->Mem.SegReg   = 0;
494    Res->Mem.Disp     = Disp;
495    Res->Mem.BaseReg  = 0;
496    Res->Mem.IndexReg = 0;
497    Res->Mem.Scale    = 1;
498    Res->Mem.Size     = Size;
499    Res->Mem.NeedSizeDir = NeedSizeDir;
500    return Res;
501  }
502
503  /// Create a generalized memory operand.
504  static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
505                               unsigned BaseReg, unsigned IndexReg,
506                               unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
507                               unsigned Size = 0, bool NeedSizeDir = false) {
508    // We should never just have a displacement, that should be parsed as an
509    // absolute memory operand.
510    assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
511
512    // The scale should always be one of {1,2,4,8}.
513    assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
514           "Invalid scale!");
515    X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
516    Res->Mem.SegReg   = SegReg;
517    Res->Mem.Disp     = Disp;
518    Res->Mem.BaseReg  = BaseReg;
519    Res->Mem.IndexReg = IndexReg;
520    Res->Mem.Scale    = Scale;
521    Res->Mem.Size     = Size;
522    Res->Mem.NeedSizeDir = NeedSizeDir;
523    return Res;
524  }
525};
526
527} // end anonymous namespace.
528
529bool X86AsmParser::isSrcOp(X86Operand &Op) {
530  unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
531
532  return (Op.isMem() &&
533    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
534    isa<MCConstantExpr>(Op.Mem.Disp) &&
535    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
536    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
537}
538
539bool X86AsmParser::isDstOp(X86Operand &Op) {
540  unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
541
542  return Op.isMem() &&
543    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
544    isa<MCConstantExpr>(Op.Mem.Disp) &&
545    cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
546    Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
547}
548
549bool X86AsmParser::ParseRegister(unsigned &RegNo,
550                                 SMLoc &StartLoc, SMLoc &EndLoc) {
551  RegNo = 0;
552  const AsmToken &PercentTok = Parser.getTok();
553  StartLoc = PercentTok.getLoc();
554
555  // If we encounter a %, ignore it. This code handles registers with and
556  // without the prefix, unprefixed registers can occur in cfi directives.
557  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
558    Parser.Lex(); // Eat percent token.
559
560  const AsmToken &Tok = Parser.getTok();
561  if (Tok.isNot(AsmToken::Identifier)) {
562    if (isParsingIntelSyntax()) return true;
563    return Error(StartLoc, "invalid register name",
564                 SMRange(StartLoc, Tok.getEndLoc()));
565  }
566
567  RegNo = MatchRegisterName(Tok.getString());
568
569  // If the match failed, try the register name as lowercase.
570  if (RegNo == 0)
571    RegNo = MatchRegisterName(Tok.getString().lower());
572
573  if (!is64BitMode()) {
574    // FIXME: This should be done using Requires<In32BitMode> and
575    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
576    // checked.
577    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
578    // REX prefix.
579    if (RegNo == X86::RIZ ||
580        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
581        X86II::isX86_64NonExtLowByteReg(RegNo) ||
582        X86II::isX86_64ExtendedReg(RegNo))
583      return Error(StartLoc, "register %"
584                   + Tok.getString() + " is only available in 64-bit mode",
585                   SMRange(StartLoc, Tok.getEndLoc()));
586  }
587
588  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
589  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
590    RegNo = X86::ST0;
591    EndLoc = Tok.getLoc();
592    Parser.Lex(); // Eat 'st'
593
594    // Check to see if we have '(4)' after %st.
595    if (getLexer().isNot(AsmToken::LParen))
596      return false;
597    // Lex the paren.
598    getParser().Lex();
599
600    const AsmToken &IntTok = Parser.getTok();
601    if (IntTok.isNot(AsmToken::Integer))
602      return Error(IntTok.getLoc(), "expected stack index");
603    switch (IntTok.getIntVal()) {
604    case 0: RegNo = X86::ST0; break;
605    case 1: RegNo = X86::ST1; break;
606    case 2: RegNo = X86::ST2; break;
607    case 3: RegNo = X86::ST3; break;
608    case 4: RegNo = X86::ST4; break;
609    case 5: RegNo = X86::ST5; break;
610    case 6: RegNo = X86::ST6; break;
611    case 7: RegNo = X86::ST7; break;
612    default: return Error(IntTok.getLoc(), "invalid stack index");
613    }
614
615    if (getParser().Lex().isNot(AsmToken::RParen))
616      return Error(Parser.getTok().getLoc(), "expected ')'");
617
618    EndLoc = Tok.getLoc();
619    Parser.Lex(); // Eat ')'
620    return false;
621  }
622
623  // If this is "db[0-7]", match it as an alias
624  // for dr[0-7].
625  if (RegNo == 0 && Tok.getString().size() == 3 &&
626      Tok.getString().startswith("db")) {
627    switch (Tok.getString()[2]) {
628    case '0': RegNo = X86::DR0; break;
629    case '1': RegNo = X86::DR1; break;
630    case '2': RegNo = X86::DR2; break;
631    case '3': RegNo = X86::DR3; break;
632    case '4': RegNo = X86::DR4; break;
633    case '5': RegNo = X86::DR5; break;
634    case '6': RegNo = X86::DR6; break;
635    case '7': RegNo = X86::DR7; break;
636    }
637
638    if (RegNo != 0) {
639      EndLoc = Tok.getLoc();
640      Parser.Lex(); // Eat it.
641      return false;
642    }
643  }
644
645  if (RegNo == 0) {
646    if (isParsingIntelSyntax()) return true;
647    return Error(StartLoc, "invalid register name",
648                 SMRange(StartLoc, Tok.getEndLoc()));
649  }
650
651  EndLoc = Tok.getEndLoc();
652  Parser.Lex(); // Eat identifier token.
653  return false;
654}
655
656X86Operand *X86AsmParser::ParseOperand() {
657  if (isParsingIntelSyntax())
658    return ParseIntelOperand();
659  return ParseATTOperand();
660}
661
662/// getIntelMemOperandSize - Return intel memory operand size.
663static unsigned getIntelMemOperandSize(StringRef OpStr) {
664  unsigned Size = StringSwitch<unsigned>(OpStr)
665    .Cases("BYTE", "byte", 8)
666    .Cases("WORD", "word", 16)
667    .Cases("DWORD", "dword", 32)
668    .Cases("QWORD", "qword", 64)
669    .Cases("XWORD", "xword", 80)
670    .Cases("XMMWORD", "xmmword", 128)
671    .Cases("YMMWORD", "ymmword", 256)
672    .Default(0);
673  return Size;
674}
675
676X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
677                                                   unsigned Size) {
678  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
679  const AsmToken &Tok = Parser.getTok();
680  SMLoc Start = Tok.getLoc(), End;
681
682  const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
683  // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
684
685  // Eat '['
686  if (getLexer().isNot(AsmToken::LBrac))
687    return ErrorOperand(Start, "Expected '[' token!");
688  Parser.Lex();
689
690  if (getLexer().is(AsmToken::Identifier)) {
691    // Parse BaseReg
692    if (ParseRegister(BaseReg, Start, End)) {
693      // Handle '[' 'symbol' ']'
694      if (getParser().ParseExpression(Disp, End)) return 0;
695      if (getLexer().isNot(AsmToken::RBrac))
696        return ErrorOperand(Start, "Expected ']' token!");
697      Parser.Lex();
698      End = Tok.getLoc();
699      return X86Operand::CreateMem(Disp, Start, End, Size);
700    }
701  } else if (getLexer().is(AsmToken::Integer)) {
702      int64_t Val = Tok.getIntVal();
703      Parser.Lex();
704      SMLoc Loc = Tok.getLoc();
705      if (getLexer().is(AsmToken::RBrac)) {
706        // Handle '[' number ']'
707        Parser.Lex();
708        End = Tok.getLoc();
709        const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
710        if (SegReg)
711          return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
712                                       Start, End, Size);
713        return X86Operand::CreateMem(Disp, Start, End, Size);
714      } else if (getLexer().is(AsmToken::Star)) {
715        // Handle '[' Scale*IndexReg ']'
716        Parser.Lex();
717        SMLoc IdxRegLoc = Tok.getLoc();
718        if (ParseRegister(IndexReg, IdxRegLoc, End))
719          return ErrorOperand(IdxRegLoc, "Expected register");
720        Scale = Val;
721      } else
722        return ErrorOperand(Loc, "Unexpected token");
723  }
724
725  // Parse ][ as a plus.
726  bool ExpectRBrac = true;
727  if (getLexer().is(AsmToken::RBrac)) {
728    ExpectRBrac = false;
729    Parser.Lex();
730    End = Tok.getLoc();
731  }
732
733  if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) ||
734      getLexer().is(AsmToken::LBrac)) {
735    ExpectRBrac = true;
736    bool isPlus = getLexer().is(AsmToken::Plus) ||
737      getLexer().is(AsmToken::LBrac);
738    Parser.Lex();
739    SMLoc PlusLoc = Tok.getLoc();
740    if (getLexer().is(AsmToken::Integer)) {
741      int64_t Val = Tok.getIntVal();
742      Parser.Lex();
743      if (getLexer().is(AsmToken::Star)) {
744        Parser.Lex();
745        SMLoc IdxRegLoc = Tok.getLoc();
746        if (ParseRegister(IndexReg, IdxRegLoc, End))
747          return ErrorOperand(IdxRegLoc, "Expected register");
748        Scale = Val;
749      } else if (getLexer().is(AsmToken::RBrac)) {
750        const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
751        Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
752      } else
753        return ErrorOperand(PlusLoc, "unexpected token after +");
754    } else if (getLexer().is(AsmToken::Identifier)) {
755      // This could be an index register or a displacement expression.
756      End = Tok.getLoc();
757      if (!IndexReg)
758        ParseRegister(IndexReg, Start, End);
759      else if (getParser().ParseExpression(Disp, End)) return 0;
760    }
761  }
762
763  // Parse ][ as a plus.
764  if (getLexer().is(AsmToken::RBrac)) {
765    ExpectRBrac = false;
766    Parser.Lex();
767    End = Tok.getLoc();
768    if (getLexer().is(AsmToken::LBrac)) {
769      ExpectRBrac = true;
770      Parser.Lex();
771      if (getParser().ParseExpression(Disp, End))
772        return 0;
773    }
774  } else if (ExpectRBrac) {
775      if (getParser().ParseExpression(Disp, End))
776        return 0;
777  }
778
779  if (ExpectRBrac) {
780    if (getLexer().isNot(AsmToken::RBrac))
781      return ErrorOperand(End, "expected ']' token!");
782    Parser.Lex();
783    End = Tok.getLoc();
784  }
785
786  // Parse the dot operator (e.g., [ebx].foo.bar).
787  if (Tok.getString().startswith(".")) {
788    SmallString<64> Err;
789    const MCExpr *NewDisp;
790    if (ParseIntelDotOperator(Disp, &NewDisp, Err))
791      return ErrorOperand(Tok.getLoc(), Err);
792
793    Parser.Lex();  // Eat the field.
794    Disp = NewDisp;
795  }
796
797  End = Tok.getLoc();
798
799  // handle [-42]
800  if (!BaseReg && !IndexReg)
801    return X86Operand::CreateMem(Disp, Start, End, Size);
802
803  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
804                               Start, End, Size);
805}
806
807/// ParseIntelMemOperand - Parse intel style memory operand.
808X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
809  const AsmToken &Tok = Parser.getTok();
810  SMLoc End;
811
812  unsigned Size = getIntelMemOperandSize(Tok.getString());
813  if (Size) {
814    Parser.Lex();
815    assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
816            "Unexpected token!");
817    Parser.Lex();
818  }
819
820  if (getLexer().is(AsmToken::LBrac))
821    return ParseIntelBracExpression(SegReg, Size);
822
823  if (!ParseRegister(SegReg, Start, End)) {
824    // Handel SegReg : [ ... ]
825    if (getLexer().isNot(AsmToken::Colon))
826      return ErrorOperand(Start, "Expected ':' token!");
827    Parser.Lex(); // Eat :
828    if (getLexer().isNot(AsmToken::LBrac))
829      return ErrorOperand(Start, "Expected '[' token!");
830    return ParseIntelBracExpression(SegReg, Size);
831  }
832
833  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
834  if (getParser().ParseExpression(Disp, End)) return 0;
835  End = Parser.getTok().getLoc();
836
837  bool NeedSizeDir = false;
838  if (!Size && isParsingInlineAsm()) {
839    if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
840      const MCSymbol &Sym = SymRef->getSymbol();
841      // FIXME: The SemaLookup will fail if the name is anything other then an
842      // identifier.
843      // FIXME: Pass a valid SMLoc.
844      SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
845      NeedSizeDir = Size > 0;
846    }
847  }
848  if (!isParsingInlineAsm())
849    return X86Operand::CreateMem(Disp, Start, End, Size);
850  else
851    // When parsing inline assembly we set the base register to a non-zero value
852    // as we don't know the actual value at this time.  This is necessary to
853    // get the matching correct in some cases.
854    return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
855                                 /*Scale*/1, Start, End, Size, NeedSizeDir);
856}
857
858/// Parse the '.' operator.
859bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
860                                         const MCExpr **NewDisp,
861                                         SmallString<64> &Err) {
862  AsmToken Tok = *&Parser.getTok();
863  uint64_t OrigDispVal, DotDispVal;
864
865  // FIXME: Handle non-constant expressions.
866  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
867    OrigDispVal = OrigDisp->getValue();
868  } else {
869    Err = "Non-constant offsets are not supported!";
870    return true;
871  }
872
873  // Drop the '.'.
874  StringRef DotDispStr = Tok.getString().drop_front(1);
875
876  // .Imm gets lexed as a real.
877  if (Tok.is(AsmToken::Real)) {
878    APInt DotDisp;
879    DotDispStr.getAsInteger(10, DotDisp);
880    DotDispVal = DotDisp.getZExtValue();
881  } else if (Tok.is(AsmToken::Identifier)) {
882    // We should only see an identifier when parsing the original inline asm.
883    // The front-end should rewrite this in terms of immediates.
884    assert (isParsingInlineAsm() && "Unexpected field name!");
885
886    unsigned DotDisp;
887    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
888    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
889                                           DotDisp)) {
890      Err = "Unable to lookup field reference!";
891      return true;
892    }
893    DotDispVal = DotDisp;
894  } else {
895    Err = "Unexpected token type!";
896    return true;
897  }
898
899  if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
900    SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
901    unsigned Len = DotDispStr.size();
902    unsigned Val = OrigDispVal + DotDispVal;
903    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
904                                                Val));
905  }
906
907  *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
908  return false;
909}
910
911/// Parse the 'offset' operator.  This operator is used to specify the
912/// location rather then the content of a variable.
913X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
914  SMLoc OffsetOfLoc = Start;
915  Parser.Lex(); // Eat offset.
916  Start = Parser.getTok().getLoc();
917  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
918
919  SMLoc End;
920  const MCExpr *Val;
921  if (getParser().ParseExpression(Val, End))
922    return ErrorOperand(Start, "Unable to parse expression!");
923
924  End = Parser.getTok().getLoc();
925
926  // Don't emit the offset operator.
927  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
928
929  // The offset operator will have an 'r' constraint, thus we need to create
930  // register operand to ensure proper matching.  Just pick a GPR based on
931  // the size of a pointer.
932  unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
933  return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc);
934}
935
936/// Parse the 'TYPE' operator.  The TYPE operator returns the size of a C or
937/// C++ type or variable. If the variable is an array, TYPE returns the size of
938/// a single element of the array.
939X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) {
940  SMLoc TypeLoc = Start;
941  Parser.Lex(); // Eat offset.
942  Start = Parser.getTok().getLoc();
943  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
944
945  SMLoc End;
946  const MCExpr *Val;
947  if (getParser().ParseExpression(Val, End))
948    return 0;
949
950  End = Parser.getTok().getLoc();
951
952  unsigned Size = 0;
953  if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
954    const MCSymbol &Sym = SymRef->getSymbol();
955    // FIXME: The SemaLookup will fail if the name is anything other then an
956    // identifier.
957    // FIXME: Pass a valid SMLoc.
958    if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size))
959      return ErrorOperand(Start, "Unable to lookup TYPE of expr!");
960
961    Size /= 8; // Size is in terms of bits, but we want bytes in the context.
962  }
963
964  // Rewrite the type operator and the C or C++ type or variable in terms of an
965  // immediate.  E.g. TYPE foo -> $$4
966  unsigned Len = End.getPointer() - TypeLoc.getPointer();
967  InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, Size));
968
969  const MCExpr *Imm = MCConstantExpr::Create(Size, getContext());
970  return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
971}
972
973X86Operand *X86AsmParser::ParseIntelOperand() {
974  SMLoc Start = Parser.getTok().getLoc(), End;
975
976  // offset operator.
977  StringRef AsmTokStr = Parser.getTok().getString();
978  if ((AsmTokStr == "offset" || AsmTokStr == "OFFSET") &&
979      isParsingInlineAsm())
980    return ParseIntelOffsetOfOperator(Start);
981
982  // Type directive.
983  if ((AsmTokStr == "type" || AsmTokStr == "TYPE") &&
984      isParsingInlineAsm())
985    return ParseIntelTypeOperator(Start);
986
987  // Unsupported directives.
988  if (isParsingIntelSyntax() &&
989      (AsmTokStr == "size" || AsmTokStr == "SIZE" ||
990       AsmTokStr == "length" || AsmTokStr == "LENGTH"))
991      return ErrorOperand(Start, "Unsupported directive!");
992
993  // immediate.
994  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
995      getLexer().is(AsmToken::Minus)) {
996    const MCExpr *Val;
997    if (!getParser().ParseExpression(Val, End)) {
998      End = Parser.getTok().getLoc();
999      return X86Operand::CreateImm(Val, Start, End);
1000    }
1001  }
1002
1003  // register
1004  unsigned RegNo = 0;
1005  if (!ParseRegister(RegNo, Start, End)) {
1006    // If this is a segment register followed by a ':', then this is the start
1007    // of a memory reference, otherwise this is a normal register reference.
1008    if (getLexer().isNot(AsmToken::Colon))
1009      return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc());
1010
1011    getParser().Lex(); // Eat the colon.
1012    return ParseIntelMemOperand(RegNo, Start);
1013  }
1014
1015  // mem operand
1016  return ParseIntelMemOperand(0, Start);
1017}
1018
1019X86Operand *X86AsmParser::ParseATTOperand() {
1020  switch (getLexer().getKind()) {
1021  default:
1022    // Parse a memory operand with no segment register.
1023    return ParseMemOperand(0, Parser.getTok().getLoc());
1024  case AsmToken::Percent: {
1025    // Read the register.
1026    unsigned RegNo;
1027    SMLoc Start, End;
1028    if (ParseRegister(RegNo, Start, End)) return 0;
1029    if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1030      Error(Start, "%eiz and %riz can only be used as index registers",
1031            SMRange(Start, End));
1032      return 0;
1033    }
1034
1035    // If this is a segment register followed by a ':', then this is the start
1036    // of a memory reference, otherwise this is a normal register reference.
1037    if (getLexer().isNot(AsmToken::Colon))
1038      return X86Operand::CreateReg(RegNo, Start, End);
1039
1040
1041    getParser().Lex(); // Eat the colon.
1042    return ParseMemOperand(RegNo, Start);
1043  }
1044  case AsmToken::Dollar: {
1045    // $42 -> immediate.
1046    SMLoc Start = Parser.getTok().getLoc(), End;
1047    Parser.Lex();
1048    const MCExpr *Val;
1049    if (getParser().ParseExpression(Val, End))
1050      return 0;
1051    return X86Operand::CreateImm(Val, Start, End);
1052  }
1053  }
1054}
1055
1056/// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
1057/// has already been parsed if present.
1058X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
1059
1060  // We have to disambiguate a parenthesized expression "(4+5)" from the start
1061  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
1062  // only way to do this without lookahead is to eat the '(' and see what is
1063  // after it.
1064  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1065  if (getLexer().isNot(AsmToken::LParen)) {
1066    SMLoc ExprEnd;
1067    if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
1068
1069    // After parsing the base expression we could either have a parenthesized
1070    // memory address or not.  If not, return now.  If so, eat the (.
1071    if (getLexer().isNot(AsmToken::LParen)) {
1072      // Unless we have a segment register, treat this as an immediate.
1073      if (SegReg == 0)
1074        return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
1075      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1076    }
1077
1078    // Eat the '('.
1079    Parser.Lex();
1080  } else {
1081    // Okay, we have a '('.  We don't know if this is an expression or not, but
1082    // so we have to eat the ( to see beyond it.
1083    SMLoc LParenLoc = Parser.getTok().getLoc();
1084    Parser.Lex(); // Eat the '('.
1085
1086    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1087      // Nothing to do here, fall into the code below with the '(' part of the
1088      // memory operand consumed.
1089    } else {
1090      SMLoc ExprEnd;
1091
1092      // It must be an parenthesized expression, parse it now.
1093      if (getParser().ParseParenExpression(Disp, ExprEnd))
1094        return 0;
1095
1096      // After parsing the base expression we could either have a parenthesized
1097      // memory address or not.  If not, return now.  If so, eat the (.
1098      if (getLexer().isNot(AsmToken::LParen)) {
1099        // Unless we have a segment register, treat this as an immediate.
1100        if (SegReg == 0)
1101          return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
1102        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
1103      }
1104
1105      // Eat the '('.
1106      Parser.Lex();
1107    }
1108  }
1109
1110  // If we reached here, then we just ate the ( of the memory operand.  Process
1111  // the rest of the memory operand.
1112  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1113  SMLoc IndexLoc;
1114
1115  if (getLexer().is(AsmToken::Percent)) {
1116    SMLoc StartLoc, EndLoc;
1117    if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
1118    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1119      Error(StartLoc, "eiz and riz can only be used as index registers",
1120            SMRange(StartLoc, EndLoc));
1121      return 0;
1122    }
1123  }
1124
1125  if (getLexer().is(AsmToken::Comma)) {
1126    Parser.Lex(); // Eat the comma.
1127    IndexLoc = Parser.getTok().getLoc();
1128
1129    // Following the comma we should have either an index register, or a scale
1130    // value. We don't support the later form, but we want to parse it
1131    // correctly.
1132    //
1133    // Not that even though it would be completely consistent to support syntax
1134    // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1135    if (getLexer().is(AsmToken::Percent)) {
1136      SMLoc L;
1137      if (ParseRegister(IndexReg, L, L)) return 0;
1138
1139      if (getLexer().isNot(AsmToken::RParen)) {
1140        // Parse the scale amount:
1141        //  ::= ',' [scale-expression]
1142        if (getLexer().isNot(AsmToken::Comma)) {
1143          Error(Parser.getTok().getLoc(),
1144                "expected comma in scale expression");
1145          return 0;
1146        }
1147        Parser.Lex(); // Eat the comma.
1148
1149        if (getLexer().isNot(AsmToken::RParen)) {
1150          SMLoc Loc = Parser.getTok().getLoc();
1151
1152          int64_t ScaleVal;
1153          if (getParser().ParseAbsoluteExpression(ScaleVal)){
1154            Error(Loc, "expected scale expression");
1155            return 0;
1156          }
1157
1158          // Validate the scale amount.
1159          if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1160            Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1161            return 0;
1162          }
1163          Scale = (unsigned)ScaleVal;
1164        }
1165      }
1166    } else if (getLexer().isNot(AsmToken::RParen)) {
1167      // A scale amount without an index is ignored.
1168      // index.
1169      SMLoc Loc = Parser.getTok().getLoc();
1170
1171      int64_t Value;
1172      if (getParser().ParseAbsoluteExpression(Value))
1173        return 0;
1174
1175      if (Value != 1)
1176        Warning(Loc, "scale factor without index register is ignored");
1177      Scale = 1;
1178    }
1179  }
1180
1181  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1182  if (getLexer().isNot(AsmToken::RParen)) {
1183    Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1184    return 0;
1185  }
1186  SMLoc MemEnd = Parser.getTok().getLoc();
1187  Parser.Lex(); // Eat the ')'.
1188
1189  // If we have both a base register and an index register make sure they are
1190  // both 64-bit or 32-bit registers.
1191  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1192  if (BaseReg != 0 && IndexReg != 0) {
1193    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1194        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1195         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
1196        IndexReg != X86::RIZ) {
1197      Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
1198      return 0;
1199    }
1200    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1201        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1202         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
1203        IndexReg != X86::EIZ){
1204      Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
1205      return 0;
1206    }
1207  }
1208
1209  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
1210                               MemStart, MemEnd);
1211}
1212
1213bool X86AsmParser::
1214ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
1215                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
1216  InstInfo = &Info;
1217  StringRef PatchedName = Name;
1218
1219  // FIXME: Hack to recognize setneb as setne.
1220  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
1221      PatchedName != "setb" && PatchedName != "setnb")
1222    PatchedName = PatchedName.substr(0, Name.size()-1);
1223
1224  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
1225  const MCExpr *ExtraImmOp = 0;
1226  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
1227      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
1228       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
1229    bool IsVCMP = PatchedName[0] == 'v';
1230    unsigned SSECCIdx = IsVCMP ? 4 : 3;
1231    unsigned SSEComparisonCode = StringSwitch<unsigned>(
1232      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
1233      .Case("eq",       0x00)
1234      .Case("lt",       0x01)
1235      .Case("le",       0x02)
1236      .Case("unord",    0x03)
1237      .Case("neq",      0x04)
1238      .Case("nlt",      0x05)
1239      .Case("nle",      0x06)
1240      .Case("ord",      0x07)
1241      /* AVX only from here */
1242      .Case("eq_uq",    0x08)
1243      .Case("nge",      0x09)
1244      .Case("ngt",      0x0A)
1245      .Case("false",    0x0B)
1246      .Case("neq_oq",   0x0C)
1247      .Case("ge",       0x0D)
1248      .Case("gt",       0x0E)
1249      .Case("true",     0x0F)
1250      .Case("eq_os",    0x10)
1251      .Case("lt_oq",    0x11)
1252      .Case("le_oq",    0x12)
1253      .Case("unord_s",  0x13)
1254      .Case("neq_us",   0x14)
1255      .Case("nlt_uq",   0x15)
1256      .Case("nle_uq",   0x16)
1257      .Case("ord_s",    0x17)
1258      .Case("eq_us",    0x18)
1259      .Case("nge_uq",   0x19)
1260      .Case("ngt_uq",   0x1A)
1261      .Case("false_os", 0x1B)
1262      .Case("neq_os",   0x1C)
1263      .Case("ge_oq",    0x1D)
1264      .Case("gt_oq",    0x1E)
1265      .Case("true_us",  0x1F)
1266      .Default(~0U);
1267    if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
1268      ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
1269                                          getParser().getContext());
1270      if (PatchedName.endswith("ss")) {
1271        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
1272      } else if (PatchedName.endswith("sd")) {
1273        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
1274      } else if (PatchedName.endswith("ps")) {
1275        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
1276      } else {
1277        assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
1278        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
1279      }
1280    }
1281  }
1282
1283  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
1284
1285  if (ExtraImmOp && !isParsingIntelSyntax())
1286    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1287
1288  // Determine whether this is an instruction prefix.
1289  bool isPrefix =
1290    Name == "lock" || Name == "rep" ||
1291    Name == "repe" || Name == "repz" ||
1292    Name == "repne" || Name == "repnz" ||
1293    Name == "rex64" || Name == "data16";
1294
1295
1296  // This does the actual operand parsing.  Don't parse any more if we have a
1297  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
1298  // just want to parse the "lock" as the first instruction and the "incl" as
1299  // the next one.
1300  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
1301
1302    // Parse '*' modifier.
1303    if (getLexer().is(AsmToken::Star)) {
1304      SMLoc Loc = Parser.getTok().getLoc();
1305      Operands.push_back(X86Operand::CreateToken("*", Loc));
1306      Parser.Lex(); // Eat the star.
1307    }
1308
1309    // Read the first operand.
1310    if (X86Operand *Op = ParseOperand())
1311      Operands.push_back(Op);
1312    else {
1313      Parser.EatToEndOfStatement();
1314      return true;
1315    }
1316
1317    while (getLexer().is(AsmToken::Comma)) {
1318      Parser.Lex();  // Eat the comma.
1319
1320      // Parse and remember the operand.
1321      if (X86Operand *Op = ParseOperand())
1322        Operands.push_back(Op);
1323      else {
1324        Parser.EatToEndOfStatement();
1325        return true;
1326      }
1327    }
1328
1329    if (getLexer().isNot(AsmToken::EndOfStatement)) {
1330      SMLoc Loc = getLexer().getLoc();
1331      Parser.EatToEndOfStatement();
1332      return Error(Loc, "unexpected token in argument list");
1333    }
1334  }
1335
1336  if (getLexer().is(AsmToken::EndOfStatement))
1337    Parser.Lex(); // Consume the EndOfStatement
1338  else if (isPrefix && getLexer().is(AsmToken::Slash))
1339    Parser.Lex(); // Consume the prefix separator Slash
1340
1341  if (ExtraImmOp && isParsingIntelSyntax())
1342    Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
1343
1344  // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
1345  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
1346  // documented form in various unofficial manuals, so a lot of code uses it.
1347  if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
1348      Operands.size() == 3) {
1349    X86Operand &Op = *(X86Operand*)Operands.back();
1350    if (Op.isMem() && Op.Mem.SegReg == 0 &&
1351        isa<MCConstantExpr>(Op.Mem.Disp) &&
1352        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1353        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1354      SMLoc Loc = Op.getEndLoc();
1355      Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1356      delete &Op;
1357    }
1358  }
1359  // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
1360  if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
1361      Operands.size() == 3) {
1362    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1363    if (Op.isMem() && Op.Mem.SegReg == 0 &&
1364        isa<MCConstantExpr>(Op.Mem.Disp) &&
1365        cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
1366        Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
1367      SMLoc Loc = Op.getEndLoc();
1368      Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
1369      delete &Op;
1370    }
1371  }
1372  // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
1373  if (Name.startswith("ins") && Operands.size() == 3 &&
1374      (Name == "insb" || Name == "insw" || Name == "insl")) {
1375    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1376    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1377    if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
1378      Operands.pop_back();
1379      Operands.pop_back();
1380      delete &Op;
1381      delete &Op2;
1382    }
1383  }
1384
1385  // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
1386  if (Name.startswith("outs") && Operands.size() == 3 &&
1387      (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
1388    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1389    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1390    if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
1391      Operands.pop_back();
1392      Operands.pop_back();
1393      delete &Op;
1394      delete &Op2;
1395    }
1396  }
1397
1398  // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
1399  if (Name.startswith("movs") && Operands.size() == 3 &&
1400      (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
1401       (is64BitMode() && Name == "movsq"))) {
1402    X86Operand &Op = *(X86Operand*)Operands.begin()[1];
1403    X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
1404    if (isSrcOp(Op) && isDstOp(Op2)) {
1405      Operands.pop_back();
1406      Operands.pop_back();
1407      delete &Op;
1408      delete &Op2;
1409    }
1410  }
1411  // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
1412  if (Name.startswith("lods") && Operands.size() == 3 &&
1413      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
1414       Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
1415    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1416    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1417    if (isSrcOp(*Op1) && Op2->isReg()) {
1418      const char *ins;
1419      unsigned reg = Op2->getReg();
1420      bool isLods = Name == "lods";
1421      if (reg == X86::AL && (isLods || Name == "lodsb"))
1422        ins = "lodsb";
1423      else if (reg == X86::AX && (isLods || Name == "lodsw"))
1424        ins = "lodsw";
1425      else if (reg == X86::EAX && (isLods || Name == "lodsl"))
1426        ins = "lodsl";
1427      else if (reg == X86::RAX && (isLods || Name == "lodsq"))
1428        ins = "lodsq";
1429      else
1430        ins = NULL;
1431      if (ins != NULL) {
1432        Operands.pop_back();
1433        Operands.pop_back();
1434        delete Op1;
1435        delete Op2;
1436        if (Name != ins)
1437          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1438      }
1439    }
1440  }
1441  // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
1442  if (Name.startswith("stos") && Operands.size() == 3 &&
1443      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
1444       Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
1445    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1446    X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
1447    if (isDstOp(*Op2) && Op1->isReg()) {
1448      const char *ins;
1449      unsigned reg = Op1->getReg();
1450      bool isStos = Name == "stos";
1451      if (reg == X86::AL && (isStos || Name == "stosb"))
1452        ins = "stosb";
1453      else if (reg == X86::AX && (isStos || Name == "stosw"))
1454        ins = "stosw";
1455      else if (reg == X86::EAX && (isStos || Name == "stosl"))
1456        ins = "stosl";
1457      else if (reg == X86::RAX && (isStos || Name == "stosq"))
1458        ins = "stosq";
1459      else
1460        ins = NULL;
1461      if (ins != NULL) {
1462        Operands.pop_back();
1463        Operands.pop_back();
1464        delete Op1;
1465        delete Op2;
1466        if (Name != ins)
1467          static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
1468      }
1469    }
1470  }
1471
1472  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
1473  // "shift <op>".
1474  if ((Name.startswith("shr") || Name.startswith("sar") ||
1475       Name.startswith("shl") || Name.startswith("sal") ||
1476       Name.startswith("rcl") || Name.startswith("rcr") ||
1477       Name.startswith("rol") || Name.startswith("ror")) &&
1478      Operands.size() == 3) {
1479    if (isParsingIntelSyntax()) {
1480      // Intel syntax
1481      X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
1482      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1483          cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1484        delete Operands[2];
1485        Operands.pop_back();
1486      }
1487    } else {
1488      X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1489      if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1490          cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
1491        delete Operands[1];
1492        Operands.erase(Operands.begin() + 1);
1493      }
1494    }
1495  }
1496
1497  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
1498  // instalias with an immediate operand yet.
1499  if (Name == "int" && Operands.size() == 2) {
1500    X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
1501    if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
1502        cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
1503      delete Operands[1];
1504      Operands.erase(Operands.begin() + 1);
1505      static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
1506    }
1507  }
1508
1509  return false;
1510}
1511
1512bool X86AsmParser::
1513processInstruction(MCInst &Inst,
1514                   const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
1515  switch (Inst.getOpcode()) {
1516  default: return false;
1517  case X86::AND16i16: {
1518    if (!Inst.getOperand(0).isImm() ||
1519        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1520      return false;
1521
1522    MCInst TmpInst;
1523    TmpInst.setOpcode(X86::AND16ri8);
1524    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1525    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1526    TmpInst.addOperand(Inst.getOperand(0));
1527    Inst = TmpInst;
1528    return true;
1529  }
1530  case X86::AND32i32: {
1531    if (!Inst.getOperand(0).isImm() ||
1532        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1533      return false;
1534
1535    MCInst TmpInst;
1536    TmpInst.setOpcode(X86::AND32ri8);
1537    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1538    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1539    TmpInst.addOperand(Inst.getOperand(0));
1540    Inst = TmpInst;
1541    return true;
1542  }
1543  case X86::AND64i32: {
1544    if (!Inst.getOperand(0).isImm() ||
1545        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1546      return false;
1547
1548    MCInst TmpInst;
1549    TmpInst.setOpcode(X86::AND64ri8);
1550    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1551    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1552    TmpInst.addOperand(Inst.getOperand(0));
1553    Inst = TmpInst;
1554    return true;
1555  }
1556  case X86::XOR16i16: {
1557    if (!Inst.getOperand(0).isImm() ||
1558        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1559      return false;
1560
1561    MCInst TmpInst;
1562    TmpInst.setOpcode(X86::XOR16ri8);
1563    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1564    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1565    TmpInst.addOperand(Inst.getOperand(0));
1566    Inst = TmpInst;
1567    return true;
1568  }
1569  case X86::XOR32i32: {
1570    if (!Inst.getOperand(0).isImm() ||
1571        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1572      return false;
1573
1574    MCInst TmpInst;
1575    TmpInst.setOpcode(X86::XOR32ri8);
1576    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1577    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1578    TmpInst.addOperand(Inst.getOperand(0));
1579    Inst = TmpInst;
1580    return true;
1581  }
1582  case X86::XOR64i32: {
1583    if (!Inst.getOperand(0).isImm() ||
1584        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1585      return false;
1586
1587    MCInst TmpInst;
1588    TmpInst.setOpcode(X86::XOR64ri8);
1589    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1590    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1591    TmpInst.addOperand(Inst.getOperand(0));
1592    Inst = TmpInst;
1593    return true;
1594  }
1595  case X86::OR16i16: {
1596    if (!Inst.getOperand(0).isImm() ||
1597        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1598      return false;
1599
1600    MCInst TmpInst;
1601    TmpInst.setOpcode(X86::OR16ri8);
1602    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1603    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1604    TmpInst.addOperand(Inst.getOperand(0));
1605    Inst = TmpInst;
1606    return true;
1607  }
1608  case X86::OR32i32: {
1609    if (!Inst.getOperand(0).isImm() ||
1610        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1611      return false;
1612
1613    MCInst TmpInst;
1614    TmpInst.setOpcode(X86::OR32ri8);
1615    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1616    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1617    TmpInst.addOperand(Inst.getOperand(0));
1618    Inst = TmpInst;
1619    return true;
1620  }
1621  case X86::OR64i32: {
1622    if (!Inst.getOperand(0).isImm() ||
1623        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1624      return false;
1625
1626    MCInst TmpInst;
1627    TmpInst.setOpcode(X86::OR64ri8);
1628    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1629    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1630    TmpInst.addOperand(Inst.getOperand(0));
1631    Inst = TmpInst;
1632    return true;
1633  }
1634  case X86::CMP16i16: {
1635    if (!Inst.getOperand(0).isImm() ||
1636        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1637      return false;
1638
1639    MCInst TmpInst;
1640    TmpInst.setOpcode(X86::CMP16ri8);
1641    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1642    TmpInst.addOperand(Inst.getOperand(0));
1643    Inst = TmpInst;
1644    return true;
1645  }
1646  case X86::CMP32i32: {
1647    if (!Inst.getOperand(0).isImm() ||
1648        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1649      return false;
1650
1651    MCInst TmpInst;
1652    TmpInst.setOpcode(X86::CMP32ri8);
1653    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1654    TmpInst.addOperand(Inst.getOperand(0));
1655    Inst = TmpInst;
1656    return true;
1657  }
1658  case X86::CMP64i32: {
1659    if (!Inst.getOperand(0).isImm() ||
1660        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1661      return false;
1662
1663    MCInst TmpInst;
1664    TmpInst.setOpcode(X86::CMP64ri8);
1665    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1666    TmpInst.addOperand(Inst.getOperand(0));
1667    Inst = TmpInst;
1668    return true;
1669  }
1670  case X86::ADD16i16: {
1671    if (!Inst.getOperand(0).isImm() ||
1672        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1673      return false;
1674
1675    MCInst TmpInst;
1676    TmpInst.setOpcode(X86::ADD16ri8);
1677    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1678    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1679    TmpInst.addOperand(Inst.getOperand(0));
1680    Inst = TmpInst;
1681    return true;
1682  }
1683  case X86::ADD32i32: {
1684    if (!Inst.getOperand(0).isImm() ||
1685        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1686      return false;
1687
1688    MCInst TmpInst;
1689    TmpInst.setOpcode(X86::ADD32ri8);
1690    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1691    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1692    TmpInst.addOperand(Inst.getOperand(0));
1693    Inst = TmpInst;
1694    return true;
1695  }
1696  case X86::ADD64i32: {
1697    if (!Inst.getOperand(0).isImm() ||
1698        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1699      return false;
1700
1701    MCInst TmpInst;
1702    TmpInst.setOpcode(X86::ADD64ri8);
1703    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1704    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1705    TmpInst.addOperand(Inst.getOperand(0));
1706    Inst = TmpInst;
1707    return true;
1708  }
1709  case X86::SUB16i16: {
1710    if (!Inst.getOperand(0).isImm() ||
1711        !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
1712      return false;
1713
1714    MCInst TmpInst;
1715    TmpInst.setOpcode(X86::SUB16ri8);
1716    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1717    TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
1718    TmpInst.addOperand(Inst.getOperand(0));
1719    Inst = TmpInst;
1720    return true;
1721  }
1722  case X86::SUB32i32: {
1723    if (!Inst.getOperand(0).isImm() ||
1724        !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
1725      return false;
1726
1727    MCInst TmpInst;
1728    TmpInst.setOpcode(X86::SUB32ri8);
1729    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1730    TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
1731    TmpInst.addOperand(Inst.getOperand(0));
1732    Inst = TmpInst;
1733    return true;
1734  }
1735  case X86::SUB64i32: {
1736    if (!Inst.getOperand(0).isImm() ||
1737        !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
1738      return false;
1739
1740    MCInst TmpInst;
1741    TmpInst.setOpcode(X86::SUB64ri8);
1742    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1743    TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
1744    TmpInst.addOperand(Inst.getOperand(0));
1745    Inst = TmpInst;
1746    return true;
1747  }
1748  }
1749}
1750
1751bool X86AsmParser::
1752MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1753                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
1754                        MCStreamer &Out, unsigned &ErrorInfo,
1755                        bool MatchingInlineAsm) {
1756  assert(!Operands.empty() && "Unexpect empty operand list!");
1757  X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
1758  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
1759  ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
1760
1761  // First, handle aliases that expand to multiple instructions.
1762  // FIXME: This should be replaced with a real .td file alias mechanism.
1763  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
1764  // call.
1765  if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
1766      Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
1767      Op->getToken() == "finit" || Op->getToken() == "fsave" ||
1768      Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
1769    MCInst Inst;
1770    Inst.setOpcode(X86::WAIT);
1771    Inst.setLoc(IDLoc);
1772    if (!MatchingInlineAsm)
1773      Out.EmitInstruction(Inst);
1774
1775    const char *Repl =
1776      StringSwitch<const char*>(Op->getToken())
1777        .Case("finit",  "fninit")
1778        .Case("fsave",  "fnsave")
1779        .Case("fstcw",  "fnstcw")
1780        .Case("fstcww",  "fnstcw")
1781        .Case("fstenv", "fnstenv")
1782        .Case("fstsw",  "fnstsw")
1783        .Case("fstsww", "fnstsw")
1784        .Case("fclex",  "fnclex")
1785        .Default(0);
1786    assert(Repl && "Unknown wait-prefixed instruction");
1787    delete Operands[0];
1788    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
1789  }
1790
1791  bool WasOriginallyInvalidOperand = false;
1792  MCInst Inst;
1793
1794  // First, try a direct match.
1795  switch (MatchInstructionImpl(Operands, Inst,
1796                               ErrorInfo, MatchingInlineAsm,
1797                               isParsingIntelSyntax())) {
1798  default: break;
1799  case Match_Success:
1800    // Some instructions need post-processing to, for example, tweak which
1801    // encoding is selected. Loop on it while changes happen so the
1802    // individual transformations can chain off each other.
1803    if (!MatchingInlineAsm)
1804      while (processInstruction(Inst, Operands))
1805        ;
1806
1807    Inst.setLoc(IDLoc);
1808    if (!MatchingInlineAsm)
1809      Out.EmitInstruction(Inst);
1810    Opcode = Inst.getOpcode();
1811    return false;
1812  case Match_MissingFeature:
1813    Error(IDLoc, "instruction requires a CPU feature not currently enabled",
1814          EmptyRanges, MatchingInlineAsm);
1815    return true;
1816  case Match_InvalidOperand:
1817    WasOriginallyInvalidOperand = true;
1818    break;
1819  case Match_MnemonicFail:
1820    break;
1821  }
1822
1823  // FIXME: Ideally, we would only attempt suffix matches for things which are
1824  // valid prefixes, and we could just infer the right unambiguous
1825  // type. However, that requires substantially more matcher support than the
1826  // following hack.
1827
1828  // Change the operand to point to a temporary token.
1829  StringRef Base = Op->getToken();
1830  SmallString<16> Tmp;
1831  Tmp += Base;
1832  Tmp += ' ';
1833  Op->setTokenValue(Tmp.str());
1834
1835  // If this instruction starts with an 'f', then it is a floating point stack
1836  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
1837  // 80-bit floating point, which use the suffixes s,l,t respectively.
1838  //
1839  // Otherwise, we assume that this may be an integer instruction, which comes
1840  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
1841  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
1842
1843  // Check for the various suffix matches.
1844  Tmp[Base.size()] = Suffixes[0];
1845  unsigned ErrorInfoIgnore;
1846  unsigned Match1, Match2, Match3, Match4;
1847
1848  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1849                                isParsingIntelSyntax());
1850  Tmp[Base.size()] = Suffixes[1];
1851  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1852                                isParsingIntelSyntax());
1853  Tmp[Base.size()] = Suffixes[2];
1854  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1855                                isParsingIntelSyntax());
1856  Tmp[Base.size()] = Suffixes[3];
1857  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
1858                                isParsingIntelSyntax());
1859
1860  // Restore the old token.
1861  Op->setTokenValue(Base);
1862
1863  // If exactly one matched, then we treat that as a successful match (and the
1864  // instruction will already have been filled in correctly, since the failing
1865  // matches won't have modified it).
1866  unsigned NumSuccessfulMatches =
1867    (Match1 == Match_Success) + (Match2 == Match_Success) +
1868    (Match3 == Match_Success) + (Match4 == Match_Success);
1869  if (NumSuccessfulMatches == 1) {
1870    Inst.setLoc(IDLoc);
1871    if (!MatchingInlineAsm)
1872      Out.EmitInstruction(Inst);
1873    Opcode = Inst.getOpcode();
1874    return false;
1875  }
1876
1877  // Otherwise, the match failed, try to produce a decent error message.
1878
1879  // If we had multiple suffix matches, then identify this as an ambiguous
1880  // match.
1881  if (NumSuccessfulMatches > 1) {
1882    char MatchChars[4];
1883    unsigned NumMatches = 0;
1884    if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0];
1885    if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1];
1886    if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2];
1887    if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3];
1888
1889    SmallString<126> Msg;
1890    raw_svector_ostream OS(Msg);
1891    OS << "ambiguous instructions require an explicit suffix (could be ";
1892    for (unsigned i = 0; i != NumMatches; ++i) {
1893      if (i != 0)
1894        OS << ", ";
1895      if (i + 1 == NumMatches)
1896        OS << "or ";
1897      OS << "'" << Base << MatchChars[i] << "'";
1898    }
1899    OS << ")";
1900    Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
1901    return true;
1902  }
1903
1904  // Okay, we know that none of the variants matched successfully.
1905
1906  // If all of the instructions reported an invalid mnemonic, then the original
1907  // mnemonic was invalid.
1908  if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
1909      (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
1910    if (!WasOriginallyInvalidOperand) {
1911      ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
1912        Op->getLocRange();
1913      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
1914                   Ranges, MatchingInlineAsm);
1915    }
1916
1917    // Recover location info for the operand if we know which was the problem.
1918    if (ErrorInfo != ~0U) {
1919      if (ErrorInfo >= Operands.size())
1920        return Error(IDLoc, "too few operands for instruction",
1921                     EmptyRanges, MatchingInlineAsm);
1922
1923      X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
1924      if (Operand->getStartLoc().isValid()) {
1925        SMRange OperandRange = Operand->getLocRange();
1926        return Error(Operand->getStartLoc(), "invalid operand for instruction",
1927                     OperandRange, MatchingInlineAsm);
1928      }
1929    }
1930
1931    return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
1932                 MatchingInlineAsm);
1933  }
1934
1935  // If one instruction matched with a missing feature, report this as a
1936  // missing feature.
1937  if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
1938      (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
1939    Error(IDLoc, "instruction requires a CPU feature not currently enabled",
1940          EmptyRanges, MatchingInlineAsm);
1941    return true;
1942  }
1943
1944  // If one instruction matched with an invalid operand, report this as an
1945  // operand failure.
1946  if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
1947      (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
1948    Error(IDLoc, "invalid operand for instruction", EmptyRanges,
1949          MatchingInlineAsm);
1950    return true;
1951  }
1952
1953  // If all of these were an outright failure, report it in a useless way.
1954  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
1955        EmptyRanges, MatchingInlineAsm);
1956  return true;
1957}
1958
1959
1960bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
1961  StringRef IDVal = DirectiveID.getIdentifier();
1962  if (IDVal == ".word")
1963    return ParseDirectiveWord(2, DirectiveID.getLoc());
1964  else if (IDVal.startswith(".code"))
1965    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
1966  else if (IDVal.startswith(".att_syntax")) {
1967    getParser().setAssemblerDialect(0);
1968    return false;
1969  } else if (IDVal.startswith(".intel_syntax")) {
1970    getParser().setAssemblerDialect(1);
1971    if (getLexer().isNot(AsmToken::EndOfStatement)) {
1972      if(Parser.getTok().getString() == "noprefix") {
1973        // FIXME : Handle noprefix
1974        Parser.Lex();
1975      } else
1976        return true;
1977    }
1978    return false;
1979  }
1980  return true;
1981}
1982
1983/// ParseDirectiveWord
1984///  ::= .word [ expression (, expression)* ]
1985bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
1986  if (getLexer().isNot(AsmToken::EndOfStatement)) {
1987    for (;;) {
1988      const MCExpr *Value;
1989      if (getParser().ParseExpression(Value))
1990        return true;
1991
1992      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
1993
1994      if (getLexer().is(AsmToken::EndOfStatement))
1995        break;
1996
1997      // FIXME: Improve diagnostic.
1998      if (getLexer().isNot(AsmToken::Comma))
1999        return Error(L, "unexpected token in directive");
2000      Parser.Lex();
2001    }
2002  }
2003
2004  Parser.Lex();
2005  return false;
2006}
2007
2008/// ParseDirectiveCode
2009///  ::= .code32 | .code64
2010bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2011  if (IDVal == ".code32") {
2012    Parser.Lex();
2013    if (is64BitMode()) {
2014      SwitchMode();
2015      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2016    }
2017  } else if (IDVal == ".code64") {
2018    Parser.Lex();
2019    if (!is64BitMode()) {
2020      SwitchMode();
2021      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2022    }
2023  } else {
2024    return Error(L, "unexpected directive " + IDVal);
2025  }
2026
2027  return false;
2028}
2029
2030
2031extern "C" void LLVMInitializeX86AsmLexer();
2032
2033// Force static initialization.
2034extern "C" void LLVMInitializeX86AsmParser() {
2035  RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2036  RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2037  LLVMInitializeX86AsmLexer();
2038}
2039
2040#define GET_REGISTER_MATCHER
2041#define GET_MATCHER_IMPLEMENTATION
2042#include "X86GenAsmMatcher.inc"
2043