X86CodeEmitter.cpp revision 263508
1//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the pass that transforms the X86 machine instructions into
11// relocatable machine code.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "x86-emitter"
16#include "X86.h"
17#include "X86InstrInfo.h"
18#include "X86JITInfo.h"
19#include "X86Relocations.h"
20#include "X86Subtarget.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/JITCodeEmitter.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineModuleInfo.h"
27#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCCodeEmitter.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/PassManager.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/raw_ostream.h"
36#include "llvm/Target/TargetOptions.h"
37using namespace llvm;
38
39STATISTIC(NumEmitted, "Number of machine instructions emitted");
40
41namespace {
42  template<class CodeEmitter>
43  class Emitter : public MachineFunctionPass {
44    const X86InstrInfo  *II;
45    const DataLayout    *TD;
46    X86TargetMachine    &TM;
47    CodeEmitter         &MCE;
48    MachineModuleInfo   *MMI;
49    intptr_t PICBaseOffset;
50    bool Is64BitMode;
51    bool IsPIC;
52  public:
53    static char ID;
54    explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
55      : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
56        MCE(mce), PICBaseOffset(0), Is64BitMode(false),
57        IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
58
59    bool runOnMachineFunction(MachineFunction &MF);
60
61    virtual const char *getPassName() const {
62      return "X86 Machine Code Emitter";
63    }
64
65    void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
66                          const MachineInstr &MI,
67                          const MCInstrDesc *Desc) const;
68
69    void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
70                             const MachineInstr &MI,
71                             const MCInstrDesc *Desc) const;
72
73    void emitSegmentOverridePrefix(uint64_t TSFlags,
74                                   int MemOperand,
75                                   const MachineInstr &MI) const;
76
77    void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
78
79    void getAnalysisUsage(AnalysisUsage &AU) const {
80      AU.setPreservesAll();
81      AU.addRequired<MachineModuleInfo>();
82      MachineFunctionPass::getAnalysisUsage(AU);
83    }
84
85  private:
86    void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
87    void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
88                           intptr_t Disp = 0, intptr_t PCAdj = 0,
89                           bool Indirect = false);
90    void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
91    void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
92                              intptr_t PCAdj = 0);
93    void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
94                              intptr_t PCAdj = 0);
95
96    void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
97                               intptr_t Adj = 0, bool IsPCRel = true);
98
99    void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
100    void emitRegModRMByte(unsigned RegOpcodeField);
101    void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
102    void emitConstant(uint64_t Val, unsigned Size);
103
104    void emitMemModRMByte(const MachineInstr &MI,
105                          unsigned Op, unsigned RegOpcodeField,
106                          intptr_t PCAdj = 0);
107
108    unsigned getX86RegNum(unsigned RegNo) const {
109      const TargetRegisterInfo *TRI = TM.getRegisterInfo();
110      return TRI->getEncodingValue(RegNo) & 0x7;
111    }
112
113    unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
114                                         unsigned OpNum) const;
115  };
116
117template<class CodeEmitter>
118  char Emitter<CodeEmitter>::ID = 0;
119} // end anonymous namespace.
120
121/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
122/// to the specified JITCodeEmitter object.
123FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
124                                                JITCodeEmitter &JCE) {
125  return new Emitter<JITCodeEmitter>(TM, JCE);
126}
127
128template<class CodeEmitter>
129bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
130  MMI = &getAnalysis<MachineModuleInfo>();
131  MCE.setModuleInfo(MMI);
132
133  II = TM.getInstrInfo();
134  TD = TM.getDataLayout();
135  Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
136  IsPIC = TM.getRelocationModel() == Reloc::PIC_;
137
138  do {
139    DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
140    MCE.startFunction(MF);
141    for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
142         MBB != E; ++MBB) {
143      MCE.StartMachineBasicBlock(MBB);
144      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
145           I != E; ++I) {
146        const MCInstrDesc &Desc = I->getDesc();
147        emitInstruction(*I, &Desc);
148        // MOVPC32r is basically a call plus a pop instruction.
149        if (Desc.getOpcode() == X86::MOVPC32r)
150          emitInstruction(*I, &II->get(X86::POP32r));
151        ++NumEmitted;  // Keep track of the # of mi's emitted
152      }
153    }
154  } while (MCE.finishFunction(MF));
155
156  return false;
157}
158
159/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
160/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
161/// size, and 3) use of X86-64 extended registers.
162static unsigned determineREX(const MachineInstr &MI) {
163  unsigned REX = 0;
164  const MCInstrDesc &Desc = MI.getDesc();
165
166  // Pseudo instructions do not need REX prefix byte.
167  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
168    return 0;
169  if (Desc.TSFlags & X86II::REX_W)
170    REX |= 1 << 3;
171
172  unsigned NumOps = Desc.getNumOperands();
173  if (NumOps) {
174    bool isTwoAddr = NumOps > 1 &&
175      Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
176
177    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
178    unsigned i = isTwoAddr ? 1 : 0;
179    for (unsigned e = NumOps; i != e; ++i) {
180      const MachineOperand& MO = MI.getOperand(i);
181      if (MO.isReg()) {
182        unsigned Reg = MO.getReg();
183        if (X86II::isX86_64NonExtLowByteReg(Reg))
184          REX |= 0x40;
185      }
186    }
187
188    switch (Desc.TSFlags & X86II::FormMask) {
189      case X86II::MRMInitReg:
190        if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
191          REX |= (1 << 0) | (1 << 2);
192        break;
193      case X86II::MRMSrcReg: {
194        if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
195          REX |= 1 << 2;
196        i = isTwoAddr ? 2 : 1;
197        for (unsigned e = NumOps; i != e; ++i) {
198          const MachineOperand& MO = MI.getOperand(i);
199          if (X86InstrInfo::isX86_64ExtendedReg(MO))
200            REX |= 1 << 0;
201        }
202        break;
203      }
204      case X86II::MRMSrcMem: {
205        if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
206          REX |= 1 << 2;
207        unsigned Bit = 0;
208        i = isTwoAddr ? 2 : 1;
209        for (; i != NumOps; ++i) {
210          const MachineOperand& MO = MI.getOperand(i);
211          if (MO.isReg()) {
212            if (X86InstrInfo::isX86_64ExtendedReg(MO))
213              REX |= 1 << Bit;
214            Bit++;
215          }
216        }
217        break;
218      }
219      case X86II::MRM0m: case X86II::MRM1m:
220      case X86II::MRM2m: case X86II::MRM3m:
221      case X86II::MRM4m: case X86II::MRM5m:
222      case X86II::MRM6m: case X86II::MRM7m:
223      case X86II::MRMDestMem: {
224        unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
225        i = isTwoAddr ? 1 : 0;
226        if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
227          REX |= 1 << 2;
228        unsigned Bit = 0;
229        for (; i != e; ++i) {
230          const MachineOperand& MO = MI.getOperand(i);
231          if (MO.isReg()) {
232            if (X86InstrInfo::isX86_64ExtendedReg(MO))
233              REX |= 1 << Bit;
234            Bit++;
235          }
236        }
237        break;
238      }
239      default: {
240        if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
241          REX |= 1 << 0;
242        i = isTwoAddr ? 2 : 1;
243        for (unsigned e = NumOps; i != e; ++i) {
244          const MachineOperand& MO = MI.getOperand(i);
245          if (X86InstrInfo::isX86_64ExtendedReg(MO))
246            REX |= 1 << 2;
247        }
248        break;
249      }
250    }
251  }
252  return REX;
253}
254
255
256/// emitPCRelativeBlockAddress - This method keeps track of the information
257/// necessary to resolve the address of this block later and emits a dummy
258/// value.
259///
260template<class CodeEmitter>
261void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
262  // Remember where this reference was and where it is to so we can
263  // deal with it later.
264  MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
265                                             X86::reloc_pcrel_word, MBB));
266  MCE.emitWordLE(0);
267}
268
269/// emitGlobalAddress - Emit the specified address to the code stream assuming
270/// this is part of a "take the address of a global" instruction.
271///
272template<class CodeEmitter>
273void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
274                                unsigned Reloc,
275                                intptr_t Disp /* = 0 */,
276                                intptr_t PCAdj /* = 0 */,
277                                bool Indirect /* = false */) {
278  intptr_t RelocCST = Disp;
279  if (Reloc == X86::reloc_picrel_word)
280    RelocCST = PICBaseOffset;
281  else if (Reloc == X86::reloc_pcrel_word)
282    RelocCST = PCAdj;
283  MachineRelocation MR = Indirect
284    ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
285                                           const_cast<GlobalValue *>(GV),
286                                           RelocCST, false)
287    : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
288                               const_cast<GlobalValue *>(GV), RelocCST, false);
289  MCE.addRelocation(MR);
290  // The relocated value will be added to the displacement
291  if (Reloc == X86::reloc_absolute_dword)
292    MCE.emitDWordLE(Disp);
293  else
294    MCE.emitWordLE((int32_t)Disp);
295}
296
297/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
298/// be emitted to the current location in the function, and allow it to be PC
299/// relative.
300template<class CodeEmitter>
301void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
302                                                     unsigned Reloc) {
303  intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;
304
305  // X86 never needs stubs because instruction selection will always pick
306  // an instruction sequence that is large enough to hold any address
307  // to a symbol.
308  // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
309  bool NeedStub = false;
310  MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
311                                                 Reloc, ES, RelocCST,
312                                                 0, NeedStub));
313  if (Reloc == X86::reloc_absolute_dword)
314    MCE.emitDWordLE(0);
315  else
316    MCE.emitWordLE(0);
317}
318
319/// emitConstPoolAddress - Arrange for the address of an constant pool
320/// to be emitted to the current location in the function, and allow it to be PC
321/// relative.
322template<class CodeEmitter>
323void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc,
324                                   intptr_t Disp /* = 0 */,
325                                   intptr_t PCAdj /* = 0 */) {
326  intptr_t RelocCST = 0;
327  if (Reloc == X86::reloc_picrel_word)
328    RelocCST = PICBaseOffset;
329  else if (Reloc == X86::reloc_pcrel_word)
330    RelocCST = PCAdj;
331  MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
332                                                    Reloc, CPI, RelocCST));
333  // The relocated value will be added to the displacement
334  if (Reloc == X86::reloc_absolute_dword)
335    MCE.emitDWordLE(Disp);
336  else
337    MCE.emitWordLE((int32_t)Disp);
338}
339
340/// emitJumpTableAddress - Arrange for the address of a jump table to
341/// be emitted to the current location in the function, and allow it to be PC
342/// relative.
343template<class CodeEmitter>
344void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc,
345                                   intptr_t PCAdj /* = 0 */) {
346  intptr_t RelocCST = 0;
347  if (Reloc == X86::reloc_picrel_word)
348    RelocCST = PICBaseOffset;
349  else if (Reloc == X86::reloc_pcrel_word)
350    RelocCST = PCAdj;
351  MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
352                                                    Reloc, JTI, RelocCST));
353  // The relocated value will be added to the displacement
354  if (Reloc == X86::reloc_absolute_dword)
355    MCE.emitDWordLE(0);
356  else
357    MCE.emitWordLE(0);
358}
359
/// ModRMByte - Pack the three fields of a ModR/M byte: Mod goes in bits 7-6,
/// RegOpcode in bits 5-3 and RM in bits 2-0.  Asserts that every field is
/// within range.
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
                                      unsigned RM) {
  assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
  const unsigned Packed = (Mod << 6) | (RegOpcode << 3) | RM;
  return static_cast<unsigned char>(Packed);
}
365
366template<class CodeEmitter>
367void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg,
368                                            unsigned RegOpcodeFld){
369  MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)));
370}
371
372template<class CodeEmitter>
373void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) {
374  MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0));
375}
376
377template<class CodeEmitter>
378void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
379                                       unsigned Index,
380                                       unsigned Base) {
381  // SIB byte is in the same format as the ModRMByte...
382  MCE.emitByte(ModRMByte(SS, Index, Base));
383}
384
385template<class CodeEmitter>
386void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
387  // Output the constant in little endian byte order...
388  for (unsigned i = 0; i != Size; ++i) {
389    MCE.emitByte(Val & 255);
390    Val >>= 8;
391  }
392}
393
/// isDisp8 - Return true if Value survives a round trip through signed char,
/// i.e. it can be stored in an 8-bit sign-extended displacement field.
static bool isDisp8(int Value) {
  const signed char Truncated = static_cast<signed char>(Value);
  return static_cast<int>(Truncated) == Value;
}
399
400static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
401                              const TargetMachine &TM) {
402  // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
403  // mechanism as 32-bit mode.
404  if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
405      !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
406    return false;
407
408  // Return true if this is a reference to a stub containing the address of the
409  // global, not the global itself.
410  return isGlobalStubReference(GVOp.getTargetFlags());
411}
412
413template<class CodeEmitter>
414void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
415                                                 int DispVal,
416                                                 intptr_t Adj /* = 0 */,
417                                                 bool IsPCRel /* = true */) {
418  // If this is a simple integer displacement that doesn't require a relocation,
419  // emit it now.
420  if (!RelocOp) {
421    emitConstant(DispVal, 4);
422    return;
423  }
424
425  // Otherwise, this is something that requires a relocation.  Emit it as such
426  // now.
427  unsigned RelocType = Is64BitMode ?
428    (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
429    : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
430  if (RelocOp->isGlobal()) {
431    // In 64-bit static small code model, we could potentially emit absolute.
432    // But it's probably not beneficial. If the MCE supports using RIP directly
433    // do it, otherwise fallback to absolute (this is determined by IsPCRel).
434    //  89 05 00 00 00 00     mov    %eax,0(%rip)  # PC-relative
435    //  89 04 25 00 00 00 00  mov    %eax,0x0      # Absolute
436    bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
437    emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
438                      Adj, Indirect);
439  } else if (RelocOp->isSymbol()) {
440    emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
441  } else if (RelocOp->isCPI()) {
442    emitConstPoolAddress(RelocOp->getIndex(), RelocType,
443                         RelocOp->getOffset(), Adj);
444  } else {
445    assert(RelocOp->isJTI() && "Unexpected machine operand!");
446    emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
447  }
448}
449
450template<class CodeEmitter>
451void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
452                                            unsigned Op,unsigned RegOpcodeField,
453                                            intptr_t PCAdj) {
454  const MachineOperand &Op3 = MI.getOperand(Op+3);
455  int DispVal = 0;
456  const MachineOperand *DispForReloc = 0;
457
458  // Figure out what sort of displacement we have to handle here.
459  if (Op3.isGlobal()) {
460    DispForReloc = &Op3;
461  } else if (Op3.isSymbol()) {
462    DispForReloc = &Op3;
463  } else if (Op3.isCPI()) {
464    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
465      DispForReloc = &Op3;
466    } else {
467      DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
468      DispVal += Op3.getOffset();
469    }
470  } else if (Op3.isJTI()) {
471    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
472      DispForReloc = &Op3;
473    } else {
474      DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
475    }
476  } else {
477    DispVal = Op3.getImm();
478  }
479
480  const MachineOperand &Base     = MI.getOperand(Op);
481  const MachineOperand &Scale    = MI.getOperand(Op+1);
482  const MachineOperand &IndexReg = MI.getOperand(Op+2);
483
484  unsigned BaseReg = Base.getReg();
485
486  // Handle %rip relative addressing.
487  if (BaseReg == X86::RIP ||
488      (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
489    assert(IndexReg.getReg() == 0 && Is64BitMode &&
490           "Invalid rip-relative address");
491    MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
492    emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
493    return;
494  }
495
496  // Indicate that the displacement will use an pcrel or absolute reference
497  // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
498  // while others, unless explicit asked to use RIP, use absolute references.
499  bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
500
501  // Is a SIB byte needed?
502  // If no BaseReg, issue a RIP relative instruction only if the MCE can
503  // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
504  // 2-7) and absolute references.
505  unsigned BaseRegNo = -1U;
506  if (BaseReg != 0 && BaseReg != X86::RIP)
507    BaseRegNo = getX86RegNum(BaseReg);
508
509  if (// The SIB byte must be used if there is an index register.
510      IndexReg.getReg() == 0 &&
511      // The SIB byte must be used if the base is ESP/RSP/R12, all of which
512      // encode to an R/M value of 4, which indicates that a SIB byte is
513      // present.
514      BaseRegNo != N86::ESP &&
515      // If there is no base register and we're in 64-bit mode, we need a SIB
516      // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
517      (!Is64BitMode || BaseReg != 0)) {
518    if (BaseReg == 0 ||          // [disp32]     in X86-32 mode
519        BaseReg == X86::RIP) {   // [disp32+RIP] in X86-64 mode
520      MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
521      emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
522      return;
523    }
524
525    // If the base is not EBP/ESP and there is no displacement, use simple
526    // indirect register encoding, this handles addresses like [EAX].  The
527    // encoding for [EBP] with no displacement means [disp32] so we handle it
528    // by emitting a displacement of 0 below.
529    if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
530      MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
531      return;
532    }
533
534    // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
535    if (!DispForReloc && isDisp8(DispVal)) {
536      MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
537      emitConstant(DispVal, 1);
538      return;
539    }
540
541    // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
542    MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
543    emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
544    return;
545  }
546
547  // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
548  assert(IndexReg.getReg() != X86::ESP &&
549         IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
550
551  bool ForceDisp32 = false;
552  bool ForceDisp8  = false;
553  if (BaseReg == 0) {
554    // If there is no base register, we emit the special case SIB byte with
555    // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
556    MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
557    ForceDisp32 = true;
558  } else if (DispForReloc) {
559    // Emit the normal disp32 encoding.
560    MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
561    ForceDisp32 = true;
562  } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
563    // Emit no displacement ModR/M byte
564    MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
565  } else if (isDisp8(DispVal)) {
566    // Emit the disp8 encoding...
567    MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
568    ForceDisp8 = true;           // Make sure to force 8 bit disp if Base=EBP
569  } else {
570    // Emit the normal disp32 encoding...
571    MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
572  }
573
574  // Calculate what the SS field value should be...
575  static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
576  unsigned SS = SSTable[Scale.getImm()];
577
578  if (BaseReg == 0) {
579    // Handle the SIB byte for the case where there is no base, see Intel
580    // Manual 2A, table 2-7. The displacement has already been output.
581    unsigned IndexRegNo;
582    if (IndexReg.getReg())
583      IndexRegNo = getX86RegNum(IndexReg.getReg());
584    else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
585      IndexRegNo = 4;
586    emitSIBByte(SS, IndexRegNo, 5);
587  } else {
588    unsigned BaseRegNo = getX86RegNum(BaseReg);
589    unsigned IndexRegNo;
590    if (IndexReg.getReg())
591      IndexRegNo = getX86RegNum(IndexReg.getReg());
592    else
593      IndexRegNo = 4;   // For example [ESP+1*<noreg>+4]
594    emitSIBByte(SS, IndexRegNo, BaseRegNo);
595  }
596
597  // Do we need to output a displacement?
598  if (ForceDisp8) {
599    emitConstant(DispVal, 1);
600  } else if (DispVal != 0 || ForceDisp32) {
601    emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
602  }
603}
604
605static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
606                                   unsigned Opcode) {
607  const MCInstrDesc *Desc = &II->get(Opcode);
608  MI.setDesc(*Desc);
609  return Desc;
610}
611
612/// Is16BitMemOperand - Return true if the specified instruction has
613/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
614static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
615  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
616  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
617
618  if ((BaseReg.getReg() != 0 &&
619       X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
620      (IndexReg.getReg() != 0 &&
621       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
622    return true;
623  return false;
624}
625
626/// Is32BitMemOperand - Return true if the specified instruction has
627/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
628static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
629  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
630  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
631
632  if ((BaseReg.getReg() != 0 &&
633       X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
634      (IndexReg.getReg() != 0 &&
635       X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
636    return true;
637  return false;
638}
639
640/// Is64BitMemOperand - Return true if the specified instruction has
641/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
642#ifndef NDEBUG
643static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
644  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
645  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
646
647  if ((BaseReg.getReg() != 0 &&
648       X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
649      (IndexReg.getReg() != 0 &&
650       X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
651    return true;
652  return false;
653}
654#endif
655
656template<class CodeEmitter>
657void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
658                                            int MemOperand,
659                                            const MachineInstr &MI,
660                                            const MCInstrDesc *Desc) const {
661  // Emit the lock opcode prefix as needed.
662  if (Desc->TSFlags & X86II::LOCK)
663    MCE.emitByte(0xF0);
664
665  // Emit segment override opcode prefix as needed.
666  emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
667
668  // Emit the repeat opcode prefix as needed.
669  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
670    MCE.emitByte(0xF3);
671
672  // Emit the address size opcode prefix as needed.
673  bool need_address_override;
674  if (TSFlags & X86II::AdSize) {
675    need_address_override = true;
676  } else if (MemOperand == -1) {
677    need_address_override = false;
678  } else if (Is64BitMode) {
679    assert(!Is16BitMemOperand(MI, MemOperand));
680    need_address_override = Is32BitMemOperand(MI, MemOperand);
681  } else {
682    assert(!Is64BitMemOperand(MI, MemOperand));
683    need_address_override = Is16BitMemOperand(MI, MemOperand);
684  }
685
686  if (need_address_override)
687    MCE.emitByte(0x67);
688
689  // Emit the operand size opcode prefix as needed.
690  if (TSFlags & X86II::OpSize)
691    MCE.emitByte(0x66);
692
693  bool Need0FPrefix = false;
694  switch (Desc->TSFlags & X86II::Op0Mask) {
695    case X86II::TB:  // Two-byte opcode prefix
696    case X86II::T8:  // 0F 38
697    case X86II::TA:  // 0F 3A
698    case X86II::A6:  // 0F A6
699    case X86II::A7:  // 0F A7
700      Need0FPrefix = true;
701      break;
702    case X86II::REP: break; // already handled.
703    case X86II::T8XS: // F3 0F 38
704    case X86II::XS:   // F3 0F
705      MCE.emitByte(0xF3);
706      Need0FPrefix = true;
707      break;
708    case X86II::T8XD: // F2 0F 38
709    case X86II::TAXD: // F2 0F 3A
710    case X86II::XD:   // F2 0F
711      MCE.emitByte(0xF2);
712      Need0FPrefix = true;
713      break;
714    case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
715    case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
716      MCE.emitByte(0xD8+
717                   (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
718                    >> X86II::Op0Shift));
719      break; // Two-byte opcode prefix
720    default: llvm_unreachable("Invalid prefix!");
721    case 0: break;  // No prefix!
722  }
723
724  // Handle REX prefix.
725  if (Is64BitMode) {
726    if (unsigned REX = determineREX(MI))
727      MCE.emitByte(0x40 | REX);
728  }
729
730  // 0x0F escape code must be emitted just before the opcode.
731  if (Need0FPrefix)
732    MCE.emitByte(0x0F);
733
734  switch (Desc->TSFlags & X86II::Op0Mask) {
735    case X86II::T8XD:  // F2 0F 38
736    case X86II::T8XS:  // F3 0F 38
737    case X86II::T8:    // 0F 38
738      MCE.emitByte(0x38);
739      break;
740    case X86II::TAXD:  // F2 0F 38
741    case X86II::TA:    // 0F 3A
742      MCE.emitByte(0x3A);
743      break;
744    case X86II::A6:    // 0F A6
745      MCE.emitByte(0xA6);
746      break;
747    case X86II::A7:    // 0F A7
748      MCE.emitByte(0xA7);
749      break;
750  }
751}
752
753// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
754// 0-7 and the difference between the 2 groups is given by the REX prefix.
755// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
756// in 1's complement form, example:
757//
758//  ModRM field => XMM9 => 1
759//  VEX.VVVV    => XMM9 => ~9
760//
761// See table 4-35 of Intel AVX Programming Reference for details.
762template<class CodeEmitter>
763unsigned char
764Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
765                                             unsigned OpNum) const {
766  unsigned SrcReg = MI.getOperand(OpNum).getReg();
767  unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
768  if (X86II::isX86_64ExtendedReg(SrcReg))
769    SrcRegNum |= 8;
770
771  // The registers represented through VEX_VVVV should
772  // be encoded in 1's complement form.
773  return (~SrcRegNum) & 0xf;
774}
775
776/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
777template<class CodeEmitter>
778void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
779                                                 int MemOperand,
780                                                 const MachineInstr &MI) const {
781  switch (TSFlags & X86II::SegOvrMask) {
782    default: llvm_unreachable("Invalid segment!");
783    case 0:
784      // No segment override, check for explicit one on memory operand.
785      if (MemOperand != -1) {   // If the instruction has a memory operand.
786        switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
787          default: llvm_unreachable("Unknown segment register!");
788          case 0: break;
789          case X86::CS: MCE.emitByte(0x2E); break;
790          case X86::SS: MCE.emitByte(0x36); break;
791          case X86::DS: MCE.emitByte(0x3E); break;
792          case X86::ES: MCE.emitByte(0x26); break;
793          case X86::FS: MCE.emitByte(0x64); break;
794          case X86::GS: MCE.emitByte(0x65); break;
795        }
796      }
797      break;
798    case X86II::FS:
799      MCE.emitByte(0x64);
800      break;
801    case X86II::GS:
802      MCE.emitByte(0x65);
803      break;
804  }
805}
806
807template<class CodeEmitter>
808void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
809                                               int MemOperand,
810                                               const MachineInstr &MI,
811                                               const MCInstrDesc *Desc) const {
812  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
813  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
814  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
815
816  // VEX_R: opcode externsion equivalent to REX.R in
817  // 1's complement (inverted) form
818  //
819  //  1: Same as REX_R=0 (must be 1 in 32-bit mode)
820  //  0: Same as REX_R=1 (64 bit mode only)
821  //
822  unsigned char VEX_R = 0x1;
823
824  // VEX_X: equivalent to REX.X, only used when a
825  // register is used for index in SIB Byte.
826  //
827  //  1: Same as REX.X=0 (must be 1 in 32-bit mode)
828  //  0: Same as REX.X=1 (64-bit mode only)
829  unsigned char VEX_X = 0x1;
830
831  // VEX_B:
832  //
833  //  1: Same as REX_B=0 (ignored in 32-bit mode)
834  //  0: Same as REX_B=1 (64 bit mode only)
835  //
836  unsigned char VEX_B = 0x1;
837
838  // VEX_W: opcode specific (use like REX.W, or used for
839  // opcode extension, or ignored, depending on the opcode byte)
840  unsigned char VEX_W = 0;
841
842  // XOP: Use XOP prefix byte 0x8f instead of VEX.
843  bool XOP = false;
844
845  // VEX_5M (VEX m-mmmmm field):
846  //
847  //  0b00000: Reserved for future use
848  //  0b00001: implied 0F leading opcode
849  //  0b00010: implied 0F 38 leading opcode bytes
850  //  0b00011: implied 0F 3A leading opcode bytes
851  //  0b00100-0b11111: Reserved for future use
852  //  0b01000: XOP map select - 08h instructions with imm byte
853  //  0b01001: XOP map select - 09h instructions with no imm byte
854  //  0b01010: XOP map select - 0Ah instructions with imm dword
855  unsigned char VEX_5M = 0x1;
856
857  // VEX_4V (VEX vvvv field): a register specifier
858  // (in 1's complement form) or 1111 if unused.
859  unsigned char VEX_4V = 0xf;
860
861  // VEX_L (Vector Length):
862  //
863  //  0: scalar or 128-bit vector
864  //  1: 256-bit vector
865  //
866  unsigned char VEX_L = 0;
867
868  // VEX_PP: opcode extension providing equivalent
869  // functionality of a SIMD prefix
870  //
871  //  0b00: None
872  //  0b01: 66
873  //  0b10: F3
874  //  0b11: F2
875  //
876  unsigned char VEX_PP = 0;
877
878  // Encode the operand size opcode prefix as needed.
879  if (TSFlags & X86II::OpSize)
880    VEX_PP = 0x01;
881
882  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
883    VEX_W = 1;
884
885  if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
886    XOP = true;
887
888  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
889    VEX_L = 1;
890
891  switch (TSFlags & X86II::Op0Mask) {
892    default: llvm_unreachable("Invalid prefix!");
893    case X86II::T8:  // 0F 38
894      VEX_5M = 0x2;
895      break;
896    case X86II::TA:  // 0F 3A
897      VEX_5M = 0x3;
898      break;
899    case X86II::T8XS: // F3 0F 38
900      VEX_PP = 0x2;
901      VEX_5M = 0x2;
902      break;
903    case X86II::T8XD: // F2 0F 38
904      VEX_PP = 0x3;
905      VEX_5M = 0x2;
906      break;
907    case X86II::TAXD: // F2 0F 3A
908      VEX_PP = 0x3;
909      VEX_5M = 0x3;
910      break;
911    case X86II::XS:  // F3 0F
912      VEX_PP = 0x2;
913      break;
914    case X86II::XD:  // F2 0F
915      VEX_PP = 0x3;
916      break;
917    case X86II::XOP8:
918      VEX_5M = 0x8;
919      break;
920    case X86II::XOP9:
921      VEX_5M = 0x9;
922      break;
923    case X86II::XOPA:
924      VEX_5M = 0xA;
925      break;
926    case X86II::TB: // VEX_5M/VEX_PP already correct
927      break;
928  }
929
930
931  // Classify VEX_B, VEX_4V, VEX_R, VEX_X
932  unsigned NumOps = Desc->getNumOperands();
933  unsigned CurOp = 0;
934  if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
935    ++CurOp;
936  else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
937    assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
938    // Special case for GATHER with 2 TIED_TO operands
939    // Skip the first 2 operands: dst, mask_wb
940    CurOp += 2;
941  }
942
943  switch (TSFlags & X86II::FormMask) {
944    case X86II::MRMInitReg:
945      // Duplicate register.
946      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
947        VEX_R = 0x0;
948
949      if (HasVEX_4V)
950        VEX_4V = getVEXRegisterEncoding(MI, CurOp);
951      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
952        VEX_B = 0x0;
953      if (HasVEX_4VOp3)
954        VEX_4V = getVEXRegisterEncoding(MI, CurOp);
955      break;
956    case X86II::MRMDestMem: {
957      // MRMDestMem instructions forms:
958      //  MemAddr, src1(ModR/M)
959      //  MemAddr, src1(VEX_4V), src2(ModR/M)
960      //  MemAddr, src1(ModR/M), imm8
961      //
962      if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
963        VEX_B = 0x0;
964      if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
965        VEX_X = 0x0;
966
967      CurOp = X86::AddrNumOperands;
968      if (HasVEX_4V)
969        VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
970
971      const MachineOperand &MO = MI.getOperand(CurOp);
972      if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
973        VEX_R = 0x0;
974      break;
975    }
976    case X86II::MRMSrcMem:
977      // MRMSrcMem instructions forms:
978      //  src1(ModR/M), MemAddr
979      //  src1(ModR/M), src2(VEX_4V), MemAddr
980      //  src1(ModR/M), MemAddr, imm8
981      //  src1(ModR/M), MemAddr, src2(VEX_I8IMM)
982      //
983      //  FMA4:
984      //  dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
985      //  dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
986      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
987        VEX_R = 0x0;
988      CurOp++;
989
990      if (HasVEX_4V) {
991        VEX_4V = getVEXRegisterEncoding(MI, CurOp);
992        CurOp++;
993      }
994
995      if (X86II::isX86_64ExtendedReg(
996                          MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
997        VEX_B = 0x0;
998      if (X86II::isX86_64ExtendedReg(
999                          MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
1000        VEX_X = 0x0;
1001
1002      if (HasVEX_4VOp3)
1003        VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
1004      break;
1005    case X86II::MRM0m: case X86II::MRM1m:
1006    case X86II::MRM2m: case X86II::MRM3m:
1007    case X86II::MRM4m: case X86II::MRM5m:
1008    case X86II::MRM6m: case X86II::MRM7m: {
1009      // MRM[0-9]m instructions forms:
1010      //  MemAddr
1011      //  src1(VEX_4V), MemAddr
1012      if (HasVEX_4V)
1013        VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
1014
1015      if (X86II::isX86_64ExtendedReg(
1016                          MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
1017        VEX_B = 0x0;
1018      if (X86II::isX86_64ExtendedReg(
1019                          MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
1020        VEX_X = 0x0;
1021      break;
1022    }
1023    case X86II::MRMSrcReg:
1024      // MRMSrcReg instructions forms:
1025      //  dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
1026      //  dst(ModR/M), src1(ModR/M)
1027      //  dst(ModR/M), src1(ModR/M), imm8
1028      //
1029      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
1030        VEX_R = 0x0;
1031      CurOp++;
1032
1033      if (HasVEX_4V)
1034        VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
1035
1036      if (HasMemOp4) // Skip second register source (encoded in I8IMM)
1037        CurOp++;
1038
1039      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
1040        VEX_B = 0x0;
1041      CurOp++;
1042      if (HasVEX_4VOp3)
1043        VEX_4V = getVEXRegisterEncoding(MI, CurOp);
1044      break;
1045    case X86II::MRMDestReg:
1046      // MRMDestReg instructions forms:
1047      //  dst(ModR/M), src(ModR/M)
1048      //  dst(ModR/M), src(ModR/M), imm8
1049      //  dst(ModR/M), src1(VEX_4V), src2(ModR/M)
1050      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
1051        VEX_B = 0x0;
1052      CurOp++;
1053
1054      if (HasVEX_4V)
1055        VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
1056
1057      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
1058        VEX_R = 0x0;
1059      break;
1060    case X86II::MRM0r: case X86II::MRM1r:
1061    case X86II::MRM2r: case X86II::MRM3r:
1062    case X86II::MRM4r: case X86II::MRM5r:
1063    case X86II::MRM6r: case X86II::MRM7r:
1064      // MRM0r-MRM7r instructions forms:
1065      //  dst(VEX_4V), src(ModR/M), imm8
1066      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
1067      CurOp++;
1068
1069      if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
1070        VEX_B = 0x0;
1071      break;
1072    default: // RawFrm
1073      break;
1074  }
1075
1076  // Emit segment override opcode prefix as needed.
1077  emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
1078
1079  // VEX opcode prefix can have 2 or 3 bytes
1080  //
1081  //  3 bytes:
1082  //    +-----+ +--------------+ +-------------------+
1083  //    | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
1084  //    +-----+ +--------------+ +-------------------+
1085  //  2 bytes:
1086  //    +-----+ +-------------------+
1087  //    | C5h | | R | vvvv | L | pp |
1088  //    +-----+ +-------------------+
1089  //
1090  unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
1091
1092  if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
1093    MCE.emitByte(0xC5);
1094    MCE.emitByte(LastByte | (VEX_R << 7));
1095    return;
1096  }
1097
1098  // 3 byte VEX prefix
1099  MCE.emitByte(XOP ? 0x8F : 0xC4);
1100  MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
1101  MCE.emitByte(LastByte | (VEX_W << 7));
1102}
1103
1104template<class CodeEmitter>
1105void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
1106                                           const MCInstrDesc *Desc) {
1107  DEBUG(dbgs() << MI);
1108
1109  // If this is a pseudo instruction, lower it.
1110  switch (Desc->getOpcode()) {
1111  case X86::ADD16rr_DB:      Desc = UpdateOp(MI, II, X86::OR16rr); break;
1112  case X86::ADD32rr_DB:      Desc = UpdateOp(MI, II, X86::OR32rr); break;
1113  case X86::ADD64rr_DB:      Desc = UpdateOp(MI, II, X86::OR64rr); break;
1114  case X86::ADD16ri_DB:      Desc = UpdateOp(MI, II, X86::OR16ri); break;
1115  case X86::ADD32ri_DB:      Desc = UpdateOp(MI, II, X86::OR32ri); break;
1116  case X86::ADD64ri32_DB:    Desc = UpdateOp(MI, II, X86::OR64ri32); break;
1117  case X86::ADD16ri8_DB:     Desc = UpdateOp(MI, II, X86::OR16ri8); break;
1118  case X86::ADD32ri8_DB:     Desc = UpdateOp(MI, II, X86::OR32ri8); break;
1119  case X86::ADD64ri8_DB:     Desc = UpdateOp(MI, II, X86::OR64ri8); break;
1120  case X86::ACQUIRE_MOV8rm:  Desc = UpdateOp(MI, II, X86::MOV8rm); break;
1121  case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
1122  case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
1123  case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
1124  case X86::RELEASE_MOV8mr:  Desc = UpdateOp(MI, II, X86::MOV8mr); break;
1125  case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
1126  case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
1127  case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
1128  }
1129
1130
1131  MCE.processDebugLoc(MI.getDebugLoc(), true);
1132
1133  unsigned Opcode = Desc->Opcode;
1134
1135  // If this is a two-address instruction, skip one of the register operands.
1136  unsigned NumOps = Desc->getNumOperands();
1137  unsigned CurOp = 0;
1138  if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
1139    ++CurOp;
1140  else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
1141    assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
1142    // Special case for GATHER with 2 TIED_TO operands
1143    // Skip the first 2 operands: dst, mask_wb
1144    CurOp += 2;
1145  }
1146
1147  uint64_t TSFlags = Desc->TSFlags;
1148
1149  // Is this instruction encoded using the AVX VEX prefix?
1150  bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
1151  // It uses the VEX.VVVV field?
1152  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
1153  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
1154  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
1155  const unsigned MemOp4_I8IMMOperand = 2;
1156
1157  // Determine where the memory operand starts, if present.
1158  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
1159  if (MemoryOperand != -1) MemoryOperand += CurOp;
1160
1161  if (!HasVEXPrefix)
1162    emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
1163  else
1164    emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
1165
1166  unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
1167  switch (TSFlags & X86II::FormMask) {
1168  default:
1169    llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
1170  case X86II::Pseudo:
1171    // Remember the current PC offset, this is the PIC relocation
1172    // base address.
1173    switch (Opcode) {
1174    default:
1175      llvm_unreachable("pseudo instructions should be removed before code"
1176                       " emission");
1177    // Do nothing for Int_MemBarrier - it's just a comment.  Add a debug
1178    // to make it slightly easier to see.
1179    case X86::Int_MemBarrier:
1180      DEBUG(dbgs() << "#MEMBARRIER\n");
1181      break;
1182
1183    case TargetOpcode::INLINEASM:
1184      // We allow inline assembler nodes with empty bodies - they can
1185      // implicitly define registers, which is ok for JIT.
1186      if (MI.getOperand(0).getSymbolName()[0])
1187        report_fatal_error("JIT does not support inline asm!");
1188      break;
1189    case TargetOpcode::PROLOG_LABEL:
1190    case TargetOpcode::GC_LABEL:
1191    case TargetOpcode::EH_LABEL:
1192      MCE.emitLabel(MI.getOperand(0).getMCSymbol());
1193      break;
1194
1195    case TargetOpcode::IMPLICIT_DEF:
1196    case TargetOpcode::KILL:
1197      break;
1198    case X86::MOVPC32r: {
1199      // This emits the "call" portion of this pseudo instruction.
1200      MCE.emitByte(BaseOpcode);
1201      emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
1202      // Remember PIC base.
1203      PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
1204      X86JITInfo *JTI = TM.getJITInfo();
1205      JTI->setPICBase(MCE.getCurrentPCValue());
1206      break;
1207    }
1208    }
1209    CurOp = NumOps;
1210    break;
1211  case X86II::RawFrm: {
1212    MCE.emitByte(BaseOpcode);
1213
1214    if (CurOp == NumOps)
1215      break;
1216
1217    const MachineOperand &MO = MI.getOperand(CurOp++);
1218
1219    DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
1220    DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
1221    DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
1222    DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
1223    DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");
1224
1225    if (MO.isMBB()) {
1226      emitPCRelativeBlockAddress(MO.getMBB());
1227      break;
1228    }
1229
1230    if (MO.isGlobal()) {
1231      emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
1232                        MO.getOffset(), 0);
1233      break;
1234    }
1235
1236    if (MO.isSymbol()) {
1237      emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
1238      break;
1239    }
1240
1241    // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
1242    if (MO.isJTI()) {
1243      emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
1244      break;
1245    }
1246
1247    assert(MO.isImm() && "Unknown RawFrm operand!");
1248    if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
1249      // Fix up immediate operand for pc relative calls.
1250      intptr_t Imm = (intptr_t)MO.getImm();
1251      Imm = Imm - MCE.getCurrentPCValue() - 4;
1252      emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
1253    } else
1254      emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
1255    break;
1256  }
1257
1258  case X86II::AddRegFrm: {
1259    MCE.emitByte(BaseOpcode +
1260                 getX86RegNum(MI.getOperand(CurOp++).getReg()));
1261
1262    if (CurOp == NumOps)
1263      break;
1264
1265    const MachineOperand &MO1 = MI.getOperand(CurOp++);
1266    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
1267    if (MO1.isImm()) {
1268      emitConstant(MO1.getImm(), Size);
1269      break;
1270    }
1271
1272    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
1273      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
1274    if (Opcode == X86::MOV32ri64)
1275      rt = X86::reloc_absolute_word;  // FIXME: add X86II flag?
1276    // This should not occur on Darwin for relocatable objects.
1277    if (Opcode == X86::MOV64ri)
1278      rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
1279    if (MO1.isGlobal()) {
1280      bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
1281      emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
1282                        Indirect);
1283    } else if (MO1.isSymbol())
1284      emitExternalSymbolAddress(MO1.getSymbolName(), rt);
1285    else if (MO1.isCPI())
1286      emitConstPoolAddress(MO1.getIndex(), rt);
1287    else if (MO1.isJTI())
1288      emitJumpTableAddress(MO1.getIndex(), rt);
1289    break;
1290  }
1291
1292  case X86II::MRMDestReg: {
1293    MCE.emitByte(BaseOpcode);
1294
1295    unsigned SrcRegNum = CurOp+1;
1296    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1297      SrcRegNum++;
1298
1299    emitRegModRMByte(MI.getOperand(CurOp).getReg(),
1300                     getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
1301    CurOp = SrcRegNum + 1;
1302    break;
1303  }
1304  case X86II::MRMDestMem: {
1305    MCE.emitByte(BaseOpcode);
1306
1307    unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
1308    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1309      SrcRegNum++;
1310    emitMemModRMByte(MI, CurOp,
1311                     getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
1312    CurOp = SrcRegNum + 1;
1313    break;
1314  }
1315
1316  case X86II::MRMSrcReg: {
1317    MCE.emitByte(BaseOpcode);
1318
1319    unsigned SrcRegNum = CurOp+1;
1320    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
1321      ++SrcRegNum;
1322
1323    if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
1324      ++SrcRegNum;
1325
1326    emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
1327                     getX86RegNum(MI.getOperand(CurOp).getReg()));
1328    // 2 operands skipped with HasMemOp4, compensate accordingly
1329    CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
1330    if (HasVEX_4VOp3)
1331      ++CurOp;
1332    break;
1333  }
1334  case X86II::MRMSrcMem: {
1335    int AddrOperands = X86::AddrNumOperands;
1336    unsigned FirstMemOp = CurOp+1;
1337    if (HasVEX_4V) {
1338      ++AddrOperands;
1339      ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
1340    }
1341    if (HasMemOp4) // Skip second register source (encoded in I8IMM)
1342      ++FirstMemOp;
1343
1344    MCE.emitByte(BaseOpcode);
1345
1346    intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
1347      X86II::getSizeOfImm(Desc->TSFlags) : 0;
1348    emitMemModRMByte(MI, FirstMemOp,
1349                     getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
1350    CurOp += AddrOperands + 1;
1351    if (HasVEX_4VOp3)
1352      ++CurOp;
1353    break;
1354  }
1355
1356  case X86II::MRM0r: case X86II::MRM1r:
1357  case X86II::MRM2r: case X86II::MRM3r:
1358  case X86II::MRM4r: case X86II::MRM5r:
1359  case X86II::MRM6r: case X86II::MRM7r: {
1360    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
1361      ++CurOp;
1362    MCE.emitByte(BaseOpcode);
1363    emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
1364                     (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);
1365
1366    if (CurOp == NumOps)
1367      break;
1368
1369    const MachineOperand &MO1 = MI.getOperand(CurOp++);
1370    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
1371    if (MO1.isImm()) {
1372      emitConstant(MO1.getImm(), Size);
1373      break;
1374    }
1375
1376    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
1377      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
1378    if (Opcode == X86::MOV64ri32)
1379      rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
1380    if (MO1.isGlobal()) {
1381      bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
1382      emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
1383                        Indirect);
1384    } else if (MO1.isSymbol())
1385      emitExternalSymbolAddress(MO1.getSymbolName(), rt);
1386    else if (MO1.isCPI())
1387      emitConstPoolAddress(MO1.getIndex(), rt);
1388    else if (MO1.isJTI())
1389      emitJumpTableAddress(MO1.getIndex(), rt);
1390    break;
1391  }
1392
1393  case X86II::MRM0m: case X86II::MRM1m:
1394  case X86II::MRM2m: case X86II::MRM3m:
1395  case X86II::MRM4m: case X86II::MRM5m:
1396  case X86II::MRM6m: case X86II::MRM7m: {
1397    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
1398      ++CurOp;
1399    intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
1400      (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
1401          X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
1402
1403    MCE.emitByte(BaseOpcode);
1404    emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
1405                     PCAdj);
1406    CurOp += X86::AddrNumOperands;
1407
1408    if (CurOp == NumOps)
1409      break;
1410
1411    const MachineOperand &MO = MI.getOperand(CurOp++);
1412    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
1413    if (MO.isImm()) {
1414      emitConstant(MO.getImm(), Size);
1415      break;
1416    }
1417
1418    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
1419      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
1420    if (Opcode == X86::MOV64mi32)
1421      rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
1422    if (MO.isGlobal()) {
1423      bool Indirect = gvNeedsNonLazyPtr(MO, TM);
1424      emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
1425                        Indirect);
1426    } else if (MO.isSymbol())
1427      emitExternalSymbolAddress(MO.getSymbolName(), rt);
1428    else if (MO.isCPI())
1429      emitConstPoolAddress(MO.getIndex(), rt);
1430    else if (MO.isJTI())
1431      emitJumpTableAddress(MO.getIndex(), rt);
1432    break;
1433  }
1434
1435  case X86II::MRMInitReg:
1436    MCE.emitByte(BaseOpcode);
1437    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
1438    emitRegModRMByte(MI.getOperand(CurOp).getReg(),
1439                     getX86RegNum(MI.getOperand(CurOp).getReg()));
1440    ++CurOp;
1441    break;
1442
1443  case X86II::MRM_C1:
1444    MCE.emitByte(BaseOpcode);
1445    MCE.emitByte(0xC1);
1446    break;
1447  case X86II::MRM_C8:
1448    MCE.emitByte(BaseOpcode);
1449    MCE.emitByte(0xC8);
1450    break;
1451  case X86II::MRM_C9:
1452    MCE.emitByte(BaseOpcode);
1453    MCE.emitByte(0xC9);
1454    break;
1455  case X86II::MRM_CA:
1456    MCE.emitByte(BaseOpcode);
1457    MCE.emitByte(0xCA);
1458    break;
1459  case X86II::MRM_CB:
1460    MCE.emitByte(BaseOpcode);
1461    MCE.emitByte(0xCB);
1462    break;
1463  case X86II::MRM_E8:
1464    MCE.emitByte(BaseOpcode);
1465    MCE.emitByte(0xE8);
1466    break;
1467  case X86II::MRM_F0:
1468    MCE.emitByte(BaseOpcode);
1469    MCE.emitByte(0xF0);
1470    break;
1471  }
1472
1473  while (CurOp != NumOps && NumOps - CurOp <= 2) {
1474    // The last source register of a 4 operand instruction in AVX is encoded
1475    // in bits[7:4] of a immediate byte.
1476    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
1477      const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
1478                                                         : CurOp);
1479      ++CurOp;
1480      unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
1481      if (X86II::isX86_64ExtendedReg(MO.getReg()))
1482        RegNum |= 1 << 7;
1483      // If there is an additional 5th operand it must be an immediate, which
1484      // is encoded in bits[3:0]
1485      if (CurOp != NumOps) {
1486        const MachineOperand &MIMM = MI.getOperand(CurOp++);
1487        if (MIMM.isImm()) {
1488          unsigned Val = MIMM.getImm();
1489          assert(Val < 16 && "Immediate operand value out of range");
1490          RegNum |= Val;
1491        }
1492      }
1493      emitConstant(RegNum, 1);
1494    } else {
1495      emitConstant(MI.getOperand(CurOp++).getImm(),
1496                   X86II::getSizeOfImm(Desc->TSFlags));
1497    }
1498  }
1499
1500  if (!MI.isVariadic() && CurOp != NumOps) {
1501#ifndef NDEBUG
1502    dbgs() << "Cannot encode all operands of: " << MI << "\n";
1503#endif
1504    llvm_unreachable(0);
1505  }
1506
1507  MCE.processDebugLoc(MI.getDebugLoc(), false);
1508}
1509