// X86CodeEmitter.cpp (LLVM revision 243830)
//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the pass that transforms the X86 machine instructions into
// relocatable machine code.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-emitter"
#include "X86InstrInfo.h"
#include "X86JITInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "X86Relocations.h"
#include "X86.h"
#include "llvm/LLVMContext.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

STATISTIC(NumEmitted, "Number of machine instructions emitted");

namespace {
  /// Emitter - MachineFunction pass that walks every basic block of a
  /// function and encodes each MachineInstr into raw bytes through the
  /// templated CodeEmitter (e.g. JITCodeEmitter), recording a
  /// MachineRelocation for every address that cannot be resolved yet.
  template<class CodeEmitter>
  class Emitter : public MachineFunctionPass {
    const X86InstrInfo *II;   // Instruction descriptors for the target.
    const DataLayout *TD;     // Target data layout.
    X86TargetMachine &TM;     // Owning target machine.
    CodeEmitter &MCE;         // Sink that receives the emitted bytes.
    MachineModuleInfo *MMI;   // Module-level info, forwarded to the MCE.
    intptr_t PICBaseOffset;   // Offset of the PIC base, used as the addend
                              // for reloc_picrel_word relocations.
    bool Is64BitMode;         // True when emitting x86-64 code.
    bool IsPIC;               // True when relocation model is PIC.
  public:
    static char ID;
    explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
      : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
        MCE(mce), PICBaseOffset(0), Is64BitMode(false),
        IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
    Emitter(X86TargetMachine &tm, CodeEmitter &mce,
            const X86InstrInfo &ii, const DataLayout &td, bool is64)
      : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
        MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
        IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}

    bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const {
      return "X86 Machine Code Emitter";
    }

    // emitOpcodePrefix - Emit all legacy (non-VEX) prefix bytes required by
    // MI: lock, segment override, rep, address/operand size, escapes, REX.
    void emitOpcodePrefix(uint64_t TSFlags, int MemOperand,
                          const MachineInstr &MI,
                          const MCInstrDesc *Desc) const;

    // emitVEXOpcodePrefix - Emit the VEX/XOP prefix for AVX/XOP-encoded
    // instructions.
    void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand,
                             const MachineInstr &MI,
                             const MCInstrDesc *Desc) const;

    // emitSegmentOverridePrefix - Emit a segment-override prefix byte if MI
    // requires one (either from TSFlags or an explicit segment operand).
    void emitSegmentOverridePrefix(uint64_t TSFlags,
                                   int MemOperand,
                                   const MachineInstr &MI) const;

    // emitInstruction - Emit the complete encoding of one instruction.
    void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      AU.addRequired<MachineModuleInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

  private:
    // The emit*Address helpers record a relocation at the current PC offset
    // and emit a placeholder (or the displacement) in its place.
    void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
    void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
                           intptr_t Disp = 0, intptr_t PCAdj = 0,
                           bool Indirect = false);
    void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
    void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
                              intptr_t PCAdj = 0);
    void emitJumpTableAddress(unsigned JTI, unsigned Reloc,
                              intptr_t PCAdj = 0);

    void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
                               intptr_t Adj = 0, bool IsPCRel = true);

    void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
    void emitRegModRMByte(unsigned RegOpcodeField);
    void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
    void emitConstant(uint64_t Val, unsigned Size);

    void emitMemModRMByte(const MachineInstr &MI,
                          unsigned Op, unsigned RegOpcodeField,
                          intptr_t PCAdj = 0);

    // getX86RegNum - Return the low 3 bits of RegNo's hardware encoding,
    // i.e. the part that fits in a ModRM/SIB field (bit 3, when present,
    // is carried by the REX/VEX prefix instead).
    unsigned getX86RegNum(unsigned RegNo) const {
      const TargetRegisterInfo *TRI = TM.getRegisterInfo();
      return TRI->getEncodingValue(RegNo) & 0x7;
    }

    unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
                                         unsigned OpNum) const;
  };

template<class CodeEmitter>
  char Emitter<CodeEmitter>::ID = 0;
} // end anonymous namespace.

/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified templated MachineCodeEmitter object.
FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
                                                JITCodeEmitter &JCE) {
  return new Emitter<JITCodeEmitter>(TM, JCE);
}

template<class CodeEmitter>
bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
  MMI = &getAnalysis<MachineModuleInfo>();
  MCE.setModuleInfo(MMI);

  // Re-query per-function target state; the subtarget/relocation model may
  // matter for how individual instructions are encoded below.
  II = TM.getInstrInfo();
  TD = TM.getDataLayout();
  Is64BitMode = TM.getSubtarget<X86Subtarget>().is64Bit();
  IsPIC = TM.getRelocationModel() == Reloc::PIC_;

  // Re-emit the whole function until finishFunction() returns false
  // (a true return requests a retry — presumably when the emitter's
  // buffer was too small; confirm against the MCE implementation).
  do {
    DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n");
    MCE.startFunction(MF);
    for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
         MBB != E; ++MBB) {
      MCE.StartMachineBasicBlock(MBB);
      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
           I != E; ++I) {
        const MCInstrDesc &Desc = I->getDesc();
        emitInstruction(*I, &Desc);
        // MOVPC32r is basically a call plus a pop instruction.
        if (Desc.getOpcode() == X86::MOVPC32r)
          emitInstruction(*I, &II->get(X86::POP32r));
        ++NumEmitted;  // Keep track of the # of mi's emitted
      }
    }
  } while (MCE.finishFunction(MF));

  return false;
}

/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
static unsigned determineREX(const MachineInstr &MI) {
  // REX bit layout (Intel SDM Vol. 2, sect. 2.2.1):
  //   bit 0 = REX.B (extends ModRM r/m, SIB base, or opcode reg field)
  //   bit 1 = REX.X (extends SIB index field)
  //   bit 2 = REX.R (extends ModRM reg field)
  //   bit 3 = REX.W (64-bit operand size)
  unsigned REX = 0;
  const MCInstrDesc &Desc = MI.getDesc();

  // Pseudo instructions do not need REX prefix byte.
  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return 0;
  if (Desc.TSFlags & X86II::REX_W)
    REX |= 1 << 3;   // REX.W

  unsigned NumOps = Desc.getNumOperands();
  if (NumOps) {
    bool isTwoAddr = NumOps > 1 &&
      Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;

    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
    unsigned i = isTwoAddr ? 1 : 0;
    for (unsigned e = NumOps; i != e; ++i) {
      const MachineOperand& MO = MI.getOperand(i);
      if (MO.isReg()) {
        unsigned Reg = MO.getReg();
        if (X86II::isX86_64NonExtLowByteReg(Reg))
          REX |= 0x40;   // Empty REX: selects the low-byte regs, no extension.
      }
    }

    // Which REX extension bit an extended register maps to depends on the
    // instruction form (where the register lands in the ModRM/SIB encoding).
    switch (Desc.TSFlags & X86II::FormMask) {
    case X86II::MRMInitReg:
      // Register is duplicated in both reg and r/m fields.
      if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= (1 << 0) | (1 << 2);   // REX.B and REX.R
      break;
    case X86II::MRMSrcReg: {
      if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;   // REX.R for the destination (reg field).
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (X86InstrInfo::isX86_64ExtendedReg(MO))
          REX |= 1 << 0;   // REX.B for the source (r/m field).
      }
      break;
    }
    case X86II::MRMSrcMem: {
      if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;   // REX.R for the destination (reg field).
      // The memory operand's base and index registers map to REX.B (bit 0)
      // and REX.X (bit 1) in the order they appear.
      unsigned Bit = 0;
      i = isTwoAddr ? 2 : 1;
      for (; i != NumOps; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (X86InstrInfo::isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    case X86II::MRM0m: case X86II::MRM1m:
    case X86II::MRM2m: case X86II::MRM3m:
    case X86II::MRM4m: case X86II::MRM5m:
    case X86II::MRM6m: case X86II::MRM7m:
    case X86II::MRMDestMem: {
      // Memory operand comes first; a register source (if any) follows it.
      unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
      i = isTwoAddr ? 1 : 0;
      if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
        REX |= 1 << 2;   // REX.R for the register source.
      unsigned Bit = 0;
      for (; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (X86InstrInfo::isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;   // REX.B then REX.X, in operand order.
          Bit++;
        }
      }
      break;
    }
    default: {
      // Plain register forms: operand 0 is in the r/m field.
      if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 0;   // REX.B
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (X86InstrInfo::isX86_64ExtendedReg(MO))
          REX |= 1 << 2;   // REX.R
      }
      break;
    }
    }
  }
  return REX;
}


/// emitPCRelativeBlockAddress - This method keeps track of the information
/// necessary to resolve the address of this block later and emits a dummy
/// value.
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
  // Remember where this reference was and where it is to so we can
  // deal with it later.
  MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
                                             X86::reloc_pcrel_word, MBB));
  MCE.emitWordLE(0);   // Placeholder patched when the block address is known.
}

/// emitGlobalAddress - Emit the specified address to the code stream assuming
/// this is part of a "take the address of a global" instruction.
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV,
                                             unsigned Reloc,
                                             intptr_t Disp /* = 0 */,
                                             intptr_t PCAdj /* = 0 */,
                                             bool Indirect /* = false */) {
  // Pick the constant stored in the relocation entry: PIC-relative
  // references carry the PIC base offset, PC-relative ones the PC
  // adjustment, everything else the raw displacement.
  intptr_t RelocCST = Disp;
  if (Reloc == X86::reloc_picrel_word)
    RelocCST = PICBaseOffset;
  else if (Reloc == X86::reloc_pcrel_word)
    RelocCST = PCAdj;
  // Indirect references go through a stub/non-lazy pointer rather than
  // resolving to the global itself.
  MachineRelocation MR = Indirect
    ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
                                           const_cast<GlobalValue *>(GV),
                                           RelocCST, false)
    : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
                               const_cast<GlobalValue *>(GV), RelocCST, false);
  MCE.addRelocation(MR);
  // The relocated value will be added to the displacement
  if (Reloc == X86::reloc_absolute_dword)
    MCE.emitDWordLE(Disp);
  else
    MCE.emitWordLE((int32_t)Disp);
}

/// emitExternalSymbolAddress - Arrange for the address of an external symbol to
/// be emitted to the current location in the function, and allow it to be PC
/// relative.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES,
                                                     unsigned Reloc) {
  intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0;

  // X86 never needs stubs because instruction selection will always pick
  // an instruction sequence that is large enough to hold any address
  // to a symbol.
  // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall)
  bool NeedStub = false;
  MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
                                                 Reloc, ES, RelocCST,
                                                 0, NeedStub));
  if (Reloc == X86::reloc_absolute_dword)
    MCE.emitDWordLE(0);
  else
    MCE.emitWordLE(0);
}

/// emitConstPoolAddress - Arrange for the address of an constant pool
/// to be emitted to the current location in the function, and allow it to be PC
/// relative.
327template<class CodeEmitter> 328void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, unsigned Reloc, 329 intptr_t Disp /* = 0 */, 330 intptr_t PCAdj /* = 0 */) { 331 intptr_t RelocCST = 0; 332 if (Reloc == X86::reloc_picrel_word) 333 RelocCST = PICBaseOffset; 334 else if (Reloc == X86::reloc_pcrel_word) 335 RelocCST = PCAdj; 336 MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), 337 Reloc, CPI, RelocCST)); 338 // The relocated value will be added to the displacement 339 if (Reloc == X86::reloc_absolute_dword) 340 MCE.emitDWordLE(Disp); 341 else 342 MCE.emitWordLE((int32_t)Disp); 343} 344 345/// emitJumpTableAddress - Arrange for the address of a jump table to 346/// be emitted to the current location in the function, and allow it to be PC 347/// relative. 348template<class CodeEmitter> 349void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, 350 intptr_t PCAdj /* = 0 */) { 351 intptr_t RelocCST = 0; 352 if (Reloc == X86::reloc_picrel_word) 353 RelocCST = PICBaseOffset; 354 else if (Reloc == X86::reloc_pcrel_word) 355 RelocCST = PCAdj; 356 MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), 357 Reloc, JTI, RelocCST)); 358 // The relocated value will be added to the displacement 359 if (Reloc == X86::reloc_absolute_dword) 360 MCE.emitDWordLE(0); 361 else 362 MCE.emitWordLE(0); 363} 364 365inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, 366 unsigned RM) { 367 assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); 368 return RM | (RegOpcode << 3) | (Mod << 6); 369} 370 371template<class CodeEmitter> 372void Emitter<CodeEmitter>::emitRegModRMByte(unsigned ModRMReg, 373 unsigned RegOpcodeFld){ 374 MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); 375} 376 377template<class CodeEmitter> 378void Emitter<CodeEmitter>::emitRegModRMByte(unsigned RegOpcodeFld) { 379 MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0)); 380} 381 
/// emitSIBByte - Emit one SIB (scale-index-base) byte.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitSIBByte(unsigned SS,
                                       unsigned Index,
                                       unsigned Base) {
  // SIB byte is in the same format as the ModRMByte...
  MCE.emitByte(ModRMByte(SS, Index, Base));
}

/// emitConstant - Emit the low Size bytes of Val, least significant first.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitConstant(uint64_t Val, unsigned Size) {
  // Output the constant in little endian byte order...
  for (unsigned i = 0; i != Size; ++i) {
    MCE.emitByte(Val & 255);
    Val >>= 8;
  }
}

/// isDisp8 - Return true if this signed displacement fits in a 8-bit
/// sign-extended field.
static bool isDisp8(int Value) {
  return Value == (signed char)Value;
}

/// gvNeedsNonLazyPtr - Return true if the reference to this global should go
/// through a stub (non-lazy pointer) rather than the global itself.
static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
                              const TargetMachine &TM) {
  // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
  // mechanism as 32-bit mode.
  if (TM.getSubtarget<X86Subtarget>().is64Bit() &&
      !TM.getSubtarget<X86Subtarget>().isTargetDarwin())
    return false;

  // Return true if this is a reference to a stub containing the address of the
  // global, not the global itself.
  return isGlobalStubReference(GVOp.getTargetFlags());
}

/// emitDisplacementField - Emit a 4-byte (or 8-byte for absolute-dword
/// relocations) displacement, adding a relocation entry when RelocOp refers
/// to a global, symbol, constant pool entry, or jump table.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
                                                 int DispVal,
                                                 intptr_t Adj /* = 0 */,
                                                 bool IsPCRel /* = true */) {
  // If this is a simple integer displacement that doesn't require a relocation,
  // emit it now.
  if (!RelocOp) {
    emitConstant(DispVal, 4);
    return;
  }

  // Otherwise, this is something that requires a relocation.  Emit it as such
  // now.
  unsigned RelocType = Is64BitMode ?
    (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext)
    : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
  if (RelocOp->isGlobal()) {
    // In 64-bit static small code model, we could potentially emit absolute.
    // But it's probably not beneficial. If the MCE supports using RIP directly
    // do it, otherwise fallback to absolute (this is determined by IsPCRel).
    //  89 05 00 00 00 00     mov    %eax,0(%rip)  # PC-relative
    //  89 04 25 00 00 00 00  mov    %eax,0x0      # Absolute
    bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
    emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
                      Adj, Indirect);
  } else if (RelocOp->isSymbol()) {
    emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
  } else if (RelocOp->isCPI()) {
    emitConstPoolAddress(RelocOp->getIndex(), RelocType,
                         RelocOp->getOffset(), Adj);
  } else {
    assert(RelocOp->isJTI() && "Unexpected machine operand!");
    emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
  }
}

/// emitMemModRMByte - Emit the ModR/M byte (plus SIB byte and displacement as
/// required) for the 5-operand memory reference starting at operand Op
/// (base, scale, index, displacement, segment), with RegOpcodeField in the
/// reg/opcode field.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
                                            unsigned Op,unsigned RegOpcodeField,
                                            intptr_t PCAdj) {
  const MachineOperand &Op3 = MI.getOperand(Op+3);
  int DispVal = 0;
  const MachineOperand *DispForReloc = 0;

  // Figure out what sort of displacement we have to handle here.
  if (Op3.isGlobal()) {
    DispForReloc = &Op3;
  } else if (Op3.isSymbol()) {
    DispForReloc = &Op3;
  } else if (Op3.isCPI()) {
    // Constant pool entries can be folded into an immediate displacement
    // only when the MCE can resolve addresses early and neither 64-bit mode
    // nor PIC forces a relocation.
    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
      DispVal += Op3.getOffset();
    }
  } else if (Op3.isJTI()) {
    // Same early-resolution rule for jump table addresses.
    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
    }
  } else {
    DispVal = Op3.getImm();
  }

  const MachineOperand &Base     = MI.getOperand(Op);
  const MachineOperand &Scale    = MI.getOperand(Op+1);
  const MachineOperand &IndexReg = MI.getOperand(Op+2);

  unsigned BaseReg = Base.getReg();

  // Handle %rip relative addressing.
  if (BaseReg == X86::RIP ||
      (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode
    assert(IndexReg.getReg() == 0 && Is64BitMode &&
           "Invalid rip-relative address");
    // mod=0, r/m=5 encodes [RIP+disp32] in 64-bit mode.
    MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
    emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
    return;
  }

  // Indicate that the displacement will use an pcrel or absolute reference
  // by default. MCEs able to resolve addresses on-the-fly use pcrel by default
  // while others, unless explicit asked to use RIP, use absolute references.
  bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;

  // Is a SIB byte needed?
  // If no BaseReg, issue a RIP relative instruction only if the MCE can
  // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
  // 2-7) and absolute references.
  unsigned BaseRegNo = -1U;
  if (BaseReg != 0 && BaseReg != X86::RIP)
    BaseRegNo = getX86RegNum(BaseReg);

  if (// The SIB byte must be used if there is an index register.
      IndexReg.getReg() == 0 &&
      // The SIB byte must be used if the base is ESP/RSP/R12, all of which
      // encode to an R/M value of 4, which indicates that a SIB byte is
      // present.
      BaseRegNo != N86::ESP &&
      // If there is no base register and we're in 64-bit mode, we need a SIB
      // byte to emit an addr that is just 'disp32' (the non-RIP relative form).
      (!Is64BitMode || BaseReg != 0)) {
    if (BaseReg == 0 ||          // [disp32]     in X86-32 mode
        BaseReg == X86::RIP) {   // [disp32+RIP] in X86-64 mode
      MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
      emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
      return;
    }

    // If the base is not EBP/ESP and there is no displacement, use simple
    // indirect register encoding, this handles addresses like [EAX].  The
    // encoding for [EBP] with no displacement means [disp32] so we handle it
    // by emitting a displacement of 0 below.
    if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
      MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
      return;
    }

    // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
    if (!DispForReloc && isDisp8(DispVal)) {
      MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo));
      emitConstant(DispVal, 1);
      return;
    }

    // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
    MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
    emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
    return;
  }

  // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first.
  assert(IndexReg.getReg() != X86::ESP &&
         IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");

  bool ForceDisp32 = false;
  bool ForceDisp8  = false;
  if (BaseReg == 0) {
    // If there is no base register, we emit the special case SIB byte with
    // MOD=0, BASE=4, to JUST get the index, scale, and displacement.
    MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
    ForceDisp32 = true;
  } else if (DispForReloc) {
    // Emit the normal disp32 encoding.
    MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
    ForceDisp32 = true;
  } else if (DispVal == 0 && BaseRegNo != N86::EBP) {
    // Emit no displacement ModR/M byte
    MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
  } else if (isDisp8(DispVal)) {
    // Emit the disp8 encoding...
    MCE.emitByte(ModRMByte(1, RegOpcodeField, 4));
    ForceDisp8 = true;           // Make sure to force 8 bit disp if Base=EBP
  } else {
    // Emit the normal disp32 encoding...
    MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
  }

  // Calculate what the SS field value should be...
  // Maps scale 1/2/4/8 to the 2-bit SS encoding 0/1/2/3; other scales are
  // invalid (~0U trips the assert inside ModRMByte via emitSIBByte).
  static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 };
  unsigned SS = SSTable[Scale.getImm()];

  if (BaseReg == 0) {
    // Handle the SIB byte for the case where there is no base, see Intel
    // Manual 2A, table 2-7. The displacement has already been output.
    unsigned IndexRegNo;
    if (IndexReg.getReg())
      IndexRegNo = getX86RegNum(IndexReg.getReg());
    else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5)
      IndexRegNo = 4;
    emitSIBByte(SS, IndexRegNo, 5);
  } else {
    unsigned BaseRegNo = getX86RegNum(BaseReg);
    unsigned IndexRegNo;
    if (IndexReg.getReg())
      IndexRegNo = getX86RegNum(IndexReg.getReg());
    else
      IndexRegNo = 4;   // For example [ESP+1*<noreg>+4]
    emitSIBByte(SS, IndexRegNo, BaseRegNo);
  }

  // Do we need to output a displacement?
  if (ForceDisp8) {
    emitConstant(DispVal, 1);
  } else if (DispVal != 0 || ForceDisp32) {
    emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel);
  }
}

/// UpdateOp - Rewrite MI's descriptor to that of Opcode and return the new
/// descriptor.
static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II,
                                   unsigned Opcode) {
  const MCInstrDesc *Desc = &II->get(Opcode);
  MI.setDesc(*Desc);
  return Desc;
}

/// Is16BitMemOperand - Return true if the specified instruction has
/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) {
  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);

  if ((BaseReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
      (IndexReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
    return true;
  return false;
}

/// Is32BitMemOperand - Return true if the specified instruction has
/// a 32-bit memory operand.
/// Op specifies the operand # of the memoperand.
static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) {
  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);

  if ((BaseReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) ||
      (IndexReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg())))
    return true;
  return false;
}

/// Is64BitMemOperand - Return true if the specified instruction has
/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
#ifndef NDEBUG
// Only used inside asserts below, so compiled out in release builds.
static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) {
  const MachineOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
  const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);

  if ((BaseReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
      (IndexReg.getReg() != 0 &&
       X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
    return true;
  return false;
}
#endif

/// emitOpcodePrefix - Emit all legacy prefix bytes required by MI, in
/// encoding order: lock, segment override, rep, address-size, operand-size,
/// mandatory SIMD prefix (F2/F3), REX, then any 0F/38/3A/A6/A7 escape bytes.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitOpcodePrefix(uint64_t TSFlags,
                                            int MemOperand,
                                            const MachineInstr &MI,
                                            const MCInstrDesc *Desc) const {
  // Emit the lock opcode prefix as needed.
  if (Desc->TSFlags & X86II::LOCK)
    MCE.emitByte(0xF0);

  // Emit segment override opcode prefix as needed.
  emitSegmentOverridePrefix(TSFlags, MemOperand, MI);

  // Emit the repeat opcode prefix as needed.
  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP)
    MCE.emitByte(0xF3);

  // Emit the address size opcode prefix as needed.
  // Needed when the memory operand's registers are one size below the
  // current mode's default (32-bit in 64-bit mode, 16-bit in 32-bit mode).
  bool need_address_override;
  if (TSFlags & X86II::AdSize) {
    need_address_override = true;
  } else if (MemOperand == -1) {
    need_address_override = false;
  } else if (Is64BitMode) {
    assert(!Is16BitMemOperand(MI, MemOperand));
    need_address_override = Is32BitMemOperand(MI, MemOperand);
  } else {
    assert(!Is64BitMemOperand(MI, MemOperand));
    need_address_override = Is16BitMemOperand(MI, MemOperand);
  }

  if (need_address_override)
    MCE.emitByte(0x67);

  // Emit the operand size opcode prefix as needed.
  if (TSFlags & X86II::OpSize)
    MCE.emitByte(0x66);

  // Decode Op0Mask into the mandatory prefix (F2/F3/D8-DF) to emit now and
  // whether a 0F escape byte is needed later (just before the opcode).
  bool Need0FPrefix = false;
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::TB:   // Two-byte opcode prefix
  case X86II::T8:   // 0F 38
  case X86II::TA:   // 0F 3A
  case X86II::A6:   // 0F A6
  case X86II::A7:   // 0F A7
    Need0FPrefix = true;
    break;
  case X86II::REP: break;   // already handled.
  case X86II::T8XS: // F3 0F 38
  case X86II::XS:   // F3 0F
    MCE.emitByte(0xF3);
    Need0FPrefix = true;
    break;
  case X86II::T8XD: // F2 0F 38
  case X86II::TAXD: // F2 0F 3A
  case X86II::XD:   // F2 0F
    MCE.emitByte(0xF2);
    Need0FPrefix = true;
    break;
  case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
  case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
    // X87 opcodes: the D8-DF byte is recovered from the flag's position.
    MCE.emitByte(0xD8+
                 (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
                  >> X86II::Op0Shift));
    break;   // Two-byte opcode prefix
  default: llvm_unreachable("Invalid prefix!");
  case 0: break;   // No prefix!
  }

  // Handle REX prefix.
  if (Is64BitMode) {
    if (unsigned REX = determineREX(MI))
      MCE.emitByte(0x40 | REX);
  }

  // 0x0F escape code must be emitted just before the opcode.
  if (Need0FPrefix)
    MCE.emitByte(0x0F);

  // Emit the second escape byte (38/3A/A6/A7) for three-byte opcodes.
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::T8XD:  // F2 0F 38
  case X86II::T8XS:  // F3 0F 38
  case X86II::T8:    // 0F 38
    MCE.emitByte(0x38);
    break;
  case X86II::TAXD:  // F2 0F 3A
  case X86II::TA:    // 0F 3A
    MCE.emitByte(0x3A);
    break;
  case X86II::A6:    // 0F A6
    MCE.emitByte(0xA6);
    break;
  case X86II::A7:    // 0F A7
    MCE.emitByte(0xA7);
    break;
  }
}

// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
// 0-7 and the difference between the 2 groups is given by the REX prefix.
// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
// in 1's complement form, example:
//
//  ModRM field => XMM9 => 1
//  VEX.VVVV    => XMM9 => ~9
//
//  See table 4-35 of Intel AVX Programming Reference for details.
template<class CodeEmitter>
unsigned char
Emitter<CodeEmitter>::getVEXRegisterEncoding(const MachineInstr &MI,
                                             unsigned OpNum) const {
  unsigned SrcReg = MI.getOperand(OpNum).getReg();
  unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg());
  // Fold the REX-style extension bit back in to get the full 0-15 number.
  if (X86II::isX86_64ExtendedReg(SrcReg))
    SrcRegNum |= 8;

  // The registers represented through VEX_VVVV should
  // be encoded in 1's complement form.
  return (~SrcRegNum) & 0xf;
}

/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
                                                     int MemOperand,
                                                     const MachineInstr &MI) const {
  switch (TSFlags & X86II::SegOvrMask) {
  default: llvm_unreachable("Invalid segment!");
  case 0:
    // No segment override, check for explicit one on memory operand.
    if (MemOperand != -1) {   // If the instruction has a memory operand.
791 switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { 792 default: llvm_unreachable("Unknown segment register!"); 793 case 0: break; 794 case X86::CS: MCE.emitByte(0x2E); break; 795 case X86::SS: MCE.emitByte(0x36); break; 796 case X86::DS: MCE.emitByte(0x3E); break; 797 case X86::ES: MCE.emitByte(0x26); break; 798 case X86::FS: MCE.emitByte(0x64); break; 799 case X86::GS: MCE.emitByte(0x65); break; 800 } 801 } 802 break; 803 case X86II::FS: 804 MCE.emitByte(0x64); 805 break; 806 case X86II::GS: 807 MCE.emitByte(0x65); 808 break; 809 } 810} 811 812template<class CodeEmitter> 813void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, 814 int MemOperand, 815 const MachineInstr &MI, 816 const MCInstrDesc *Desc) const { 817 bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; 818 bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; 819 820 // VEX_R: opcode externsion equivalent to REX.R in 821 // 1's complement (inverted) form 822 // 823 // 1: Same as REX_R=0 (must be 1 in 32-bit mode) 824 // 0: Same as REX_R=1 (64 bit mode only) 825 // 826 unsigned char VEX_R = 0x1; 827 828 // VEX_X: equivalent to REX.X, only used when a 829 // register is used for index in SIB Byte. 830 // 831 // 1: Same as REX.X=0 (must be 1 in 32-bit mode) 832 // 0: Same as REX.X=1 (64-bit mode only) 833 unsigned char VEX_X = 0x1; 834 835 // VEX_B: 836 // 837 // 1: Same as REX_B=0 (ignored in 32-bit mode) 838 // 0: Same as REX_B=1 (64 bit mode only) 839 // 840 unsigned char VEX_B = 0x1; 841 842 // VEX_W: opcode specific (use like REX.W, or used for 843 // opcode extension, or ignored, depending on the opcode byte) 844 unsigned char VEX_W = 0; 845 846 // XOP: Use XOP prefix byte 0x8f instead of VEX. 
  // Nonzero when the instruction requires AMD's XOP prefix (escape byte 8Fh)
  // instead of the VEX escape bytes C4h/C5h.
  unsigned char XOP = 0;

  // VEX_5M (VEX m-mmmmm field):
  //
  //  0b00000: Reserved for future use
  //  0b00001: implied 0F leading opcode
  //  0b00010: implied 0F 38 leading opcode bytes
  //  0b00011: implied 0F 3A leading opcode bytes
  //  0b00100-0b11111: Reserved for future use
  //  0b01000: XOP map select - 08h instructions with imm byte
  //  0b01001: XOP map select - 09h instructions with no imm byte
  unsigned char VEX_5M = 0x1;

  // VEX_4V (VEX vvvv field): a register specifier
  // (in 1's complement form) or 1111 if unused.
  unsigned char VEX_4V = 0xf;

  // VEX_L (Vector Length):
  //
  //  0: scalar or 128-bit vector
  //  1: 256-bit vector
  //
  unsigned char VEX_L = 0;

  // VEX_PP: opcode extension providing equivalent
  // functionality of a SIMD prefix
  //
  //  0b00: None
  //  0b01: 66
  //  0b10: F3
  //  0b11: F2
  //
  unsigned char VEX_PP = 0;

  // Encode the operand size opcode prefix as needed.
  if (TSFlags & X86II::OpSize)
    VEX_PP = 0x01;

  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
    VEX_W = 1;

  if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
    XOP = 1;

  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
    VEX_L = 1;

  // Translate the legacy opcode-map/prefix flags (Op0Mask) into the VEX
  // m-mmmmm map select and pp SIMD-prefix fields computed above.
  switch (TSFlags & X86II::Op0Mask) {
  default: llvm_unreachable("Invalid prefix!");
  case X86II::T8:   // 0F 38
    VEX_5M = 0x2;
    break;
  case X86II::TA:   // 0F 3A
    VEX_5M = 0x3;
    break;
  case X86II::T8XS: // F3 0F 38
    VEX_PP = 0x2;
    VEX_5M = 0x2;
    break;
  case X86II::T8XD: // F2 0F 38
    VEX_PP = 0x3;
    VEX_5M = 0x2;
    break;
  case X86II::TAXD: // F2 0F 3A
    VEX_PP = 0x3;
    VEX_5M = 0x3;
    break;
  case X86II::XS:   // F3 0F
    VEX_PP = 0x2;
    break;
  case X86II::XD:   // F2 0F
    VEX_PP = 0x3;
    break;
  case X86II::XOP8:
    VEX_5M = 0x8;
    break;
  case X86II::XOP9:
    VEX_5M = 0x9;
    break;
  case X86II::A6:  // Bypass: Not used by VEX
  case X86II::A7:  // Bypass: Not used by VEX
  case X86II::TB:  // Bypass: Not used by VEX
  case 0:
    break;  // No prefix!
  }


  // Classify VEX_B, VEX_4V, VEX_R, VEX_X
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  // Skip operands tied to an earlier def so CurOp lands on the first
  // independently-encoded operand.
  if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
    ++CurOp;
  else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
    assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
    // Special case for GATHER with 2 TIED_TO operands
    // Skip the first 2 operands: dst, mask_wb
    CurOp += 2;
  }

  // The VEX R/X/B fields are stored inverted; they start as 1 (no extension)
  // and are cleared to 0 when the corresponding operand uses an x86-64
  // extended register (reg8-reg15 / xmm8-xmm15).
  switch (TSFlags & X86II::FormMask) {
  case X86II::MRMInitReg:
    // Duplicate register.
    if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
      VEX_R = 0x0;

    if (HasVEX_4V)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
    if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
      VEX_B = 0x0;
    if (HasVEX_4VOp3)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
    break;
  case X86II::MRMDestMem: {
    // MRMDestMem instructions forms:
    //  MemAddr, src1(ModR/M)
    //  MemAddr, src1(VEX_4V), src2(ModR/M)
    //  MemAddr, src1(ModR/M), imm8
    //
    if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
      VEX_B = 0x0;
    if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
      VEX_X = 0x0;

    CurOp = X86::AddrNumOperands;
    if (HasVEX_4V)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp++);

    const MachineOperand &MO = MI.getOperand(CurOp);
    if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
      VEX_R = 0x0;
    break;
  }
  case X86II::MRMSrcMem:
    // MRMSrcMem instructions forms:
    //  src1(ModR/M), MemAddr
    //  src1(ModR/M), src2(VEX_4V), MemAddr
    //  src1(ModR/M), MemAddr, imm8
    //  src1(ModR/M), MemAddr, src2(VEX_I8IMM)
    //
    //  FMA4:
    //  dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
    //  dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
    if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
      VEX_R = 0x0;

    if (HasVEX_4V)
      VEX_4V = getVEXRegisterEncoding(MI, 1);

    if (X86II::isX86_64ExtendedReg(
             MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
      VEX_B = 0x0;
    if (X86II::isX86_64ExtendedReg(
             MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
      VEX_X = 0x0;

    if (HasVEX_4VOp3)
      VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
    break;
  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {
    // MRM[0-9]m instructions forms:
    //  MemAddr
    //  src1(VEX_4V), MemAddr
    if (HasVEX_4V)
      VEX_4V = getVEXRegisterEncoding(MI, 0);

    if (X86II::isX86_64ExtendedReg(
             MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
      VEX_B = 0x0;
    if (X86II::isX86_64ExtendedReg(
             MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
      VEX_X = 0x0;
    break;
  }
  case X86II::MRMSrcReg:
    // MRMSrcReg instructions forms:
    //  dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
    //  dst(ModR/M), src1(ModR/M)
    //  dst(ModR/M), src1(ModR/M), imm8
    //
    if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
      VEX_R = 0x0;
    CurOp++;

    if (HasVEX_4V)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
    if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
      VEX_B = 0x0;
    CurOp++;
    if (HasVEX_4VOp3)
      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
    break;
  case X86II::MRMDestReg:
    // MRMDestReg instructions forms:
    //  dst(ModR/M), src(ModR/M)
    //  dst(ModR/M), src(ModR/M), imm8
    if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
      VEX_B = 0x0;
    if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
      VEX_R = 0x0;
    break;
  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r:
    // MRM0r-MRM7r instructions forms:
    //  dst(VEX_4V), src(ModR/M), imm8
    VEX_4V = getVEXRegisterEncoding(MI, 0);
    if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
      VEX_B = 0x0;
    break;
  default: // RawFrm
    break;
  }

  // Emit segment override opcode prefix as needed.
  emitSegmentOverridePrefix(TSFlags, MemOperand, MI);

  // VEX opcode prefix can have 2 or 3 bytes
  //
  //  3 bytes:
  //    +-----+ +--------------+ +-------------------+
  //    | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
  //    +-----+ +--------------+ +-------------------+
  //  2 bytes:
  //    +-----+ +-------------------+
  //    | C5h | | R | vvvv | L | pp |
  //    +-----+ +-------------------+
  //
  unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);

  // The compact 2-byte form is usable only when X/B are at their default
  // (inverted) value 1, W is 0, no XOP escape is needed, and the map is the
  // implied 0F (VEX_5M == 1).
  if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
    MCE.emitByte(0xC5);
    MCE.emitByte(LastByte | (VEX_R << 7));
    return;
  }

  // 3 byte VEX prefix
  MCE.emitByte(XOP ? 0x8F : 0xC4);
  MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
  MCE.emitByte(LastByte | (VEX_W << 7));
}

/// emitInstruction - Emit the machine-code bytes for one MachineInstr into
/// the JIT code buffer via MCE: lower pseudo opcodes, emit prefixes, the
/// base opcode, the ModR/M-encoded operands for the instruction's FormMask
/// form, and any trailing immediates, recording relocations for global /
/// symbol / constant-pool / jump-table operands along the way.
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
                                           const MCInstrDesc *Desc) {
  DEBUG(dbgs() << MI);

  // If this is a pseudo instruction, lower it.
  switch (Desc->getOpcode()) {
  // The ADD*_DB forms are adds whose operands are known disjoint, so they
  // are emitted as the equivalent OR; the ACQUIRE_/RELEASE_ forms are plain
  // moves on x86 (the memory model makes them acquire/release already).
  case X86::ADD16rr_DB:      Desc = UpdateOp(MI, II, X86::OR16rr); break;
  case X86::ADD32rr_DB:      Desc = UpdateOp(MI, II, X86::OR32rr); break;
  case X86::ADD64rr_DB:      Desc = UpdateOp(MI, II, X86::OR64rr); break;
  case X86::ADD16ri_DB:      Desc = UpdateOp(MI, II, X86::OR16ri); break;
  case X86::ADD32ri_DB:      Desc = UpdateOp(MI, II, X86::OR32ri); break;
  case X86::ADD64ri32_DB:    Desc = UpdateOp(MI, II, X86::OR64ri32); break;
  case X86::ADD16ri8_DB:     Desc = UpdateOp(MI, II, X86::OR16ri8); break;
  case X86::ADD32ri8_DB:     Desc = UpdateOp(MI, II, X86::OR32ri8); break;
  case X86::ADD64ri8_DB:     Desc = UpdateOp(MI, II, X86::OR64ri8); break;
  case X86::ACQUIRE_MOV8rm:  Desc = UpdateOp(MI, II, X86::MOV8rm); break;
  case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
  case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
  case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
  case X86::RELEASE_MOV8mr:  Desc = UpdateOp(MI, II, X86::MOV8mr); break;
  case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
  case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
  case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;
  }


  MCE.processDebugLoc(MI.getDebugLoc(), true);

  unsigned Opcode = Desc->Opcode;

  // If this is a two-address instruction, skip one of the register operands.
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0)
    ++CurOp;
  else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) {
    assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
    // Special case for GATHER with 2 TIED_TO operands
    // Skip the first 2 operands: dst, mask_wb
    CurOp += 2;
  }

  uint64_t TSFlags = Desc->TSFlags;

  // Is this instruction encoded using the AVX VEX prefix?
  bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
  // It uses the VEX.VVVV field?
  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
  // With MemOp4, the second register source lives at operand index 2 and is
  // encoded in the trailing immediate byte (see the VEX_I8IMM loop below).
  const unsigned MemOp4_I8IMMOperand = 2;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
  if (MemoryOperand != -1) MemoryOperand += CurOp;

  if (!HasVEXPrefix)
    emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);
  else
    emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc);

  unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags);
  // Dispatch on the instruction's encoding form. Each case advances CurOp
  // past the operands it encodes; anything left over is emitted as an
  // immediate by the loop after the switch.
  switch (TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
  case X86II::Pseudo:
    // Remember the current PC offset, this is the PIC relocation
    // base address.
    switch (Opcode) {
    default:
      llvm_unreachable("pseudo instructions should be removed before code"
                       " emission");
    // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
    // to make it slightly easier to see.
    case X86::Int_MemBarrier:
      DEBUG(dbgs() << "#MEMBARRIER\n");
      break;

    case TargetOpcode::INLINEASM:
      // We allow inline assembler nodes with empty bodies - they can
      // implicitly define registers, which is ok for JIT.
      if (MI.getOperand(0).getSymbolName()[0])
        report_fatal_error("JIT does not support inline asm!");
      break;
    case TargetOpcode::PROLOG_LABEL:
    case TargetOpcode::GC_LABEL:
    case TargetOpcode::EH_LABEL:
      MCE.emitLabel(MI.getOperand(0).getMCSymbol());
      break;

    case TargetOpcode::IMPLICIT_DEF:
    case TargetOpcode::KILL:
      break;
    case X86::MOVPC32r: {
      // This emits the "call" portion of this pseudo instruction.
      MCE.emitByte(BaseOpcode);
      emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags));
      // Remember PIC base.
      PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset();
      X86JITInfo *JTI = TM.getJITInfo();
      JTI->setPICBase(MCE.getCurrentPCValue());
      break;
    }
    }
    CurOp = NumOps;
    break;
  case X86II::RawFrm: {
    MCE.emitByte(BaseOpcode);

    if (CurOp == NumOps)
      break;

    const MachineOperand &MO = MI.getOperand(CurOp++);

    DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n");
    DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n");
    DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n");
    DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n");
    DEBUG(dbgs() << "isImm " << MO.isImm() << "\n");

    if (MO.isMBB()) {
      emitPCRelativeBlockAddress(MO.getMBB());
      break;
    }

    if (MO.isGlobal()) {
      emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
                        MO.getOffset(), 0);
      break;
    }

    if (MO.isSymbol()) {
      emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word);
      break;
    }

    // FIXME: Only used by hackish MCCodeEmitter, remove when dead.
    if (MO.isJTI()) {
      emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word);
      break;
    }

    assert(MO.isImm() && "Unknown RawFrm operand!");
    if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
      // Fix up immediate operand for pc relative calls.
      // Subtract the current PC plus the 4 bytes of the immediate itself.
      intptr_t Imm = (intptr_t)MO.getImm();
      Imm = Imm - MCE.getCurrentPCValue() - 4;
      emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags));
    } else
      emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags));
    break;
  }

  case X86II::AddRegFrm: {
    // Register is encoded in the low 3 bits of the opcode byte itself.
    MCE.emitByte(BaseOpcode +
                 getX86RegNum(MI.getOperand(CurOp++).getReg()));

    if (CurOp == NumOps)
      break;

    const MachineOperand &MO1 = MI.getOperand(CurOp++);
    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
    if (MO1.isImm()) {
      emitConstant(MO1.getImm(), Size);
      break;
    }

    // Non-immediate operand: pick a relocation type based on mode / PIC.
    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
    if (Opcode == X86::MOV64ri64i32)
      rt = X86::reloc_absolute_word;  // FIXME: add X86II flag?
    // This should not occur on Darwin for relocatable objects.
    if (Opcode == X86::MOV64ri)
      rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
    if (MO1.isGlobal()) {
      bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
      emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
                        Indirect);
    } else if (MO1.isSymbol())
      emitExternalSymbolAddress(MO1.getSymbolName(), rt);
    else if (MO1.isCPI())
      emitConstPoolAddress(MO1.getIndex(), rt);
    else if (MO1.isJTI())
      emitJumpTableAddress(MO1.getIndex(), rt);
    break;
  }

  case X86II::MRMDestReg: {
    MCE.emitByte(BaseOpcode);
    emitRegModRMByte(MI.getOperand(CurOp).getReg(),
                     getX86RegNum(MI.getOperand(CurOp+1).getReg()));
    CurOp += 2;
    break;
  }
  case X86II::MRMDestMem: {
    MCE.emitByte(BaseOpcode);

    unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
      SrcRegNum++;
    emitMemModRMByte(MI, CurOp,
                     getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
    CurOp = SrcRegNum + 1;
    break;
  }

  case X86II::MRMSrcReg: {
    MCE.emitByte(BaseOpcode);

    unsigned SrcRegNum = CurOp+1;
    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
      ++SrcRegNum;

    if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
      ++SrcRegNum;

    emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(),
                     getX86RegNum(MI.getOperand(CurOp).getReg()));
    // 2 operands skipped with HasMemOp4, compensate accordingly
    CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
    if (HasVEX_4VOp3)
      ++CurOp;
    break;
  }
  case X86II::MRMSrcMem: {
    int AddrOperands = X86::AddrNumOperands;
    unsigned FirstMemOp = CurOp+1;
    if (HasVEX_4V) {
      ++AddrOperands;
      ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
    }
    if (HasMemOp4) // Skip second register source (encoded in I8IMM)
      ++FirstMemOp;

    MCE.emitByte(BaseOpcode);

    // If a trailing immediate follows the memory operand, its size must be
    // folded into the PC adjustment used for rip-relative addressing.
    intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
      X86II::getSizeOfImm(Desc->TSFlags) : 0;
    emitMemModRMByte(MI, FirstMemOp,
                     getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj);
    CurOp += AddrOperands + 1;
    if (HasVEX_4VOp3)
      ++CurOp;
    break;
  }

  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r: {
    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
      ++CurOp;
    MCE.emitByte(BaseOpcode);
    // The /0../7 extension digit goes in ModRM.reg.
    emitRegModRMByte(MI.getOperand(CurOp++).getReg(),
                     (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r);

    if (CurOp == NumOps)
      break;

    const MachineOperand &MO1 = MI.getOperand(CurOp++);
    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
    if (MO1.isImm()) {
      emitConstant(MO1.getImm(), Size);
      break;
    }

    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
    if (Opcode == X86::MOV64ri32)
      rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
    if (MO1.isGlobal()) {
      bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
      emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
                        Indirect);
    } else if (MO1.isSymbol())
      emitExternalSymbolAddress(MO1.getSymbolName(), rt);
    else if (MO1.isCPI())
      emitConstPoolAddress(MO1.getIndex(), rt);
    else if (MO1.isJTI())
      emitJumpTableAddress(MO1.getIndex(), rt);
    break;
  }

  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {
    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
      ++CurOp;
    // PC adjustment: size of a trailing immediate (4 for a non-imm trailing
    // operand that becomes a 4-byte relocation), or 0 if nothing follows.
    intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
      (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
          X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;

    MCE.emitByte(BaseOpcode);
    emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
                     PCAdj);
    CurOp += X86::AddrNumOperands;

    if (CurOp == NumOps)
      break;

    const MachineOperand &MO = MI.getOperand(CurOp++);
    unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
    if (MO.isImm()) {
      emitConstant(MO.getImm(), Size);
      break;
    }

    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
      : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
    if (Opcode == X86::MOV64mi32)
      rt = X86::reloc_absolute_word_sext;  // FIXME: add X86II flag?
    if (MO.isGlobal()) {
      bool Indirect = gvNeedsNonLazyPtr(MO, TM);
      emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
                        Indirect);
    } else if (MO.isSymbol())
      emitExternalSymbolAddress(MO.getSymbolName(), rt);
    else if (MO.isCPI())
      emitConstPoolAddress(MO.getIndex(), rt);
    else if (MO.isJTI())
      emitJumpTableAddress(MO.getIndex(), rt);
    break;
  }

  case X86II::MRMInitReg:
    MCE.emitByte(BaseOpcode);
    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
    emitRegModRMByte(MI.getOperand(CurOp).getReg(),
                     getX86RegNum(MI.getOperand(CurOp).getReg()));
    ++CurOp;
    break;

  // MRM_xx forms: a fixed second byte follows the base opcode.
  case X86II::MRM_C1:
    MCE.emitByte(BaseOpcode);
    MCE.emitByte(0xC1);
    break;
  case X86II::MRM_C8:
    MCE.emitByte(BaseOpcode);
    MCE.emitByte(0xC8);
    break;
  case X86II::MRM_C9:
    MCE.emitByte(BaseOpcode);
    MCE.emitByte(0xC9);
    break;
  case X86II::MRM_E8:
    MCE.emitByte(BaseOpcode);
    MCE.emitByte(0xE8);
    break;
  case X86II::MRM_F0:
    MCE.emitByte(BaseOpcode);
    MCE.emitByte(0xF0);
    break;
  }

  // Emit any remaining (up to two) trailing operands as immediates.
  while (CurOp != NumOps && NumOps - CurOp <= 2) {
    // The last source register of a 4 operand instruction in AVX is encoded
    // in bits[7:4] of a immediate byte.
    if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
      const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
                                                         : CurOp);
      ++CurOp;
      unsigned RegNum = getX86RegNum(MO.getReg()) << 4;
      if (X86II::isX86_64ExtendedReg(MO.getReg()))
        RegNum |= 1 << 7;
      // If there is an additional 5th operand it must be an immediate, which
      // is encoded in bits[3:0]
      if (CurOp != NumOps) {
        const MachineOperand &MIMM = MI.getOperand(CurOp++);
        if (MIMM.isImm()) {
          unsigned Val = MIMM.getImm();
          assert(Val < 16 && "Immediate operand value out of range");
          RegNum |= Val;
        }
      }
      emitConstant(RegNum, 1);
    } else {
      emitConstant(MI.getOperand(CurOp++).getImm(),
                   X86II::getSizeOfImm(Desc->TSFlags));
    }
  }

  // Any operand not consumed above indicates an encoding bug (unless the
  // instruction is variadic, e.g. has extra implicit-use list entries).
  if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
    dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
    llvm_unreachable(0);
  }

  MCE.processDebugLoc(MI.getDebugLoc(), false);
}