//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                          const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                     const MCSubtargetInfo &STI);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
             MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
  }
}
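
// For example, with RequiredShadowSize = 8, count() accumulates the encoded
// sizes of the instructions emitted after the stackmap, e.g.:
//   callq *%rax        # 2 bytes
//   movl  %eax, %ebx   # 2 bytes
// and if the next stackmap-bearing instruction arrives at this point,
// emitShadowPadding() fills the remaining 4 bytes with "nopl 8(%rax)".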

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}
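
// For example, a global @foo referenced with MO_DLLIMPORT lowers to the
// symbol "__imp_foo", and with MO_DARWIN_NONLAZY to a stub symbol such as
// "L_foo$non_lazy_ptr" (Darwin's private-global prefix plus the mangled
// name), which is also registered in the MachO non-lazy stub table.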

MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}
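
// For example, MO_PIC_BASE_OFFSET on a 32-bit Darwin target produces an
// expression of the form "_foo - L0$pb" (the symbol minus a PIC base label
// such as "L0$pb"), and a nonzero operand offset is then folded in as
// "... + <offset>".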
/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
/// a short fixed-register form.
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
  unsigned ImmOp = Inst.getNumOperands() - 1;
  assert(Inst.getOperand(0).isReg() &&
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
          Inst.getNumOperands() == 2) &&
         "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(0).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(ImmOp);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
}
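
// For example, "ADD32ri %eax, %eax, 42" (addl $42, %eax) becomes ADD32i32,
// which uses the one-byte 0x05 opcode with no ModRM byte instead of the
// longer 0x81 /0 encoding.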
/// If a movsx instruction has a shorter encoding for the register used,
/// simplify the instruction to use it instead.
static void SimplifyMOVSX(MCInst &Inst) {
  unsigned NewOpcode = 0;
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
  switch (Inst.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction!");
  case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
    if (Op0 == X86::AX && Op1 == X86::AL)
      NewOpcode = X86::CBW;
    break;
  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
    if (Op0 == X86::EAX && Op1 == X86::AX)
      NewOpcode = X86::CWDE;
    break;
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
    if (Op0 == X86::RAX && Op1 == X86::EAX)
      NewOpcode = X86::CDQE;
    break;
  }

  if (NewOpcode != 0) {
    Inst = MCInst();
    Inst.setOpcode(NewOpcode);
  }
}

/// Simplify things like MOV32rm to MOV32o32a.
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
                                  unsigned Opcode) {
  // Don't make these simplifications in 64-bit mode; other assemblers don't
  // perform them because they make the code larger.
  if (Printer.getSubtarget().is64Bit())
    return;

  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
  unsigned AddrBase = IsStore;
  unsigned RegOp = IsStore ? 0 : 5;
  unsigned AddrOp = AddrBase + 3;
  assert(
      Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
      Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
      (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
      "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(RegOp).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // Check whether this is an absolute address.
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
  // to do this here.
  bool Absolute = true;
  if (Inst.getOperand(AddrOp).isExpr()) {
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
      if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
        Absolute = false;
  }

  if (Absolute &&
      (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
       Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
       Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(AddrOp);
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
  Inst.addOperand(Seg);
}
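
// For example, in 32-bit mode a load "MOV32rm %eax, <absolute addr>" can use
// MOV32ao32, the moffs form (opcode 0xA1) that encodes the address directly
// after the opcode and drops the ModRM byte, saving one byte over 0x8B /r.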

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}

Optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return None;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return None;
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}
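
// For example, a "TAILJMPd64 @f" pseudo is emitted as a plain "jmp f"
// (JMP_1), and "TAILJMPd64_CC @f, 4" as the corresponding "je f" (JCC_1,
// with condition code 4 = COND_E).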

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(MaybeMCOp.getValue());

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;

  // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
  // if one of the registers is extended, but the other isn't.
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
      case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
      case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
      case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
      case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
      case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
      case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
      case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
      case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
      case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
      case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
      case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
      case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }

  case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
  case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
  case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
  case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
  case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
  case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
  case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
  case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
  case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
  case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
  case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
  case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
  case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
  case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
  case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
  case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
  case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
  case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
  case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
  case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
  case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
  case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
  case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
  case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
  case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
  case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
  case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
  case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
  case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
  case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
    // Turn immediate 0 into the VPCMPEQ instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
      case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
      case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
      case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
      case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
      case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
      case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
      case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
      case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
      case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
      case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
      case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
      case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
      case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
      case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
      case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
      case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
      case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
      case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
      case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
      case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
      case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
      case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
      case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
      case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
      case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
      case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
      case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
      case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
      case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
      case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
      case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
      case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
      case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
      case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
      case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
      case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
      case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
      case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
      case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
      case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
      case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
      case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
      case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
      case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
      case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
      case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
      case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
      case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
      case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
      case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
      case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
      case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
      case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
      case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
      case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    // Turn immediate 6 into the VPCMPGT instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
      case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
      case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
      case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
      case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
      case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
      case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
      case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
      case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
      case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
      case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
      case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
      case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
      case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
      case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
      case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
      case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
      case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
      case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
      case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
      case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
      case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
      case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
      case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
      case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
      case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
      case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
      case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
      case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
      case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
      case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
      case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
      case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
      case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
      case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
      case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
      case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
      case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
      case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
      case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
      case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
      case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
      case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
      case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
      case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
      case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
      case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
      case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
      case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
      case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
      case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
      case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
      case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
      case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
      case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
      case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    break;
  }

  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }

  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::DEC16r:
  case X86::DEC32r:
  case X86::INC16r:
  case X86::INC32r:
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
    if (!AsmPrinter.getSubtarget().is64Bit()) {
      unsigned Opcode;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
      case X86::INC16r: Opcode = X86::INC16r_alt; break;
      case X86::INC32r: Opcode = X86::INC32r_alt; break;
      }
      OutMI.setOpcode(Opcode);
    }
    break;

  // We don't currently select the correct instruction form for instructions
  // which have a short %eax, etc. form. Handle this by custom lowering, for
  // now.
  //
  // Note, we are currently not handling the following instructions:
  // MOV64ao8, MOV64o8a
  // XCHG16ar, XCHG32ar, XCHG64ar
  case X86::MOV8mr_NOREX:
  case X86::MOV8mr:
  case X86::MOV8rm_NOREX:
  case X86::MOV8rm:
  case X86::MOV16mr:
  case X86::MOV16rm:
  case X86::MOV32mr:
  case X86::MOV32rm: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MOV8mr_NOREX:
    case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
    case X86::MOV8rm_NOREX:
    case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
    case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
    case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
    }
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
    break;
  }

  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
  case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
    case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
    case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
    case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
    case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
    case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
    case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
    case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
    case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
    case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
    case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
    case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
    case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
    case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
    case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
    case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
    case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
    case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
    case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
    case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
    case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
    case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
    case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
    case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
    case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
    case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
    case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
    case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
    case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
    case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
    case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
    case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
    case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
    case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
    case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
    }
    SimplifyShortImmForm(OutMI, NewOpc);
    break;
  }

  // Try to shrink some forms of movsx.
  case X86::MOVSX16rr8:
  case X86::MOVSX32rr16:
  case X86::MOVSX64rr32:
    SimplifyMOVSX(OutMI);
    break;

  case X86::VCMPPDrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDrr:
  case X86::VCMPSSrr: {
    // Swap the operands if it will enable a 2 byte VEX encoding.
    // FIXME: Change the immediate to improve opportunities?
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned Imm = MI->getOperand(3).getImm() & 0x7;
      switch (Imm) {
      default: break;
      case 0x00: // EQUAL
      case 0x03: // UNORDERED
      case 0x04: // NOT EQUAL
      case 0x07: // ORDERED
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
        break;
      }
    }
    break;
  }

  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
    // These are not truly commutable so hide them from the default case.
    break;

  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
    uint64_t TSFlags = MI->getDesc().TSFlags;
    if (MI->getDesc().isCommutable() &&
        (TSFlags & X86II::EncodingMask) == X86II::VEX &&
        (TSFlags & X86II::OpMapMask) == X86II::TB &&
        (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
        !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
        OutMI.getNumOperands() == 3) {
      if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
          X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
    }
    break;
  }
  }
}
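
// For example, "vaddps %xmm8, %xmm0, %xmm0" places %xmm8 in the ModRM r/m
// field, which requires VEX.B and hence the 3-byte VEX prefix; commuting it
// to "vaddps %xmm0, %xmm8, %xmm0" moves %xmm8 into VEX.vvvv so the 2-byte
// prefix can be used.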

void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  bool Is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
                  MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when a GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // relaxed to IE/LE (binutils PR24784). Work around the bug by only using
  // GOT when GOTPCRELX is enabled.
  // TODO: Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
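
// For reference, the 64-bit general-dynamic sequence emitted above is the
// standard ABI form that linkers pattern-match for TLS relaxation:
//   .byte 0x66
//   leaq  sym@tlsgd(%rip), %rdi
//   .word 0x6666
//   rex64
//   call  __tls_get_addr@PLT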

/// Return the longest nop which can be efficiently decoded for the given
/// target CPU. 15 bytes is the longest single NOP instruction, but some
/// platforms can't decode the longest forms efficiently.
static unsigned MaxLongNopLength(const MCSubtargetInfo &STI) {
  uint64_t MaxNopLength = 10;
  if (STI.getFeatureBits()[X86::ProcIntelSLM])
    MaxNopLength = 7;
  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
    MaxNopLength = 15;
  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
    MaxNopLength = 11;
  return MaxNopLength;
}

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes.  Return the size of nop emitted.
static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                        const MCSubtargetInfo &STI) {
  if (!Is64Bit) {
    // TODO Do additional checking if the CPU supports multi-byte nops.
    OS.EmitInstruction(MCInstBuilder(X86::NOOP), STI);
    return 1;
  }

  // Cap a single nop emission at the profitable value for the target.
  NumBytes = std::min(NumBytes, MaxLongNopLength(STI));

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.EmitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.EmitInstruction(MCInstBuilder(Opc), STI);
    break;
  case X86::XCHG16ar:
    OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.EmitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       STI);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}
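
// Typical encodings chosen by the table above, e.g.:
//   1 byte:   90                  nop
//   3 bytes:  0f 1f 00            nopl (%rax)
//   5 bytes:  0f 1f 44 00 08      nopl 8(%rax,%rax)
//   10 bytes: 66 2e 0f 1f 84 00 00 02 00 00
//                                 nopw %cs:512(%rax,%rax)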

/// Emit the optimal amount of multi-byte nops on X86.
static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
                     const MCSubtargetInfo &STI) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
    : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() {
    changeAndComment(OldAllowAutoPadding);
  }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};
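
// Usage, as in the lowering routines below:
//   NoAutoPaddingScope NoPadScope(*OutStreamer);
// disables automatic padding for the instructions emitted while the scope
// is live and restores the previous setting on destruction.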

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
             getSubtargetInfo());
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address.  You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate.  You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->EmitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->EmitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
            E = FaultingMI.operands_end();
       I != E; ++I)
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
      MI.addOperand(MaybeOperand.getValue());

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->EmitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                      X86MCInstLower &MCIL) {
  // PATCHABLE_OP minsize, opcode, operands

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  unsigned MinSize = MI.getOperand(0).getImm();
  unsigned Opcode = MI.getOperand(1).getImm();

  MCInst MCI;
  MCI.setOpcode(Opcode);
  for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
      MCI.addOperand(MaybeOperand.getValue());

  SmallString<256> Code;
  SmallVector<MCFixup, 4> Fixups;
  raw_svector_ostream VecOS(Code);
  CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());

  if (Code.size() < MinSize) {
    if (MinSize == 2 && Opcode == X86::PUSH64r) {
      // This is an optimization that lets us get away without emitting a nop in
      // many cases.
      //
      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
      // bytes too, so the check on MinSize is important.
      MCI.setOpcode(X86::PUSH64rmr);
    } else {
      unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
                                 getSubtargetInfo());
      assert(NopSize == MinSize && "Could not implement MinSize!");
      (void)NopSize;
    }
  }

  OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
}
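
// For example, "pushq %rbp" normally gets the one-byte 0x55 encoding;
// switching to PUSH64rmr re-encodes it as the two-byte 0xFF /6 form,
// satisfying MinSize == 2 without emitting a separate nop.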

// Lower a stackmap of the form:
// <id>, <shadowBytes>, ...
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->EmitLabel(MILabel);

  SM.recordStackMap(*MILabel, MI);
  unsigned NumShadowBytes = MI.getOperand(1).getImm();
  SMShadowTracker.reset(NumShadowBytes);
}

// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");

  SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->EmitLabel(MILabel);
  SM.recordPatchPoint(*MILabel, MI);

  PatchPointOpers opers(&MI);
  unsigned ScratchIdx = opers.getNextScratchIdx();
  unsigned EncodedBytes = 0;
  const MachineOperand &CalleeMO = opers.getCallTarget();

1372  // Check for null target. If target is non-null (i.e. is non-zero or is
1373  // symbolic) then emit a call.
1374  if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1375    MCOperand CalleeMCOp;
1376    switch (CalleeMO.getType()) {
1377    default:
1378      // FIXME: Add a verifier check for bad callee types.
1379      llvm_unreachable("Unrecognized callee operand type.");
1380    case MachineOperand::MO_Immediate:
1381      if (CalleeMO.getImm())
1382        CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1383      break;
1384    case MachineOperand::MO_ExternalSymbol:
1385    case MachineOperand::MO_GlobalAddress:
1386      CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1387                                           MCIL.GetSymbolFromOperand(CalleeMO));
1388      break;
1389    }
1390
1391    // Emit MOV to materialize the target address and the CALL to the target.
1392    // This is encoded with 12-13 bytes, depending on which register is used.
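    // A sketch of the byte math, assuming the standard encodings: MOV64ri is
    // REX.W + B8+rd followed by an 8-byte immediate (10 bytes for any GPR),
    // and CALL64r is FF /2 (2 bytes) plus one extra REX byte when the scratch
    // register is one of %r8-%r15 -- hence 12 or 13 bytes in total.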
1393    Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1394    if (X86II::isX86_64ExtendedReg(ScratchReg))
1395      EncodedBytes = 13;
1396    else
1397      EncodedBytes = 12;
1398
1399    EmitAndCountInstruction(
1400        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1401    // FIXME: Add retpoline support and remove this.
1402    if (Subtarget->useIndirectThunkCalls())
1403      report_fatal_error(
1404          "Lowering patchpoint with thunks not yet implemented.");
1405    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1406  }
1407
1408  // Emit padding.
1409  unsigned NumBytes = opers.getNumPatchBytes();
1410  assert(NumBytes >= EncodedBytes &&
1411         "Patchpoint can't request size less than the length of a call.");
1412
1413  EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
1414           getSubtargetInfo());
1415}
1416
1417void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1418                                              X86MCInstLower &MCIL) {
1419  assert(Subtarget->is64Bit() && "XRay custom events require X86-64");
1420
1421  NoAutoPaddingScope NoPadScope(*OutStreamer);
1422
1423  // We want to emit the following pattern, which follows the x86 calling
1424  // convention to prepare for the trampoline call to be patched in.
1425  //
1426  //   .p2align 1, ...
1427  // .Lxray_event_sled_N:
1428  //   jmp +N                        // jump across the instrumentation sled
1429  //   ...                           // set up arguments in registers
1430  //   callq __xray_CustomEvent@plt  // force a dependency on the symbol
1431  //   ...
1432  //   <jump here>
1433  //
1434  // After patching, it would look something like:
1435  //
1436  //   nopw (2-byte nop)
1437  //   ...
1438  //   callq __xray_CustomEvent  // already lowered
1439  //   ...
1440  //
1441  // ---
1442  // First we emit the label and the jump.
1443  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1444  OutStreamer->AddComment("# XRay Custom Event Log");
1445  OutStreamer->EmitCodeAlignment(2);
1446  OutStreamer->EmitLabel(CurSled);
1447
1448  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1449  // an operand (computed relative to the end of the jmp instruction).
1450  // FIXME: Find another less hacky way to force the relative jump.
1451  OutStreamer->EmitBinaryData("\xeb\x0f");
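  // The 0x0f (15) byte offset covers the sled body below; assuming the usual
  // encodings, each argument costs 5 bytes (1-byte push + 3-byte mov, or a
  // 4-byte nop, plus a 1-byte pop or nop), so 2 * 5 + 5 for the call = 15.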
1452
1453  // The trampoline is called with its two arguments in %rdi and %rsi (the
1454  // SystemV C calling convention) -- so those are the registers we populate.
1455  const Register DestRegs[] = {X86::RDI, X86::RSI};
1456  bool UsedMask[] = {false, false};
1457  // Filled out in loop.
1458  Register SrcRegs[] = {0, 0};
1459
1460  // Then we put the operands in the %rdi and %rsi registers. We spill the
1461  // values in the registers before we clobber them, and mark them as used in
1462  // UsedMask. In case the arguments are already in the correct registers, we
1463  // emit nops appropriately sized to keep the sled the same size in every
1464  // situation.
1465  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1466    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1467      assert(Op->isReg() && "Only support arguments in registers");
1468      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1469      if (SrcRegs[I] != DestRegs[I]) {
1470        UsedMask[I] = true;
1471        EmitAndCountInstruction(
1472            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1473      } else {
1474        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1475      }
1476    }
1477
1478  // Now that the register values are stashed, mov arguments into place.
1479  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1480  // earlier DestReg. We will have already overwritten the register before
1481  // we can copy from it.
1482  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1483    if (SrcRegs[I] != DestRegs[I])
1484      EmitAndCountInstruction(
1485          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1486
1487  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1488  // name of the trampoline to be implemented by the XRay runtime.
1489  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1490  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1491  if (isPositionIndependent())
1492    TOp.setTargetFlags(X86II::MO_PLT);
1493
1494  // Emit the call instruction.
1495  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1496                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1497
1498  // Restore caller-saved and used registers.
1499  for (unsigned I = sizeof UsedMask; I-- > 0;)
1500    if (UsedMask[I])
1501      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1502    else
1503      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1504
1505  OutStreamer->AddComment("xray custom event end.");
1506
1507  // Record the sled version. Older versions of this sled were spelled
1508  // differently, so we let the runtime handle the different offsets we're
1509  // using.
1510  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
1511}
1512
1513void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1514                                                    X86MCInstLower &MCIL) {
1515  assert(Subtarget->is64Bit() && "XRay typed events require X86-64");
1516
1517  NoAutoPaddingScope NoPadScope(*OutStreamer);
1518
1519  // We want to emit the following pattern, which follows the x86 calling
1520  // convention to prepare for the trampoline call to be patched in.
1521  //
1522  //   .p2align 1, ...
1523  // .Lxray_event_sled_N:
1524  //   jmp +N                        // jump across the instrumentation sled
1525  //   ...                           // set up arguments in registers
1526  //   callq __xray_TypedEvent@plt  // force a dependency on the symbol
1527  //   ...
1528  //   <jump here>
1529  //
1530  // After patching, it would look something like:
1531  //
1532  //   nopw (2-byte nop)
1533  //   ...
1534  //   callq __xray_TypedEvent  // already lowered
1535  //   ...
1536  //
1537  // ---
1538  // First we emit the label and the jump.
1539  auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1540  OutStreamer->AddComment("# XRay Typed Event Log");
1541  OutStreamer->EmitCodeAlignment(2);
1542  OutStreamer->EmitLabel(CurSled);
1543
1544  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1545  // an operand (computed relative to the end of the jmp instruction).
1546  // FIXME: Find another less hacky way to force the relative jump.
1547  OutStreamer->EmitBinaryData("\xeb\x14");
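  // The 0x14 (20) byte offset covers the sled body below; assuming the usual
  // encodings, each of the three arguments costs 5 bytes (1-byte push +
  // 3-byte mov, or a 4-byte nop, plus a 1-byte pop or nop), so 3 * 5 + 5 for
  // the call = 20.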
1548
1549  // The Win64 convention places the first three arguments in %rcx, %rdx, and
1550  // %r8, so those may need to be moved. Under the SystemV convention they are
1551  // already in %rdi, %rsi, and %rdx, and no translation is needed.
1552  const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1553  bool UsedMask[] = {false, false, false};
1554
1555  // Will fill out src regs in the loop.
1556  Register SrcRegs[] = {0, 0, 0};
1557
1558  // Then we put the operands in the SystemV registers. We spill the values in
1559  // the registers before we clobber them, and mark them as used in UsedMask.
1560  // In case the arguments are already in the correct register, we emit nops
1561  // appropriately sized to keep the sled the same size in every situation.
1562  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1563    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1564      // TODO: Is register-only support adequate?
1565      assert(Op->isReg() && "Only supports arguments in registers");
1566      SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1567      if (SrcRegs[I] != DestRegs[I]) {
1568        UsedMask[I] = true;
1569        EmitAndCountInstruction(
1570            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1571      } else {
1572        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1573      }
1574    }
1575
1576  // The loop above only stashes the destination registers, or emits nops if
1577  // the arguments are already in the right place. The actual moving is
1578  // postponed until after all the registers are stashed, so nothing is
1579  // clobbered prematurely. We've already added nops to account for the size
1580  // of mov and push if the register is in the right place, so we only have
1581  // to worry about emitting the movs.
1582  // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1583  // earlier DestReg. We will have already overwritten the register before
1584  // we can copy from it.
1585  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1586    if (UsedMask[I])
1587      EmitAndCountInstruction(
1588          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1589
1590  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1591  // name of the trampoline to be implemented by the XRay runtime.
1592  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1593  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1594  if (isPositionIndependent())
1595    TOp.setTargetFlags(X86II::MO_PLT);
1596
1597  // Emit the call instruction.
1598  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1599                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1600
1601  // Restore caller-saved and used registers.
1602  for (unsigned I = sizeof UsedMask; I-- > 0;)
1603    if (UsedMask[I])
1604      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1605    else
1606      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1607
1608  OutStreamer->AddComment("xray typed event end.");
1609
1610  // Record the sled version.
1611  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
1612}
1613
1614void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1615                                                  X86MCInstLower &MCIL) {
1616
1617  NoAutoPaddingScope NoPadScope(*OutStreamer);
1618
1619  const Function &F = MF->getFunction();
1620  if (F.hasFnAttribute("patchable-function-entry")) {
1621    unsigned Num;
1622    if (F.getFnAttribute("patchable-function-entry")
1623            .getValueAsString()
1624            .getAsInteger(10, Num))
1625      return;
1626    EmitNops(*OutStreamer, Num, Subtarget->is64Bit(), getSubtargetInfo());
1627    return;
1628  }
1629  // We want to emit the following pattern:
1630  //
1631  //   .p2align 1, ...
1632  // .Lxray_sled_N:
1633  //   jmp .tmpN
1634  //   # 9 bytes worth of noops
1635  //
1636  // We need the 9 bytes because at runtime, we'd be patching over the full 11
1637  // bytes with the following pattern:
1638  //
1639  //   mov %r10, <function id, 32-bit>   // 6 bytes
1640  //   call <relative offset, 32-bits>   // 5 bytes
1641  //
1642  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1643  OutStreamer->EmitCodeAlignment(2);
1644  OutStreamer->EmitLabel(CurSled);
1645
1646  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1647  // an operand (computed relative to the end of the jmp instruction).
1648  // FIXME: Find another less hacky way to force the relative jump.
1649  OutStreamer->EmitBytes("\xeb\x09");
1650  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1651  recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
1652}
1653
1654void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1655                                       X86MCInstLower &MCIL) {
1656  NoAutoPaddingScope NoPadScope(*OutStreamer);
1657
1658  // Since PATCHABLE_RET takes the opcode of the return instruction as an
1659  // argument, we use that to emit the correct form of the RET that we want.
1660  // I.e., when we see this:
1661  //
1662  //   PATCHABLE_RET X86::RET ...
1663  //
1664  // We should emit the RET followed by sleds.
1665  //
1666  //   .p2align 1, ...
1667  // .Lxray_sled_N:
1668  //   ret  # or equivalent instruction
1669  //   # 10 bytes worth of noops
1670  //
1671  // This just makes sure that the alignment for the next instruction is 2.
1672  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1673  OutStreamer->EmitCodeAlignment(2);
1674  OutStreamer->EmitLabel(CurSled);
1675  unsigned OpCode = MI.getOperand(0).getImm();
1676  MCInst Ret;
1677  Ret.setOpcode(OpCode);
1678  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1679    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1680      Ret.addOperand(MaybeOperand.getValue());
1681  OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
1682  EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
1683  recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
1684}
1685
1686void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1687                                             X86MCInstLower &MCIL) {
1688  NoAutoPaddingScope NoPadScope(*OutStreamer);
1689
1690  // Like PATCHABLE_RET, we have the actual instruction in the operands of this
1691  // instruction, so we lower that particular instruction and its operands.
1692  // Unlike PATCHABLE_RET, though, we put the sled before the JMP, just as we
1693  // do for PATCHABLE_FUNCTION_ENTER. The sled is essentially the
1694  // PATCHABLE_FUNCTION_ENTER sled, followed by the lowering of the actual
1695  // tail call, as in PATCHABLE_RET.
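  // Concretely, the layout we emit is (sketch):
  //
  //   .Lxray_sled_N:
  //     jmp Target                    # 2 bytes
  //     # 9 bytes worth of noops
  //   Target:
  //     jmp <tail call target>        # the lowered instruction below
  //
  // giving the same 11-byte patchable region as the entry sled.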
1696  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1697  OutStreamer->EmitCodeAlignment(2);
1698  OutStreamer->EmitLabel(CurSled);
1699  auto Target = OutContext.createTempSymbol();
1700
1701  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1702  // an operand (computed relative to the end of the jmp instruction).
1703  // FIXME: Find another less hacky way to force the relative jump.
1704  OutStreamer->EmitBytes("\xeb\x09");
1705  EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1706  OutStreamer->EmitLabel(Target);
1707  recordSled(CurSled, MI, SledKind::TAIL_CALL);
1708
1709  unsigned OpCode = MI.getOperand(0).getImm();
1710  OpCode = convertTailJumpOpcode(OpCode);
1711  MCInst TC;
1712  TC.setOpcode(OpCode);
1713
1714  // Before emitting the instruction, add a comment to indicate that this is
1715  // indeed a tail call.
1716  OutStreamer->AddComment("TAILCALL");
1717  for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1718    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1719      TC.addOperand(MaybeOperand.getValue());
1720  OutStreamer->EmitInstruction(TC, getSubtargetInfo());
1721}
1722
1723// Returns the instruction preceding MBBI in its MachineFunction. If MBBI is
1724// the first instruction of the first basic block, returns a null iterator.
1725static MachineBasicBlock::const_iterator
1726PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1727  const MachineBasicBlock *MBB = MBBI->getParent();
1728  while (MBBI == MBB->begin()) {
1729    if (MBB == &MBB->getParent()->front())
1730      return MachineBasicBlock::const_iterator();
1731    MBB = MBB->getPrevNode();
1732    MBBI = MBB->end();
1733  }
1734  --MBBI;
1735  return MBBI;
1736}
1737
1738static const Constant *getConstantFromPool(const MachineInstr &MI,
1739                                           const MachineOperand &Op) {
1740  if (!Op.isCPI() || Op.getOffset() != 0)
1741    return nullptr;
1742
1743  ArrayRef<MachineConstantPoolEntry> Constants =
1744      MI.getParent()->getParent()->getConstantPool()->getConstants();
1745  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1746
1747  // Bail if this is a machine constant pool entry; we won't be able to dig out
1748  // anything useful.
1749  if (ConstantEntry.isMachineConstantPoolEntry())
1750    return nullptr;
1751
1752  const Constant *C = ConstantEntry.Val.ConstVal;
1753  assert((!C || ConstantEntry.getType() == C->getType()) &&
1754         "Expected a constant of the same type!");
1755  return C;
1756}
1757
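// Builds a human-readable shuffle comment such as (illustrative):
//   xmm0 = xmm1[0,2],zero,mem[7]
// where each span names the source of a run of elements, and "zero"/"u" mark
// zeroed and undef elements.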
1758static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1759                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1760  std::string Comment;
1761
1762  // Compute the name for a register. This is really goofy because we have
1763  // multiple instruction printers that could (in theory) use different
1764  // names. Fortunately most people use the ATT style (outside of Windows)
1765  // and they actually agree on register naming here. Ultimately, this is
1766  // a comment, and so it's OK if it isn't perfect.
1767  auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1768    return X86ATTInstPrinter::getRegisterName(RegNum);
1769  };
1770
1771  const MachineOperand &DstOp = MI->getOperand(0);
1772  const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1773  const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1774
1775  StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1776  StringRef Src1Name =
1777      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1778  StringRef Src2Name =
1779      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1780
1781  // If both sources are the same, fix the mask so all elements print in one span.
1782  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1783  if (Src1Name == Src2Name)
1784    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1785      if (ShuffleMask[i] >= e)
1786        ShuffleMask[i] -= e;
1787
1788  raw_string_ostream CS(Comment);
1789  CS << DstName;
1790
1791  // Handle AVX512 MASK/MASKZ write mask comments.
1792  // MASK: zmmX {%kY}
1793  // MASKZ: zmmX {%kY} {z}
1794  if (SrcOp1Idx > 1) {
1795    assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1796
1797    const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1798    if (WriteMaskOp.isReg()) {
1799      CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1800
1801      if (SrcOp1Idx == 2) {
1802        CS << " {z}";
1803      }
1804    }
1805  }
1806
1807  CS << " = ";
1808
1809  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1810    if (i != 0)
1811      CS << ",";
1812    if (ShuffleMask[i] == SM_SentinelZero) {
1813      CS << "zero";
1814      continue;
1815    }
1816
1817    // Otherwise, it must come from src1 or src2.  Print the span of elements
1818    // that comes from this src.
1819    bool isSrc1 = ShuffleMask[i] < (int)e;
1820    CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1821
1822    bool IsFirst = true;
1823    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1824           (ShuffleMask[i] < (int)e) == isSrc1) {
1825      if (!IsFirst)
1826        CS << ',';
1827      else
1828        IsFirst = false;
1829      if (ShuffleMask[i] == SM_SentinelUndef)
1830        CS << "u";
1831      else
1832        CS << ShuffleMask[i] % (int)e;
1833      ++i;
1834    }
1835    CS << ']';
1836    --i; // For loop increments element #.
1837  }
1838  CS.flush();
1839
1840  return Comment;
1841}
1842
1843static void printConstant(const APInt &Val, raw_ostream &CS) {
1844  if (Val.getBitWidth() <= 64) {
1845    CS << Val.getZExtValue();
1846  } else {
1847    // print multi-word constant as (w0,w1)
1848    CS << "(";
1849    for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1850      if (i > 0)
1851        CS << ",";
1852      CS << Val.getRawData()[i];
1853    }
1854    CS << ")";
1855  }
1856}
1857
1858static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1859  SmallString<32> Str;
1860  // Force scientific notation to distinquish from integers.
1861  Flt.toString(Str, 0, 0);
1862  CS << Str;
1863}
1864
1865static void printConstant(const Constant *COp, raw_ostream &CS) {
1866  if (isa<UndefValue>(COp)) {
1867    CS << "u";
1868  } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1869    printConstant(CI->getValue(), CS);
1870  } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1871    printConstant(CF->getValueAPF(), CS);
1872  } else {
1873    CS << "?";
1874  }
1875}
1876
1877void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1878  assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1879  assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1880
1881  // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1882  if (EmitFPOData) {
1883    X86TargetStreamer *XTS =
1884        static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1885    switch (MI->getOpcode()) {
1886    case X86::SEH_PushReg:
1887      XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1888      break;
1889    case X86::SEH_StackAlloc:
1890      XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1891      break;
1892    case X86::SEH_StackAlign:
1893      XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1894      break;
1895    case X86::SEH_SetFrame:
1896      assert(MI->getOperand(1).getImm() == 0 &&
1897             ".cv_fpo_setframe takes no offset");
1898      XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1899      break;
1900    case X86::SEH_EndPrologue:
1901      XTS->emitFPOEndPrologue();
1902      break;
1903    case X86::SEH_SaveReg:
1904    case X86::SEH_SaveXMM:
1905    case X86::SEH_PushFrame:
1906      llvm_unreachable("SEH_ directive incompatible with FPO");
1907      break;
1908    default:
1909      llvm_unreachable("expected SEH_ instruction");
1910    }
1911    return;
1912  }
1913
1914  // Otherwise, use the .seh_ directives for all other Windows platforms.
1915  switch (MI->getOpcode()) {
1916  case X86::SEH_PushReg:
1917    OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1918    break;
1919
1920  case X86::SEH_SaveReg:
1921    OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1922                                   MI->getOperand(1).getImm());
1923    break;
1924
1925  case X86::SEH_SaveXMM:
1926    OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1927                                   MI->getOperand(1).getImm());
1928    break;
1929
1930  case X86::SEH_StackAlloc:
1931    OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1932    break;
1933
1934  case X86::SEH_SetFrame:
1935    OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
1936                                    MI->getOperand(1).getImm());
1937    break;
1938
1939  case X86::SEH_PushFrame:
1940    OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1941    break;
1942
1943  case X86::SEH_EndPrologue:
1944    OutStreamer->EmitWinCFIEndProlog();
1945    break;
1946
1947  default:
1948    llvm_unreachable("expected SEH_ instruction");
1949  }
1950}
1951
1952static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1953  if (Info.RegClass == X86::VR128RegClassID ||
1954      Info.RegClass == X86::VR128XRegClassID)
1955    return 128;
1956  if (Info.RegClass == X86::VR256RegClassID ||
1957      Info.RegClass == X86::VR256XRegClassID)
1958    return 256;
1959  if (Info.RegClass == X86::VR512RegClassID)
1960    return 512;
1961  llvm_unreachable("Unknown register class!");
1962}
1963
1964void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
1965  X86MCInstLower MCInstLowering(*MF, *this);
1966  const X86RegisterInfo *RI =
1967      MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1968
1969  // Add a comment about EVEX-to-VEX compression for AVX-512 instructions
1970  // that were compressed from the EVEX encoding to the VEX encoding.
1971  if (TM.Options.MCOptions.ShowMCEncoding) {
1972    if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
1973      OutStreamer->AddComment("EVEX TO VEX Compression ", false);
1974  }
1975
1976  switch (MI->getOpcode()) {
1977  case TargetOpcode::DBG_VALUE:
1978    llvm_unreachable("Should be handled target independently");
1979
1980  // Emit nothing here but a comment if we can.
1981  case X86::Int_MemBarrier:
1982    OutStreamer->emitRawComment("MEMBARRIER");
1983    return;
1984
1985  case X86::EH_RETURN:
1986  case X86::EH_RETURN64: {
1987    // Lower these as normal, but add some comments.
1988    Register Reg = MI->getOperand(0).getReg();
1989    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
1990                            X86ATTInstPrinter::getRegisterName(Reg));
1991    break;
1992  }
1993  case X86::CLEANUPRET: {
1994    // Lower these as normal, but add some comments.
1995    OutStreamer->AddComment("CLEANUPRET");
1996    break;
1997  }
1998
1999  case X86::CATCHRET: {
2000    // Lower these as normal, but add some comments.
2001    OutStreamer->AddComment("CATCHRET");
2002    break;
2003  }
2004
2005  case X86::ENDBR32:
2006  case X86::ENDBR64: {
2007    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2008    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2009    // non-empty. If MI is the initial ENDBR, place the
2010    // __patchable_function_entries label after ENDBR.
2011    if (CurrentPatchableFunctionEntrySym &&
2012        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2013        MI == &MF->front().front()) {
2014      MCInst Inst;
2015      MCInstLowering.Lower(MI, Inst);
2016      EmitAndCountInstruction(Inst);
2017      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2018      OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym);
2019      return;
2020    }
2021    break;
2022  }
2023
2024  case X86::TAILJMPr:
2025  case X86::TAILJMPm:
2026  case X86::TAILJMPd:
2027  case X86::TAILJMPd_CC:
2028  case X86::TAILJMPr64:
2029  case X86::TAILJMPm64:
2030  case X86::TAILJMPd64:
2031  case X86::TAILJMPd64_CC:
2032  case X86::TAILJMPr64_REX:
2033  case X86::TAILJMPm64_REX:
2034    // Lower these as normal, but add some comments.
2035    OutStreamer->AddComment("TAILCALL");
2036    break;
2037
2038  case X86::TLS_addr32:
2039  case X86::TLS_addr64:
2040  case X86::TLS_base_addr32:
2041  case X86::TLS_base_addr64:
2042    return LowerTlsAddr(MCInstLowering, *MI);
2043
2044  // Loading/storing mask pairs requires two kmov operations. The second one
2045  // needs a 2-byte displacement relative to the specified address (with a
2046  // 32-bit spill size). Pairs of 1-bit up to 16-bit masks all use the same
2047  // spill size; all are stored using MASKPAIR16STORE and loaded using
2048  // MASKPAIR16LOAD.
2049  //
2050  // The displacement might wrap around in theory, hence the asserts in both cases.
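  // For example (illustrative), a MASKPAIR16LOAD of the pair (%k0, %k1) from
  // the stack would expand to:
  //   kmovw (%rsp), %k0
  //   kmovw 2(%rsp), %k1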
2051  case X86::MASKPAIR16LOAD: {
2052    int64_t Disp = MI->getOperand(1 + X86::AddrDisp).getImm();
2053    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
2054    Register Reg = MI->getOperand(0).getReg();
2055    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
2056    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
2057
2058    // Load the first mask register
2059    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWkm);
2060    MIB.addReg(Reg0);
2061    for (int i = 0; i < X86::AddrNumOperands; ++i) {
2062      auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
2063      MIB.addOperand(Op.getValue());
2064    }
2065    EmitAndCountInstruction(MIB);
2066
2067    // Load the second mask register of the pair
2068    MIB = MCInstBuilder(X86::KMOVWkm);
2069    MIB.addReg(Reg1);
2070    for (int i = 0; i < X86::AddrNumOperands; ++i) {
2071      if (i == X86::AddrDisp) {
2072        MIB.addImm(Disp + 2);
2073      } else {
2074        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(1 + i));
2075        MIB.addOperand(Op.getValue());
2076      }
2077    }
2078    EmitAndCountInstruction(MIB);
2079    return;
2080  }
2081
2082  case X86::MASKPAIR16STORE: {
2083    int64_t Disp = MI->getOperand(X86::AddrDisp).getImm();
2084    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
2085    Register Reg = MI->getOperand(X86::AddrNumOperands).getReg();
2086    Register Reg0 = RI->getSubReg(Reg, X86::sub_mask_0);
2087    Register Reg1 = RI->getSubReg(Reg, X86::sub_mask_1);
2088
2089    // Store the first mask register
2090    MCInstBuilder MIB = MCInstBuilder(X86::KMOVWmk);
2091    for (int i = 0; i < X86::AddrNumOperands; ++i)
2092      MIB.addOperand(MCInstLowering.LowerMachineOperand(MI, MI->getOperand(i)).getValue());
2093    MIB.addReg(Reg0);
2094    EmitAndCountInstruction(MIB);
2095
2096    // Store the second mask register of the pair
2097    MIB = MCInstBuilder(X86::KMOVWmk);
2098    for (int i = 0; i < X86::AddrNumOperands; ++i) {
2099      if (i == X86::AddrDisp) {
2100        MIB.addImm(Disp + 2);
2101      } else {
2102        auto Op = MCInstLowering.LowerMachineOperand(MI, MI->getOperand(0 + i));
2103        MIB.addOperand(Op.getValue());
2104      }
2105    }
2106    MIB.addReg(Reg1);
2107    EmitAndCountInstruction(MIB);
2108    return;
2109  }
2110
2111  case X86::MOVPC32r: {
2112    // This is a pseudo op for a two instruction sequence with a label, which
2113    // looks like:
2114    //     call "L1$pb"
2115    // "L1$pb":
2116    //     popl %esi
2117
2118    // Emit the call.
2119    MCSymbol *PICBase = MF->getPICBaseSymbol();
2120    // FIXME: We would like an efficient form for this, so we don't have to do a
2121    // lot of extra uniquing.
2122    EmitAndCountInstruction(
2123        MCInstBuilder(X86::CALLpcrel32)
2124            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2125
2126    const X86FrameLowering *FrameLowering =
2127        MF->getSubtarget<X86Subtarget>().getFrameLowering();
2128    bool hasFP = FrameLowering->hasFP(*MF);
2129
2130    // TODO: This is needed only if we require precise CFA.
2131    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2132                               !OutStreamer->getDwarfFrameInfos().back().End;
2133
2134    int stackGrowth = -RI->getSlotSize();
2135
2136    if (HasActiveDwarfFrame && !hasFP) {
2137      OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
2138    }
2139
2140    // Emit the label.
2141    OutStreamer->EmitLabel(PICBase);
2142
2143    // popl %reg
2144    EmitAndCountInstruction(
2145        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2146
2147    if (HasActiveDwarfFrame && !hasFP) {
2148      OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
2149    }
2150    return;
2151  }
2152
2153  case X86::ADD32ri: {
2154    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2155    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2156      break;
2157
2158    // Okay, we have something like:
2159    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2160
2161    // For this, we want to print something like:
2162    //   MYGLOBAL + (. - PICBASE)
2163    // However, we can't generate a ".", so just emit a new label here and refer
2164    // to it.
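    // I.e., the immediate operand emitted below has the form
    //   MYGLOBAL + (.LtmpN - .L1$pb)
    // with .LtmpN standing in for "." (label names illustrative).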
2165    MCSymbol *DotSym = OutContext.createTempSymbol();
2166    OutStreamer->EmitLabel(DotSym);
2167
2168    // Now that we have emitted the label, lower the complex operand expression.
2169    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2170
2171    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2172    const MCExpr *PICBase =
2173        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2174    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2175
2176    DotExpr = MCBinaryExpr::createAdd(
2177        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2178
2179    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2180                                .addReg(MI->getOperand(0).getReg())
2181                                .addReg(MI->getOperand(1).getReg())
2182                                .addExpr(DotExpr));
2183    return;
2184  }
2185  case TargetOpcode::STATEPOINT:
2186    return LowerSTATEPOINT(*MI, MCInstLowering);
2187
2188  case TargetOpcode::FAULTING_OP:
2189    return LowerFAULTING_OP(*MI, MCInstLowering);
2190
2191  case TargetOpcode::FENTRY_CALL:
2192    return LowerFENTRY_CALL(*MI, MCInstLowering);
2193
2194  case TargetOpcode::PATCHABLE_OP:
2195    return LowerPATCHABLE_OP(*MI, MCInstLowering);
2196
2197  case TargetOpcode::STACKMAP:
2198    return LowerSTACKMAP(*MI);
2199
2200  case TargetOpcode::PATCHPOINT:
2201    return LowerPATCHPOINT(*MI, MCInstLowering);
2202
2203  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2204    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2205
2206  case TargetOpcode::PATCHABLE_RET:
2207    return LowerPATCHABLE_RET(*MI, MCInstLowering);
2208
2209  case TargetOpcode::PATCHABLE_TAIL_CALL:
2210    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2211
2212  case TargetOpcode::PATCHABLE_EVENT_CALL:
2213    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2214
2215  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2216    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2217
2218  case X86::MORESTACK_RET:
2219    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2220    return;
2221
2222  case X86::MORESTACK_RET_RESTORE_R10:
2223    // Return, then restore R10.
2224    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2225    EmitAndCountInstruction(
2226        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2227    return;
2228
2229  case X86::SEH_PushReg:
2230  case X86::SEH_SaveReg:
2231  case X86::SEH_SaveXMM:
2232  case X86::SEH_StackAlloc:
2233  case X86::SEH_StackAlign:
2234  case X86::SEH_SetFrame:
2235  case X86::SEH_PushFrame:
2236  case X86::SEH_EndPrologue:
2237    EmitSEHInstruction(MI);
2238    return;
2239
2240  case X86::SEH_Epilogue: {
2241    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2242    MachineBasicBlock::const_iterator MBBI(MI);
2243    // Check if it is preceded by a call; emit a nop if so.
2244    for (MBBI = PrevCrossBBInst(MBBI);
2245         MBBI != MachineBasicBlock::const_iterator();
2246         MBBI = PrevCrossBBInst(MBBI)) {
2247      // Conservatively assume that pseudo instructions don't emit code and keep
2248      // looking for a call. We may emit an unnecessary nop in some cases.
2249      if (!MBBI->isPseudo()) {
2250        if (MBBI->isCall())
2251          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2252        break;
2253      }
2254    }
2255    return;
2256  }
2257
2258  // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2259  // a constant shuffle mask. We won't be able to do this at the MC layer
2260  // because the mask isn't an immediate.
2261  case X86::PSHUFBrm:
2262  case X86::VPSHUFBrm:
2263  case X86::VPSHUFBYrm:
2264  case X86::VPSHUFBZ128rm:
2265  case X86::VPSHUFBZ128rmk:
2266  case X86::VPSHUFBZ128rmkz:
2267  case X86::VPSHUFBZ256rm:
2268  case X86::VPSHUFBZ256rmk:
2269  case X86::VPSHUFBZ256rmkz:
2270  case X86::VPSHUFBZrm:
2271  case X86::VPSHUFBZrmk:
2272  case X86::VPSHUFBZrmkz: {
2273    if (!OutStreamer->isVerboseAsm())
2274      break;
2275    unsigned SrcIdx, MaskIdx;
2276    switch (MI->getOpcode()) {
2277    default: llvm_unreachable("Invalid opcode");
2278    case X86::PSHUFBrm:
2279    case X86::VPSHUFBrm:
2280    case X86::VPSHUFBYrm:
2281    case X86::VPSHUFBZ128rm:
2282    case X86::VPSHUFBZ256rm:
2283    case X86::VPSHUFBZrm:
2284      SrcIdx = 1; MaskIdx = 5; break;
2285    case X86::VPSHUFBZ128rmkz:
2286    case X86::VPSHUFBZ256rmkz:
2287    case X86::VPSHUFBZrmkz:
2288      SrcIdx = 2; MaskIdx = 6; break;
2289    case X86::VPSHUFBZ128rmk:
2290    case X86::VPSHUFBZ256rmk:
2291    case X86::VPSHUFBZrmk:
2292      SrcIdx = 3; MaskIdx = 7; break;
2293    }
2294
2295    assert(MI->getNumOperands() >= 6 &&
2296           "We should always have at least 6 operands!");
2297
2298    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2299    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2300      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2301      SmallVector<int, 64> Mask;
2302      DecodePSHUFBMask(C, Width, Mask);
2303      if (!Mask.empty())
2304        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2305    }
2306    break;
2307  }
2308
2309  case X86::VPERMILPSrm:
2310  case X86::VPERMILPSYrm:
2311  case X86::VPERMILPSZ128rm:
2312  case X86::VPERMILPSZ128rmk:
2313  case X86::VPERMILPSZ128rmkz:
2314  case X86::VPERMILPSZ256rm:
2315  case X86::VPERMILPSZ256rmk:
2316  case X86::VPERMILPSZ256rmkz:
2317  case X86::VPERMILPSZrm:
2318  case X86::VPERMILPSZrmk:
2319  case X86::VPERMILPSZrmkz:
2320  case X86::VPERMILPDrm:
2321  case X86::VPERMILPDYrm:
2322  case X86::VPERMILPDZ128rm:
2323  case X86::VPERMILPDZ128rmk:
2324  case X86::VPERMILPDZ128rmkz:
2325  case X86::VPERMILPDZ256rm:
2326  case X86::VPERMILPDZ256rmk:
2327  case X86::VPERMILPDZ256rmkz:
2328  case X86::VPERMILPDZrm:
2329  case X86::VPERMILPDZrmk:
2330  case X86::VPERMILPDZrmkz: {
2331    if (!OutStreamer->isVerboseAsm())
2332      break;
2333    unsigned SrcIdx, MaskIdx;
2334    unsigned ElSize;
2335    switch (MI->getOpcode()) {
2336    default: llvm_unreachable("Invalid opcode");
2337    case X86::VPERMILPSrm:
2338    case X86::VPERMILPSYrm:
2339    case X86::VPERMILPSZ128rm:
2340    case X86::VPERMILPSZ256rm:
2341    case X86::VPERMILPSZrm:
2342      SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
2343    case X86::VPERMILPSZ128rmkz:
2344    case X86::VPERMILPSZ256rmkz:
2345    case X86::VPERMILPSZrmkz:
2346      SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
2347    case X86::VPERMILPSZ128rmk:
2348    case X86::VPERMILPSZ256rmk:
2349    case X86::VPERMILPSZrmk:
2350      SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
2351    case X86::VPERMILPDrm:
2352    case X86::VPERMILPDYrm:
2353    case X86::VPERMILPDZ128rm:
2354    case X86::VPERMILPDZ256rm:
2355    case X86::VPERMILPDZrm:
2356      SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
2357    case X86::VPERMILPDZ128rmkz:
2358    case X86::VPERMILPDZ256rmkz:
2359    case X86::VPERMILPDZrmkz:
2360      SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
2361    case X86::VPERMILPDZ128rmk:
2362    case X86::VPERMILPDZ256rmk:
2363    case X86::VPERMILPDZrmk:
2364      SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
2365    }
2366
2367    assert(MI->getNumOperands() >= 6 &&
2368           "We should always have at least 6 operands!");
2369
2370    const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2371    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2372      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2373      SmallVector<int, 16> Mask;
2374      DecodeVPERMILPMask(C, ElSize, Width, Mask);
2375      if (!Mask.empty())
2376        OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2377    }
2378    break;
2379  }
2380
2381  case X86::VPERMIL2PDrm:
2382  case X86::VPERMIL2PSrm:
2383  case X86::VPERMIL2PDYrm:
2384  case X86::VPERMIL2PSYrm: {
2385    if (!OutStreamer->isVerboseAsm())
2386      break;
2387    assert(MI->getNumOperands() >= 8 &&
2388           "We should always have at least 8 operands!");
2389
2390    const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2391    if (!CtrlOp.isImm())
2392      break;
2393
2394    unsigned ElSize;
2395    switch (MI->getOpcode()) {
2396    default: llvm_unreachable("Invalid opcode");
2397    case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2398    case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2399    }
2400
2401    const MachineOperand &MaskOp = MI->getOperand(6);
2402    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2403      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2404      SmallVector<int, 16> Mask;
2405      DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2406      if (!Mask.empty())
2407        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
2408    }
2409    break;
2410  }
2411
2412  case X86::VPPERMrrm: {
2413    if (!OutStreamer->isVerboseAsm())
2414      break;
2415    assert(MI->getNumOperands() >= 7 &&
2416           "We should always have at least 7 operands!");
2417
2418    const MachineOperand &MaskOp = MI->getOperand(6);
2419    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2420      unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2421      SmallVector<int, 16> Mask;
2422      DecodeVPPERMMask(C, Width, Mask);
2423      if (!Mask.empty())
2424        OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
2425    }
2426    break;
2427  }
2428
2429  case X86::MMX_MOVQ64rm: {
2430    if (!OutStreamer->isVerboseAsm())
2431      break;
2432    if (MI->getNumOperands() <= 4)
2433      break;
2434    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2435      std::string Comment;
2436      raw_string_ostream CS(Comment);
2437      const MachineOperand &DstOp = MI->getOperand(0);
2438      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2439      if (auto *CF = dyn_cast<ConstantFP>(C)) {
2440        CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
2441        OutStreamer->AddComment(CS.str());
2442      }
2443    }
2444    break;
2445  }
2446
2447#define MOV_CASE(Prefix, Suffix)                                               \
2448  case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2449  case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2450  case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2451  case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2452  case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2453  case X86::Prefix##MOVDQU##Suffix##rm:
2454
2455#define MOV_AVX512_CASE(Suffix)                                                \
2456  case X86::VMOVDQA64##Suffix##rm:                                             \
2457  case X86::VMOVDQA32##Suffix##rm:                                             \
2458  case X86::VMOVDQU64##Suffix##rm:                                             \
2459  case X86::VMOVDQU32##Suffix##rm:                                             \
2460  case X86::VMOVDQU16##Suffix##rm:                                             \
2461  case X86::VMOVDQU8##Suffix##rm:                                              \
2462  case X86::VMOVAPS##Suffix##rm:                                               \
2463  case X86::VMOVAPD##Suffix##rm:                                               \
2464  case X86::VMOVUPS##Suffix##rm:                                               \
2465  case X86::VMOVUPD##Suffix##rm:
2466
2467#define CASE_ALL_MOV_RM()                                                      \
2468  MOV_CASE(, )   /* SSE */                                                     \
2469  MOV_CASE(V, )  /* AVX-128 */                                                 \
2470  MOV_CASE(V, Y) /* AVX-256 */                                                 \
2471  MOV_AVX512_CASE(Z)                                                           \
2472  MOV_AVX512_CASE(Z256)                                                        \
2473  MOV_AVX512_CASE(Z128)
2474
2475    // For loads from a constant pool to a vector register, print the constant
2476    // loaded.
2477    CASE_ALL_MOV_RM()
2478  case X86::VBROADCASTF128:
2479  case X86::VBROADCASTI128:
2480  case X86::VBROADCASTF32X4Z256rm:
2481  case X86::VBROADCASTF32X4rm:
2482  case X86::VBROADCASTF32X8rm:
2483  case X86::VBROADCASTF64X2Z128rm:
2484  case X86::VBROADCASTF64X2rm:
2485  case X86::VBROADCASTF64X4rm:
2486  case X86::VBROADCASTI32X4Z256rm:
2487  case X86::VBROADCASTI32X4rm:
2488  case X86::VBROADCASTI32X8rm:
2489  case X86::VBROADCASTI64X2Z128rm:
2490  case X86::VBROADCASTI64X2rm:
2491  case X86::VBROADCASTI64X4rm:
2492    if (!OutStreamer->isVerboseAsm())
2493      break;
2494    if (MI->getNumOperands() <= 4)
2495      break;
2496    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2497      int NumLanes = 1;
2498      // Override NumLanes for the broadcast instructions.
2499      switch (MI->getOpcode()) {
2500      case X86::VBROADCASTF128:        NumLanes = 2; break;
2501      case X86::VBROADCASTI128:        NumLanes = 2; break;
2502      case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2503      case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2504      case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2505      case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2506      case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2507      case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2508      case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2509      case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2510      case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2511      case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2512      case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2513      case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2514      }
2515
2516      std::string Comment;
2517      raw_string_ostream CS(Comment);
2518      const MachineOperand &DstOp = MI->getOperand(0);
2519      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2520      if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2521        CS << "[";
2522        for (int l = 0; l != NumLanes; ++l) {
2523          for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2524               ++i) {
2525            if (i != 0 || l != 0)
2526              CS << ",";
2527            if (CDS->getElementType()->isIntegerTy())
2528              printConstant(CDS->getElementAsAPInt(i), CS);
2529            else if (CDS->getElementType()->isHalfTy() ||
2530                     CDS->getElementType()->isFloatTy() ||
2531                     CDS->getElementType()->isDoubleTy())
2532              printConstant(CDS->getElementAsAPFloat(i), CS);
2533            else
2534              CS << "?";
2535          }
2536        }
2537        CS << "]";
2538        OutStreamer->AddComment(CS.str());
2539      } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2540        CS << "<";
2541        for (int l = 0; l != NumLanes; ++l) {
2542          for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2543               ++i) {
2544            if (i != 0 || l != 0)
2545              CS << ",";
2546            printConstant(CV->getOperand(i), CS);
2547          }
2548        }
2549        CS << ">";
2550        OutStreamer->AddComment(CS.str());
2551      }
2552    }
2553    break;
2554  case X86::MOVDDUPrm:
2555  case X86::VMOVDDUPrm:
2556  case X86::VMOVDDUPZ128rm:
2557  case X86::VBROADCASTSSrm:
2558  case X86::VBROADCASTSSYrm:
2559  case X86::VBROADCASTSSZ128m:
2560  case X86::VBROADCASTSSZ256m:
2561  case X86::VBROADCASTSSZm:
2562  case X86::VBROADCASTSDYrm:
2563  case X86::VBROADCASTSDZ256m:
2564  case X86::VBROADCASTSDZm:
2565  case X86::VPBROADCASTBrm:
2566  case X86::VPBROADCASTBYrm:
2567  case X86::VPBROADCASTBZ128m:
2568  case X86::VPBROADCASTBZ256m:
2569  case X86::VPBROADCASTBZm:
2570  case X86::VPBROADCASTDrm:
2571  case X86::VPBROADCASTDYrm:
2572  case X86::VPBROADCASTDZ128m:
2573  case X86::VPBROADCASTDZ256m:
2574  case X86::VPBROADCASTDZm:
2575  case X86::VPBROADCASTQrm:
2576  case X86::VPBROADCASTQYrm:
2577  case X86::VPBROADCASTQZ128m:
2578  case X86::VPBROADCASTQZ256m:
2579  case X86::VPBROADCASTQZm:
2580  case X86::VPBROADCASTWrm:
2581  case X86::VPBROADCASTWYrm:
2582  case X86::VPBROADCASTWZ128m:
2583  case X86::VPBROADCASTWZ256m:
2584  case X86::VPBROADCASTWZm:
2585    if (!OutStreamer->isVerboseAsm())
2586      break;
2587    if (MI->getNumOperands() <= 4)
2588      break;
2589    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2590      int NumElts;
2591      switch (MI->getOpcode()) {
2592      default: llvm_unreachable("Invalid opcode");
2593      case X86::MOVDDUPrm:         NumElts = 2;  break;
2594      case X86::VMOVDDUPrm:        NumElts = 2;  break;
2595      case X86::VMOVDDUPZ128rm:    NumElts = 2;  break;
2596      case X86::VBROADCASTSSrm:    NumElts = 4;  break;
2597      case X86::VBROADCASTSSYrm:   NumElts = 8;  break;
2598      case X86::VBROADCASTSSZ128m: NumElts = 4;  break;
2599      case X86::VBROADCASTSSZ256m: NumElts = 8;  break;
2600      case X86::VBROADCASTSSZm:    NumElts = 16; break;
2601      case X86::VBROADCASTSDYrm:   NumElts = 4;  break;
2602      case X86::VBROADCASTSDZ256m: NumElts = 4;  break;
2603      case X86::VBROADCASTSDZm:    NumElts = 8;  break;
2604      case X86::VPBROADCASTBrm:    NumElts = 16; break;
2605      case X86::VPBROADCASTBYrm:   NumElts = 32; break;
2606      case X86::VPBROADCASTBZ128m: NumElts = 16; break;
2607      case X86::VPBROADCASTBZ256m: NumElts = 32; break;
2608      case X86::VPBROADCASTBZm:    NumElts = 64; break;
2609      case X86::VPBROADCASTDrm:    NumElts = 4;  break;
2610      case X86::VPBROADCASTDYrm:   NumElts = 8;  break;
2611      case X86::VPBROADCASTDZ128m: NumElts = 4;  break;
2612      case X86::VPBROADCASTDZ256m: NumElts = 8;  break;
2613      case X86::VPBROADCASTDZm:    NumElts = 16; break;
2614      case X86::VPBROADCASTQrm:    NumElts = 2;  break;
2615      case X86::VPBROADCASTQYrm:   NumElts = 4;  break;
2616      case X86::VPBROADCASTQZ128m: NumElts = 2;  break;
2617      case X86::VPBROADCASTQZ256m: NumElts = 4;  break;
2618      case X86::VPBROADCASTQZm:    NumElts = 8;  break;
2619      case X86::VPBROADCASTWrm:    NumElts = 8;  break;
2620      case X86::VPBROADCASTWYrm:   NumElts = 16; break;
2621      case X86::VPBROADCASTWZ128m: NumElts = 8;  break;
2622      case X86::VPBROADCASTWZ256m: NumElts = 16; break;
2623      case X86::VPBROADCASTWZm:    NumElts = 32; break;
2624      }
2625
2626      std::string Comment;
2627      raw_string_ostream CS(Comment);
2628      const MachineOperand &DstOp = MI->getOperand(0);
2629      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2630      CS << "[";
2631      for (int i = 0; i != NumElts; ++i) {
2632        if (i != 0)
2633          CS << ",";
2634        printConstant(C, CS);
2635      }
2636      CS << "]";
2637      OutStreamer->AddComment(CS.str());
2638    }
2639  }
2640
2641  MCInst TmpInst;
2642  MCInstLowering.Lower(MI, TmpInst);
2643
2644  // Stackmap shadows cannot include branch targets, so we can count the bytes
2645  // in a call towards the shadow, but must ensure that no thread returns
2646  // into the stackmap shadow.  The only way to achieve this is if the call
2647  // is at the end of the shadow.
2648  if (MI->isCall()) {
2649    // Count the size of the call towards the shadow.
2650    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2651    // Then flush the shadow so that we fill with nops before the call, not
2652    // after it.
2653    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2654    // Then emit the call
2655    OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
2656    return;
2657  }
2658
2659  EmitAndCountInstruction(TmpInst);
2660}
2661