1//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MCTargetDesc/X86BaseInfo.h"
10#include "MCTargetDesc/X86FixupKinds.h"
11#include "llvm/ADT/StringSwitch.h"
12#include "llvm/BinaryFormat/ELF.h"
13#include "llvm/BinaryFormat/MachO.h"
14#include "llvm/MC/MCAsmBackend.h"
15#include "llvm/MC/MCAsmLayout.h"
16#include "llvm/MC/MCAssembler.h"
17#include "llvm/MC/MCCodeEmitter.h"
18#include "llvm/MC/MCContext.h"
19#include "llvm/MC/MCDwarf.h"
20#include "llvm/MC/MCELFObjectWriter.h"
21#include "llvm/MC/MCExpr.h"
22#include "llvm/MC/MCFixupKindInfo.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstrInfo.h"
25#include "llvm/MC/MCMachObjectWriter.h"
26#include "llvm/MC/MCObjectStreamer.h"
27#include "llvm/MC/MCObjectWriter.h"
28#include "llvm/MC/MCRegisterInfo.h"
29#include "llvm/MC/MCSectionMachO.h"
30#include "llvm/MC/MCSubtargetInfo.h"
31#include "llvm/MC/MCValue.h"
32#include "llvm/Support/CommandLine.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/TargetRegistry.h"
35#include "llvm/Support/raw_ostream.h"
36
37using namespace llvm;
38
39namespace {
40/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41class X86AlignBranchKind {
42private:
43  uint8_t AlignBranchKind = 0;
44
45public:
46  void operator=(const std::string &Val) {
47    if (Val.empty())
48      return;
49    SmallVector<StringRef, 6> BranchTypes;
50    StringRef(Val).split(BranchTypes, '+', -1, false);
51    for (auto BranchType : BranchTypes) {
52      if (BranchType == "fused")
53        addKind(X86::AlignBranchFused);
54      else if (BranchType == "jcc")
55        addKind(X86::AlignBranchJcc);
56      else if (BranchType == "jmp")
57        addKind(X86::AlignBranchJmp);
58      else if (BranchType == "call")
59        addKind(X86::AlignBranchCall);
60      else if (BranchType == "ret")
61        addKind(X86::AlignBranchRet);
62      else if (BranchType == "indirect")
63        addKind(X86::AlignBranchIndirect);
64      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus separated)\n";
68      }
69    }
70  }
71
72  operator uint8_t() const { return AlignBranchKind; }
73  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74};
75
76X86AlignBranchKind X86AlignBranchKindLoc;
77
78cl::opt<unsigned> X86AlignBranchBoundary(
79    "x86-align-branch-boundary", cl::init(0),
80    cl::desc(
81        "Control how the assembler should align branches with NOP. If the "
82        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent them from crossing or "
        "ending at the boundary of the specified size. The default value 0 does not "
85        "align branches."));
86
87cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
88    "x86-align-branch",
89    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
97    cl::location(X86AlignBranchKindLoc));
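
// An illustrative invocation combining the two options above (the
// -x86-align-branch* options are defined in this file; the surrounding
// llvm-mc flags are shown only as an example):
//
//   llvm-mc -filetype=obj -triple=x86_64 \
//       -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp foo.s
//
// pads code so that fused pairs, conditional jumps and unconditional jumps
// neither cross nor end at a 32-byte boundary.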
98
99cl::opt<bool> X86AlignBranchWithin32BBoundaries(
100    "x86-branches-within-32B-boundaries", cl::init(false),
101    cl::desc(
102        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for erratum SKX102.  May break "
104        "assumptions about labels corresponding to particular instructions, "
105        "and should be used with caution."));
106
107cl::opt<unsigned> X86PadMaxPrefixSize(
108    "x86-pad-max-prefix-size", cl::init(0),
109    cl::desc("Maximum number of prefixes to use for padding"));
110
111cl::opt<bool> X86PadForAlign(
112    "x86-pad-for-align", cl::init(true), cl::Hidden,
113    cl::desc("Pad previous instructions to implement align directives"));
114
115cl::opt<bool> X86PadForBranchAlign(
116    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
117    cl::desc("Pad previous instructions to implement branch alignment"));
118
119class X86ELFObjectWriter : public MCELFObjectTargetWriter {
120public:
  X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
                     bool HasRelocationAddend)
      : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
124};
125
126class X86AsmBackend : public MCAsmBackend {
127  const MCSubtargetInfo &STI;
128  std::unique_ptr<const MCInstrInfo> MCII;
129  X86AlignBranchKind AlignBranchType;
130  Align AlignBoundary;
131  unsigned TargetPrefixMax = 0;
132
133  MCInst PrevInst;
134  MCBoundaryAlignFragment *PendingBA = nullptr;
135  std::pair<MCFragment *, size_t> PrevInstPosition;
136  bool CanPadInst;
137
138  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
139  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
140  bool needAlign(const MCInst &Inst) const;
141  bool canPadBranches(MCObjectStreamer &OS) const;
142  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
143
144public:
145  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
146      : MCAsmBackend(support::little), STI(STI),
147        MCII(T.createMCInstrInfo()) {
148    if (X86AlignBranchWithin32BBoundaries) {
149      // At the moment, this defaults to aligning fused branches, unconditional
150      // jumps, and (unfused) conditional jumps with nops.  Both the
151      // instructions aligned and the alignment method (nop vs prefix) may
152      // change in the future.
      AlignBoundary = assumeAligned(32);
154      AlignBranchType.addKind(X86::AlignBranchFused);
155      AlignBranchType.addKind(X86::AlignBranchJcc);
156      AlignBranchType.addKind(X86::AlignBranchJmp);
157    }
158    // Allow overriding defaults set by master flag
159    if (X86AlignBranchBoundary.getNumOccurrences())
160      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
161    if (X86AlignBranch.getNumOccurrences())
162      AlignBranchType = X86AlignBranchKindLoc;
163    if (X86PadMaxPrefixSize.getNumOccurrences())
164      TargetPrefixMax = X86PadMaxPrefixSize;
165  }
166
167  bool allowAutoPadding() const override;
168  bool allowEnhancedRelaxation() const override;
169  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
170  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
171
172  unsigned getNumFixupKinds() const override {
173    return X86::NumTargetFixupKinds;
174  }
175
176  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
177
178  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
179
180  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
181                             const MCValue &Target) override;
182
183  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
184                  const MCValue &Target, MutableArrayRef<char> Data,
185                  uint64_t Value, bool IsResolved,
186                  const MCSubtargetInfo *STI) const override;
187
188  bool mayNeedRelaxation(const MCInst &Inst,
189                         const MCSubtargetInfo &STI) const override;
190
191  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
192                            const MCRelaxableFragment *DF,
193                            const MCAsmLayout &Layout) const override;
194
195  void relaxInstruction(MCInst &Inst,
196                        const MCSubtargetInfo &STI) const override;
197
198  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
199                                   MCCodeEmitter &Emitter,
200                                   unsigned &RemainingSize) const;
201
202  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
203                               unsigned &RemainingSize) const;
204
205  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
206                              unsigned &RemainingSize) const;
207
208  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
209
210  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
211};
212} // end anonymous namespace
213
214static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
215  unsigned Op = Inst.getOpcode();
216  switch (Op) {
217  default:
218    return Op;
219  case X86::JCC_1:
220    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
221  case X86::JMP_1:
222    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
223  }
224}
225
226static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
227  unsigned Op = Inst.getOpcode();
228  switch (Op) {
229  default:
230    return Op;
231
232    // IMUL
233  case X86::IMUL16rri8: return X86::IMUL16rri;
234  case X86::IMUL16rmi8: return X86::IMUL16rmi;
235  case X86::IMUL32rri8: return X86::IMUL32rri;
236  case X86::IMUL32rmi8: return X86::IMUL32rmi;
237  case X86::IMUL64rri8: return X86::IMUL64rri32;
238  case X86::IMUL64rmi8: return X86::IMUL64rmi32;
239
240    // AND
241  case X86::AND16ri8: return X86::AND16ri;
242  case X86::AND16mi8: return X86::AND16mi;
243  case X86::AND32ri8: return X86::AND32ri;
244  case X86::AND32mi8: return X86::AND32mi;
245  case X86::AND64ri8: return X86::AND64ri32;
246  case X86::AND64mi8: return X86::AND64mi32;
247
248    // OR
249  case X86::OR16ri8: return X86::OR16ri;
250  case X86::OR16mi8: return X86::OR16mi;
251  case X86::OR32ri8: return X86::OR32ri;
252  case X86::OR32mi8: return X86::OR32mi;
253  case X86::OR64ri8: return X86::OR64ri32;
254  case X86::OR64mi8: return X86::OR64mi32;
255
256    // XOR
257  case X86::XOR16ri8: return X86::XOR16ri;
258  case X86::XOR16mi8: return X86::XOR16mi;
259  case X86::XOR32ri8: return X86::XOR32ri;
260  case X86::XOR32mi8: return X86::XOR32mi;
261  case X86::XOR64ri8: return X86::XOR64ri32;
262  case X86::XOR64mi8: return X86::XOR64mi32;
263
264    // ADD
265  case X86::ADD16ri8: return X86::ADD16ri;
266  case X86::ADD16mi8: return X86::ADD16mi;
267  case X86::ADD32ri8: return X86::ADD32ri;
268  case X86::ADD32mi8: return X86::ADD32mi;
269  case X86::ADD64ri8: return X86::ADD64ri32;
270  case X86::ADD64mi8: return X86::ADD64mi32;
271
272   // ADC
273  case X86::ADC16ri8: return X86::ADC16ri;
274  case X86::ADC16mi8: return X86::ADC16mi;
275  case X86::ADC32ri8: return X86::ADC32ri;
276  case X86::ADC32mi8: return X86::ADC32mi;
277  case X86::ADC64ri8: return X86::ADC64ri32;
278  case X86::ADC64mi8: return X86::ADC64mi32;
279
280    // SUB
281  case X86::SUB16ri8: return X86::SUB16ri;
282  case X86::SUB16mi8: return X86::SUB16mi;
283  case X86::SUB32ri8: return X86::SUB32ri;
284  case X86::SUB32mi8: return X86::SUB32mi;
285  case X86::SUB64ri8: return X86::SUB64ri32;
286  case X86::SUB64mi8: return X86::SUB64mi32;
287
288   // SBB
289  case X86::SBB16ri8: return X86::SBB16ri;
290  case X86::SBB16mi8: return X86::SBB16mi;
291  case X86::SBB32ri8: return X86::SBB32ri;
292  case X86::SBB32mi8: return X86::SBB32mi;
293  case X86::SBB64ri8: return X86::SBB64ri32;
294  case X86::SBB64mi8: return X86::SBB64mi32;
295
296    // CMP
297  case X86::CMP16ri8: return X86::CMP16ri;
298  case X86::CMP16mi8: return X86::CMP16mi;
299  case X86::CMP32ri8: return X86::CMP32ri;
300  case X86::CMP32mi8: return X86::CMP32mi;
301  case X86::CMP64ri8: return X86::CMP64ri32;
302  case X86::CMP64mi8: return X86::CMP64mi32;
303
304    // PUSH
305  case X86::PUSH32i8:  return X86::PUSHi32;
306  case X86::PUSH16i8:  return X86::PUSHi16;
307  case X86::PUSH64i8:  return X86::PUSH64i32;
308  }
309}
310
311static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
312  unsigned R = getRelaxedOpcodeArith(Inst);
313  if (R != Inst.getOpcode())
314    return R;
315  return getRelaxedOpcodeBranch(Inst, Is16BitMode);
316}
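
// As a concrete illustration of the relaxation above (encoding sizes are the
// usual ones and are given for illustration only): "je .Lfoo" is first emitted
// as JCC_1, a 2-byte rel8 form. If layout finds that .Lfoo is out of rel8
// range, relaxInstruction() rewrites it to JCC_4, the 6-byte rel32 form (or
// JCC_2 in 16-bit mode).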
317
318static X86::CondCode getCondFromBranch(const MCInst &MI,
319                                       const MCInstrInfo &MCII) {
320  unsigned Opcode = MI.getOpcode();
321  switch (Opcode) {
322  default:
323    return X86::COND_INVALID;
324  case X86::JCC_1: {
325    const MCInstrDesc &Desc = MCII.get(Opcode);
326    return static_cast<X86::CondCode>(
327        MI.getOperand(Desc.getNumOperands() - 1).getImm());
328  }
329  }
330}
331
332static X86::SecondMacroFusionInstKind
333classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
334  X86::CondCode CC = getCondFromBranch(MI, MCII);
335  return classifySecondCondCodeInMacroFusion(CC);
336}
337
338/// Check if the instruction uses RIP relative addressing.
339static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
340  unsigned Opcode = MI.getOpcode();
341  const MCInstrDesc &Desc = MCII.get(Opcode);
342  uint64_t TSFlags = Desc.TSFlags;
343  unsigned CurOp = X86II::getOperandBias(Desc);
344  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
345  if (MemoryOperand < 0)
346    return false;
347  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
348  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
349  return (BaseReg == X86::RIP);
350}
351
352/// Check if the instruction is a prefix.
353static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
354  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
355}
356
357/// Check if the instruction is valid as the first instruction in macro fusion.
358static bool isFirstMacroFusibleInst(const MCInst &Inst,
359                                    const MCInstrInfo &MCII) {
360  // An Intel instruction with RIP relative addressing is not macro fusible.
361  if (isRIPRelative(Inst, MCII))
362    return false;
363  X86::FirstMacroFusionInstKind FIK =
364      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
365  return FIK != X86::FirstMacroFusionInstKind::Invalid;
366}
367
/// X86 can reduce the number of NOP bytes needed by padding instructions with
/// prefixes, which gives better performance in some cases. Here, we determine
/// which prefix is the most suitable.
371///
372/// If the instruction has a segment override prefix, use the existing one.
373/// If the target is 64-bit, use the CS.
374/// If the target is 32-bit,
375///   - If the instruction has a ESP/EBP base register, use SS.
376///   - Otherwise use DS.
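///
/// For example (illustrative only): in 32-bit mode, "movl %eax, (%ebp)" has no
/// explicit segment override and uses EBP as its base register, so it would be
/// padded with SS prefixes (0x36), while the same store through (%ecx) would
/// get DS prefixes (0x3e).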
377uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
378  assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
379         "Prefixes can be added only in 32-bit or 64-bit mode.");
380  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
381  uint64_t TSFlags = Desc.TSFlags;
382
383  // Determine where the memory operand starts, if present.
384  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
385  if (MemoryOperand != -1)
386    MemoryOperand += X86II::getOperandBias(Desc);
387
388  unsigned SegmentReg = 0;
389  if (MemoryOperand >= 0) {
390    // Check for explicit segment override on memory operand.
391    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
392  }
393
394  switch (TSFlags & X86II::FormMask) {
395  default:
396    break;
397  case X86II::RawFrmDstSrc: {
398    // Check segment override opcode prefix as needed (not for %ds).
399    if (Inst.getOperand(2).getReg() != X86::DS)
400      SegmentReg = Inst.getOperand(2).getReg();
401    break;
402  }
403  case X86II::RawFrmSrc: {
404    // Check segment override opcode prefix as needed (not for %ds).
405    if (Inst.getOperand(1).getReg() != X86::DS)
406      SegmentReg = Inst.getOperand(1).getReg();
407    break;
408  }
409  case X86II::RawFrmMemOffs: {
410    // Check segment override opcode prefix as needed.
411    SegmentReg = Inst.getOperand(1).getReg();
412    break;
413  }
414  }
415
416  if (SegmentReg != 0)
417    return X86::getSegmentOverridePrefixForReg(SegmentReg);
418
419  if (STI.hasFeature(X86::Mode64Bit))
420    return X86::CS_Encoding;
421
422  if (MemoryOperand >= 0) {
423    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
424    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
425    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
426      return X86::SS_Encoding;
427  }
428  return X86::DS_Encoding;
429}
430
431/// Check if the two instructions will be macro-fused on the target cpu.
432bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
433  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
434  if (!InstDesc.isConditionalBranch())
435    return false;
436  if (!isFirstMacroFusibleInst(Cmp, *MCII))
437    return false;
438  const X86::FirstMacroFusionInstKind CmpKind =
439      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
440  const X86::SecondMacroFusionInstKind BranchKind =
441      classifySecondInstInMacroFusion(Jcc, *MCII);
442  return X86::isMacroFused(CmpKind, BranchKind);
443}
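
// As a sketch of what the predicate above accepts (whether fusion really
// occurs also depends on the CPU): "cmp %rax, %rbx" immediately followed by
// "jne .Ltarget" is a candidate pair, since CMP is a valid first instruction
// and JNE is a fusible condition code, whereas "cmp 4(%rip), %rbx" followed by
// "jne .Ltarget" is rejected because RIP-relative addressing blocks macro
// fusion (see isFirstMacroFusibleInst above).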
444
445/// Check if the instruction has a variant symbol operand.
446static bool hasVariantSymbol(const MCInst &MI) {
447  for (auto &Operand : MI) {
448    if (!Operand.isExpr())
449      continue;
450    const MCExpr &Expr = *Operand.getExpr();
451    if (Expr.getKind() == MCExpr::SymbolRef &&
452        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
453      return true;
454  }
455  return false;
456}
457
458bool X86AsmBackend::allowAutoPadding() const {
459  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
460}
461
462bool X86AsmBackend::allowEnhancedRelaxation() const {
463  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
464}
465
466/// X86 has certain instructions which enable interrupts exactly one
467/// instruction *after* the instruction which stores to SS.  Return true if the
468/// given instruction has such an interrupt delay slot.
469static bool hasInterruptDelaySlot(const MCInst &Inst) {
470  switch (Inst.getOpcode()) {
471  case X86::POPSS16:
472  case X86::POPSS32:
473  case X86::STI:
474    return true;
475
476  case X86::MOV16sr:
477  case X86::MOV32sr:
478  case X86::MOV64sr:
479  case X86::MOV16sm:
480    if (Inst.getOperand(0).getReg() == X86::SS)
481      return true;
482    break;
483  }
484  return false;
485}
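
// For example (illustrative): in the sequence "mov %ax, %ss" followed by
// "mov %bx, %sp", interrupts are inhibited for exactly one instruction after
// the load of SS. Inserting a NOP between the two could let an interrupt be
// taken with the new SS but the old SP, so canPadInst() below refuses to pad
// after such an instruction.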
486
487/// Check if the instruction to be emitted is right after any data.
488static bool
489isRightAfterData(MCFragment *CurrentFragment,
490                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
491  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
495  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
496    if (cast<MCDataFragment>(F)->getContents().size() != 0)
497      break;
498
499  // Since data is always emitted into a DataFragment, our check strategy is
500  // simple here.
501  //   - If the fragment is a DataFragment
502  //     - If it's not the fragment where the previous instruction is,
503  //       returns true.
504  //     - If it's the fragment holding the previous instruction but its
    //       size changed since the previous instruction was emitted into
506  //       it, returns true.
507  //     - Otherwise returns false.
508  //   - If the fragment is not a DataFragment, returns false.
509  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
510    return DF != PrevInstPosition.first ||
511           DF->getContents().size() != PrevInstPosition.second;
512
513  return false;
514}
515
516/// \returns the fragment size if it has instructions, otherwise returns 0.
517static size_t getSizeForInstFragment(const MCFragment *F) {
518  if (!F || !F->hasInstructions())
519    return 0;
520  // MCEncodedFragmentWithContents being templated makes this tricky.
521  switch (F->getKind()) {
522  default:
523    llvm_unreachable("Unknown fragment with instructions!");
524  case MCFragment::FT_Data:
525    return cast<MCDataFragment>(*F).getContents().size();
526  case MCFragment::FT_Relaxable:
527    return cast<MCRelaxableFragment>(*F).getContents().size();
528  case MCFragment::FT_CompactEncodedInst:
529    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
530  }
531}
532
/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
535bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
536  if (hasVariantSymbol(Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
539    return false;
540
541  if (hasInterruptDelaySlot(PrevInst))
542    // If this instruction follows an interrupt enabling instruction with a one
543    // instruction delay, inserting a nop would change behavior.
544    return false;
545
546  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
549    return false;
550
551  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
554    return false;
555
556  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary; inserting a nop/prefix would change semantics.
559    return false;
560
561  return true;
562}
563
564bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
565  if (!OS.getAllowAutoPadding())
566    return false;
567  assert(allowAutoPadding() && "incorrect initialization!");
568
569  // We only pad in text section.
570  if (!OS.getCurrentSectionOnly()->getKind().isText())
571    return false;
572
  // TODO: Currently we don't deal with Bundle cases.
574  if (OS.getAssembler().isBundlingEnabled())
575    return false;
576
577  // Branches only need to be aligned in 32-bit or 64-bit mode.
578  if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
579    return false;
580
581  return true;
582}
583
/// Check if the instruction needs to be aligned.
585bool X86AsmBackend::needAlign(const MCInst &Inst) const {
586  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
587  return (Desc.isConditionalBranch() &&
588          (AlignBranchType & X86::AlignBranchJcc)) ||
589         (Desc.isUnconditionalBranch() &&
590          (AlignBranchType & X86::AlignBranchJmp)) ||
591         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
592         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
593         (Desc.isIndirectBranch() &&
594          (AlignBranchType & X86::AlignBranchIndirect));
595}
596
597/// Insert BoundaryAlignFragment before instructions to align branches.
598void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
599                                         const MCInst &Inst) {
600  CanPadInst = canPadInst(Inst, OS);
601
602  if (!canPadBranches(OS))
603    return;
604
605  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
607    PendingBA = nullptr;
608
609  if (!CanPadInst)
610    return;
611
612  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
613    // Macro fusion actually happens and there is no other fragment inserted
614    // after the previous instruction.
615    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
617    // we met the first instruction in the fused pair and we'll tie them
618    // together in emitInstructionEnd.
619    //
620    // Note: When there is at least one fragment, such as MCAlignFragment,
621    // inserted after the previous instruction, e.g.
622    //
623    // \code
624    //   cmp %rax %rcx
625    //   .align 16
626    //   je .Label0
    // \endcode
628    //
    // We will treat the JCC as an unfused branch although it may be fused
630    // with the CMP.
631    return;
632  }
633
634  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
635                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
637    // insert a BoundaryAlign fragment.
638    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
639  }
640}
641
642/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
644  PrevInst = Inst;
645  MCFragment *CF = OS.getCurrentFragment();
646  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
647  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
648    F->setAllowAutoPadding(CanPadInst);
649
650  if (!canPadBranches(OS))
651    return;
652
653  if (!needAlign(Inst) || !PendingBA)
654    return;
655
  // Tie the aligned instructions into the pending BoundaryAlign.
657  PendingBA->setLastFragment(CF);
658  PendingBA = nullptr;
659
660  // We need to ensure that further data isn't added to the current
661  // DataFragment, so that we can get the size of instructions later in
662  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
663  // DataFragment.
664  if (isa_and_nonnull<MCDataFragment>(CF))
665    OS.insert(new MCDataFragment());
666
667  // Update the maximum alignment on the current section if necessary.
668  MCSection *Sec = OS.getCurrentSectionOnly();
669  if (AlignBoundary.value() > Sec->getAlignment())
670    Sec->setAlignment(AlignBoundary);
671}
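
// Putting the two hooks above together, a sketch of the intended effect with a
// 32-byte boundary and AlignBranchFused set (offsets and encodings are
// illustrative):
//
//   0x1e: cmp %rcx, %rax   <- the fused pair would cross the 0x20 boundary
//   0x21: je  .Ltarget
//
// becomes, after MCAssembler::relaxBoundaryAlign sizes the pending
// MCBoundaryAlignFragment to two bytes of NOP padding:
//
//   0x1e: xchg %ax, %ax    <- 2-byte NOP from writeNopData
//   0x20: cmp %rcx, %rax   <- the pair now starts at the boundary
//   0x23: je  .Ltarget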
672
673Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
674  if (STI.getTargetTriple().isOSBinFormatELF()) {
675    unsigned Type;
676    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
677      Type = llvm::StringSwitch<unsigned>(Name)
678#define ELF_RELOC(X, Y) .Case(#X, Y)
679#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
680#undef ELF_RELOC
681                 .Default(-1u);
682    } else {
683      Type = llvm::StringSwitch<unsigned>(Name)
684#define ELF_RELOC(X, Y) .Case(#X, Y)
685#include "llvm/BinaryFormat/ELFRelocs/i386.def"
686#undef ELF_RELOC
687                 .Default(-1u);
688    }
689    if (Type == -1u)
690      return None;
691    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
692  }
693  return MCAsmBackend::getFixupKind(Name);
694}
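
// For example (illustrative), an explicit relocation name used via the .reloc
// directive, such as
//
//   .reloc ., R_X86_64_NONE, foo
//
// is mapped by the StringSwitch above to the raw ELF relocation type, offset
// by FirstLiteralRelocationKind, so that later stages pass it through
// unmodified (see shouldForceRelocation and applyFixup below).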
695
696const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
697  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
698      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
699      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
700      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
701      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
702      {"reloc_signed_4byte", 0, 32, 0},
703      {"reloc_signed_4byte_relax", 0, 32, 0},
704      {"reloc_global_offset_table", 0, 32, 0},
705      {"reloc_global_offset_table8", 0, 64, 0},
706      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
707  };
708
709  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
710  // do not require any extra processing.
711  if (Kind >= FirstLiteralRelocationKind)
712    return MCAsmBackend::getFixupKindInfo(FK_NONE);
713
714  if (Kind < FirstTargetFixupKind)
715    return MCAsmBackend::getFixupKindInfo(Kind);
716
717  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
718         "Invalid kind!");
719  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
720  return Infos[Kind - FirstTargetFixupKind];
721}
722
723bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
724                                          const MCFixup &Fixup,
725                                          const MCValue &) {
726  return Fixup.getKind() >= FirstLiteralRelocationKind;
727}
728
729static unsigned getFixupKindSize(unsigned Kind) {
730  switch (Kind) {
731  default:
732    llvm_unreachable("invalid fixup kind!");
733  case FK_NONE:
734    return 0;
735  case FK_PCRel_1:
736  case FK_SecRel_1:
737  case FK_Data_1:
738    return 1;
739  case FK_PCRel_2:
740  case FK_SecRel_2:
741  case FK_Data_2:
742    return 2;
743  case FK_PCRel_4:
744  case X86::reloc_riprel_4byte:
745  case X86::reloc_riprel_4byte_relax:
746  case X86::reloc_riprel_4byte_relax_rex:
747  case X86::reloc_riprel_4byte_movq_load:
748  case X86::reloc_signed_4byte:
749  case X86::reloc_signed_4byte_relax:
750  case X86::reloc_global_offset_table:
751  case X86::reloc_branch_4byte_pcrel:
752  case FK_SecRel_4:
753  case FK_Data_4:
754    return 4;
755  case FK_PCRel_8:
756  case FK_SecRel_8:
757  case FK_Data_8:
758  case X86::reloc_global_offset_table8:
759    return 8;
760  }
761}
762
763void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
764                               const MCValue &Target,
765                               MutableArrayRef<char> Data,
766                               uint64_t Value, bool IsResolved,
767                               const MCSubtargetInfo *STI) const {
768  unsigned Kind = Fixup.getKind();
769  if (Kind >= FirstLiteralRelocationKind)
770    return;
771  unsigned Size = getFixupKindSize(Kind);
772
773  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
774
775  int64_t SignedValue = static_cast<int64_t>(Value);
776  if ((Target.isAbsolute() || IsResolved) &&
777      getFixupKindInfo(Fixup.getKind()).Flags &
778      MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(Fixup.getLoc(),
                                   "value of " + Twine(SignedValue) +
                                       " is too large for field of " +
                                       Twine(Size) +
                                       ((Size == 1) ? " byte." : " bytes."));
785  } else {
    // Check that the upper bits are either all zeros or all ones.
787    // Specifically ignore overflow/underflow as long as the leakage is
788    // limited to the lower bits. This is to remain compatible with
789    // other assemblers.
790    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
791           "Value does not fit in the Fixup field");
792  }
793
794  for (unsigned i = 0; i != Size; ++i)
795    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
796}
797
798bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
799                                      const MCSubtargetInfo &STI) const {
800  // Branches can always be relaxed in either mode.
801  if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
802    return true;
803
804  // Check if this instruction is ever relaxable.
805  if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
806    return false;
807
809  // Check if the relaxable operand has an expression. For the current set of
810  // relaxable instructions, the relaxable operand is always the last operand.
811  unsigned RelaxableOp = Inst.getNumOperands() - 1;
812  if (Inst.getOperand(RelaxableOp).isExpr())
813    return true;
814
815  return false;
816}
817
818bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
819                                         uint64_t Value,
820                                         const MCRelaxableFragment *DF,
821                                         const MCAsmLayout &Layout) const {
822  // Relax if the value is too big for a (signed) i8.
823  return !isInt<8>(Value);
824}
825
826// FIXME: Can tblgen help at all here to verify there aren't other instructions
827// we can relax?
828void X86AsmBackend::relaxInstruction(MCInst &Inst,
829                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
831  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
832  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
833
834  if (RelaxedOp == Inst.getOpcode()) {
835    SmallString<256> Tmp;
836    raw_svector_ostream OS(Tmp);
837    Inst.dump_pretty(OS);
838    OS << "\n";
839    report_fatal_error("unexpected instruction to relax: " + OS.str());
840  }
841
842  Inst.setOpcode(RelaxedOp);
843}
844
/// Return true if this instruction has been fully relaxed into its most
846/// general available form.
847static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
848  auto &Inst = RF.getInst();
849  auto &STI = *RF.getSubtargetInfo();
850  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
851  return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
852}
853
854bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
855                                            MCCodeEmitter &Emitter,
856                                            unsigned &RemainingSize) const {
857  if (!RF.getAllowAutoPadding())
858    return false;
859  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
861  // will also catch this before moving to the next instruction, but we need to
862  // prevent padding this single instruction as well.
863  if (!isFullyRelaxed(RF))
864    return false;
865
866  const unsigned OldSize = RF.getContents().size();
867  if (OldSize == 15)
868    return false;
869
870  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
871  const unsigned RemainingPrefixSize = [&]() -> unsigned {
872    SmallString<15> Code;
873    raw_svector_ostream VecOS(Code);
874    Emitter.emitPrefix(RF.getInst(), VecOS, STI);
875    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
876
    // TODO: It turns out we need a decent amount of plumbing for the
    // target-specific bits to determine the number of prefixes it is safe to
    // add.  Various targets (older chips mostly, but also the Atom family)
    // encounter decoder stalls with too many prefixes.  For testing purposes,
    // we set the value externally for the moment.
882    unsigned ExistingPrefixSize = Code.size();
883    if (TargetPrefixMax <= ExistingPrefixSize)
884      return 0;
885    return TargetPrefixMax - ExistingPrefixSize;
886  }();
887  const unsigned PrefixBytesToAdd =
888      std::min(MaxPossiblePad, RemainingPrefixSize);
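  // Worked example (numbers are illustrative): with OldSize == 4,
  // RemainingSize == 3, TargetPrefixMax == 5 and one prefix already emitted,
  // MaxPossiblePad == min(11, 3) == 3 and RemainingPrefixSize == 5 - 1 == 4,
  // so PrefixBytesToAdd == 3 and three padding prefixes are prepended below.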
889  if (PrefixBytesToAdd == 0)
890    return false;
891
892  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
893
894  SmallString<256> Code;
895  Code.append(PrefixBytesToAdd, Prefix);
896  Code.append(RF.getContents().begin(), RF.getContents().end());
897  RF.getContents() = Code;
898
899  // Adjust the fixups for the change in offsets
900  for (auto &F : RF.getFixups()) {
901    F.setOffset(F.getOffset() + PrefixBytesToAdd);
902  }
903
904  RemainingSize -= PrefixBytesToAdd;
905  return true;
906}
907
908bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
909                                                MCCodeEmitter &Emitter,
910                                                unsigned &RemainingSize) const {
911  if (isFullyRelaxed(RF))
912    // TODO: There are lots of other tricks we could apply for increasing
913    // encoding size without impacting performance.
914    return false;
915
916  MCInst Relaxed = RF.getInst();
917  relaxInstruction(Relaxed, *RF.getSubtargetInfo());
918
919  SmallVector<MCFixup, 4> Fixups;
920  SmallString<15> Code;
921  raw_svector_ostream VecOS(Code);
922  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
923  const unsigned OldSize = RF.getContents().size();
924  const unsigned NewSize = Code.size();
925  assert(NewSize >= OldSize && "size decrease during relaxation?");
926  unsigned Delta = NewSize - OldSize;
927  if (Delta > RemainingSize)
928    return false;
929  RF.setInst(Relaxed);
930  RF.getContents() = Code;
931  RF.getFixups() = Fixups;
932  RemainingSize -= Delta;
933  return true;
934}
935
936bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
937                                           MCCodeEmitter &Emitter,
938                                           unsigned &RemainingSize) const {
939  bool Changed = false;
940  if (RemainingSize != 0)
941    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
942  if (RemainingSize != 0)
943    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
944  return Changed;
945}
946
947void X86AsmBackend::finishLayout(MCAssembler const &Asm,
948                                 MCAsmLayout &Layout) const {
949  // See if we can further relax some instructions to cut down on the number of
950  // nop bytes required for code alignment.  The actual win is in reducing
951  // instruction count, not number of bytes.  Modern X86-64 can easily end up
952  // decode limited.  It is often better to reduce the number of instructions
953  // (i.e. eliminate nops) even at the cost of increasing the size and
954  // complexity of others.
955  if (!X86PadForAlign && !X86PadForBranchAlign)
956    return;
957
958  DenseSet<MCFragment *> LabeledFragments;
959  for (const MCSymbol &S : Asm.symbols())
960    LabeledFragments.insert(S.getFragment(false));
961
962  for (MCSection &Sec : Asm) {
963    if (!Sec.getKind().isText())
964      continue;
965
966    SmallVector<MCRelaxableFragment *, 4> Relaxable;
967    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
968      MCFragment &F = *I;
969
970      if (LabeledFragments.count(&F))
971        Relaxable.clear();
972
973      if (F.getKind() == MCFragment::FT_Data ||
974          F.getKind() == MCFragment::FT_CompactEncodedInst)
975        // Skip and ignore
976        continue;
977
978      if (F.getKind() == MCFragment::FT_Relaxable) {
979        auto &RF = cast<MCRelaxableFragment>(*I);
980        Relaxable.push_back(&RF);
981        continue;
982      }
983
984      auto canHandle = [](MCFragment &F) -> bool {
985        switch (F.getKind()) {
986        default:
987          return false;
988        case MCFragment::FT_Align:
989          return X86PadForAlign;
990        case MCFragment::FT_BoundaryAlign:
991          return X86PadForBranchAlign;
992        }
993      };
994      // For any unhandled kind, assume we can't change layout.
995      if (!canHandle(F)) {
996        Relaxable.clear();
997        continue;
998      }
999
1000#ifndef NDEBUG
1001      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
1002#endif
1003      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
1004
1005      // To keep the effects local, prefer to relax instructions closest to
1006      // the align directive.  This is purely about human understandability
1007      // of the resulting code.  If we later find a reason to expand
1008      // particular instructions over others, we can adjust.
1009      MCFragment *FirstChangedFragment = nullptr;
1010      unsigned RemainingSize = OrigSize;
1011      while (!Relaxable.empty() && RemainingSize != 0) {
1012        auto &RF = *Relaxable.pop_back_val();
1013        // Give the backend a chance to play any tricks it wishes to increase
1014        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
1016        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
1017          FirstChangedFragment = &RF;
1018
1019        // If we have an instruction which hasn't been fully relaxed, we can't
1020        // skip past it and insert bytes before it.  Changing its starting
1021        // offset might require a larger negative offset than it can encode.
1022        // We don't need to worry about larger positive offsets as none of the
1023        // possible offsets between this and our align are visible, and the
1024        // ones afterwards aren't changing.
1025        if (!isFullyRelaxed(RF))
1026          break;
1027      }
1028      Relaxable.clear();
1029
1030      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
1032        // updated.  Note that this (conservatively) invalidates the offsets of
1033        // those following, but this is not required.
1034        Layout.invalidateFragmentsFrom(FirstChangedFragment);
1035      }
1036
      // BoundaryAlign explicitly tracks its size (unlike align).
1038      if (F.getKind() == MCFragment::FT_BoundaryAlign)
1039        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
1040
1041#ifndef NDEBUG
1042      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
1043      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
1044      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
1045             "can't move start of next fragment!");
1046      assert(FinalSize == RemainingSize && "inconsistent size computation?");
1047#endif
1048
1049      // If we're looking at a boundary align, make sure we don't try to pad
1050      // its target instructions for some following directive.  Doing so would
1051      // break the alignment of the current boundary align.
1052      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
1053        const MCFragment *LastFragment = BF->getLastFragment();
1054        if (!LastFragment)
1055          continue;
1056        while (&*I != LastFragment)
1057          ++I;
1058      }
1059    }
1060  }
1061
1062  // The layout is done. Mark every fragment as valid.
1063  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
1064    MCSection &Section = *Layout.getSectionOrder()[i];
1065    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
1066    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
1067  }
1068}
1069
1070/// Write a sequence of optimal nops to the output, covering \p Count
1071/// bytes.
1072/// \return - true on success, false on failure
1073bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
1074  static const char Nops[10][11] = {
1075    // nop
1076    "\x90",
1077    // xchg %ax,%ax
1078    "\x66\x90",
1079    // nopl (%[re]ax)
1080    "\x0f\x1f\x00",
1081    // nopl 0(%[re]ax)
1082    "\x0f\x1f\x40\x00",
1083    // nopl 0(%[re]ax,%[re]ax,1)
1084    "\x0f\x1f\x44\x00\x00",
1085    // nopw 0(%[re]ax,%[re]ax,1)
1086    "\x66\x0f\x1f\x44\x00\x00",
1087    // nopl 0L(%[re]ax)
1088    "\x0f\x1f\x80\x00\x00\x00\x00",
1089    // nopl 0L(%[re]ax,%[re]ax,1)
1090    "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1091    // nopw 0L(%[re]ax,%[re]ax,1)
1092    "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1093    // nopw %cs:0L(%[re]ax,%[re]ax,1)
1094    "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1095  };
1096
  // This CPU doesn't support long nops. If needed, add more.
  // FIXME: We could generate something better than plain 0x90.
1099  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
1100    for (uint64_t i = 0; i < Count; ++i)
1101      OS << '\x90';
1102    return true;
1103  }
1104
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
1106  // commonly the longest that can be efficiently decoded.
1107  uint64_t MaxNopLength = 10;
1108  if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
1109    MaxNopLength = 7;
1110  else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
1111    MaxNopLength = 15;
1112  else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
1113    MaxNopLength = 11;
1114
1115  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1116  // length.
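  // For example (illustrative): Count == 14 with MaxNopLength == 10 emits the
  // 10-byte NOP followed by the 4-byte NOP, while Count == 12 with
  // MaxNopLength == 15 emits two 0x66 prefixes followed by the 10-byte NOP.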
1117  do {
1118    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1119    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1120    for (uint8_t i = 0; i < Prefixes; i++)
1121      OS << '\x66';
1122    const uint8_t Rest = ThisNopLength - Prefixes;
1123    if (Rest != 0)
1124      OS.write(Nops[Rest - 1], Rest);
1125    Count -= ThisNopLength;
1126  } while (Count != 0);
1127
1128  return true;
1129}
1130
1131/* *** */
1132
1133namespace {
1134
1135class ELFX86AsmBackend : public X86AsmBackend {
1136public:
1137  uint8_t OSABI;
1138  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1139      : X86AsmBackend(T, STI), OSABI(OSABI) {}
1140};
1141
1142class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1143public:
1144  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1145                      const MCSubtargetInfo &STI)
1146    : ELFX86AsmBackend(T, OSABI, STI) {}
1147
1148  std::unique_ptr<MCObjectTargetWriter>
1149  createObjectTargetWriter() const override {
1150    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1151  }
1152};
1153
1154class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1155public:
1156  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1157                       const MCSubtargetInfo &STI)
1158      : ELFX86AsmBackend(T, OSABI, STI) {}
1159
1160  std::unique_ptr<MCObjectTargetWriter>
1161  createObjectTargetWriter() const override {
1162    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1163                                    ELF::EM_X86_64);
1164  }
1165};
1166
1167class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1168public:
1169  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1170                         const MCSubtargetInfo &STI)
1171      : ELFX86AsmBackend(T, OSABI, STI) {}
1172
1173  std::unique_ptr<MCObjectTargetWriter>
1174  createObjectTargetWriter() const override {
1175    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1176                                    ELF::EM_IAMCU);
1177  }
1178};
1179
1180class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1181public:
1182  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1183                      const MCSubtargetInfo &STI)
1184    : ELFX86AsmBackend(T, OSABI, STI) {}
1185
1186  std::unique_ptr<MCObjectTargetWriter>
1187  createObjectTargetWriter() const override {
1188    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1189  }
1190};
1191
1192class WindowsX86AsmBackend : public X86AsmBackend {
1193  bool Is64Bit;
1194
1195public:
1196  WindowsX86AsmBackend(const Target &T, bool is64Bit,
1197                       const MCSubtargetInfo &STI)
1198    : X86AsmBackend(T, STI)
1199    , Is64Bit(is64Bit) {
1200  }
1201
1202  Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1203    return StringSwitch<Optional<MCFixupKind>>(Name)
1204        .Case("dir32", FK_Data_4)
1205        .Case("secrel32", FK_SecRel_4)
1206        .Case("secidx", FK_SecRel_2)
1207        .Default(MCAsmBackend::getFixupKind(Name));
1208  }
1209
1210  std::unique_ptr<MCObjectTargetWriter>
1211  createObjectTargetWriter() const override {
1212    return createX86WinCOFFObjectWriter(Is64Bit);
1213  }
1214};
1215
1216namespace CU {
1217
1218  /// Compact unwind encoding values.
1219  enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1221    /// the return address, then [RE]SP is moved to [RE]BP.
1222    UNWIND_MODE_BP_FRAME                   = 0x01000000,
1223
1224    /// A frameless function with a small constant stack size.
1225    UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1226
1227    /// A frameless function with a large constant stack size.
1228    UNWIND_MODE_STACK_IND                  = 0x03000000,
1229
1230    /// No compact unwind encoding is available.
1231    UNWIND_MODE_DWARF                      = 0x04000000,
1232
1233    /// Mask for encoding the frame registers.
1234    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1235
1236    /// Mask for encoding the frameless registers.
1237    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1238  };
1239
1240} // end CU namespace
1241
1242class DarwinX86AsmBackend : public X86AsmBackend {
1243  const MCRegisterInfo &MRI;
1244
1245  /// Number of registers that can be saved in a compact unwind encoding.
1246  enum { CU_NUM_SAVED_REGS = 6 };
1247
1248  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1249  Triple TT;
1250  bool Is64Bit;
1251
1252  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1253  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1254  unsigned StackDivide;                  ///< Amount to adjust stack size by.
1255protected:
1256  /// Size of a "push" instruction for the given register.
1257  unsigned PushInstrSize(unsigned Reg) const {
1258    switch (Reg) {
1259      case X86::EBX:
1260      case X86::ECX:
1261      case X86::EDX:
1262      case X86::EDI:
1263      case X86::ESI:
1264      case X86::EBP:
1265      case X86::RBX:
1266      case X86::RBP:
1267        return 1;
1268      case X86::R12:
1269      case X86::R13:
1270      case X86::R14:
1271      case X86::R15:
1272        return 2;
1273    }
1274    return 1;
1275  }
1276
1277private:
1278  /// Get the compact unwind number for a given register. The number
1279  /// corresponds to the enum lists in compact_unwind_encoding.h.
1280  int getCompactUnwindRegNum(unsigned Reg) const {
1281    static const MCPhysReg CU32BitRegs[7] = {
1282      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1283    };
1284    static const MCPhysReg CU64BitRegs[] = {
1285      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1286    };
1287    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1288    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1289      if (*CURegs == Reg)
1290        return Idx;
1291
1292    return -1;
1293  }
1294
1295  /// Return the registers encoded for a compact encoding with a frame
1296  /// pointer.
1297  uint32_t encodeCompactUnwindRegistersWithFrame() const {
1298    // Encode the registers in the order they were saved --- 3-bits per
1299    // register. The list of saved registers is assumed to be in reverse
1300    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1301    uint32_t RegEnc = 0;
1302    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1303      unsigned Reg = SavedRegs[i];
1304      if (Reg == 0) break;
1305
1306      int CURegNum = getCompactUnwindRegNum(Reg);
1307      if (CURegNum == -1) return ~0U;
1308
1309      // Encode the 3-bit register number in order, skipping over 3-bits for
1310      // each register.
1311      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1312    }
1313
1314    assert((RegEnc & 0x3FFFF) == RegEnc &&
1315           "Invalid compact register encoding!");
1316    return RegEnc;
1317  }
1318
1319  /// Create the permutation encoding used with frameless stacks. It is
1320  /// passed the number of registers to be saved and an array of the registers
1321  /// saved.
1322  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1323    // The saved registers are numbered from 1 to 6. In order to encode the
1324    // order in which they were saved, we re-number them according to their
1325    // place in the register order. The re-numbering is relative to the last
1326    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1327    // that order:
1328    //
1329    //    Orig  Re-Num
1330    //    ----  ------
1331    //     6       6
1332    //     2       2
1333    //     4       3
1334    //     5       3
1335    //
1336    for (unsigned i = 0; i < RegCount; ++i) {
1337      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1338      if (CUReg == -1) return ~0U;
1339      SavedRegs[i] = CUReg;
1340    }
1341
1342    // Reverse the list.
1343    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1344
1345    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1346    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1347      unsigned Countless = 0;
1348      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1349        if (SavedRegs[j] < SavedRegs[i])
1350          ++Countless;
1351
1352      RenumRegs[i] = SavedRegs[i] - Countless - 1;
1353    }
1354
1355    // Take the renumbered values and encode them into a 10-bit number.
1356    uint32_t permutationEncoding = 0;
1357    switch (RegCount) {
1358    case 6:
1359      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1360                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1361                             +     RenumRegs[4];
1362      break;
1363    case 5:
1364      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1365                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1366                             +     RenumRegs[5];
1367      break;
1368    case 4:
1369      permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1370                             + 3 * RenumRegs[4] +      RenumRegs[5];
1371      break;
1372    case 3:
1373      permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1374                             +     RenumRegs[5];
1375      break;
1376    case 2:
1377      permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1378      break;
1379    case 1:
1380      permutationEncoding |=       RenumRegs[5];
1381      break;
1382    }
1383
1384    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1385           "Invalid compact register encoding!");
1386    return permutationEncoding;
1387  }
1388
1389public:
1390  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1391                      const MCSubtargetInfo &STI)
1392      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1393        Is64Bit(TT.isArch64Bit()) {
1394    memset(SavedRegs, 0, sizeof(SavedRegs));
1395    OffsetSize = Is64Bit ? 8 : 4;
1396    MoveInstrSize = Is64Bit ? 3 : 2;
1397    StackDivide = Is64Bit ? 8 : 4;
1398  }
1399
1400  std::unique_ptr<MCObjectTargetWriter>
1401  createObjectTargetWriter() const override {
1402    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1403    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1404    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1405  }
1406
1407  /// Implementation of algorithm to generate the compact unwind encoding
1408  /// for the CFI instructions.
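  ///
  /// An illustrative example, derived by tracing the logic below rather than
  /// taken from any platform documentation: a standard prologue
  ///
  ///     pushq %rbp              .cfi_def_cfa_offset 16
  ///                             .cfi_offset %rbp, -16
  ///     movq  %rsp, %rbp        .cfi_def_cfa_register %rbp
  ///
  /// with no other callee-saved registers yields UNWIND_MODE_BP_FRAME with a
  /// zero stack adjust and an empty register list, i.e. an encoding of
  /// 0x01000000.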
  uint32_t generateCompactUnwindEncoding(
      ArrayRef<MCCFIInstruction> Instrs) const override {
1411    if (Instrs.empty()) return 0;
1412
1413    // Reset the saved registers.
1414    unsigned SavedRegIdx = 0;
1415    memset(SavedRegs, 0, sizeof(SavedRegs));
1416
1417    bool HasFP = false;
1418
1419    // Encode that we are using EBP/RBP as the frame pointer.
1420    uint32_t CompactUnwindEncoding = 0;
1421
1422    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1423    unsigned InstrOffset = 0;
1424    unsigned StackAdjust = 0;
1425    unsigned StackSize = 0;
1426    unsigned NumDefCFAOffsets = 0;
1427
1428    for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
1429      const MCCFIInstruction &Inst = Instrs[i];
1430
1431      switch (Inst.getOperation()) {
1432      default:
1433        // Any other CFI directives indicate a frame that we aren't prepared
1434        // to represent via compact unwind, so just bail out.
1435        return 0;
1436      case MCCFIInstruction::OpDefCfaRegister: {
1437        // Defines a frame pointer. E.g.
1438        //
1439        //     movq %rsp, %rbp
1440        //  L0:
1441        //     .cfi_def_cfa_register %rbp
1442        //
1443        HasFP = true;
1444
        // If the frame pointer register is other than ebp/rbp, we do not
        // have a way to generate a compact unwinding representation, so bail
        // out.
1447        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1448            (Is64Bit ? X86::RBP : X86::EBP))
1449          return 0;
1450
1451        // Reset the counts.
1452        memset(SavedRegs, 0, sizeof(SavedRegs));
1453        StackAdjust = 0;
1454        SavedRegIdx = 0;
1455        InstrOffset += MoveInstrSize;
1456        break;
1457      }
1458      case MCCFIInstruction::OpDefCfaOffset: {
1459        // Defines a new offset for the CFA. E.g.
1460        //
1461        //  With frame:
1462        //
1463        //     pushq %rbp
1464        //  L0:
1465        //     .cfi_def_cfa_offset 16
1466        //
1467        //  Without frame:
1468        //
1469        //     subq $72, %rsp
1470        //  L0:
1471        //     .cfi_def_cfa_offset 80
1472        //
1473        StackSize = Inst.getOffset() / StackDivide;
1474        ++NumDefCFAOffsets;
1475        break;
1476      }
1477      case MCCFIInstruction::OpOffset: {
1478        // Defines a "push" of a callee-saved register. E.g.
1479        //
1480        //     pushq %r15
1481        //     pushq %r14
1482        //     pushq %rbx
1483        //  L0:
1484        //     subq $120, %rsp
1485        //  L1:
1486        //     .cfi_offset %rbx, -40
1487        //     .cfi_offset %r14, -32
1488        //     .cfi_offset %r15, -24
1489        //
1490        if (SavedRegIdx == CU_NUM_SAVED_REGS)
1491          // If there are too many saved registers, we cannot use a compact
1492          // unwind encoding.
1493          return CU::UNWIND_MODE_DWARF;
1494
1495        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1496        SavedRegs[SavedRegIdx++] = Reg;
1497        StackAdjust += OffsetSize;
1498        InstrOffset += PushInstrSize(Reg);
1499        break;
1500      }
1501      }
1502    }
1503
1504    StackAdjust /= StackDivide;
1505
1506    if (HasFP) {
1507      if ((StackAdjust & 0xFF) != StackAdjust)
1508        // Offset was too big for a compact unwind encoding.
1509        return CU::UNWIND_MODE_DWARF;
1510
1511      // Get the encoding of the saved registers when we have a frame pointer.
1512      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1513      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1514
1515      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1516      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1517      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1518    } else {
1519      SubtractInstrIdx += InstrOffset;
1520      ++StackAdjust;
1521
1522      if ((StackSize & 0xFF) == StackSize) {
1523        // Frameless stack with a small stack size.
1524        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1525
1526        // Encode the stack size.
1527        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1528      } else {
1529        if ((StackAdjust & 0x7) != StackAdjust)
1530          // The extra stack adjustments are too big for us to handle.
1531          return CU::UNWIND_MODE_DWARF;
1532
1533        // Frameless stack with an offset too large for us to encode compactly.
1534        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1535
1536        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1537        // instruction.
1538        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1539
1540        // Encode any extra stack adjustments (done via push instructions).
1541        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1542      }
1543
1544      // Encode the number of registers saved. (Reverse the list first.)
1545      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1546      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1547
1548      // Get the encoding of the saved registers when we don't have a frame
1549      // pointer.
1550      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1551      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1552
1553      // Encode the register encoding.
1554      CompactUnwindEncoding |=
1555        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1556    }
1557
1558    return CompactUnwindEncoding;
1559  }
1560};
1561
1562} // end anonymous namespace
1563
1564MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1565                                           const MCSubtargetInfo &STI,
1566                                           const MCRegisterInfo &MRI,
1567                                           const MCTargetOptions &Options) {
1568  const Triple &TheTriple = STI.getTargetTriple();
1569  if (TheTriple.isOSBinFormatMachO())
1570    return new DarwinX86AsmBackend(T, MRI, STI);
1571
1572  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1573    return new WindowsX86AsmBackend(T, false, STI);
1574
1575  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1576
1577  if (TheTriple.isOSIAMCU())
1578    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1579
1580  return new ELFX86_32AsmBackend(T, OSABI, STI);
1581}
1582
1583MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1584                                           const MCSubtargetInfo &STI,
1585                                           const MCRegisterInfo &MRI,
1586                                           const MCTargetOptions &Options) {
1587  const Triple &TheTriple = STI.getTargetTriple();
1588  if (TheTriple.isOSBinFormatMachO())
1589    return new DarwinX86AsmBackend(T, MRI, STI);
1590
1591  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1592    return new WindowsX86AsmBackend(T, true, STI);
1593
1594  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1595
1596  if (TheTriple.getEnvironment() == Triple::GNUX32)
1597    return new ELFX86_X32AsmBackend(T, OSABI, STI);
1598  return new ELFX86_64AsmBackend(T, OSABI, STI);
1599}
1600