//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind.
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus-separated).\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent them from crossing or "
        "ending at the boundary of the specified size. The default value 0 "
        "does not align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus-separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));
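
// For example, "-x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp"
// pads fused compare-and-branch pairs, conditional jumps, and unconditional
// jumps so that none of them crosses or ends at a 32-byte boundary.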

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for erratum SKX102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops.  Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding the defaults set by the main flag.
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP-relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
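/// For example, a compare that uses RIP-relative addressing, such as
/// `cmpq $0, foo(%rip)`, cannot fuse with a following `jcc`, so it is
/// rejected here.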
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP-relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes by padding instructions with
/// prefixes to get better performance in some cases. Here, we determine which
/// prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
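///
/// For example (illustrative only): in 64-bit mode, padding
/// `movq (%rax), %rbx` prepends CS prefixes (0x2e); in 32-bit mode,
/// `movl (%ebp), %eax` has an EBP base register, so it gets SS prefixes
/// (0x36).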
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for an explicit segment override on the memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target CPU.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS.  Return true if the
/// given instruction has such an interrupt delay slot.
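///
/// For example, in the classic `sti; hlt` idiom no interrupt can be taken
/// between the two instructions; a nop inserted between them would open a
/// window in which an interrupt could be serviced before the halt.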
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
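///
/// For example (illustrative), after
/// \code
///   .byte 0x66
///   jmp foo
/// \endcode
/// the jump directly follows raw data bytes, so there is no clear instruction
/// boundary in front of it and no padding may be inserted there.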
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite the instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction operand needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from the .reloc directive are like R_386_NONE/R_X86_64_NONE.
  // They do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
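//
// For example, relaxation roughly rewrites
//   jne target        # JCC_1: 75 <rel8>      (2 bytes)
// into
//   jne target        # JCC_4: 0f 85 <rel32>  (6 bytes)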
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // X86 relaxes a 1-byte pcrel branch to its 4-byte pcrel form, or an
  // instruction with a short immediate to its long-immediate form.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it's safe to add.
    // Various targets (older chips mostly, but also the Atom family) encounter
    // decoder stalls with too many prefixes.  For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment.  The actual win is in reducing
  // instruction count, not number of bytes.  Modern X86-64 can easily end up
  // decode limited.  It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore.
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive.  This is purely about human understandability
      // of the resulting code.  If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it.  Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated.  Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.  Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
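///
/// For example, with a maximum nop size of 10, a request for 15 bytes is
/// emitted as a 10-byte nop followed by a 5-byte nop rather than as a single
/// 15-byte instruction.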
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

  /// Compact unwind encoding values.
  enum CompactUnwindEncodings {
    /// [RE]BP based frame, where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME                   = 0x01000000,

    /// A frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD                 = 0x02000000,

    /// A frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND                  = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF                      = 0x04000000,

    /// Mask for encoding the frame registers.
    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,

    /// Mask for encoding the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Size of a pushed stack slot.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
      case X86::EBX:
      case X86::ECX:
      case X86::EDX:
      case X86::EDI:
      case X86::ESI:
      case X86::EBP:
      case X86::RBX:
      case X86::RBP:
        return 1;
      case X86::R12:
      case X86::R13:
      case X86::R14:
      case X86::R15:
        return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
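  ///
  /// For example, in 64-bit mode RBX maps to 1, R12 through R15 map to 2
  /// through 5, and RBP maps to 6; any other register yields -1.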
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved, 3 bits per register.
    // The list of saved registers is assumed to be in reverse order. The
    // registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3 bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS;
         ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 3:
      permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 2:
      permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 1:
      permutationEncoding |=       RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of the algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
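  ///
  /// Roughly: a frame-based function yields UNWIND_MODE_BP_FRAME with the
  /// stack adjustment in bits 16-23 and the saved-register encoding in the
  /// low bits; frameless functions use UNWIND_MODE_STACK_IMMD or
  /// UNWIND_MODE_STACK_IND, and anything we cannot represent falls back to
  /// UNWIND_MODE_DWARF.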
  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than ebp/rbp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}