//===- X86_64.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

namespace {
class X86_64 : public TargetInfo {
public:
  X86_64();
  int getTlsGdRelaxSkip(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  void writeGotPltHeader(uint8_t *buf) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                         unsigned size) const override;
  RelExpr adjustGotPcExpr(RelType type, int64_t addend,
                          const uint8_t *loc) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
  bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                        uint8_t stOther) const override;
  bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                             InputSection *nextIS) const override;
};
} // namespace

// This is a vector of NOP instructions of sizes from 1 to 9 bytes.  The
// appropriately sized instructions are used to fill the gaps between sections
// which are executed during fall through.
static const std::vector<std::vector<uint8_t>> nopInstructions = {
    {0x90},
    {0x66, 0x90},
    {0x0f, 0x1f, 0x00},
    {0x0f, 0x1f, 0x40, 0x00},
    {0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
    {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
    {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
    {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};

X86_64::X86_64() {
  copyRel = R_X86_64_COPY;
  gotRel = R_X86_64_GLOB_DAT;
  pltRel = R_X86_64_JUMP_SLOT;
  relativeRel = R_X86_64_RELATIVE;
  iRelativeRel = R_X86_64_IRELATIVE;
  symbolicRel = R_X86_64_64;
  tlsDescRel = R_X86_64_TLSDESC;
  tlsGotRel = R_X86_64_TPOFF64;
  tlsModuleIndexRel = R_X86_64_DTPMOD64;
  tlsOffsetRel = R_X86_64_DTPOFF64;
  gotBaseSymInGotPlt = true;
  gotEntrySize = 8;
  pltHeaderSize = 16;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
  nopInstrs = nopInstructions;

  // Align to the large page size (known as a superpage or huge page).
  // FreeBSD automatically promotes large, superpage-aligned allocations.
  defaultImageBase = 0x200000;
}

int X86_64::getTlsGdRelaxSkip(RelType type) const {
  // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
  return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 1
                                                                           : 2;
}

// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
  J_JMP_32,
  J_JNE_32,
  J_JE_32,
  J_JG_32,
  J_JGE_32,
  J_JB_32,
  J_JBE_32,
  J_JL_32,
  J_JLE_32,
  J_JA_32,
  J_JAE_32,
  J_UNKNOWN,
};

// Given the first (optional) and second byte of the insn's opcode, this
// returns the corresponding enum value.
static JmpInsnOpcode getJmpInsnType(const uint8_t *first,
                                    const uint8_t *second) {
  if (*second == 0xe9)
    return J_JMP_32;

  if (first == nullptr)
    return J_UNKNOWN;

  if (*first == 0x0f) {
    switch (*second) {
    case 0x84:
      return J_JE_32;
    case 0x85:
      return J_JNE_32;
    case 0x8f:
      return J_JG_32;
    case 0x8d:
      return J_JGE_32;
    case 0x82:
      return J_JB_32;
    case 0x86:
      return J_JBE_32;
    case 0x8c:
      return J_JL_32;
    case 0x8e:
      return J_JLE_32;
    case 0x87:
      return J_JA_32;
    case 0x83:
      return J_JAE_32;
    }
  }
  return J_UNKNOWN;
}

// Returns the index of the relocation at the given offset in input section
// `is`, or the size of the relocation vector if no such relocation is found.
static unsigned getRelocationWithOffset(const InputSection &is,
                                        uint64_t offset) {
  unsigned size = is.relocs().size();
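  // Iterate in reverse; `i + 1 > 0` is the overflow-safe way of writing
  // `i >= 0` for an unsigned index.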
  for (unsigned i = size - 1; i + 1 > 0; --i) {
    if (is.relocs()[i].offset == offset && is.relocs()[i].expr != R_NONE)
      return i;
  }
  return size;
}

// Returns true if R corresponds to a relocation used for a jump instruction.
// TODO: Once special relocations for relaxable jump instructions are available,
// this should be modified to use those relocations.
static bool isRelocationForJmpInsn(Relocation &R) {
  return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 ||
         R.type == R_X86_64_PC8;
}

// Return true if Relocation R points to the first instruction in the
// next section.
// TODO: Delete this once psABI reserves a new relocation type for fall thru
// jumps.
static bool isFallThruRelocation(InputSection &is, InputFile *file,
                                 InputSection *nextIS, Relocation &r) {
  if (!isRelocationForJmpInsn(r))
    return false;

  uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;
  uint64_t targetOffset = InputSectionBase::getRelocTargetVA(
      file, r.type, r.addend, addrLoc, *r.sym, r.expr);

  // If this jmp is a fall thru, the target offset is the beginning of the
  // next section.
  uint64_t nextSectionOffset =
      nextIS->getOutputSection()->addr + nextIS->outSecOff;
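  // targetOffset is the PC-relative value with the usual -4 addend folded in
  // (the displacement is relative to the end of its 4-byte field), so the
  // absolute jump target is addrLoc + 4 + targetOffset.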
  return (addrLoc + 4 + targetOffset) == nextSectionOffset;
}

// Return the jmp instruction opcode that is the inverse of the given
// opcode.  For example, JE inverted is JNE.
static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {
  switch (opcode) {
  case J_JE_32:
    return J_JNE_32;
  case J_JNE_32:
    return J_JE_32;
  case J_JG_32:
    return J_JLE_32;
  case J_JGE_32:
    return J_JL_32;
  case J_JB_32:
    return J_JAE_32;
  case J_JBE_32:
    return J_JA_32;
  case J_JL_32:
    return J_JGE_32;
  case J_JLE_32:
    return J_JG_32;
  case J_JA_32:
    return J_JBE_32;
  case J_JAE_32:
    return J_JB_32;
  default:
    return J_UNKNOWN;
  }
}

// Deletes a direct jump instruction in an input section when it jumps to the
// following section, as it is not required.  If there are two consecutive jump
// instructions, it checks whether they can be flipped so that one can be
// deleted.  For example:
// .section .text
// a.BB.foo:
//    ...
//    10: jne aa.BB.foo
//    16: jmp bar
// aa.BB.foo:
//    ...
//
// can be converted to:
// a.BB.foo:
//   ...
//   10: je bar  #jne flipped to je and the jmp is deleted.
// aa.BB.foo:
//   ...
bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
                                   InputSection *nextIS) const {
  const unsigned sizeOfDirectJmpInsn = 5;

  if (nextIS == nullptr)
    return false;

  if (is.getSize() < sizeOfDirectJmpInsn)
    return false;

  // If this jmp insn can be removed, it is the last insn and the
  // relocation is 4 bytes before the end.
  unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);
  if (rIndex == is.relocs().size())
    return false;

  Relocation &r = is.relocs()[rIndex];

  // Check if the relocation corresponds to a direct jmp.
  const uint8_t *secContents = is.content().data();
  // If it is not a direct jmp instruction, there is nothing to do here.
  if (*(secContents + r.offset - 1) != 0xe9)
    return false;

  if (isFallThruRelocation(is, file, nextIS, r)) {
    // This is a fall thru and can be deleted.
    r.expr = R_NONE;
    r.offset = 0;
    is.drop_back(sizeOfDirectJmpInsn);
    is.nopFiller = true;
    return true;
  }

  // Now, check if flip and delete is possible.
  const unsigned sizeOfJmpCCInsn = 6;
  // To flip, there must be at least one JmpCC and one direct jmp.
  if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)
    return false;

  unsigned rbIndex =
      getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));
  if (rbIndex == is.relocs().size())
    return false;

  Relocation &rB = is.relocs()[rbIndex];

  const uint8_t *jmpInsnB = secContents + rB.offset - 1;
  JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);
  if (jmpOpcodeB == J_UNKNOWN)
    return false;

  if (!isFallThruRelocation(is, file, nextIS, rB))
    return false;

  // The jmpCC jumps to the fall thru block, so the branch can be flipped and
  // the jmp can be deleted.
  JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);
  if (jInvert == J_UNKNOWN)
    return false;
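  // Record the flip: applyJumpInstrMod will later write the inverted opcode
  // at offset rB.offset - 1 (the second opcode byte of the 6-byte jmpCC); the
  // 4 is the size of the displacement field.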
  is.jumpInstrMod = make<JumpInstrMod>();
  *is.jumpInstrMod = {rB.offset - 1, jInvert, 4};
  // Move r's values to rB, except the offset.
  rB = {r.expr, r.type, rB.offset, r.addend, r.sym};
  // Cancel r.
  r.expr = R_NONE;
  r.offset = 0;
  is.drop_back(sizeOfDirectJmpInsn);
  is.nopFiller = true;
  return true;
}

RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
                           const uint8_t *loc) const {
  switch (type) {
  case R_X86_64_8:
  case R_X86_64_16:
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_64:
    return R_ABS;
  case R_X86_64_DTPOFF32:
  case R_X86_64_DTPOFF64:
    return R_DTPREL;
  case R_X86_64_TPOFF32:
    return R_TPREL;
  case R_X86_64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_X86_64_TLSLD:
    return R_TLSLD_PC;
  case R_X86_64_TLSGD:
    return R_TLSGD_PC;
  case R_X86_64_SIZE32:
  case R_X86_64_SIZE64:
    return R_SIZE;
  case R_X86_64_PLT32:
    return R_PLT_PC;
  case R_X86_64_PC8:
  case R_X86_64_PC16:
  case R_X86_64_PC32:
  case R_X86_64_PC64:
    return R_PC;
  case R_X86_64_GOT32:
  case R_X86_64_GOT64:
    return R_GOTPLT;
  case R_X86_64_GOTPC32_TLSDESC:
    return R_TLSDESC_PC;
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_GOTTPOFF:
    return R_GOT_PC;
  case R_X86_64_GOTOFF64:
    return R_GOTPLTREL;
  case R_X86_64_PLTOFF64:
    return R_PLT_GOTPLT;
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC64:
    return R_GOTPLTONLY_PC;
  case R_X86_64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

void X86_64::writeGotPltHeader(uint8_t *buf) const {
  // The first entry holds the value of _DYNAMIC. It is not clear why that is
  // required, but it is documented in the psABI and the glibc dynamic linker
  // seems to use it (note that this is relevant for linking ld.so, not any
  // other program).
  write64le(buf, mainPart->dynamic->getVA());
}

void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  // See comments in X86::writeGotPlt.
  write64le(buf, s.getPltVA() + 6);
}

void X86_64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An x86 entry is the address of the ifunc resolver function (for -z rel).
  if (config->writeAddends)
    write64le(buf, s.getVA());
}

void X86_64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
      0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
      0x0f, 0x1f, 0x40, 0x00, // nop
  };
  memcpy(buf, pltData, sizeof(pltData));
  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();
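  // Each disp32 is relative to the end (RIP) of its instruction. For the
  // pushq, the field is at plt+2 and RIP is plt+6, so GOTPLT+8 is encoded as
  // gotPlt + 8 - (plt + 6) = gotPlt - plt + 2; likewise for the jmp.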
  write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8
  write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16
}

void X86_64::writePlt(uint8_t *buf, const Symbol &sym,
                      uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
      0x68, 0, 0, 0, 0,       // pushq <relocation index>
      0xe9, 0, 0, 0, 0,       // jmpq plt[0]
  };
  memcpy(buf, inst, sizeof(inst));

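  // The jmpq's disp32 is at buf + 2 with RIP at pltEntryAddr + 6; the tail
  // jmpq's disp32 is at buf + 12 with RIP at pltEntryAddr + 16, targeting the
  // PLT header at plt[0].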
  write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);
  write32le(buf + 7, sym.getPltIdx());
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

RelType X86_64::getDynRel(RelType type) const {
  if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||
      type == R_X86_64_SIZE64)
    return type;
  return R_X86_64_NONE;
}

static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x8d, 0x80, 0,    0,    0,    0, // lea x@tpoff,%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // The original code used a PC-relative relocation, so we have to
    // compensate for the -4 it had in the addend.
    write32le(loc + 8, val + 4);
  } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
    // Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG.
    if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
        (loc[-1] & 0xc7) != 0x05) {
      errorOrWarn(getErrorLocation(loc - 3) +
                  "R_X86_64_GOTPC32_TLSDESC must be used "
                  "in leaq x@tlsdesc(%rip), %REG");
      return;
    }
    loc[-3] = 0x48 | ((loc[-3] >> 2) & 1);
    loc[-2] = 0xc7;
    loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);
    write32le(loc, val + 4);
  } else {
    // Convert call *x@tlsdesc(%REG) to xchg ax, ax.
    assert(rel.type == R_X86_64_TLSDESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  if (rel.type == R_X86_64_TLSGD) {
    // Convert
    //   .byte 0x66
    //   leaq x@tlsgd(%rip), %rdi
    //   .word 0x6666
    //   rex64
    //   call __tls_get_addr@plt
    // to the following two instructions.
    const uint8_t inst[] = {
        0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,
        0x00, 0x00,                            // mov %fs:0x0,%rax
        0x48, 0x03, 0x05, 0,    0,    0,    0, // addq x@gottpoff(%rip),%rax
    };
    memcpy(loc - 4, inst, sizeof(inst));

    // Both code sequences are PC-relative, but since we are moving the
    // constant forward by 8 bytes, we have to subtract 8 from the value.
    write32le(loc + 8, val - 8);
  } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
    // Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG.
    if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
        (loc[-1] & 0xc7) != 0x05) {
      errorOrWarn(getErrorLocation(loc - 3) +
                  "R_X86_64_GOTPC32_TLSDESC must be used "
                  "in leaq x@tlsdesc(%rip), %REG");
      return;
    }
    loc[-2] = 0x8b;
    write32le(loc, val);
  } else {
    // Convert call *x@tlsdesc(%rax) to xchg ax, ax.
    assert(rel.type == R_X86_64_TLSDESC_CALL);
    loc[0] = 0x66;
    loc[1] = 0x90;
  }
}

// Under some conditions, the R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use the GOT.
static void relaxTlsIeToLe(uint8_t *loc, const Relocation &, uint64_t val) {
  uint8_t *inst = loc - 3;
  uint8_t reg = loc[-1] >> 3;
  uint8_t *regSlot = loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.

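  // For example, 48 03 05 <disp32> (addq x@gottpoff(%rip),%rax) becomes
  // 48 8d 80 <tpoff32> (leaq x(%rax),%rax).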
  if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(inst, "\x48\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(inst, "\x49\x81\xc4", 3);
  } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(inst, "\x4d\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x48\x03", 2) == 0) {
518    // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
    memcpy(inst, "\x48\x8d", 2);
    *regSlot = 0x80 | (reg << 3) | reg;
  } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(inst, "\x49\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(inst, "\x48\xc7", 2);
    *regSlot = 0xc0 | reg;
  } else {
    error(getErrorLocation(loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC relative relocation.
  // Need to compensate for the -4 it had in the addend.
  write32le(loc, val + 4);
}

static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
  const uint8_t inst[] = {
      0x66, 0x66,                                           // .word 0x6666
      0x66,                                                 // .byte 0x66
      0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
  };

  if (loc[4] == 0xe8) {
    // Convert
    //   leaq bar@tlsld(%rip), %rdi           # 48 8d 3d <Loc>
    //   callq __tls_get_addr@PLT             # e8 <disp32>
    //   leaq bar@dtpoff(%rax), %rcx
    // to
    //   .word 0x6666
    //   .byte 0x66
    //   mov %fs:0,%rax
    //   leaq bar@tpoff(%rax), %rcx
    memcpy(loc - 3, inst, sizeof(inst));
    return;
  }

  if (loc[4] == 0xff && loc[5] == 0x15) {
    // Convert
    //   leaq  x@tlsld(%rip),%rdi               # 48 8d 3d <Loc>
    //   call *__tls_get_addr@GOTPCREL(%rip)    # ff 15 <disp32>
    // to
    //   .long  0x66666666
    //   movq   %fs:0,%rax
    // See "Table 11.9: LD -> LE Code Transition (LP64)" in
    // https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf
    loc[-3] = 0x66;
    memcpy(loc - 2, inst, sizeof(inst));
    return;
  }

  error(getErrorLocation(loc - 3) +
        "expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");
}

// A JumpInstrMod at a specific offset indicates that the jump instruction
// opcode at that offset must be modified.  This is specifically used to relax
// jump instructions with basic block sections.  This function looks at the
// JumpMod and effects the change.
void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,
                               unsigned size) const {
  switch (type) {
  case J_JMP_32:
    if (size == 4)
      *loc = 0xe9;
    else
      *loc = 0xeb;
    break;
  case J_JE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x84;
    } else
      *loc = 0x74;
    break;
  case J_JNE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x85;
    } else
      *loc = 0x75;
    break;
  case J_JG_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8f;
    } else
      *loc = 0x7f;
    break;
  case J_JGE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8d;
    } else
      *loc = 0x7d;
    break;
  case J_JB_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x82;
    } else
      *loc = 0x72;
    break;
  case J_JBE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x86;
    } else
      *loc = 0x76;
    break;
  case J_JL_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8c;
    } else
      *loc = 0x7c;
    break;
  case J_JLE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x8e;
    } else
      *loc = 0x7e;
    break;
  case J_JA_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x87;
    } else
      *loc = 0x77;
    break;
  case J_JAE_32:
    if (size == 4) {
      loc[-1] = 0x0f;
      *loc = 0x83;
    } else
      *loc = 0x73;
    break;
  case J_UNKNOWN:
    llvm_unreachable("Unknown Jump Relocation");
  }
}

int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_X86_64_8:
  case R_X86_64_PC8:
    return SignExtend64<8>(*buf);
  case R_X86_64_16:
  case R_X86_64_PC16:
    return SignExtend64<16>(read16le(buf));
  case R_X86_64_32:
  case R_X86_64_32S:
  case R_X86_64_TPOFF32:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_GOTPCREL:
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
  case R_X86_64_PC32:
  case R_X86_64_GOTTPOFF:
  case R_X86_64_PLT32:
  case R_X86_64_TLSGD:
  case R_X86_64_TLSLD:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    return SignExtend64<32>(read32le(buf));
  case R_X86_64_64:
  case R_X86_64_TPOFF64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_DTPMOD64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GLOB_DAT:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
  case R_X86_64_PLTOFF64:
  case R_X86_64_IRELATIVE:
  case R_X86_64_RELATIVE:
    return read64le(buf);
  case R_X86_64_TLSDESC:
    return read64le(buf + 8);
  case R_X86_64_JUMP_SLOT:
  case R_X86_64_NONE:
    // These relocations are defined as not having an implicit addend.
    return 0;
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val);

void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  case R_X86_64_8:
    checkIntUInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_X86_64_PC8:
    checkInt(loc, val, 8, rel);
    *loc = val;
    break;
  case R_X86_64_16:
    checkIntUInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_X86_64_PC16:
    checkInt(loc, val, 16, rel);
    write16le(loc, val);
    break;
  case R_X86_64_32:
    checkUInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_X86_64_32S:
  case R_X86_64_GOT32:
  case R_X86_64_GOTPC32:
  case R_X86_64_GOTPCREL:
  case R_X86_64_PC32:
  case R_X86_64_PLT32:
  case R_X86_64_DTPOFF32:
  case R_X86_64_SIZE32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;
  case R_X86_64_64:
  case R_X86_64_DTPOFF64:
  case R_X86_64_PC64:
  case R_X86_64_SIZE64:
  case R_X86_64_GOT64:
  case R_X86_64_GOTOFF64:
  case R_X86_64_GOTPC64:
  case R_X86_64_PLTOFF64:
    write64le(loc, val);
    break;
  case R_X86_64_GOTPCRELX:
  case R_X86_64_REX_GOTPCRELX:
    if (rel.expr != R_GOT_PC) {
      relaxGot(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_GOTPC32_TLSDESC:
  case R_X86_64_TLSDESC_CALL:
  case R_X86_64_TLSGD:
    if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
      relaxTlsGdToLe(loc, rel, val);
    } else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {
      relaxTlsGdToIe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_TLSLD:
    if (rel.expr == R_RELAX_TLS_LD_TO_LE) {
      relaxTlsLdToLe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_GOTTPOFF:
    if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
      relaxTlsIeToLe(loc, rel, val);
    } else {
      checkInt(loc, val, 32, rel);
      write32le(loc, val);
    }
    break;
  case R_X86_64_TPOFF32:
    checkInt(loc, val, 32, rel);
    write32le(loc, val);
    break;

  case R_X86_64_TLSDESC:
    // The addend is stored in the second 64-bit word.
    write64le(loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,
                                const uint8_t *loc) const {
  // Only R_X86_64_[REX_]GOTPCRELX can be relaxed. GNU as may emit GOTPCRELX
  // with addend != -4. Such an instruction does not load the full GOT entry, so
  // we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax
  // (addend=0) loads the high 32 bits of the GOT entry.
  if (!config->relax || addend != -4 ||
      (type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX))
    return R_GOT_PC;
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // FIXME: When PIC is disabled and foo is defined locally in the
  // lower 32-bit address space, the memory operand in mov can be converted
  // into an immediate operand. Otherwise, mov must be changed to lea. We
  // support only the latter relaxation at this moment.
  if (op == 0x8b)
    return R_RELAX_GOT_PC;

  // Relax call and jmp.
  if (op == 0xff && (modRm == 0x15 || modRm == 0x25))
    return R_RELAX_GOT_PC;

  // We don't support test/binop instructions without a REX prefix.
  if (type == R_X86_64_GOTPCRELX)
    return R_GOT_PC;

  // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
  // If PIC then no relaxation is available.
  return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC;
}

// A subset of relaxations can only be applied for no-PIC. This method
// handles such relaxations. Instruction encoding information was taken from:
// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
//    64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,
                          uint8_t modRm) {
  const uint8_t rex = loc[-3];
  // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
  if (op == 0x85) {
    // See "TEST-Logical Compare" (4-428 Vol. 2B),
    // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).

    // ModR/M byte has form XX YYY ZZZ, where
    // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
    // XX has different meanings:
    // 00: The operand's memory address is in reg1.
    // 01: The operand's memory address is reg1 + a byte-sized displacement.
    // 10: The operand's memory address is reg1 + a word-sized displacement.
    // 11: The operand is reg1 itself.
    // If an instruction requires only one operand, the unused reg2 field
    // holds extra opcode bits rather than a register code.
    // 0xC0 == 11 000 000 binary.
    // 0x38 == 00 111 000 binary.
    // We transfer reg2 to reg1 here as operand.
    // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
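    // For example, 48 85 05 <disp32> (test %rax, foo@GOTPCREL(%rip)) becomes
    // 48 f7 c0 <imm32> (test $foo, %rax).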
    loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.

    // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
    // See "TEST-Logical Compare" (4-428 Vol. 2B).
    loc[-2] = 0xf7;

    // Move R bit to the B bit in REX byte.
    // REX byte is encoded as 0100WRXB, where
    // 0100 is 4bit fixed pattern.
    // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
    //   default operand size is used (which is 32-bit for most but not all
    //   instructions).
    // REX.R This 1-bit value is an extension to the MODRM.reg field.
    // REX.X This 1-bit value is an extension to the SIB.index field.
    // REX.B This 1-bit value is an extension to the MODRM.rm field or the
    // SIB.base field.
    // See "2.2.1.2 More on REX Prefix Fields" (2-8 Vol. 2A).
    loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
    write32le(loc, val);
    return;
  }

  // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
  // or xor operations.

  // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
  // The logic is close to that of the test instruction above, but we also
  // write an opcode extension here; see below for details.
  loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.

  // Primary opcode is 0x81, opcode extension is one of:
  // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
  // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
  // This value was written to MODRM.reg in the line above.
  // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
  // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
  // descriptions about each operation.
  loc[-2] = 0x81;
  loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;
  write32le(loc, val);
}

static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) {
  checkInt(loc, val, 32, rel);
  const uint8_t op = loc[-2];
  const uint8_t modRm = loc[-1];

  // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
  if (op == 0x8b) {
    loc[-2] = 0x8d;
    write32le(loc, val);
    return;
  }

  if (op != 0xff) {
    // We are relaxing a RIP-relative reference to an absolute one, so
    // compensate for the old -4 addend.
    assert(!config->isPic);
    relaxGotNoPic(loc, val + 4, op, modRm);
    return;
  }

  // Convert call/jmp instructions.
  if (modRm == 0x15) {
    // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
    // Instead we convert to "addr32 call foo" where addr32 is an instruction
    // prefix. That makes the result a single instruction.
    loc[-2] = 0x67; // addr32 prefix
    loc[-1] = 0xe8; // call
    write32le(loc, val);
    return;
  }

  // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
  // jmp doesn't return, so it is fine to use a nop here; it is just a stub.
  assert(modRm == 0x25);
  loc[-2] = 0xe9; // jmp
  loc[3] = 0x90;  // nop
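  // The displacement now lives at loc - 1 and is relative to loc + 3, one
  // byte earlier than before, hence val + 1.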
  write32le(loc - 1, val + 1);
}

// A split-stack prologue starts by checking the amount of stack remaining
// in one of two ways:
// A) Comparing the stack pointer to a field in the TCB.
// B) Loading a stack-pointer offset into r10 or r11 with a lea.
bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,
                                              uint8_t stOther) const {
  if (!config->is64) {
    error("target doesn't support split stacks");
    return false;
  }

  if (loc + 8 >= end)
    return false;

  // Replace "cmp %fs:0x70,%rsp" and subsequent branch
  // with "stc, nopl 0x0(%rax,%rax,1)"
  if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {
    memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);
    return true;
  }

  // Adjust "lea X(%rsp),%rYY" to "lea (X - 0x4000)(%rsp),%rYY" where rYY could
  // be r10 or r11. The lea instruction feeds a subsequent compare which checks
  // if there is X available stack space. Making X larger effectively reserves
  // that much additional space. The stack grows downward so subtract the value.
  if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||
      memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {
    // The offset bytes are encoded four bytes after the start of the
    // instruction.
    write32le(loc + 4, read32le(loc + 4) - 0x4000);
    return true;
  }
  return false;
}

void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  for (const Relocation &rel : sec.relocs()) {
    if (rel.expr == R_NONE) // See deleteFallThruJmpInsn
      continue;
    uint8_t *loc = buf + rel.offset;
    const uint64_t val =
        sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
                             secAddr + rel.offset, *rel.sym, rel.expr);
    relocate(loc, rel, val);
  }
  if (sec.jumpInstrMod) {
    applyJumpInstrMod(buf + sec.jumpInstrMod->offset,
                      sec.jumpInstrMod->original, sec.jumpInstrMod->size);
  }
}

// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
// entries containing endbr64 instructions. A PLT entry will be split into two
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
namespace {
class IntelIBT : public X86_64 {
public:
  IntelIBT();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;

  static const unsigned IBTPltHeaderSize = 16;
};
} // namespace

IntelIBT::IntelIBT() { pltHeaderSize = 0; }

void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  uint64_t va =
      in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;
  write64le(buf, va);
}

void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
                        uint64_t pltEntryAddr) const {
  const uint8_t Inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // endbr64
      0xff, 0x25, 0,    0,    0, 0, // jmpq *got(%rip)
      0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
  };
  memcpy(buf, Inst, sizeof(Inst));
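  // The jmpq's disp32 is at buf + 6 and RIP is pltEntryAddr + 10 (past the
  // 4-byte endbr64 and the 6-byte jmpq).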
  write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);
}

void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
  writePltHeader(buf);
  buf += IBTPltHeaderSize;

  const uint8_t inst[] = {
      0xf3, 0x0f, 0x1e, 0xfa,    // endbr64
      0x68, 0,    0,    0,    0, // pushq <relocation index>
      0xe9, 0,    0,    0,    0, // jmpq plt[0]
      0x66, 0x90,                // nop
  };

  for (size_t i = 0; i < numEntries; ++i) {
    memcpy(buf, inst, sizeof(inst));
    write32le(buf + 5, i);
    write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);
    buf += sizeof(inst);
  }
}

// These nonstandard PLT entries are to mitigate the Spectre v2 security
// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect
// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT
// entries, we use a CALL followed by MOV and RET to do the same thing as an
// indirect jump. That instruction sequence is the so-called "retpoline".
//
// We have two types of retpoline PLTs as a size optimization. If `-z now`
// is specified, all dynamic symbols are resolved at load-time. Thus, when
// that option is given, we can omit code for symbol lazy resolution.
namespace {
class Retpoline : public X86_64 {
public:
  Retpoline();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};

class RetpolineZNow : public X86_64 {
public:
  RetpolineZNow();
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
};
} // namespace

Retpoline::Retpoline() {
  pltHeaderSize = 48;
  pltEntrySize = 32;
  ipltEntrySize = 32;
}

void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  write64le(buf, s.getPltVA() + 21);
}

void Retpoline::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xff, 0x35, 0,    0,    0,    0,          // 0:    pushq GOTPLT+8(%rip)
      0x4c, 0x8b, 0x1d, 0,    0,    0,    0,    // 6:    mov GOTPLT+16(%rip), %r11
      0xe8, 0x0e, 0x00, 0x00, 0x00,             // d:    callq next
      0xf3, 0x90,                               // 12: loop: pause
      0x0f, 0xae, 0xe8,                         // 14:   lfence
      0xeb, 0xf9,                               // 17:   jmp loop
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19:   int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,                   // 20: next: mov %r11, (%rsp)
      0xc3,                                     // 24:   ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25:   int3; padding
      0xcc, 0xcc, 0xcc, 0xcc,                   // 2c:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t gotPlt = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  write32le(buf + 2, gotPlt - plt - 6 + 8);
  write32le(buf + 9, gotPlt - plt - 13 + 16);
}

void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,
                         uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0xf3, 0x0f, 0x1e, 0xfa,       // 0:  endbr64
      0x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 4:  mov foo@GOTPLT(%rip), %r11
      0xe8, 0,    0,    0,    0,    // b:  callq plt+0x20
      0xe9, 0,    0,    0,    0,    // 10: jmp plt+0x12
      0x68, 0,    0,    0,    0,    // 15: pushq <relocation index>
      0xe9, 0,    0,    0,    0,    // 1a: jmp plt+0
      0xcc,                         // 1f: int3; padding
  };
  memcpy(buf, insn, sizeof(insn));

  uint64_t off = pltEntryAddr - in.plt->getVA();

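  // Each disp32 is relative to the end of its instruction; e.g. the callq's
  // field is at entry+0xc with RIP at entry+0x10, targeting plt+0x20, which
  // gives -off - 16 + 32.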
  write32le(buf + 7, sym.getGotPltVA() - pltEntryAddr - 11);
  write32le(buf + 12, -off - 16 + 32);
  write32le(buf + 17, -off - 21 + 18);
  write32le(buf + 22, sym.getPltIdx());
  write32le(buf + 27, -off - 31);
}

RetpolineZNow::RetpolineZNow() {
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}

void RetpolineZNow::writePltHeader(uint8_t *buf) const {
  const uint8_t insn[] = {
      0xe8, 0x0b, 0x00, 0x00, 0x00, // 0:    call next
      0xf3, 0x90,                   // 5:  loop: pause
      0x0f, 0xae, 0xe8,             // 7:    lfence
      0xeb, 0xf9,                   // a:    jmp loop
      0xcc, 0xcc, 0xcc, 0xcc,       // c:    int3; .align 16
      0x4c, 0x89, 0x1c, 0x24,       // 10: next: mov %r11, (%rsp)
      0xc3,                         // 14:   ret
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15:   int3; padding
      0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a:   int3; padding
      0xcc,                         // 1f:   int3; padding
  };
  memcpy(buf, insn, sizeof(insn));
}

void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  const uint8_t insn[] = {
      0xf3, 0x0f, 0x1e, 0xfa,          // 0: endbr64
      0x4c, 0x8b, 0x1d, 0,    0, 0, 0, // 4: mov foo@GOTPLT(%rip), %r11
      0xe9, 0,    0,    0,    0,       // b: jmp plt+0
  };
  memcpy(buf, insn, sizeof(insn));

  write32le(buf + 7, sym.getGotPltVA() - pltEntryAddr - 11);
  write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);
}

static TargetInfo *getTargetInfo() {
  if (config->zRetpolineplt) {
    if (config->zNow) {
      static RetpolineZNow t;
      return &t;
    }
    static Retpoline t;
    return &t;
  }

#ifdef __OpenBSD__
  static IntelIBT t;
  return &t;
#else
  if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {
    static IntelIBT t;
    return &t;
  }

  static X86_64 t;
  return &t;
#endif
}

TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }
