ARMErrataFix.cpp revision 360784
1//===- ARMErrataFix.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file implements Section Patching for the purpose of working around the
9// Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions
10// can result in an incorrect instruction fetch or processor deadlock." The
11// erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the
12// Cortex-A8. A high level description of the patching technique is given in
13// the opening comment of AArch64ErrataFix.cpp.
14//===----------------------------------------------------------------------===//
15
16#include "ARMErrataFix.h"
17
18#include "Config.h"
19#include "LinkerScript.h"
20#include "OutputSections.h"
21#include "Relocations.h"
22#include "Symbols.h"
23#include "SyntheticSections.h"
24#include "Target.h"
25#include "lld/Common/Memory.h"
26#include "lld/Common/Strings.h"
27#include "llvm/Support/Endian.h"
28#include "llvm/Support/raw_ostream.h"
29#include <algorithm>
30
31using namespace llvm;
32using namespace llvm::ELF;
33using namespace llvm::object;
34using namespace llvm::support;
35using namespace llvm::support::endian;
36
37namespace lld {
38namespace elf {
39
40// The documented title for Erratum 657417 is:
41// "A 32bit branch instruction that spans two 4K regions can result in an
42// incorrect instruction fetch or processor deadlock". Graphically using a
43// 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff
44// xxxxxx000 // Memory region 1 start
45// target:
46// ...
47// xxxxxxffe f7fe // First halfword of branch to target:
48// xxxxxx000 // Memory region 2 start
49// xxxxxx002 bfff // Second halfword of branch to target:
50//
51// The specific trigger conditions that can be detected at link time are:
52// - There is a 32-bit Thumb-2 branch instruction with an address of the form
53//   xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the
54//   second 2 bytes are in region 2.
// - The branch instruction is one of BLX, BL, B.w or Bcc.w.
56// - The instruction preceding the branch is a 32-bit non-branch instruction.
57// - The target of the branch is in region 1.
58//
59// The linker mitigation for the fix is to redirect any branch that meets the
60// erratum conditions to a patch section containing a branch to the target.
61//
62// As adding patch sections may move branches onto region boundaries the patch
63// must iterate until no more patches are added.
64//
65// Example, before:
66// 00000FFA func: NOP.w      // 32-bit Thumb function
67// 00000FFE       B.W func   // 32-bit branch spanning 2 regions, dest in 1st.
68// Example, after:
69// 00000FFA func: NOP.w      // 32-bit Thumb function
70// 00000FFE       B.w __CortexA8657417_00000FFE
71// 00001002       2 - bytes padding
72// 00001004 __CortexA8657417_00000FFE: B.w func
73
// A synthetic section that holds the replacement branch for one instance of
// the erratum sequence. The branch in the patchee is redirected to this
// section, which in turn branches to the original destination.
class Patch657417Section : public SyntheticSection {
public:
  // p is the section containing the affected branch, off the offset of that
  // branch within p, instr the decoded branch (first halfword in the upper 16
  // bits) and isARM selects whether the patch is written in ARM or Thumb state.
  Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM);

  // Write the patch branch into buf.
  void writeTo(uint8_t *buf) override;

  // A patch is always a single 4-byte branch instruction.
  size_t getSize() const override { return 4; }

  // Get the virtual address of the branch instruction at patcheeOffset.
  uint64_t getBranchAddr() const;

  // LLVM-style RTTI hook: a section is treated as a patch section when it is
  // a synthetic section whose name is ".text.patch".
  static bool classof(const SectionBase *d) {
    return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch";
  }

  // The Section we are patching.
  const InputSection *patchee;
  // The offset of the instruction in the Patchee section we are patching.
  uint64_t patcheeOffset;
  // A label for the start of the Patch that we can use as a relocation target.
  Symbol *patchSym;
  // A decoding of the branch instruction at patcheeOffset.
  uint32_t instr;
  // True if the patch is to be written in ARM state, otherwise the patch will
  // be written in Thumb state.
  bool isARM;
};
101
// Decide, from the first halfword of a pair alone, whether that halfword
// begins a 32-bit Thumb instruction.
// Reference: ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition,
// section A6.3 "32-bit Thumb instruction encoding":
// |             HW1                   |               HW2                |
// | 1 1 1 | op1 (2) | op2 (7) | x (4) |op|           x (15)              |
// A halfword with the top three bits set and op1 == 0b00 encodes a 16-bit
// instruction, so a 32-bit instruction requires op1 != 0b00.
static bool is32bitInstruction(uint16_t hw) {
  const bool leadingOnes = (hw >> 13) == 0x7;
  const unsigned op1 = (hw >> 11) & 0x3;
  return leadingOnes && op1 != 0;
}
114
115// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
116// section A6.3.4 Branches and miscellaneous control.
117// |             HW1              |               HW2                |
118// | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) |
119// op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)
120// op1 == 0x1               | Branch (B.W)
121// op1 == 1x0               | Branch with Link and Exchange (BLX.w)
122// op1 == 1x1               | Branch with Link (BL.W)
123
// True if instr (first halfword in the upper 16 bits) is a 32-bit Thumb
// conditional branch, Bcc.W. The encoding is shared with other instructions
// when bits [25:23] are all ones, so that pattern is excluded.
static bool isBcc(uint32_t instr) {
  if ((instr & 0xf800d000) != 0xf0008000)
    return false;
  const uint32_t opTop = (instr >> 23) & 0x7;
  return opTop != 0x7;
}
128
// True if instr (first halfword in the upper 16 bits) is a 32-bit Thumb
// unconditional branch, B.W.
static bool isB(uint32_t instr) {
  const uint32_t fixedBits = instr & 0xf800d000;
  return fixedBits == 0xf0009000;
}
130
// True if instr (first halfword in the upper 16 bits) is a 32-bit Thumb
// branch with link and exchange, BLX.
static bool isBLX(uint32_t instr) {
  const uint32_t fixedBits = instr & 0xf800d000;
  return fixedBits == 0xf000c000;
}
132
// True if instr (first halfword in the upper 16 bits) is a 32-bit Thumb
// branch with link, BL.
static bool isBL(uint32_t instr) {
  const uint32_t fixedBits = instr & 0xf800d000;
  return fixedBits == 0xf000d000;
}
134
135static bool is32bitBranch(uint32_t instr) {
136  return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr);
137}
138
139Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
140                                       uint32_t instr, bool isARM)
141    : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
142                       ".text.patch"),
143      patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
144  parent = p->getParent();
145  patchSym = addSyntheticLocal(
146      saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
147      isARM ? 0 : 1, getSize(), *this);
148  addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
149}
150
151uint64_t Patch657417Section::getBranchAddr() const {
152  return patchee->getVA(patcheeOffset);
153}
154
155// Given a branch instruction instr at sourceAddr work out its destination
156// address. This is only used when the branch instruction has no relocation.
157static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) {
158  uint8_t buf[4];
159  write16le(buf, instr >> 16);
160  write16le(buf + 2, instr & 0x0000ffff);
161  int64_t offset;
162  if (isBcc(instr))
163    offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19);
164  else if (isB(instr))
165    offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24);
166  else
167    offset = target->getImplicitAddend(buf, R_ARM_THM_CALL);
168  return sourceAddr + offset + 4;
169}
170
171void Patch657417Section::writeTo(uint8_t *buf) {
172  // The base instruction of the patch is always a 32-bit unconditional branch.
173  if (isARM)
174    write32le(buf, 0xea000000);
175  else
176    write32le(buf, 0x9000f000);
177  // If we have a relocation then apply it. For a SyntheticSection buf already
178  // has outSecOff added, but relocateAlloc also adds outSecOff so we need to
179  // subtract to avoid double counting.
180  if (!relocations.empty()) {
181    relocateAlloc(buf - outSecOff, buf - outSecOff + getSize());
182    return;
183  }
184
185  // If we don't have a relocation then we must calculate and write the offset
186  // ourselves.
187  // Get the destination offset from the addend in the branch instruction.
188  // We cannot use the instruction in the patchee section as this will have
189  // been altered to point to us!
190  uint64_t s = getThumbDestAddr(getBranchAddr(), instr);
191  uint64_t p = getVA(4);
192  target->relocateOne(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p);
193}
194
195// Given a branch instruction spanning two 4KiB regions, at offset off from the
196// start of isec, return true if the destination of the branch is within the
197// first of the two 4Kib regions.
198static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off,
199                                    uint32_t instr, const Relocation *r) {
200  uint64_t sourceAddr = isec->getVA(0) + off;
201  assert((sourceAddr & 0xfff) == 0xffe);
202  uint64_t destAddr = sourceAddr;
203  // If there is a branch relocation at the same offset we must use this to
204  // find the destination address as the branch could be indirected via a thunk
205  // or the PLT.
206  if (r) {
207    uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA();
208    // Account for Thumb PC bias, usually cancelled to 0 by addend of -4.
209    destAddr = dst + r->addend + 4;
210  } else {
211    // If there is no relocation, we must have an intra-section branch
212    // We must extract the offset from the addend manually.
213    destAddr = getThumbDestAddr(sourceAddr, instr);
214  }
215
216  return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000);
217}
218
219// Return true if a branch can reach a patch section placed after isec.
220// The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.
221static bool patchInRange(const InputSection *isec, uint64_t off,
222                         uint32_t instr) {
223
224  // We need the branch at source to reach a patch section placed immediately
225  // after isec. As there can be more than one patch in the patch section we
226  // add 0x100 as contingency to account for worst case of 1 branch every 4KiB
227  // for a 1 MiB range.
228  return target->inBranchRange(
229      isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off),
230      isec->getVA() + isec->getSize() + 0x100);
231}
232
// Outcome of scanning one candidate location for the erratum sequence. An
// off of 0 means that no patch is required; callers test off to decide
// whether to create a patch.
struct ScanResult {
  // Offset of branch within its InputSection.
  uint64_t off;
  // Cached decoding of the branch instruction, first halfword in the upper 16
  // bits.
  uint32_t instr;
  // Branch relocation at off. Will be nullptr if no relocation exists.
  Relocation *rel;
};
241
242// Detect the erratum sequence, returning the offset of the branch instruction
243// and a decoding of the branch. If the erratum sequence is not found then
244// return an offset of 0 for the branch. 0 is a safe value to use for no patch
245// as there must be at least one 32-bit non-branch instruction before the
246// branch so the minimum offset for a patch is 4.
247static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,
248                                           uint64_t limit) {
249  uint64_t isecAddr = isec->getVA(0);
250  // Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We
251  // need to check for a 32-bit instruction immediately before a 32-bit branch
252  // at 0xffe modulo 0x1000.
253  off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr;
254  if (off >= limit || limit - off < 8) {
255    // Need at least 2 4-byte sized instructions to trigger erratum.
256    off = limit;
257    return {0, 0, nullptr};
258  }
259
260  ScanResult scanRes = {0, 0, nullptr};
261  const uint8_t *buf = isec->data().begin();
262  // ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive
263  // little-endian halfwords.
264  const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);
265  uint16_t hw11 = *instBuf++;
266  uint16_t hw12 = *instBuf++;
267  uint16_t hw21 = *instBuf++;
268  uint16_t hw22 = *instBuf++;
269  if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) {
270    uint32_t instr1 = (hw11 << 16) | hw12;
271    uint32_t instr2 = (hw21 << 16) | hw22;
272    if (!is32bitBranch(instr1) && is32bitBranch(instr2)) {
273      // Find a relocation for the branch if it exists. This will be used
274      // to determine the target.
275      uint64_t branchOff = off + 4;
276      auto relIt = llvm::find_if(isec->relocations, [=](const Relocation &r) {
277        return r.offset == branchOff &&
278               (r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 ||
279                r.type == R_ARM_THM_CALL);
280      });
281      if (relIt != isec->relocations.end())
282        scanRes.rel = &(*relIt);
283      if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) {
284        if (patchInRange(isec, branchOff, instr2)) {
285          scanRes.off = branchOff;
286          scanRes.instr = instr2;
287        } else {
288          warn(toString(isec->file) +
289               ": skipping cortex-a8 657417 erratum sequence, section " +
290               isec->name + " is too large to patch");
291        }
292      }
293    }
294  }
295  off += 0x1000;
296  return scanRes;
297}
298
299void ARMErr657417Patcher::init() {
300  // The Arm ABI permits a mix of ARM, Thumb and Data in the same
301  // InputSection. We must only scan Thumb instructions to avoid false
302  // matches. We use the mapping symbols in the InputObjects to identify this
303  // data, caching the results in sectionMap so we don't have to recalculate
304  // it each pass.
305
306  // The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe
307  // half open intervals [Symbol Value, Next Symbol Value) of code and data
308  // within sections. If there is no next symbol then the half open interval is
309  // [Symbol Value, End of section). The type, code or data, is determined by
310  // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.
311  auto isArmMapSymbol = [](const Symbol *s) {
312    return s->getName() == "$a" || s->getName().startswith("$a.");
313  };
314  auto isThumbMapSymbol = [](const Symbol *s) {
315    return s->getName() == "$t" || s->getName().startswith("$t.");
316  };
317  auto isDataMapSymbol = [](const Symbol *s) {
318    return s->getName() == "$d" || s->getName().startswith("$d.");
319  };
320
321  // Collect mapping symbols for every executable InputSection.
322  for (InputFile *file : objectFiles) {
323    auto *f = cast<ObjFile<ELF32LE>>(file);
324    for (Symbol *s : f->getLocalSymbols()) {
325      auto *def = dyn_cast<Defined>(s);
326      if (!def)
327        continue;
328      if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) &&
329          !isDataMapSymbol(def))
330        continue;
331      if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
332        if (sec->flags & SHF_EXECINSTR)
333          sectionMap[sec].push_back(def);
334    }
335  }
336  // For each InputSection make sure the mapping symbols are in sorted in
337  // ascending order and are in alternating Thumb, non-Thumb order.
338  for (auto &kv : sectionMap) {
339    std::vector<const Defined *> &mapSyms = kv.second;
340    llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
341      return a->value < b->value;
342    });
343    mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(),
344                              [=](const Defined *a, const Defined *b) {
345                                return (isThumbMapSymbol(a) ==
346                                        isThumbMapSymbol(b));
347                              }),
348                  mapSyms.end());
349    // Always start with a Thumb Mapping Symbol
350    if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front()))
351      mapSyms.erase(mapSyms.begin());
352  }
353  initialized = true;
354}
355
356void ARMErr657417Patcher::insertPatches(
357    InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) {
358  uint64_t spacing = 0x100000 - 0x7500;
359  uint64_t isecLimit;
360  uint64_t prevIsecLimit = isd.sections.front()->outSecOff;
361  uint64_t patchUpperBound = prevIsecLimit + spacing;
362  uint64_t outSecAddr = isd.sections.front()->getParent()->addr;
363
364  // Set the outSecOff of patches to the place where we want to insert them.
365  // We use a similar strategy to initial thunk placement, using 1 MiB as the
366  // range of the Thumb-2 conditional branch with a contingency accounting for
367  // thunk generation.
368  auto patchIt = patches.begin();
369  auto patchEnd = patches.end();
370  for (const InputSection *isec : isd.sections) {
371    isecLimit = isec->outSecOff + isec->getSize();
372    if (isecLimit > patchUpperBound) {
373      for (; patchIt != patchEnd; ++patchIt) {
374        if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit)
375          break;
376        (*patchIt)->outSecOff = prevIsecLimit;
377      }
378      patchUpperBound = prevIsecLimit + spacing;
379    }
380    prevIsecLimit = isecLimit;
381  }
382  for (; patchIt != patchEnd; ++patchIt)
383    (*patchIt)->outSecOff = isecLimit;
384
385  // Merge all patch sections. We use the outSecOff assigned above to
386  // determine the insertion point. This is ok as we only merge into an
387  // InputSectionDescription once per pass, and at the end of the pass
388  // assignAddresses() will recalculate all the outSecOff values.
389  std::vector<InputSection *> tmp;
390  tmp.reserve(isd.sections.size() + patches.size());
391  auto mergeCmp = [](const InputSection *a, const InputSection *b) {
392    if (a->outSecOff != b->outSecOff)
393      return a->outSecOff < b->outSecOff;
394    return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b);
395  };
396  std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(),
397             patches.end(), std::back_inserter(tmp), mergeCmp);
398  isd.sections = std::move(tmp);
399}
400
401// Given a branch instruction described by ScanRes redirect it to a patch
402// section containing an unconditional branch instruction to the target.
403// Ensure that this patch section is 4-byte aligned so that the branch cannot
404// span two 4 KiB regions. Place the patch section so that it is always after
405// isec so the branch we are patching always goes forwards.
406static void implementPatch(ScanResult sr, InputSection *isec,
407                           std::vector<Patch657417Section *> &patches) {
408
409  log("detected cortex-a8-657419 erratum sequence starting at " +
410      utohexstr(isec->getVA(sr.off)) + " in unpatched output.");
411  Patch657417Section *psec;
412  // We have two cases to deal with.
413  // Case 1. There is a relocation at patcheeOffset to a symbol. The
414  // unconditional branch in the patch must have a relocation so that any
415  // further redirection via the PLT or a Thunk happens as normal. At
416  // patcheeOffset we redirect the existing relocation to a Symbol defined at
417  // the start of the patch section.
418  //
419  // Case 2. There is no relocation at patcheeOffset. We are unlikely to have
420  // a symbol that we can use as a target for a relocation in the patch section.
421  // Luckily we know that the destination cannot be indirected via the PLT or
422  // a Thunk so we can just write the destination directly.
423  if (sr.rel) {
424    // Case 1. We have an existing relocation to redirect to patch and a
425    // Symbol target.
426
427    // Create a branch relocation for the unconditional branch in the patch.
428    // This can be redirected via the PLT or Thunks.
429    RelType patchRelType = R_ARM_THM_JUMP24;
430    int64_t patchRelAddend = sr.rel->addend;
431    bool destIsARM = false;
432    if (isBL(sr.instr) || isBLX(sr.instr)) {
433      // The final target of the branch may be ARM or Thumb, if the target
434      // is ARM then we write the patch in ARM state to avoid a state change
435      // Thunk from the patch to the target.
436      uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA()
437                                                       : sr.rel->sym->getVA();
438      destIsARM = (dstSymAddr & 1) == 0;
439    }
440    psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM);
441    if (destIsARM) {
442      // The patch will be in ARM state. Use an ARM relocation and account for
443      // the larger ARM PC-bias of 8 rather than Thumb's 4.
444      patchRelType = R_ARM_JUMP24;
445      patchRelAddend -= 4;
446    }
447    psec->relocations.push_back(
448        Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym});
449    // Redirect the existing branch relocation to the patch.
450    sr.rel->expr = R_PC;
451    sr.rel->addend = -4;
452    sr.rel->sym = psec->patchSym;
453  } else {
454    // Case 2. We do not have a relocation to the patch. Add a relocation of the
455    // appropriate type to the patch at patcheeOffset.
456
457    // The destination is ARM if we have a BLX.
458    psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr));
459    RelType type;
460    if (isBcc(sr.instr))
461      type = R_ARM_THM_JUMP19;
462    else if (isB(sr.instr))
463      type = R_ARM_THM_JUMP24;
464    else
465      type = R_ARM_THM_CALL;
466    isec->relocations.push_back(
467        Relocation{R_PC, type, sr.off, -4, psec->patchSym});
468  }
469  patches.push_back(psec);
470}
471
472// Scan all the instructions in InputSectionDescription, for each instance of
473// the erratum sequence create a Patch657417Section. We return the list of
474// Patch657417Sections that need to be applied to the InputSectionDescription.
475std::vector<Patch657417Section *>
476ARMErr657417Patcher::patchInputSectionDescription(
477    InputSectionDescription &isd) {
478  std::vector<Patch657417Section *> patches;
479  for (InputSection *isec : isd.sections) {
480    // LLD doesn't use the erratum sequence in SyntheticSections.
481    if (isa<SyntheticSection>(isec))
482      continue;
483    // Use sectionMap to make sure we only scan Thumb code and not Arm or inline
484    // data. We have already sorted mapSyms in ascending order and removed
485    // consecutive mapping symbols of the same type. Our range of executable
486    // instructions to scan is therefore [thumbSym->value, nonThumbSym->value)
487    // or [thumbSym->value, section size).
488    std::vector<const Defined *> &mapSyms = sectionMap[isec];
489
490    auto thumbSym = mapSyms.begin();
491    while (thumbSym != mapSyms.end()) {
492      auto nonThumbSym = std::next(thumbSym);
493      uint64_t off = (*thumbSym)->value;
494      uint64_t limit = (nonThumbSym == mapSyms.end()) ? isec->data().size()
495                                                      : (*nonThumbSym)->value;
496
497      while (off < limit) {
498        ScanResult sr = scanCortexA8Errata657417(isec, off, limit);
499        if (sr.off)
500          implementPatch(sr, isec, patches);
501      }
502      if (nonThumbSym == mapSyms.end())
503        break;
504      thumbSym = std::next(nonThumbSym);
505    }
506  }
507  return patches;
508}
509
510bool ARMErr657417Patcher::createFixes() {
511  if (!initialized)
512    init();
513
514  bool addressesChanged = false;
515  for (OutputSection *os : outputSections) {
516    if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
517      continue;
518    for (BaseCommand *bc : os->sectionCommands)
519      if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
520        std::vector<Patch657417Section *> patches =
521            patchInputSectionDescription(*isd);
522        if (!patches.empty()) {
523          insertPatches(*isd, patches);
524          addressesChanged = true;
525        }
526      }
527  }
528  return addressesChanged;
529}
530
531} // namespace elf
532} // namespace lld
533