1//===- Core/DefinedAtom.h - An Atom with content --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_CORE_DEFINED_ATOM_H
10#define LLD_CORE_DEFINED_ATOM_H
11
12#include "lld/Common/LLVM.h"
13#include "lld/Core/Atom.h"
14#include "lld/Core/Reference.h"
15#include "llvm/Support/ErrorHandling.h"
16
17namespace lld {
18class File;
19
20/// The fundamental unit of linking.
21///
22/// A C function or global variable is an atom.  An atom has content and
23/// attributes. The content of a function atom is the instructions that
24/// implement the function.  The content of a global variable atom is its
25/// initial bytes.
26///
27/// Here are some example attribute sets for common atoms. If a particular
28/// attribute is not listed, the default values are:  definition=regular,
29/// sectionChoice=basedOnContent, scope=translationUnit, merge=no,
30/// deadStrip=normal, interposable=no
31///
32///  C function:  void foo() {} <br>
33///    name=foo, type=code, perm=r_x, scope=global
34///
///  C static function:  static void func() {} <br>
36///    name=func, type=code, perm=r_x
37///
38///  C global variable:  int count = 1; <br>
39///    name=count, type=data, perm=rw_, scope=global
40///
41///  C tentative definition:  int bar; <br>
42///    name=bar, type=zerofill, perm=rw_, scope=global,
43///    merge=asTentative, interposable=yesAndRuntimeWeak
44///
45///  Uninitialized C static variable:  static int stuff; <br>
46///    name=stuff, type=zerofill, perm=rw_
47///
48///  Weak C function:  __attribute__((weak)) void foo() {} <br>
49///    name=foo, type=code, perm=r_x, scope=global, merge=asWeak
50///
51///  Hidden C function:  __attribute__((visibility("hidden"))) void foo() {}<br>
52///    name=foo, type=code, perm=r_x, scope=linkageUnit
53///
54///  No-dead-strip function:  __attribute__((used)) void foo() {} <br>
55///    name=foo, type=code, perm=r_x, scope=global, deadStrip=never
56///
57///  Non-inlined C++ inline method:  inline void Foo::doit() {} <br>
58///    name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
///    merge=asWeak
60///
61///  Non-inlined C++ inline method whose address is taken:
62///     inline void Foo::doit() {} <br>
63///    name=_ZN3Foo4doitEv, type=code, perm=r_x, scope=global,
///    merge=asWeakAndAddressUsed
65///
66///  literal c-string:  "hello" <br>
67///    name="" type=cstring, perm=r__, scope=linkageUnit
68///
69///  literal double:  1.234 <br>
70///    name="" type=literal8, perm=r__, scope=linkageUnit
71///
72///  constant:  { 1,2,3 } <br>
73///    name="" type=constant, perm=r__, scope=linkageUnit
74///
75///  Pointer to initializer function:  <br>
76///    name="" type=initializer, perm=rw_l,
77///    sectionChoice=customRequired
78///
///  C function placed in custom section:  __attribute__((section("__foo")))
///                                        void foo() {} <br>
81///    name=foo, type=code, perm=r_x, scope=global,
82///    sectionChoice=customRequired, customSectionName=__foo
83///
84class DefinedAtom : public Atom {
85public:
86  enum Interposable {
87    interposeNo,            // linker can directly bind uses of this atom
88    interposeYes,           // linker must indirect (through GOT) uses
89    interposeYesAndRuntimeWeak // must indirect and mark symbol weak in final
90                               // linked image
91  };
92
93  enum Merge {
94    mergeNo,                // Another atom with same name is error
95    mergeAsTentative,       // Is ANSI C tentative definition, can be coalesced
96    mergeAsWeak,            // Is C++ inline definition that was not inlined,
97                            // but address was not taken, so atom can be hidden
98                            // by linker
99    mergeAsWeakAndAddressUsed, // Is C++ definition inline definition whose
100                               // address was taken.
101    mergeSameNameAndSize,   // Another atom with different size is error
102    mergeByLargestSection,  // Choose an atom whose section is the largest.
103    mergeByContent,         // Merge with other constants with same content.
104  };
105
106  enum ContentType {
107    typeUnknown,            // for use with definitionUndefined
108    typeMachHeader,         // atom representing mach_header [Darwin]
109    typeCode,               // executable code
110    typeResolver,           // function which returns address of target
111    typeBranchIsland,       // linker created for large binaries
112    typeBranchShim,         // linker created to switch thumb mode
113    typeStub,               // linker created for calling external function
114    typeStubHelper,         // linker created for initial stub binding
115    typeConstant,           // a read-only constant
116    typeCString,            // a zero terminated UTF8 C string
117    typeUTF16String,        // a zero terminated UTF16 string
118    typeCFI,                // a FDE or CIE from dwarf unwind info
119    typeLSDA,               // extra unwinding info
120    typeLiteral4,           // a four-btye read-only constant
121    typeLiteral8,           // an eight-btye read-only constant
122    typeLiteral16,          // a sixteen-btye read-only constant
123    typeData,               // read-write data
124    typeDataFast,           // allow data to be quickly accessed
125    typeZeroFill,           // zero-fill data
126    typeZeroFillFast,       // allow zero-fill data to be quicky accessed
127    typeConstData,          // read-only data after dynamic linker is done
128    typeObjC1Class,         // ObjC1 class [Darwin]
129    typeLazyPointer,        // pointer through which a stub jumps
130    typeLazyDylibPointer,   // pointer through which a stub jumps [Darwin]
131    typeNonLazyPointer,     // pointer to external symbol
132    typeCFString,           // NS/CFString object [Darwin]
133    typeGOT,                // pointer to external symbol
134    typeInitializerPtr,     // pointer to initializer function
135    typeTerminatorPtr,      // pointer to terminator function
136    typeCStringPtr,         // pointer to UTF8 C string [Darwin]
137    typeObjCClassPtr,       // pointer to ObjC class [Darwin]
138    typeObjC2CategoryList,  // pointers to ObjC category [Darwin]
139    typeObjCImageInfo,      // pointer to ObjC class [Darwin]
140    typeObjCMethodList,     // pointer to ObjC method list [Darwin]
141    typeDTraceDOF,          // runtime data for Dtrace [Darwin]
142    typeInterposingTuples,  // tuples of interposing info for dyld [Darwin]
143    typeTempLTO,            // temporary atom for bitcode reader
144    typeCompactUnwindInfo,  // runtime data for unwinder [Darwin]
145    typeProcessedUnwindInfo,// compressed compact unwind info [Darwin]
146    typeThunkTLV,           // thunk used to access a TLV [Darwin]
147    typeTLVInitialData,     // initial data for a TLV [Darwin]
148    typeTLVInitialZeroFill, // TLV initial zero fill data [Darwin]
149    typeTLVInitializerPtr,  // pointer to thread local initializer [Darwin]
150    typeDSOHandle,          // atom representing DSO handle [Darwin]
151    typeSectCreate,         // Created via the -sectcreate option [Darwin]
152  };
153
154  // Permission bits for atoms and segments. The order of these values are
155  // important, because the layout pass may sort atoms by permission if other
156  // attributes are the same.
157  enum ContentPermissions {
158    perm___  = 0,           // mapped as unaccessible
159    permR__  = 8,           // mapped read-only
160    permRW_  = 8 + 2,       // mapped readable and writable
161    permRW_L = 8 + 2 + 1,   // initially mapped r/w, then made read-only
162                            // loader writable
163    permR_X  = 8 + 4,       // mapped readable and executable
164    permRWX  = 8 + 2 + 4,   // mapped readable and writable and executable
165    permUnknown = 16        // unknown or invalid permissions
166  };
167
168  enum SectionChoice {
169    sectionBasedOnContent,  // linker infers final section based on content
170    sectionCustomPreferred, // linker may place in specific section
171    sectionCustomRequired   // linker must place in specific section
172  };
173
174  enum DeadStripKind {
175    deadStripNormal,        // linker may dead strip this atom
176    deadStripNever,         // linker must never dead strip this atom
177    deadStripAlways         // linker must remove this atom if unused
178  };
179
180  enum DynamicExport {
181    /// The linker may or may not export this atom dynamically depending
182    ///   on the output type and other context of the link.
183    dynamicExportNormal,
184    /// The linker will always export this atom dynamically.
185    dynamicExportAlways,
186  };
187
188  // Attributes describe a code model used by the atom.
189  enum CodeModel {
190    codeNA,           // no specific code model
191    // MIPS code models
192    codeMipsPIC,      // PIC function in a PIC / non-PIC mixed file
193    codeMipsMicro,    // microMIPS instruction encoding
194    codeMipsMicroPIC, // microMIPS instruction encoding + PIC
195    codeMips16,       // MIPS-16 instruction encoding
196    // ARM code models
197    codeARMThumb,     // ARM Thumb instruction set
198    codeARM_a,        // $a-like mapping symbol (for ARM code)
199    codeARM_d,        // $d-like mapping symbol (for data)
200    codeARM_t,        // $t-like mapping symbol (for Thumb code)
201  };
202
203  struct Alignment {
204    Alignment(int v, int m = 0) : value(v), modulus(m) {}
205
206    uint16_t value;
207    uint16_t modulus;
208
209    bool operator==(const Alignment &rhs) const {
210      return (value == rhs.value) && (modulus == rhs.modulus);
211    }
212  };
213
214  /// returns a value for the order of this Atom within its file.
215  ///
216  /// This is used by the linker to order the layout of Atoms so that the
217  /// resulting image is stable and reproducible.
218  virtual uint64_t ordinal() const = 0;
219
220  /// the number of bytes of space this atom's content will occupy in the
221  /// final linked image.
222  ///
223  /// For a function atom, it is the number of bytes of code in the function.
224  virtual uint64_t size() const = 0;
225
226  /// The size of the section from which the atom is instantiated.
227  ///
228  /// Merge::mergeByLargestSection is defined in terms of section size
229  /// and not in terms of atom size, so we need this function separate
230  /// from size().
231  virtual uint64_t sectionSize() const { return 0; }
232
233  /// The visibility of this atom to other atoms.
234  ///
235  /// C static functions have scope scopeTranslationUnit.  Regular C functions
236  /// have scope scopeGlobal.  Functions compiled with visibility=hidden have
237  /// scope scopeLinkageUnit so they can be see by other atoms being linked but
238  /// not by the OS loader.
239  virtual Scope scope() const = 0;
240
241  /// Whether the linker should use direct or indirect access to this
242  /// atom.
243  virtual Interposable interposable() const = 0;
244
245  /// how the linker should handle if multiple atoms have the same name.
246  virtual Merge merge() const = 0;
247
248  /// The type of this atom, such as code or data.
249  virtual ContentType contentType() const = 0;
250
251  /// The alignment constraints on how this atom must be laid out in the
252  /// final linked image (e.g. 16-byte aligned).
253  virtual Alignment alignment() const = 0;
254
255  /// Whether this atom must be in a specially named section in the final
256  /// linked image, or if the linker can infer the section based on the
257  /// contentType().
258  virtual SectionChoice sectionChoice() const = 0;
259
260  /// If sectionChoice() != sectionBasedOnContent, then this return the
261  /// name of the section the atom should be placed into.
262  virtual StringRef customSectionName() const = 0;
263
264  /// constraints on whether the linker may dead strip away this atom.
265  virtual DeadStripKind deadStrip() const = 0;
266
267  /// Under which conditions should this atom be dynamically exported.
268  virtual DynamicExport dynamicExport() const {
269    return dynamicExportNormal;
270  }
271
272  /// Code model used by the atom.
273  virtual CodeModel codeModel() const { return codeNA; }
274
275  /// Returns the OS memory protections required for this atom's content
276  /// at runtime.
277  ///
278  /// A function atom is R_X, a global variable is RW_, and a read-only constant
279  /// is R__.
280  virtual ContentPermissions permissions() const;
281
282  /// returns a reference to the raw (unrelocated) bytes of this Atom's
283  /// content.
284  virtual ArrayRef<uint8_t> rawContent() const = 0;
285
286  /// This class abstracts iterating over the sequence of References
287  /// in an Atom.  Concrete instances of DefinedAtom must implement
288  /// the derefIterator() and incrementIterator() methods.
289  class reference_iterator {
290  public:
291    reference_iterator(const DefinedAtom &a, const void *it)
292      : _atom(a), _it(it) { }
293
294    const Reference *operator*() const {
295      return _atom.derefIterator(_it);
296    }
297
298    const Reference *operator->() const {
299      return _atom.derefIterator(_it);
300    }
301
302    bool operator==(const reference_iterator &other) const {
303      return _it == other._it;
304    }
305
306    bool operator!=(const reference_iterator &other) const {
307      return !(*this == other);
308    }
309
310    reference_iterator &operator++() {
311      _atom.incrementIterator(_it);
312      return *this;
313    }
314  private:
315    const DefinedAtom &_atom;
316    const void *_it;
317  };
318
319  /// Returns an iterator to the beginning of this Atom's References.
320  virtual reference_iterator begin() const = 0;
321
322  /// Returns an iterator to the end of this Atom's References.
323  virtual reference_iterator end() const = 0;
324
325  /// Adds a reference to this atom.
326  virtual void addReference(Reference::KindNamespace ns,
327                            Reference::KindArch arch,
328                            Reference::KindValue kindValue, uint64_t off,
329                            const Atom *target, Reference::Addend a) {
330    llvm_unreachable("Subclass does not permit adding references");
331  }
332
333  static bool classof(const Atom *a) {
334    return a->definition() == definitionRegular;
335  }
336
337  /// Utility for deriving permissions from content type
338  static ContentPermissions permissions(ContentType type);
339
340  /// Utility function to check if the atom occupies file space
341  bool occupiesDiskSpace() const {
342    ContentType atomContentType = contentType();
343    return !(atomContentType == DefinedAtom::typeZeroFill ||
344             atomContentType == DefinedAtom::typeZeroFillFast ||
345             atomContentType == DefinedAtom::typeTLVInitialZeroFill);
346  }
347
348  /// Utility function to check if relocations in this atom to other defined
349  /// atoms can be implicitly generated, and so we don't need to explicitly
350  /// emit those relocations.
351  bool relocsToDefinedCanBeImplicit() const {
352    ContentType atomContentType = contentType();
353    return atomContentType == typeCFI;
354  }
355
356protected:
357  // DefinedAtom is an abstract base class. Only subclasses can access
358  // constructor.
359  DefinedAtom() : Atom(definitionRegular) { }
360
361  ~DefinedAtom() override = default;
362
363  /// Returns a pointer to the Reference object that the abstract
364  /// iterator "points" to.
365  virtual const Reference *derefIterator(const void *iter) const = 0;
366
367  /// Adjusts the abstract iterator to "point" to the next Reference
368  /// object for this Atom.
369  virtual void incrementIterator(const void *&iter) const = 0;
370};
371} // end namespace lld
372
373#endif
374