1//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9///
/// \file
/// These data structures comprise the "normalized" view of
11/// mach-o object files. The normalized view is an in-memory only data structure
12/// which is always in native endianness and pointer size.
13///
14/// The normalized view easily converts to and from YAML using YAML I/O.
15///
16/// The normalized view converts to and from binary mach-o object files using
17/// the writeBinary() and readBinary() functions.
18///
19/// The normalized view converts to and from lld::Atoms using the
20/// normalizedToAtoms() and normalizedFromAtoms().
21///
22/// Overall, the conversion paths available look like:
23///
24///                 +---------------+
25///                 | binary mach-o |
26///                 +---------------+
27///                        ^
28///                        |
29///                        v
30///                  +------------+         +------+
31///                  | normalized |   <->   | yaml |
32///                  +------------+         +------+
33///                        ^
34///                        |
35///                        v
36///                    +-------+
37///                    | Atoms |
38///                    +-------+
39///
40
41#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
42#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
43
44#include "DebugInfo.h"
45#include "lld/Common/LLVM.h"
46#include "lld/Core/Error.h"
47#include "lld/ReaderWriter/MachOLinkingContext.h"
48#include "llvm/ADT/SmallString.h"
49#include "llvm/ADT/StringRef.h"
50#include "llvm/BinaryFormat/MachO.h"
51#include "llvm/Support/Allocator.h"
52#include "llvm/Support/Debug.h"
53#include "llvm/Support/ErrorOr.h"
54#include "llvm/Support/YAMLTraits.h"
55
56using llvm::BumpPtrAllocator;
57using llvm::yaml::Hex64;
58using llvm::yaml::Hex32;
59using llvm::yaml::Hex16;
60using llvm::yaml::Hex8;
61using llvm::yaml::SequenceTraits;
62using llvm::MachO::HeaderFileType;
63using llvm::MachO::BindType;
64using llvm::MachO::RebaseType;
65using llvm::MachO::NListType;
66using llvm::MachO::RelocationInfoType;
67using llvm::MachO::SectionType;
68using llvm::MachO::LoadCommandType;
69using llvm::MachO::ExportSymbolKind;
70using llvm::MachO::DataRegionType;
71
72namespace lld {
73namespace mach_o {
74namespace normalized {
75
76
77/// The real mach-o relocation record is 8-bytes on disk and is
78/// encoded in one of two different bit-field patterns.  This
79/// normalized form has the union of all possible fields.
80struct Relocation {
81  Relocation() : offset(0), scattered(false),
82                 type(llvm::MachO::GENERIC_RELOC_VANILLA),
83                 length(0), pcRel(false), isExtern(false), value(0),
84                 symbol(0) { }
85
86  Hex32               offset;
87  bool                scattered;
88  RelocationInfoType  type;
89  uint8_t             length;
90  bool                pcRel;
91  bool                isExtern;
92  Hex32               value;
93  uint32_t            symbol;
94};
95
96/// A typedef so that YAML I/O can treat this vector as a sequence.
97typedef std::vector<Relocation> Relocations;
98
99/// A typedef so that YAML I/O can process the raw bytes in a section.
100typedef std::vector<Hex8> ContentBytes;
101
102/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
103typedef std::vector<uint32_t> IndirectSymbols;
104
105/// A typedef so that YAML I/O can encode/decode section attributes.
106LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)
107
108/// A typedef so that YAML I/O can encode/decode section alignment.
109LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment)
110
111/// Mach-O has a 32-bit and 64-bit section record.  This normalized form
112/// can support either kind.
113struct Section {
114  Section() : type(llvm::MachO::S_REGULAR),
115              attributes(0), alignment(1), address(0) { }
116
117  StringRef       segmentName;
118  StringRef       sectionName;
119  SectionType     type;
120  SectionAttr     attributes;
121  SectionAlignment        alignment;
122  Hex64           address;
123  ArrayRef<uint8_t> content;
124  Relocations     relocations;
125  IndirectSymbols indirectSymbols;
126};
127
128
129/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
130LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)
131
132/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
133LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)
134
135/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
136/// type and scope and mixed in the same n_type field.  This normalized form
137/// works for any pointer size and separates out the type and scope.
138struct Symbol {
139  Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }
140
141  StringRef     name;
142  NListType     type;
143  SymbolScope   scope;
144  uint8_t       sect;
145  SymbolDesc    desc;
146  Hex64         value;
147};
148
149/// Check whether the given section type indicates a zero-filled section.
150// FIXME: Utility functions of this kind should probably be moved into
151//        llvm/Support.
152inline bool isZeroFillSection(SectionType T) {
153  return (T == llvm::MachO::S_ZEROFILL ||
154          T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL);
155}
156
157/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
158LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)
159
160/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz
161LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)
162
163/// Segments are only used in normalized final linked images (not in relocatable
164/// object files). They specify how a range of the file is loaded.
165struct Segment {
166  StringRef     name;
167  Hex64         address;
168  Hex64         size;
169  VMProtect     init_access;
170  VMProtect     max_access;
171};
172
173/// Only used in normalized final linked images to specify on which dylibs
174/// it depends.
175struct DependentDylib {
176  StringRef       path;
177  LoadCommandType kind;
178  PackedVersion   compatVersion;
179  PackedVersion   currentVersion;
180};
181
182/// A normalized rebasing entry.  Only used in normalized final linked images.
183struct RebaseLocation {
184  Hex32         segOffset;
185  uint8_t       segIndex;
186  RebaseType    kind;
187};
188
189/// A normalized binding entry.  Only used in normalized final linked images.
190struct BindLocation {
191  Hex32           segOffset;
192  uint8_t         segIndex;
193  BindType        kind;
194  bool            canBeNull;
195  int             ordinal;
196  StringRef       symbolName;
197  Hex64           addend;
198};
199
200/// A typedef so that YAML I/O can encode/decode export flags.
201LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)
202
203/// A normalized export entry.  Only used in normalized final linked images.
204struct Export {
205  StringRef         name;
206  Hex64             offset;
207  ExportSymbolKind  kind;
208  ExportFlags       flags;
209  Hex32             otherOffset;
210  StringRef         otherName;
211};
212
213/// A normalized data-in-code entry.
214struct DataInCode {
215  Hex32           offset;
216  Hex16           length;
217  DataRegionType  kind;
218};
219
220/// A typedef so that YAML I/O can encode/decode mach_header.flags.
221LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
222
223///
224struct NormalizedFile {
225  MachOLinkingContext::Arch   arch = MachOLinkingContext::arch_unknown;
226  HeaderFileType              fileType = llvm::MachO::MH_OBJECT;
227  FileFlags                   flags = 0;
228  std::vector<Segment>        segments; // Not used in object files.
229  std::vector<Section>        sections;
230
231  // Symbols sorted by kind.
232  std::vector<Symbol>         localSymbols;
233  std::vector<Symbol>         globalSymbols;
234  std::vector<Symbol>         undefinedSymbols;
235  std::vector<Symbol>         stabsSymbols;
236
237  // Maps to load commands with no LINKEDIT content (final linked images only).
238  std::vector<DependentDylib> dependentDylibs;
239  StringRef                   installName;        // dylibs only
240  PackedVersion               compatVersion = 0;  // dylibs only
241  PackedVersion               currentVersion = 0; // dylibs only
242  bool                        hasUUID = false;
243  bool                        hasMinVersionLoadCommand = false;
244  bool                        generateDataInCodeLoadCommand = false;
245  std::vector<StringRef>      rpaths;
246  Hex64                       entryAddress = 0;
247  Hex64                       stackSize = 0;
248  MachOLinkingContext::OS     os = MachOLinkingContext::OS::unknown;
249  Hex64                       sourceVersion = 0;
250  PackedVersion               minOSverson = 0;
251  PackedVersion               sdkVersion = 0;
252  LoadCommandType             minOSVersionKind = (LoadCommandType)0;
253
254  // Maps to load commands with LINKEDIT content (final linked images only).
255  Hex32                       pageSize = 0;
256  std::vector<RebaseLocation> rebasingInfo;
257  std::vector<BindLocation>   bindingInfo;
258  std::vector<BindLocation>   weakBindingInfo;
259  std::vector<BindLocation>   lazyBindingInfo;
260  std::vector<Export>         exportInfo;
261  std::vector<uint8_t>        functionStarts;
262  std::vector<DataInCode>     dataInCode;
263
264  // TODO:
265  // code-signature
266  // split-seg-info
267  // function-starts
268
269  // For any allocations in this struct which need to be owned by this struct.
270  BumpPtrAllocator            ownedAllocations;
271};
272
273/// Tests if a file is a non-fat mach-o object file.
274bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);
275
276/// If the buffer is a fat file with the request arch, then this function
277/// returns true with 'offset' and 'size' set to location of the arch slice
278/// within the buffer.  Otherwise returns false;
279bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
280                      uint32_t &offset, uint32_t &size);
281
282/// Reads a mach-o file and produces an in-memory normalized view.
283llvm::Expected<std::unique_ptr<NormalizedFile>>
284readBinary(std::unique_ptr<MemoryBuffer> &mb,
285           const MachOLinkingContext::Arch arch);
286
287/// Takes in-memory normalized view and writes a mach-o object file.
288llvm::Error writeBinary(const NormalizedFile &file, StringRef path);
289
290size_t headerAndLoadCommandsSize(const NormalizedFile &file,
291                                 bool includeFunctionStarts);
292
293
294/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
295llvm::Expected<std::unique_ptr<NormalizedFile>>
296readYaml(std::unique_ptr<MemoryBuffer> &mb);
297
298/// Writes a yaml encoded mach-o files given an in-memory normalized view.
299std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);
300
301llvm::Error
302normalizedObjectToAtoms(MachOFile *file,
303                        const NormalizedFile &normalizedFile,
304                        bool copyRefs);
305
306llvm::Error
307normalizedDylibToAtoms(MachODylibFile *file,
308                       const NormalizedFile &normalizedFile,
309                       bool copyRefs);
310
311/// Takes in-memory normalized dylib or object and parses it into lld::File
312llvm::Expected<std::unique_ptr<lld::File>>
313normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
314                  bool copyRefs);
315
316/// Takes atoms and generates a normalized macho-o view.
317llvm::Expected<std::unique_ptr<NormalizedFile>>
318normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);
319
320
321} // namespace normalized
322
323/// Class for interfacing mach-o yaml files into generic yaml parsing
324class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
325public:
326  MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
327    : _arch(arch) { }
328  bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
329private:
330  const MachOLinkingContext::Arch _arch;
331};
332
333} // namespace mach_o
334} // namespace lld
335
336#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
337