1//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9/// 10/// \file These data structures comprise the "normalized" view of 11/// mach-o object files. The normalized view is an in-memory only data structure 12/// which is always in native endianness and pointer size. 13/// 14/// The normalized view easily converts to and from YAML using YAML I/O. 15/// 16/// The normalized view converts to and from binary mach-o object files using 17/// the writeBinary() and readBinary() functions. 18/// 19/// The normalized view converts to and from lld::Atoms using the 20/// normalizedToAtoms() and normalizedFromAtoms(). 21/// 22/// Overall, the conversion paths available look like: 23/// 24/// +---------------+ 25/// | binary mach-o | 26/// +---------------+ 27/// ^ 28/// | 29/// v 30/// +------------+ +------+ 31/// | normalized | <-> | yaml | 32/// +------------+ +------+ 33/// ^ 34/// | 35/// v 36/// +-------+ 37/// | Atoms | 38/// +-------+ 39/// 40 41#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H 42#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H 43 44#include "DebugInfo.h" 45#include "lld/Common/LLVM.h" 46#include "lld/Core/Error.h" 47#include "lld/ReaderWriter/MachOLinkingContext.h" 48#include "llvm/ADT/SmallString.h" 49#include "llvm/ADT/StringRef.h" 50#include "llvm/BinaryFormat/MachO.h" 51#include "llvm/Support/Allocator.h" 52#include "llvm/Support/Debug.h" 53#include "llvm/Support/ErrorOr.h" 54#include "llvm/Support/YAMLTraits.h" 55 56using llvm::BumpPtrAllocator; 57using llvm::yaml::Hex64; 58using llvm::yaml::Hex32; 59using llvm::yaml::Hex16; 60using llvm::yaml::Hex8; 61using llvm::yaml::SequenceTraits; 62using llvm::MachO::HeaderFileType; 63using llvm::MachO::BindType; 64using llvm::MachO::RebaseType; 65using llvm::MachO::NListType; 66using llvm::MachO::RelocationInfoType; 67using llvm::MachO::SectionType; 68using llvm::MachO::LoadCommandType; 69using llvm::MachO::ExportSymbolKind; 70using llvm::MachO::DataRegionType; 71 72namespace lld { 73namespace mach_o { 74namespace normalized { 75 76 77/// The real mach-o relocation record is 8-bytes on disk and is 78/// encoded in one of two different bit-field patterns. This 79/// normalized form has the union of all possible fields. 80struct Relocation { 81 Relocation() : offset(0), scattered(false), 82 type(llvm::MachO::GENERIC_RELOC_VANILLA), 83 length(0), pcRel(false), isExtern(false), value(0), 84 symbol(0) { } 85 86 Hex32 offset; 87 bool scattered; 88 RelocationInfoType type; 89 uint8_t length; 90 bool pcRel; 91 bool isExtern; 92 Hex32 value; 93 uint32_t symbol; 94}; 95 96/// A typedef so that YAML I/O can treat this vector as a sequence. 97typedef std::vector<Relocation> Relocations; 98 99/// A typedef so that YAML I/O can process the raw bytes in a section. 100typedef std::vector<Hex8> ContentBytes; 101 102/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence. 103typedef std::vector<uint32_t> IndirectSymbols; 104 105/// A typedef so that YAML I/O can encode/decode section attributes. 106LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr) 107 108/// A typedef so that YAML I/O can encode/decode section alignment. 109LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment) 110 111/// Mach-O has a 32-bit and 64-bit section record. This normalized form 112/// can support either kind. 113struct Section { 114 Section() : type(llvm::MachO::S_REGULAR), 115 attributes(0), alignment(1), address(0) { } 116 117 StringRef segmentName; 118 StringRef sectionName; 119 SectionType type; 120 SectionAttr attributes; 121 SectionAlignment alignment; 122 Hex64 address; 123 ArrayRef<uint8_t> content; 124 Relocations relocations; 125 IndirectSymbols indirectSymbols; 126}; 127 128 129/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist. 130LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope) 131 132/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist. 133LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc) 134 135/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol 136/// type and scope and mixed in the same n_type field. This normalized form 137/// works for any pointer size and separates out the type and scope. 138struct Symbol { 139 Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { } 140 141 StringRef name; 142 NListType type; 143 SymbolScope scope; 144 uint8_t sect; 145 SymbolDesc desc; 146 Hex64 value; 147}; 148 149/// Check whether the given section type indicates a zero-filled section. 150// FIXME: Utility functions of this kind should probably be moved into 151// llvm/Support. 152inline bool isZeroFillSection(SectionType T) { 153 return (T == llvm::MachO::S_ZEROFILL || 154 T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL); 155} 156 157/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment. 158LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect) 159 160/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz 161LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion) 162 163/// Segments are only used in normalized final linked images (not in relocatable 164/// object files). They specify how a range of the file is loaded. 165struct Segment { 166 StringRef name; 167 Hex64 address; 168 Hex64 size; 169 VMProtect init_access; 170 VMProtect max_access; 171}; 172 173/// Only used in normalized final linked images to specify on which dylibs 174/// it depends. 175struct DependentDylib { 176 StringRef path; 177 LoadCommandType kind; 178 PackedVersion compatVersion; 179 PackedVersion currentVersion; 180}; 181 182/// A normalized rebasing entry. Only used in normalized final linked images. 183struct RebaseLocation { 184 Hex32 segOffset; 185 uint8_t segIndex; 186 RebaseType kind; 187}; 188 189/// A normalized binding entry. Only used in normalized final linked images. 190struct BindLocation { 191 Hex32 segOffset; 192 uint8_t segIndex; 193 BindType kind; 194 bool canBeNull; 195 int ordinal; 196 StringRef symbolName; 197 Hex64 addend; 198}; 199 200/// A typedef so that YAML I/O can encode/decode export flags. 201LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags) 202 203/// A normalized export entry. Only used in normalized final linked images. 204struct Export { 205 StringRef name; 206 Hex64 offset; 207 ExportSymbolKind kind; 208 ExportFlags flags; 209 Hex32 otherOffset; 210 StringRef otherName; 211}; 212 213/// A normalized data-in-code entry. 214struct DataInCode { 215 Hex32 offset; 216 Hex16 length; 217 DataRegionType kind; 218}; 219 220/// A typedef so that YAML I/O can encode/decode mach_header.flags. 221LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags) 222 223/// 224struct NormalizedFile { 225 MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; 226 HeaderFileType fileType = llvm::MachO::MH_OBJECT; 227 FileFlags flags = 0; 228 std::vector<Segment> segments; // Not used in object files. 229 std::vector<Section> sections; 230 231 // Symbols sorted by kind. 232 std::vector<Symbol> localSymbols; 233 std::vector<Symbol> globalSymbols; 234 std::vector<Symbol> undefinedSymbols; 235 std::vector<Symbol> stabsSymbols; 236 237 // Maps to load commands with no LINKEDIT content (final linked images only). 238 std::vector<DependentDylib> dependentDylibs; 239 StringRef installName; // dylibs only 240 PackedVersion compatVersion = 0; // dylibs only 241 PackedVersion currentVersion = 0; // dylibs only 242 bool hasUUID = false; 243 bool hasMinVersionLoadCommand = false; 244 bool generateDataInCodeLoadCommand = false; 245 std::vector<StringRef> rpaths; 246 Hex64 entryAddress = 0; 247 Hex64 stackSize = 0; 248 MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; 249 Hex64 sourceVersion = 0; 250 PackedVersion minOSverson = 0; 251 PackedVersion sdkVersion = 0; 252 LoadCommandType minOSVersionKind = (LoadCommandType)0; 253 254 // Maps to load commands with LINKEDIT content (final linked images only). 255 Hex32 pageSize = 0; 256 std::vector<RebaseLocation> rebasingInfo; 257 std::vector<BindLocation> bindingInfo; 258 std::vector<BindLocation> weakBindingInfo; 259 std::vector<BindLocation> lazyBindingInfo; 260 std::vector<Export> exportInfo; 261 std::vector<uint8_t> functionStarts; 262 std::vector<DataInCode> dataInCode; 263 264 // TODO: 265 // code-signature 266 // split-seg-info 267 // function-starts 268 269 // For any allocations in this struct which need to be owned by this struct. 270 BumpPtrAllocator ownedAllocations; 271}; 272 273/// Tests if a file is a non-fat mach-o object file. 274bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch); 275 276/// If the buffer is a fat file with the request arch, then this function 277/// returns true with 'offset' and 'size' set to location of the arch slice 278/// within the buffer. Otherwise returns false; 279bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, 280 uint32_t &offset, uint32_t &size); 281 282/// Reads a mach-o file and produces an in-memory normalized view. 283llvm::Expected<std::unique_ptr<NormalizedFile>> 284readBinary(std::unique_ptr<MemoryBuffer> &mb, 285 const MachOLinkingContext::Arch arch); 286 287/// Takes in-memory normalized view and writes a mach-o object file. 288llvm::Error writeBinary(const NormalizedFile &file, StringRef path); 289 290size_t headerAndLoadCommandsSize(const NormalizedFile &file, 291 bool includeFunctionStarts); 292 293 294/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. 295llvm::Expected<std::unique_ptr<NormalizedFile>> 296readYaml(std::unique_ptr<MemoryBuffer> &mb); 297 298/// Writes a yaml encoded mach-o files given an in-memory normalized view. 299std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out); 300 301llvm::Error 302normalizedObjectToAtoms(MachOFile *file, 303 const NormalizedFile &normalizedFile, 304 bool copyRefs); 305 306llvm::Error 307normalizedDylibToAtoms(MachODylibFile *file, 308 const NormalizedFile &normalizedFile, 309 bool copyRefs); 310 311/// Takes in-memory normalized dylib or object and parses it into lld::File 312llvm::Expected<std::unique_ptr<lld::File>> 313normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, 314 bool copyRefs); 315 316/// Takes atoms and generates a normalized macho-o view. 317llvm::Expected<std::unique_ptr<NormalizedFile>> 318normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt); 319 320 321} // namespace normalized 322 323/// Class for interfacing mach-o yaml files into generic yaml parsing 324class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { 325public: 326 MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch) 327 : _arch(arch) { } 328 bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override; 329private: 330 const MachOLinkingContext::Arch _arch; 331}; 332 333} // namespace mach_o 334} // namespace lld 335 336#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H 337