1//===------ macho2yaml.cpp - obj2yaml conversion tool -----------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "obj2yaml.h" 10#include "llvm/DebugInfo/DWARF/DWARFContext.h" 11#include "llvm/Object/MachOUniversal.h" 12#include "llvm/ObjectYAML/DWARFYAML.h" 13#include "llvm/ObjectYAML/ObjectYAML.h" 14#include "llvm/Support/Errc.h" 15#include "llvm/Support/Error.h" 16#include "llvm/Support/ErrorHandling.h" 17#include "llvm/Support/LEB128.h" 18 19#include <string.h> // for memcpy 20 21using namespace llvm; 22 23class MachODumper { 24 25 template <typename StructType> 26 Expected<const char *> processLoadCommandData( 27 MachOYAML::LoadCommand &LC, 28 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 29 MachOYAML::Object &Y); 30 31 const object::MachOObjectFile &Obj; 32 std::unique_ptr<DWARFContext> DWARFCtx; 33 unsigned RawSegment; 34 void dumpHeader(std::unique_ptr<MachOYAML::Object> &Y); 35 Error dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y); 36 void dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y); 37 void dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y); 38 void dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y); 39 void dumpBindOpcodes(std::vector<MachOYAML::BindOpcode> &BindOpcodes, 40 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy = false); 41 void dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y); 42 void dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y); 43 void dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y); 44 void dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y); 45 void dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y); 46 47 template <typename SectionType> 48 Expected<MachOYAML::Section> constructSectionCommon(SectionType Sec, 49 size_t SecIndex); 50 template <typename SectionType> 51 Expected<MachOYAML::Section> constructSection(SectionType Sec, 52 size_t SecIndex); 53 template <typename SectionType, typename SegmentType> 54 Expected<const char *> 55 extractSections(const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 56 std::vector<MachOYAML::Section> &Sections, 57 MachOYAML::Object &Y); 58 59public: 60 MachODumper(const object::MachOObjectFile &O, 61 std::unique_ptr<DWARFContext> DCtx, unsigned RawSegments) 62 : Obj(O), DWARFCtx(std::move(DCtx)), RawSegment(RawSegments) {} 63 Expected<std::unique_ptr<MachOYAML::Object>> dump(); 64}; 65 66#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ 67 case MachO::LCName: \ 68 memcpy((void *)&(LC.Data.LCStruct##_data), LoadCmd.Ptr, \ 69 sizeof(MachO::LCStruct)); \ 70 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) \ 71 MachO::swapStruct(LC.Data.LCStruct##_data); \ 72 if (Expected<const char *> ExpectedEndPtr = \ 73 processLoadCommandData<MachO::LCStruct>(LC, LoadCmd, *Y.get())) \ 74 EndPtr = *ExpectedEndPtr; \ 75 else \ 76 return ExpectedEndPtr.takeError(); \ 77 break; 78 79template <typename SectionType> 80Expected<MachOYAML::Section> 81MachODumper::constructSectionCommon(SectionType Sec, size_t SecIndex) { 82 MachOYAML::Section TempSec; 83 memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16); 84 memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16); 85 TempSec.addr = Sec.addr; 86 TempSec.size = Sec.size; 87 TempSec.offset = Sec.offset; 88 TempSec.align = Sec.align; 89 TempSec.reloff = Sec.reloff; 90 TempSec.nreloc = Sec.nreloc; 91 TempSec.flags = Sec.flags; 92 TempSec.reserved1 = Sec.reserved1; 93 TempSec.reserved2 = Sec.reserved2; 94 TempSec.reserved3 = 0; 95 if (!MachO::isVirtualSection(Sec.flags & MachO::SECTION_TYPE)) 96 TempSec.content = 97 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); 98 99 if (Expected<object::SectionRef> SecRef = Obj.getSection(SecIndex)) { 100 TempSec.relocations.reserve(TempSec.nreloc); 101 for (const object::RelocationRef &Reloc : SecRef->relocations()) { 102 const object::DataRefImpl Rel = Reloc.getRawDataRefImpl(); 103 const MachO::any_relocation_info RE = Obj.getRelocation(Rel); 104 MachOYAML::Relocation R; 105 R.address = Obj.getAnyRelocationAddress(RE); 106 R.is_pcrel = Obj.getAnyRelocationPCRel(RE); 107 R.length = Obj.getAnyRelocationLength(RE); 108 R.type = Obj.getAnyRelocationType(RE); 109 R.is_scattered = Obj.isRelocationScattered(RE); 110 R.symbolnum = (R.is_scattered ? 0 : Obj.getPlainRelocationSymbolNum(RE)); 111 R.is_extern = 112 (R.is_scattered ? false : Obj.getPlainRelocationExternal(RE)); 113 R.value = (R.is_scattered ? Obj.getScatteredRelocationValue(RE) : 0); 114 TempSec.relocations.push_back(R); 115 } 116 } else { 117 return SecRef.takeError(); 118 } 119 return TempSec; 120} 121 122template <> 123Expected<MachOYAML::Section> MachODumper::constructSection(MachO::section Sec, 124 size_t SecIndex) { 125 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); 126 if (TempSec) 127 TempSec->reserved3 = 0; 128 return TempSec; 129} 130 131template <> 132Expected<MachOYAML::Section> 133MachODumper::constructSection(MachO::section_64 Sec, size_t SecIndex) { 134 Expected<MachOYAML::Section> TempSec = constructSectionCommon(Sec, SecIndex); 135 if (TempSec) 136 TempSec->reserved3 = Sec.reserved3; 137 return TempSec; 138} 139 140static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, 141 DWARFYAML::Data &DWARF) { 142 if (SecName == "__debug_abbrev") { 143 dumpDebugAbbrev(DCtx, DWARF); 144 return Error::success(); 145 } 146 if (SecName == "__debug_aranges") 147 return dumpDebugARanges(DCtx, DWARF); 148 if (SecName == "__debug_info") { 149 dumpDebugInfo(DCtx, DWARF); 150 return Error::success(); 151 } 152 if (SecName == "__debug_line") { 153 dumpDebugLines(DCtx, DWARF); 154 return Error::success(); 155 } 156 if (SecName.startswith("__debug_pub")) { 157 // FIXME: We should extract pub-section dumpers from this function. 158 dumpDebugPubSections(DCtx, DWARF); 159 return Error::success(); 160 } 161 if (SecName == "__debug_ranges") 162 return dumpDebugRanges(DCtx, DWARF); 163 if (SecName == "__debug_str") 164 return dumpDebugStrings(DCtx, DWARF); 165 return createStringError(errc::not_supported, 166 "dumping " + SecName + " section is not supported"); 167} 168 169template <typename SectionType, typename SegmentType> 170Expected<const char *> MachODumper::extractSections( 171 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 172 std::vector<MachOYAML::Section> &Sections, MachOYAML::Object &Y) { 173 auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize; 174 const SectionType *Curr = 175 reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType)); 176 for (; reinterpret_cast<const void *>(Curr) < End; Curr++) { 177 SectionType Sec; 178 memcpy((void *)&Sec, Curr, sizeof(SectionType)); 179 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 180 MachO::swapStruct(Sec); 181 // For MachO section indices start from 1. 182 if (Expected<MachOYAML::Section> S = 183 constructSection(Sec, Sections.size() + 1)) { 184 StringRef SecName(S->sectname); 185 186 // Copy data sections if requested. 187 if ((RawSegment & ::RawSegments::data) && 188 StringRef(S->segname).startswith("__DATA")) 189 S->content = 190 yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); 191 192 if (SecName.startswith("__debug_")) { 193 // If the DWARF section cannot be successfully parsed, emit raw content 194 // instead of an entry in the DWARF section of the YAML. 195 if (Error Err = dumpDebugSection(SecName, *DWARFCtx, Y.DWARF)) 196 consumeError(std::move(Err)); 197 else 198 S->content.reset(); 199 } 200 Sections.push_back(std::move(*S)); 201 } else 202 return S.takeError(); 203 } 204 return reinterpret_cast<const char *>(Curr); 205} 206 207template <typename StructType> 208Expected<const char *> MachODumper::processLoadCommandData( 209 MachOYAML::LoadCommand &LC, 210 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 211 MachOYAML::Object &Y) { 212 return LoadCmd.Ptr + sizeof(StructType); 213} 214 215template <> 216Expected<const char *> 217MachODumper::processLoadCommandData<MachO::segment_command>( 218 MachOYAML::LoadCommand &LC, 219 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 220 MachOYAML::Object &Y) { 221 return extractSections<MachO::section, MachO::segment_command>( 222 LoadCmd, LC.Sections, Y); 223} 224 225template <> 226Expected<const char *> 227MachODumper::processLoadCommandData<MachO::segment_command_64>( 228 MachOYAML::LoadCommand &LC, 229 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 230 MachOYAML::Object &Y) { 231 return extractSections<MachO::section_64, MachO::segment_command_64>( 232 LoadCmd, LC.Sections, Y); 233} 234 235template <typename StructType> 236const char * 237readString(MachOYAML::LoadCommand &LC, 238 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd) { 239 auto Start = LoadCmd.Ptr + sizeof(StructType); 240 auto MaxSize = LoadCmd.C.cmdsize - sizeof(StructType); 241 auto Size = strnlen(Start, MaxSize); 242 LC.Content = StringRef(Start, Size).str(); 243 return Start + Size; 244} 245 246template <> 247Expected<const char *> 248MachODumper::processLoadCommandData<MachO::dylib_command>( 249 MachOYAML::LoadCommand &LC, 250 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 251 MachOYAML::Object &Y) { 252 return readString<MachO::dylib_command>(LC, LoadCmd); 253} 254 255template <> 256Expected<const char *> 257MachODumper::processLoadCommandData<MachO::dylinker_command>( 258 MachOYAML::LoadCommand &LC, 259 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 260 MachOYAML::Object &Y) { 261 return readString<MachO::dylinker_command>(LC, LoadCmd); 262} 263 264template <> 265Expected<const char *> 266MachODumper::processLoadCommandData<MachO::rpath_command>( 267 MachOYAML::LoadCommand &LC, 268 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 269 MachOYAML::Object &Y) { 270 return readString<MachO::rpath_command>(LC, LoadCmd); 271} 272 273template <> 274Expected<const char *> 275MachODumper::processLoadCommandData<MachO::build_version_command>( 276 MachOYAML::LoadCommand &LC, 277 const llvm::object::MachOObjectFile::LoadCommandInfo &LoadCmd, 278 MachOYAML::Object &Y) { 279 auto Start = LoadCmd.Ptr + sizeof(MachO::build_version_command); 280 auto NTools = LC.Data.build_version_command_data.ntools; 281 for (unsigned i = 0; i < NTools; ++i) { 282 auto Curr = Start + i * sizeof(MachO::build_tool_version); 283 MachO::build_tool_version BV; 284 memcpy((void *)&BV, Curr, sizeof(MachO::build_tool_version)); 285 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 286 MachO::swapStruct(BV); 287 LC.Tools.push_back(BV); 288 } 289 return Start + NTools * sizeof(MachO::build_tool_version); 290} 291 292Expected<std::unique_ptr<MachOYAML::Object>> MachODumper::dump() { 293 auto Y = std::make_unique<MachOYAML::Object>(); 294 Y->IsLittleEndian = Obj.isLittleEndian(); 295 dumpHeader(Y); 296 if (Error Err = dumpLoadCommands(Y)) 297 return std::move(Err); 298 if (RawSegment & ::RawSegments::linkedit) 299 Y->RawLinkEditSegment = 300 yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT")); 301 else 302 dumpLinkEdit(Y); 303 304 return std::move(Y); 305} 306 307void MachODumper::dumpHeader(std::unique_ptr<MachOYAML::Object> &Y) { 308 Y->Header.magic = Obj.getHeader().magic; 309 Y->Header.cputype = Obj.getHeader().cputype; 310 Y->Header.cpusubtype = Obj.getHeader().cpusubtype; 311 Y->Header.filetype = Obj.getHeader().filetype; 312 Y->Header.ncmds = Obj.getHeader().ncmds; 313 Y->Header.sizeofcmds = Obj.getHeader().sizeofcmds; 314 Y->Header.flags = Obj.getHeader().flags; 315 Y->Header.reserved = 0; 316} 317 318Error MachODumper::dumpLoadCommands(std::unique_ptr<MachOYAML::Object> &Y) { 319 for (auto LoadCmd : Obj.load_commands()) { 320 MachOYAML::LoadCommand LC; 321 const char *EndPtr = LoadCmd.Ptr; 322 switch (LoadCmd.C.cmd) { 323 default: 324 memcpy((void *)&(LC.Data.load_command_data), LoadCmd.Ptr, 325 sizeof(MachO::load_command)); 326 if (Obj.isLittleEndian() != sys::IsLittleEndianHost) 327 MachO::swapStruct(LC.Data.load_command_data); 328 if (Expected<const char *> ExpectedEndPtr = 329 processLoadCommandData<MachO::load_command>(LC, LoadCmd, *Y)) 330 EndPtr = *ExpectedEndPtr; 331 else 332 return ExpectedEndPtr.takeError(); 333 break; 334#include "llvm/BinaryFormat/MachO.def" 335 } 336 auto RemainingBytes = LoadCmd.C.cmdsize - (EndPtr - LoadCmd.Ptr); 337 if (!std::all_of(EndPtr, &EndPtr[RemainingBytes], 338 [](const char C) { return C == 0; })) { 339 LC.PayloadBytes.insert(LC.PayloadBytes.end(), EndPtr, 340 &EndPtr[RemainingBytes]); 341 RemainingBytes = 0; 342 } 343 LC.ZeroPadBytes = RemainingBytes; 344 Y->LoadCommands.push_back(std::move(LC)); 345 } 346 return Error::success(); 347} 348 349void MachODumper::dumpLinkEdit(std::unique_ptr<MachOYAML::Object> &Y) { 350 dumpRebaseOpcodes(Y); 351 dumpBindOpcodes(Y->LinkEdit.BindOpcodes, Obj.getDyldInfoBindOpcodes()); 352 dumpBindOpcodes(Y->LinkEdit.WeakBindOpcodes, 353 Obj.getDyldInfoWeakBindOpcodes()); 354 dumpBindOpcodes(Y->LinkEdit.LazyBindOpcodes, Obj.getDyldInfoLazyBindOpcodes(), 355 true); 356 dumpExportTrie(Y); 357 dumpSymbols(Y); 358 dumpIndirectSymbols(Y); 359 dumpFunctionStarts(Y); 360 dumpChainedFixups(Y); 361 dumpDataInCode(Y); 362} 363 364void MachODumper::dumpFunctionStarts(std::unique_ptr<MachOYAML::Object> &Y) { 365 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 366 367 auto FunctionStarts = Obj.getFunctionStarts(); 368 for (auto Addr : FunctionStarts) 369 LEData.FunctionStarts.push_back(Addr); 370} 371 372void MachODumper::dumpRebaseOpcodes(std::unique_ptr<MachOYAML::Object> &Y) { 373 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 374 375 auto RebaseOpcodes = Obj.getDyldInfoRebaseOpcodes(); 376 for (auto OpCode = RebaseOpcodes.begin(); OpCode != RebaseOpcodes.end(); 377 ++OpCode) { 378 MachOYAML::RebaseOpcode RebaseOp; 379 RebaseOp.Opcode = 380 static_cast<MachO::RebaseOpcode>(*OpCode & MachO::REBASE_OPCODE_MASK); 381 RebaseOp.Imm = *OpCode & MachO::REBASE_IMMEDIATE_MASK; 382 383 unsigned Count; 384 uint64_t ULEB = 0; 385 386 switch (RebaseOp.Opcode) { 387 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: 388 389 ULEB = decodeULEB128(OpCode + 1, &Count); 390 RebaseOp.ExtraData.push_back(ULEB); 391 OpCode += Count; 392 [[fallthrough]]; 393 // Intentionally no break here -- This opcode has two ULEB values 394 case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 395 case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: 396 case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: 397 case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: 398 399 ULEB = decodeULEB128(OpCode + 1, &Count); 400 RebaseOp.ExtraData.push_back(ULEB); 401 OpCode += Count; 402 break; 403 default: 404 break; 405 } 406 407 LEData.RebaseOpcodes.push_back(RebaseOp); 408 409 if (RebaseOp.Opcode == MachO::REBASE_OPCODE_DONE) 410 break; 411 } 412} 413 414StringRef ReadStringRef(const uint8_t *Start) { 415 const uint8_t *Itr = Start; 416 for (; *Itr; ++Itr) 417 ; 418 return StringRef(reinterpret_cast<const char *>(Start), Itr - Start); 419} 420 421void MachODumper::dumpBindOpcodes( 422 std::vector<MachOYAML::BindOpcode> &BindOpcodes, 423 ArrayRef<uint8_t> OpcodeBuffer, bool Lazy) { 424 for (auto OpCode = OpcodeBuffer.begin(); OpCode != OpcodeBuffer.end(); 425 ++OpCode) { 426 MachOYAML::BindOpcode BindOp; 427 BindOp.Opcode = 428 static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK); 429 BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK; 430 431 unsigned Count; 432 uint64_t ULEB = 0; 433 int64_t SLEB = 0; 434 435 switch (BindOp.Opcode) { 436 case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: 437 ULEB = decodeULEB128(OpCode + 1, &Count); 438 BindOp.ULEBExtraData.push_back(ULEB); 439 OpCode += Count; 440 [[fallthrough]]; 441 // Intentionally no break here -- this opcode has two ULEB values 442 443 case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: 444 case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: 445 case MachO::BIND_OPCODE_ADD_ADDR_ULEB: 446 case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: 447 ULEB = decodeULEB128(OpCode + 1, &Count); 448 BindOp.ULEBExtraData.push_back(ULEB); 449 OpCode += Count; 450 break; 451 452 case MachO::BIND_OPCODE_SET_ADDEND_SLEB: 453 SLEB = decodeSLEB128(OpCode + 1, &Count); 454 BindOp.SLEBExtraData.push_back(SLEB); 455 OpCode += Count; 456 break; 457 458 case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: 459 BindOp.Symbol = ReadStringRef(OpCode + 1); 460 OpCode += BindOp.Symbol.size() + 1; 461 break; 462 default: 463 break; 464 } 465 466 BindOpcodes.push_back(BindOp); 467 468 // Lazy bindings have DONE opcodes between operations, so we need to keep 469 // processing after a DONE. 470 if (!Lazy && BindOp.Opcode == MachO::BIND_OPCODE_DONE) 471 break; 472 } 473} 474 475/*! 476 * /brief processes a node from the export trie, and its children. 477 * 478 * To my knowledge there is no documentation of the encoded format of this data 479 * other than in the heads of the Apple linker engineers. To that end hopefully 480 * this comment and the implementation below can serve to light the way for 481 * anyone crazy enough to come down this path in the future. 482 * 483 * This function reads and preserves the trie structure of the export trie. To 484 * my knowledge there is no code anywhere else that reads the data and preserves 485 * the Trie. LD64 (sources available at opensource.apple.com) has a similar 486 * implementation that parses the export trie into a vector. That code as well 487 * as LLVM's libObject MachO implementation were the basis for this. 488 * 489 * The export trie is an encoded trie. The node serialization is a bit awkward. 490 * The below pseudo-code is the best description I've come up with for it. 491 * 492 * struct SerializedNode { 493 * ULEB128 TerminalSize; 494 * struct TerminalData { <-- This is only present if TerminalSize > 0 495 * ULEB128 Flags; 496 * ULEB128 Address; <-- Present if (! Flags & REEXPORT ) 497 * ULEB128 Other; <-- Present if ( Flags & REEXPORT || 498 * Flags & STUB_AND_RESOLVER ) 499 * char[] ImportName; <-- Present if ( Flags & REEXPORT ) 500 * } 501 * uint8_t ChildrenCount; 502 * Pair<char[], ULEB128> ChildNameOffsetPair[ChildrenCount]; 503 * SerializedNode Children[ChildrenCount] 504 * } 505 * 506 * Terminal nodes are nodes that represent actual exports. They can appear 507 * anywhere in the tree other than at the root; they do not need to be leaf 508 * nodes. When reading the data out of the trie this routine reads it in-order, 509 * but it puts the child names and offsets directly into the child nodes. This 510 * results in looping over the children twice during serialization and 511 * de-serialization, but it makes the YAML representation more human readable. 512 * 513 * Below is an example of the graph from a "Hello World" executable: 514 * 515 * ------- 516 * | '' | 517 * ------- 518 * | 519 * ------- 520 * | '_' | 521 * ------- 522 * | 523 * |----------------------------------------| 524 * | | 525 * ------------------------ --------------------- 526 * | '_mh_execute_header' | | 'main' | 527 * | Flags: 0x00000000 | | Flags: 0x00000000 | 528 * | Addr: 0x00000000 | | Addr: 0x00001160 | 529 * ------------------------ --------------------- 530 * 531 * This graph represents the trie for the exports "__mh_execute_header" and 532 * "_main". In the graph only the "_main" and "__mh_execute_header" nodes are 533 * terminal. 534*/ 535 536const uint8_t *processExportNode(const uint8_t *CurrPtr, 537 const uint8_t *const End, 538 MachOYAML::ExportEntry &Entry) { 539 if (CurrPtr >= End) 540 return CurrPtr; 541 unsigned Count = 0; 542 Entry.TerminalSize = decodeULEB128(CurrPtr, &Count); 543 CurrPtr += Count; 544 if (Entry.TerminalSize != 0) { 545 Entry.Flags = decodeULEB128(CurrPtr, &Count); 546 CurrPtr += Count; 547 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) { 548 Entry.Address = 0; 549 Entry.Other = decodeULEB128(CurrPtr, &Count); 550 CurrPtr += Count; 551 Entry.ImportName = std::string(reinterpret_cast<const char *>(CurrPtr)); 552 } else { 553 Entry.Address = decodeULEB128(CurrPtr, &Count); 554 CurrPtr += Count; 555 if (Entry.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { 556 Entry.Other = decodeULEB128(CurrPtr, &Count); 557 CurrPtr += Count; 558 } else 559 Entry.Other = 0; 560 } 561 } 562 uint8_t childrenCount = *CurrPtr++; 563 if (childrenCount == 0) 564 return CurrPtr; 565 566 Entry.Children.insert(Entry.Children.begin(), (size_t)childrenCount, 567 MachOYAML::ExportEntry()); 568 for (auto &Child : Entry.Children) { 569 Child.Name = std::string(reinterpret_cast<const char *>(CurrPtr)); 570 CurrPtr += Child.Name.length() + 1; 571 Child.NodeOffset = decodeULEB128(CurrPtr, &Count); 572 CurrPtr += Count; 573 } 574 for (auto &Child : Entry.Children) { 575 CurrPtr = processExportNode(CurrPtr, End, Child); 576 } 577 return CurrPtr; 578} 579 580void MachODumper::dumpExportTrie(std::unique_ptr<MachOYAML::Object> &Y) { 581 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 582 // The exports trie can be in LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE 583 auto ExportsTrie = Obj.getDyldInfoExportsTrie(); 584 if (ExportsTrie.empty()) 585 ExportsTrie = Obj.getDyldExportsTrie(); 586 processExportNode(ExportsTrie.begin(), ExportsTrie.end(), LEData.ExportTrie); 587} 588 589template <typename nlist_t> 590MachOYAML::NListEntry constructNameList(const nlist_t &nlist) { 591 MachOYAML::NListEntry NL; 592 NL.n_strx = nlist.n_strx; 593 NL.n_type = nlist.n_type; 594 NL.n_sect = nlist.n_sect; 595 NL.n_desc = nlist.n_desc; 596 NL.n_value = nlist.n_value; 597 return NL; 598} 599 600void MachODumper::dumpSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 601 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 602 603 for (auto Symbol : Obj.symbols()) { 604 MachOYAML::NListEntry NLE = 605 Obj.is64Bit() 606 ? constructNameList<MachO::nlist_64>( 607 Obj.getSymbol64TableEntry(Symbol.getRawDataRefImpl())) 608 : constructNameList<MachO::nlist>( 609 Obj.getSymbolTableEntry(Symbol.getRawDataRefImpl())); 610 LEData.NameList.push_back(NLE); 611 } 612 613 StringRef RemainingTable = Obj.getStringTableData(); 614 while (RemainingTable.size() > 0) { 615 auto SymbolPair = RemainingTable.split('\0'); 616 RemainingTable = SymbolPair.second; 617 LEData.StringTable.push_back(SymbolPair.first); 618 } 619} 620 621void MachODumper::dumpIndirectSymbols(std::unique_ptr<MachOYAML::Object> &Y) { 622 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 623 624 MachO::dysymtab_command DLC = Obj.getDysymtabLoadCommand(); 625 for (unsigned i = 0; i < DLC.nindirectsyms; ++i) 626 LEData.IndirectSymbols.push_back(Obj.getIndirectSymbolTableEntry(DLC, i)); 627} 628 629void MachODumper::dumpChainedFixups(std::unique_ptr<MachOYAML::Object> &Y) { 630 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 631 632 for (const auto &LC : Y->LoadCommands) { 633 if (LC.Data.load_command_data.cmd == llvm::MachO::LC_DYLD_CHAINED_FIXUPS) { 634 const MachO::linkedit_data_command &DC = 635 LC.Data.linkedit_data_command_data; 636 if (DC.dataoff) { 637 assert(DC.dataoff < Obj.getData().size()); 638 assert(DC.dataoff + DC.datasize <= Obj.getData().size()); 639 const char *Bytes = Obj.getData().data() + DC.dataoff; 640 for (size_t Idx = 0; Idx < DC.datasize; Idx++) { 641 LEData.ChainedFixups.push_back(Bytes[Idx]); 642 } 643 } 644 break; 645 } 646 } 647} 648 649void MachODumper::dumpDataInCode(std::unique_ptr<MachOYAML::Object> &Y) { 650 MachOYAML::LinkEditData &LEData = Y->LinkEdit; 651 652 MachO::linkedit_data_command DIC = Obj.getDataInCodeLoadCommand(); 653 uint32_t NumEntries = DIC.datasize / sizeof(MachO::data_in_code_entry); 654 for (uint32_t Idx = 0; Idx < NumEntries; ++Idx) { 655 MachO::data_in_code_entry DICE = 656 Obj.getDataInCodeTableEntry(DIC.dataoff, Idx); 657 MachOYAML::DataInCodeEntry Entry{DICE.offset, DICE.length, DICE.kind}; 658 LEData.DataInCode.emplace_back(Entry); 659 } 660} 661 662Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, 663 unsigned RawSegments) { 664 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(Obj); 665 MachODumper Dumper(Obj, std::move(DCtx), RawSegments); 666 Expected<std::unique_ptr<MachOYAML::Object>> YAML = Dumper.dump(); 667 if (!YAML) 668 return YAML.takeError(); 669 670 yaml::YamlObjectFile YAMLFile; 671 YAMLFile.MachO = std::move(YAML.get()); 672 673 yaml::Output Yout(Out); 674 Yout << YAMLFile; 675 return Error::success(); 676} 677 678Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, 679 unsigned RawSegments) { 680 yaml::YamlObjectFile YAMLFile; 681 YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); 682 MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; 683 YAML.Header.magic = Obj.getMagic(); 684 YAML.Header.nfat_arch = Obj.getNumberOfObjects(); 685 686 for (auto Slice : Obj.objects()) { 687 MachOYAML::FatArch arch; 688 arch.cputype = Slice.getCPUType(); 689 arch.cpusubtype = Slice.getCPUSubType(); 690 arch.offset = Slice.getOffset(); 691 arch.size = Slice.getSize(); 692 arch.align = Slice.getAlign(); 693 arch.reserved = Slice.getReserved(); 694 YAML.FatArchs.push_back(arch); 695 696 auto SliceObj = Slice.getAsObjectFile(); 697 if (!SliceObj) 698 return SliceObj.takeError(); 699 700 std::unique_ptr<DWARFContext> DCtx = DWARFContext::create(*SliceObj.get()); 701 MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments); 702 Expected<std::unique_ptr<MachOYAML::Object>> YAMLObj = Dumper.dump(); 703 if (!YAMLObj) 704 return YAMLObj.takeError(); 705 YAML.Slices.push_back(*YAMLObj.get()); 706 } 707 708 yaml::Output Yout(Out); 709 Yout << YAML; 710 return Error::success(); 711} 712 713Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, 714 unsigned RawSegments) { 715 if (const auto *MachOObj = dyn_cast<object::MachOUniversalBinary>(&Binary)) 716 return macho2yaml(Out, *MachOObj, RawSegments); 717 718 if (const auto *MachOObj = dyn_cast<object::MachOObjectFile>(&Binary)) 719 return macho2yaml(Out, *MachOObj, RawSegments); 720 721 llvm_unreachable("unexpected Mach-O file format"); 722} 723