1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- 2 * 3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved. 4 * 5 * @APPLE_LICENSE_HEADER_START@ 6 * 7 * This file contains Original Code and/or Modifications of Original Code 8 * as defined in and that are subject to the Apple Public Source License 9 * Version 2.0 (the 'License'). You may not use this file except in 10 * compliance with the License. Please obtain a copy of the License at 11 * http://www.opensource.apple.com/apsl/ and read it before using this 12 * file. 13 * 14 * The Original Code and all software distributed under the License are 15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 19 * Please see the License for the specific language governing rights and 20 * limitations under the License. 21 * 22 * @APPLE_LICENSE_HEADER_END@ 23*/ 24#ifndef __MACH_O_TRIE__ 25#define __MACH_O_TRIE__ 26 27#include <algorithm> 28#include <vector> 29 30#include "MachOFileAbstraction.hpp" 31 32 33namespace mach_o { 34namespace trie { 35 36struct Edge 37{ 38 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { } 39 ~Edge() { } 40 const char* fSubString; 41 struct Node* fChild; 42 43}; 44 45struct Node 46{ 47 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), 48 fOther(0), fImportedName(NULL), fOrdered(false), 49 fHaveExportInfo(false), fTrieOffset(0) {} 50 ~Node() { } 51 const char* fCummulativeString; 52 std::vector<Edge> fChildren; 53 uint64_t fAddress; 54 uint64_t fFlags; 55 uint64_t fOther; 56 const char* fImportedName; 57 bool fOrdered; 58 bool fHaveExportInfo; 59 uint32_t fTrieOffset; 60 61 void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) { 62 const char* partialStr = &fullStr[strlen(fCummulativeString)]; 63 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 64 Edge& e = *it; 65 int subStringLen = strlen(e.fSubString); 66 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { 67 // already have matching edge, go down that path 68 e.fChild->addSymbol(fullStr, address, flags, other, importName); 69 return; 70 } 71 else { 72 for (int i=subStringLen-1; i > 0; --i) { 73 if ( strncmp(e.fSubString, partialStr, i) == 0 ) { 74 // found a common substring, splice in new node 75 // was A -> C, now A -> B -> C 76 char* bNodeCummStr = strdup(e.fChild->fCummulativeString); 77 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0'; 78 //node* aNode = this; 79 Node* bNode = new Node(bNodeCummStr); 80 Node* cNode = e.fChild; 81 char* abEdgeStr = strdup(e.fSubString); 82 abEdgeStr[i] = '\0'; 83 char* bcEdgeStr = strdup(&e.fSubString[i]); 84 Edge& abEdge = e; 85 abEdge.fSubString = abEdgeStr; 86 abEdge.fChild = bNode; 87 Edge bcEdge(bcEdgeStr, cNode); 88 bNode->fChildren.push_back(bcEdge); 89 bNode->addSymbol(fullStr, address, flags, other, importName); 90 return; 91 } 92 } 93 } 94 } 95 96 // no commonality with any existing child, make a new edge that is this whole string 97 Node* newNode = new Node(strdup(fullStr)); 98 Edge newEdge(strdup(partialStr), newNode); 99 fChildren.push_back(newEdge); 100 newNode->fAddress = address; 101 newNode->fFlags = flags; 102 newNode->fOther = other; 103 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) ) 104 newNode->fImportedName = importName; 105 else 106 newNode->fImportedName = NULL; 107 newNode->fHaveExportInfo = true; 108 } 109 110 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) { 111 if ( !fOrdered ) { 112 orderedNodes.push_back(this); 113 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString); 114 fOrdered = true; 115 } 116 const char* partialStr = &name[strlen(fCummulativeString)]; 117 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 118 Edge& e = *it; 119 int subStringLen = strlen(e.fSubString); 120 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { 121 // already have matching edge, go down that path 122 e.fChild->addOrderedNodes(name, orderedNodes); 123 return; 124 } 125 } 126 } 127 128 // byte for terminal node size in bytes, or 0x00 if not terminal node 129 // teminal node (uleb128 flags, uleb128 addr [uleb128 other]) 130 // byte for child node count 131 // each child: zero terminated substring, uleb128 node offset 132 bool updateOffset(uint32_t& offset) { 133 uint32_t nodeSize = 1; // length of export info when no export info 134 if ( fHaveExportInfo ) { 135 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 136 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal 137 if ( fImportedName != NULL ) 138 nodeSize += strlen(fImportedName); 139 ++nodeSize; // trailing zero in imported name 140 } 141 else { 142 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); 143 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) 144 nodeSize += uleb128_size(fOther); 145 } 146 // do have export info, overall node size so far is uleb128 of export info + export info 147 nodeSize += uleb128_size(nodeSize); 148 } 149 // add children 150 ++nodeSize; // byte for count of chidren 151 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 152 Edge& e = *it; 153 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset); 154 } 155 bool result = (fTrieOffset != offset); 156 fTrieOffset = offset; 157 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString); 158 offset += nodeSize; 159 // return true if fTrieOffset was changed 160 return result; 161 } 162 163 void appendToStream(std::vector<uint8_t>& out) { 164 if ( fHaveExportInfo ) { 165 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 166 if ( fImportedName != NULL ) { 167 // nodes with re-export info: size, flags, ordinal, string 168 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1; 169 out.push_back(nodeSize); 170 append_uleb128(fFlags, out); 171 append_uleb128(fOther, out); 172 append_string(fImportedName, out); 173 } 174 else { 175 // nodes with re-export info: size, flags, ordinal, empty-string 176 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1; 177 out.push_back(nodeSize); 178 append_uleb128(fFlags, out); 179 append_uleb128(fOther, out); 180 out.push_back(0); 181 } 182 } 183 else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { 184 // nodes with export info: size, flags, address, other 185 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther); 186 out.push_back(nodeSize); 187 append_uleb128(fFlags, out); 188 append_uleb128(fAddress, out); 189 append_uleb128(fOther, out); 190 } 191 else { 192 // nodes with export info: size, flags, address 193 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); 194 out.push_back(nodeSize); 195 append_uleb128(fFlags, out); 196 append_uleb128(fAddress, out); 197 } 198 } 199 else { 200 // no export info uleb128 of zero is one byte of zero 201 out.push_back(0); 202 } 203 // write number of children 204 out.push_back(fChildren.size()); 205 // write each child 206 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 207 Edge& e = *it; 208 append_string(e.fSubString, out); 209 append_uleb128(e.fChild->fTrieOffset, out); 210 } 211 } 212 213private: 214 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) { 215 uint8_t byte; 216 do { 217 byte = value & 0x7F; 218 value &= ~0x7F; 219 if ( value != 0 ) 220 byte |= 0x80; 221 out.push_back(byte); 222 value = value >> 7; 223 } while( byte >= 0x80 ); 224 } 225 226 static void append_string(const char* str, std::vector<uint8_t>& out) { 227 for (const char* s = str; *s != '\0'; ++s) 228 out.push_back(*s); 229 out.push_back('\0'); 230 } 231 232 static unsigned int uleb128_size(uint64_t value) { 233 uint32_t result = 0; 234 do { 235 value = value >> 7; 236 ++result; 237 } while ( value != 0 ); 238 return result; 239 } 240 241 242}; 243 244inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) { 245 uint64_t result = 0; 246 int bit = 0; 247 do { 248 if (p == end) 249#if __EXCEPTIONS 250 throw "malformed uleb128 extends beyond trie"; 251#else 252 return result; 253#endif 254 uint64_t slice = *p & 0x7f; 255 256 if (bit >= 64 || slice << bit >> bit != slice) 257#if __EXCEPTIONS 258 throw "uleb128 too big for 64-bits"; 259#else 260 return result; 261#endif 262 else { 263 result |= (slice << bit); 264 bit += 7; 265 } 266 } 267 while (*p++ & 0x80); 268 return result; 269} 270 271 272 273struct Entry 274{ 275 const char* name; 276 uint64_t address; 277 uint64_t flags; 278 uint64_t other; 279 const char* importName; 280}; 281 282 283 284inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output) 285{ 286 Node start(strdup("")); 287 288 // make nodes for all exported symbols 289 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { 290 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName); 291 } 292 293 // create vector of nodes 294 std::vector<Node*> orderedNodes; 295 orderedNodes.reserve(entries.size()*2); 296 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { 297 start.addOrderedNodes(it->name, orderedNodes); 298 } 299 300 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized 301 bool more; 302 do { 303 uint32_t offset = 0; 304 more = false; 305 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { 306 if ( (*it)->updateOffset(offset) ) 307 more = true; 308 } 309 } while ( more ); 310 311 // create trie stream 312 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { 313 (*it)->appendToStream(output); 314 } 315} 316 317struct EntryWithOffset 318{ 319 uintptr_t nodeOffset; 320 Entry entry; 321 322 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); } 323}; 324 325 326 327static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, 328 char* cummulativeString, int curStrOffset, 329 std::vector<EntryWithOffset>& output) 330{ 331 if ( p >= end ) 332#if __EXCEPTIONS 333 throw "malformed trie, node past end"; 334#else 335 return; 336#endif 337 const uint8_t terminalSize = read_uleb128(p, end); 338 const uint8_t* children = p + terminalSize; 339 if ( terminalSize != 0 ) { 340 EntryWithOffset e; 341 e.nodeOffset = p-start; 342 e.entry.name = strdup(cummulativeString); 343 e.entry.flags = read_uleb128(p, end); 344 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 345 e.entry.address = 0; 346 e.entry.other = read_uleb128(p, end); // dylib ordinal 347 e.entry.importName = (char*)p; 348 } 349 else { 350 e.entry.address = read_uleb128(p, end); 351 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) 352 e.entry.other = read_uleb128(p, end); 353 else 354 e.entry.other = 0; 355 e.entry.importName = NULL; 356 } 357 output.push_back(e); 358 } 359 const uint8_t childrenCount = *children++; 360 const uint8_t* s = children; 361 for (uint8_t i=0; i < childrenCount; ++i) { 362 int edgeStrLen = 0; 363 while (*s != '\0') { 364 cummulativeString[curStrOffset+edgeStrLen] = *s++; 365 ++edgeStrLen; 366 } 367 cummulativeString[curStrOffset+edgeStrLen] = *s++; 368 uint32_t childNodeOffet = read_uleb128(s, end); 369 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output); 370 } 371} 372 373 374inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output) 375{ 376 // empty trie has no entries 377 if ( start == end ) 378 return; 379 char cummulativeString[4000]; 380 std::vector<EntryWithOffset> entries; 381 processExportNode(start, start, end, cummulativeString, 0, entries); 382 // to preserve tie layout order, sort by node offset 383 std::sort(entries.begin(), entries.end()); 384 // copy to output 385 output.reserve(entries.size()); 386 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it) 387 output.push_back(it->entry); 388} 389 390 391 392 393}; // namespace trie 394}; // namespace mach_o 395 396 397#endif // __MACH_O_TRIE__ 398 399 400