1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23*/
24#ifndef __MACH_O_TRIE__
25#define __MACH_O_TRIE__
26
27#include <algorithm>
28#include <vector>
29
30#include "MachOFileAbstraction.hpp"
31
32
33namespace mach_o {
34namespace trie {
35
36struct Edge
37{
38					Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
39					~Edge() {  }
40	const char*		fSubString;
41	struct Node*	fChild;
42
43};
44
45struct Node
46{
47						Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0),
48											fOther(0), fImportedName(NULL), fOrdered(false),
49											fHaveExportInfo(false), fTrieOffset(0) {}
50						~Node() { }
51	const char*			fCummulativeString;
52	std::vector<Edge>	fChildren;
53	uint64_t			fAddress;
54	uint64_t			fFlags;
55	uint64_t			fOther;
56	const char*			fImportedName;
57	bool				fOrdered;
58	bool				fHaveExportInfo;
59	uint32_t			fTrieOffset;
60
61	void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
62		const char* partialStr = &fullStr[strlen(fCummulativeString)];
63		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
64			Edge& e = *it;
65			int subStringLen = strlen(e.fSubString);
66			if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
67				// already have matching edge, go down that path
68				e.fChild->addSymbol(fullStr, address, flags, other, importName);
69				return;
70			}
71			else {
72				for (int i=subStringLen-1; i > 0; --i) {
73					if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
74						// found a common substring, splice in new node
75						//  was A -> C,  now A -> B -> C
76						char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
77						bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
78						//node* aNode = this;
79						Node* bNode = new Node(bNodeCummStr);
80						Node* cNode = e.fChild;
81						char* abEdgeStr = strdup(e.fSubString);
82						abEdgeStr[i] = '\0';
83						char* bcEdgeStr = strdup(&e.fSubString[i]);
84						Edge& abEdge = e;
85						abEdge.fSubString = abEdgeStr;
86						abEdge.fChild = bNode;
87						Edge bcEdge(bcEdgeStr, cNode);
88						bNode->fChildren.push_back(bcEdge);
89						bNode->addSymbol(fullStr, address, flags, other, importName);
90						return;
91					}
92				}
93			}
94		}
95
96		// no commonality with any existing child, make a new edge that is this whole string
97		Node* newNode = new Node(strdup(fullStr));
98		Edge newEdge(strdup(partialStr), newNode);
99		fChildren.push_back(newEdge);
100		newNode->fAddress = address;
101		newNode->fFlags = flags;
102		newNode->fOther = other;
103		if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
104			newNode->fImportedName = importName;
105		else
106			newNode->fImportedName = NULL;
107		newNode->fHaveExportInfo = true;
108	}
109
110	void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
111		if ( !fOrdered ) {
112			orderedNodes.push_back(this);
113			//fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
114			fOrdered = true;
115		}
116		const char* partialStr = &name[strlen(fCummulativeString)];
117		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
118			Edge& e = *it;
119			int subStringLen = strlen(e.fSubString);
120			if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
121				// already have matching edge, go down that path
122				e.fChild->addOrderedNodes(name, orderedNodes);
123				return;
124			}
125		}
126	}
127
128	// byte for terminal node size in bytes, or 0x00 if not terminal node
129	// teminal node (uleb128 flags, uleb128 addr [uleb128 other])
130	// byte for child node count
131	//  each child: zero terminated substring, uleb128 node offset
132	bool updateOffset(uint32_t& offset) {
133		uint32_t nodeSize = 1; // length of export info when no export info
134		if ( fHaveExportInfo ) {
135			if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
136				nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
137				if ( fImportedName != NULL )
138					nodeSize += strlen(fImportedName);
139				++nodeSize; // trailing zero in imported name
140			}
141			else {
142				nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
143				if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
144					nodeSize += uleb128_size(fOther);
145			}
146			// do have export info, overall node size so far is uleb128 of export info + export info
147			nodeSize += uleb128_size(nodeSize);
148		}
149		// add children
150		++nodeSize; // byte for count of chidren
151		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
152			Edge& e = *it;
153			nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
154		}
155		bool result = (fTrieOffset != offset);
156		fTrieOffset = offset;
157		//fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
158		offset += nodeSize;
159		// return true if fTrieOffset was changed
160		return result;
161	}
162
163	void appendToStream(std::vector<uint8_t>& out) {
164		if ( fHaveExportInfo ) {
165			if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
166				if ( fImportedName != NULL ) {
167					// nodes with re-export info: size, flags, ordinal, string
168					uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1;
169					out.push_back(nodeSize);
170					append_uleb128(fFlags, out);
171					append_uleb128(fOther, out);
172					append_string(fImportedName, out);
173				}
174				else {
175					// nodes with re-export info: size, flags, ordinal, empty-string
176					uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
177					out.push_back(nodeSize);
178					append_uleb128(fFlags, out);
179					append_uleb128(fOther, out);
180					out.push_back(0);
181				}
182			}
183			else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
184				// nodes with export info: size, flags, address, other
185				uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
186				out.push_back(nodeSize);
187				append_uleb128(fFlags, out);
188				append_uleb128(fAddress, out);
189				append_uleb128(fOther, out);
190			}
191			else {
192				// nodes with export info: size, flags, address
193				uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
194				out.push_back(nodeSize);
195				append_uleb128(fFlags, out);
196				append_uleb128(fAddress, out);
197			}
198		}
199		else {
200			// no export info uleb128 of zero is one byte of zero
201			out.push_back(0);
202		}
203		// write number of children
204		out.push_back(fChildren.size());
205		// write each child
206		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
207			Edge& e = *it;
208			append_string(e.fSubString, out);
209			append_uleb128(e.fChild->fTrieOffset, out);
210		}
211	}
212
213private:
214	static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
215		uint8_t byte;
216		do {
217			byte = value & 0x7F;
218			value &= ~0x7F;
219			if ( value != 0 )
220				byte |= 0x80;
221			out.push_back(byte);
222			value = value >> 7;
223		} while( byte >= 0x80 );
224	}
225
226	static void append_string(const char* str, std::vector<uint8_t>& out) {
227		for (const char* s = str; *s != '\0'; ++s)
228			out.push_back(*s);
229		out.push_back('\0');
230	}
231
232	static unsigned int	uleb128_size(uint64_t value) {
233		uint32_t result = 0;
234		do {
235			value = value >> 7;
236			++result;
237		} while ( value != 0 );
238		return result;
239	}
240
241
242};
243
244inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
245	uint64_t result = 0;
246	int		 bit = 0;
247	do {
248		if (p == end)
249#if __EXCEPTIONS
250			throw "malformed uleb128 extends beyond trie";
251#else
252			return result;
253#endif
254		uint64_t slice = *p & 0x7f;
255
256		if (bit >= 64 || slice << bit >> bit != slice)
257#if __EXCEPTIONS
258			throw "uleb128 too big for 64-bits";
259#else
260			return result;
261#endif
262		else {
263			result |= (slice << bit);
264			bit += 7;
265		}
266	}
267	while (*p++ & 0x80);
268	return result;
269}
270
271
272
273struct Entry
274{
275	const char*		name;
276	uint64_t		address;
277	uint64_t		flags;
278	uint64_t		other;
279	const char*		importName;
280};
281
282
283
284inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
285{
286	Node start(strdup(""));
287
288	// make nodes for all exported symbols
289	for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
290		start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
291	}
292
293	// create vector of nodes
294	std::vector<Node*> orderedNodes;
295	orderedNodes.reserve(entries.size()*2);
296	for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
297		start.addOrderedNodes(it->name, orderedNodes);
298	}
299
300	// assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
301	bool more;
302	do {
303		uint32_t offset = 0;
304		more = false;
305		for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
306			if ( (*it)->updateOffset(offset) )
307				more = true;
308		}
309	} while ( more );
310
311	// create trie stream
312	for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
313		(*it)->appendToStream(output);
314	}
315}
316
317struct EntryWithOffset
318{
319	uintptr_t		nodeOffset;
320	Entry			entry;
321
322	bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
323};
324
325
326
327static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
328									char* cummulativeString, int curStrOffset,
329									std::vector<EntryWithOffset>& output)
330{
331	if ( p >= end )
332#if __EXCEPTIONS
333		throw "malformed trie, node past end";
334#else
335		return;
336#endif
337	const uint8_t terminalSize = read_uleb128(p, end);
338	const uint8_t* children = p + terminalSize;
339	if ( terminalSize != 0 ) {
340		EntryWithOffset e;
341		e.nodeOffset = p-start;
342		e.entry.name = strdup(cummulativeString);
343		e.entry.flags = read_uleb128(p, end);
344		if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
345			e.entry.address = 0;
346			e.entry.other = read_uleb128(p, end); // dylib ordinal
347			e.entry.importName = (char*)p;
348		}
349		else {
350			e.entry.address = read_uleb128(p, end);
351			if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
352				e.entry.other = read_uleb128(p, end);
353			else
354				e.entry.other = 0;
355			e.entry.importName = NULL;
356		}
357		output.push_back(e);
358	}
359	const uint8_t childrenCount = *children++;
360	const uint8_t* s = children;
361	for (uint8_t i=0; i < childrenCount; ++i) {
362		int edgeStrLen = 0;
363		while (*s != '\0') {
364			cummulativeString[curStrOffset+edgeStrLen] = *s++;
365			++edgeStrLen;
366		}
367		cummulativeString[curStrOffset+edgeStrLen] = *s++;
368		uint32_t childNodeOffet = read_uleb128(s, end);
369		processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
370	}
371}
372
373
374inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
375{
376	// empty trie has no entries
377	if ( start == end )
378		return;
379	char cummulativeString[4000];
380	std::vector<EntryWithOffset> entries;
381	processExportNode(start, start, end, cummulativeString, 0, entries);
382	// to preserve tie layout order, sort by node offset
383	std::sort(entries.begin(), entries.end());
384	// copy to output
385	output.reserve(entries.size());
386	for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
387		output.push_back(it->entry);
388}
389
390
391
392
393}; // namespace trie
394}; // namespace mach_o
395
396
397#endif	// __MACH_O_TRIE__
398
399
400