1#===- object.py - Python Object Bindings --------------------*- python -*--===#
2#
3#                     The LLVM Compiler Infrastructure
4#
5# This file is distributed under the University of Illinois Open Source
6# License. See LICENSE.TXT for details.
7#
8#===------------------------------------------------------------------------===#
9
10r"""
11Object File Interface
12=====================
13
14This module provides an interface for reading information from object files
15(e.g. binary executables and libraries).
16
17Using this module, you can obtain information about an object file's sections,
18symbols, and relocations. These are represented by the classes ObjectFile,
19Section, Symbol, and Relocation, respectively.
20
21Usage
22-----
23
The only way to use this module is to start by creating an ObjectFile. You can
create an ObjectFile by loading a file (specified by its path) or by creating
an llvm.core.MemoryBuffer and loading that.
27
28Once you have an object file, you can inspect its sections and symbols directly
29by calling get_sections() and get_symbols() respectively. To inspect
30relocations, call get_relocations() on a Section instance.
31
32Iterator Interface
33------------------
34
The LLVM bindings expose iteration over sections, symbols, and relocations in a
way that allows only one instance to be operated on at a time. This is slightly
awkward from a Python perspective, as it isn't very Pythonic to have objects
that "expire" while Python code can still hold references to them.
39
40To aid working around this limitation, each Section, Symbol, and Relocation
41instance caches its properties after first access. So, if the underlying
42iterator is advanced, the properties can still be obtained provided they have
43already been retrieved.
44
45In addition, we also provide a "cache" method on each class to cache all
46available data. You can call this on each obtained instance. Or, you can pass
47cache=True to the appropriate get_XXX() method to have this done for you.
48
49Here are some examples on how to perform iteration:
50
51    obj = ObjectFile(filename='/bin/ls')
52
53    # This is OK. Each Section is only accessed inside its own iteration slot.
54    section_names = []
55    for section in obj.get_sections():
56        section_names.append(section.name)
57
58    # This is NOT OK. You perform a lookup after the object has expired.
59    symbols = list(obj.get_symbols())
60    for symbol in symbols:
61        print symbol.name # This raises because the object has expired.
62
63    # In this example, we mix a working and failing scenario.
64    symbols = []
65    for symbol in obj.get_symbols():
66        symbols.append(symbol)
67        print symbol.name
68
69    for symbol in symbols:
70        print symbol.name # OK
71        print symbol.address # NOT OK. We didn't look up this property before.
72
73    # Cache everything up front.
74    symbols = list(obj.get_symbols(cache=True))
75    for symbol in symbols:
76        print symbol.name # OK
77
78"""
79
80from ctypes import c_char_p
81from ctypes import c_uint64
82
83from .common import CachedProperty
84from .common import LLVMObject
85from .common import c_object_p
86from .common import get_library
87from .core import MemoryBuffer
88
# Public names exported by a star-import of this module.
__all__ = ["lib", "ObjectFile", "Relocation", "Section", "Symbol"]
96
class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a MemoryBuffer.

        filename, if given, is used to build a MemoryBuffer via
        MemoryBuffer(filename=filename). When both filename and contents are
        supplied, filename wins and contents is replaced.

        contents, if given, must be an llvm.core.MemoryBuffer instance.

        Raises Exception if neither filename nor contents is provided.
        """
        if contents:
            assert isinstance(contents, MemoryBuffer)

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        # NOTE(review): presumably this ties the buffer's lifetime to this
        # object; the exact semantics live in LLVMObject -- confirm there.
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances.

        If cache is true, Section.cache() is called on every instance before
        it is yielded so its properties stay readable after it expires.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on normal exhaustion, on an exception, and when the
            # consumer abandons the generator early (generator close), so the
            # native iterator is always released and the most recently yielded
            # Section can no longer reach it.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances.

        If cache is true, Symbol.cache() is called on every instance before
        it is yielded so its properties stay readable after it expires.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Always expire the last Symbol and release the native iterator,
            # even if iteration stopped early or raised.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)
175
class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        # Set to True by expire() once the underlying iterator has moved on.
        self.expired = False

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.

        Raises Exception if the section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section in bytes, as an integer.

        Raises Exception if the section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The contents of the section as returned by LLVMGetSectionContents.

        Raises Exception if the section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        # NOTE(review): the prototype registered in this module declares the
        # return type as c_char_p; presumably ctypes stops at the first NUL
        # byte, which would truncate binary section data -- confirm.
        return lib.LLVMGetSectionContents(self)

    @CachedProperty
    def address(self):
        """The address of this section, as an integer.

        Raises Exception if the section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section.

        Raises Exception if the section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances.

        If cache is true, Relocation.cache() is called on every instance
        before it is yielded.

        Because this is a generator, the expiry check below runs when
        iteration starts, not when get_relocations() is called.

        Each instance is a limited-use object. See this module's documentation
        on iterators for more.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Runs on exhaustion, on an exception, and when the consumer stops
            # iterating early, so the native iterator is always released and
            # the last yielded Relocation is expired.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        # Reading each CachedProperty once is what populates the cache.
        for prop in ('name', 'size', 'contents', 'address'):
            getattr(self, prop)

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True
281
class Symbol(LLVMObject):
    """Represents a symbol in an object file."""

    def __init__(self, ptr, object_file=None):
        """Create a new symbol instance.

        ptr is the c_object_p of the underlying symbol iterator.

        object_file is the ObjectFile the symbol belongs to. It is only needed
        by the section property and may be omitted by callers that have no
        ObjectFile at hand (Relocation.symbol in this module constructs a
        Symbol from the pointer alone).
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        # Set to True by expire() once the underlying iterator has moved on.
        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.

        Raises Exception if the symbol has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol, as an integer.

        Raises Exception if the symbol has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def file_offset(self):
        """The offset of this symbol in the file, as an integer.

        Raises Exception if the symbol has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolFileOffset(self)

    @CachedProperty
    def size(self):
        """The size of the symbol in bytes, as an integer.

        Raises Exception if the symbol has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        Because this obtains a new section iterator each time it is accessed,
        calling this on a number of Symbol instances could be expensive.

        Raises Exception if the symbol has expired or was created without an
        ObjectFile.
        """
        # The lookup hands this symbol's iterator to native code, so it must
        # still be positioned on this symbol (and not yet disposed).
        if self.expired:
            raise Exception('Symbol instance has expired.')

        if self._object_file is None:
            raise Exception('Symbol has no associated ObjectFile.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        # NOTE(review): no disposer is registered for this section iterator
        # here; presumably it is never released -- confirm.
        return Section(sections)

    def cache(self):
        """Cache all cacheable properties.

        The section property is not fetched here.
        """
        # Reading each CachedProperty once is what populates the cache.
        for prop in ('name', 'address', 'file_offset', 'size'):
            getattr(self, prop)

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True
358
class Relocation(LLVMObject):
    """Represents a relocation definition."""

    def __init__(self, ptr):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        # Set to True by expire() once the underlying iterator has moved on.
        self.expired = False

    @CachedProperty
    def address(self):
        """The address of this relocation, as an integer.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationAddress(self)

    @CachedProperty
    def offset(self):
        """The offset of this relocation, as an integer.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        ptr = lib.LLVMGetRelocationSymbol(self)
        # NOTE(review): this relies on Symbol's constructor accepting a bare
        # pointer with no ObjectFile -- verify against Symbol.__init__.
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as an integer.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The value returned by LLVMGetRelocationValueString.

        Raises Exception if the relocation has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        # Reading each CachedProperty once is what populates the cache. The
        # relocation type is exposed as 'type_number'; this class defines no
        # 'type' attribute.
        for prop in ('address', 'offset', 'symbol', 'type_number',
                     'type_name', 'value_string'):
            getattr(self, prop)
435
def register_library(library):
    """Declare ctypes prototypes for the Object.h functions on library.

    For every function used by this module, the argument types are assigned
    and, where the function returns a value this module reads, the return
    type as well.
    """
    # One row per function: (name, argtypes, restype). A restype of None
    # means "do not assign restype", leaving whatever ctypes already has.
    prototypes = (
        # Object file lifetime.
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], None),

        # Section iteration.
        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], None),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], None),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p], None),

        # Symbol iteration.
        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], None),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], None),

        # Section accessors.
        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        ('LLVMGetSectionContents', [c_object_p], c_char_p),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),

        # Relocation iteration.
        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], None),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], None),

        # Symbol accessors.
        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolFileOffset', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),

        # Relocation accessors.
        ('LLVMGetRelocationAddress', [c_object_p], c_uint64),
        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for func_name, arg_types, result_type in prototypes:
        func = getattr(library, func_name)
        func.argtypes = arg_types
        if result_type is not None:
            func.restype = result_type
521
# Obtain the library handle once at import time and declare the prototypes
# this module calls; the classes above reach the C API through this ``lib``.
lib = get_library()
register_library(lib)
524