1#!/usr/bin/env python
2#-
3# Copyright (c) 2010 Gleb Kurtsou
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9# 1. Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11# 2. Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in the
13#    documentation and/or other materials provided with the distribution.
14#
15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25# SUCH DAMAGE.
26#
27# $FreeBSD$
28
29from __future__ import print_function
30import os
31import sys
32import re
33import optparse
34
class Config(object):
    """Settings shared by the whole script, stored as class attributes.

    Nothing here is instantiated; callers read and write ``Config.<name>``
    directly.  ``init()`` must run before the two filters are used, since
    they start out as ``None``.
    """

    version = '0.1'

    # -- controlled by user --
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None       # becomes a StrFilter in init()
    symbol_filter = None        # becomes a StrFilter in init()
    alias_prefixes = []

    # -- misc opts: names of the external tools to run --
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'

    # -- debug switches --
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Per-file settings: a file name and the stream output goes to."""

        filename = None
        out = sys.stdout

        def init(self, outname):
            """Point ``out`` at a freshly opened *outname*.

            An empty/None name or ``'-'`` leaves the class-level default
            (the ``sys.stdout`` captured at class creation) in place.
            """
            if not outname or outname == '-':
                return
            self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    # Regular expressions for symbols excluded by default.
    exclude_sym_default = [
        '^__bss_start$',
        '^_edata$',
        '^_end$',
        '^_fini$',
        '^_init$',
    ]

    @classmethod
    def init(cls):
        """Install fresh, empty StrFilter objects for versions and symbols."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()
76
# Holder for run-wide state, kept as class attributes.
class App(object):
    # Starts at 0; presumably the script's exit status -- no reader or
    # writer is visible in this part of the file, confirm against callers.
    result_code = 0
79
def warn(cond, msg):
    """Print ``WARN: <msg>`` to stderr when *cond* is truthy; else do nothing."""
    if not cond:
        return
    print("WARN: " + msg, file=sys.stderr)
83
84# {{{ misc
85
class StrFilter(object):
    """Accept or reject strings using include/exclude regular expressions.

    Patterns are collected as plain strings in ``include`` and ``exclude``
    and turned into compiled objects by ``compile()``.  Matching uses
    ``re.match`` semantics, i.e. patterns are anchored at the start of the
    string only.
    """

    def __init__(self):
        self.exclude = []
        self.include = []
        # Compiled pattern lists; None until compile() has run.
        self.re_exclude = None
        self.re_include = None

    def compile(self):
        """Compile the current ``include``/``exclude`` pattern strings.

        Patterns appended afterwards are ignored until this is called again.
        """
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if *s* passes the filter.

        *s* passes when it matches at least one include pattern (or no
        include patterns exist) and matches no exclude pattern.  If
        ``compile()`` has not been called yet it is invoked here, so a
        freshly configured filter no longer fails with AttributeError.
        """
        if self.re_include is None or self.re_exclude is None:
            self.compile()
        if self.re_include and not any(r.match(s) for r in self.re_include):
            return False
        return not any(r.match(s) for r in self.re_exclude)
108
class Cache(object):
    """Dictionary-backed cache keyed by id, with hit/miss accounting.

    A disabled cache stores nothing and reports every lookup as a miss.
    """

    class CacheStats(object):
        """Hit/miss counters; may be shared between several caches."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary of the counters labelled *name*."""
            lookups = self.hit + self.miss
            if lookups:
                ratio = '%f' % (self.hit/float(lookups))
            else:
                ratio = '(undef)'
            summary = (name, self.hit, self.miss, ratio)
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % summary

    def __init__(self, enabled=True, stats=None):
        self.enabled = enabled
        self.items = {}
        # Use the caller's counters when given, private ones otherwise.
        self.stats = Cache.CacheStats() if stats is None else stats

    def get(self, id):
        """Return the object stored under *id*, or None on a miss."""
        if not self.enabled or id not in self.items:
            self.stats.miss += 1
            return None
        self.stats.hit += 1
        return self.items[id]

    def put(self, id, obj):
        """Store *obj* under *id*; no-op when the cache is disabled.

        Overwriting an entry with a different object goes through warn()
        (gated by Config.w_cached) instead of raising.
        """
        if not self.enabled:
            return
        if id in self.items and obj is not self.items[id]:
            message = "Item is already cached: %d (%s, %s)" % \
                    (id, self.items[id], obj)
            warn(Config.w_cached, message)
        self.items[id] = obj

    def replace(self, id, obj):
        """Overwrite the existing entry for *id*; asserts that it exists."""
        if not self.enabled:
            return
        assert id in self.items
        self.items[id] = obj
154
class ListDiff(object):
    """Set-based difference between an original and a new collection.

    Exposes ``orig`` and ``new`` (as sets) plus ``common``, ``added``
    (only in *new*) and ``removed`` (only in *orig*).
    """

    def __init__(self, orig, new):
        before = set(orig)
        after = set(new)
        self.orig = before
        self.new = after
        self.common = before & after
        self.added = after - before
        self.removed = before - after
162
class PrettyPrinter(object):
    """Drives the ``_pp`` / ``_pp_ex`` methods of definition objects.

    ``run`` renders an object and remembers the latest rendering;
    ``run_nested`` collects extended renderings for later retrieval.
    """

    def __init__(self):
        self.stack = []

    def run_nested(self, obj):
        """Record the extended rendering of *obj* on the stack."""
        self.stack.append(obj._pp_ex(self))

    def run(self, obj):
        """Render *obj*, store the text as the current result and return it."""
        rendered = obj._pp(self)
        self._result = rendered
        return rendered

    def nested(self):
        """Return the recorded extended renderings, de-duplicated and sorted."""
        unique = set(self.stack)
        return sorted(unique)

    def result(self):
        """Return what the most recent ``run`` call produced."""
        return self._result
180
181# }}}
182
183#{{{ symbols and version maps
184
class Symbol(object):
    """A versioned symbol: name, offset, version name and owning library.

    ``definition`` starts as None and is filled in by whoever resolves
    the symbol's type information.
    """

    def __init__(self, name, offset, version, lib):
        self.name, self.offset = name, offset
        self.version, self.lib = version, lib
        self.definition = None

    @property
    def name_ver(self):
        """The symbol spelled as ``name@version``."""
        return '@'.join((self.name, self.version))

    def __repr__(self):
        fields = (self.name, self.offset, self.version)
        return "Symbol(%s, 0x%x, %s)" % fields
199
class CommonSymbol(object):
    """Pairs the original and new versions of one symbol.

    Both symbols must agree on ``name`` and ``version``; those values are
    copied onto the pair for convenient access.

    Raises:
        RuntimeError: if the two symbols differ in name or version.
    """

    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # Format with '%': passing the list as a second constructor
            # argument left the '%s' placeholder unexpanded in the message.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)
212
class SymbolAlias(object):
    """A prefixed alias of a symbol.

    ``alias`` is the full aliased spelling, ``name`` is that spelling with
    *prefix* removed from the front, and ``offset`` is stored as given.
    *alias* is asserted to start with *prefix*.
    """

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        prefix_len = len(prefix)
        self.alias = alias
        self.offset = offset
        self.name = alias[prefix_len:]

    def __repr__(self):
        fields = (self.alias, self.offset)
        return "SymbolAlias(%s, 0x%x)" % fields
222
223
class VersionMap(object):
    """The symbols that belong to one named symbol version.

    ``symbols`` maps a symbol name to the symbol object added under it.
    """

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add *symbol*, keyed by ``symbol.name``.

        Raises:
            ValueError: if a symbol with the same name is already present.
        """
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the symbol names as a list.

        A real list (not a dict view) so the result does not change with
        later ``append`` calls and is the same type on Python 2 and 3.
        """
        return list(self.symbols)

    def __repr__(self):
        # list(...) keeps the output a plain list repr on Python 3 too,
        # where repr(dict.values()) would read "dict_values([...])".
        return repr(list(self.symbols.values()))
240
241# }}}
242
243# {{{ types and definitions
244
class Def(object):
    """Base class for type and symbol definitions.

    A definition has an ``id``, a ``name`` (possibly None) and arbitrary
    keyword attributes kept in ``attrs``; those attributes are readable as
    ordinary instance attributes through ``__getattr__``.  Subclasses that
    merely wrap another type set ``_is_alias`` and carry a ``type``
    attribute.
    """

    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        """Resolve *attr* from ``attrs`` once normal lookup has failed.

        Raises:
            AttributeError: if *attr* is not a stored attribute.
        """
        # Read 'attrs' through __dict__: evaluating self.attrs here would
        # re-enter __getattr__ without end on an instance whose __init__
        # has not run yet (copy.copy and pickle probe such instances).
        attrs = self.__dict__.get('attrs')
        if attrs is None:
            # Not initialised; str(self) needs attrs, so keep it plain.
            raise AttributeError(attr)
        if attr not in attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or *default* when the name is empty/None."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Follow alias definitions down to the first non-alias one."""
        if self._is_alias:
            return self.type._alias()
        return self

    def __cmp__(self, other):
        """Three-way comparison of two definitions after alias resolution.

        This is the Python 2 comparison protocol (``cmp`` and ``long`` are
        Python 2 builtins).  Results for pairs with non-zero ids are
        memoised in ``Dwarf.cmpcache``.  Returns 1 when *other* has no
        ``_alias``.
        NOTE(review): definitions of different classes compare as equal
        (0); the alternative ``raise`` is commented out below -- confirm
        that this is intended.
        """
        # TODO assert 'self' and 'other' belong to different libraries
        #print 'cmp defs: %s, %s' % (self, other)
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            return 1
        r = cmp(a.__class__, b.__class__)
        if r == 0:
            if a.id != 0 and b.id != 0:
                # Combine both ids into a single cache key.
                ind = (long(a.id) << 32) + b.id
                r = Dwarf.cmpcache.get(ind)
                if r is not None:
                    return r
            else:
                # An id of 0 on either side disables caching for the pair.
                ind = 0
            r = cmp(a.attrs, b.attrs)
            if ind != 0:
                Dwarf.cmpcache.put(ind, r)
        else:
            r = 0
            #raise RuntimeError('Comparing different classes: %s, %s' %
            #        (a.__class__.__name__, b.__class__.__name__))
        return r

    def __repr__(self):
        p = []
        if hasattr(self, 'name'):
            p.append("name=%s" % self.name)
        for (k, v) in self.attrs.items():
            if isinstance(v, Def):
                # Abbreviate nested definitions to their class name.
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return ``vals[param]``.

        Raises:
            NotImplementedError: if *param* is not a key of *vals*.
        """
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                    (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        """Extended pretty print; subclasses override, the base raises."""
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        """Pretty print; subclasses override, the base raises."""
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))
316
class AnonymousDef(Def):
    """A definition without a name: ``name`` is always None."""

    def __init__(self, id, **kwargs):
        super(AnonymousDef, self).__init__(id, None, **kwargs)
320
class Void(AnonymousDef):
    """The ``void`` type; every ``Void()`` call yields the same object."""

    _instance = None

    def __new__(cls, *args, **kwargs):
        # Reuse the instance created by the first call, if there is one.
        if cls._instance:
            return cls._instance
        cls._instance = super(Void, cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        # Runs on every Void() call and re-applies id 0 with no attributes.
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"
335
class VarArgs(AnonymousDef):
    """Variable-argument marker; pretty-prints as an ellipsis."""

    def _pp(self, pp):
        ellipsis_text = "..."
        return ellipsis_text
339
class PointerDef(AnonymousDef):
    """Pointer to ``type``; rendered as the pointee followed by ``*``."""

    def _pp(self, pp):
        pointee = pp.run(self.type)
        return "%s*" % (pointee,)
344
class BaseTypeDef(Def):
    """A base type described by its ``encoding`` and ``byte_size`` attributes.

    ``byte_size`` is parsed with ``int(value, 0)``.
    """

    # Encodings rendered as fixed-width integer names.
    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        """Render the type as a C-like type name.

        Raises:
            NotImplementedError: for encoding/size combinations not handled.
        """
        if self.encoding in self.inttypes:
            # Everything in inttypes except DW_ATE_signed gets a 'u' prefix.
            if self.encoding == 'DW_ATE_signed':
                prefix = ''
            else:
                prefix = 'u'
            width = int(self.byte_size, 0) * 8
            return '%sint%s_t' % (prefix, width)
        if self.encoding == 'DW_ATE_signed_char' and int(self.byte_size, 0) == 1:
            return 'char'
        if self.encoding == 'DW_ATE_boolean' and int(self.byte_size, 0) == 1:
            return 'bool'
        if self.encoding == 'DW_ATE_float':
            float_names = {
                16: 'long double',
                8: 'double',
                4: 'float',
            }
            return self._mapval(int(self.byte_size, 0), float_names)
        raise NotImplementedError('Invalid encoding: %s' % self)
363
class TypeAliasDef(Def):
    """A typedef: an alias whose ``type`` attribute is the aliased type."""

    _is_alias = True

    def _pp(self, pp):
        target = self._alias()
        # Push the typedef name onto the resolved type when it has none.
        if self.name and not target.name:
            target.name = 'T(%s)' % self.name
        # Rendering goes through the directly aliased type so that any
        # modifiers in between are kept.
        return self.type._pp(pp)
373
class EnumerationTypeDef(Def):
    """An enumeration; unnamed ones are rendered as ``enum UNKNOWN``."""

    def _pp(self, pp):
        tag_name = self._name_opt('UNKNOWN')
        return 'enum ' + tag_name
377
class ConstTypeDef(AnonymousDef):
    """``const`` qualifier around ``type``; an alias for comparisons."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'const ' + qualified
382
class VolatileTypeDef(AnonymousDef):
    """``volatile`` qualifier around ``type``; an alias for comparisons."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'volatile ' + qualified
387
class RestrictTypeDef(AnonymousDef):
    """``restrict`` qualifier around ``type``; an alias for comparisons."""

    _is_alias = True

    def _pp(self, pp):
        qualified = self.type._pp(pp)
        return 'restrict ' + qualified
392
class ArrayDef(AnonymousDef):
    """Array of ``type``; exactly one entry in ``subranges`` is supported."""

    def _pp(self, pp):
        element = pp.run(self.type)
        assert len(self.subranges) == 1
        try:
            length = int(self.subranges[0].upper_bound) + 1
        except ValueError:
            # The bound is not a bare integer: drop a parenthesised part
            # from it and parse what remains.
            bound = self.subranges[0].upper_bound
            length = int(re.sub(r'\(.+\)', '', bound)) + 1
        return '%s[%s]' % (element, length)
403
# Anonymous definition for one array subrange.  It adds no behaviour of its
# own; ArrayDef._pp reads the `upper_bound` attribute from it.
class ArraySubrangeDef(AnonymousDef):
    pass
406
class FunctionDef(Def):
    """A function with ``result`` and ``params`` attributes.

    Rendered as a C-style prototype; an empty parameter list prints as
    ``void``.
    """

    def _pp(self, pp):
        rtype = pp.run(self.result)
        if self.params:
            arglist = ', '.join([pp.run(p) for p in self.params])
        else:
            arglist = "void"
        return "%s %s(%s);" % (rtype, self.name, arglist)
415
class FunctionTypeDef(Def):
    """A function type, rendered as ``F(name, result, (params))``.

    The name may be empty; an empty parameter list prints as ``void``.
    """

    def _pp(self, pp):
        rtype = pp.run(self.result)
        if self.params:
            arglist = ', '.join([pp.run(p) for p in self.params])
        else:
            arglist = "void"
        return "F(%s, %s, (%s))" % (self._name_opt(), rtype, arglist)
424
class ParameterDef(Def):
    """A function parameter, rendered as ``<type> <name>`` (name may be empty)."""

    def _pp(self, pp):
        rendered_type = pp.run(self.type)
        return "%s %s" % (rendered_type, self._name_opt())
429
class VariableDef(Def):
    """A variable, rendered as ``<type> <name>`` (name may be empty)."""

    def _pp(self, pp):
        rendered_type = pp.run(self.type)
        return "%s %s" % (rendered_type, self._name_opt())
434
435# TODO
# Placeholder for a struct forward declaration.  It defines no _pp of its
# own, so pretty-printing falls through to Def._pp, which raises
# NotImplementedError.
class StructForwardDef(Def):
    pass
438
class IncompleteDef(Def):
    """Stand-in for a definition that has not been fully built yet."""

    def update(self, complete, cache=None):
        """Link this placeholder with its finished definition.

        Sets ``self.complete`` and ``complete.incomplete``.  When *cache*
        is given and its entry for this id is an IncompleteDef, that entry
        is replaced by *complete*.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is None:
            return
        entry = cache.get(self.id)
        if isinstance(entry, IncompleteDef):
            cache.replace(self.id, complete)
447
class StructIncompleteDef(IncompleteDef):
    """Placeholder for a struct; rendered by name only."""

    def _pp(self, pp):
        return "struct %s" % (self.name,)
451
class UnionIncompleteDef(IncompleteDef):
    """Placeholder for a union; rendered by name only."""

    def _pp(self, pp):
        return "union %s" % (self.name,)
455
class StructDef(Def):
    """A struct with a ``members`` attribute."""

    def _pp_ex(self, pp, suffix=';'):
        """Render the full body: ``struct NAME { members }`` plus *suffix*."""
        body = ' '.join([pp.run(m) for m in self.members])
        return "struct %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Render a reference to the struct.

        Named structs print by name and register their full body with the
        printer; anonymous ones are expanded inline without a suffix.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "struct %s" % (self.name,)
467
class UnionDef(Def):
    """A union with a ``members`` attribute."""

    def _pp_ex(self, pp, suffix=';'):
        """Render the full body: ``union NAME { members }`` plus *suffix*."""
        body = ' '.join([pp.run(m) for m in self.members])
        return "union %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        """Render a reference to the union.

        Named unions print by name and register their full body with the
        printer; anonymous ones are expanded inline without a suffix.
        """
        if not self.name:
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "union %s" % (self.name,)
479
class MemberDef(Def):
    """A struct/union member with ``type`` and ``bit_size`` attributes."""

    def _pp(self, pp):
        rendered_type = pp.run(self.type)
        # A truthy bit_size adds a ":<bits>" bit-field suffix.
        width = ":%s" % self.bit_size if self.bit_size else ""
        return "%s %s%s;" % (rendered_type, self._name_opt(), width)
488
class Dwarf(object):
    """Builds Def objects out of parsed dwarfdump tags.

    ``build`` dispatches on the tag name: ``DW_TAG_xyz`` is handled by the
    method ``build_xyz``.  Built objects are cached per compilation unit
    in ``raw.unit.cache``.
    """

    # Shared memo for Def comparisons (used by Def.__cmp__).
    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the definition referenced by the tag's optional 'type'.

        Returns Void() when the tag has no type argument.  The absence is
        detected with a None default rather than by comparing the result
        against Void(), which would go through Def's comparison protocol.
        """
        ref = praw.optarg('type', None)
        if ref is None:
            return Void()
        return self.buildref(praw.unit, ref)

    def build_subprogram(self, raw):
        if raw.optname is None:
            # Unnamed subprograms get a name derived from their low_pc.
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_variable(self, raw):
        typ = self._build_optarg_type(raw)
        return VariableDef(raw.id, raw.optname, type=typ)

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params, result=result)

    def build_formal_parameter(self, raw):
        typ = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=typ)

    def build_pointer_type(self, raw):
        typ = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=typ)

    def build_member(self, raw):
        typ = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=typ,
                bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        """Build a StructDef, handing out a placeholder while it is built.

        A StructIncompleteDef is registered before the members are built;
        a re-entrant request for the same struct (a self-referencing
        member) receives that placeholder instead of recursing.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = StructIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        # TODO: structs without byte_size are still built as StructDef.
        # The StructForwardDef formerly constructed for them was always
        # overwritten on the next line; its only observable effect was a
        # KeyError from raw.name on anonymous structs, so it was removed.
        obj = StructDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_union_type(self, raw):
        """Build a UnionDef, handing out a placeholder while it is built.

        NOTE(review): unlike build_structure_type this links placeholder
        and result by hand and does not call incomplete.update(), so a
        placeholder already stored in raw.unit.cache is not replaced here
        -- confirm whether the difference is intended.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = UnionIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        obj = UnionDef(raw.id, raw.optname, members=members,
                byte_size=byte_size)
        obj.incomplete = incomplete
        incomplete.complete = obj
        return obj

    def build_typedef(self, raw):
        typ = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=typ)

    def build_const_type(self, raw):
        typ = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=typ)

    def build_volatile_type(self, raw):
        typ = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=typ)

    def build_restrict_type(self, raw):
        typ = self._build_optarg_type(raw)
        return RestrictTypeDef(raw.id, type=typ)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        typ = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=typ, subranges=subranges)

    def build_subrange_type(self, raw):
        typ = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=typ,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a tag reference to an integer id.

        Accepts a plain decimal string, or a value wrapped in ``<...>``
        which is parsed with base auto-detection.

        Raises:
            ValueError: if *id* has neither form.
        """
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1], 0)
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for tag *raw*, building it on first use.

        Raises:
            AttributeError: if no ``build_<tag>`` method exists for the tag.
        """
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that *id* refers to in *unit*."""
        raw = unit.tags[self._get_id(id)]
        return self.build(raw)
628
629# }}}
630
class Shlib(object):
    """One shared library: its versioned symbols and their definitions.

    ``versions`` maps a version name to a VersionMap, ``alias_syms`` maps
    an un-prefixed name to a SymbolAlias, and ``local_offsetmap`` maps an
    offset to the local symbol names found there (filled only when alias
    prefixes are configured).
    """

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Always present, so symbol lookup needs no exception handling for
        # a missing attribute when no alias prefixes are configured.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Run objdump and collect the dynamic symbols and alias symbols."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            # Strip the parentheses from a version written as "(NAME)".
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is " \
                                    "already defined: %s: %s at %08x -- %s at %08x" % \
                                    (alias.alias, alias.name,  alias.offset,
                                            prevalias.name, prevalias.offset))
                    # The most recently seen alias wins.
                    self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Run dwarfdump and attach a definition to every known symbol.

        Symbols for which no tag can be found are reported through warn()
        (gated by Config.w_symbol) and left without a definition.
        """
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            """Return the tag for *sym*, or None when there is none.

            The symbol's own offset is tried first.  Otherwise the local
            names at that offset are scanned, longest first, and the first
            one that is a known alias decides the result: the tag at the
            alias offset, or None if there is no tag there.
            """
            raw = dwarfdump.offsetmap.get(sym.offset)
            if raw is not None:
                return raw
            localnames = self.local_offsetmap.get(sym.offset, ())
            for localname in sorted(localnames, key=lambda x: -len(x)):
                alias = self.alias_syms.get(localname)
                if alias is None:
                    continue
                return dwarfdump.offsetmap.get(alias.offset)
            return None

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if not raw:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    print("Parsing symbol %s (%s)" % (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exits with status 1 if the file is missing."""
        if not os.path.isfile(self.libfile):
            print("No such file: %s" % self.libfile, file=sys.stderr)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()
707
708# {{{ parsers
709
class Parser(object):
    """Line-oriented parser for the output of an external command.

    ``self.parser`` holds the handler for the next line; handlers may
    reassign it to move between parsing states.
    """

    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run the command and pass every non-blank line to the handler.

        Lines are stripped first.  A truthy result from closing the pipe
        is reported on stderr and ends the process with status 2.
        """
        pipe = os.popen(self.proc, 'r')
        # readline() returns '' only at end of input.
        for raw_line in iter(pipe.readline, ''):
            text = raw_line.strip()
            if text:
                # Looked up on every line: the handler may have changed.
                self.parser(text)
        if pipe.close():
            print("Execution failed: %s" % self.proc, file=sys.stderr)
            sys.exit(2)

    def parse_begin(self, line):
        """Default initial handler: echo the line."""
        print(line)
731
class ObjdumpParser(Parser):
    """Parses the symbol tables printed by ``objdump -wtT``.

    Collects ``dynamic_symbols`` and ``local_symbols`` (lists of regex
    group dicts whose 'offset' has been converted to int) and
    ``local_offsetmap`` (offset -> list of local symbol names).
    """

    # Raw strings: the patterns use \w, \s and \*, which are invalid
    # escape sequences in ordinary string literals.
    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        """Initial state: look for a symbol table header, ignore the rest."""
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap = None):
        """Append *symbol* to *table* unless its offset is zero.

        The hexadecimal 'offset' entry is converted to int in place.  When
        *offsetmap* is given, the symbol name is also recorded under its
        offset.
        """
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch parsing state if *line* is a symbol table header.

        Returns True when the line was a header, False otherwise.

        Raises:
            ValueError: for a table name other than "DYNAMIC" or empty.
        """
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        """Handle one line of the local symbol table.

        Lines that do not look like a symbol are skipped silently, as are
        symbols without a name or with an '@' in the name.
        """
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            return
            #raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and '@' not in p['symbol']:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        """Handle one line of the dynamic symbol table.

        Only entries that carry both a version and a symbol name are kept.

        Raises:
            ValueError: if the line does not look like a symbol at all.
        """
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)
799
class DwarfdumpParser(Parser):
    """Parses the ``.debug_info`` listing printed by ``dwarfdump -di``.

    Produces Tag objects grouped into Units (a new Unit starts at every
    level-0 line) and ``offsetmap``, which maps an address to the tag
    found at it.
    """

    # Counters shared by the tag caches of all units.
    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """One compilation unit: its tags plus per-unit caches."""

        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                    stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One entry of the listing with its attributes and nested tags."""

        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'], 0)
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            """The DW_AT_name value; raises KeyError when absent."""
            return self.arg('name')

        @property
        def optname(self):
            """The DW_AT_name value, or None when absent."""
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return attribute ``DW_AT_<a>``.

            Raises:
                KeyError: if the tag has no such attribute.
            """
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                        (name, self, self.args))

        def optarg(self, a, default):
            """Return attribute ``DW_AT_<a>``, or *default* when absent."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    # Raw strings: the patterns use \d, \w and \+, which are invalid
    # escape sequences in ordinary string literals.
    re_header = re.compile(
        r'<(?P<level>\d+)>'
        r'<(?P<id>[0xX0-9a-fA-F]+(?:\+(0[xX])?[0-9a-fA-F]+)?)>'
        r'<(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    # Tags that are not attached to their parent's ``nested`` list.
    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
        ])

    # Skipped tags that still enter offsetmap when marked DW_AT_external.
    external_tags = set([
        'DW_TAG_variable',
        ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        """Initial state: the first line must be ``.debug_info``.

        Raises:
            ValueError: for any other first line.
        """
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split one ``<...>`` value off the front of *args*.

        Nested angle brackets are balanced.  Returns ``(rest, value)``
        where *value* is the text between the outermost brackets.
        """
        assert args.startswith('<')
        i = 1
        cnt = 1
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i+1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Parse one attribute from the front of *args* into ``tag.args``.

        An attribute with exactly one value is stored as a string, any
        other number of values as a list.  "<Unknown AT value ...>"
        entries are skipped.  Returns the unparsed remainder.

        Raises:
            ValueError: if the text is neither form.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError("Invalid dwarfdump: couldn't parse arguments: %s" %
                        args)
            args = args[len(m.group(0)):].lstrip()
            return args
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    def parse_debuginfo(self, line):
        """Parse one tag line and link it into the unit, map and tree.

        Raises:
            ValueError: if the line has no tag header, or if a second tag
                would be recorded in offsetmap at the same address.
        """
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            # A level-0 line opens a new unit and is not stored as a tag.
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag

        def parse_offset(tag):
            """Return the tag's address from low_pc or a DW_OP_addr location."""
            if 'DW_AT_low_pc' in tag.args:
                return int(tag.args['DW_AT_low_pc'], 16)
            elif 'DW_AT_location' in tag.args:
                location = tag.args['DW_AT_location']
                if location.startswith('DW_OP_addr'):
                    return int(location.replace('DW_OP_addr', ''), 16)
            return None

        offset = parse_offset(tag)
        if offset is not None and \
                (tag.tag not in DwarfdumpParser.skip_tags or \
                ('DW_AT_external' in tag.args and \
                tag.tag in DwarfdumpParser.external_tags)):
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                        "symbol is already defined at offset 0x%x" % offset)
            self.offsetmap[offset] = tag
        if len(self.stack) > 0:
            # Pop back to the nearest entry shallower than this tag; that
            # entry is the parent and goes back onto the stack.
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        assert len(self.stack) == tag.level
953
954# }}}
955
def list_str(l):
    """Return the str() of every item in *l*, sorted and joined by ', '.

    Sorting is done on the string forms, so it is lexicographic.
    """
    return ', '.join(sorted(str(item) for item in l))
960
def names_ver_str(vername, names):
    """Format *names* as a sorted, comma-separated 'name@vername' list."""
    qualified = []
    for name in names:
        qualified.append(name + "@" + vername)
    return list_str(qualified)
963
def common_symbols(origlib, newlib):
    """Print version/symbol set differences and pair up shared symbols.

    Versions found in only one library are printed together with all of
    their symbols.  For every version present in both libraries, symbol
    names added or removed within that version are collected and printed
    at the end, and each name present on both sides is wrapped in a
    CommonSymbol.

    Returns a list with one VersionMap per common version, holding the
    CommonSymbol objects for that version.
    """
    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
    if Config.verbose >= 1:
        print('Original versions:   ', list_str(verdiff.orig))
        print('New versions:        ', list_str(verdiff.new))
    for vername in verdiff.added:
        symbols = names_ver_str(vername, newlib.versions[vername].names())
        print('Added version:       ', vername)
        print('    Added symbols:   ', symbols)
    for vername in verdiff.removed:
        symbols = names_ver_str(vername, origlib.versions[vername].names())
        print('Removed version:     ', vername)
        print('    Removed symbols: ', symbols)

    result = []
    added = []
    removed = []
    for vername in verdiff.common:
        origver = origlib.versions[vername]
        newver = newlib.versions[vername]
        namediff = ListDiff(origver.names(), newver.names())
        if namediff.added:
            added.append(names_ver_str(vername, namediff.added))
        if namediff.removed:
            removed.append(names_ver_str(vername, namediff.removed))
        commonver = VersionMap(vername)
        for symname in namediff.common:
            commonver.append(CommonSymbol(origver.symbols[symname],
                    newver.symbols[symname]))
        result.append(commonver)

    # Per-version symbol changes are reported after all versions are scanned.
    for title, entries in (('Added symbols:', added),
            ('Removed symbols:', removed)):
        if entries:
            print(title)
            for entry in entries:
                print('    ', entry)
    return result
1002
def cmp_symbols(commonver):
    """Compare the definitions of every common symbol and report results.

    *commonver* is an iterable of version objects providing ``names()``
    and a ``symbols`` mapping whose values have ``origsym`` and ``newsym``.
    Symbols are visited in sorted name order.

    A symbol whose definition is None on either side, or whose two
    definitions compare unequal, sets ``App.result_code`` to 1.  Results
    are printed for every non-matching symbol, and for matching ones too
    when ``Config.verbose >= 1``.  Definitions are pretty-printed to the
    per-library output streams when ``Config.dump`` is set, or on a
    mismatch unless ``Config.no_dump`` is set.
    """
    for ver in commonver:
        # sorted() works on any iterable (list, tuple, dict keys view) and
        # leaves the object returned by names() untouched, matching what
        # dump_symbols() does.
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            missing = sym.origsym.definition is None or sym.newsym.definition is None
            match = not missing and sym.origsym.definition == sym.newsym.definition
            if not match:
                App.result_code = 1
            if Config.verbose >= 1 or not match:
                if missing:
                    print('%s: missing definition' % \
                            (sym.origsym.name_ver,))
                    continue
                print('%s: definitions %smatch' % \
                        (sym.origsym.name_ver, "" if match else "mis"))
                if Config.dump or (not match and not Config.no_dump):
                    for x in [(sym.origsym, Config.origfile),
                            (sym.newsym, Config.newfile)]:
                        xsym = x[0]
                        xout = x[1].out
                        # A None definition was already handled above, so
                        # this only triggers for a definition that is
                        # otherwise falsy.
                        if not xsym.definition:
                            print('\n// Definition not found: %s %s' % \
                                    (xsym.name_ver, xsym.lib.libfile), file=xout)
                            continue
                        # NOTE(review): this header says "mismatch" even
                        # when Config.dump forces a dump of matching
                        # definitions -- confirm the wording is intended.
                        print('\n// Definitions mismatch: %s %s' % \
                                (xsym.name_ver, xsym.lib.libfile), file=xout)
                        pp = PrettyPrinter()
                        pp.run(xsym.definition)
                        for i in pp.nested():
                            print(i, file=xout)
                        print(pp.result(), file=xout)
1036
def dump_symbols(commonver):
    """Write the definitions of all common symbols, version by version.

    For each version in *commonver* two dumps are produced, one to
    ``Config.origfile.out`` and one to ``Config.newfile.out``.  A symbol
    is included only when both its original and new definitions are
    truthy; otherwise a 'Missing symbol definition' warning is issued
    through ``warn`` and the symbol is left out of both dumps.
    """
    class SymbolDump(object):
        # Collects the rendered symbols of one version for one library and
        # writes them out in finish().
        def __init__(self, io_conf, vername):
            self.io_conf = io_conf
            self.vername = vername
            self.pp = PrettyPrinter()
            self.res = []
        def run(self, sym):
            rendered = self.pp.run(sym.definition)
            self.res.append('/* %s@%s */ %s' % (sym.name, sym.version, rendered))
        def finish(self):
            out = self.io_conf.out
            print('\n// Symbol dump: version %s, library %s' % \
                    (self.vername, self.io_conf.filename), file=out)
            # Nested definitions first, then a blank line, then the symbols.
            for item in self.pp.nested():
                print(item, file=out)
            print('', file=out)
            for item in self.res:
                print(item, file=out)

    for ver in commonver:
        names = sorted(ver.names())
        d_orig = SymbolDump(Config.origfile, ver.name)
        d_new = SymbolDump(Config.newfile, ver.name)
        for symname in names:
            sym = ver.symbols[symname]
            if sym.origsym.definition and sym.newsym.definition:
                d_orig.run(sym.origsym)
                d_new.run(sym.newsym)
            else:
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' % \
                        (symname, ver.name))
        d_orig.finish()
        d_new.finish()
1069
if __name__ == '__main__':
    # Command-line entry point: transfer the options into Config, parse
    # both libraries, report the differences and exit with App.result_code.
    Config.init()
    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
            version="%prog " + Config.version)
    parser.add_option('-v', '--verbose', action='count',
            help="verbose mode, may be specified several times")
    parser.add_option('--alias-prefix', action='append',
            help="name prefix to try for symbol alias lookup", metavar="STR")
    parser.add_option('--dump', action='store_true',
            help="dump symbol definitions")
    parser.add_option('--no-dump', action='store_true',
            help="disable dump for mismatched symbols")
    parser.add_option('--out-orig', action='store',
            help="result output file for original library", metavar="ORIGFILE")
    parser.add_option('--out-new', action='store',
            help="result output file for new library", metavar="NEWFILE")
    parser.add_option('--dwarfdump', action='store',
            help="path to dwarfdump executable", metavar="DWARFDUMP")
    parser.add_option('--objdump', action='store',
            help="path to objdump executable", metavar="OBJDUMP")
    # Regex filters for versions and symbols; each may be repeated.
    for filter_opt in ('exclude-ver', 'include-ver',
            'exclude-sym', 'include-sym'):
        parser.add_option('--' + filter_opt, action='append', metavar="RE")
    parser.add_option('--no-exclude-sym-default', action='store_true',
            help="don't exclude special symbols like _init, _end, __bss_start")
    # Each warning class gets an enabling and a disabling switch that
    # share one destination.
    for warning in ('alias', 'cached', 'symbol'):
        parser.add_option("--w-" + warning,
                action="store_true", dest="w_" + warning)
        parser.add_option("--w-no-" + warning,
                action="store_false", dest="w_" + warning)
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(-1)

    for tool in ('dwarfdump', 'objdump'):
        tool_path = getattr(opts, tool)
        if tool_path:
            setattr(Config, tool, tool_path)
    if opts.out_orig:
        Config.origfile.init(opts.out_orig)
    if opts.out_new:
        Config.newfile.init(opts.out_new)
    # --dump is applied after --no-dump, so it wins when both are given.
    if opts.no_dump:
        Config.dump, Config.no_dump = False, True
    if opts.dump:
        Config.dump, Config.no_dump = True, False
        Config.verbose = 1
    if opts.verbose:
        Config.verbose = opts.verbose
    if opts.alias_prefix:
        Config.alias_prefixes = opts.alias_prefix
        # Longest prefixes first; ties keep their command-line order.
        Config.alias_prefixes.sort(key=len, reverse=True)
    for suffix, name_filter in (('_sym', Config.symbol_filter),
            ('_ver', Config.version_filter)):
        for mode in ('exclude', 'include'):
            patterns = getattr(opts, mode + suffix)
            if patterns:
                getattr(name_filter, mode).extend(patterns)
    if not opts.no_exclude_sym_default:
        Config.symbol_filter.exclude.extend(Config.exclude_sym_default)
    Config.version_filter.compile()
    Config.symbol_filter.compile()
    # A warning switch left unspecified is None and keeps the Config default.
    for flag in ('w_alias', 'w_cached', 'w_symbol'):
        setting = getattr(opts, flag, None)
        if setting is not None:
            setattr(Config, flag, setting)

    Config.origfile.filename, Config.newfile.filename = args

    origlib = Shlib(Config.origfile.filename)
    origlib.parse()
    newlib = Shlib(Config.newfile.filename)
    newlib.parse()

    commonver = common_symbols(origlib, newlib)
    if Config.dump:
        dump_symbols(commonver)
    cmp_symbols(commonver)
    if Config.verbose >= 4:
        print(Dwarf.cmpcache.stats.show('Cmp'))
        print(DwarfdumpParser.tagcache_stats.show('Dwarf tag'))

    sys.exit(App.result_code)
1158