1#!/usr/bin/env python 2#- 3# Copyright (c) 2010 Gleb Kurtsou 4# All rights reserved. 5# 6# Redistribution and use in source and binary forms, with or without 7# modification, are permitted provided that the following conditions 8# are met: 9# 1. Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# 2. Redistributions in binary form must reproduce the above copyright 12# notice, this list of conditions and the following disclaimer in the 13# documentation and/or other materials provided with the distribution. 14# 15# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25# SUCH DAMAGE. 
#
# $FreeBSD$

from __future__ import print_function
import os
import sys
import re
import optparse


class Config(object):
    """Process-wide settings; the command-line handling fills these in."""

    version = '0.1'
    # controlled by user
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None
    symbol_filter = None
    alias_prefixes = []
    # misc opts
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'
    # debug
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Input library name plus the stream its report is written to."""

        filename = None
        out = sys.stdout

        def init(self, outname):
            """Redirect output to *outname* unless it is empty or '-'."""
            if outname and outname != '-':
                # Kept open on purpose: the stream is used until exit.
                self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    exclude_sym_default = [
        '^__bss_start$',
        '^_edata$',
        '^_end$',
        '^_fini$',
        '^_init$',
    ]

    @classmethod
    def init(cls):
        """Create the (still empty) version and symbol filters."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()


class App(object):
    """Holds the exit status accumulated while comparing symbols."""

    result_code = 0


def warn(cond, msg):
    """Print ``WARN: msg`` to stderr when *cond* is true."""
    if cond:
        print("WARN: " + msg, file=sys.stderr)

# {{{ misc


class StrFilter(object):
    """Include/exclude filter driven by lists of regular expressions.

    Patterns are appended to ``include`` / ``exclude`` as strings and take
    effect after ``compile()`` has been called.
    """

    def __init__(self):
        self.exclude = []
        self.include = []
        # Pre-initialised so that match() works before compile() instead of
        # failing with AttributeError.
        self.re_exclude = []
        self.re_include = []

    def compile(self):
        """Compile the current pattern strings."""
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if *s* passes the filter.

        With include patterns present, *s* must match at least one of them;
        it must never match an exclude pattern.  Matching is anchored at the
        start of the string (``re.match``).
        """
        if self.re_include and \
                not any(r.match(s) for r in self.re_include):
            return False
        return not any(r.match(s) for r in self.re_exclude)


class Cache(object):
    """Dictionary-backed cache with hit/miss accounting."""

    class CacheStats(object):
        """Hit and miss counters, optionally shared between caches."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary labelled with *name*."""
            total = self.hit + self.miss
            if total == 0:
                ratio = '(undef)'
            else:
                ratio = '%f' % (self.hit / float(total))
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % \
                (name, self.hit, self.miss, ratio)

    def __init__(self, enabled=True, stats=None):
        self.enabled = enabled
        self.items = {}
        if stats is None:
            self.stats = Cache.CacheStats()
        else:
            self.stats = stats

    def get(self, id):
        """Return the cached object for *id*, or None (counted as a miss).

        A disabled cache always misses.
        """
        if self.enabled and id in self.items:
            self.stats.hit += 1
            return self.items[id]
        self.stats.miss += 1
        return None

    def put(self, id, obj):
        """Store *obj* under *id*; warn (if enabled) when overwriting."""
        if self.enabled:
            if id in self.items and obj is not self.items[id]:
                warn(Config.w_cached,
                     "Item is already cached: %d (%s, %s)" %
                     (id, self.items[id], obj))
            self.items[id] = obj

    def replace(self, id, obj):
        """Overwrite an entry that must already exist."""
        if self.enabled:
            assert id in self.items
            self.items[id] = obj


class ListDiff(object):
    """Set difference between an original and a new collection."""

    def __init__(self, orig, new):
        self.orig = set(orig)
        self.new = set(new)
        self.common = self.orig & self.new
        self.added = self.new - self.common
        self.removed = self.orig - self.common


class PrettyPrinter(object):
    """Drives the ``_pp`` / ``_pp_ex`` methods of definition objects."""

    def __init__(self):
        self.stack = []
        # Defined up front so result() is safe before the first run().
        self._result = None

    def run_nested(self, obj):
        """Record the extended form of *obj* for later output."""
        ex = obj._pp_ex(self)
        self.stack.append(ex)

    def run(self, obj):
        """Pretty-print *obj*, remember the text and return it."""
        self._result = obj._pp(self)
        return self._result

    def nested(self):
        """Return the recorded extended forms, de-duplicated and sorted."""
        return sorted(set(self.stack))

    def result(self):
        """Return the text produced by the most recent run()."""
        return self._result

# }}}

#{{{ symbols and version maps


class Symbol(object):
    """A versioned dynamic symbol of one library."""

    def __init__(self, name, offset, version, lib):
        self.name = name
        self.offset = offset
        self.version = version
        self.lib = lib
        self.definition = None

    @property
    def name_ver(self):
        """``name@version`` label."""
        return self.name + '@' + self.version

    def __repr__(self):
        return "Symbol(%s, 0x%x, %s)" % (self.name, self.offset, self.version)


class CommonSymbol(object):
    """Pair of same-named, same-versioned symbols from both libraries."""

    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # The original passed the list as a second exception argument,
            # so the "%s" was never substituted.
            raise RuntimeError("Symbols have different names: %s" %
                               ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)


class SymbolAlias(object):
    """Local symbol whose name is *prefix* + the aliased symbol name."""

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        self.alias = alias
        self.name = alias[len(prefix):]
        self.offset = offset

    def __repr__(self):
        return "SymbolAlias(%s, 0x%x)" % (self.alias, self.offset)


class VersionMap(object):
    """Symbols of one version node, keyed by symbol name."""

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add *symbol*; raise ValueError if its name is already present."""
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                             (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the symbol names as a new list.

        A real list (not a dict view) so callers may sort it in place on
        both Python 2 and Python 3.
        """
        return list(self.symbols.keys())

    def __repr__(self):
        return repr(list(self.symbols.values()))

# }}}

# {{{ types and definitions


class Def(object):
    """Base class of all type and object definitions built from DWARF.

    Keyword arguments given to the constructor are kept in ``attrs`` and are
    readable as ordinary attributes.  Equality compares the class and
    ``attrs`` of the two definitions after stripping alias wrappers; ``id``
    and ``name`` do not take part.
    """

    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        # Only reached when normal lookup fails.  Read ``attrs`` through
        # __dict__: going through self.attrs would re-enter __getattr__ and
        # recurse forever on an instance whose __init__ has not run.
        attrs = self.__dict__.get('attrs')
        if attrs is None:
            raise AttributeError(attr)
        if attr not in attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or *default* for an unnamed definition."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Follow alias wrappers down to the underlying definition."""
        if self._is_alias:
            return self.type._alias()
        return self

    def _compare(self, other):
        """Return 0 if *other* is considered equal to self, non-zero if not.

        Results for pairs of definitions with non-zero ids are memoised in
        ``Dwarf.cmpcache`` (``Dwarf`` is defined later in this file).
        """
        # TODO assert 'self' and 'other' belong to different libraries
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            # 'other' is not a definition at all (e.g. None or a string).
            return 1
        if a.__class__ is not b.__class__:
            # NOTE(review): definitions of different classes are reported
            # as equal.  This mirrors the original code, where raising an
            # error here was commented out -- confirm this is intended.
            return 0
        ind = 0
        if a.id != 0 and b.id != 0:
            ind = (int(a.id) << 32) + b.id
            r = Dwarf.cmpcache.get(ind)
            if r is not None:
                return r
        r = 0 if a.attrs == b.attrs else 1
        if ind != 0:
            Dwarf.cmpcache.put(ind, r)
        return r

    def __eq__(self, other):
        return self._compare(other) == 0

    def __ne__(self, other):
        return not self.__eq__(other)

    def __cmp__(self, other):
        # Python 2 entry point, kept for callers that use cmp() directly.
        return self._compare(other)

    # Defining __eq__ would otherwise make instances unhashable on Python 3;
    # keep the identity hash the class always had.
    __hash__ = object.__hash__

    def __repr__(self):
        p = []
        if hasattr(self, 'name'):
            p.append("name=%s" % self.name)
        for (k, v) in self.attrs.items():
            if isinstance(v, Def):
                # Do not descend into nested definitions.
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return ``vals[param]``; raise NotImplementedError if absent."""
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                                      (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                                  str(self))

    def _pp(self, pp):
        raise NotImplementedError('Pretty print not implemeted: %s' %
                                  str(self))


class AnonymousDef(Def):
    """Definition that never carries a name."""

    def __init__(self, id, **kwargs):
        Def.__init__(self, id, None, **kwargs)


class Void(AnonymousDef):
    """Singleton standing in for an absent type."""

    _instance = None

    def __new__(cls, *args, **kwargs):
        # object.__new__ rejects extra arguments on Python 3, so none are
        # forwarded.
        if cls._instance is None:
            cls._instance = super(Void, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"


class VarArgs(AnonymousDef):
    """Variable argument list marker of a function."""

    def _pp(self, pp):
        return "..."

def _pp_params(pp, params):
    """Format a parameter list; an empty list prints as ``void``."""
    if not params:
        return "void"
    return ', '.join([pp.run(x) for x in params])


class PointerDef(AnonymousDef):
    """Pointer to ``type``."""

    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s*" % (t,)


class BaseTypeDef(Def):
    """Base (scalar) type described by its ``encoding`` and ``byte_size``."""

    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        """Return a C spelling; NotImplementedError for unknown encodings."""
        if self.encoding in self.inttypes:
            sign = '' if self.encoding == 'DW_ATE_signed' else 'u'
            bits = int(self.byte_size, 0) * 8
            return '%sint%s_t' % (sign, bits)
        elif self.encoding == 'DW_ATE_signed_char' and \
                int(self.byte_size, 0) == 1:
            return 'char'
        elif self.encoding == 'DW_ATE_boolean' and \
                int(self.byte_size, 0) == 1:
            return 'bool'
        elif self.encoding == 'DW_ATE_float':
            return self._mapval(int(self.byte_size, 0), {
                16: 'long double',
                # NOTE(review): assumes i386 long double is 12 bytes
                # (80-bit extended, padded) -- confirm against the ABI.
                12: 'long double',
                8: 'double',
                4: 'float',
            })
        raise NotImplementedError('Invalid encoding: %s' % self)


class TypeAliasDef(Def):
    """typedef; transparent when definitions are compared."""

    _is_alias = True

    def _pp(self, pp):
        alias = self._alias()
        # push typedef name
        # (side effect: an unnamed underlying definition gets renamed)
        if self.name and not alias.name:
            alias.name = 'T(%s)' % self.name
        # return type with modifiers
        return self.type._pp(pp)


class EnumerationTypeDef(Def):
    """enum type; enumerators are not recorded."""

    def _pp(self, pp):
        return 'enum ' + self._name_opt('UNKNOWN')


class ConstTypeDef(AnonymousDef):
    """``const`` qualifier wrapped around ``type``."""

    _is_alias = True

    def _pp(self, pp):
        return 'const ' + self.type._pp(pp)


class VolatileTypeDef(AnonymousDef):
    """``volatile`` qualifier wrapped around ``type``."""

    _is_alias = True

    def _pp(self, pp):
        return 'volatile ' + self.type._pp(pp)


class RestrictTypeDef(AnonymousDef):
    """``restrict`` qualifier wrapped around ``type``."""

    _is_alias = True

    def _pp(self, pp):
        return 'restrict ' + self.type._pp(pp)


class ArrayDef(AnonymousDef):
    """Array of ``type`` with one ``subranges`` entry per dimension."""

    @staticmethod
    def _dim(subrange):
        """Return the element count described by *subrange*."""
        bound = subrange.upper_bound
        try:
            return int(bound) + 1
        except ValueError:
            # Drop a parenthesised annotation attached to the number.
            return int(re.sub(r'\(.+\)', '', bound)) + 1

    def _pp(self, pp):
        """Return ``type[n]``, with one ``[n]`` per dimension.

        The original asserted exactly one subrange and so could not print
        multi-dimensional arrays.  An array without any subrange is printed
        with an empty ``[]``.
        """
        t = pp.run(self.type)
        dims = ''.join(['[%s]' % self._dim(x) for x in self.subranges])
        return '%s%s' % (t, dims or '[]')


class ArraySubrangeDef(AnonymousDef):
    """One array dimension; carries ``upper_bound``."""
    pass


class FunctionDef(Def):
    """Function with ``result`` type and ``params`` list."""

    def _pp(self, pp):
        result = pp.run(self.result)
        params = _pp_params(pp, self.params)
        return "%s %s(%s);" % (result, self.name, params)


class FunctionTypeDef(Def):
    """Function type (e.g. the target of a function pointer)."""

    def _pp(self, pp):
        result = pp.run(self.result)
        params = _pp_params(pp, self.params)
        return "F(%s, %s, (%s))" % (self._name_opt(), result, params)


class ParameterDef(Def):
    """Formal parameter of a function."""

    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s %s" % (t, self._name_opt())


class VariableDef(Def):
    """Variable of type ``type``."""

    def _pp(self, pp):
        t = pp.run(self.type)
        return "%s %s" % (t, self._name_opt())


# TODO
class StructForwardDef(Def):
    pass


class IncompleteDef(Def):
    """Placeholder returned while a struct/union is still being built.

    It breaks the recursion for self-referencing types.
    """

    def update(self, complete, cache=None):
        """Link the placeholder with the finished definition *complete*.

        If *cache* currently holds a placeholder under this id, the entry is
        replaced with *complete*.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is not None:
            cached = cache.get(self.id)
            # isinstance() is False for None, so no separate None test
            # (which would go through the custom Def comparison).
            if isinstance(cached, IncompleteDef):
                cache.replace(self.id, complete)


class StructIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "struct %s" % (self.name,)


class UnionIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "union %s" % (self.name,)


class _CompositeDef(Def):
    """Printing shared by struct and union definitions."""

    _keyword = None

    def _pp_ex(self, pp, suffix=';'):
        """Return the full definition including all members."""
        members = [pp.run(x) for x in self.members]
        return "%s %s { %s }%s" % \
            (self._keyword, self._name_opt(), ' '.join(members), suffix)

    def _pp(self, pp):
        """Return a reference to the type.

        A named type is printed by name and its body is queued on the
        printer; an anonymous one is expanded inline.
        """
        if self.name:
            pp.run_nested(self)
            return "%s %s" % (self._keyword, self.name)
        return self._pp_ex(pp, suffix='')


class StructDef(_CompositeDef):
    _keyword = 'struct'


class UnionDef(_CompositeDef):
    _keyword = 'union'

class MemberDef(Def):
    """Struct/union member, optionally a bit-field (``bit_size``)."""

    def _pp(self, pp):
        t = pp.run(self.type)
        if self.bit_size:
            bits = ":%s" % self.bit_size
        else:
            bits = ""
        return "%s %s%s;" % (t, self._name_opt(), bits)


class Dwarf(object):
    """Builds definition objects from parsed dwarfdump tags.

    ``build()`` dispatches on the tag name: ``DW_TAG_foo`` is handled by the
    method ``build_foo``.
    """

    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the type referenced by *praw*, or Void if it has none."""
        ref = praw.optarg('type', Void())
        # isinstance() instead of "!=" keeps this independent of the custom
        # comparison defined on definitions.
        if isinstance(ref, Void):
            return ref
        return self.buildref(praw.unit, ref)

    def build_subprogram(self, raw):
        if raw.optname is None:
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_variable(self, raw):
        type = self._build_optarg_type(raw)
        return VariableDef(raw.id, raw.optname, type=type)

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params,
                               result=result)

    def build_formal_parameter(self, raw):
        type = self._build_optarg_type(raw)
        return ParameterDef(raw.id, raw.optname, type=type)

    def build_pointer_type(self, raw):
        type = self._build_optarg_type(raw)
        return PointerDef(raw.id, type=type)

    def build_member(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=type,
                         bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        """Build a struct; a re-entrant request gets the placeholder."""
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            # Already under construction further up the call chain.
            return incomplete
        incomplete = StructIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        # NOTE(review): the original created a StructForwardDef here when
        # byte_size was absent and then overwrote it unconditionally; the
        # only effect was a KeyError for unnamed structs.  Forward
        # declarations are therefore still represented as StructDef.
        obj = StructDef(raw.id, raw.optname, members=members,
                        byte_size=byte_size)
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_union_type(self, raw):
        """Build a union; a re-entrant request gets the placeholder."""
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = UnionIncompleteDef(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        byte_size = raw.optarg('byte_size', None)
        obj = UnionDef(raw.id, raw.optname, members=members,
                       byte_size=byte_size)
        # Same linking step as for structs, including replacement of a
        # cached placeholder.
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_typedef(self, raw):
        type = self._build_optarg_type(raw)
        return TypeAliasDef(raw.id, raw.name, type=type)

    def build_const_type(self, raw):
        type = self._build_optarg_type(raw)
        return ConstTypeDef(raw.id, type=type)

    def build_volatile_type(self, raw):
        type = self._build_optarg_type(raw)
        return VolatileTypeDef(raw.id, type=type)

    def build_restrict_type(self, raw):
        type = self._build_optarg_type(raw)
        return RestrictTypeDef(raw.id, type=type)

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                                  byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                           byte_size=raw.arg('byte_size'),
                           encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=type, subranges=subranges)

    def build_subrange_type(self, raw):
        type = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=type,
                                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a tag reference (``123`` or ``<0x7b>``) to an int."""
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1], 0)
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for tag *raw*, building it if needed."""
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder_name = raw.tag.replace('DW_TAG_', 'build_')
        try:
            builder = getattr(self, builder_name)
        except AttributeError:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that *id* refers to in *unit*."""
        id = self._get_id(id)
        raw = unit.tags[id]
        obj = self.build(raw)
        return obj

# }}}


class Shlib(object):
    """One shared library: its versioned symbols and their definitions."""

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Replaced by the objdump result when alias prefixes are in use;
        # always present so lookups need no attribute check.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Collect the filtered dynamic symbols and any alias symbols."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is "
                                 "already defined: %s: %s at %08x -- %s at %08x" %
                                 (alias.alias, alias.name, alias.offset,
                                  prevalias.name, prevalias.offset))
                    self.alias_syms[alias.name] = alias

    def parse_dwarfdump(self):
        """Attach a definition to every symbol found in the debug info."""
        dwarfdump = DwarfdumpParser(self.libfile)

        def lookup(sym):
            """Return the tag at the symbol's offset, or None.

            Falls back to the local names recorded at the same offset: the
            longest one that is a known alias decides the result.
            """
            raw = dwarfdump.offsetmap.get(sym.offset)
            if raw is not None:
                return raw
            localnames = self.local_offsetmap.get(sym.offset)
            if not localnames:
                return None
            localnames.sort(key=lambda x: -len(x))
            for localname in localnames:
                alias = self.alias_syms.get(localname)
                if alias is None:
                    continue
                # Only the first alias is tried, as before.
                return dwarfdump.offsetmap.get(alias.offset)
            return None

        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = lookup(sym)
                if not raw:
                    warn(Config.w_symbol,
                         "Symbol %s (%s) not found at offset 0x%x" %
                         (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    print("Parsing symbol %s (%s)" %
                          (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Run both parsers; exit with status 1 if the file is missing."""
        if not os.path.isfile(self.libfile):
            print("No such file: %s" % self.libfile, file=sys.stderr)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()

# {{{ parsers


class Parser(object):
    """Runs a command and feeds its non-empty output lines to a handler.

    ``self.parser`` is the current line handler; handlers switch state by
    re-assigning it.
    """

    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Execute the command; exit with status 2 if it fails."""
        fd = os.popen(self.proc, 'r')
        try:
            for line in fd:
                line = line.strip()
                if line:
                    # Looked up on every line: handlers may replace it.
                    self.parser(line)
        finally:
            # Close the pipe even when a handler raises.
            err = fd.close()
        if err:
            print("Execution failed: %s" % self.proc, file=sys.stderr)
            sys.exit(2)

    def parse_begin(self, line):
        print(line)


class ObjdumpParser(Parser):
    """Parses the symbol tables printed by ``objdump -wtT``."""

    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(
        r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+'
        r'(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*'
        r'(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap=None):
        """Append *symbol* to *table* unless its offset is zero.

        The hexadecimal ``offset`` field is converted to int in place.  When
        *offsetmap* is given, the symbol name is also recorded under its
        offset.
        """
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch state on a table header; return True if *line* was one."""
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            # Unparsable lines of the local table are skipped silently.
            return
        p = m.groupdict()
        if p['symbol'] and p['symbol'].find('@') == -1:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)


class DwarfdumpParser(Parser):
    """Parses the ``.debug_info`` listing printed by ``dwarfdump -di``."""

    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """Compilation unit: its tags by id plus the build caches."""

        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                               stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One parsed tag with its attributes and nested child tags."""

        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'], 0)
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            return self.arg('name')

        @property
        def optname(self):
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return attribute ``DW_AT_<a>``; KeyError if it is absent."""
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                               (name, self, self.args))

        def optarg(self, a, default):
            """Return attribute ``DW_AT_<a>`` or *default*."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    re_header = re.compile(
        r'<(?P<level>\d+)>'
        r'<(?P<id>[0xX0-9a-fA-F]+(?:\+(0[xX])?[0-9a-fA-F]+)?)>'
        r'<(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
    ])

    external_tags = set([
        'DW_TAG_variable',
    ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split the leading ``<...>`` group off *args*.

        Returns ``(rest, value)``; nested angle brackets stay in the value.
        """
        assert args.startswith('<')
        i = 1
        cnt = 1
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i + 1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Consume one attribute from *args* and return the remainder.

        The value is stored in ``tag.args`` as a string, or as a list when
        the attribute does not have exactly one ``<...>`` group.  Unknown
        attributes are skipped.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError(
                    "Invalid dwarfdump: couldn't parse arguments: %s" % args)
            args = args[len(m.group(0)):].lstrip()
            return args
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    @staticmethod
    def _tag_offset(tag):
        """Return the address of *tag* (low_pc or DW_OP_addr), or None."""
        if 'DW_AT_low_pc' in tag.args:
            return int(tag.args['DW_AT_low_pc'], 16)
        elif 'DW_AT_location' in tag.args:
            location = tag.args['DW_AT_location']
            if location.startswith('DW_OP_addr'):
                return int(location.replace('DW_OP_addr', ''), 16)
        return None

    def parse_debuginfo(self, line):
        """Parse one tag line, index it and link it under its parent."""
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            # A level 0 tag opens a new compilation unit.
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag
        offset = self._tag_offset(tag)
        if offset is not None and \
                (tag.tag not in DwarfdumpParser.skip_tags or
                 ('DW_AT_external' in tag.args and
                  tag.tag in DwarfdumpParser.external_tags)):
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                                 "symbol is already defined at offset 0x%x" %
                                 offset)
            self.offsetmap[offset] = tag
        if len(self.stack) > 0:
            # Unwind to the nearest tag with a lower level: the parent.
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        assert len(self.stack) == tag.level

# }}}


def list_str(l):
    """Return the items of *l* as a sorted, comma-separated string."""
    return ', '.join(sorted([str(x) for x in l]))


def names_ver_str(vername, names):
    """Return *names* as a sorted ``name@vername`` list string."""
    return list_str([x + "@" + vername for x in names])


def common_symbols(origlib, newlib):
    """Report version/symbol differences between the two libraries.

    Returns a list of VersionMap objects, one per version present in both
    libraries, holding a CommonSymbol for every symbol they share.
    """
    result = []
    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
    if Config.verbose >= 1:
        print('Original versions: ', list_str(verdiff.orig))
        print('New versions: ', list_str(verdiff.new))
    # Sets are sorted to make the report order deterministic.
    for vername in sorted(verdiff.added):
        print('Added version: ', vername)
        print(' Added symbols: ',
              names_ver_str(vername, newlib.versions[vername].names()))
    for vername in sorted(verdiff.removed):
        print('Removed version: ', vername)
        print(' Removed symbols: ',
              names_ver_str(vername, origlib.versions[vername].names()))
    added = []
    removed = []
    for vername in sorted(verdiff.common):
        origver = origlib.versions[vername]
        newver = newlib.versions[vername]
        namediff = ListDiff(origver.names(), newver.names())
        if namediff.added:
            added.append(names_ver_str(vername, namediff.added))
        if namediff.removed:
            removed.append(names_ver_str(vername, namediff.removed))
        commonver = VersionMap(vername)
        result.append(commonver)
        for n in namediff.common:
            sym = CommonSymbol(origver.symbols[n], newver.symbols[n])
            commonver.append(sym)
    if added:
        print('Added symbols:')
        for i in added:
            print(' ', i)
    if removed:
        print('Removed symbols:')
        for i in removed:
            print(' ', i)
    return result


def cmp_symbols(commonver):
    """Compare the definitions of all common symbols.

    Sets ``App.result_code`` to 1 if any definition is missing or differs,
    and prints/dumps according to the verbosity and dump settings.
    """
    for ver in commonver:
        # sorted() also accepts the dict view returned on Python 3, where
        # calling .sort() on the result of keys() fails.
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            missing = sym.origsym.definition is None or \
                sym.newsym.definition is None
            match = not missing and \
                sym.origsym.definition == sym.newsym.definition
            if not match:
                App.result_code = 1
            if not (Config.verbose >= 1 or not match):
                continue
            if missing:
                print('%s: missing definition' % (sym.origsym.name_ver,))
                continue
            print('%s: definitions %smatch' %
                  (sym.origsym.name_ver, "" if match else "mis"))
            if Config.dump or (not match and not Config.no_dump):
                for (xsym, xconf) in [(sym.origsym, Config.origfile),
                                      (sym.newsym, Config.newfile)]:
                    xout = xconf.out
                    if not xsym.definition:
                        print('\n// Definition not found: %s %s' %
                              (xsym.name_ver, xsym.lib.libfile), file=xout)
                        continue
                    print('\n// Definitions mismatch: %s %s' %
                          (xsym.name_ver, xsym.lib.libfile), file=xout)
                    pp = PrettyPrinter()
                    pp.run(xsym.definition)
                    for i in pp.nested():
                        print(i, file=xout)
                    print(pp.result(), file=xout)


def dump_symbols(commonver):
    """Write the definitions of all common symbols, grouped by version."""

    class SymbolDump(object):
        """Collects the printed symbols of one version for one library."""

        def __init__(self, io_conf, vername):
            self.io_conf = io_conf
            # Passed in explicitly rather than read from the enclosing
            # loop variable.
            self.vername = vername
            self.pp = PrettyPrinter()
            self.res = []

        def run(self, sym):
            r = self.pp.run(sym.definition)
            self.res.append('/* %s@%s */ %s' % (sym.name, sym.version, r))

        def finish(self):
            out = self.io_conf.out
            print('\n// Symbol dump: version %s, library %s' %
                  (self.vername, self.io_conf.filename), file=out)
            for i in self.pp.nested():
                print(i, file=out)
            print('', file=out)
            for i in self.res:
                print(i, file=out)

    for ver in commonver:
        names = sorted(ver.names())
        d_orig = SymbolDump(Config.origfile, ver.name)
        d_new = SymbolDump(Config.newfile, ver.name)
        for symname in names:
            sym = ver.symbols[symname]
            if not sym.origsym.definition or not sym.newsym.definition:
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' %
                     (symname, ver.name))
                continue
            d_orig.run(sym.origsym)
            d_new.run(sym.newsym)
        d_orig.finish()
        d_new.finish()


def main():
    """Command-line entry point: compare two shared libraries.

    Exits with ``App.result_code`` (1 when a definition is missing or
    mismatched), or -1 on a usage error.
    """
    Config.init()
    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
                                   version="%prog " + Config.version)
    parser.add_option('-v', '--verbose', action='count',
                      help="verbose mode, may be specified several times")
    parser.add_option('--alias-prefix', action='append',
                      help="name prefix to try for symbol alias lookup",
                      metavar="STR")
    parser.add_option('--dump', action='store_true',
                      help="dump symbol definitions")
    parser.add_option('--no-dump', action='store_true',
                      help="disable dump for mismatched symbols")
    parser.add_option('--out-orig', action='store',
                      help="result output file for original library",
                      metavar="ORIGFILE")
    parser.add_option('--out-new', action='store',
                      help="result output file for new library",
                      metavar="NEWFILE")
    parser.add_option('--dwarfdump', action='store',
                      help="path to dwarfdump executable",
                      metavar="DWARFDUMP")
    parser.add_option('--objdump', action='store',
                      help="path to objdump executable", metavar="OBJDUMP")
    parser.add_option('--exclude-ver', action='append', metavar="RE")
    parser.add_option('--include-ver', action='append', metavar="RE")
    parser.add_option('--exclude-sym', action='append', metavar="RE")
    parser.add_option('--include-sym', action='append', metavar="RE")
    parser.add_option('--no-exclude-sym-default', action='store_true',
                      help="don't exclude special symbols like _init, _end, __bss_start")
    for opt in ['alias', 'cached', 'symbol']:
        parser.add_option("--w-" + opt,
                          action="store_true", dest="w_" + opt)
        parser.add_option("--w-no-" + opt,
                          action="store_false", dest="w_" + opt)
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(-1)
    if opts.dwarfdump:
        Config.dwarfdump = opts.dwarfdump
    if opts.objdump:
        Config.objdump = opts.objdump
    if opts.out_orig:
        Config.origfile.init(opts.out_orig)
    if opts.out_new:
        Config.newfile.init(opts.out_new)
    if opts.no_dump:
        Config.dump = False
        Config.no_dump = True
    if opts.dump:
        # --dump wins over --no-dump and implies at least verbosity 1.
        Config.dump = True
        Config.no_dump = False
        Config.verbose = 1
    if opts.verbose:
        Config.verbose = opts.verbose
    if opts.alias_prefix:
        Config.alias_prefixes = opts.alias_prefix
        # Longest prefix first.
        Config.alias_prefixes.sort(key=lambda x: -len(x))
    for (k, v) in ({'_sym': Config.symbol_filter,
                    '_ver': Config.version_filter}).items():
        for a in ['exclude', 'include']:
            opt = getattr(opts, a + k)
            if opt:
                getattr(v, a).extend(opt)
    if not opts.no_exclude_sym_default:
        Config.symbol_filter.exclude.extend(Config.exclude_sym_default)
    Config.version_filter.compile()
    Config.symbol_filter.compile()
    for w in ['w_alias', 'w_cached', 'w_symbol']:
        v = getattr(opts, w, None)
        if v is not None:
            setattr(Config, w, v)

    (Config.origfile.filename, Config.newfile.filename) = (args[0], args[1])

    origlib = Shlib(Config.origfile.filename)
    origlib.parse()
    newlib = Shlib(Config.newfile.filename)
    newlib.parse()

    commonver = common_symbols(origlib, newlib)
    if Config.dump:
        dump_symbols(commonver)
    cmp_symbols(commonver)
    if Config.verbose >= 4:
        print(Dwarf.cmpcache.stats.show('Cmp'))
        print(DwarfdumpParser.tagcache_stats.show('Dwarf tag'))

    sys.exit(App.result_code)


if __name__ == '__main__':
    main()
