#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import argparse
import bisect
import getopt
import os
import re
import subprocess
import sys

# Per-binary ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
# When true, the commands that are launched and the lines exchanged with them
# are echoed to stdout.
DEBUG = False
# The settings below are overwritten from the command line in the
# `__main__` section at the bottom of the file.
demangle = False
binutils_prefix = None
sysroot_path = None
binary_name_filter = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True
force_system_symbolizer = False


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Return `file_name` rewritten for display.

    Every pattern in the global `fix_filename_patterns` (if any) is cut from
    the path together with everything that precedes it. Locations inside
    asan_*.cc files are collapsed to '_asan_rtl_' and 'crtstuff.c:0' is
    replaced with '???:0'.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


def sysroot_path_filter(binary_name):
    """Return `binary_name` prefixed with the global `sysroot_path`."""
    return sysroot_path + binary_name


def is_valid_arch(s):
    """Return True if `s` is one of the architecture names known here."""
    return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7",
                 "armv7s", "armv7k", "arm64", "powerpc64", "powerpc64le",
                 "s390x", "s390"]


def guess_arch(addr):
    """Guess the architecture from the textual width of address `addr`."""
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
        return 'x86_64'
    else:
        return 'i386'


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and try to launch the symbolizer.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as --default-arch.
            system: OS name; --dsym-hint options are added only for 'Darwin'.
            dsym_hints: optional iterable of .dSYM hints. None means no hints
                (None is used instead of a shared mutable default list).
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch the symbolizer; return the Popen object or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      bufsize=0,
                                      universal_newlines=True)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None when the child process could not be started, when
        talking to it raises, or when it reports only '??' frames.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write("%s\n" % symbolizer_input)
            # The answer is a sequence of (function, location) line pairs
            # ended by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "no result", so that the caller
            # can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer.

    The executable is taken from $LLVM_SYMBOLIZER_PATH, then from
    $ASAN_SYMBOLIZER_PATH, and otherwise 'llvm-symbolizer' is used.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line child process for one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Written after every real offset; the frame printed for it marks the
        # end of the answer for the real offset.
        self.output_terminator = -1

    def open_addr2line(self):
        """Launch addr2line (with the global `binutils_prefix`, if set)."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object was created
        for; otherwise a list with one string per frame. If communicating
        with the child raises, a '?? ??:0' frame is appended.
        """
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write("%s\n" % offset)
            self.pipe.stdin.write("%s\n" % self.output_terminator)
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first frame is always kept, even if it is unknown.
                    is_first_frame = False
                elif function_name in ('', '??'):
                    # Terminator frame (or end of output): the answer is
                    # complete. Both lines of the pair have been consumed
                    # already. The file name is deliberately not compared
                    # with the function name: an unknown location is reported
                    # as '??:0', and a failed check here used to be swallowed
                    # by the handler below, adding a bogus extra frame.
                    break
                lines.append((function_name, file_name))
        except Exception:
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))
                for (function_name, file_name) in lines]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork `args` on a pty; optionally send its stderr to /dev/null."""
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send `line` to the child and return its next line of output."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the next line of child output without trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an `atos` child process for one binary/arch."""

    def __init__(self, addr, binary, arch):
        # `addr` is accepted but not used by the constructor.
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = arch
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary / self.arch with stderr discarded."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this object was created
        for; otherwise a single-element list.
        """
        if self.binary != binary:
            return None
        if not os.path.exists(binary):
            # If the binary doesn't exist atos will exit which will lead to
            # IOError exceptions being raised later on so just don't try to
            # symbolize.
            return ['{} ({}:{}+{})'.format(addr, binary, self.arch, offset)]
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip the "got symbolicator for" lines that may precede the answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line: ', atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try each symbolizer in a list until one yields a result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Falsy list entries (e.g. None) are skipped. Returns the first truthy
        result, or None if no symbolizer produced one.
        """
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add `symbolizer` to the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for `binary` + $BREAKPAD_SUFFIX, or None.

    None is returned when the variable is unset/empty or the file is missing.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary, arch):
    """Return the system symbolizer for `system` (implicitly None if unknown)."""
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary, arch)
    elif system in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
        return Addr2LineSymbolizer(binary)


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad symbol file loaded in memory."""

    def __init__(self, filename):
        """Read and parse the symbol file `filename`."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # open() rather than the Python 2-only file() builtin; the context
        # manager closes the handle once the contents are read.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the file, symbol and address tables from the record lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line) covering `addr`, or None."""
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest record that starts below `addr`.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one named in the MODULE
        record, or when no record covers `offset`.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            if DEBUG:
                # Debug aid only; printing unconditionally would mix a list
                # repr into the symbolized output.
                print(result)
            return result
        else:
            return None


class SymbolizationLoop(object):
    """Reads a log line by line and symbolizes the stack frames in it."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Set up per-platform state.

        Args:
            binary_name_filter: optional callable mapping a binary name from
                the log to the name that should be symbolized.
            dsym_hint_producer: optional callable mapping a binary name to an
                iterable of .dSYM hints (consulted on Darwin only).
        Raises:
            Exception: on a non-Windows system that is not in the supported
                list.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD',
                                   'SunOS']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset, arch):
        """Return the list of frame descriptions for one address.

        Raises:
            Exception: if the llvm-symbolizer path produced nothing and the
                global `allow_system_symbolizer` is false.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        result = None
        if not force_system_symbolizer:
            if binary not in self.llvm_symbolizers:
                use_new_symbolizer = True
                if self.system == 'Darwin' and self.dsym_hint_producer:
                    dsym_hints_for_binary = set(
                        self.dsym_hint_producer(binary))
                    use_new_symbolizer = bool(
                        dsym_hints_for_binary - self.dsym_hints)
                    self.dsym_hints |= dsym_hints_for_binary
                if self.last_llvm_symbolizer and not use_new_symbolizer:
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
                else:
                    self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                        self.system, arch, self.dsym_hints)
                    self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            # Use the chain of symbolizers:
            # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
            # (fall back to next symbolizer if the previous one fails).
            if binary not in symbolizers:
                symbolizers[binary] = ChainSymbolizer(
                    [BreakpadSymbolizerFactory(binary),
                     self.llvm_symbolizers[binary]])
            result = symbolizers[binary].symbolize(addr, binary, offset)
        else:
            # NOTE(review): this replaces the chain on every call, so in
            # forced mode a new system symbolizer is created per address.
            symbolizers[binary] = ChainSymbolizer([])
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary, arch))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format frames as numbered lines, advancing self.frame_no.

        Without frames the current input line is returned unchanged.
        """
        if not symbolized_lines:
            return [self.current_line]
        else:
            result = []
            for symbolized_frame in symbolized_lines:
                result.append(' #%s %s' % (str(self.frame_no),
                                           symbolized_frame.rstrip()))
                self.frame_no += 1
            return result

    def process_logfile(self):
        """Process every line of the global `logfile` and print the result."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize `line` if it is a stack frame, otherwise echo it."""
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        arch = ""
        # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
        colon_pos = binary.rfind(":")
        if colon_pos != -1:
            maybe_arch = binary[colon_pos+1:]
            if is_valid_arch(maybe_arch):
                arch = maybe_arch
                binary = binary[0:colon_pos]
        if arch == "":
            arch = guess_arch(addr)
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset, arch)
        if not symbolized_line:
            # Retry with the unfiltered name if the filter changed it.
            if original_binary != binary:
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset, arch)
        return self.get_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    parser.add_argument('--force-system-symbolizer', action='store_true',
                        help='don\'t use llvm-symbolizer')
    args = parser.parse_args()
    # These assignments rebind the module-level settings declared at the top
    # of the file.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    if args.logfile:
        logfile = args.logfile
    else:
        logfile = sys.stdin
    if args.force_system_symbolizer:
        force_system_symbolizer = True
    if force_system_symbolizer:
        assert(allow_system_symbolizer)
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()