1# * Copyright 2014, NICTA 2# * 3# * This software may be distributed and modified according to the terms 4# of 5# * the BSD 2-Clause license. Note that NO WARRANTY is provided. 6# * See "LICENSE_BSD2.txt" for details. 7# * 8# * @TAG(NICTA_BSD) 9#!/usr/bin/env python 10 11import os 12import re 13import sys 14import copy 15from subprocess import Popen, PIPE 16 17#so we can use addrs_to_f 18from elf_file import elfFile 19import elf_parser 20 21global path_counts 22path_counts = {} 23global b_id_counts 24b_id_counts = {} 25 26global tcfg_paths 27tcfg_paths = {} 28 29global tcfg_to_bb_map 30tcfg_to_bb_map = {} 31 32global total_edges 33total_edges = 0 34 35global id_to_context 36id_to_context = {} 37 38global elf_file 39 40def buildBBAddrToSize(): 41 ret = {} 42 global tcfg_to_bb_map 43 for bb_id in tcfg_to_bb_map: 44 (bb_addr, bb_size, bb_contexts, loop_id) = tcfg_to_bb_map[bb_id] 45 #print "bb_addr: %x, bb_size: %x" % (bb_addr, bb_size) 46 if bb_addr not in ret or (ret[bb_addr] < bb_size): 47 ret[bb_addr] = bb_size 48 return ret 49 50def read_variables(input_filename): 51 var_re = re.compile(r'^d(\d+|Sta)_(\d+)\s+([\d.]+)$') 52 b_var_re = re.compile(r'^b(\d+)\s+([\d.]+)$') 53 54 f = open(input_filename) 55 global path_counts 56 global total_edges 57 global b_id_counts 58 while True: 59 s = f.readline() 60 if s == '': 61 break 62 g = var_re.match(s.strip()) 63 if not g: 64 b_match = b_var_re.match(s.strip()) 65 if b_match: 66 b_id, count = b_match.groups() 67 b_id_counts[int(b_id)] = int(float(count)) 68 continue 69 from_id, to_id, count = g.groups() 70 if from_id not in path_counts: 71 path_counts[from_id] = {} 72 73 if to_id in path_counts[from_id]: 74 raise Exception("Variable from %s to %s mentioned more than once." % (from_id, to_id)) 75 76 count = int(round(float(count))) 77 if count == 0: 78 continue 79 80 path_counts[from_id][to_id] = count 81 total_edges += count 82 83 f.close() 84 if not path_counts: 85 raise Exception("No variables found in solution.") 86 87def print_context(ctx_list): 88 for x in ctx_list[:-1]: 89 print '%s' % elfFile().addrs_to_f[x], 90 print '' 91 92def read_tcfg_map(input_filename): 93 ''' 94 Sets up some global states and return the tcfg_to_bb_map 95 ''' 96 tcfg_re = re.compile( 97 r'''^ 98 (\d+) 99 \( \d+ \. \d+ \) 100 \( 101 (0x[0-9a-f]+) 102 \+ 103 (0x[0-9a-f]+) 104 \): 105 \s 106 \[\s 107 ([\d\s]*) 108 \](.*$)''', 109 re.VERBOSE) 110 111 f = open(input_filename) 112 global tcfg_paths 113 global tcfg_to_bb_map 114 global id_to_bb_addr 115 id_to_bb_addr = {} 116 while True: 117 s = f.readline() 118 if s == '': 119 break 120 g = tcfg_re.match(s.strip()) 121 if not g: 122 continue 123 124 bb_id, bb_addr, bb_size, bb_dests, bb_aux = g.groups() 125 126 bb_addr = int(bb_addr, 16) 127 bb_size = int(bb_size, 16) 128 bb_dests = [ x.strip() for x in bb_dests.split() if x.strip() ] 129 ctx_str = list(bb_aux.split(' ')) 130 ctx_list = [ int(x, 16) for x in ctx_str if x <> '' ] 131 #get rid of the loop_id 132 ctx_list = ctx_list[1:] 133 id_to_context[bb_id] = ctx_list 134 bb_aux = bb_aux.split() 135 loop_id = bb_aux[0] 136 if loop_id == '0': 137 loop_id = None 138 bb_context = reversed ([int (addr, 16) for addr in bb_aux[1:-1]]) 139 140 assert bb_id not in tcfg_paths 141 142 tcfg_paths[bb_id] = bb_dests 143 tcfg_to_bb_map[bb_id] = (bb_addr, bb_size, list (bb_context), loop_id) 144 id_to_bb_addr[bb_id] = bb_addr 145 ''' 146 if not bb_addr in bb_addr_to_ids: 147 bb_addr_to_ids[bb_addr] = [bb_id] 148 else: 149 bb_addr_to_ids[bb_addr].append(bb_id) 150 ''' 151 f.close() 152 if not tcfg_paths: 153 raise Exception("No nodes found in tcfg map.") 154 return tcfg_to_bb_map 155 156global addr2line_proc 157addr2line_proc = None 158def addr2line(addr): 159 global addr2line_proc 160 if addr2line_proc is None: 161 addr2line_proc = Popen( 162 ['arm-none-eabi-addr2line', '-fe', 163 elf_file], 164 stdin=PIPE, stdout=PIPE, close_fds=True 165 ) 166 167 addr2line_proc.stdin.write('%#x\n' % addr) 168 func = addr2line_proc.stdout.readline().strip() 169 line = addr2line_proc.stdout.readline().strip() 170 return func 171 172compound_ids = {} 173 174def simplify(): 175 rev = {} 176 for i in path_counts: 177 for j in path_counts[i]: 178 rev.setdefault(j, []) 179 rev[j].append(i) 180 simps = [i for i in path_counts if len(path_counts[i]) == 1 181 if len(rev.get(i, [])) == 1] 182 for i in simps: 183 [j] = rev[i] 184 assert path_counts[j].keys() == [i] 185 count = path_counts[j][i] 186 assert path_counts[i].values() == [count] 187 compound_ids.setdefault(j, [j]) 188 compound_ids[j].extend(compound_ids.get(i, [i])) 189 path_counts[j] = path_counts[i] 190 del path_counts[i] 191 192def print_block(node_id): 193 if node_id == 'Sta': 194 return 195 addr, size, _, _ = tcfg_to_bb_map[node_id] 196 for i in xrange(0, size, 4): 197 print '%#x %s' % (addr + i, addr2line(addr + i)) 198 199def print_block2(node_id): 200 assert node_id != 'Sta', node_id 201 addr, size, _, _ = tcfg_to_bb_map[node_id] 202 print '%#x - %#x' % (addr, addr + size - 4) 203 204def count_arc(local_path_counts, arc): 205 arc_steps = zip(arc, arc[1:] + [arc[0]]) 206 arc_counts = [local_path_counts[i][j] for (i, j) in arc_steps] 207 count = min(arc_counts) 208 for (i, j) in arc_steps: 209 local_path_counts[i][j] -= count 210 return (count, arc) 211 212def grab_arcs(local_path_counts, start): 213 stack = [start] 214 stack_set = set(stack) 215 216 arcs = [] 217 decisions = [] 218 219 while True: 220 node_id = stack[-1] 221 222 # have we reached the exit? 223 if node_id not in local_path_counts: 224 assert stack[0] == 'Sta' 225 for (i, node_id) in enumerate (stack): 226 if node_id in local_path_counts: 227 local_path_counts[node_id][stack[i + 1]] -= 1 228 arcs.append ((1, stack)) 229 return (arcs, decisions) 230 231 succs = [k for (k, count) in local_path_counts[node_id].iteritems() 232 if count > 0] 233 next_id = succs.pop() 234 if succs: 235 decisions.append (node_id) 236 237 if next_id in stack_set: 238 # we've found a loop 239 i = [j for (j, elt) in enumerate(stack) if elt == next_id][0] 240 arc = stack[i:] 241 stack = stack[:i] 242 stack_set = set(stack) 243 arcs.append (count_arc (local_path_counts, arc)) 244 245 if stack == []: 246 return (arcs, decisions) 247 else: 248 stack.append (next_id) 249 stack_set.add (next_id) 250 251def dict_list(xs): 252 d = {} 253 for (x, y) in xs: 254 d.setdefault (x, []) 255 d[x].append(y) 256 return d 257 258def follow2(): 259 # Find an eulerian path through the graph. 260 261 local_path_counts = copy.deepcopy(path_counts) 262 263 (arcs, decisions) = grab_arcs(local_path_counts, 'Sta') 264 live = [x for x in decisions 265 if [y for y in local_path_counts[x] if local_path_counts[x][y] > 0]] 266 while live: 267 x = live[0] 268 (arcs2, decisions) = grab_arcs(local_path_counts, x) 269 arcs.extend(arcs2) 270 live = [x for x in live if [y for y in local_path_counts[x] 271 if local_path_counts[x][y] > 0]] 272 273 arc_heads = dict_list([(arc[0], (count, arc)) for (count, arc) in arcs]) 274 275 [(_, top_arc)] = arc_heads['Sta'] 276 for (count, arc) in arcs: 277 print 'Arc (%d iterations): {' % count 278 if arc[0] == 'Sta': 279 arc = arc[1:] 280 print_arc_rec(arc, [], count, {}) 281 print '}' 282 283def print_arc_rec(arc, init_ctxt, count, arc_heads): 284 ctxt_depth = len(init_ctxt) 285 for node_id in arc: 286 xs = arc_heads.get(node_id, []) 287 for (count2, arc2) in xs: 288 arc_heads[node_id] = [] 289 assert count2 % count == 0, (count2, count) 290 print 'Loop (%d iterations) {' % (count2 / count) 291 print_arc_rec(arc2, ctxt, count2, arc_heads) 292 _, _, ctxt, _ = tcfg_to_bb_map[node_id] 293 for call_addr in ctxt[ctxt_depth:]: 294 print 'Function call at %#x: {' % call_addr 295 for i in range(ctxt_depth - len(ctxt)): 296 print '}' 297 ctxt_depth = len(ctxt) 298 print_block2(node_id) 299 for i in range(ctxt_depth - len(init_ctxt)): 300 print '}' 301 print '}' 302 303def dot(): 304 print 'digraph imm {' 305 for i in path_counts: 306 for j in path_counts[i]: 307 if i != 'Sta' and type(j) != 'Sta': 308 print '%s -> %s [label = \"%d %s -> %s\"];' % (i, j, path_counts[i][j], elfFile().addrs_to_f[id_to_bb_addr[i]], elfFile().addrs_to_f[id_to_bb_addr[j]]) 309 else: 310 print '%s -> %s [label = \"%d\"];' % (i, j, path_counts[i][j]) 311 312 print '}' 313 314def local_loop_id(i): 315 _, _, ctxt, lid = tcfg_to_bb_map[i] 316 if lid == None: 317 return i 318 else: 319 _, _, ctxt2, _ = tcfg_to_bb_map[lid] 320 if ctxt2 == ctxt: 321 return lid 322 else: 323 return i 324 325def refutable(): 326 print 'Refutable paths:' 327 node_ids = set(list(path_counts) 328 + [j for i in path_counts for j in path_counts[i]]) 329 node_ids -= set (['Sta']) 330 ctxts = dict_list([(tuple(tcfg_to_bb_map[i][2]), i) for i in node_ids]) 331 for (ctxt, nodes) in ctxts.iteritems(): 332 if not ctxt: 333 continue 334 print 'Funcall context %r {' % list(ctxt) 335 outgoing = {} 336 rev_graph = {} 337 for i in nodes: 338 li = local_loop_id (i) 339 outgoing.setdefault(li, 0) 340 for (j, count) in path_counts.get(i, {}).iteritems(): 341 outgoing[li] += count 342 if tcfg_to_bb_map[j][2] != list(ctxt): 343 continue 344 lj = local_loop_id (j) 345 if li != lj: 346 rev_graph.setdefault(lj, {}) 347 rev_graph[lj][li] = count 348 print_backtracks(outgoing, rev_graph) 349 print '}' 350 351def print_backtracks(outgoing, rev_graph): 352 covered = set() 353 for i in rev_graph: 354 if i in covered: 355 continue 356 tracks = print_backtracks_rec([i], outgoing[i], rev_graph) 357 covered.update([x for xs in tracks for x in xs]) 358 for i in outgoing: 359 if i not in covered: 360 if outgoing[i] > 0: 361 print 'Arc: %r' % [tcfg_to_bb_map[i][0]] 362 363def print_backtracks_rec(chunk, count, rev_graph): 364 chunk = list(chunk) 365 if count <= 0: 366 return [] 367 while True: 368 node_id = chunk[-1] 369 rev_opts = rev_graph.get(node_id, {}).items() 370 sum_opts = sum([c for (_, c) in rev_opts]) 371 opts = [(next_id, count - (sum_opts - c)) for (next_id, c) in rev_opts] 372 opts = [opt for opt in opts if opt[1] > 0] 373 if len(opts) > 1: 374 return [track for (next_id, count) in opts 375 for track in print_backtracks_rec(chunk + [next_id], 376 count, rev_graph)] 377 elif not opts: 378 arc_addrs = [tcfg_to_bb_map[i][0] for i in reversed(chunk)] 379 print 'Arc: %r' % arc_addrs 380 return [chunk] 381 else: 382 [(next_id, count)] = opts 383 chunk.append(next_id) 384 385def parse(elf_file_name, fun_name): 386 sol_file_name = fun_name + '.imm.sol' 387 map_file_name = fun_name + '.imm.map' 388 elf_file = elf_file_name 389 read_variables(sol_file_name) 390 read_tcfg_map() 391 392def profile(): 393 bb_addr_exec_count = {} 394 funs_to_bb_addrs = {} 395 funs_to_count = {} 396 fun_entries_to_count = {} 397 total_inst_ran = 0 398 global tcfg_to_bb_map 399 maxi = 0 400 for i in path_counts: 401 for j in path_counts[i]: 402 403 bb_addr = id_to_bb_addr[j] 404 inst_count = path_counts[i][j] * tcfg_to_bb_map[j][1] 405 total_inst_ran += inst_count 406 if inst_count == 0: 407 continue 408 bb_addr_exec_count.setdefault (bb_addr, 0) 409 bb_addr_exec_count[bb_addr] += path_counts[i][j] 410 411 if path_counts[i][j] > maxi: 412 maxi = path_counts[i][j] 413 max_edge = (i,j) 414 415 ctxt = id_to_context.get (j, []) 416 if len(id_to_context.get (i, [])) < len(ctxt): 417 fun = elfFile().addrs_to_f[bb_addr] 418 if i in id_to_bb_addr: 419 call_addr = id_to_bb_addr[i] 420 entry_name = '(%x in %s)' % (call_addr, 421 elfFile().addrs_to_f[call_addr]) 422 else: 423 entry_name = '(at depth %d)' % len(ctxt) 424 fun_entries_to_count.setdefault (fun, {}) 425 fun_entries_to_count[fun].setdefault (entry_name, 0) 426 fun_entries_to_count[fun][entry_name] += path_counts[i][j] 427 428 bb_addr_to_size = buildBBAddrToSize() 429 430 for bb_addr in bb_addr_exec_count: 431 fun = elfFile().addrs_to_f[bb_addr] 432 if fun not in funs_to_count: 433 funs_to_count[fun] = 0 434 funs_to_bb_addrs[fun] = [] 435 funs_to_bb_addrs[fun].append( bb_addr ) 436 inst_count = bb_addr_exec_count[bb_addr] * bb_addr_to_size[bb_addr] 437 funs_to_count[fun] += inst_count 438 439 for fun in sorted(funs_to_count, key= lambda x: funs_to_count[x], reverse=True): 440 count = funs_to_count[fun] 441 print "%s: %u instructions / %.2f %%" % (fun, count, float (count) / total_inst_ran * 100) 442 for (prev_fun, n) in fun_entries_to_count.get(fun, {}).iteritems(): 443 print ' + entered %d times from %s' % (n, prev_fun) 444 for bb_addr in sorted(funs_to_bb_addrs[fun]): 445 sz = bb_addr_to_size[bb_addr] 446 ecount = bb_addr_exec_count[bb_addr] 447 print " %x-%x : %dx %dins " % (bb_addr, bb_addr + sz - 4, 448 ecount, sz / 4) 449 450 print "\n\n Dominating executing path:" 451 i,j = max_edge 452 print 'max edge %d -> %d : %d' % (int(i),int(j), maxi) 453 i_bb = id_to_bb_addr[i] 454 j_bb = id_to_bb_addr[j] 455 print ' %x -> %x '% (i_bb,j_bb) 456 print ' %s -> %s ' % (elfFile().addrs_to_f[i_bb],elfFile().addrs_to_f[j_bb]) 457 print ' context:' 458 print_context(id_to_context[i]) 459 maxi=0 460 461 for b_id in b_id_counts: 462 count = b_id_counts[b_id] 463 if count > maxi: 464 max_id = b_id 465 maxi = count 466 print 't: max_id b%d, count %d' % (max_id, maxi) 467 print 'max_id b%d, count %d' % (max_id, maxi) 468 print 'max_bb_addr: %x' % id_to_bb_addr[str(max_id)] 469 470 print_context(id_to_context[str(max_id)]) 471 472if __name__ == '__main__': 473 argv = list(sys.argv) 474 opt = '--follow' 475 476 if len(argv) != 6: 477 print 'len(argv): %d' % len(argv) 478 print "Usage: reconstruct [OPTION] <dir_name> sol_file map elf_file" 479 print 'reconstruction options: --refutable, --follow, --dot, --profile' 480 sys.exit(1) 481 482 if argv[1].startswith ('--'): 483 opt = argv[1] 484 argv = argv[:1] + argv[2:] 485 #print 'Reading data' 486 dir_name = argv[1] 487 sol_file = argv[2] 488 tcfg_map_f = argv[3] 489 elf_file = argv[4] 490 read_variables(sol_file) 491 #print 'Reading tcfg' 492 read_tcfg_map(tcfg_map_f) 493 #print ' .. done reading' 494 #initialise elfFile and parse the objdump 495 elfFile(dir_name=dir_name,elf_only=True) 496 elf_parser.parseTxt() 497 #print 'Simplifying' 498 #simplify() 499 #print ' .. done simplifying' 500 if opt == '--follow': 501 follow2() 502 elif opt == '--dot': 503 dot() 504 elif opt == '--refutable': 505 refutable() 506 elif opt == '--profile': 507 profile() 508 else: 509 print 'Unknown reconstruction type: %r' % opt 510 511