# * Copyright 2014, NICTA # * # * This software may be distributed and modified according to the terms # of # * the BSD 2-Clause license. Note that NO WARRANTY is provided. # * See "LICENSE_BSD2.txt" for details. # * # * @TAG(NICTA_BSD) #!/usr/bin/env python import os import re import sys import copy from subprocess import Popen, PIPE #so we can use addrs_to_f from elf_file import elfFile import elf_parser global path_counts path_counts = {} global b_id_counts b_id_counts = {} global tcfg_paths tcfg_paths = {} global tcfg_to_bb_map tcfg_to_bb_map = {} global total_edges total_edges = 0 global id_to_context id_to_context = {} global elf_file def buildBBAddrToSize(): ret = {} global tcfg_to_bb_map for bb_id in tcfg_to_bb_map: (bb_addr, bb_size, bb_contexts, loop_id) = tcfg_to_bb_map[bb_id] #print "bb_addr: %x, bb_size: %x" % (bb_addr, bb_size) if bb_addr not in ret or (ret[bb_addr] < bb_size): ret[bb_addr] = bb_size return ret def read_variables(input_filename): var_re = re.compile(r'^d(\d+|Sta)_(\d+)\s+([\d.]+)$') b_var_re = re.compile(r'^b(\d+)\s+([\d.]+)$') f = open(input_filename) global path_counts global total_edges global b_id_counts while True: s = f.readline() if s == '': break g = var_re.match(s.strip()) if not g: b_match = b_var_re.match(s.strip()) if b_match: b_id, count = b_match.groups() b_id_counts[int(b_id)] = int(float(count)) continue from_id, to_id, count = g.groups() if from_id not in path_counts: path_counts[from_id] = {} if to_id in path_counts[from_id]: raise Exception("Variable from %s to %s mentioned more than once." % (from_id, to_id)) count = int(round(float(count))) if count == 0: continue path_counts[from_id][to_id] = count total_edges += count f.close() if not path_counts: raise Exception("No variables found in solution.") def print_context(ctx_list): for x in ctx_list[:-1]: print '%s' % elfFile().addrs_to_f[x], print '' def read_tcfg_map(input_filename): ''' Sets up some global states and return the tcfg_to_bb_map ''' tcfg_re = re.compile( r'''^ (\d+) \( \d+ \. \d+ \) \( (0x[0-9a-f]+) \+ (0x[0-9a-f]+) \): \s \[\s ([\d\s]*) \](.*$)''', re.VERBOSE) f = open(input_filename) global tcfg_paths global tcfg_to_bb_map global id_to_bb_addr id_to_bb_addr = {} while True: s = f.readline() if s == '': break g = tcfg_re.match(s.strip()) if not g: continue bb_id, bb_addr, bb_size, bb_dests, bb_aux = g.groups() bb_addr = int(bb_addr, 16) bb_size = int(bb_size, 16) bb_dests = [ x.strip() for x in bb_dests.split() if x.strip() ] ctx_str = list(bb_aux.split(' ')) ctx_list = [ int(x, 16) for x in ctx_str if x <> '' ] #get rid of the loop_id ctx_list = ctx_list[1:] id_to_context[bb_id] = ctx_list bb_aux = bb_aux.split() loop_id = bb_aux[0] if loop_id == '0': loop_id = None bb_context = reversed ([int (addr, 16) for addr in bb_aux[1:-1]]) assert bb_id not in tcfg_paths tcfg_paths[bb_id] = bb_dests tcfg_to_bb_map[bb_id] = (bb_addr, bb_size, list (bb_context), loop_id) id_to_bb_addr[bb_id] = bb_addr ''' if not bb_addr in bb_addr_to_ids: bb_addr_to_ids[bb_addr] = [bb_id] else: bb_addr_to_ids[bb_addr].append(bb_id) ''' f.close() if not tcfg_paths: raise Exception("No nodes found in tcfg map.") return tcfg_to_bb_map global addr2line_proc addr2line_proc = None def addr2line(addr): global addr2line_proc if addr2line_proc is None: addr2line_proc = Popen( ['arm-none-eabi-addr2line', '-fe', elf_file], stdin=PIPE, stdout=PIPE, close_fds=True ) addr2line_proc.stdin.write('%#x\n' % addr) func = addr2line_proc.stdout.readline().strip() line = addr2line_proc.stdout.readline().strip() return func compound_ids = {} def simplify(): rev = {} for i in path_counts: for j in path_counts[i]: rev.setdefault(j, []) rev[j].append(i) simps = [i for i in path_counts if len(path_counts[i]) == 1 if len(rev.get(i, [])) == 1] for i in simps: [j] = rev[i] assert path_counts[j].keys() == [i] count = path_counts[j][i] assert path_counts[i].values() == [count] compound_ids.setdefault(j, [j]) compound_ids[j].extend(compound_ids.get(i, [i])) path_counts[j] = path_counts[i] del path_counts[i] def print_block(node_id): if node_id == 'Sta': return addr, size, _, _ = tcfg_to_bb_map[node_id] for i in xrange(0, size, 4): print '%#x %s' % (addr + i, addr2line(addr + i)) def print_block2(node_id): assert node_id != 'Sta', node_id addr, size, _, _ = tcfg_to_bb_map[node_id] print '%#x - %#x' % (addr, addr + size - 4) def count_arc(local_path_counts, arc): arc_steps = zip(arc, arc[1:] + [arc[0]]) arc_counts = [local_path_counts[i][j] for (i, j) in arc_steps] count = min(arc_counts) for (i, j) in arc_steps: local_path_counts[i][j] -= count return (count, arc) def grab_arcs(local_path_counts, start): stack = [start] stack_set = set(stack) arcs = [] decisions = [] while True: node_id = stack[-1] # have we reached the exit? if node_id not in local_path_counts: assert stack[0] == 'Sta' for (i, node_id) in enumerate (stack): if node_id in local_path_counts: local_path_counts[node_id][stack[i + 1]] -= 1 arcs.append ((1, stack)) return (arcs, decisions) succs = [k for (k, count) in local_path_counts[node_id].iteritems() if count > 0] next_id = succs.pop() if succs: decisions.append (node_id) if next_id in stack_set: # we've found a loop i = [j for (j, elt) in enumerate(stack) if elt == next_id][0] arc = stack[i:] stack = stack[:i] stack_set = set(stack) arcs.append (count_arc (local_path_counts, arc)) if stack == []: return (arcs, decisions) else: stack.append (next_id) stack_set.add (next_id) def dict_list(xs): d = {} for (x, y) in xs: d.setdefault (x, []) d[x].append(y) return d def follow2(): # Find an eulerian path through the graph. local_path_counts = copy.deepcopy(path_counts) (arcs, decisions) = grab_arcs(local_path_counts, 'Sta') live = [x for x in decisions if [y for y in local_path_counts[x] if local_path_counts[x][y] > 0]] while live: x = live[0] (arcs2, decisions) = grab_arcs(local_path_counts, x) arcs.extend(arcs2) live = [x for x in live if [y for y in local_path_counts[x] if local_path_counts[x][y] > 0]] arc_heads = dict_list([(arc[0], (count, arc)) for (count, arc) in arcs]) [(_, top_arc)] = arc_heads['Sta'] for (count, arc) in arcs: print 'Arc (%d iterations): {' % count if arc[0] == 'Sta': arc = arc[1:] print_arc_rec(arc, [], count, {}) print '}' def print_arc_rec(arc, init_ctxt, count, arc_heads): ctxt_depth = len(init_ctxt) for node_id in arc: xs = arc_heads.get(node_id, []) for (count2, arc2) in xs: arc_heads[node_id] = [] assert count2 % count == 0, (count2, count) print 'Loop (%d iterations) {' % (count2 / count) print_arc_rec(arc2, ctxt, count2, arc_heads) _, _, ctxt, _ = tcfg_to_bb_map[node_id] for call_addr in ctxt[ctxt_depth:]: print 'Function call at %#x: {' % call_addr for i in range(ctxt_depth - len(ctxt)): print '}' ctxt_depth = len(ctxt) print_block2(node_id) for i in range(ctxt_depth - len(init_ctxt)): print '}' print '}' def dot(): print 'digraph imm {' for i in path_counts: for j in path_counts[i]: if i != 'Sta' and type(j) != 'Sta': print '%s -> %s [label = \"%d %s -> %s\"];' % (i, j, path_counts[i][j], elfFile().addrs_to_f[id_to_bb_addr[i]], elfFile().addrs_to_f[id_to_bb_addr[j]]) else: print '%s -> %s [label = \"%d\"];' % (i, j, path_counts[i][j]) print '}' def local_loop_id(i): _, _, ctxt, lid = tcfg_to_bb_map[i] if lid == None: return i else: _, _, ctxt2, _ = tcfg_to_bb_map[lid] if ctxt2 == ctxt: return lid else: return i def refutable(): print 'Refutable paths:' node_ids = set(list(path_counts) + [j for i in path_counts for j in path_counts[i]]) node_ids -= set (['Sta']) ctxts = dict_list([(tuple(tcfg_to_bb_map[i][2]), i) for i in node_ids]) for (ctxt, nodes) in ctxts.iteritems(): if not ctxt: continue print 'Funcall context %r {' % list(ctxt) outgoing = {} rev_graph = {} for i in nodes: li = local_loop_id (i) outgoing.setdefault(li, 0) for (j, count) in path_counts.get(i, {}).iteritems(): outgoing[li] += count if tcfg_to_bb_map[j][2] != list(ctxt): continue lj = local_loop_id (j) if li != lj: rev_graph.setdefault(lj, {}) rev_graph[lj][li] = count print_backtracks(outgoing, rev_graph) print '}' def print_backtracks(outgoing, rev_graph): covered = set() for i in rev_graph: if i in covered: continue tracks = print_backtracks_rec([i], outgoing[i], rev_graph) covered.update([x for xs in tracks for x in xs]) for i in outgoing: if i not in covered: if outgoing[i] > 0: print 'Arc: %r' % [tcfg_to_bb_map[i][0]] def print_backtracks_rec(chunk, count, rev_graph): chunk = list(chunk) if count <= 0: return [] while True: node_id = chunk[-1] rev_opts = rev_graph.get(node_id, {}).items() sum_opts = sum([c for (_, c) in rev_opts]) opts = [(next_id, count - (sum_opts - c)) for (next_id, c) in rev_opts] opts = [opt for opt in opts if opt[1] > 0] if len(opts) > 1: return [track for (next_id, count) in opts for track in print_backtracks_rec(chunk + [next_id], count, rev_graph)] elif not opts: arc_addrs = [tcfg_to_bb_map[i][0] for i in reversed(chunk)] print 'Arc: %r' % arc_addrs return [chunk] else: [(next_id, count)] = opts chunk.append(next_id) def parse(elf_file_name, fun_name): sol_file_name = fun_name + '.imm.sol' map_file_name = fun_name + '.imm.map' elf_file = elf_file_name read_variables(sol_file_name) read_tcfg_map() def profile(): bb_addr_exec_count = {} funs_to_bb_addrs = {} funs_to_count = {} fun_entries_to_count = {} total_inst_ran = 0 global tcfg_to_bb_map maxi = 0 for i in path_counts: for j in path_counts[i]: bb_addr = id_to_bb_addr[j] inst_count = path_counts[i][j] * tcfg_to_bb_map[j][1] total_inst_ran += inst_count if inst_count == 0: continue bb_addr_exec_count.setdefault (bb_addr, 0) bb_addr_exec_count[bb_addr] += path_counts[i][j] if path_counts[i][j] > maxi: maxi = path_counts[i][j] max_edge = (i,j) ctxt = id_to_context.get (j, []) if len(id_to_context.get (i, [])) < len(ctxt): fun = elfFile().addrs_to_f[bb_addr] if i in id_to_bb_addr: call_addr = id_to_bb_addr[i] entry_name = '(%x in %s)' % (call_addr, elfFile().addrs_to_f[call_addr]) else: entry_name = '(at depth %d)' % len(ctxt) fun_entries_to_count.setdefault (fun, {}) fun_entries_to_count[fun].setdefault (entry_name, 0) fun_entries_to_count[fun][entry_name] += path_counts[i][j] bb_addr_to_size = buildBBAddrToSize() for bb_addr in bb_addr_exec_count: fun = elfFile().addrs_to_f[bb_addr] if fun not in funs_to_count: funs_to_count[fun] = 0 funs_to_bb_addrs[fun] = [] funs_to_bb_addrs[fun].append( bb_addr ) inst_count = bb_addr_exec_count[bb_addr] * bb_addr_to_size[bb_addr] funs_to_count[fun] += inst_count for fun in sorted(funs_to_count, key= lambda x: funs_to_count[x], reverse=True): count = funs_to_count[fun] print "%s: %u instructions / %.2f %%" % (fun, count, float (count) / total_inst_ran * 100) for (prev_fun, n) in fun_entries_to_count.get(fun, {}).iteritems(): print ' + entered %d times from %s' % (n, prev_fun) for bb_addr in sorted(funs_to_bb_addrs[fun]): sz = bb_addr_to_size[bb_addr] ecount = bb_addr_exec_count[bb_addr] print " %x-%x : %dx %dins " % (bb_addr, bb_addr + sz - 4, ecount, sz / 4) print "\n\n Dominating executing path:" i,j = max_edge print 'max edge %d -> %d : %d' % (int(i),int(j), maxi) i_bb = id_to_bb_addr[i] j_bb = id_to_bb_addr[j] print ' %x -> %x '% (i_bb,j_bb) print ' %s -> %s ' % (elfFile().addrs_to_f[i_bb],elfFile().addrs_to_f[j_bb]) print ' context:' print_context(id_to_context[i]) maxi=0 for b_id in b_id_counts: count = b_id_counts[b_id] if count > maxi: max_id = b_id maxi = count print 't: max_id b%d, count %d' % (max_id, maxi) print 'max_id b%d, count %d' % (max_id, maxi) print 'max_bb_addr: %x' % id_to_bb_addr[str(max_id)] print_context(id_to_context[str(max_id)]) if __name__ == '__main__': argv = list(sys.argv) opt = '--follow' if len(argv) != 6: print 'len(argv): %d' % len(argv) print "Usage: reconstruct [OPTION] sol_file map elf_file" print 'reconstruction options: --refutable, --follow, --dot, --profile' sys.exit(1) if argv[1].startswith ('--'): opt = argv[1] argv = argv[:1] + argv[2:] #print 'Reading data' dir_name = argv[1] sol_file = argv[2] tcfg_map_f = argv[3] elf_file = argv[4] read_variables(sol_file) #print 'Reading tcfg' read_tcfg_map(tcfg_map_f) #print ' .. done reading' #initialise elfFile and parse the objdump elfFile(dir_name=dir_name,elf_only=True) elf_parser.parseTxt() #print 'Simplifying' #simplify() #print ' .. done simplifying' if opt == '--follow': follow2() elif opt == '--dot': dot() elif opt == '--refutable': refutable() elif opt == '--profile': profile() else: print 'Unknown reconstruction type: %r' % opt