1# * Copyright 2014, NICTA
2# *
3# * This software may be distributed and modified according to the terms
4# of
5# * the BSD 2-Clause license. Note that NO WARRANTY is provided.
6# * See "LICENSE_BSD2.txt" for details.
7# *
8# * @TAG(NICTA_BSD)
9#!/usr/bin/env python
10
11import os
12import re
13import sys
14import copy
15from subprocess import Popen, PIPE
16
17#so we can use addrs_to_f
18from elf_file import elfFile
19import elf_parser
20
21global path_counts
22path_counts = {}
23global b_id_counts
24b_id_counts = {}
25
26global tcfg_paths
27tcfg_paths = {}
28
29global tcfg_to_bb_map
30tcfg_to_bb_map = {}
31
32global total_edges
33total_edges = 0
34
35global id_to_context
36id_to_context = {}
37
38global elf_file
39
40def buildBBAddrToSize():
41    ret = {}
42    global tcfg_to_bb_map
43    for bb_id in tcfg_to_bb_map:
44        (bb_addr, bb_size, bb_contexts, loop_id) = tcfg_to_bb_map[bb_id]
45        #print "bb_addr: %x, bb_size: %x" % (bb_addr, bb_size)
46        if bb_addr not in ret or (ret[bb_addr] < bb_size):
47            ret[bb_addr] = bb_size
48    return ret
49
50def read_variables(input_filename):
51    var_re = re.compile(r'^d(\d+|Sta)_(\d+)\s+([\d.]+)$')
52    b_var_re = re.compile(r'^b(\d+)\s+([\d.]+)$')
53
54    f = open(input_filename)
55    global path_counts
56    global total_edges
57    global b_id_counts
58    while True:
59        s = f.readline()
60        if s == '':
61            break
62        g = var_re.match(s.strip())
63        if not g:
64	 	b_match = b_var_re.match(s.strip())
65		if b_match:
66		  b_id, count = b_match.groups()
67		  b_id_counts[int(b_id)] = int(float(count))
68		continue
69        from_id, to_id, count = g.groups()
70        if from_id not in path_counts:
71            path_counts[from_id] = {}
72
73        if to_id in path_counts[from_id]:
74            raise Exception("Variable from %s to %s mentioned more than once." % (from_id, to_id))
75
76        count = int(round(float(count)))
77        if count == 0:
78            continue
79
80        path_counts[from_id][to_id] = count
81        total_edges += count
82
83    f.close()
84    if not path_counts:
85        raise Exception("No variables found in solution.")
86
87def print_context(ctx_list):
88    for x in ctx_list[:-1]:
89      print '%s' % elfFile().addrs_to_f[x],
90    print ''
91
92def read_tcfg_map(input_filename):
93    '''
94    Sets up some global states and return the tcfg_to_bb_map
95    '''
96    tcfg_re = re.compile(
97        r'''^
98            (\d+)
99            \( \d+ \. \d+ \)
100            \(
101                (0x[0-9a-f]+)
102                \+
103                (0x[0-9a-f]+)
104            \):
105            \s
106            \[\s
107                ([\d\s]*)
108            \](.*$)''',
109            re.VERBOSE)
110
111    f = open(input_filename)
112    global tcfg_paths
113    global tcfg_to_bb_map
114    global id_to_bb_addr
115    id_to_bb_addr = {}
116    while True:
117        s = f.readline()
118        if s == '':
119            break
120        g = tcfg_re.match(s.strip())
121        if not g:
122            continue
123
124        bb_id, bb_addr, bb_size, bb_dests, bb_aux = g.groups()
125
126        bb_addr = int(bb_addr, 16)
127        bb_size = int(bb_size, 16)
128        bb_dests = [ x.strip() for x in bb_dests.split() if x.strip() ]
129        ctx_str = list(bb_aux.split(' '))
130        ctx_list = [ int(x, 16) for x in ctx_str if x <> '' ]
131        #get rid of the loop_id
132        ctx_list = ctx_list[1:]
133        id_to_context[bb_id] = ctx_list
134        bb_aux = bb_aux.split()
135        loop_id = bb_aux[0]
136        if loop_id == '0':
137            loop_id = None
138        bb_context = reversed ([int (addr, 16) for addr in bb_aux[1:-1]])
139
140        assert bb_id not in tcfg_paths
141
142        tcfg_paths[bb_id] = bb_dests
143        tcfg_to_bb_map[bb_id] = (bb_addr, bb_size, list (bb_context), loop_id)
144        id_to_bb_addr[bb_id] = bb_addr
145        '''
146        if not bb_addr in bb_addr_to_ids:
147          bb_addr_to_ids[bb_addr] = [bb_id]
148        else:
149          bb_addr_to_ids[bb_addr].append(bb_id)
150        '''
151    f.close()
152    if not tcfg_paths:
153        raise Exception("No nodes found in tcfg map.")
154    return tcfg_to_bb_map
155
156global addr2line_proc
157addr2line_proc = None
158def addr2line(addr):
159    global addr2line_proc
160    if addr2line_proc is None:
161        addr2line_proc = Popen(
162            ['arm-none-eabi-addr2line', '-fe',
163            elf_file],
164            stdin=PIPE, stdout=PIPE, close_fds=True
165        )
166
167    addr2line_proc.stdin.write('%#x\n' % addr)
168    func = addr2line_proc.stdout.readline().strip()
169    line = addr2line_proc.stdout.readline().strip()
170    return func
171
172compound_ids = {}
173
174def simplify():
175    rev = {}
176    for i in path_counts:
177        for j in path_counts[i]:
178            rev.setdefault(j, [])
179            rev[j].append(i)
180    simps = [i for i in path_counts if len(path_counts[i]) == 1
181        if len(rev.get(i, [])) == 1]
182    for i in simps:
183        [j] = rev[i]
184        assert path_counts[j].keys() == [i]
185        count = path_counts[j][i]
186        assert path_counts[i].values() == [count]
187        compound_ids.setdefault(j, [j])
188        compound_ids[j].extend(compound_ids.get(i, [i]))
189        path_counts[j] = path_counts[i]
190        del path_counts[i]
191
192def print_block(node_id):
193    if node_id == 'Sta':
194        return
195    addr, size, _, _ = tcfg_to_bb_map[node_id]
196    for i in xrange(0, size, 4):
197        print '%#x %s' % (addr + i, addr2line(addr + i))
198
199def print_block2(node_id):
200    assert node_id != 'Sta', node_id
201    addr, size, _, _ = tcfg_to_bb_map[node_id]
202    print '%#x - %#x' % (addr, addr + size - 4)
203
204def count_arc(local_path_counts, arc):
205    arc_steps = zip(arc, arc[1:] + [arc[0]])
206    arc_counts = [local_path_counts[i][j] for (i, j) in arc_steps]
207    count = min(arc_counts)
208    for (i, j) in arc_steps:
209        local_path_counts[i][j] -= count
210    return (count, arc)
211
212def grab_arcs(local_path_counts, start):
213    stack = [start]
214    stack_set = set(stack)
215
216    arcs = []
217    decisions = []
218
219    while True:
220        node_id = stack[-1]
221
222        # have we reached the exit?
223        if node_id not in local_path_counts:
224            assert stack[0] == 'Sta'
225            for (i, node_id) in enumerate (stack):
226                if node_id in local_path_counts:
227                    local_path_counts[node_id][stack[i + 1]] -= 1
228            arcs.append ((1, stack))
229            return (arcs, decisions)
230
231        succs = [k for (k, count) in local_path_counts[node_id].iteritems()
232            if count > 0]
233        next_id = succs.pop()
234        if succs:
235            decisions.append (node_id)
236
237        if next_id in stack_set:
238            # we've found a loop
239            i = [j for (j, elt) in enumerate(stack) if elt == next_id][0]
240            arc = stack[i:]
241            stack = stack[:i]
242            stack_set = set(stack)
243            arcs.append (count_arc (local_path_counts, arc))
244
245            if stack == []:
246                return (arcs, decisions)
247        else:
248            stack.append (next_id)
249            stack_set.add (next_id)
250
251def dict_list(xs):
252    d = {}
253    for (x, y) in xs:
254        d.setdefault (x, [])
255        d[x].append(y)
256    return d
257
258def follow2():
259    # Find an eulerian path through the graph.
260
261    local_path_counts = copy.deepcopy(path_counts)
262
263    (arcs, decisions) = grab_arcs(local_path_counts, 'Sta')
264    live = [x for x in decisions
265        if [y for y in local_path_counts[x] if local_path_counts[x][y] > 0]]
266    while live:
267        x = live[0]
268        (arcs2, decisions) = grab_arcs(local_path_counts, x)
269        arcs.extend(arcs2)
270        live = [x for x in live if [y for y in local_path_counts[x]
271            if local_path_counts[x][y] > 0]]
272
273    arc_heads = dict_list([(arc[0], (count, arc)) for (count, arc) in arcs])
274
275    [(_, top_arc)] = arc_heads['Sta']
276    for (count, arc) in arcs:
277        print 'Arc (%d iterations): {' % count
278        if arc[0] == 'Sta':
279            arc = arc[1:]
280        print_arc_rec(arc, [], count, {})
281        print '}'
282
283def print_arc_rec(arc, init_ctxt, count, arc_heads):
284    ctxt_depth = len(init_ctxt)
285    for node_id in arc:
286        xs = arc_heads.get(node_id, [])
287        for (count2, arc2) in xs:
288            arc_heads[node_id] = []
289            assert count2 % count == 0, (count2, count)
290            print 'Loop (%d iterations) {' % (count2 / count)
291            print_arc_rec(arc2, ctxt, count2, arc_heads)
292        _, _, ctxt, _ = tcfg_to_bb_map[node_id]
293        for call_addr in ctxt[ctxt_depth:]:
294            print 'Function call at %#x: {' % call_addr
295        for i in range(ctxt_depth - len(ctxt)):
296            print '}'
297        ctxt_depth = len(ctxt)
298        print_block2(node_id)
299    for i in range(ctxt_depth - len(init_ctxt)):
300        print '}'
301    print '}'
302
303def dot():
304    print 'digraph imm {'
305    for i in path_counts:
306        for j in path_counts[i]:
307            if i != 'Sta' and type(j) != 'Sta':
308              print '%s -> %s [label = \"%d %s -> %s\"];' % (i, j, path_counts[i][j], elfFile().addrs_to_f[id_to_bb_addr[i]], elfFile().addrs_to_f[id_to_bb_addr[j]])
309            else:
310              print '%s -> %s [label = \"%d\"];' % (i, j, path_counts[i][j])
311
312    print '}'
313
314def local_loop_id(i):
315    _, _, ctxt, lid = tcfg_to_bb_map[i]
316    if lid == None:
317        return i
318    else:
319        _, _, ctxt2, _ = tcfg_to_bb_map[lid]
320        if ctxt2 == ctxt:
321            return lid
322        else:
323            return i
324
325def refutable():
326    print 'Refutable paths:'
327    node_ids = set(list(path_counts)
328        + [j for i in path_counts for j in path_counts[i]])
329    node_ids -= set (['Sta'])
330    ctxts = dict_list([(tuple(tcfg_to_bb_map[i][2]), i) for i in node_ids])
331    for (ctxt, nodes) in ctxts.iteritems():
332        if not ctxt:
333            continue
334        print 'Funcall context %r {' % list(ctxt)
335        outgoing = {}
336        rev_graph = {}
337        for i in nodes:
338            li = local_loop_id (i)
339            outgoing.setdefault(li, 0)
340            for (j, count) in path_counts.get(i, {}).iteritems():
341                outgoing[li] += count
342                if tcfg_to_bb_map[j][2] != list(ctxt):
343                    continue
344                lj = local_loop_id (j)
345                if li != lj:
346                    rev_graph.setdefault(lj, {})
347                    rev_graph[lj][li] = count
348        print_backtracks(outgoing, rev_graph)
349        print '}'
350
351def print_backtracks(outgoing, rev_graph):
352    covered = set()
353    for i in rev_graph:
354        if i in covered:
355            continue
356        tracks = print_backtracks_rec([i], outgoing[i], rev_graph)
357        covered.update([x for xs in tracks for x in xs])
358    for i in outgoing:
359        if i not in covered:
360            if outgoing[i] > 0:
361                print 'Arc: %r' % [tcfg_to_bb_map[i][0]]
362
363def print_backtracks_rec(chunk, count, rev_graph):
364    chunk = list(chunk)
365    if count <= 0:
366        return []
367    while True:
368        node_id = chunk[-1]
369        rev_opts = rev_graph.get(node_id, {}).items()
370        sum_opts = sum([c for (_, c) in rev_opts])
371        opts = [(next_id, count - (sum_opts - c)) for (next_id, c) in rev_opts]
372        opts = [opt for opt in opts if opt[1] > 0]
373        if len(opts) > 1:
374            return [track for (next_id, count) in opts
375                for track in print_backtracks_rec(chunk + [next_id],
376                    count, rev_graph)]
377        elif not opts:
378            arc_addrs = [tcfg_to_bb_map[i][0] for i in reversed(chunk)]
379            print 'Arc: %r' % arc_addrs
380            return [chunk]
381        else:
382            [(next_id, count)] = opts
383            chunk.append(next_id)
384
385def parse(elf_file_name, fun_name):
386    sol_file_name = fun_name + '.imm.sol'
387    map_file_name = fun_name + '.imm.map'
388    elf_file = elf_file_name
389    read_variables(sol_file_name)
390    read_tcfg_map()
391
392def profile():
393    bb_addr_exec_count = {}
394    funs_to_bb_addrs = {}
395    funs_to_count = {}
396    fun_entries_to_count = {}
397    total_inst_ran = 0
398    global tcfg_to_bb_map
399    maxi = 0
400    for i in path_counts:
401      for j in path_counts[i]:
402
403        bb_addr = id_to_bb_addr[j]
404        inst_count = path_counts[i][j] * tcfg_to_bb_map[j][1]
405        total_inst_ran += inst_count
406        if inst_count == 0:
407            continue
408        bb_addr_exec_count.setdefault (bb_addr, 0)
409        bb_addr_exec_count[bb_addr] += path_counts[i][j]
410
411        if path_counts[i][j] > maxi:
412            maxi = path_counts[i][j]
413            max_edge = (i,j)
414
415	ctxt = id_to_context.get (j, [])
416        if len(id_to_context.get (i, [])) < len(ctxt):
417            fun = elfFile().addrs_to_f[bb_addr]
418            if i in id_to_bb_addr:
419                call_addr = id_to_bb_addr[i]
420                entry_name = '(%x in %s)' % (call_addr,
421                  elfFile().addrs_to_f[call_addr])
422            else:
423                entry_name = '(at depth %d)' % len(ctxt)
424            fun_entries_to_count.setdefault (fun, {})
425            fun_entries_to_count[fun].setdefault (entry_name, 0)
426            fun_entries_to_count[fun][entry_name] += path_counts[i][j]
427
428    bb_addr_to_size = buildBBAddrToSize()
429
430    for bb_addr in bb_addr_exec_count:
431        fun = elfFile().addrs_to_f[bb_addr]
432        if fun not in funs_to_count:
433            funs_to_count[fun] = 0
434            funs_to_bb_addrs[fun] = []
435        funs_to_bb_addrs[fun].append( bb_addr )
436        inst_count = bb_addr_exec_count[bb_addr] * bb_addr_to_size[bb_addr]
437        funs_to_count[fun] += inst_count
438
439    for fun in sorted(funs_to_count, key= lambda x: funs_to_count[x], reverse=True):
440        count = funs_to_count[fun]
441        print "%s: %u instructions / %.2f %%" % (fun, count, float (count) / total_inst_ran * 100)
442        for (prev_fun, n) in fun_entries_to_count.get(fun, {}).iteritems():
443            print '  + entered %d times from %s' % (n, prev_fun)
444        for bb_addr in sorted(funs_to_bb_addrs[fun]):
445            sz = bb_addr_to_size[bb_addr]
446            ecount = bb_addr_exec_count[bb_addr]
447            print "     %x-%x : %dx %dins " % (bb_addr, bb_addr + sz - 4,
448                ecount, sz / 4)
449
450    print "\n\n Dominating executing path:"
451    i,j = max_edge
452    print 'max edge %d -> %d : %d' % (int(i),int(j), maxi)
453    i_bb = id_to_bb_addr[i]
454    j_bb = id_to_bb_addr[j]
455    print '         %x -> %x '% (i_bb,j_bb)
456    print '         %s -> %s ' %  (elfFile().addrs_to_f[i_bb],elfFile().addrs_to_f[j_bb])
457    print '         context:'
458    print_context(id_to_context[i])
459    maxi=0
460
461    for b_id in b_id_counts:
462      count = b_id_counts[b_id]
463      if count > maxi:
464	max_id = b_id
465	maxi = count
466	print 't: max_id b%d, count %d' % (max_id, maxi)
467    print 'max_id b%d, count %d' % (max_id, maxi)
468    print 'max_bb_addr: %x' % id_to_bb_addr[str(max_id)]
469
470    print_context(id_to_context[str(max_id)])
471
472if __name__ == '__main__':
473	argv = list(sys.argv)
474	opt = '--follow'
475
476	if len(argv) != 6:
477		print 'len(argv): %d' % len(argv)
478		print "Usage: reconstruct [OPTION] <dir_name> sol_file map elf_file"
479		print 'reconstruction options: --refutable, --follow, --dot, --profile'
480		sys.exit(1)
481
482	if argv[1].startswith ('--'):
483		opt = argv[1]
484		argv = argv[:1] + argv[2:]
485	#print 'Reading data'
486	dir_name = argv[1]
487	sol_file = argv[2]
488	tcfg_map_f = argv[3]
489	elf_file = argv[4]
490	read_variables(sol_file)
491	#print 'Reading tcfg'
492	read_tcfg_map(tcfg_map_f)
493	#print ' .. done reading'
494	#initialise elfFile and parse the objdump
495	elfFile(dir_name=dir_name,elf_only=True)
496	elf_parser.parseTxt()
497	#print 'Simplifying'
498	#simplify()
499	#print ' .. done simplifying'
500	if opt == '--follow':
501		follow2()
502	elif opt == '--dot':
503		dot()
504	elif opt == '--refutable':
505		refutable()
506	elif opt == '--profile':
507		profile()
508	else:
509		print 'Unknown reconstruction type: %r' % opt
510
511