1# * Copyright 2016, NICTA
2# *
3# * This software may be distributed and modified according to the terms
4# of
5# * the BSD 2-Clause license. Note that NO WARRANTY is provided.
6# * See "LICENSE_BSD2.txt" for details.
7# *
8# * @TAG(NICTA_BSD)
9from elf_file import *
10import re
11
def parseSymTab():
    """Parse the symbol-table dump in ``elfFile().f_symtab``.

    Resets ``syms`` on the elfFile object to an empty dict, skips every line
    up to and including the one starting with ``SYMBOL TABLE``, then feeds
    each following symbol line to ``addSymbol(name, addr, flags, section,
    ali_size)``. Parsing stops at the first line that does not look like a
    symbol entry (including end of file).

    Raises:
        ValueError: if the dump ends before a ``SYMBOL TABLE`` header is seen
            (previously this case looped forever).
        AssertionError: if a parsed symbol name contains a space.
    """
    ef = elfFile()
    ef.syms = {}
    read_line = ef.f_symtab.readline

    # Skip the header. readline() returns '' at end of file, so the sentinel
    # form of iter() ends the loop instead of spinning on EOF.
    for line in iter(read_line, ''):
        if line.startswith('SYMBOL TABLE'):
            break
    else:
        raise ValueError("no 'SYMBOL TABLE' header found in symbol table dump")

    # The rest should be symbols, each looking like
    #   <address> <flags> <section> <alignment/size> <name>
    # For "common" symbols the fourth field is an alignment and for the others
    # a size; the two are not distinguished here.
    objdump_symbol_re = re.compile(
        r'^([a-f0-9]+) (.*) +([a-zA-Z0-9_*.]+)\t([a-f0-9]+)\s+([a-zA-Z0-9_. -]+)$')
    for line in iter(read_line, ''):
        g = objdump_symbol_re.match(line)
        if g is None:
            # First non-symbol line ends the table.
            break
        addr, flags, section, ali_size, name = g.groups()
        if ' ' in name:
            # The name pattern admits spaces, but such names are not handled.
            raise AssertionError('symbol name contains a space: %r' % name)
        ef.addSymbol(name, addr, flags, section, ali_size)
39
def parseTxt():
    """Parse the disassembly listing in ``elfFile().f_text``.

    Walks the listing line by line and records, on the elfFile object:

    * ``funcs[name]``      -- an ``elfFunc`` for every ``<addr> <name>:``
      header, with ``entry_addr`` set to the parsed address and ``lines``
      mapping each instruction address to its raw listing line;
    * ``literals[addr]``   -- ``(4, value)`` for every ``.word`` literal;
    * ``lines[addr]``      -- the instruction line with any ``;`` comment and
      trailing whitespace removed;
    * ``addrs_to_f[addr]`` -- the name of the most recently seen function
      header (``None`` if a literal precedes every header).

    Blank lines, the file-format banner and ``Disassembly of section``
    headers are ignored.

    Raises:
        AssertionError: on a ``.short``/``.byte`` literal (size undefined),
            on a line without an ``<addr>:`` prefix, or on an instruction
            line that appears before any function header.
    """
    ef = elfFile()

    # Compiled once, outside the loop. Raw strings keep the pattern text
    # identical while avoiding invalid string escapes such as '\.' and '\s'.
    file_header_re = re.compile(r'kernel\.elf:\s*file\s*format\s*elf32-littlearm')
    section_header_re = re.compile(r'Disassembly of section \..*:')
    # A function header looks like: f0000088 <create_it_frame_cap>:
    func_header_re = re.compile(r'(?P<f_addr>.*) <(?P<f_name>.*)>:$')
    literal_re = re.compile(
        r'(?P<addr>.*):\s*[a-f0-9]+\s*'
        r'(?P<size>(\.word)|(\.short)|(\.byte))\s*(?P<value>0x[a-f0-9]+)$')
    # An instruction looks like: f00000ac:<tab>e5801000 <tab>str<tab>r1, [r0]
    instruction_re = re.compile(r'(?P<line_addr>.*):.*')

    curr_func_name = None
    elf_fun = None
    skip_next_line = False
    for line in ef.f_text:
        # Hack used to skip the ndks_boot struct, which looks like a function:
        # the line following any mention of ndks_boot is dropped too.
        if skip_next_line:
            skip_next_line = False
            continue
        # Ignore empty lines and the headers.
        if line in ('\n', '\r\n'):
            continue
        if file_header_re.search(line) or section_header_re.search(line):
            continue
        # NOTE: this triggers on every line that mentions ndks_boot, not only
        # on its own header line.
        if 'ndks_boot' in line:
            skip_next_line = True
            continue

        header = func_header_re.search(line)
        if header is not None:
            # Start of a new function.
            curr_func_name = header.group('f_name')
            f_addr = header.group('f_addr')
            if (not ef.elf_only) and curr_func_name in ef.asm_fs:
                g_f = ef.asm_fs[curr_func_name]
            else:
                g_f = None
            ef.funcs[curr_func_name] = elfFunc(curr_func_name, f_addr, g_f)
            elf_fun = ef.funcs[curr_func_name]
            elf_fun.entry_addr = int(f_addr, 16)
            elf_fun.lines = {}
            continue

        literal = literal_re.search(line)
        if literal is not None:
            # Only .word literals have a defined size. Raising explicitly
            # (rather than `assert False`) keeps the check alive under -O.
            if literal.group('size') != '.word':
                raise AssertionError(
                    '%s size undefined' % literal.group('size'))
            line_addr = int(literal.group('addr'), 16)
            ef.literals[line_addr] = (4, int(literal.group('value'), 16))
            ef.addrs_to_f[line_addr] = curr_func_name
            continue

        # Anything else is treated as an instruction line.
        match = instruction_re.search(line)
        if match is None:
            raise AssertionError(line)
        line_addr = int(match.group('line_addr'), 16)
        if elf_fun is None:
            # Previously an UnboundLocalError; there is no function to attach
            # the instruction to.
            raise AssertionError(
                'instruction before any function header: %s' % line)
        # The per-function map keeps the raw line ...
        elf_fun.lines[line_addr] = line
        # ... while the global map drops everything after ';' and any
        # trailing whitespace.
        line = line.split(';')[0].rstrip(' \t\n\r')
        ef.lines[line_addr] = line
        ef.addrs_to_f[line_addr] = curr_func_name
102
# Used to detect tail calls: reports whether the instruction stored at an
# address is a plain branch. The pattern accepts the mnemonics `b` and `bx`
# only when followed by whitespace, so `bl` and suffixed forms do not match.
def isDirectBranch(addr):
    """Return True if the listing line at ``addr`` has mnemonic ``b`` or ``bx``.

    The line is read from ``elfFile().lines[addr]`` and is expected to have
    the shape ``<hex addr>: <hex encoding> <mnemonic> <operands>``.
    """
    listing_line = elfFile().lines[addr]
    found = re.search(r'[a-f0-9]+:\s*[a-f0-9]+\s+(b|bx)\s+.*', listing_line)
    if found is None:
        return False
    return True
109
def parseElf(dir_name):
    """Create the elfFile for ``dir_name``, run both parsers, and return it.

    The disassembly parser (``parseTxt``) runs first, followed by the
    symbol-table parser (``parseSymTab``).
    """
    # NOTE(review): both parsers call elfFile() with no arguments; presumably
    # that returns the object constructed here -- confirm in elf_file.
    parsed_elf = elfFile(dir_name)

    parseTxt()
    parseSymTab()

    return parsed_elf
115
116
117