1#
2# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3#
4# SPDX-License-Identifier: BSD-2-Clause
5#
6
7from elf_file import *
8import re
9
def parseSymTab():
    """Fill the elfFile symbol table from its objdump symbol dump.

    Reads ``f_symtab`` of the object returned by ``elfFile()`` line by
    line. Everything up to and including the line that starts with
    ``SYMBOL TABLE`` is skipped. Each following line is expected to look
    like ``<address> <flags> <section> <alignment/size> <name>`` and is
    passed to ``addSymbol``. For "common" symbols the fourth field is an
    alignment, for the others a size; the two are not distinguished here.
    Parsing stops at the first line that does not match that shape, which
    includes end of file.

    ``syms`` is reset to an empty dict before parsing.

    Raises:
        ValueError: the stream ended before a ``SYMBOL TABLE`` header.
        AssertionError: a parsed symbol name contains a space.
    """
    ef = elfFile()
    ef.syms = {}

    # Skip the header. readline() returns '' at end of file, so iterating
    # with that sentinel stops cleanly instead of spinning forever when
    # the header is missing.
    for line in iter(ef.f_symtab.readline, ''):
        if line.startswith('SYMBOL TABLE'):
            break
    else:
        raise ValueError('no SYMBOL TABLE header found in symbol table dump')

    # The rest should be symbols.
    objdump_symbol_re = re.compile(
        r'^([a-f0-9]+) (.*) +([a-zA-Z0-9_*.]+)\t([a-f0-9]+)\s+([a-zA-Z0-9_. -]+)$')
    for line in iter(ef.f_symtab.readline, ''):
        g = objdump_symbol_re.match(line)
        if g is None:
            # First non-symbol line ends the table.
            break
        addr, flags, section, ali_size, name = g.groups()
        # The pattern tolerates spaces in the last field, but such a name
        # is not expected; report it in the failure message.
        assert ' ' not in name, 'symbol name contains a space: %r' % name
        ef.addSymbol(name, addr, flags, section, ali_size)
37
def parseTxt():
    """Parse the objdump disassembly text into the elfFile tables.

    Iterates over ``f_text`` of the object returned by ``elfFile()`` and
    classifies every line:

    * blank lines, the ``kernel.elf: file format`` banner and
      ``Disassembly of section`` headers are ignored;
    * any line mentioning ``ndks_boot`` is ignored together with the line
      that follows it;
    * ``<addr> <name>:`` starts a new function: an ``elfFunc`` is stored
      in ``funcs[name]`` with ``entry_addr`` set and an empty ``lines``
      dict;
    * ``.word`` literal lines are stored in ``literals[addr]`` as
      ``(4, value)``;
    * every other line is treated as an instruction: the raw line goes
      into the current function's ``lines``, and the line with any
      ``;`` comment and trailing whitespace removed goes into
      ``ef.lines``.

    Literal and instruction addresses are also mapped to the current
    function name in ``addrs_to_f``.

    Raises:
        AssertionError: a ``.short``/``.byte`` literal is seen, a line has
            no ``:``, or an instruction appears before any function header.
        ValueError: an address field is not valid hexadecimal.
    """
    ef = elfFile()

    # Compile every pattern once, outside the per-line loop. Raw strings
    # keep the regex escapes (\. and \s) from being read as string escapes.
    banner_re = re.compile(r'kernel\.elf:\s*file\s*format\s*elf32-littlearm')
    section_re = re.compile(r'Disassembly of section \..*:')
    # A function looks like: f0000088 <create_it_frame_cap>:
    func_re = re.compile(r'(?P<f_addr>.*) <(?P<f_name>.*)>:$')
    literal_re = re.compile(
        r'(?P<addr>.*):\s*[a-f0-9]+\s*'
        r'(?P<size>(\.word)|(\.short)|(\.byte))\s*'
        r'(?P<value>0x[a-f0-9]+)$')
    # An instruction looks like: f00000ac:  e5801000   str r1, [r0]
    # Capture only up to the FIRST colon: a greedy match would swallow the
    # whole line when the trailing comment itself contains a colon, and
    # the hex conversion of the address would then fail.
    inst_re = re.compile(r'(?P<line_addr>[^:]*):')

    curr_func_name = None
    elf_fun = None
    skip_next_line = False
    for line in ef.f_text:
        # Hack used to skip the ndks_boot struct, which looks like a function.
        if skip_next_line:
            skip_next_line = False
            continue
        # Ignore empty lines and the headers.
        if line in ('\n', '\r\n'):
            continue
        if banner_re.search(line) is not None or section_re.search(line) is not None:
            continue
        # ndks_boot is a strange function only used in bootstrapping.
        if 'ndks_boot' in line:
            skip_next_line = True
            continue

        func = func_re.search(line)
        if func is not None:
            # We are creating a new function.
            curr_func_name = func.group('f_name')
            if (not ef.elf_only) and curr_func_name in ef.asm_fs:
                g_f = ef.asm_fs[curr_func_name]
            else:
                g_f = None
            ef.funcs[curr_func_name] = elfFunc(
                curr_func_name, func.group('f_addr'), g_f)
            elf_fun = ef.funcs[curr_func_name]
            elf_fun.entry_addr = int(func.group('f_addr'), 16)
            elf_fun.lines = {}
            continue

        literal = literal_re.search(line)
        if literal is not None:
            # Only word-sized literals are supported.
            if literal.group('size') == '.word':
                size = 4
            else:
                assert False, '%s size undefined' % literal.group('size')
            line_addr = int(literal.group('addr'), 16)
            ef.literals[line_addr] = (size, int(literal.group('value'), 16))
            ef.addrs_to_f[line_addr] = curr_func_name
            continue

        # Anything else is an instruction; extract its address.
        match = inst_re.search(line)
        assert match is not None, line
        line_addr = int(match.group('line_addr'), 16)
        assert elf_fun is not None, \
            'instruction before any function header: %s' % line
        # The per-function table keeps the untouched line.
        elf_fun.lines[line_addr] = line
        # The global table drops everything after ';' and trailing blanks.
        stripped = line.split(';')[0].rstrip(' \t\n\r')
        ef.lines[line_addr] = stripped
        ef.addrs_to_f[line_addr] = curr_func_name
100
def isDirectBranch(addr):
    """Tell whether the instruction stored at *addr* is a ``b`` or ``bx``.

    Looks up the disassembly line for *addr* in ``elfFile().lines`` and
    checks that the field following the ``<address>: <hex word>`` prefix is
    exactly the mnemonic ``b`` or ``bx`` followed by whitespace. Other
    mnemonics that merely start with ``b`` (``bl`` for instance) do not
    count. Used to detect tail calls.

    Raises ``KeyError``-style lookup errors from ``lines`` unchanged when
    *addr* has no recorded instruction.
    """
    text = elfFile().lines[addr]
    branch_pattern = r'[a-f0-9]+:\s*[a-f0-9]+\s+(b|bx)\s+.*'
    if re.search(branch_pattern, text) is None:
        return False
    return True
107
def parseElf(dir_name):
    """Build the elfFile for *dir_name* and run both parsers over it.

    Calls ``elfFile(dir_name)``, then ``parseTxt()`` followed by
    ``parseSymTab()``, and returns the object obtained from the first call.
    The parsers fetch their own ``elfFile()`` rather than receiving this
    one as an argument.
    NOTE(review): presumably ``elfFile()`` hands back the instance created
    here -- confirm against elf_file.
    """
    elf = elfFile(dir_name)
    # Disassembly first, symbol table second.
    parseTxt()
    parseSymTab()
    return elf
113
114
115