#
# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
#
# SPDX-License-Identifier: BSD-2-Clause
#

from elf_file import *
import re

# A symbol line of the objdump symbol table looks like
#   <address> <flags> <section>\t<alignment/size> <name>
# For "common" symbols the fourth field is an alignment and for all other
# symbols it is a size; the two are not distinguished here.
_OBJDUMP_SYMBOL_RE = re.compile(
    r'^([a-f0-9]+) (.*) +([a-zA-Z0-9_*.]+)\t([a-f0-9]+)\s+([a-zA-Z0-9_. -]+)$')

# Banner lines of the disassembly that carry no code.
_FILE_FORMAT_RE = re.compile(r'kernel\.elf:\s*file\s*format\s*elf32-littlearm')
_SECTION_RE = re.compile(r'Disassembly of section \..*:')

# A function header looks like: f0000088 <create_it_frame_cap>:
_FUNC_HEADER_RE = re.compile(r'(?P<f_addr>.*) <(?P<f_name>.*)>:$')

# A literal line: <addr>: <hex encoding> .word|.short|.byte 0x<value>
_LITERAL_RE = re.compile(
    r'(?P<addr>.*):\s*[a-f0-9]+\s*(?P<size>(\.word)|(\.short)|(\.byte))\s*(?P<value>0x[a-f0-9]+)$')

# Anything else is treated as an instruction; a line looks like
#   f00000ac:<TAB>e5801000 <TAB>str<TAB>r1, [r0]
_INSTRUCTION_RE = re.compile(r'(?P<line_addr>.*):.*')

# <addr>: <hex encoding> followed by the mnemonic b or bx and its operands.
_DIRECT_BRANCH_RE = re.compile(r'[a-f0-9]+:\s*[a-f0-9]+\s+(b|bx)\s+.*')


def parseSymTab():
    """Fill the symbol table of the current elfFile from its symbol dump.

    Reads ``ef.f_symtab`` line by line. Everything up to and including the
    line that starts with ``SYMBOL TABLE`` is skipped; each following line
    that matches the symbol format is handed to ``ef.addSymbol``. Parsing
    stops at the first line that does not look like a symbol, which includes
    the end of the file.

    Raises:
        ValueError: the input ended before a ``SYMBOL TABLE`` line was seen.
        AssertionError: a symbol name contains a space.
    """
    ef = elfFile()
    ef.syms = {}
    symtab = ef.f_symtab

    # Skip the header. readline() yields an empty string at end of file, so
    # stop there instead of spinning forever on a dump without a header.
    while True:
        line = symtab.readline()
        if not line:
            raise ValueError(
                "no 'SYMBOL TABLE' header found in the symbol table dump")
        if line.startswith('SYMBOL TABLE'):
            break

    # The rest should be symbols.
    while True:
        matched = _OBJDUMP_SYMBOL_RE.match(symtab.readline())
        if matched is None:
            break
        addr, flags, section, ali_size, name = matched.groups()
        if ' ' in name:
            # The pattern tolerates spaces in names, the rest of the tool
            # does not: show the offender and abort.
            print(name)
            raise AssertionError('symbol name contains a space: %r' % name)
        ef.addSymbol(name, addr, flags, section, ali_size)


def parseTxt():
    """Parse the disassembly in ``ef.f_text`` of the current elfFile.

    Each line is classified as one of:

    * blank line or banner (file format / section header): ignored;
    * a line mentioning ``ndks_boot``: ignored together with the line after it;
    * function header: a new ``elfFunc`` is stored in ``ef.funcs``;
    * literal (``.word``): stored in ``ef.literals`` as ``(size, value)``;
    * anything else: an instruction, stored unmodified in the current
      function's ``lines`` and, with any ``;`` comment and trailing
      whitespace removed, in ``ef.lines``.

    Literals and instructions are also recorded in ``ef.addrs_to_f``, mapping
    their address to the name of the enclosing function.

    Raises:
        AssertionError: a ``.short``/``.byte`` literal is encountered, a line
            cannot be parsed as an instruction, or an instruction appears
            before any function header.
        ValueError: an address or literal value is not valid hexadecimal.
    """
    ef = elfFile()
    curr_func_name = None
    elf_fun = None
    skip_next_line = False
    for line in ef.f_text:
        # Hack used to skip the ndks_boot struct, which looks like a function.
        if skip_next_line:
            skip_next_line = False
            continue

        # Ignore empty lines and the headers.
        if line in ('\n', '\r\n'):
            continue
        if (_FILE_FORMAT_RE.search(line) is not None
                or _SECTION_RE.search(line) is not None):
            continue

        # ndks_boot is a strange function only used in bootstrapping.
        if 'ndks_boot' in line:
            skip_next_line = True
            continue

        header = _FUNC_HEADER_RE.search(line)
        if header is not None:
            # We are creating a new function.
            curr_func_name = header.group('f_name')
            f_addr = header.group('f_addr')
            # asm_fs is only consulted when not running in elf-only mode.
            if (not ef.elf_only) and curr_func_name in ef.asm_fs:
                g_f = ef.asm_fs[curr_func_name]
            else:
                g_f = None
            ef.funcs[curr_func_name] = elfFunc(curr_func_name, f_addr, g_f)
            elf_fun = ef.funcs[curr_func_name]
            elf_fun.entry_addr = int(f_addr, 16)
            elf_fun.lines = {}
            continue

        literal = _LITERAL_RE.search(line)
        if literal is not None:
            # Only word-sized literals are supported. Raise explicitly so the
            # check survives ``python -O``.
            if literal.group('size') != '.word':
                raise AssertionError(
                    '%s size undefined' % literal.group('size'))
            line_addr = int(literal.group('addr'), 16)
            ef.literals[line_addr] = (4, int(literal.group('value'), 16))
            ef.addrs_to_f[line_addr] = curr_func_name
            continue

        # This is an instruction: extract its address.
        match = _INSTRUCTION_RE.search(line)
        if match is None:
            raise AssertionError(line)
        line_addr = int(match.group('line_addr'), 16)
        if elf_fun is None:
            raise AssertionError(
                'instruction before any function header: %s' % line)
        # The function keeps the raw line ...
        elf_fun.lines[line_addr] = line
        # ... while the global table drops everything after ';' and any
        # trailing whitespace.
        line = line.split(';')[0].rstrip(' \t\n\r')
        ef.lines[line_addr] = line
        ef.addrs_to_f[line_addr] = curr_func_name


def isDirectBranch(addr):
    """Return True if the instruction at ``addr`` has mnemonic ``b`` or ``bx``.

    Used to detect tail calls. Mnemonics that merely start with ``b``, such
    as ``bl``, do not match because the mnemonic must be followed by
    whitespace.

    NOTE(review): the comment this docstring replaces stated that ``bx`` does
    not count, but the pattern has always accepted it; the behaviour is kept
    as it was. Confirm which of the two is intended.

    Raises:
        KeyError: no instruction is recorded at ``addr``.
    """
    inst = elfFile().lines[addr]
    return _DIRECT_BRANCH_RE.search(inst) is not None


def parseElf(dir_name):
    """Create the elfFile for ``dir_name``, parse it and return it.

    The disassembly is parsed first, then the symbol table. Both parsers
    obtain their elfFile through a bare ``elfFile()`` call, so this presumably
    relies on ``elfFile`` handing back the instance created here -- confirm
    against elf_file.
    """
    ef = elfFile(dir_name)
    parseTxt()
    parseSymTab()
    return ef