1# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
2#===----------------------------------------------------------------------===##
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7#
8#===----------------------------------------------------------------------===##
9"""
extract - A set of functions that extract symbol lists from shared libraries.
11"""
12import distutils.spawn
13import os.path
14import sys
15import re
16
17import libcxx.util
18from libcxx.sym_check import util
19
# Symbol names that are dropped from the results: NMExtractor._want_sym
# rejects them and ReadElfExtractor.process_syms skips them.
extract_ignore_names = ['_init', '_fini']
21
class NMExtractor(object):
    """
    NMExtractor - Symbol list extraction backed by the nm command line tool.
    """

    @staticmethod
    def find_tool():
        """
        Look up the nm executable and return whatever
        distutils.spawn.find_executable reports for it.
        """
        return distutils.spawn.find_executable('nm')

    def __init__(self, static_lib):
        """
        Locate nm and record the flags passed to it on every invocation.

        The process prints an error and exits with status 1 when nm cannot
        be found. static_lib is stored on the instance as given.
        """
        tool_path = self.find_tool()
        self.nm_exe = tool_path
        if tool_path is None:
            # Nothing can be extracted without the tool.
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.static_lib = static_lib
        self.flags = ['-P', '-g']


    def extract(self, lib):
        """
        Run nm on lib and return the parsed symbols.

        Every non-blank output line is parsed; the symbols that pass
        _want_sym are de-duplicated and sorted by their repr() before being
        handed to util.read_syms_from_list, whose result is returned.
        Raises RuntimeError when nm exits with a non-zero status.
        """
        command = [self.nm_exe]
        command.extend(self.flags)
        command.append(lib)
        output, _, status = libcxx.util.executeCommandVerbose(command)
        if status != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        # Dicts are not hashable, so uniqueness is tracked on their repr().
        unique_reprs = set()
        for line in output.splitlines():
            if not line.strip():
                continue
            parsed = self._extract_sym(line)
            if self._want_sym(parsed):
                unique_reprs.add(repr(parsed))
        # The sorted strings are turned back into symbols by the helper.
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict.

        Returns None when the line has fewer than two whitespace separated
        columns. Otherwise the dict holds 'name', 'type' and 'is_defined',
        plus 'size' for OBJECT symbols whose line has a fourth column.
        """
        columns = sym_str.split()
        # A usable line carries at least a name and a type.
        if len(columns) < 2:
            return None
        name, kind = columns[0], columns[1]
        record = self._transform_sym_type({
            'name': name.replace('@@', '@'),
            'type': kind,
            'is_defined': kind.lower() != 'u',
        })
        # Only OBJECT symbols keep their size; the column is read as hex.
        if record['type'] == 'OBJECT' and len(columns) > 3:
            record['size'] = int(columns[3], 16)
        return record

    @staticmethod
    def _want_sym(sym):
        """
        Return True when sym is a parsed symbol that should be kept.
        """
        if sym is None or len(sym) < 2:
            return False
        name = sym['name']
        if name in extract_ignore_names:
            return False
        # Lower-case nm type letters listed here are never reported.
        if sym['type'] in ('t', 'b', 'r', 'd', 'w'):
            return False
        return name not in ('__bss_start', '_end', '_edata')

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite the single letter nm type in sym to FUNC or OBJECT.

        Letters outside the table are left as they are. The same dict is
        modified in place and returned.
        """
        type_names = {
            'T': 'FUNC',
            'W': 'FUNC',
            'B': 'OBJECT',
            'D': 'OBJECT',
            'R': 'OBJECT',
            'V': 'OBJECT',
            'S': 'OBJECT',
        }
        letter = sym['type']
        if letter in type_names:
            sym['type'] = type_names[letter]
        return sym
109
class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path, or None when
        it cannot be found.
        """
        return distutils.spawn.find_executable('readelf')

    def __init__(self, static_lib):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.

        Prints an error and exits with status 1 when readelf is not found.
        Asserts that static_lib is false, since archives are not supported.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        # TODO: Support readelf for reading symbols from archives
        assert not static_lib, "ReadElf does not yet support static libs"
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises RuntimeError when readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    @staticmethod
    def _parse_size(text):
        """
        Convert the size column of a symbol line to an int, accepting both
        plain decimal text and hexadecimal text carrying a 0x prefix.
        """
        if text.lower().startswith('0x'):
            return int(text, 16)
        return int(text)

    def process_syms(self, sym_list):
        """
        Turn the lines of a symbol table into a list of symbol dicts.

        Blank lines, lines without a name column, ignored names and NOTYPE
        entries are skipped. Each dict holds 'name', 'type' and
        'is_defined'; 'size' is kept for every type except FUNC.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9)
            # Seven columns means the name column is empty.
            if len(parts) == 7:
                continue
            new_sym = {
                'name': parts[7],
                'size': self._parse_size(parts[2]),
                'type': parts[3],
                'is_defined': (parts[6] != 'UND')
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE', 'TLS']
            if new_sym['name'] in extract_ignore_names:
                continue
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of the '.dynsym' table found in readelf output.

        The slice starts two lines after the table header and runs up to
        and including the first blank line that follows, or to the end of
        the output when there is none. Asserts that the header is present.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                # Skip the header itself and the line right after it.
                start = i + 2
            elif start != -1 and not line.strip():
                end = i + 1
                break
        assert start != -1, "No '.dynsym' symbol table found"
        if end == -1:
            end = len(lines)
        return lines[start:end]
186
187
def extract_symbols(lib_file, static_lib=None):
    """
    Return the symbols found in a static or dynamic library.

    When static_lib is None it is derived from the file name: a '.a'
    extension means a static library. readelf is used when it is available
    and the library is not static; nm is used otherwise. The result is
    whatever the chosen extractor's extract() returns for lib_file.
    """
    if static_lib is None:
        extension = os.path.splitext(lib_file)[1]
        static_lib = extension == '.a'
    if not ReadElfExtractor.find_tool() or static_lib:
        extractor = NMExtractor(static_lib=static_lib)
    else:
        extractor = ReadElfExtractor(static_lib=static_lib)
    return extractor.extract(lib_file)
202