1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3#
4# Copyright (C) Google LLC, 2018
5#
6# Author: Tom Roeder <tmroeder@google.com>
7# Ported and modified for U-Boot by Joao Marcos Costa <jmcosta944@gmail.com>
8# Briefly documented at doc/build/gen_compile_commands.rst
9#
10"""A tool for generating compile_commands.json in U-Boot."""
11
12import argparse
13import json
14import logging
15import os
16import re
17import subprocess
18import sys
19
# Default path of the generated compilation database (see --output).
_DEFAULT_OUTPUT = 'compile_commands.json'
# Log level used when --log_level is not given on the command line.
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches the base name of a .cmd file: a leading dot and a '.cmd' suffix.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches a '[saved]cmd_<target>.o := <command>' line whose command ends with a
# .c or .S source file, optionally followed by ';'. Everything up to the source
# file is captured as 'command_prefix', the source file itself as 'file_path'.
_LINE_PATTERN = r'^(saved)?cmd_[^ ]*\.o := (?P<command_prefix>.* )(?P<file_path>[^ ]*\.[cS]) *(;|$)'
# Level names accepted by the --log_level option.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# Directories, relative to the searched directory, that are skipped when
# looking for .cmd files.
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
29
def parse_arguments():
    """Define the command-line interface and parse the given arguments.

    Returns:
        A 5-tuple holding, in this order:
        log_level: Name of the logging level used to filter log output.
        directory: Canonical path of the directory where the objects were
            built.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            When none are given, a one-element list holding the directory
            argument exactly as it was typed.
    """
    parser = argparse.ArgumentParser(
        description='Creates a compile_commands.json database from U-Boot .cmd files')

    parser.add_argument(
        '-d', '--directory', type=str, default='.',
        help=('specify the output directory used for the U-Boot build '
              '(defaults to the working directory)'))

    parser.add_argument(
        '-o', '--output', type=str, default=_DEFAULT_OUTPUT,
        help='path to the output command database (defaults to {})'.format(
            _DEFAULT_OUTPUT))

    parser.add_argument(
        '--log_level', choices=_VALID_LOG_LEVELS, default=_DEFAULT_LOG_LEVEL,
        help='the level of log messages to produce (defaults to {})'.format(
            _DEFAULT_LOG_LEVEL))

    parser.add_argument(
        '-a', '--ar', type=str, default='llvm-ar',
        help='command used for parsing .a archives')

    parser.add_argument(
        'paths', type=str, nargs='*',
        help=('directories to search or files to parse '
              '(files should be *.o, *.a, or modules.order). '
              'If nothing is specified, the current directory is searched'))

    options = parser.parse_args()

    # Without explicit paths, search the build directory itself.
    search_paths = options.paths or [options.directory]

    return (options.log_level,
            os.path.realpath(options.directory),
            options.output,
            options.ar,
            search_paths)
73
74
def cmdfiles_in_dir(directory):
    """Lazily yield every .cmd file located below a directory.

    The tree is walked top-down. The directories named in _EXCLUDE_DIRS
    (taken relative to the given directory) are neither searched nor
    descended into.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match
    skipped = [os.path.join(directory, name) for name in _EXCLUDE_DIRS]

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in skipped:
            # Emptying the list in place stops os.walk from descending.
            del subdirs[:]
            continue

        yield from (os.path.join(current, name)
                    for name in files if is_cmdfile(name))
99
100
def to_cmdfile(path):
    """Return the path of the .cmd file used for the given build artifact.

    The .cmd file sits in the same directory as the artifact and is named
    after it with a leading dot and a '.cmd' suffix, e.g. 'lib/foo.o' maps
    to 'lib/.foo.o.cmd'.

    Args:
        path: Path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    # Avoid naming the local 'dir', which would shadow the builtin.
    parent, basename = os.path.split(path)
    return os.path.join(parent, '.' + basename + '.cmd')
112
113
def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    List the members of the given archive with '<ar> -t', and yield the
    .cmd file path that corresponds to each member.

    Args:
        archive: The archive to parse.
        ar: Command used to list the archive members.

    Yields:
        The path to the .cmd file of every archive member.

    Raises:
        subprocess.CalledProcessError: The ar command exited with a non-zero
            status. As this is a generator, the command only runs (and can
            only fail) once iteration starts.
    """
    listing = subprocess.check_output([ar, '-t', archive]).decode()
    # 'ar -t' prints one member per line. Splitting per line, rather than on
    # any whitespace, keeps member paths that contain spaces in one piece.
    for member in listing.splitlines():
        if member:
            yield to_cmdfile(member)
127
128
def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Each line of modules.order names a module object ending in '.o'. The
    matching '<module>.mod' file is read, one object per line, and the .cmd
    file of each listed object is yielded.

    Args:
        modorder: The modules.order file to parse.

    Yields:
        The path to every .cmd file found.
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module_obj = entry.rstrip()
            stem, suffix = os.path.splitext(module_obj)
            if suffix != '.o':
                sys.exit('{}: module path must end with .o'.format(module_obj))
            # The .mod file lists the objects that compose the module.
            with open(stem + '.mod') as mod_file:
                for member in mod_file:
                    yield to_cmdfile(member.rstrip())
152
153
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory' (root_directory as given), 'file' (the canonical absolute
        path of the source file) and 'command' (the unescaped command prefix
        followed by file_path as given).

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)'. The
    # compile_commands.json file is not interpreted by Make, so this code
    # replaces the escaped version with '#'.
    # A raw string is required here: '\#' in a plain string literal is an
    # invalid escape sequence and triggers a warning on recent Python versions.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Return the canonical path, eliminating any symbolic links encountered in
    # the path. An absolute file_path overrides root_directory in the join.
    abs_path = os.path.realpath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
186
187
def main():
    """Gather .cmd files from the requested paths and write the database.

    The first line of every .cmd file is matched against _LINE_PATTERN. Each
    match becomes one entry, and the entries are written, sorted by their
    "file" value, as JSON to the output path.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    line_matcher = re.compile(_LINE_PATTERN)
    entries = []

    for path in paths:
        # A directory is searched recursively for .cmd files. An archive
        # (*.a) or a modules.order file is parsed to find the objects it
        # refers to. Anything else is rejected.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is examined.
            with open(cmdfile, 'rt') as cmd_fh:
                first_line = cmd_fh.readline()

            matched = line_matcher.match(first_line)
            if not matched:
                continue

            try:
                entries.append(process_line(directory,
                                            matched.group('command_prefix'),
                                            matched.group('file_path')))
            except ValueError as err:
                # The source file named by the command could not be found.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    entries.sort(key=lambda item: item["file"])
    with open(output, 'wt') as out_fh:
        json.dump(entries, out_fh, indent=2, sort_keys=True)
227
228
229if __name__ == '__main__':
230    main()
231