# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C language and can be found under 'libear' directory.

The 'libear' library is capturing all child process creation and logging the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """

import sys
import os
import os.path
import re
import itertools
import json
import glob
import logging
from libear import build_libear, TemporaryDirectory
from libscanbuild import command_entry_point, compiler_wrapper, \
    wrapper_environment, run_command, run_build
from libscanbuild import duplicate_check
from libscanbuild.compilation import split_command
from libscanbuild.arguments import parse_args_for_intercept_build
from libscanbuild.shell import encode, decode

__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']

# Separators used by the execution trace file format: US terminates every
# command argument, RS separates the fields of a record and GS terminates
# a record (see write_exec_trace and parse_exec_trace).
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})


@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command. """

    args = parse_args_for_intercept_build()
    return capture(args)


def capture(args):
    """ The entry point of build command interception.

    Runs the build with interception set up, collects the execution traces
    written into a temporary directory and writes the compilation database
    to 'args.cdb'.

    :param args:    the parsed command line arguments,
    :return:        the exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the pattern is built from the
        # same constant the wrapper uses when it names the trace files
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(os.path.join(tmp_dir, filename))
            for filename in sorted(glob.iglob(trace_pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy generators are consumed
        # here, while the temporary directory still exists
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code


def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It prepares the environment variables the build command shall be
    executed with. The exec calls will be logged by the 'libear' preloaded
    library or by the 'wrapper' programs.

    :param args:        the parsed command line arguments,
    :param destination: directory where the execution reports are written,
    :return:            the environment (dict) for the build command. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is built only when it is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment


@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`. """

    return compiler_wrapper(intercept_compiler_wrapper_impl)


def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    It does generate execution report into target directory.
    The target directory name is from environment variables.

    Problems are only logged as warnings: a missing target directory or an
    IO error does not raise to the caller. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if not target_dir:
        logging.warning(message_prefix, 'missing target directory')
        return
    # write current execution info to the pid file
    try:
        target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION
        target_file = os.path.join(target_dir, target_file_name)
        logging.debug('writing execution report to: %s', target_file)
        write_exec_trace(target_file, execution)
    except IOError:
        logging.warning(message_prefix, 'io problem')


def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in interception
    library. A record is a list of fields (pid, ppid, function, directory,
    command) joined by RS and terminated by GS. Every argument of the command
    field is terminated by US. The wrapper has no parent pid to report, so
    the pid is written for both the pid and the ppid fields.

    :param filename:    path to the output execution trace file,
    :param entry:       the Execution object to append to that file. """

    with open(filename, 'ab') as handler:
        pid = str(entry.pid)
        command = US.join(entry.cmd) + US
        content = RS.join([pid, pid, 'wrapper', entry.cwd, command]) + GS
        handler.write(content.encode('utf-8'))


def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    Records with fewer than five fields (e.g. a truncated write) are skipped
    with a warning instead of failing the whole run.

    :param filename:    path to the execution trace file,
    :return:            generator of dicts with 'pid', 'ppid', 'function',
                        'directory' and 'command' keys. """

    logging.debug('parse exec trace file: %s', filename)
    # read everything first, so the file is not kept open between yields
    with open(filename, 'r') as handler:
        content = handler.read()
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        if len(records) < 5:
            logging.warning('skipping malformed exec trace record in: %s',
                            filename)
            continue
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every argument is terminated by US: drop the empty tail
            'command': records[4].split(US)[:-1]
        }


def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace:  a dict as produced by parse_exec_trace,
    :return:            generator of compilation database entries, one for
                        each source file of the command. It is empty when
                        split_command does not recognize a compilation. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler name does not depend on the source file
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }


def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform:    name of the platform (returned by sys.platform),
    :return:            True if library preload will fail by the dynamic
                        linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # 'csrutil' is missing or failed: assume preload does work.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            return False
    else:
        return False


def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry:   a compilation database entry (dict),
    :return:        a string which is equal for entries considered to be
                    duplicates. """

    # For faster lookup in set filename is reverted
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reverted
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])