#!/usr/bin/env python3

# Copyright (C) 2020 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GCC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING.  If not, write to
# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.

# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
#
# Author: Martin Liska <mliska@suse.cz>

import argparse
import datetime
import os
import re
import subprocess
import sys
from itertools import takewhile

import requests

from unidiff import PatchSet

LINE_LIMIT = 100
TAB_WIDTH = 8
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
dg_regex = re.compile(r'{\s+dg-(error|warning)')
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
comment_regex = re.compile(r'^\/\*')
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
template_and_param_regex = re.compile(r'<[^<>]*>')
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder holds the resolved path of this
# script itself; root is therefore two directory levels above the script.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR collected by the most recent generate_changelog() call; the
# command-line driver uses it to build a commit subject line.
firstpr = ''


def find_changelog(path):
    """Return the closest ancestor folder of PATH that has a ChangeLog.

    Starting at the directory part of PATH and walking upwards, the first
    folder for which os.path.join(root, folder, 'ChangeLog') exists is
    returned.  An empty string is returned when no such folder is found.
    """
    folder = os.path.dirname(path)
    while folder:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        if parent == folder:
            # os.path.dirname('/') is '/', so an absolute path never
            # shrinks to ''; stop here instead of looping forever.
            break
        folder = parent
    return ''


def extract_function_name(line):
    """Return the entity name found on LINE, or None.

    The patterns are tried in this order: struct/class/union/enum
    declaration, #define/#undef, DEF* "supermacro", function-like
    identifier followed by '('.  Lines starting a C comment yield None.
    """
    if comment_regex.match(line):
        return None
    m = struct_regex.search(line)
    if m:
        # Struct declaration
        return m.group(1) + ' ' + m.group(3)
    m = macro_regex.search(line)
    if m:
        # Macro definition
        return m.group(2)
    m = super_macro_regex.search(line)
    if m:
        # Supermacro
        return m.group(1)
    m = fn_regex.search(line)
    if m:
        # Discard template and function parameters.
        fn = template_and_param_regex.sub('', m.group(1))
        return fn.rstrip()
    return None


def try_add_function(functions, line):
    """Append the name extracted from LINE to FUNCTIONS if it is new.

    Returns True when a name could be extracted from LINE, regardless of
    whether it was already present in FUNCTIONS.
    """
    fn = extract_function_name(line)
    if fn and fn not in functions:
        functions.append(fn)
    return bool(fn)


def sort_changelog_files(changed_file):
    """Sort key that places added and removed files after modified ones."""
    return (changed_file.is_added_file, changed_file.is_removed_file)


def get_pr_titles(prs):
    """Query Bugzilla for every entry of PRS and return the title lines.

    PRS is updated in place: when Bugzilla returns exactly one bug for an
    entry, the entry is rewritten as 'PR <component>/<id>'.  The returned
    string holds one '<PR> - <summary>' line per distinct bug, each
    followed by an empty line; it is empty when nothing was found.
    """
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        r = requests.get(bugzilla_url % pr_id)
        bugs = r.json()['bugs']
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)


def append_changelog_line(out, relative_path, text):
    """Return OUT extended with a '* RELATIVE_PATH: TEXT' entry.

    TEXT is moved to a continuation line when the entry, with the leading
    tab expanded to TAB_WIDTH columns, would exceed LINE_LIMIT.
    """
    line = f'\t* {relative_path}:'
    if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
        out += f'{line} {text}\n'
    else:
        out += f'{line}\n'
        out += f'\t{text}\n'
    return out


def get_rel_path_if_prefixed(path, folder):
    """Return PATH with a leading FOLDER (and slashes) stripped, if any."""
    if path.startswith(folder):
        return path[len(folder):].lstrip('/')
    else:
        return path


def _collect_test_prs(file, prs):
    """Append PR/DR entries mentioned by the newly added test FILE to PRS.

    The first ten lines of the first hunk are scanned for 'PR comp/num'
    and 'DR num' comments; scanning stops at the first dg-error or
    dg-warning directive.  A PR number embedded in the file name is added
    as 'PR <num>' unless that number was already seen in the file.
    """
    this_file_prs = []
    # A newly added empty file has no hunks at all.
    hunks = list(file)
    if hunks:
        # Only search first ten lines as later lines may
        # contain commented code with a note that it
        # has not been tested due to a certain PR or DR.
        for line in hunks[0][0:10]:
            m = pr_regex.search(line.value)
            if m:
                pr = m.group('pr')
                # Remember the number even when the PR is already listed
                # (e.g. by an earlier test of the same patch) so that the
                # file-name check below does not add a duplicate entry.
                this_file_prs.append(pr.split('/')[-1])
                if pr not in prs:
                    prs.append(pr)
            else:
                m = dr_regex.search(line.value)
                if m:
                    dr = m.group('dr')
                    if dr not in prs:
                        prs.append(dr)
                elif dg_regex.search(line.value):
                    # Found dg-warning/dg-error line
                    break
    # PR number in the file name
    fname = os.path.basename(file.path)
    m = pr_filename_regex.search(fname)
    if m:
        pr = m.group('pr')
        pr2 = 'PR ' + pr
        if pr not in this_file_prs and pr2 not in prs:
            prs.append(pr2)


def _add_hunk_functions(functions, hunk, extension):
    """Append the names of entities touched by HUNK to FUNCTIONS.

    EXTENSION is the file extension; '.md' enables the special handling
    of (define_* "name") forms.  When nothing can be derived from the
    hunk body, the hunk's section header is tried instead.
    """
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            m2 = md_def_regex.match(line.value)
            if extension == '.md' and m2:
                fn = m2.group(1)
                if fn not in functions:
                    functions.append(fn)
                    last_fn = None
                    success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                # A modification was seen after the remembered context
                # identifier: attribute the change to it.
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog template for the patch DATA.

    DATA is handed to unidiff.PatchSet.  When NO_FUNCTIONS is true no
    function names are emitted.  When FILL_PR_TITLES is true the titles of
    the collected PRs are downloaded and prepended.  ADDITIONAL_PRS is an
    optional list of extra PR entries ('PR comp/num' or 'comp/num').

    As a side effect the module-level variable firstpr is set to the
    first collected PR, if any.
    """
    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)
    global firstpr

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_test_prs(file, prs)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif hasattr(file, 'is_rename') and file.is_rename:
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                #
                # Since unidiff 0.7.0, path.file == path.target_file[2:],
                # it used to be path.source_file[2:]
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Moved to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                # Go through append_changelog_line like the other entry
                # kinds so that over-long entries are wrapped as well.
                out = append_changelog_line(out, relative_path,
                                            'Regenerate.')
            else:
                functions = []
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _add_hunk_functions(functions, hunk, extension)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out


def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to affected ChangeLogs.

    For every file of the patch DATA the ChangeLog of its closest folder
    (see find_changelog) gets a new entry dated today and attributed to
    the user configured in git (user.name / user.email).  Each ChangeLog
    is updated once.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        # skip files that can't be parsed, as generate_changelog does
        if file.path == '/dev/null':
            continue
        changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            with open(changelog) as f:
                content = f.read()
            with open(changelog, 'w') as f:
                # GNU ChangeLog header: date, name and e-mail address are
                # separated by two spaces.
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)


def skip_line_in_changelog(line):
    """Return False for the first line the ChangeLog must be put before.

    Used as a takewhile() predicate over a commit message: it is True for
    ordinary lines and becomes False at a 'Co-authored-by:' trailer or at
    a '#' comment line.
    """
    if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
        return False
    return True


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                        'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
            if args.changelog:
                with open(args.changelog) as f:
                    lines = f.read().split('\n')
                # Everything before the first trailer/comment line stays in
                # front of the generated ChangeLog, the rest goes after it.
                start = list(takewhile(skip_line_in_changelog, lines))
                end = lines[len(start):]
                with open(args.changelog, 'w') as f:
                    if not start or not start[0]:
                        # initial commit subject line 'component: [PRnnnnn]'
                        m = prnum_regex.match(firstpr)
                        if m:
                            title = f'{m.group("comp")}: [PR{m.group("num")}]'
                            start.insert(0, title)
                    if start:
                        # append empty line
                        if start[-1] != '':
                            start.append('')
                    else:
                        # append 2 empty lines
                        start = 2 * ['']
                    f.write('\n'.join(start))
                    f.write('\n')
                    f.write(output)
                    f.write('\n'.join(end))
            else:
                print(output, end='')
    finally:
        # Close the patch file we opened; never close standard input.
        if args.input:
            data.close()