1#!/usr/bin/env python3
2
3# Copyright (C) 2020 Free Software Foundation, Inc.
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING.  If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and produces a skeleton ChangeLog for it.  It does not try to be too
# smart when parsing function names, but it produces a reasonable
# approximation.
26#
27# Author: Martin Liska <mliska@suse.cz>
28
29import argparse
30import datetime
31import os
32import re
33import subprocess
34import sys
35from itertools import takewhile
36
37import requests
38
39from unidiff import PatchSet
40
# Maximum width of a generated ChangeLog line.  When the width is measured
# (see append_changelog_line) every tab counts as TAB_WIDTH columns.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Lower-case prefix that skip_line_in_changelog compares (case-insensitively)
# against the start of a commit-message line.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# A comment leader ('//', '/*', or one of the characters 'C', 'c', '*', '!')
# followed by whitespace and 'PR <component>/<number>'.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits a 'PR <component>/<number>' entry into its two parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, followed by 'DR <number>'.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# '{', whitespace, then 'dg-error' or 'dg-warning'.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr' in any letter case, at the start or after a non-word character or an
# underscore, followed by at least four digits.
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#'; the whole line
# is captured.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line starting with '/*'.
comment_regex = re.compile(r'^\/\*')
# 'class', 'struct', 'union' or 'enum' at the start of the line, an optional
# 'GTY(...)' marker, then the tag name.
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'; group 2 is NAME.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# A line starting with an upper-case 'DEF...' word and '('; the first token
# inside the parentheses is captured.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# A token starting with a letter or '_' followed by '(' whose next character
# is not '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A '<...>' group that contains no nested angle brackets.
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define... "NAME"'; the quoted NAME is captured.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Query template used by get_pr_titles; '%s' receives the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# File extensions for which generate_changelog looks for function names.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# Base names that generate_changelog reports as 'Regenerate.'.
# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

# Used as the argparse description of the command-line interface.
help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder is the resolved path of this script
# file itself, so root is the parent of the directory containing the script.
# root is the directory below which find_changelog looks for ChangeLog files;
# the command-line option -d/--directory overrides it.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))

# First PR/DR entry collected by the latest generate_changelog call; the
# script entry point uses it to build a commit subject line.
firstpr = ''
77
78
def find_changelog(path):
    """Return the closest directory above PATH that contains a ChangeLog.

    Starting with the directory part of PATH, each ancestor directory is
    checked for a 'ChangeLog' file below the module-level ``root``.  The
    first directory that has one is returned.  The empty string is returned
    when the top of the path is reached, whether or not a ChangeLog exists
    there.
    """
    folder = os.path.dirname(path)
    while True:
        if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # dirname() reaches a fixed point at the top of the path: '' for a
        # relative path and '/' for an absolute one.  Stopping on the fixed
        # point (instead of only on '') keeps absolute paths from looping
        # forever.
        if parent == folder:
            return ''
        folder = parent
88
89
def extract_function_name(line):
    """Return the name of the entity declared on LINE, or None.

    Lines that start a C comment yield None.  Otherwise the first of the
    following that matches decides the result: a class/struct/union/enum
    declaration ('<keyword> <tag>'), a #define/#undef (the macro name), a
    DEF* supermacro (its first argument), or something that looks like a
    function (its name with '<...>' groups removed).
    """
    if comment_regex.match(line) is not None:
        return None

    struct_match = struct_regex.search(line)
    if struct_match is not None:
        # Struct declaration; the optional GTY marker is not reported.
        keyword, _, tag = struct_match.groups()
        return keyword + ' ' + tag

    # Macro definition first, then supermacro; each reports one group.
    for pattern, group in ((macro_regex, 2), (super_macro_regex, 1)):
        found = pattern.search(line)
        if found is not None:
            return found.group(group)

    call_match = fn_regex.search(line)
    if call_match is None:
        return None
    # Discard template and function parameters.
    name = template_and_param_regex.sub('', call_match.group(1))
    return name.rstrip()
112
113
def try_add_function(functions, line):
    """Record the name found on LINE in FUNCTIONS; return True if one exists.

    The name is appended only when it is not already in the list, but the
    return value is True whenever LINE yields a name at all.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
119
120
def sort_changelog_files(changed_file):
    """Sort key for patched files: (is added, is removed).

    Used with sorted() so that files that are neither added nor removed
    come first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
123
124
def get_pr_titles(prs):
    """Fetch the summaries of PRS from Bugzilla and format them.

    For every entry of PRS the text after the last '/' is used as the bug
    id in a bugzilla_url query.  When the reply describes exactly one bug,
    the entry of PRS is rewritten IN PLACE to 'PR <component>/<id>' and a
    '<entry> - <summary>' line is collected (duplicates are dropped).
    Entries for which the reply does not describe exactly one bug are left
    untouched.

    Returns the collected lines joined into one string, or '' when nothing
    was collected.  Network errors, including the timeout, propagate as
    exceptions of the requests library.
    """
    # Bound the wait so that an unreachable server cannot hang the script.
    timeout_seconds = 30
    output = []
    for idx, pr in enumerate(prs):
        pr_id = pr.split('/')[-1]
        r = requests.get(bugzilla_url % pr_id, timeout=timeout_seconds)
        # A reply without a 'bugs' list is treated like an unknown bug
        # instead of raising KeyError.
        bugs = r.json().get('bugs', [])
        if len(bugs) == 1:
            prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
            out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
            if out not in output:
                output.append(out)
    if output:
        output.append('')
    return '\n'.join(output)
139
140
def append_changelog_line(out, relative_path, text):
    """Return OUT extended by a ChangeLog entry for RELATIVE_PATH.

    The entry is '<TAB>* <path>: <text>' on one line when that fits into
    LINE_LIMIT columns (tabs counted as TAB_WIDTH columns); otherwise TEXT
    is moved to a tab-indented continuation line.
    """
    head = f'\t* {relative_path}:'
    single_line = head.replace('\t', ' ' * TAB_WIDTH) + ' ' + text
    if len(single_line) > LINE_LIMIT:
        return out + f'{head}\n' + f'\t{text}\n'
    return out + f'{head} {text}\n'
149
150
def get_rel_path_if_prefixed(path, folder):
    """Return PATH relative to FOLDER when PATH lies inside FOLDER.

    PATH is returned unchanged when it is not located below FOLDER.  The
    prefix must end on a path-component boundary, so folder 'gcc/c' does
    not match 'gcc/cp/x.c'.  An empty FOLDER matches every path; in that
    case only leading slashes are stripped.
    """
    prefix = folder.rstrip('/')
    if path == prefix or path.startswith(prefix + '/'):
        return path[len(prefix):].lstrip('/')
    return path
156
157
def _collect_test_prs(file, prs):
    """Append to PRS the PR/DR entries referenced by the added test FILE.

    The first ten lines of the first hunk are searched with pr_regex and
    dr_regex, and the file name is checked with pr_filename_regex.  PRS is
    modified in place; entries already present are not added again.
    """
    # Numbers of all PR/DR entries mentioned in this file, whether or not
    # they were already known from another file.
    this_file_prs = []
    hunks = list(file)
    # Only search first ten lines as later lines may contain commented
    # code with a note that it has not been tested due to a certain PR
    # or DR.  A file without any hunk has no lines to search.
    first_lines = hunks[0][0:10] if hunks else []
    for line in first_lines:
        m = pr_regex.search(line.value)
        if m:
            entry = m.group('pr')
        else:
            m = dr_regex.search(line.value)
            if m:
                entry = m.group('dr')
            elif dg_regex.search(line.value):
                # Found dg-warning/dg-error line
                break
            else:
                continue
        this_file_prs.append(entry.split('/')[-1])
        if entry not in prs:
            prs.append(entry)

    # PR number in the file name
    m = pr_filename_regex.search(os.path.basename(file.path))
    if m:
        pr = m.group('pr')
        pr2 = 'PR ' + pr
        if pr not in this_file_prs and pr2 not in prs:
            prs.append(pr2)


def _collect_hunk_functions(hunk, extension, functions):
    """Append to FUNCTIONS the names of the functions touched by HUNK.

    EXTENSION is the extension of the patched file; '.md' enables the
    special handling of '(define... "name"' lines.  When no name can be
    derived from the lines of the hunk, its section header is tried.
    """
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # special-case definition in .md files
            m2 = md_def_regex.match(line.value)
            if extension == '.md' and m2:
                fn = m2.group(1)
                if fn not in functions:
                    functions.append(fn)
                    last_fn = None
                    success = True

            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                # The change is followed by context again: attribute it to
                # the last candidate line seen in the preceding context.
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False,
                       additional_prs=None):
    """Return a ChangeLog skeleton for the patch DATA.

    DATA is handed to unidiff's PatchSet.  The result lists the collected
    PR/DR entries first, followed by one section per affected ChangeLog
    (testsuite ChangeLogs last) with one entry per patched file.

    NO_FUNCTIONS suppresses the function names in the entries.
    FILL_PR_TITLES additionally prepends the PR summaries obtained by
    get_pr_titles.  ADDITIONAL_PRS is an optional list of PRs to put in
    front of the ones found in the patch.

    Side effect: the module-level ``firstpr`` is set to the first collected
    PR/DR entry, if there is any.
    """
    global firstpr

    changelogs = {}
    changelog_list = []
    prs = []
    out = ''
    diff = PatchSet(data)

    if additional_prs:
        for apr in additional_prs:
            if not apr.startswith('PR ') and '/' in apr:
                apr = 'PR ' + apr
            if apr not in prs:
                prs.append(apr)
    for file in diff:
        # skip files that can't be parsed
        if file.path == '/dev/null':
            continue
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_test_prs(file, prs)

    if prs:
        firstpr = prs[0]

    if fill_pr_titles:
        out += get_pr_titles(prs)

    # print list of PR entries before ChangeLog entries
    if prs:
        if not out:
            out += '\n'
        for pr in prs:
            out += '\t%s\n' % pr
        out += '\n'

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
        out += '\n'
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = get_rel_path_if_prefixed(file.path, changelog)
            if file.is_added_file:
                msg = 'New test.' if in_tests else 'New file.'
                out = append_changelog_line(out, relative_path, msg)
            elif file.is_removed_file:
                out = append_changelog_line(out, relative_path, 'Removed.')
            elif getattr(file, 'is_rename', False):
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                #
                # Which of source_file/target_file file.path refers to
                # differs between unidiff versions (it changed in 0.7.0),
                # so both ends are taken from the explicit attributes.
                relative_path = get_rel_path_if_prefixed(file.source_file[2:],
                                                         changelog)
                out = append_changelog_line(out, relative_path, 'Moved to...')
                new_path = get_rel_path_if_prefixed(file.target_file[2:],
                                                    changelog)
                out += f'\t* {new_path}: ...here.\n'
            elif os.path.basename(file.path) in generated_files:
                out = append_changelog_line(out, relative_path, 'Regenerate.')
            else:
                functions = []
                extension = os.path.splitext(relative_path)[1]
                # Do not add function names for testsuite files
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _collect_hunk_functions(hunk, extension, functions)
                if functions:
                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
                    for fn in functions[1:]:
                        out += '\t(%s):\n' % fn
                else:
                    out += '\t* %s:\n' % relative_path
        out += '\n'
    return out
308
309
def update_copyright(data):
    """Prepend an 'Update copyright years.' entry to the affected ChangeLogs.

    DATA is handed to unidiff's PatchSet.  For every patched file the
    governing ChangeLog is located with find_changelog, and each such
    ChangeLog (once) gets a new entry at its top.  The entry is dated today
    and signed with the name and e-mail reported by 'git config'.

    Raises subprocess.CalledProcessError when 'git config' fails and
    OSError when a ChangeLog cannot be read or written.
    """
    current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
    username = subprocess.check_output(['git', 'config', 'user.name'],
                                       encoding='utf8').strip()
    email = subprocess.check_output(['git', 'config', 'user.email'],
                                    encoding='utf8').strip()

    changelogs = set()
    diff = PatchSet(data)

    for file in diff:
        # Same as in generate_changelog: such an entry names no real file.
        if file.path == '/dev/null':
            continue
        # find_changelog checks for the ChangeLog below root, so the file
        # has to be opened below root as well, not relative to the current
        # directory.
        changelog = os.path.join(root, find_changelog(file.path),
                                 'ChangeLog')
        if changelog not in changelogs:
            changelogs.add(changelog)
            with open(changelog) as f:
                content = f.read()
            with open(changelog, 'w') as f:
                f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
                f.write('\tUpdate copyright years.\n\n')
                f.write(content)
330
331
def skip_line_in_changelog(line):
    """Return False for a co-authored-by trailer or a '#' line, else True.

    Used as the takewhile() predicate that selects the leading part of a
    commit message in front of which nothing is inserted.
    """
    is_trailer = line.lower().startswith(CO_AUTHORED_BY_PREFIX)
    is_comment = line.startswith('#')
    return not (is_trailer or is_comment)
336
337
# Command-line entry point: parse the options, read the patch from a file or
# from standard input, and either update the copyright entries of the
# affected ChangeLog files or emit the ChangeLog skeleton (to standard
# output, or merged into a git commit message file).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-b', '--pr-numbers', action='store',
                        type=lambda arg: arg.split(','), nargs='?',
                        help='Add the specified PRs (comma separated)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-d', '--directory',
                        help='Root directory where to search for ChangeLog '
                        'files')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    parser.add_argument('--update-copyright', action='store_true',
                        help='Update copyright in ChangeLog files')
    args = parser.parse_args()
    if args.input == '-':
        args.input = None
    if args.directory:
        root = args.directory

    output = None
    data = open(args.input) if args.input else sys.stdin
    try:
        if args.update_copyright:
            update_copyright(data)
        else:
            output = generate_changelog(data, args.no_functions,
                                        args.fill_up_bug_titles,
                                        args.pr_numbers)
    finally:
        # Close the patch file opened above, but never the process' stdin.
        if data is not sys.stdin:
            data.close()

    if output is not None:
        if args.changelog:
            with open(args.changelog) as f:
                lines = f.read().split('\n')
            # 'start' is the message up to the first trailer/comment line;
            # the generated text is inserted between 'start' and 'end'.
            start = list(takewhile(skip_line_in_changelog, lines))
            end = lines[len(start):]
            with open(args.changelog, 'w') as f:
                if not start or not start[0]:
                    # initial commit subject line 'component: [PRnnnnn]'
                    m = prnum_regex.match(firstpr)
                    if m:
                        title = f'{m.group("comp")}: [PR{m.group("num")}]'
                        start.insert(0, title)
                if start:
                    # append empty line
                    if start[-1] != '':
                        start.append('')
                else:
                    # append 2 empty lines
                    start = 2 * ['']
                f.write('\n'.join(start))
                f.write('\n')
                f.write(output)
                f.write('\n'.join(end))
        else:
            print(output, end='')
393