1#!/usr/bin/env python3
2
3# Copyright (C) 2020 Free Software Foundation, Inc.
4#
5# This file is part of GCC.
6#
7# GCC is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 3, or (at your option)
10# any later version.
11#
12# GCC is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with GCC; see the file COPYING.  If not, write to
19# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20# Boston, MA 02110-1301, USA.
21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it.  It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
26#
27# Author: Martin Liska <mliska@suse.cz>
28
29import argparse
30import os
31import re
32import sys
33from itertools import takewhile
34
35import requests
36
37from unidiff import PatchSet
38
# 'PR component/number' and 'DR number' references following a comment
# leader ('//', '/*', 'C', 'c', '*' or '!').
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')

# A line whose first character is a letter, digit, '_' or '#'.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that opens a C-style comment.
comment_regex = re.compile(r'^\/\*')
# class/struct/union/enum declaration with an optional GTY((...)) marker.
struct_regex = re.compile(
    r'^(class|struct|union|enum)\s+'
    r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)'
)
# '#define NAME' / '#undef NAME'.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEF...(NAME' style macro invocations; the first argument is captured.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# An identifier followed by '(' whose next character is not '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# A non-nested '<...>' group.
template_and_param_regex = re.compile(r'<[^<>]*>')

# Bugzilla REST query; '%s' is replaced with the bug id.
bugzilla_url = ('https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&'
                'include_fields=summary')

# File extensions for which function names are extracted.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# Despite its name, script_folder holds the resolved path of this file;
# gcc_root is the directory two levels above it.
script_folder = os.path.realpath(__file__)
gcc_root = os.path.dirname(os.path.dirname(script_folder))
62
63
def find_changelog(path):
    """Return the closest ancestor folder of *path* holding a ChangeLog.

    Starting at the directory part of *path*, each folder is joined onto
    gcc_root and checked for a 'ChangeLog' file; the first folder that has
    one is returned.  An empty string is returned when the walk runs out
    of parent folders.
    """
    folder = os.path.dirname(path)
    while True:
        if os.path.exists(os.path.join(gcc_root, folder, 'ChangeLog')):
            return folder
        parent = os.path.dirname(folder)
        # For a relative path the walk ends at ''.  For an absolute path
        # dirname() settles on the root and never becomes empty, so also
        # stop when no further progress is made instead of looping forever.
        if parent == '' or parent == folder:
            return ''
        folder = parent
73
74
def extract_function_name(line):
    """Return the entity named on *line*, or None if there is none.

    The checks are tried in order: struct-like declaration
    ('<keyword> <name>'), macro definition, supermacro invocation and
    finally a plain function, whose '<...>' groups are stripped.
    A line that opens a comment never yields a name.
    """
    if comment_regex.match(line):
        return None

    struct_match = struct_regex.search(line)
    if struct_match:
        # Struct declaration: keep the keyword, skip the GTY marker.
        keyword, name = struct_match.group(1, 3)
        return keyword + ' ' + name

    macro_match = macro_regex.search(line)
    if macro_match:
        # Macro definition
        return macro_match.group(2)

    super_macro_match = super_macro_regex.search(line)
    if super_macro_match:
        # Supermacro
        return super_macro_match.group(1)

    fn_match = fn_regex.search(line)
    if not fn_match:
        return None
    # Discard template and function parameters.
    stripped = template_and_param_regex.sub('', fn_match.group(1))
    return stripped.rstrip()
97
98
def try_add_function(functions, line):
    """Record the name found on *line* in *functions*, without duplicates.

    Returns True when a name could be extracted from *line* (whether or
    not it was already recorded) and False otherwise.
    """
    name = extract_function_name(line)
    if not name:
        return False
    if name not in functions:
        functions.append(name)
    return True
104
105
def sort_changelog_files(changed_file):
    """Sort key placing modified files first, then removed, then added."""
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
108
109
def get_pr_titles(prs):
    """Return a '<reference> - <summary>' line for each entry of *prs*.

    The part of each entry after the last '/' is used as the bug id in a
    query against bugzilla_url.  Entries for which the reply does not
    contain exactly one bug are skipped.  When at least one title was
    found, the result ends with an additional empty line; otherwise an
    empty string is returned.
    """
    titles = []
    for pr in prs:
        bug_id = pr.split('/')[-1]
        r = requests.get(bugzilla_url % bug_id)
        # A reply without a 'bugs' list (presumably an error reply, e.g.
        # for an id the server does not accept) is treated as "no match"
        # rather than aborting the whole run with a KeyError.
        bugs = r.json().get('bugs', [])
        if len(bugs) == 1:
            titles.append('%s - %s\n' % (pr, bugs[0]['summary']))
    if titles:
        titles.append('\n')
    # The text is only returned, never printed here: the caller decides
    # where the generated output goes.
    return ''.join(titles)
122
123
def _collect_prs_from_new_test(file, prs):
    """Append PR/DR references heading a newly added test to *prs*.

    Only the first hunk of *file* is scanned, and scanning stops at the
    first line carrying neither a PR nor a DR reference.  References
    already present in *prs* are not added again.
    """
    hunks = list(file)
    if not hunks:
        # An added file can come without any hunk (presumably an empty
        # file); indexing the first hunk would raise IndexError.
        return
    for line in hunks[0]:
        m = pr_regex.search(line.value)
        if m:
            ref = m.group('pr')
        else:
            m = dr_regex.search(line.value)
            if not m:
                break
            ref = m.group('dr')
        if ref not in prs:
            prs.append(ref)


def _collect_hunk_functions(hunk, functions):
    """Add the names of the functions touched by *hunk* to *functions*.

    A changed line that itself yields a name is recorded directly.
    Otherwise the most recent context line that looks like an identifier
    is remembered and recorded once a changed line has been seen after it
    and the next context line is reached.  When nothing was recorded for
    the hunk, its section header is tried as a fallback.
    """
    last_fn = None
    modified_visited = False
    success = False
    for line in hunk:
        m = identifier_regex.match(line.value)
        if line.is_added or line.is_removed:
            # Blank changed lines carry no information.
            if not line.value.strip():
                continue
            modified_visited = True
            if m and try_add_function(functions, m.group(1)):
                last_fn = None
                success = True
        elif line.is_context:
            if last_fn and modified_visited:
                try_add_function(functions, last_fn)
                last_fn = None
                modified_visited = False
                success = True
            elif m:
                last_fn = m.group(1)
                modified_visited = False
    if not success:
        try_add_function(functions, hunk.section_header)


def generate_changelog(data, no_functions=False, fill_pr_titles=False):
    """Return a ChangeLog skeleton for the patch text *data*.

    data           -- the diff, parsed with PatchSet.
    no_functions   -- when true, no function names are listed.
    fill_pr_titles -- when true, the titles returned by get_pr_titles for
                      the collected PR/DR references head the output.

    The files of the patch are grouped by the ChangeLog folder found by
    find_changelog.  Every group starts with '<folder>/ChangeLog:', an
    empty line and the PR/DR references taken from newly added testsuite
    files, followed by one entry per file.
    """
    changelogs = {}
    changelog_list = []
    prs = []
    out = []
    diff = PatchSet(data)

    for file in diff:
        changelog = find_changelog(file.path)
        if changelog not in changelogs:
            changelogs[changelog] = []
            changelog_list.append(changelog)
        changelogs[changelog].append(file)

        # Extract PR entries from newly added tests
        if 'testsuite' in file.path and file.is_added_file:
            _collect_prs_from_new_test(file, prs)

    if fill_pr_titles:
        out.append(get_pr_titles(prs))

    # sort ChangeLog so that 'testsuite' is at the end
    for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
        files = changelogs[changelog]
        out.append('%s:\n' % os.path.join(changelog, 'ChangeLog'))
        out.append('\n')
        out.extend('\t%s\n' % pr for pr in prs)
        # new and deleted files should be at the end
        for file in sorted(files, key=sort_changelog_files):
            assert file.path.startswith(changelog)
            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
            relative_path = file.path[len(changelog):].lstrip('/')
            if file.is_added_file:
                msg = 'New test' if in_tests else 'New file'
                out.append('\t* %s: %s.\n' % (relative_path, msg))
            elif file.is_removed_file:
                out.append('\t* %s: Removed.\n' % (relative_path))
            elif hasattr(file, 'is_rename') and file.is_rename:
                out.append('\t* %s: Moved to...\n' % (relative_path))
                # Drop the two-character prefix of the target file name.
                new_path = file.target_file[2:]
                # A file can be theoretically moved to a location that
                # belongs to a different ChangeLog.  Let user fix it.
                if new_path.startswith(changelog):
                    new_path = new_path[len(changelog):].lstrip('/')
                out.append('\t* %s: ...here.\n' % (new_path))
            else:
                functions = []
                # Do not add function names for testsuite files.  The
                # decision is the same for every hunk of the file, so it
                # is taken once up front.
                extension = os.path.splitext(relative_path)[1]
                if (not no_functions and not in_tests
                        and extension in function_extensions):
                    for hunk in file:
                        _collect_hunk_functions(hunk, functions)
                if functions:
                    out.append('\t* %s (%s):\n'
                               % (relative_path, functions[0]))
                    out.extend('\t(%s):\n' % fn for fn in functions[1:])
                else:
                    out.append('\t* %s:\n' % relative_path)
        out.append('\n')
    return ''.join(out)
223
224
# Command-line entry point: read the patch, build the ChangeLog skeleton
# and either print it or insert it into a commit message file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=help_message)
    parser.add_argument('input', nargs='?',
                        help='Patch file (or missing, read standard input)')
    parser.add_argument('-s', '--no-functions', action='store_true',
                        help='Do not generate function names in ChangeLogs')
    parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
                        help='Download title of mentioned PRs')
    parser.add_argument('-c', '--changelog',
                        help='Append the ChangeLog to a git commit message '
                             'file')
    args = parser.parse_args()
    # '-' is an explicit request for standard input.
    if args.input == '-':
        args.input = None

    if args.input:
        # Close the patch file as soon as it has been read.
        with open(args.input) as patch_file:
            data = patch_file.read()
    else:
        data = sys.stdin.read()
    output = generate_changelog(data, args.no_functions,
                                args.fill_up_bug_titles)
    if args.changelog:
        with open(args.changelog) as f:
            lines = f.read().split('\n')
        # Everything before the first line starting with '#' is kept in
        # front of the generated text, the remainder goes after it.
        start = list(takewhile(lambda l: not l.startswith('#'), lines))
        end = lines[len(start):]
        with open(args.changelog, 'w') as f:
            if start:
                # append empty line
                if start[-1] != '':
                    start.append('')
            else:
                # append 2 empty lines
                start = 2 * ['']
            f.write('\n'.join(start))
            f.write('\n')
            f.write(output)
            f.write('\n'.join(end))
    else:
        print(output, end='')
262