1# SPDX-License-Identifier: GPL-2.0+
2#
3# Copyright 2020 Google LLC
4#
5"""Talks to the patchwork service to figure out what patches have been reviewed
6and commented on. Provides a way to display review tags and comments.
7Allows creation of a new branch based on the old but with the review tags
8collected from patchwork.
9"""
10
11import collections
12import concurrent.futures
13from itertools import repeat
14import re
15
16import pygit2
17import requests
18
19from patman import patchstream
20from patman.patchstream import PatchStream
21from u_boot_pylib import terminal
22from u_boot_pylib import tout
23
# Patches which are part of a multi-patch series are shown with a prefix like
# [prefix, version, sequence], for example '[RFC, v2, 3/5]'. All but the last
# part are optional. This decodes the string into groups. For single patches
# the [] part is not present:
# Groups: (ignore, ignore, ignore, prefix, version, sequence, subject)
# The first three groups exist only to make the bracketed parts optional
RE_PATCH = re.compile(r'(\[(((.*),)?(.*),)?(.*)\]\s)?(.*)$')

# This decodes the sequence string into a patch number and patch count, e.g.
# '3/5' gives the groups ('3', '5')
RE_SEQ = re.compile(r'(\d+)/(\d+)')
33
def to_int(vals):
    """Convert a list of strings into integers, using 0 if not an integer

    Only strings consisting entirely of decimal digits are converted. Anything
    else (empty strings, signed values, text, superscript digits like '²')
    produces 0.

    Args:
        vals (list): List of strings

    Returns:
        list: List of integers, one for each input string
    """
    # Use isdecimal() rather than isdigit(): the latter also accepts
    # characters such as superscript digits, which int() cannot convert and
    # would therefore raise ValueError instead of yielding 0
    return [int(val) if val.isdecimal() else 0 for val in vals]
45
46
class Patch(dict):
    """Models a patch in patchwork

    This class records information obtained from patchwork

    Some of this information comes from the 'Patch' column:

        [RFC,v2,1/3] dm: Driver and uclass changes for tiny-dm

    This shows the prefix, version, seq, count and subject.

    The other properties come from other columns in the display.

    All properties are stored as dictionary entries, so patch.seq and
    patch['seq'] refer to the same value.

    Properties:
        id (str): ID of the patch (typically an integer)
        seq (int): Sequence number within series (1=first) parsed from sequence
            string
        count (int): Number of patches in series, parsed from sequence string
        raw_subject (str): Entire subject line, e.g.
            "[1/2,v2] efi_loader: Sort header file ordering"
        prefix (str): Prefix string or None (e.g. 'RFC')
        version (str): Version string or None (e.g. 'v2')
        subject (str): Patch subject with [..] part removed (same as commit
            subject)
    """
    def __init__(self, pid):
        """Create a new patch with no subject information

        Args:
            pid (str): ID of the patch, stored in the 'id' property
        """
        super().__init__()
        self.id = pid  # Use 'id' to match what the Rest API provides
        self.seq = None
        self.count = None
        self.prefix = None
        self.version = None
        self.raw_subject = None
        self.subject = None

    # These make us more like a dictionary
    def __setattr__(self, name, value):
        self[name] = value

    def __getattr__(self, name):
        # This is only called when normal attribute lookup fails. A missing
        # key must be reported as AttributeError (not KeyError) so that
        # hasattr() and getattr() with a default work as expected
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    def __hash__(self):
        # Hash on the current contents, which allows patches to be put in a
        # set. The hash changes if any property is updated
        return hash(frozenset(self.items()))

    def __str__(self):
        return self.raw_subject

    def parse_subject(self, raw_subject):
        """Parse the subject of a patch into its component parts

        See RE_PATCH for details. The parsed info is placed into seq, count,
        prefix, version, subject

        If there is no sequence information (e.g. '[RFC,v2] subject' or just
        'subject') then seq and count are both set to 1.

        Args:
            raw_subject (str): Subject string to parse

        Raises:
            ValueError: the subject cannot be parsed
        """
        self.raw_subject = raw_subject.strip()
        mat = RE_PATCH.search(self.raw_subject)
        if not mat:
            raise ValueError("Cannot parse subject '%s'" % raw_subject)
        self.prefix, self.version, seq_info, self.subject = mat.groups()[3:]
        mat_seq = RE_SEQ.match(seq_info) if seq_info else None
        if seq_info and not mat_seq:
            # The last bracketed field is not of the form 'n/m', so it is
            # really a version or prefix. The regex fills fields from the
            # right, so what it reported as the version is one field earlier;
            # keep it as the prefix rather than discarding it, so that
            # '[RFC,v2] subject' yields prefix 'RFC' and version 'v2'
            if self.version and not self.prefix:
                self.prefix = self.version
            self.version = seq_info
        if self.version and not self.version.startswith('v'):
            # Not a version after all, so treat it as the prefix
            self.prefix = self.version
            self.version = None
        if mat_seq:
            self.seq = int(mat_seq.group(1))
            self.count = int(mat_seq.group(2))
        else:
            # Not part of a multi-patch series
            self.seq = 1
            self.count = 1
127
128
class Review:
    """A single review email gathered from Patchwork

    A patch may receive any number of reviews. Each one records some metadata
    about the review (such as author and date) together with its 'snippets',
    which are groups of quoted and unquoted text.
    """
    def __init__(self, meta, snippets):
        """Set up a new Review

        Args:
            meta (str): Text containing review author and date; it may span
                several lines, which are joined with ' : ' (blank lines are
                dropped)
            snippets (list): List of snippets in the review, each a list of
                text lines
        """
        # Collapse the metadata onto a single line, skipping empty lines
        nonblank = filter(None, meta.splitlines())
        self.meta = ' : '.join(nonblank)
        self.snippets = snippets
146
def _match_by_subject(items, candidates, item_name, cand_name, cand_plural,
                      warnings):
    """Pair each item with the one unused candidate that has the same subject

    Args:
        items (list): Objects to find a match for, each with a 'subject'
        candidates (list): Objects to choose from, each with a 'subject'
        item_name (str): Name of an item, for warnings (e.g. 'commit')
        cand_name (str): Name of a candidate, for warnings (e.g. 'patch')
        cand_plural (str): Plural of cand_name (e.g. 'patches')
        warnings (list of str): List to which any warnings are appended

    Returns:
        dict:
            key: Item number (0...n-1)
            value: Candidate matched to that item
    """
    # Keep the candidates in their original order so that warnings always
    # list them the same way; this also avoids requiring them to be hashable
    remaining = list(candidates)
    matched = {}
    for seq, item in enumerate(items):
        found = [cand for cand in remaining if cand.subject == item.subject]
        if len(found) == 1:
            matched[seq] = found[0]
            # Each candidate can be used only once
            remaining = [cand for cand in remaining if cand is not found[0]]
        elif found:
            warnings.append("Multiple %s match %s %d ('%s'):\n   %s" %
                            (cand_plural, item_name, seq + 1, item.subject,
                             '\n   '.join([cand.subject for cand in found])))
        else:
            warnings.append("Cannot find %s for %s %d ('%s')" %
                            (cand_name, item_name, seq + 1, item.subject))
    return matched


def compare_with_series(series, patches):
    """Compare a list of patches with a series it came from

    Commits and patches are matched up by subject. Any problems found are
    returned as a list of warnings; nothing is printed here.

    Args:
        series (Series): Series to compare against
        patches (:type: list of Patch): list of Patch objects to compare with

    Returns:
        tuple
            dict:
                key: Commit number (0...n-1)
                value: Patch object for that commit
            dict:
                key: Patch number  (0...n-1)
                value: Commit object for that patch
            list of str: Warnings, for commits first and then for patches;
                empty if everything matched up
    """
    warnings = []

    # Find the patch for each commit
    patch_for_commit = _match_by_subject(
        series.commits, patches, 'commit', 'patch', 'patches', warnings)

    # Find the commit for each patch
    commit_for_patch = _match_by_subject(
        patches, series.commits, 'patch', 'commit', 'commits', warnings)

    return patch_for_commit, commit_for_patch, warnings
200
def call_rest_api(url, subpath, timeout=60):
    """Call the patchwork API and return the result as JSON

    Args:
        url (str): URL of patchwork server, e.g. 'https://patchwork.ozlabs.org'
        subpath (str): URL subpath to use
        timeout (float): Number of seconds to wait for the server before
            giving up, or None to wait forever

    Returns:
        dict: Json result

    Raises:
        ValueError: the URL could not be read (the server did not return
            status 200)
        requests.exceptions.RequestException: the request itself failed, e.g.
            the connection failed or the timeout expired
    """
    full_url = '%s/api/1.2/%s' % (url, subpath)

    # Always pass a timeout: without one, requests waits indefinitely for an
    # unresponsive server, which would hang the caller
    response = requests.get(full_url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("Could not read URL '%s'" % full_url)
    return response.json()
219
def collect_patches(series, series_id, url, rest_api=call_rest_api):
    """Collect patch information about a series from patchwork

    Uses the Patchwork REST API to collect information provided by patchwork
    about the status of each patch.

    A warning is shown if the number of patches reported by patchwork differs
    from the number of commits in the series.

    Args:
        series (Series): Series object corresponding to the local branch
            containing the series
        series_id (str): Patch series ID number
        url (str): URL of patchwork server, e.g. 'https://patchwork.ozlabs.org'
        rest_api (function): API function to call to access Patchwork, for
            testing

    Returns:
        list: List of patches sorted by sequence number, each a Patch object

    Raises:
        ValueError: if the URL could not be read or the web page does not follow
            the expected structure
    """
    data = rest_api(url, 'series/%s/' % series_id)

    # Get the list of patches in the series, each a dict with 'id' and 'name'
    pw_patches = data['patches']
    count = len(pw_patches)
    num_commits = len(series.commits)
    if count != num_commits:
        tout.warning('Warning: Patchwork reports %d patches, series has %d' %
                     (count, num_commits))

    # Work through each patch one at a time, collecting the information
    patches = []
    for pw_patch in pw_patches:
        patch = Patch(pw_patch['id'])
        patch.parse_subject(pw_patch['name'])
        patches.append(patch)

    # Sort patches by patch number
    patches.sort(key=lambda patch: patch.seq)
    return patches
265
def find_new_responses(new_rtag_list, review_list, seq, cmt, patch, url,
                       rest_api=call_rest_api):
    """Work out which rtags in patchwork are missing from the local commit

    This reads the patch email and all of its comments from patchwork,
    gathers the review tags found in them and records those which the commit
    does not already have. Reviews (comments containing snippets) are
    recorded too.

    This is designed to be run in parallel, once for each commit/patch. If
    there is no patch, nothing is read and neither list is updated.

    Args:
        new_rtag_list (list): New rtags are written to new_rtag_list[seq]
            list, each a dict:
                key: Response tag (e.g. 'Reviewed-by')
                value: Set of people who gave that response, each a name/email
                    string
        review_list (list): New reviews are written to review_list[seq]
            list, each a
                List of reviews for the patch, each a Review
        seq (int): Position in new_rtag_list to update
        cmt (Commit): Commit object for this commit
        patch (Patch): Corresponding Patch object for this patch
        url (str): URL of patchwork server, e.g. 'https://patchwork.ozlabs.org'
        rest_api (function): API function to call to access Patchwork, for
            testing
    """
    if not patch:
        return

    found = collections.defaultdict(set)

    def absorb(stream):
        """Add the rtags from a processed piece of text to 'found'"""
        for tag, names in stream.commit.rtags.items():
            found[tag].update(names)

    # Start with the tags in the patch email itself
    patch_info = rest_api(url, 'patches/%s/' % patch.id)
    absorb(PatchStream.process_text(patch_info['content'], True))

    # Then look at each comment made on the patch: any that contain snippets
    # count as reviews, and all of them can contribute tags
    reviews = []
    for comment in rest_api(url, 'patches/%s/comments/' % patch.id):
        stream = PatchStream.process_text(comment['content'], True)
        if stream.snippets:
            sender = comment['submitter']
            author = '%s <%s>' % (sender['name'], sender['email'])
            reviews.append(Review(author, stream.snippets))
        absorb(stream)

    # Keep only the tag/person pairs which the commit does not already have
    known = cmt.rtags
    fresh = collections.defaultdict(set)
    for tag, names in found.items():
        for name in names:
            if tag in known and name in known[tag]:
                continue
            fresh[tag].add(name)

    new_rtag_list[seq] = fresh
    review_list[seq] = reviews
322
def show_responses(rtags, indent, is_new):
    """Print the review tags collected, one line for each tag/person pair

    Tags are shown in sorted order, as are the people within each tag. New
    tags are marked with a '+' and shown in bright colours.

    Args:
        rtags (dict): review tags to show
            key: Response tag (e.g. 'Reviewed-by')
            value: Set of people who gave that response, each a name/email string
        indent (str): Indentation string to write before each line
        is_new (bool): True if this output should be highlighted

    Returns:
        int: Number of review tags displayed
    """
    col = terminal.Color()
    marker = '+' if is_new else ' '
    shown = 0
    for tag in sorted(rtags):
        label = indent + '%s %s: ' % (marker, tag)
        for who in sorted(rtags[tag]):
            # The tag and the person go on one line, in different colours
            terminal.tprint(label, newline=False, colour=col.GREEN,
                            bright=is_new)
            terminal.tprint(who, colour=col.WHITE, bright=is_new)
            shown += 1
    return shown
346
def create_branch(series, new_rtag_list, branch, dest_branch, overwrite,
                  repo=None):
    """Build a new branch whose commits carry the extra review tags

    The new branch starts at the commit just before the series. Each commit
    in the series is then re-created on top of it, in order, with its new
    review tags inserted into the commit message.

    Args:
        series (Series): Series object for the existing branch
        new_rtag_list (list): List of review tags to add, one for each commit,
                each a dict:
            key: Response tag (e.g. 'Reviewed-by')
            value: Set of people who gave that response, each a name/email
                string
        branch (str): Existing branch to update; if empty or None, 'HEAD' is
            used
        dest_branch (str): Name of new branch to create
        overwrite (bool): True to force overwriting dest_branch if it exists
        repo (pygit2.Repository): Repo to use (use None unless testing)

    Returns:
        int: Total number of review tags added across all commits

    Raises:
        ValueError: if the destination branch name is the same as the original
            branch, or it already exists and @overwrite is False
    """
    if branch == dest_branch:
        raise ValueError(
            'Destination branch must not be the same as the original branch')
    repo = repo or pygit2.Repository('.')
    num_commits = len(series.commits)

    # Deal with any existing destination branch
    existing = repo.branches.get(dest_branch)
    if existing:
        if overwrite:
            existing.delete()
        else:
            raise ValueError("Branch '%s' already exists (-f to overwrite)" %
                             dest_branch)

    # Start the new branch at the commit just before the series
    branch = branch or 'HEAD'
    start = repo.revparse_single('%s~%d' % (branch, num_commits))
    repo.branches.local.create(dest_branch, start)

    num_added = 0
    for seq in range(num_commits):
        # Look up the destination again each time, since it moves as commits
        # are added
        dest_ref = repo.branches.get(dest_branch)
        source = repo.revparse_single(
            '%s~%d' % (branch, num_commits - seq - 1))

        # NOTE(review): the result of merge_base() is not used; the call is
        # retained so that behaviour is unchanged
        repo.merge_base(source.oid, dest_ref.target)
        index = repo.merge_trees(source.parents[0].tree, dest_ref, source)
        tree_id = index.write_tree(repo)

        # Create a 'tag: person' line for each new review tag on this commit
        rtags = new_rtag_list[seq]
        lines = []
        if rtags:
            lines = ['%s: %s' % (tag, who)
                     for tag, people in rtags.items() for who in people]
        num_added += len(lines)
        message = patchstream.insert_tags(source.message.rstrip(),
                                          sorted(lines))

        repo.create_commit(
            dest_ref.name, source.author, source.committer, message, tree_id,
            [dest_ref.target])
    return num_added
411
def check_patchwork_status(series, series_id, branch, dest_branch, force,
                           show_comments, url, rest_api=call_rest_api,
                           test_repo=None):
    """Check the status of a series on Patchwork

    This finds review tags and comments for a series in Patchwork, displaying
    them to show what is new compared to the local series. Commits for which
    no matching patch was found are not shown.

    If dest_branch is provided, a new branch is created containing the
    commits from the series, with the new review tags added.

    Args:
        series (Series): Series object for the existing branch
        series_id (str): Patch series ID number
        branch (str): Existing branch to update, or None
        dest_branch (str): Name of new branch to create, or None
        force (bool): True to force overwriting dest_branch if it exists
        show_comments (bool): True to show the comments on each patch
        url (str): URL of patchwork server, e.g. 'https://patchwork.ozlabs.org'
        rest_api (function): API function to call to access Patchwork, for
            testing
        test_repo (pygit2.Repository): Repo to use (use None unless testing)
    """
    patches = collect_patches(series, series_id, url, rest_api)
    col = terminal.Color()
    count = len(series.commits)
    new_rtag_list = [None] * count
    review_list = [None] * count

    patch_for_commit, _, warnings = compare_with_series(series, patches)
    for warn in warnings:
        tout.warning(warn)

    # Patch for each commit, or None where no patch was found
    patch_list = [patch_for_commit.get(seq) for seq in range(count)]

    # Read the responses for all patches in parallel, since each one needs
    # several requests to the server. The results are written directly into
    # new_rtag_list and review_list
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        results = executor.map(
            find_new_responses, repeat(new_rtag_list), repeat(review_list),
            range(count), series.commits, patch_list, repeat(url),
            repeat(rest_api))

    # Executor.map() provides return values rather than futures. There is
    # nothing useful in the values, but retrieving them re-raises any
    # exception which occurred in a worker, so it is not silently lost
    for _ in results:
        pass

    num_to_add = 0
    indent = ' ' * 2
    for seq, cmt in enumerate(series.commits):
        patch = patch_for_commit.get(seq)
        if not patch:
            continue
        terminal.tprint('%3d %s' % (patch.seq, patch.subject[:50]),
                       colour=col.BLUE)

        # Show the tags already in the commit, then the new ones
        show_responses(cmt.rtags, indent, False)
        num_to_add += show_responses(new_rtag_list[seq], indent, True)
        if show_comments:
            for review in review_list[seq]:
                terminal.tprint('Review: %s' % review.meta, colour=col.RED)
                for snippet in review.snippets:
                    for line in snippet:
                        quoted = line.startswith('>')
                        terminal.tprint('    %s' % line,
                                       colour=col.MAGENTA if quoted else None)
                    terminal.tprint()

    terminal.tprint("%d new response%s available in patchwork%s" %
                   (num_to_add, 's' if num_to_add != 1 else '',
                    '' if dest_branch
                    else ' (use -d to write them to a new branch)'))

    if dest_branch:
        num_added = create_branch(series, new_rtag_list, branch,
                                  dest_branch, force, test_repo)
        terminal.tprint(
            "%d response%s added from patchwork into new branch '%s'" %
            (num_added, 's' if num_added != 1 else '', dest_branch))
488