1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Copyright 2014, NICTA
5#
6# This software may be distributed and modified according to the terms of
7# the BSD 2-Clause license. Note that NO WARRANTY is provided.
8# See "LICENSE_BSD2.txt" for details.
9#
10# @TAG(NICTA_BSD)
11#
12# 2014 David Greenaway
13#
14# This script takes a git repository, fetches any remote patches on the
15# repository, and then shoots out an email describing any new commits.
16#
# This should either be set up in cron to poll a remote repository, or---better
# still---be executed by another script when a push event occurs.
19#
20
21from __future__ import unicode_literals
22
23import argparse
24import git
25import os
26import sys
27import shelve
28import datetime
29import time
30import fcntl
31
32import smtplib
33import email
34import email.header
35import email.generator
36import email.mime.text
37import StringIO
38
# Allow UTF-8 quoted-printable messages.
# This registers 'utf-8' with the email package using quoted-printable (QP)
# for both the header encoding and the body encoding, and 'utf-8' as the
# output charset. It is a module-level side effect, so it takes effect before
# any message is constructed further down in this file.
# NOTE(review): 'email.Charset' is the Python 2 era spelling of this module;
# the lowercase 'email.charset' is the documented name -- confirm when porting.
email.Charset.add_charset('utf-8', email.Charset.QP, email.Charset.QP, 'utf-8')
41
# Furthest back in history we are willing to look for new commits. Used as
# the 'max_count' limit when walking the history of each remote ref.
MAX_COMMITS = 100

# Maximum number of lines to email out in a patch. Anything beyond this many
# lines is dropped from the email.
MAX_PATCH_LINES = 5000

# If we have more than this many emails, collapse them into a single message.
# This is the default value of the '--max-emails' command-line option.
MAX_EMAILS_PER_RUN = 10

# Footer at the bottom of emails: a blank line, the "-- " signature
# separator, and the signature line itself.
# NOTE(review): the glyph after "Sent with" had been destroyed by an encoding
# error (it showed up as three replacement characters). It is assumed to have
# been a heart (U+2665) -- confirm. It is written as an escape so that it
# survives any future re-encoding of this file.
BODY_FOOTER = ["", "-- ", "Sent with \u2665 by 'commit-email.py'."]
53
def as_utf8(s):
    """Interpret the given byte string as utf-8.

    Takes a byte string and returns the decoded text. Byte sequences that
    are not valid utf-8 are replaced with U+FFFD instead of raising.
    """
    # 'bytes' rather than 'str': on Python 2 the two names refer to the same
    # type, so nothing changes there, but 'bytes' keeps meaning "byte string"
    # on interpreters where 'str' is the text type.
    assert isinstance(s, bytes)
    return s.decode('utf-8', 'replace')
58
def is_unicode(s):
    """Return True if 's' is a text (unicode) string, False otherwise."""
    try:
        text_type = unicode
    except NameError:
        # The 'unicode' builtin only exists on Python 2; where it is missing,
        # 'str' is the text type.
        text_type = str
    return isinstance(s, text_type)
61
def is_ascii(s):
    """Return True if the unicode string 's' contains only ASCII characters.

    's' must be a unicode string; anything else fails the assertion.
    """
    assert is_unicode(s)
    try:
        # Encode, not decode. Calling decode() on a unicode string makes
        # Python 2 first encode it with the interpreter's default codec, so
        # that form only gave the right answer while the default codec was
        # ASCII (and text strings have no decode() at all on Python 3).
        s.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True
70
def encode_unicode_header(s):
    """Return the unicode string 's' in a form that is safe in an email header.

    ASCII-only text is returned unchanged. Any other text is turned into a
    utf-8 encoded-word by the email package. The result is always a unicode
    string containing only ASCII characters.
    """
    if is_ascii(s):
        return s
    # Use 'email.header', the module this file imports and already uses when
    # building messages, rather than the legacy 'email.Header' alias.
    encoded = email.header.make_header([(s, "utf-8")]).encode()
    # On Python 2, Header.encode() returns a byte string. send_email() asserts
    # that every header value is unicode, so a non-ASCII subject passed
    # straight through would fail that check; decode it here instead.
    if isinstance(encoded, bytes):
        encoded = encoded.decode('ascii')
    return encoded
75
# Set to True by main() when '--verbose' is given.
VERBOSE = False
def debug(x):
    """Write the message 'x' and a newline to standard error if VERBOSE is set."""
    if not VERBOSE:
        return
    sys.stderr.write(x + "\n")
80
def get_commit_patch(repo, hexsha):
    """Return the diffstat and patch for commit 'hexsha' as unicode text.

    Runs 'git show' through 'repo' with the patience, stat and patch options
    and with the pretty format set to "format:", then decodes the output as
    utf-8.
    """
    show_options = dict(patience=True, pretty="format:", stat=True, patch=True)
    raw_patch = repo.git.show(hexsha, **show_options)
    return as_utf8(raw_patch)
84
def get_commit_branches(repo, remote, hexsha):
    """Return the sorted names of the refs of 'remote' that contain 'hexsha'.

    A ref is taken to contain the commit when 'git merge-base' of the commit
    and the ref's head is the commit itself. Each name is the ref's
    'remote_head', decoded as utf-8.
    """
    containing = set()
    for ref in remote.refs:
        try:
            merge_base = repo.git.merge_base(hexsha, ref.commit.hexsha)
        except git.exc.GitCommandError:
            # Best effort: a ref that git cannot compare is simply skipped.
            continue
        if merge_base == hexsha:
            containing.add(ref.remote_head)
    return sorted(as_utf8(name) for name in containing)
95
def first_line(s, max_len=256):
    """Summarise the message 's'.

    Returns the first line of the unicode string 's' with surrounding
    whitespace stripped. If that line is longer than 'max_len' characters it
    is cut to its first 'max_len - 3' characters followed by one ellipsis
    character. 'max_len' must be at least 3.
    """
    assert is_unicode(s)
    assert max_len >= 3
    s = s.split("\n")[0].strip()
    if len(s) > max_len:
        # NOTE(review): the suffix literal had been destroyed by an encoding
        # error (it showed up as three replacement characters). It is assumed
        # to have been a horizontal ellipsis (U+2026) -- confirm. It is
        # written as an escape so it survives re-encoding of this file.
        s = s[:max_len - 3] + "\u2026"
    return s
104
def send_email(from_addr, dest_addrs, headers, body, dry_run=False):
    """Build a utf-8 plain-text email and hand it to the SMTP server on localhost.

    from_addr  -- sender address; an ASCII-only unicode string.
    dest_addrs -- list of recipient addresses, each an ASCII-only unicode
                  string. Every address is sent its own copy, but only the
                  first one is named in the 'To' header.
    headers    -- dict mapping header names to header values; both must be
                  ASCII-only unicode strings (non-ASCII text has to be
                  encoded by the caller first).
    body       -- message text as a unicode string.
    dry_run    -- if true, write the generated message to standard out and
                  return without contacting the mail server.

    Errors raised by smtplib while connecting or sending are propagated.
    """
    # Ensure we only have unicode inputs, and that email addresses, header
    # names are in the ASCII subset. If only we had a type system...
    assert is_ascii(from_addr)
    assert all(is_ascii(x) for x in dest_addrs)
    assert all(is_ascii(name) and is_ascii(value)
            for (name, value) in headers.items())
    assert is_unicode(body)

    # Construct email
    message = email.mime.text.MIMEText(body, "plain", "utf-8")
    for (name, value) in headers.items():
        message[name] = email.header.Header(value, "utf-8")
    message['To'] = dest_addrs[0]

    # Generate string. 'From ' lines in the body are left alone, and headers
    # are only folded once they exceed 900 characters.
    message_io = StringIO.StringIO()
    message_gen = email.generator.Generator(message_io, mangle_from_=False, maxheaderlen=900)
    message_gen.flatten(message)
    message_bytes = message_io.getvalue()

    # Everything should be 7-bit ASCII now, encoded as quoted-printable.
    assert is_ascii(message_bytes)

    #  If dry run, just print the email.
    if dry_run:
        sys.stdout.write(message_bytes)
        sys.stdout.write("\n")
        return

    # Send the email.
    try:
        mailer = smtplib.SMTP('localhost')
        try:
            for addr in dest_addrs:
                mailer.sendmail(from_addr, addr, message_bytes)
            mailer.quit()
        finally:
            # Make sure the connection is dropped even if a send failed
            # part-way through. After a successful quit() there is nothing
            # left for close() to do.
            mailer.close()
    finally:
        # Safety: wait a short amount of time to avoid overloading the server.
        time.sleep(1.0)
143
144
def email_commit(from_addr, dest_addrs, repo, remote, commit, repo_name, dry_run=False):
    """Send one email describing 'commit': its metadata, message and patch.

    The subject is 'repo_name', an optional branch tag and the first line of
    the commit message. The tag is omitted when the commit is on no branch
    of 'remote' or on "master"; otherwise it names the branch, with a "+"
    appended when there is more than one. The patch is cut off after
    MAX_PATCH_LINES lines. 'from_addr' and 'dest_addrs' must be ASCII-only
    unicode strings, and 'repo_name' a unicode string.
    """
    # Input checks. If only we had a type system...
    assert is_ascii(from_addr)
    assert all(is_ascii(addr) for addr in dest_addrs)
    assert is_unicode(repo_name)

    # Patch text, limited to the first MAX_PATCH_LINES lines.
    patch = get_commit_patch(repo, commit.hexsha)
    patch = "\n".join(patch.split("\n")[:MAX_PATCH_LINES])

    # Branches of the remote that contain this commit.
    branches = get_commit_branches(repo, remote, commit.hexsha)

    # Branch tag for the subject line.
    branch_tag = ""
    if branches and "master" not in branches:
        if len(branches) == 1:
            branch_tag = " (" + branches[0] + ")"
        else:
            branch_tag = " (" + min(branches) + "+)"
    subject = repo_name + branch_tag + ": " + first_line(commit.message)

    # Message body: metadata, commit message, patch, footer.
    authored = datetime.datetime.fromtimestamp(commit.authored_date)
    body = [
            "commit:  %s" % (as_utf8(commit.hexsha[:12])),
            "author:  %s <%s>" % (commit.author.name, as_utf8(commit.author.email)),
            "date:    %s" % (authored.strftime('%A, %-d %B %Y @ %H:%M')),
            "branch:  %s" % (", ".join(branches)),
            "",
            ]
    body.extend(commit.message.strip().split("\n"))
    body.extend(["", ""])
    body.extend(patch.split("\n"))
    body.extend(BODY_FOOTER)

    # Headers: the mail comes from 'from_addr' under the author's name, and
    # replies go to the author's own address.
    encoded_author = encode_unicode_header(commit.author.name)
    encoded_author_email = encode_unicode_header(as_utf8(commit.author.email))
    headers = {
            "Reply-To": "%s <%s>" % (encoded_author, encoded_author_email),
            "From": "%s <%s>" % (encoded_author, from_addr),
            "Subject": encode_unicode_header(subject),
            }

    send_email(
            from_addr=from_addr,
            dest_addrs=dest_addrs,
            headers=headers,
            body="\n".join(body) + "\n",
            dry_run=dry_run
            )
199
def email_bulk_commit(from_addr, dest_addrs, repo, commits, repo_name, dry_run=False):
    """Send a single email that summarises all of 'commits'.

    The body has one line per commit: the abbreviated hash, the first line
    of the commit message (at most 78 characters) and the author's name.
    'from_addr' and 'dest_addrs' must be ASCII-only unicode strings and
    'repo_name' a unicode string. 'repo' is not used here; it is kept so the
    signature stays parallel to email_commit().
    """
    # Check inputs.
    assert is_ascii(from_addr)
    assert all(is_ascii(x) for x in dest_addrs)
    assert is_unicode(repo_name)

    # Construct subject.
    subject = "%s: %d new commits" % (repo_name, len(commits))

    # Construct body.
    body = ["", subject, ""]
    for c in commits:
        body.append("%s: %s (%s)" % (
            as_utf8(c.hexsha[:12]),
            first_line(c.message, max_len=78),
            c.author.name))
    body += BODY_FOOTER

    # If every commit has the same author email, send under that author's
    # name and point "Reply-To" at the author's address. Otherwise use a
    # generic name and reply to 'from_addr'. The address in "From" is always
    # 'from_addr'.
    author_emails = set(c.author.email for c in commits)
    author = "Verification Team"
    reply_to_addr = from_addr
    if len(author_emails) == 1:
        # The attribute is 'author' (as used above); 'authors' does not
        # exist and raised AttributeError on this path.
        author = commits[0].author.name
        reply_to_addr = as_utf8(commits[0].author.email)

    # Construct email
    send_email(
            from_addr=from_addr,
            dest_addrs=dest_addrs,
            headers={
                "From": "%s <%s>" % (
                        encode_unicode_header(author), from_addr),
                "Reply-To": "%s <%s>" % (
                        encode_unicode_header(author),
                        encode_unicode_header(reply_to_addr)),
                "Subject": encode_unicode_header(subject),
                },
            body="\n".join(body) + "\n",
            dry_run=dry_run
            )
242
def main():
    """Command-line entry point: email every commit that has not been seen yet.

    Parses the arguments, opens the repository and takes an exclusive lock on
    it, fetches from the remote (unless '--dry-run' or '--no-fetch' is
    given), and collects up to MAX_COMMITS commits from each remote ref.
    Commits whose hash is not yet in the 'commit-email.db' shelf in the git
    directory are treated as new. They are emailed one by one, or as a single
    summary when there are more than '--max-emails' of them, and are then
    recorded in the shelf (nothing is recorded in a dry run).
    """
    # Parse arguments.
    parser = argparse.ArgumentParser(
            description="Email new commits in a git repository.")
    parser.add_argument('repo', help="git repository location", metavar='REPO')
    parser.add_argument('--remote', '-r',
            help="remote to pull from (default 'origin')", default="origin", type=unicode)
    parser.add_argument('--verbose', '-v', action="store_true",
            help="be verbose")
    parser.add_argument('--mark-only', action="store_true",
            help="mark commits as emailed, but don't actually send off an email")
    parser.add_argument('--dry-run', '-n', action="store_true",
            help="don't do a 'git' fetch, and print emails to standard out")
    parser.add_argument('--no-fetch', action="store_true",
            help="don't do a 'git fetch'.")
    parser.add_argument('--repo-name', help="email subject prefix", type=unicode)
    parser.add_argument('--to', '-d', help="email address to send to", dest="to_addr", type=unicode)
    parser.add_argument('--from', '-f', help="email address to send from", dest="from_addr", type=unicode)
    # 'type=int' is required: without it a value given on the command line
    # stays a string, and the count comparison further down is then not a
    # numeric comparison.
    parser.add_argument('--max-emails', '-M', action="store", type=int,
            help="maximum commit emails before we just send a single email summarising the changes",
            dest="max_emails", default=MAX_EMAILS_PER_RUN)
    args = parser.parse_args()

    # Setup verbose debugging if necessary.
    global VERBOSE
    if args.verbose:
        VERBOSE = True

    # Require to and from unless dry-run or mark-only.
    if not args.dry_run and not args.mark_only:
        if args.to_addr is None or args.from_addr is None:
            parser.error("Require '--to' and '--from' email addresses.")
    elif args.dry_run:
        # A dry run only prints, so placeholder addresses are good enough.
        if args.to_addr is None:
            args.to_addr = "recipient@example.com"
        if args.from_addr is None:
            args.from_addr = "sender@example.com"

    # Load git repository.
    debug("Opening git repository '%s'..." % args.repo)
    repo = git.Repo(args.repo)

    # Construct a repo name from the path, if one was not provided.
    if not args.repo_name:
        args.repo_name = as_utf8(os.path.split(repo.working_dir)[-1])

    # Acquire a lock; it will be released when our process exits.
    # The file object is deliberately kept in a local and never closed, so
    # the lock is held for the rest of the run.
    debug("Locking repository...")
    file_lock = open(os.path.join(repo.git_dir, ".commit-emails-flock"), "w")
    fcntl.flock(file_lock, fcntl.LOCK_EX)

    # Fetch from given URL.
    debug("Fetching from '%s'..." % args.remote)
    remote = repo.remotes[args.remote]
    if not args.dry_run and not args.no_fetch:
        remote.update()

    # Try and find recent commits. Keyed by hash, so a commit reachable from
    # several refs is only recorded once.
    commits = {}
    for ref in remote.refs:
        for commit in repo.iter_commits(ref.object, max_count=MAX_COMMITS):
            commits[commit.hexsha] = commit

    # Open up database of commits we have already seen.
    db = shelve.open(os.path.join(repo.git_dir, "commit-email.db"))
    try:
        # Iterate over commits in increasing date order.
        new_commits = []
        for commit in sorted(commits.values(), key=lambda x: x.committed_date):
            if commit.hexsha not in db:
                new_commits.append(commit)
        debug("Found %d new commit(s)." % len(new_commits))

        if len(new_commits) > args.max_emails:
            # Email a bulk message.
            if not args.mark_only:
                debug("Sending bulk email with %d commits..." % len(new_commits))
                email_bulk_commit(args.from_addr, [args.to_addr], repo, new_commits,
                        repo_name=args.repo_name, dry_run=args.dry_run)
            if not args.dry_run:
                for commit in new_commits:
                    db[commit.hexsha] = True
                db.sync()
        else:
            # Email off individual commit messages. Each commit is recorded
            # and synced right after its email, so a failure part-way through
            # does not cause already-sent commits to be emailed again.
            for commit in new_commits:
                if not args.mark_only:
                    debug("Emailing commit %s to %s..." % (commit.hexsha, args.to_addr))
                    email_commit(args.from_addr, [args.to_addr], repo, remote, commit,
                            repo_name=args.repo_name, dry_run=args.dry_run)
                if not args.dry_run:
                    db[commit.hexsha] = True
                    db.sync()
    finally:
        # Close the database.
        db.close()
339
# Only run main() when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
342