1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
4# Copyright 2014, NICTA
6# This software may be distributed and modified according to the terms of
7# the BSD 2-Clause license. Note that NO WARRANTY is provided.
8# See "LICENSE_BSD2.txt" for details.
12# 2014 David Greenaway
14# This script takes a git repository, fetches any remote patches on the
15# repository, and then shoots out an email describing any new commits.
17# This should either be setup in cron to poll a remote repository, or---better
18# still---be executed by another script when a push event occurs.
21from __future__ import unicode_literals
23import argparse
24import git
25import os
26import sys
27import shelve
28import datetime
29import time
30import fcntl
32import smtplib
33import email
34import email.header
35import email.generator
36import email.mime.text
37import StringIO
# Allow UTF-8 quoted-printable messages: register 'utf-8' with
# quoted-printable as both the header encoding and the body encoding, and
# 'utf-8' as the output charset. This is a module-level side effect on the
# 'email' package's global charset registry.
email.Charset.add_charset('utf-8', email.Charset.QP, email.Charset.QP, 'utf-8')
42# Furthest back in history we are willing to look for new commits.
45# Maximum number of lines to email out in a patch.
48# If we have more than this many emails, collapse them into a single message.
# Footer appended to the bottom of every email body: a blank line, the
# "-- " signature separator, and a credit line.
#
# The heart (U+2665) is spelled as an escape so the literal cannot be
# corrupted into replacement characters by tools that mangle non-ASCII
# source bytes. This relies on 'unicode_literals' being in effect.
BODY_FOOTER = ["", "-- ", "Sent with \u2665 by 'commit-email.py'."]
def as_utf8(s):
    """Decode the byte string 's' as UTF-8 and return the resulting text.

    Bytes that do not form valid UTF-8 are handled by the 'replace' error
    handler instead of raising. 's' must be a 'str' instance.
    """
    assert isinstance(s, str)
    decoded = s.decode('utf-8', 'replace')
    return decoded
def is_unicode(s):
    """Return True when 's' is a 'unicode' (text) object, False otherwise."""
    text_type = unicode
    return isinstance(s, text_type)
def is_ascii(s):
    """Return True if the unicode string 's' contains only ASCII characters.

    The check explicitly *encodes* the text to ASCII. Calling '.decode()' on
    a unicode object only works via an implicit encode with the interpreter's
    default encoding; when that default is not ASCII the failure surfaces as
    an uncaught UnicodeDecodeError instead of a False result.
    """
    assert is_unicode(s)
    try:
        s.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True
def encode_unicode_header(s):
    """Return the unicode string 's' in a form usable inside an email header.

    Pure-ASCII text is returned unchanged. Anything else is turned into an
    encoded header value using the 'utf-8' charset.
    """
    if is_ascii(s):
        return s
    # Use the 'email.header' module name that this file imports and that
    # send_email() already uses, rather than the legacy 'email.Header' alias.
    return email.header.make_header([(s, "utf-8")]).encode()
# Module-wide verbosity switch; main() sets it when '--verbose' is given.
VERBOSE = False
def debug(x):
    """Write the message 'x' and a newline to stderr when VERBOSE is set."""
    if not VERBOSE:
        return
    sys.stderr.write(x + "\n")
def get_commit_patch(repo, hexsha):
    """Return the diffstat and patch of commit 'hexsha' as unicode text.

    Runs 'git show' through 'repo.git' with the patience, stat and patch
    options and an empty pretty format, then decodes the output as UTF-8.
    """
    raw_patch = repo.git.show(
            hexsha,
            patience=True,
            pretty="format:",
            stat=True,
            patch=True)
    return as_utf8(raw_patch)
def get_commit_branches(repo, remote, hexsha):
    """Return the sorted names of the refs of 'remote' that contain 'hexsha'.

    A ref is taken to contain the commit when the merge base of the commit
    and the ref's tip is the commit itself. Refs for which the git command
    fails are skipped.
    """
    containing = set()
    for remote_ref in remote.refs:
        try:
            tip = remote_ref.commit.hexsha
            if repo.git.merge_base(hexsha, tip) == hexsha:
                containing.add(remote_ref.remote_head)
        except git.exc.GitCommandError:
            # Best effort: a ref that git cannot relate to the commit is
            # simply not reported.
            continue
    return sorted(as_utf8(name) for name in containing)
def first_line(s, max_len=256):
    """Summarise the message 's' as its first line, stripped of whitespace.

    If that line is longer than 'max_len' characters it is cut to
    'max_len - 3' characters and an ellipsis is appended. 's' must be
    unicode and 'max_len' must be at least 3.
    """
    assert is_unicode(s)
    assert max_len >= 3
    s = s.split("\n")[0].strip()
    if len(s) > max_len:
        # U+2026 HORIZONTAL ELLIPSIS, written as an escape so the literal
        # cannot degrade into replacement characters if the source bytes are
        # mangled. Relies on 'unicode_literals' being in effect.
        s = s[:max_len - 3] + "\u2026"
    return s
def send_email(from_addr, dest_addrs, headers, body, dry_run=False):
    """Build a UTF-8 plain-text email and send it via the SMTP server on localhost.

    from_addr  -- envelope sender; ASCII-only unicode.
    dest_addrs -- list of recipient addresses; ASCII-only unicode. The 'To'
                  header is set to the first entry only, but the message is
                  delivered separately to every entry.
    headers    -- dict mapping header names to values; both ASCII-only unicode.
    body       -- unicode message body.
    dry_run    -- when true, write the generated message to stdout instead of
                  sending it.

    Exceptions raised by smtplib propagate to the caller. A one second pause
    follows every send attempt, successful or not.
    """
    # Ensure we only have unicode inputs, and that email addresses, header
    # names are in the ASCII subset. If only we had a type system...
    assert is_ascii(from_addr)
    assert all([is_ascii(x) for x in dest_addrs])
    assert all([is_ascii(x) and is_ascii(y) for (x, y) in headers.items()])
    assert is_unicode(body)

    # Construct email
    message = email.mime.text.MIMEText(body, "plain", "utf-8")
    for (name, value) in headers.items():
        message[name] = email.header.Header(value, "utf-8")
    message['To'] = dest_addrs[0]

    # Generate string.
    message_io = StringIO.StringIO()
    message_gen = email.generator.Generator(message_io, mangle_from_=False, maxheaderlen=900)
    message_gen.flatten(message)
    message_bytes = message_io.getvalue()

    # Everything should be 7-bit ASCII now, encoded as quoted-printable.
    assert is_ascii(message_bytes)

    # If dry run, just print the email.
    if dry_run:
        sys.stdout.write(message_bytes)
        sys.stdout.write("\n")
        return

    # Send the email.
    try:
        mailer = smtplib.SMTP('localhost')
        try:
            for addr in dest_addrs:
                mailer.sendmail(from_addr, addr, message_bytes)
            mailer.quit()
        finally:
            # Make sure the socket is released even when sendmail() or
            # quit() raised; close() is harmless after a successful quit().
            mailer.close()
    finally:
        # Safety: wait a short amount of time to avoid overloading the server.
        time.sleep(1.0)
def email_commit(from_addr, dest_addrs, repo, remote, commit, repo_name, dry_run=False):
    """Send one email describing the single commit 'commit'.

    The subject is '<repo_name>[ (<branch>)]: <first line of the message>'.
    The body lists the commit id, author, date and branches, followed by the
    full commit message, the patch (at most MAX_PATCH_LINES lines) and the
    standard footer. 'From' carries the author's name with 'from_addr';
    'Reply-To' carries the author's own address.
    """
    # Inputs must be unicode, and the addresses restricted to ASCII.
    assert is_ascii(from_addr)
    assert all([is_ascii(x) for x in dest_addrs])
    assert is_unicode(repo_name)

    # Grab the patch and cap it at MAX_PATCH_LINES lines.
    full_patch_lines = get_commit_patch(repo, commit.hexsha).split("\n")
    patch = "\n".join(full_patch_lines[:MAX_PATCH_LINES])

    # Work out which branches contain the commit.
    branches = get_commit_branches(repo, remote, commit.hexsha)

    # Branch tag for the subject: omitted for 'master' or when no branch is
    # known, otherwise the (alphabetically first) branch name, with a '+'
    # suffix when there are several.
    if not branches or "master" in branches:
        subject_branch = ""
    elif len(branches) == 1:
        subject_branch = " (" + branches[0] + ")"
    else:
        subject_branch = " (" + sorted(branches)[0] + "+)"
    summary = first_line(commit.message)
    subject = repo_name + subject_branch + ": " + summary

    # Pieces shared between the body and the headers.
    author_name = commit.author.name
    author_email = as_utf8(commit.author.email)
    authored_at = datetime.datetime.fromtimestamp(commit.authored_date)

    # Assemble the body, one list entry per line.
    body = [
        "commit:  %s" % (as_utf8(commit.hexsha[:12])),
        "author:  %s <%s>" % (author_name, author_email),
        "date:    %s" % (authored_at.strftime('%A, %-d %B %Y @ %H:%M')),
        "branch:  %s" % (", ".join(branches)),
    ]
    body.append("")
    body.extend(commit.message.strip().split("\n"))
    body.extend(["", ""])
    body.extend(patch.split("\n"))
    body.extend(BODY_FOOTER)

    # Assemble the headers and hand everything over for delivery.
    encoded_author = encode_unicode_header(author_name)
    headers = {
        "Reply-To": "%s <%s>" % (
                encoded_author, encode_unicode_header(author_email)),
        "From": "%s <%s>" % (encoded_author, from_addr),
        "Subject": encode_unicode_header(subject),
    }
    send_email(
            from_addr=from_addr,
            dest_addrs=dest_addrs,
            headers=headers,
            body="\n".join(body) + "\n",
            dry_run=dry_run
            )
def email_bulk_commit(from_addr, dest_addrs, repo, commits, repo_name, dry_run=False):
    """Send a single summary email covering all of 'commits'.

    The body has one line per commit: abbreviated id, first line of the
    message (at most 78 characters) and author name. When every commit has
    the same author email, that author is used for the 'From' display name
    and the 'Reply-To' address; otherwise a generic team name and 'from_addr'
    are used. 'repo' is accepted for signature symmetry with email_commit()
    but is not used.
    """
    # Check inputs.
    assert is_ascii(from_addr)
    assert all([is_ascii(x) for x in dest_addrs])
    assert is_unicode(repo_name)

    # Construct subject.
    subject = "%s: %d new commits" % (repo_name, len(commits))

    # Construct body.
    body = ["", subject, ""]
    for c in commits:
        body.append("%s: %s (%s)" % (
            as_utf8(c.hexsha[:12]),
            first_line(c.message, max_len=78),
            c.author.name))
    body += BODY_FOOTER

    # If all the authors are the same, use that as the "From" address.
    # Otherwise, invent something.
    authors = set([x.author.email for x in commits])
    author = "Verification Team"
    message_from_address = from_addr
    if len(authors) == 1:
        # The commit attribute is 'author' (as used above), not 'authors';
        # the latter raised AttributeError on every single-author batch.
        author = commits[0].author.name
        message_from_address = as_utf8(commits[0].author.email)

    # Construct email
    send_email(
            from_addr=from_addr,
            dest_addrs=dest_addrs,
            headers={
                "From": "%s <%s>" % (
                        encode_unicode_header(author), from_addr),
                "Reply-To": "%s <%s>" % (
                        encode_unicode_header(author),
                        encode_unicode_header(message_from_address)),
                "Subject": encode_unicode_header(subject),
                },
            body="\n".join(body) + "\n",
            dry_run=dry_run
            )
def main():
    """Command-line entry point: email out commits not yet announced.

    Opens the repository, takes an exclusive lock file inside the git
    directory, optionally fetches the remote, and collects up to MAX_COMMITS
    commits from each remote ref. Commits whose hash is not yet recorded in
    the 'commit-email.db' shelf are either emailed individually or, when
    there are more than '--max-emails' of them, summarised in one bulk email.
    Processed commits are recorded in the shelf unless '--dry-run' is given.
    """
    # Parse arguments.
    parser = argparse.ArgumentParser(
            description="Email new commits in a git repository.")
    parser.add_argument('repo', help="git repository location", metavar='REPO')
    parser.add_argument('--remote', '-r',
            help="remote to pull from (default 'origin')", default="origin", type=unicode)
    parser.add_argument('--verbose', '-v', action="store_true",
            help="be verbose")
    parser.add_argument('--mark-only', action="store_true",
            help="mark commits as emailed, but don't actually send off an email")
    parser.add_argument('--dry-run', '-n', action="store_true",
            help="don't do a 'git' fetch, and print emails to standard out")
    parser.add_argument('--no-fetch', action="store_true",
            help="don't do a 'git fetch'.")
    parser.add_argument('--repo-name', help="email subject prefix", type=unicode)
    parser.add_argument('--to', '-d', help="email address to send to", dest="to_addr", type=unicode)
    parser.add_argument('--from', '-f', help="email address to send from", dest="from_addr", type=unicode)
    # 'type=int' is required: without it a value given on the command line
    # stays a string, and the comparison against len(new_commits) below
    # never selects the bulk-email path.
    parser.add_argument('--max-emails', '-M', action="store", type=int,
            help="maximum commit emails before we just send a single email summarising the changes",
            dest="max_emails", default=MAX_EMAILS_PER_RUN)
    args = parser.parse_args()

    # Setup verbose debugging if necessary.
    global VERBOSE
    if args.verbose:
        VERBOSE = True

    # Require to and from unless dry-run or mark-only.
    if not args.dry_run and not args.mark_only:
        if args.to_addr is None or args.from_addr is None:
            parser.error("Require '--to' and '--from' email addresses.")
    elif args.dry_run:
        # Dry runs only print, so placeholder addresses are good enough.
        if args.to_addr is None:
            args.to_addr = "recipient@example.com"
        if args.from_addr is None:
            args.from_addr = "sender@example.com"

    # Load git repository.
    debug("Opening git repository '%s'..." % args.repo)
    repo = git.Repo(args.repo)

    # Construct a repo name from the path, if one was not provided.
    if not args.repo_name:
        args.repo_name = as_utf8(os.path.split(repo.working_dir)[-1])

    # Acquire a lock; it will be released when our process exits. The file
    # object is deliberately kept open (and never closed) for that reason.
    debug("Locking repository...")
    file_lock = open(os.path.join(repo.git_dir, ".commit-emails-flock"), "w")
    fcntl.flock(file_lock, fcntl.LOCK_EX)

    # Fetch from given URL.
    debug("Fetching from '%s'..." % args.remote)
    remote = repo.remotes[args.remote]
    if not args.dry_run and not args.no_fetch:
        remote.update()

    # Try and find recent commits. Keyed by hash so that a commit reachable
    # from several refs is only considered once.
    commits = {}
    for ref in remote.refs:
        for commit in repo.iter_commits(ref.object, max_count=MAX_COMMITS):
            commits[commit.hexsha] = commit

    # Open up database of commits we have already seen.
    db = shelve.open(os.path.join(repo.git_dir, "commit-email.db"))
    try:
        # Iterate over commits in increasing date order.
        new_commits = []
        for commit in sorted(commits.values(), key=lambda x: x.committed_date):
            if not (commit.hexsha in db):
                new_commits.append(commit)
        debug("Found %d new commit(s)." % len(new_commits))

        if len(new_commits) > args.max_emails:
            # Email a bulk message.
            if not args.mark_only:
                debug("Sending bulk email with %d commits..." % len(new_commits))
                email_bulk_commit(args.from_addr, [args.to_addr], repo, new_commits,
                        repo_name=args.repo_name, dry_run=args.dry_run)
            if not args.dry_run:
                for commit in new_commits:
                    db[commit.hexsha] = True
                db.sync()
        else:
            # Email off individual commit messages. Each commit is recorded
            # right after its email so an interrupted run does not resend
            # the ones already delivered.
            for commit in new_commits:
                if not args.mark_only:
                    debug("Emailing commit %s to %s..." % (commit.hexsha, args.to_addr))
                    email_commit(args.from_addr, [args.to_addr], repo, remote, commit,
                            repo_name=args.repo_name, dry_run=args.dry_run)
                if not args.dry_run:
                    db[commit.hexsha] = True
                    db.sync()
    finally:
        # Close the database.
        db.close()
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()