1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3# 4# Copyright 2014, NICTA 5# 6# This software may be distributed and modified according to the terms of 7# the BSD 2-Clause license. Note that NO WARRANTY is provided. 8# See "LICENSE_BSD2.txt" for details. 9# 10# @TAG(NICTA_BSD) 11# 12# 2014 David Greenaway 13# 14# This script takes a git repository, fetches any remote patches on the 15# repository, and then shoots out an email describing any new commits. 16# 17# This should either be setup in cron to poll a remote repository, or---better 18# still---be executed by another script when a push event occurs. 19# 20 21from __future__ import unicode_literals 22 23import argparse 24import git 25import os 26import sys 27import shelve 28import datetime 29import time 30import fcntl 31 32import smtplib 33import email 34import email.header 35import email.generator 36import email.mime.text 37import StringIO 38 39# Allow UTF-8 quoted-printable messages. 40email.Charset.add_charset('utf-8', email.Charset.QP, email.Charset.QP, 'utf-8') 41 42# Furthest back in history we are willing to look for new commits. 43MAX_COMMITS = 100 44 45# Maximum number of lines to email out in a patch. 46MAX_PATCH_LINES = 5000 47 48# If we have more than this many emails, collapse them into a single message. 49MAX_EMAILS_PER_RUN = 10 50 51# Footer at the bottom of emails 52BODY_FOOTER = ["", "-- ", "Sent with ��� by 'commit-email.py'."] 53 54def as_utf8(s): 55 """Interpret the given byte string as utf-8.""" 56 assert isinstance(s, str) 57 return s.decode('utf-8', 'replace') 58 59def is_unicode(s): 60 return isinstance(s, unicode) 61 62def is_ascii(s): 63 assert is_unicode(s) 64 try: 65 s.decode('ascii') 66 except UnicodeEncodeError: 67 return False 68 else: 69 return True 70 71def encode_unicode_header(s): 72 if is_ascii(s): 73 return s 74 return email.Header.make_header([(s, "utf-8")]).encode() 75 76VERBOSE = False 77def debug(x): 78 if VERBOSE: 79 sys.stderr.write(x + "\n") 80 81def get_commit_patch(repo, hexsha): 82 patch = repo.git.show(hexsha, patience=True, pretty="format:", stat=True, patch=True) 83 return as_utf8(patch) 84 85def get_commit_branches(repo, remote, hexsha): 86 commit_branches = set() 87 for ref in remote.refs: 88 try: 89 common_base = repo.git.merge_base(hexsha, ref.commit.hexsha) 90 if common_base == hexsha: 91 commit_branches.add(ref.remote_head) 92 except git.exc.GitCommandError: 93 pass 94 return sorted([as_utf8(x) for x in commit_branches]) 95 96def first_line(s, max_len=256): 97 """Summarise the message 's'.""" 98 assert is_unicode(s) 99 assert max_len >= 3 100 s = s.split("\n")[0].strip() 101 if len(s) > max_len: 102 s = s[:max_len - 3] + "���" 103 return s 104 105def send_email(from_addr, dest_addrs, headers, body, dry_run=False): 106 # Ensure we only have unicode inputs, and that email addresses, header 107 # names are in the ASCII subset. If only we had a type system... 108 assert is_ascii(from_addr) 109 assert all([is_ascii(x) for x in dest_addrs]) 110 assert all([is_ascii(x) and is_ascii(y) for (x, y) in headers.items()]) 111 assert is_unicode(body) 112 113 # Construct email 114 message = email.mime.text.MIMEText(body, "plain", "utf-8") 115 for header in headers.keys(): 116 message[header] = email.header.Header(headers[header], "utf-8") 117 message['To'] = dest_addrs[0] 118 119 # Generate string. 120 message_io = StringIO.StringIO() 121 message_gen = email.generator.Generator(message_io, mangle_from_=False, maxheaderlen=900) 122 message_gen.flatten(message) 123 message_bytes = message_io.getvalue() 124 125 # Everything should be 7-bit ASCII now, encoded as quoted-printable. 126 assert is_ascii(message_bytes) 127 128 # If dry run, just print the email. 129 if dry_run: 130 sys.stdout.write(message_bytes) 131 sys.stdout.write("\n") 132 return 133 134 # Send the email. 135 try: 136 mailer = smtplib.SMTP('localhost') 137 for addr in dest_addrs: 138 mailer.sendmail(from_addr, addr, message_bytes) 139 mailer.quit() 140 finally: 141 # Safety: wait a short amount of time to avoid overloading the server. 142 time.sleep(1.0) 143 144 145def email_commit(from_addr, dest_addrs, repo, remote, commit, repo_name, dry_run=False): 146 # Ensure we only have unicode inputs, and that email addresses, header 147 # names are ASCII. If only we had a type system... 148 assert is_ascii(from_addr) 149 assert all([is_ascii(x) for x in dest_addrs]) 150 assert is_unicode(repo_name) 151 152 # Fetch patch, trim to size. 153 patch = get_commit_patch(repo, commit.hexsha) 154 patch = "\n".join(patch.split("\n")[:MAX_PATCH_LINES]) 155 156 # Get branches this patch lives in. 157 branches = get_commit_branches(repo, remote, commit.hexsha) 158 159 # Construct subject from first line of message. 160 if len(branches) == 0 or ("master" in branches): 161 subject_branch = "" 162 elif len(branches) == 1: 163 subject_branch = " (" + branches[0] + ")" 164 else: 165 subject_branch = " (" + sorted(branches)[0] + "+)" 166 subject = repo_name + subject_branch + ": " + first_line(commit.message) 167 168 # Construct body. 169 body = ([ 170 "commit: %s" % (as_utf8(commit.hexsha[:12])), 171 "author: %s <%s>" % (commit.author.name, as_utf8(commit.author.email)), 172 "date: %s" % ( 173 datetime.datetime.fromtimestamp(commit.authored_date) 174 .strftime('%A, %-d %B %Y @ %H:%M')), 175 "branch: %s" % (", ".join(branches)), 176 ] 177 + [""] 178 + commit.message.strip().split("\n") 179 + [""] 180 + [""] 181 + patch.split("\n") 182 + BODY_FOOTER) 183 184 # Construct email 185 send_email( 186 from_addr=from_addr, 187 dest_addrs=dest_addrs, 188 headers={ 189 "Reply-To": "%s <%s>" % ( 190 encode_unicode_header(commit.author.name), 191 encode_unicode_header(as_utf8(commit.author.email))), 192 "From": "%s <%s>" % ( 193 encode_unicode_header(commit.author.name), from_addr), 194 "Subject": encode_unicode_header(subject), 195 }, 196 body="\n".join(body) + "\n", 197 dry_run=dry_run 198 ) 199 200def email_bulk_commit(from_addr, dest_addrs, repo, commits, repo_name, dry_run=False): 201 # Check inputs. 202 assert is_ascii(from_addr) 203 assert all([is_ascii(x) for x in dest_addrs]) 204 assert is_unicode(repo_name) 205 206 # Construct subject. 207 subject = "%s: %d new commits" % (repo_name, len(commits)) 208 209 # Construct body. 210 body = ["", subject, ""] 211 for c in commits: 212 body.append("%s: %s (%s)" % ( 213 as_utf8(c.hexsha[:12]), 214 first_line(c.message, max_len=78), 215 c.author.name)) 216 body += BODY_FOOTER 217 218 # If all the authors are the same, use that as the "From" address. 219 # Otherwise, invent something. 220 authors = set([x.author.email for x in commits]) 221 author = "Verification Team" 222 message_from_address = from_addr 223 if len(authors) == 1: 224 author = commits[0].authors.name 225 message_from_address = as_utf8(commits[0].authors.email) 226 227 # Construct email 228 send_email( 229 from_addr=from_addr, 230 dest_addrs=dest_addrs, 231 headers={ 232 "From": "%s <%s>" % ( 233 encode_unicode_header(author), from_addr), 234 "Reply-To": "%s <%s>" % ( 235 encode_unicode_header(author), 236 encode_unicode_header(message_from_address)), 237 "Subject": encode_unicode_header(subject), 238 }, 239 body="\n".join(body) + "\n", 240 dry_run=dry_run 241 ) 242 243def main(): 244 # Parse arguments. 245 parser = argparse.ArgumentParser( 246 description="Email new commits in a git repository.") 247 parser.add_argument('repo', help="git repository location", metavar='REPO') 248 parser.add_argument('--remote', '-r', 249 help="remote to pull from (default 'origin')", default="origin", type=unicode) 250 parser.add_argument('--verbose', '-v', action="store_true", 251 help="be verbose") 252 parser.add_argument('--mark-only', action="store_true", 253 help="mark commits as emailed, but don't actually send off an email") 254 parser.add_argument('--dry-run', '-n', action="store_true", 255 help="don't do a 'git' fetch, and print emails to standard out") 256 parser.add_argument('--no-fetch', action="store_true", 257 help="don't do a 'git fetch'.") 258 parser.add_argument('--repo-name', help="email subject prefix", type=unicode) 259 parser.add_argument('--to', '-d', help="email address to send to", dest="to_addr", type=unicode) 260 parser.add_argument('--from', '-f', help="email address to send from", dest="from_addr", type=unicode) 261 parser.add_argument('--max-emails', '-M', action="store", 262 help="maximum commit emails before we just send a single email summarising the changes", 263 dest="max_emails", default=MAX_EMAILS_PER_RUN) 264 args = parser.parse_args() 265 266 # Setup verbose debugging if neccessary. 267 global VERBOSE 268 if args.verbose: 269 VERBOSE = True 270 271 # Require to and from unless dry-run or mark-only. 272 if not args.dry_run and not args.mark_only: 273 if args.to_addr == None or args.from_addr == None: 274 parser.error("Require '--to' and '--from' email addresses.") 275 elif args.dry_run: 276 if args.to_addr == None: 277 args.to_addr = "recipient@example.com" 278 if args.from_addr == None: 279 args.from_addr = "sender@example.com" 280 281 # Load git repository. 282 debug("Opening git repository '%s'..." % args.repo) 283 repo = git.Repo(args.repo) 284 285 # Construct a repo name from the path, if one was not provided. 286 if not args.repo_name: 287 args.repo_name = as_utf8(os.path.split(repo.working_dir)[-1]) 288 289 # Acquire a lock; it will be released when our process exits. 290 debug("Locking repository...") 291 file_lock = open(os.path.join(repo.git_dir, ".commit-emails-flock"), "w") 292 fcntl.flock(file_lock, fcntl.LOCK_EX) 293 294 # Fetch from given URL. 295 debug("Fetching from '%s'..." % args.remote) 296 remote = repo.remotes[args.remote] 297 if not args.dry_run and not args.no_fetch: 298 remote.update() 299 300 # Try and find recent commits. 301 commits = {} 302 for ref in remote.refs: 303 for commit in repo.iter_commits(ref.object, max_count=MAX_COMMITS): 304 commits[commit.hexsha] = commit 305 306 # Open up database of commits we have already seen. 307 db = shelve.open(os.path.join(repo.git_dir, "commit-email.db")) 308 try: 309 # Iterate over commits in increasing date order. 310 new_commits = [] 311 for commit in sorted(commits.values(), key=lambda x: x.committed_date): 312 if not (commit.hexsha in db): 313 new_commits.append(commit) 314 debug("Found %d new commit(s)." % len(new_commits)) 315 316 if len(new_commits) > args.max_emails: 317 # Email a bulk message. 318 if not args.mark_only: 319 debug("Sending bulk email with %d commits..." % len(new_commits)) 320 email_bulk_commit(args.from_addr, [args.to_addr], repo, new_commits, 321 repo_name=args.repo_name, dry_run=args.dry_run) 322 if not args.dry_run: 323 for commit in new_commits: 324 db[commit.hexsha] = True 325 db.sync() 326 else: 327 # Email off individual commit messages. 328 for commit in new_commits: 329 if not args.mark_only: 330 debug("Emailing commit %s to %s..." % (commit.hexsha, args.to_addr)) 331 email_commit(args.from_addr, [args.to_addr], repo, remote, commit, 332 repo_name=args.repo_name, dry_run=args.dry_run) 333 if not args.dry_run: 334 db[commit.hexsha] = True 335 db.sync() 336 finally: 337 # Close the database. 338 db.close() 339 340if __name__ == "__main__": 341 main() 342