update-copyright.py revision 1.1.1.1
1#!/usr/bin/python
2#
3# Copyright (C) 2013 Free Software Foundation, Inc.
4#
5# This script is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option)
8# any later version.
9
10# This script adjusts the copyright notices at the top of source files
11# so that they have the form:
12#
13#   Copyright XXXX-YYYY Free Software Foundation, Inc.
14#
15# It doesn't change code that is known to be maintained elsewhere or
16# that carries a non-FSF copyright.
17#
18# The script also doesn't change testsuite files, except those in
19# libstdc++-v3.  This is because libstdc++-v3 has a conformance testsuite,
20# while most tests in other directories are just things that failed at some
21# point in the past.
22#
23# Pass --this-year to the script if you want it to add the current year
24# to all applicable notices.  Pass --quilt if you are using quilt and
25# want files to be added to the quilt before being changed.
26#
27# By default the script will update all directories for which the
28# output has been vetted.  You can instead pass the names of individual
29# directories, including those that haven't been approved.  So:
30#
#    update-copyright.py --this-year
32#
33# is the command that would be used at the beginning of a year to update
34# all copyright notices (and possibly at other times to check whether
35# new files have been added with old years).  On the other hand:
36#
#    update-copyright.py --this-year libjava
38#
39# would run the script on just libjava/.
40#
41# Note that things like --version output strings must be updated before
42# this script is run.  There's already a separate procedure for that.
43
44import os
45import re
46import sys
47import time
48import subprocess
49
class Errors:
    """Collects error reports: each one is echoed to stderr and counted."""

    def __init__ (self):
        # Number of reports made so far.
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr and count it as an error.

        When FILENAME is truthy the message is prefixed by 'FILENAME: '."""
        prefix = filename + ': ' if filename else ''
        sys.stderr.write (prefix + string + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True if nothing has been reported yet."""
        return not self.num_errors
62
class GenericFilter:
    """Default policy describing which files and directories to leave alone.

    Subclasses extend the sets created here or override the query methods."""

    def __init__ (self):
        # File names (without directory) that are never processed.
        self.skip_files = set ([
                # Skip licence files.
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',

                # Skip auto- and libtool-related files
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',

                # Skip FSF mission statement, etc.
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',

                # Skip imported texinfo files.
                'texinfo.tex',
                ])

        # Subdirectory names that are not descended into.
        self.skip_dirs = set()

        # File extensions (including the dot) that are never processed.
        self.skip_extensions = set()

        # Files that is_fossilised_file reports as fossilised.
        self.fossilised_files = set()

        # Files that by_package_author reports as the package author's.
        self.own_files = set()

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp matching lines of FILENAME that must be
        left untouched, or None if every line is eligible."""
        if not filename.startswith ('ChangeLog'):
            return None

        # Ignore references to copyright in changelog entries, i.e. lines
        # that start with a tab.
        return re.compile ('\t')

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        pathname = os.path.join (dir, filename)
        (base, extension) = os.path.splitext (pathname)
        if extension in self.skip_extensions:
            return True

        if extension == '.in':
            # Skip .in files produced by automake.
            if os.path.exists (base + '.am'):
                return True

            # Skip files produced by autogen, which have both a .def and
            # a .tpl sibling.
            has_def = os.path.exists (base + '.def')
            if has_def and os.path.exists (base + '.tpl'):
                return True

        # Skip configure files produced by autoconf.
        if filename == 'configure':
            return (os.path.exists (base + '.ac')
                    or os.path.exists (base + '.in'))

        return False

    def skip_dir (self, dir, subdir):
        """Return True if SUBDIR of DIR is listed in skip_dirs."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME is listed in fossilised_files or is a
        ChangeLog variant other than the file called exactly 'ChangeLog'."""
        if filename in self.fossilised_files:
            return True

        # Only the current ChangeLog counts as live.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
161
class Copyright:
    """Finds copyright notices in files and rewrites them canonically.

    A notice is rewritten only when its holder was registered through
    add_package_author.  Holders registered through add_external_author
    are left alone, and any other holder is reported through the Errors
    object passed to the constructor."""

    def __init__ (self, errors):
        """ERRORS is an object providing report (filename, string) and ok()."""
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice.  The literals are raw strings
        # so that regexp escapes such as \s are not read as (invalid)
        # Python string escapes.
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            # NOTE(review): non-ASCII literal in a file with no coding
            # declaration; confirm this is the intended text.
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + ',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            '(' + name + r'(?:\s?' + name + ')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)

        # Comment leaders that may precede a continuation line.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps a holder as written to its canonical form; None marks an
        # external holder whose notices must not be changed.
        self.holders = { '@copying': '@copying' }

        # Proper word-prefixes of package-author names, used to detect
        # holder names that are split across lines.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author's name, to be
        rewritten as CANON_FORM (HOLDER itself if CANON_FORM is omitted)."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form

        # Record every prefix of HOLDER that ends just before a space.
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are left alone."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a year that is not recognised.

        Derives from Exception so that it can be raised and caught on
        Python 3, which rejects raising classes outside BaseException."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING into a four-digit year.

        Two-digit years greater than 70 are taken to be 19xx and
        four-digit years are returned as-is.  Anything else raises
        BadYear."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) of the years mentioned in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Set whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Make YEAR the minimum upper bound of every updated notice."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return YEARS as 'YYYY' or 'YYYY-YYYY'.

        Texinfo @value references are returned unchanged.  The upper
        bound is raised to self.max_year when that is set and FILTER does
        not regard the file as fossilised.  May raise BadYear."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and without one leading
        comment marker (and the whitespace that follows it)."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH's holder looks complete, i.e. it is
        non-empty and is not merely a prefix of a package-author name."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the copyright notice that MATCH found in LINE.

        FILE is the iterator that LINE came from; further lines are
        pulled from it when the notice continues onto following lines.

        Returns (changed, line, next_line).  CHANGED says whether the
        returned LINE differs from the original text, which includes any
        continuation lines that were merged in.  NEXT_LINE is a line that
        was read from FILE but turned out not to belong to the notice, or
        None; the caller must process it itself."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The builtin works on Python 2.6+ and Python 3,
                    # unlike the Python 2-only file.next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The edits below run from right to left (holder, 'by',
                # years, intro) so that the match offsets of the groups
                # still to be edited remain valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  Group 3 reports offset -1 when it
                # did not take part in the match, so test explicitly
                # rather than slicing with -1.
                if match.start (3) >= 0:
                    line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ' ' + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the copyright notices in DIR/FILENAME.

        Files ending in '.tmp' are deleted rather than processed.  The
        file is rewritten, via a '.tmp' sibling, only if a notice changed
        and no error has been reported so far."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        with open (pathname, 'r') as infile:
            for line in infile:
                # update_copyright may hand back a line that it read but
                # did not consume; loop until there is none left.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         infile, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as outfile:
                outfile.writelines (lines)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not exclude."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list must be changed in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)
440
class CmdLine:
    """Command-line driver: parses options and runs a Copyright updater
    over the chosen registered directories."""

    def __init__ (self, copyright = Copyright):
        """COPYRIGHT is a callable that takes an Errors object and returns
        the updater to use."""
        self.errors = Errors()
        self.copyright = copyright (self.errors)

        # (directory, filter) pairs registered through add_dir.
        self.dirs = []

        # Directories processed when none are named on the command line.
        self.default_dirs = []

        # Directories named on the command line.
        self.chosen_dirs = []

        # Maps an option name to its handler.
        self.option_handlers = dict()

        # (name, help text) pairs, in registration order, for --help.
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME, described by HELP; HANDLER is called with
        the option string when the option is seen."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register DIR as a known directory, to be processed with FILTER.

        A fresh GenericFilter is used when FILTER is omitted.  It is
        created per call rather than once as the default value, so that
        directories never share one filter object by accident."""
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message and exit with status 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, description) in self.option_help:
            sys.stdout.write (option_format % (what, description))
        sys.stdout.write ('\nDirectories:\n')

        # List the directories three to a row, ending the last row with
        # a newline however many entries it has.
        dir_format = '%-25s'
        num_dirs = len (self.dirs)
        for (i, (name, _)) in enumerate (self.dirs, 1):
            if i % 3 == 0 or i == num_dirs:
                sys.stdout.write (name + '\n')
            else:
                sys.stdout.write (dir_format % name)
        sys.exit (0)

    def o_quilt (self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handle --this-year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the selected directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len (self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len (self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # A trailing separator stops 'gcc' from also
                    # matching a directory such as 'gcc-foo'.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)
513
514#----------------------------------------------------------------------------
515
class TopLevelFilter (GenericFilter):
    """Filter that reports every subdirectory as one to be skipped."""
    def skip_dir (self, dir, subdir):
        # Unconditional: no subdirectory is ever accepted by this filter.
        return True
519
class ConfigFilter (GenericFilter):
    """Filter that additionally skips .m4 files imported from gettext."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        """Return True for .m4 files whose first line mentions 'gettext-';
        otherwise defer to GenericFilter.skip_file."""
        if filename.endswith ('.m4'):
            with open (os.path.join (dir, filename)) as m4_file:
                first_line = m4_file.readline()
            # Skip files imported from gettext.
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file (self, dir, filename)
532
class GCCFilter (GenericFilter):
    """GenericFilter with extra skip and fossilised entries."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Not part of GCC
        self.skip_files.add ('math-68881.h')

        self.skip_dirs.update ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        # Old news won't be updated.
        self.fossilised_files.add ('ONEWS')
562
class TestsuiteFilter (GenericFilter):
    """Filter for testsuite directories: skips the test sources themselves."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Don't change the tests, which could be owned by anyone.
        self.skip_extensions.update ([
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        """Return True for g++.niklas/README; otherwise defer to
        GenericFilter.skip_file."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        in_niklas = os.path.basename (dir) == 'g++.niklas'
        if in_niklas and filename == 'README':
            return True
        return GenericFilter.skip_file (self, dir, filename)
587
class LibCppFilter (GenericFilter):
    """GenericFilter that also skips translation files."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])
599
class LibGCCFilter (GenericFilter):
    """GenericFilter that also skips the soft-fp directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Imported from GLIBC.
        self.skip_dirs.add ('soft-fp')
608
class LibJavaFilter (GenericFilter):
    """GenericFilter with extra skipped directories and per-file line
    filters for two headers."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs.update ([
                # Handled separately.
                'testsuite',

                # Not really part of the library
                'contrib',

                # Imported from upstream
                'classpath',
                'libltdl',
                ])

    def get_line_filter (self, dir, filename):
        """Return a regexp for lines to leave alone in FILENAME, deferring
        to GenericFilter for files without a special case."""
        # Headers in which the word 'copyright' is part of an identifier.
        special_cases = {
            'NameDecoder.h': '.*NAME_COPYRIGHT',
            'ICC_Profile.h': '.*icSigCopyrightTag',
            }
        pattern = special_cases.get (filename)
        if pattern is not None:
            return re.compile (pattern)
        return GenericFilter.get_line_filter (self, dir, filename)
631
class LibMudflapFilter (GenericFilter):
    """GenericFilter that also skips the testsuite directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Handled separately.
        self.skip_dirs.add ('testsuite')
640
class LibStdCxxFilter (GenericFilter):
    """GenericFilter with extra skip entries, one file treated as the
    package author's own, and a line filter for boost_concept_check.h."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        """Return a regexp for lines to leave alone in FILENAME, deferring
        to GenericFilter for every file but boost_concept_check.h."""
        if filename == 'boost_concept_check.h':
            # Raw string: '\(' is a regexp escape, not a Python string
            # escape, and newer Pythons warn about it in a plain literal.
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)
670
class GCCCopyright (Copyright):
    """Copyright updater preloaded with the holder names known for GCC."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        # Every accepted spelling of the FSF's name is rewritten to this.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
            )
        for spelling in fsf_spellings:
            self.add_package_author (spelling, canon_fsf)

        # Holders whose notices are recognised but left unchanged.
        external_holders = (
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            'The Go Authors.  All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
            )
        for holder in external_holders:
            self.add_external_author (holder)
711
class GCCCmdLine (CmdLine):
    """Command-line driver configured with GCC's directories and filters."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        # Known directories in registration order, each paired with its
        # filter.  None means "let add_dir supply its default filter".
        known_dirs = [
            ('.', TopLevelFilter()),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter()),
            # contrib isn't really part of GCC.
            ('fixincludes', None),
            ('gcc', GCCFilter()),
            (os.path.join ('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools', None),
            ('include', None),
            ('libada', None),
            ('libatomic', None),
            ('libbacktrace', None),
            ('libcpp', LibCppFilter()),
            ('libdecnumber', None),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter()),
            ('libgfortran', None),
            ('libgomp', None),
            ('libiberty', None),
            ('libitm', None),
            ('libjava', LibJavaFilter()),
            (os.path.join ('libjava', 'testsuite'), TestsuiteFilter()),
            ('libmudflap', LibMudflapFilter()),
            (os.path.join ('libmudflap', 'testsuite'), TestsuiteFilter()),
            ('libobjc', None),
            ('libquadmath', None),
            # libsanitiser is imported from upstream.
            ('libssp', None),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('lto-plugin', None),
            # zlib is imported from upstream.
            ]
        for (name, dir_filter) in known_dirs:
            if dir_filter is None:
                self.add_dir (name)
            else:
                self.add_dir (name, dir_filter)

        # Directories processed when none are named on the command line.
        self.default_dirs = [
            'gcc',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libitm',
            'libmudflap',
            'libobjc',
            'libstdc++-v3',
            ]
764
# Run the updater only when executed as a script, so that importing this
# file does not parse sys.argv or call sys.exit.
if __name__ == '__main__':
    GCCCmdLine().main()
766