1#!/usr/bin/python
2#
3# Copyright (C) 2013-2019 Free Software Foundation, Inc.
4#
5# This script is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option)
8# any later version.
9
10# This script adjusts the copyright notices at the top of source files
11# so that they have the form:
12#
13#   Copyright XXXX-YYYY Free Software Foundation, Inc.
14#
15# It doesn't change code that is known to be maintained elsewhere or
16# that carries a non-FSF copyright.
17#
18# Pass --this-year to the script if you want it to add the current year
19# to all applicable notices.  Pass --quilt if you are using quilt and
20# want files to be added to the quilt before being changed.
21#
22# By default the script will update all directories for which the
23# output has been vetted.  You can instead pass the names of individual
24# directories, including those that haven't been approved.  So:
25#
#    update-copyright.py --this-year
27#
28# is the command that would be used at the beginning of a year to update
29# all copyright notices (and possibly at other times to check whether
30# new files have been added with old years).  On the other hand:
31#
#    update-copyright.py --this-year libjava
33#
34# would run the script on just libjava/.
35#
36# This script was copied from gcc's contrib/ and modified to suit
37# binutils.  In contrast to the gcc script, this one will update
38# the testsuite and --version output strings too.
39
40import os
41import re
42import sys
43import time
44import subprocess
45
class Errors:
    """Count the problems reported during a run and echo them to stderr."""

    def __init__(self):
        # Number of messages passed to report() so far.
        self.num_errors = 0

    def report(self, filename, string):
        """Write STRING to stderr, prefixed by 'FILENAME: ' when one is given.

        Every call bumps the error count, whether or not FILENAME is set.
        """
        message = string
        if filename:
            message = filename + ': ' + message
        sys.stderr.write(message + '\n')
        self.num_errors += 1

    def ok(self):
        """Return True while nothing has been reported."""
        return self.num_errors == 0
58
class GenericFilter:
    """Default policy for which files and directories get processed.

    Subclasses adjust the sets built here or override the query methods.
    """

    def __init__(self):
        self.skip_files = set()
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        self.skip_files.update([
            # Licence files.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'COPYING.LIBGLOSS',
            'COPYING.NEWLIB',
            'LICENSE',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Auto- and libtool-related files.
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # FSF mission statement, etc.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Imported texinfo files.
            'texinfo.tex',
        ])

        self.skip_extensions.update([
            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        ])

        self.skip_dirs.add('autom4te.cache')

    def get_line_filter(self, dir, filename):
        """Return a regexp matching lines to leave untouched, or None.

        For files whose name starts with 'ChangeLog' the regexp matches
        lines beginning with a tab, so that references to copyright in
        changelog entries are ignored.
        """
        if not filename.startswith('ChangeLog'):
            return None
        return re.compile('\t')

    def skip_file(self, dir, filename):
        """Return True if FILENAME in DIR should not be processed at all."""
        if filename in self.skip_files:
            return True

        stem, suffix = os.path.splitext(os.path.join(dir, filename))
        if suffix in self.skip_extensions:
            return True

        present = os.path.exists
        if suffix == '.in':
            # A .in file with a sibling .am is produced by automake.
            if present(stem + '.am'):
                return True
            # A .in file with sibling .def and .tpl is produced by autogen.
            if present(stem + '.def') and present(stem + '.tpl'):
                return True

        # A configure script with a sibling .ac or .in is produced by
        # autoconf.
        if filename == 'configure':
            return present(stem + '.ac') or present(stem + '.in')

        return False

    def skip_dir(self, dir, subdir):
        """Return True if SUBDIR of DIR should not be descended into."""
        return subdir in self.skip_dirs

    def is_fossilised_file(self, dir, filename):
        """Return True if FILENAME should not have its latest year bumped.

        That covers the explicitly listed fossilised files and every
        ChangeLog variant other than the current 'ChangeLog' itself.
        """
        listed = filename in self.fossilised_files
        # Only touch current ChangeLogs.
        old_changelog = filename != 'ChangeLog' and 'ChangeLog' in filename
        return listed or old_changelog

    def by_package_author(self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
171
class Copyright:
    """Find copyright notices in files and rewrite them canonically.

    Holders registered with add_package_author are rewritten to their
    canonical spelling and have their years collapsed to a single year or
    a MIN-MAX range.  Notices whose holder was registered with
    add_external_author are left alone.  Problems are reported through
    the ERRORS object passed to the constructor, which must provide
    report (filename, string) and ok ().
    """

    class BadYear(Exception):
        """Raised by parse_year for a string that is not a usable year."""

        def __init__(self, year):
            Exception.__init__(self, year)
            self.year = year

        def __str__(self):
            return 'unrecognised year: ' + self.year

    def __init__(self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile(ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile(
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile(
            r'(^|[^\._])copyright[^=]*[0-9][0-9]', re.IGNORECASE)
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must not be changed.
        self.holders = {'@copying': '@copying'}

        # Every proper word-prefix of a package author's name; a holder
        # equal to one of these may continue on the next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author(self, holder, canon_form=None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form

        # Record 'A', 'A B', ... for a holder 'A B C'.
        index = holder.find(' ')
        while index >= 0:
            self.holder_prefixes.add(holder[:index])
            index = holder.find(' ', index + 1)

    def add_external_author(self, holder):
        """Register HOLDER as a known holder whose notices are left alone."""
        self.holders[holder] = None

    def parse_year(self, string):
        """Convert a string of digits to a four-digit year.

        Two-digit years greater than 70 are taken to be 19xx.  Any other
        two-digit value, and any string that is not two or four characters
        long, raises BadYear.
        """
        year = int(string)
        if len(string) == 2:
            if year > 70:
                return year + 1900
        elif len(string) == 4:
            return year
        raise self.BadYear(string)

    def year_range(self, years):
        """Return (earliest, latest) of the years mentioned in YEARS."""
        year_list = [self.parse_year(year)
                     for year in self.year_re.findall(years)]
        assert len(year_list) > 0
        return (min(year_list), max(year_list))

    def set_use_quilt(self, use_quilt):
        """Choose whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year(self, year):
        """Force every updated notice to extend at least to YEAR."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years(self, dir, filename, filter, years):
        """Return the canonical spelling of the year list YEARS.

        The result is 'YYYY' or 'YYYY-YYYY'.  Texinfo '@value' references
        are returned unchanged.  May raise BadYear.
        """
        # Leave texinfo variables alone.
        if years.startswith('@value'):
            return years

        (min_year, max_year) = self.year_range(years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file(dir, filename):
            max_year = max(max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        return '%d-%d' % (min_year, max_year)

    def strip_continuation(self, line):
        """Return LINE without leading whitespace and comment leader."""
        line = line.lstrip()
        match = self.comment_re.match(line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete(self, match):
        """Return a true value if MATCH already includes a whole holder.

        A holder that is only a prefix of a package author's name (and
        not itself a registered holder) is treated as incomplete.
        """
        holder = match.group(4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright(self, dir, filename, filter, file, line, match):
        """Rewrite the notice that MATCH found in LINE.

        FILE is the iterator the line came from; further lines are pulled
        from it while the notice looks incomplete.  Returns a tuple
        (changed, text, next_line): CHANGED says whether TEXT differs
        from the original text, TEXT is the rewritten line (or the
        original text, including any merged continuation lines, when the
        notice was left alone), and NEXT_LINE is a line that was read
        from FILE but not merged, or None.
        """
        orig_line = line
        next_line = None
        pathname = os.path.join(dir, filename)

        intro = match.group(1)
        if intro.startswith('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end(2):].strip()
            if after_years != '':
                self.errors.report(pathname,
                                   'trailing characters in @set: '
                                   + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete(match):
                try:
                    # next () rather than file.next () so that this works
                    # on Python 3 as well as Python 2.
                    next_line = next(file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation(next_line)
                if not self.continuation_re.match(continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match(line, match.start())
                assert match

            holder = match.group(4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author(dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report(pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report(pathname,
                                   'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The edits below work from right to left so that the
                # offsets of the earlier groups stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start(4)]
                        + canon_form
                        + line[match.end(4):])

                # Remove any 'by'
                if match.group(3):
                    line = line[:match.start(3)] + line[match.end(3):]

        # Update the copyright years.
        years = match.group(2).strip()
        if (self.max_year
            and match.start(0) > 0 and line[match.start(0) - 1] == '"'
            and not filter.is_fossilised_file(dir, filename)):
            # A printed copyright date consists of the current year
            canon_form = '%d' % self.max_year
        else:
            try:
                canon_form = self.canonicalise_years(dir, filename, filter,
                                                     years)
            except self.BadYear as e:
                self.errors.report(pathname, str(e))
                return (False, orig_line, next_line)

        line = (line[:match.start(2)]
                + ' ' + canon_form + self.separator
                + line[match.end(2):])

        # Use the standard (C) form.
        if intro.endswith('right'):
            intro += ' (C)'
        elif intro.endswith('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start(1)] + intro + line[match.end(1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file(self, dir, filename, filter):
        """Update the copyright notices in FILENAME, which lives in DIR.

        Stale '*.tmp' files are removed rather than processed.  The file
        is only rewritten if some notice changed and no error has been
        reported so far; the replacement keeps the original's permission
        bits.
        """
        pathname = os.path.join(dir, filename)
        if filename.endswith('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove(pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter(dir, filename)
        with open(pathname, 'r') as file:
            for line in file:
                # update_copyright may hand back a line it read ahead but
                # did not use; run that through the same checks.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match(line)):
                        match = self.copyright_re.search(line)
                        if match:
                            res = self.update_copyright(dir, filename, filter,
                                                        file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search(line):
                            self.errors.report(pathname,
                                               'unrecognised copyright: %s'
                                               % line.strip())
                    lines.append(line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open(tmp_pathname, 'w') as file:
                file.writelines(lines)
            # The temporary file replaces the original, so give it the
            # original's permission bits (e.g. keep scripts executable).
            os.chmod(tmp_pathname, os.stat(pathname).st_mode & 0o7777)
            if self.use_quilt:
                subprocess.call(['quilt', 'add', pathname])
            os.rename(tmp_pathname, pathname)

    def process_tree(self, tree, filter):
        """Process every file under TREE that FILTER does not exclude."""
        for (dir, subdirs, filenames) in os.walk(tree):
            # Don't recurse through directories that should be skipped.
            # The list must be pruned in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir(dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file(dir, filename):
                    sys.stdout.write('Skipping %s\n'
                                     % os.path.join(dir, filename))
                else:
                    self.process_file(dir, filename, filter)
456
class CmdLine:
    """Command-line driver: parse sys.argv and process the chosen trees.

    COPYRIGHT is a class (or factory) that is called with an Errors
    object to build the object that does the actual rewriting.
    """

    def __init__(self, copyright=Copyright):
        self.errors = Errors()
        self.copyright = copyright(self.errors)

        # (directory, filter) pairs registered through add_dir.
        self.dirs = []
        # Directories processed when none are named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        self.option_handlers = dict()
        self.option_help = []

        self.add_option('--help', 'Print this help', self.o_help)
        self.add_option('--quilt', '"quilt add" files before changing them',
                        self.o_quilt)
        self.add_option('--this-year', 'Add the current year to every notice',
                        self.o_this_year)

    def add_option(self, name, help, handler):
        """Register option NAME with its HELP text and HANDLER callable."""
        self.option_help.append((name, help))
        self.option_handlers[name] = handler

    def add_dir(self, dir, filter=None):
        """Register DIR as a known directory, processed through FILTER.

        When FILTER is omitted a fresh GenericFilter is created for the
        directory, rather than sharing one instance built when the
        method was defined.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append((dir, filter))

    def o_help(self, option=None):
        """Print the usage message and the known directories, then exit 0."""
        sys.stdout.write('Usage: %s [options] dir1 dir2...\n\n'
                         'Options:\n' % sys.argv[0])
        format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write(format % (what, help))
        sys.stdout.write('\nDirectories:\n')

        # Lay the directories out three to a row.
        format = '%-25s'
        total = len(self.dirs)
        for (i, (dir, filter)) in enumerate(self.dirs, 1):
            if i % 3 == 0 or i == total:
                sys.stdout.write(dir + '\n')
            else:
                sys.stdout.write(format % dir)
        sys.exit(0)

    def o_quilt(self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt(True)

    def o_this_year(self, option):
        """Handle --this-year."""
        self.copyright.include_year(time.localtime().tm_year)

    def main(self):
        """Parse sys.argv, process the selected directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise.
        """
        for arg in sys.argv[1:]:
            if not arg.startswith('-'):
                self.chosen_dirs.append(arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg](arg)
            else:
                self.errors.report(None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Naming a directory selects every registered
                    # directory at or below it.
                    canon_dir = os.path.join(chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith(canon_dir):
                            count += 1
                            self.copyright.process_tree(dir, filter)
                    if count == 0:
                        self.errors.report(None, 'unrecognised directory: '
                                           + chosen_dir)
        sys.exit(0 if self.errors.ok() else 1)
529
530#----------------------------------------------------------------------------
531
class TopLevelFilter(GenericFilter):
    """Filter for a directory whose subdirectories must never be entered."""

    def skip_dir(self, dir, subdir):
        # Every subdirectory is skipped, so only the files directly in
        # the directory itself are considered.
        return True
535
class ConfigFilter(GenericFilter):
    """Filter that additionally skips .m4 files imported from gettext."""

    def __init__(self):
        GenericFilter.__init__(self)

    def skip_file(self, dir, filename):
        """Return True if FILENAME in DIR should not be processed."""
        if filename.endswith('.m4'):
            # Files imported from gettext mention 'gettext-' on their
            # first line.
            with open(os.path.join(dir, filename)) as file:
                first_line = file.readline()
                if 'gettext-' in first_line:
                    return True
        return GenericFilter.skip_file(self, dir, filename)
548
class LdFilter(GenericFilter):
    """Filter that additionally skips ld testsuite match files."""

    def __init__(self):
        GenericFilter.__init__(self)

        # ld testsuite output match files.
        self.skip_extensions.add('.ro')
557
class BinutilsCopyright(Copyright):
    """Copyright updater primed with the holders found in binutils."""

    def __init__(self, errors):
        Copyright.__init__(self, errors)

        # Every spelling of the FSF below is rewritten to this form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
        )
        for spelling in fsf_spellings:
            self.add_package_author(spelling, canon_fsf)

        # Known holders whose notices are left as they are.
        external_holders = (
            'Carnegie Mellon University',
            'John D. Polstra.',
            'Linaro Ltd.',
            'MIPS Computer Systems, Inc.',
            'Red Hat Inc.',
            'Regents of the University of California.',
            'The Regents of the University of California.',
            'Third Eye Software, Inc.',
            'Ulrich Drepper',
            'Synopsys Inc.',
        )
        for holder in external_holders:
            self.add_external_author(holder)
582
class BinutilsCmdLine(CmdLine):
    """Command line that knows the binutils source directories."""

    def __init__(self):
        CmdLine.__init__(self, BinutilsCopyright)

        # Known directories in registration order, each paired with the
        # filter class to instantiate for it.  None means that add_dir's
        # default filter is used.
        known_dirs = (
            ('.', TopLevelFilter),
            ('bfd', None),
            ('binutils', None),
            ('config', ConfigFilter),
            ('cpu', None),
            ('elfcpp', None),
            ('etc', None),
            ('gas', None),
            ('gdb', None),
            ('gold', None),
            ('gprof', None),
            ('include', None),
            ('ld', LdFilter),
            ('libdecnumber', None),
            ('libiberty', None),
            ('opcodes', None),
            ('readline', None),
            ('sim', None),
        )
        for (name, filter_class) in known_dirs:
            if filter_class is None:
                self.add_dir(name)
            else:
                self.add_dir(name, filter_class())

        # Directories updated when none are named on the command line.
        self.default_dirs = ('bfd binutils elfcpp etc gas gold gprof '
                             'include ld libiberty opcodes').split()
619
# Script entry point: build the binutils command line and run it.  main()
# finishes by calling sys.exit, so control does not return here.
BinutilsCmdLine().main()
621