update-copyright.py revision 1.1.1.1
#!/usr/bin/python
#
# Copyright (C) 2013 Free Software Foundation, Inc.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This script adjusts the copyright notices at the top of source files
# so that they have the form:
#
#   Copyright XXXX-YYYY Free Software Foundation, Inc.
#
# It doesn't change code that is known to be maintained elsewhere or
# that carries a non-FSF copyright.
#
# The script also doesn't change testsuite files, except those in
# libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
# while most tests in other directories are just things that failed at some
# point in the past.
#
# Pass --this-year to the script if you want it to add the current year
# to all applicable notices. Pass --quilt if you are using quilt and
# want files to be added to the quilt before being changed.
#
# By default the script will update all directories for which the
# output has been vetted. You can instead pass the names of individual
# directories, including those that haven't been approved. So:
#
#    update-copyright.py --this-year
#
# is the command that would be used at the beginning of a year to update
# all copyright notices (and possibly at other times to check whether
# new files have been added with old years). On the other hand:
#
#    update-copyright.py --this-year libjava
#
# would run the script on just libjava/.
#
# Note that things like --version output strings must be updated before
# this script is run. There's already a separate procedure for that.
import os
import re
import shutil
import subprocess
import sys
import time

class Errors:
    """Counts the errors reported during a run and prints them to stderr."""

    def __init__ (self):
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr, prefixed with FILENAME if one is given,
        and count it as an error."""
        if filename:
            string = filename + ': ' + string
        sys.stderr.write (string + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True if no error has been reported so far."""
        return self.num_errors == 0

class GenericFilter:
    """Decides which files and directories of a tree should be left alone.

    Subclasses extend the sets created here or override the methods."""

    def __init__ (self):
        self.skip_files = set()
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        self.skip_files |= set ([
                # Skip licence files.
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',

                # Skip auto- and libtool-related files
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',

                # Skip FSF mission statement, etc.
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',

                # Skip imported texinfo files.
                'texinfo.tex',
                ])

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp for lines of FILENAME that should be
        left alone (it is applied with match(), i.e. at the start of each
        line), or None if every line should be examined."""
        if filename.startswith ('ChangeLog'):
            # Ignore references to copyright in changelog entries.
            return re.compile ('\t')

        return None

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        (base, extension) = os.path.splitext (os.path.join (dir, filename))
        if extension in self.skip_extensions:
            return True

        if extension == '.in':
            # Skip .in files produced by automake.
            if os.path.exists (base + '.am'):
                return True

            # Skip files produced by autogen
            if (os.path.exists (base + '.def')
                and os.path.exists (base + '.tpl')):
                return True

        # Skip configure files produced by autoconf
        if filename == 'configure':
            if os.path.exists (base + '.ac'):
                return True
            if os.path.exists (base + '.in'):
                return True

        return False

    def skip_dir (self, dir, subdir):
        """Return True if subdirectory SUBDIR of DIR should not be entered."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME should not have its upper year bumped."""
        if filename in self.fossilised_files:
            return True
        # Only touch current ChangeLogs.
        if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0:
            return True
        return False

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files, in which case
        update_copyright skips the copyright-holder check for it."""
        return filename in self.own_files

class Copyright:
    """Finds copyright notices in files and rewrites them in canonical form."""

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice.  The literals are raw strings so
        # that the regexp escapes reach the re module unchanged.
        # NOTE(review): the text as received has a literal copyright sign in
        # the fourth alternative; confirm against upstream whether this was
        # originally the HTML entity.
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)
        self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')
        self.holders = { '@copying': '@copying' }
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author.

        Notices that use it are rewritten to use CANON_FORM (HOLDER itself
        if CANON_FORM is not given).  Every prefix of HOLDER that ends
        before a space is recorded too, so that a notice split across two
        lines can be recognised as incomplete."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are left alone."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a year string it cannot interpret.

        Derives from Exception so that it can be raised and caught on
        Python 3, which rejects exceptions outside the BaseException
        hierarchy."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a four-digit year.

        Four-digit strings are returned as they are and two-digit strings
        greater than 70 have 1900 added.  Anything else raises BadYear."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) for the years mentioned in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Say whether files should be 'quilt add'ed before being changed."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Force every non-fossilised notice to extend to YEAR."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return the canonical form of the year list YEARS.

        The result is either a single year or 'MIN-MAX'.  Raises BadYear
        if one of the years cannot be parsed."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and without a leading
        comment marker (as matched by comment_re)."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH seems to include the whole holder.

        A holder that is only a prefix of a registered package author
        (and not itself registered) is treated as incomplete."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the copyright notice that MATCH found in LINE.

        FILE is the iterator that LINE came from; further lines are read
        from it while the notice looks incomplete.  Return a tuple
        (CHANGED, NEW_LINE, NEXT_LINE) where CHANGED says whether NEW_LINE
        differs from the original text (all lines that were merged), and
        NEXT_LINE is a line that was read from FILE but turned out not to
        belong to the notice, or None."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works on Python 3, where file
                    # objects have no .next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  Group 3 is optional; only slice when it
                # actually matched rather than relying on -1 offsets.
                if match.group (3):
                    line = line[:match.start (3)] + line[match.end (3):]

        # The edits here and below work from the end of the line towards the
        # start, so the offsets recorded in MATCH stay valid for each one.

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ' ' + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the copyright notices in DIR/FILENAME.

        The file is only rewritten if a notice changed and no error has
        been reported so far.  The new contents go to FILENAME.tmp, which
        is then renamed over the original."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        with open (pathname, 'r') as file:
            for line in file:
                # update_copyright may hand back a line that it read but
                # did not use; process that one before reading another.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                for line in lines:
                    file.write (line)
            # The temporary file is created with default permissions, so
            # copy the original's mode bits before it replaces the original.
            shutil.copymode (pathname, tmp_pathname)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list must be changed in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)

class CmdLine:
    """Command-line driver: parses options and processes chosen directories."""

    def __init__ (self, copyright = Copyright):
        self.errors = Errors()
        self.copyright = copyright (self.errors)
        self.dirs = []
        self.default_dirs = []
        self.chosen_dirs = []
        self.option_handlers = dict()
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with help text HELP.  HANDLER is called with
        the option string when the option is seen."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register directory DIR, to be processed using FILTER.

        A fresh GenericFilter is created when FILTER is not given, so that
        directories do not share one filter object."""
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message and exit with status 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, text) in self.option_help:
            sys.stdout.write (option_format % (what, text))
        sys.stdout.write ('\nDirectories:\n')

        # Print the directories three to a line.
        dir_format = '%-25s'
        for (i, (dir, filter)) in enumerate (self.dirs, 1):
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (dir_format % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handle --this-year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the chosen directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len (self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len (self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Adding the trailing separator makes the test below
                    # match whole path components only.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)

#----------------------------------------------------------------------------

class TopLevelFilter (GenericFilter):
    """Filter that never descends into subdirectories."""

    def skip_dir (self, dir, subdir):
        return True

class ConfigFilter (GenericFilter):
    """Filter that also skips .m4 files whose first line mentions gettext."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        if filename.endswith ('.m4'):
            pathname = os.path.join (dir, filename)
            with open (pathname) as file:
                # Skip files imported from gettext.
                if file.readline().find ('gettext-') >= 0:
                    return True
        return GenericFilter.skip_file (self, dir, filename)

class GCCFilter (GenericFilter):
    """Filter registered for the 'gcc' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Not part of GCC
                'math-68881.h',
                ])

        self.skip_dirs |= set ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        self.fossilised_files |= set ([
                # Old news won't be updated.
                'ONEWS',
                ])

class TestsuiteFilter (GenericFilter):
    """Filter registered for the testsuite directories."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Don't change the tests, which could be owned by anyone.
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        if filename == 'README' and os.path.basename (dir) == 'g++.niklas':
            return True
        return GenericFilter.skip_file (self, dir, filename)

class LibCppFilter (GenericFilter):
    """Filter registered for the 'libcpp' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

class LibGCCFilter (GenericFilter):
    """Filter registered for the 'libgcc' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Imported from GLIBC.
                'soft-fp',
                ])

class LibJavaFilter (GenericFilter):
    """Filter registered for the 'libjava' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Handled separately.
                'testsuite',

                # Not really part of the library
                'contrib',

                # Imported from upstream
                'classpath',
                'libltdl',
                ])

    def get_line_filter (self, dir, filename):
        if filename == 'NameDecoder.h':
            return re.compile ('.*NAME_COPYRIGHT')
        if filename == 'ICC_Profile.h':
            return re.compile ('.*icSigCopyrightTag')
        return GenericFilter.get_line_filter (self, dir, filename)

class LibMudflapFilter (GenericFilter):
    """Filter registered for the 'libmudflap' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Handled separately.
                'testsuite',
                ])

class LibStdCxxFilter (GenericFilter):
    """Filter registered for the 'libstdc++-v3' directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        if filename == 'boost_concept_check.h':
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)

class GCCCopyright (Copyright):
    """Copyright updater that knows the holders registered below."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        canon_fsf = 'Free Software Foundation, Inc.'
        self.add_package_author ('Free Software Foundation', canon_fsf)
        self.add_package_author ('Free Software Foundation.', canon_fsf)
        self.add_package_author ('Free Software Foundation Inc.', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('The Free Software Foundation', canon_fsf)
        self.add_package_author ('The Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('Software Foundation, Inc.', canon_fsf)

        self.add_external_author ('ARM')
        self.add_external_author ('AdaCore')
        self.add_external_author ('Ami Tavory and Vladimir Dreizin, IBM-HRL.')
        self.add_external_author ('Cavium Networks.')
        self.add_external_author ('Faraday Technology Corp.')
        self.add_external_author ('Florida State University')
        self.add_external_author ('Greg Colvin and Beman Dawes.')
        self.add_external_author ('Hewlett-Packard Company')
        self.add_external_author ('Information Technology Industry Council.')
        self.add_external_author ('James Theiler, Brian Gough')
        self.add_external_author ('Makoto Matsumoto and Takuji Nishimura,')
        self.add_external_author ('National Research Council of Canada.')
        self.add_external_author ('Peter Dimov and Multi Media Ltd.')
        self.add_external_author ('Peter Dimov')
        self.add_external_author ('Pipeline Associates, Inc.')
        self.add_external_author ('Regents of the University of California.')
        self.add_external_author ('Silicon Graphics Computer Systems, Inc.')
        self.add_external_author ('Silicon Graphics')
        self.add_external_author ('Stephen L. Moshier')
        self.add_external_author ('Sun Microsystems, Inc. All rights reserved.')
        # NOTE(review): the next two entries are identical in the text as
        # received; confirm against upstream whether one of them originally
        # differed in spacing.
        self.add_external_author ('The Go Authors. All rights reserved.')
        self.add_external_author ('The Go Authors. All rights reserved.')
        self.add_external_author ('The Go Authors.')
        self.add_external_author ('The Regents of the University of California.')
        self.add_external_author ('Unicode, Inc.')
        self.add_external_author ('University of Toronto.')

class GCCCmdLine (CmdLine):
    """Command line that registers the directories and filters listed below."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        self.add_dir ('.', TopLevelFilter())
        # boehm-gc is imported from upstream.
        self.add_dir ('config', ConfigFilter())
        # contrib isn't really part of GCC.
        self.add_dir ('fixincludes')
        self.add_dir ('gcc', GCCFilter())
        self.add_dir (os.path.join ('gcc', 'testsuite'), TestsuiteFilter())
        self.add_dir ('gnattools')
        self.add_dir ('include')
        self.add_dir ('libada')
        self.add_dir ('libatomic')
        self.add_dir ('libbacktrace')
        self.add_dir ('libcpp', LibCppFilter())
        self.add_dir ('libdecnumber')
        # libffi is imported from upstream.
        self.add_dir ('libgcc', LibGCCFilter())
        self.add_dir ('libgfortran')
        self.add_dir ('libgomp')
        self.add_dir ('libiberty')
        self.add_dir ('libitm')
        self.add_dir ('libjava', LibJavaFilter())
        self.add_dir (os.path.join ('libjava', 'testsuite'), TestsuiteFilter())
        self.add_dir ('libmudflap', LibMudflapFilter())
        self.add_dir (os.path.join ('libmudflap', 'testsuite'),
                      TestsuiteFilter())
        self.add_dir ('libobjc')
        self.add_dir ('libquadmath')
        # libsanitiser is imported from upstream.
        self.add_dir ('libssp')
        self.add_dir ('libstdc++-v3', LibStdCxxFilter())
        self.add_dir ('lto-plugin')
        # zlib is imported from upstream.

        self.default_dirs = [
            'gcc',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libitm',
            'libmudflap',
            'libobjc',
            'libstdc++-v3',
            ]

# Only run when executed as a script, so that the classes above can be
# imported without parsing sys.argv or exiting the interpreter.
if __name__ == '__main__':
    GCCCmdLine().main()