1#!/usr/bin/perl
2#
3# Check source files for SPDX-License-Identifier fields.
4#
5# Examine all source files in a distribution to check that they contain an
6# SPDX-License-Identifier field.  This does not check the syntax or whether
7# the identifiers are valid.
8#
9# The canonical version of this file is maintained in the rra-c-util package,
10# which can be found at <https://www.eyrie.org/~eagle/software/rra-c-util/>.
11#
12# Copyright 2018-2021 Russ Allbery <eagle@eyrie.org>
13#
14# Permission is hereby granted, free of charge, to any person obtaining a
15# copy of this software and associated documentation files (the "Software"),
16# to deal in the Software without restriction, including without limitation
17# the rights to use, copy, modify, merge, publish, distribute, sublicense,
18# and/or sell copies of the Software, and to permit persons to whom the
19# Software is furnished to do so, subject to the following conditions:
20#
21# The above copyright notice and this permission notice shall be included in
22# all copies or substantial portions of the Software.
23#
24# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
27# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
30# DEALINGS IN THE SOFTWARE.
31#
32# SPDX-License-Identifier: MIT
33
34use 5.010;
35use strict;
36use warnings;
37
38use lib 't/lib';
39
40use Test::RRA qw(skip_unless_automated);
41
42use File::Find qw(find);
43use Test::More;
44
45# File name (the file without any directory component) and path patterns to
46# skip for this check.
47## no critic (RegularExpressions::ProhibitFixedStringMatches)
48#<<<
49my @IGNORE = (
50    qr{ \A Build ( [.] (?!PL) .* )? \z }ixms,  # Generated file from Build.PL
51    qr{ \A LICENSE \z }xms,                 # Generated file, no license itself
52    qr{ \A (Changes|NEWS|THANKS) \z }xms,   # Package license should be fine
53    qr{ \A TODO \z }xms,                    # Package license should be fine
54    qr{ \A MANIFEST ( [.] .* )? \z }xms,    # Package license should be fine
55    qr{ \A Makefile \z }xms,                # Generated file, no license itself
56    qr{ \A (MY)? META [.] .* }xms,          # Generated file, no license itself
57    qr{ [.] output \z }xms,                 # Test data
58    qr{ pod2htm . [.] tmp \z }xms,          # Windows pod2html output
59    qr{ ~ \z }xms,                          # Backup files
60);
61my @IGNORE_PATHS = (
62    qr{ \A [.] / [.] git/ }xms,             # Version control files
63    qr{ \A [.] / [.] pc/ }xms,              # quilt metadata files
64    qr{ \A [.] /_build/ }xms,               # Module::Build metadata
65    qr{ \A [.] /blib/ }xms,                 # Perl build system artifacts
66    qr{ \A [.] /cover_db/ }xms,             # Artifacts from coverage testing
67    qr{ \A [.] /debian/ }xms,               # Found in debian/* branches
68    qr{ \A [.] /docs/metadata/ }xms,        # Package license should be fine
69    qr{ \A [.] /README ( [.] .* )? \z }xms, # Package license should be fine
70    qr{ \A [.] /share/ }xms,                # Package license should be fine
71    qr{ \A [.] /t/data/generate/ }xms,      # Test metadata
72    qr{ \A [.] /t/data/spin/ }xms,          # Test metadata
73    qr{ \A [.] /t/data/update/ }xms,        # Test output
74    qr{ \A [.] /t/data .* [.] json \z }xms, # Test metadata
75);
76#>>>
77## use critic
78
79# Only run this test during automated testing, since failure doesn't indicate
80# any user-noticable flaw in the package itself.
81skip_unless_automated('SPDX identifier tests');
82
83# Check a single file for an occurrence of the string.
84#
85# $path - Path to the file
86#
87# Returns: undef
88sub check_file {
89    my $filename = $_;
90    my $path = $File::Find::name;
91
92    # Ignore files in the whitelist and binary files.
93    for my $pattern (@IGNORE) {
94        return if $filename =~ $pattern;
95    }
96    for my $pattern (@IGNORE_PATHS) {
97        if ($path =~ $pattern) {
98            $File::Find::prune = 1;
99            return;
100        }
101    }
102    return if -d $filename;
103    return if !-T $filename;
104
105    # Scan the file.
106    my ($saw_legacy_notice, $saw_spdx, $skip_spdx);
107    open(my $file, '<', $filename) or BAIL_OUT("Cannot open $path");
108    while (defined(my $line = <$file>)) {
109        if ($line =~ m{ \b See \s+ LICENSE \s+ for \s+ licensing }xms) {
110            $saw_legacy_notice = 1;
111        }
112        if ($line =~ m{ \b SPDX-License-Identifier: \s+ \S+ }xms) {
113            $saw_spdx = 1;
114            last;
115        }
116        if ($line =~ m{ no \s SPDX-License-Identifier \s registered }xms) {
117            $skip_spdx = 1;
118            last;
119        }
120    }
121    close($file) or BAIL_OUT("Cannot close $path");
122
123    # If there is a legacy license notice, report a failure regardless of file
124    # size.  Otherwise, skip files under 1KB.  They can be rolled up into the
125    # overall project license and the license notice may be a substantial
126    # portion of the file size.
127    if ($saw_legacy_notice) {
128        ok(!$saw_legacy_notice, "$path has legacy license notice");
129    } else {
130        ok($saw_spdx || $skip_spdx || -s $filename < 1024, $path);
131    }
132    return;
133}
134
135# Use File::Find to scan all files from the top of the directory.
136find(\&check_file, q{.});
137done_testing();
138