1#!/usr/bin/env perl
2#
3# Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
4# All rights reserved.
5#
6# This code is derived from software contributed to The NetBSD Foundation
7# by Hubert Feyrer <hubert@feyrer.de>.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17#
18# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28# POSSIBILITY OF SUCH DAMAGE.
29
30#
31# Extract BSD-mandated copyright messages for NetBSD documentation
32#
33# Usage:
34# 1) find /usr/src -type f -print \
35#    | perl extract-contrib-string.pl
36#    >x
37#
38# 2) merge text after "--------" in "x" into
39#    src/distrib/notes/common/legal.common
40#
41# Options:
42#
43#     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
44#
45# where
46#     -d  debug output
47#     -h  html output
48#     -x  xml/docbook output
49#     -?  display help/usage message
50
51
52$ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
53$ack_line2="display the following( acknowledge?ment)?";
54$ack_endline=
55      '(\d\.\s*(Neither the name'
56    .         '|The name of the company nor the name'	# Wasn't my idea
57    .         '|The name of the author may not'
58    .         '|The name of .* must not be used to endorse'
59    .         '|The names? (of )?.* nor the names? of'
60    .         '|The names? (of )?.* or any of it\'?s members'
61    .         '|Redistributions of any form whatsoever'
62    .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
63    .'|(THIS SOFTWARE IS PROVIDED)'
64    .'|(The word \'cryptographic\' can be left out if)'
65    .'|(may be used to endorse)'
66    .'|(@end cartouche)'
67    .'|(Redistribution and use in source and binary forms)'
68    .'|(may not be used to endorse)'
69    .'|(\.IP 4)'
70    .'|(ALLOW FREE USE OF)'
71    .'|(materials provided with the distribution)'
72    .'|(@InsertRedistribution@)';
73
74$known_bad_clause_3_wording=
75      'usr.bin/lex/.*'				# UCB
76    .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
77    .'|usr.sbin/traceroute/ifaddrlist.h'	#
78    .'|usr.sbin/traceroute/traceroute.c'	#
79    .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
80    ;
81
82sub warning {
83    local($fn,$msg) = @_;
84    print "XXX $fn line $.: $msg\n"
85}
86
87while ($#ARGV >= 0) {
88    $debug=1 if ($ARGV[0] =~ /-d/i);
89    $html=1  if ($ARGV[0] =~ /-h/i);
90    $xml=1  if ($ARGV[0] =~ /-x/i);
91    $usage=1  if ($ARGV[0] =~ /-\?/);
92    shift(@ARGV);
93}
94
95if ($usage) {
96    print "usage: find /usr/src -type f -print |\n" .
97	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
98	"   where\n" .
99	"    -h   output html\n" .
100	"    -x   output xml/docbook\n" .
101	"    -d   debug\n" .
102	"    -?   display this help message\n";
103    exit(0);
104}
105
106$comments = !$html && !$xml;
107
108file:
109while(<>) {
110    chomp();
111    $fn=$_;
112
113    open(F, "$fn") || die "cannot read $fn: $!\n";
114
115  line:
116    while(<F>) {
117	if (0 and /$ack_line2/i){
118	    print "?> $_" if $debug;
119
120	    if ($fn !~ m,$known_bad_clause_3_wording,) {
121		warning($fn, "clause 3 start not caught");
122	    }
123	    last line;
124	}
125
126	print "0> $_" if $debug;
127
128	# special case perl script generating a license (openssl's
129	# mkerr.pl) - ignore the quoted license, there is another one
130	# inside:
131	if (/^\"\s\*.*$ack_line1.*\\n\"\,/) {
132		while(!/$ack_endline/i) {
133		    print "S> $_" if $debug;
134		    $_ = <F>;
135		}
136	}
137
138	if (/$ack_line1/i
139	    or (/$ack_line2/ and $fn =~ m,$known_bad_clause_3_wording,)) {
140
141	    print "1> $_" if $debug;
142
143	    $_=<F>
144		unless $fn =~ m,$known_bad_clause_3_wording,;
145	    if (/$ack_line2/i or $fn =~ m,$known_bad_clause_3_wording,){
146
147		print "2> $_" if $debug;
148
149		$msg="";
150		$msg = $_ if ($fn =~ m,$known_bad_clause_3_wording, and /``/);
151		$cnt=0;
152		$_=<F>;
153		while(!/$ack_endline/i) {
154
155		    print "C> $_" if $debug;
156
157		    $msg .= $_;
158		    $cnt++;
159		    $_ = <F>;
160		    if ($cnt > 10) {
161			warning($fn,"loooong copyright?");
162			last line;
163		    }
164		}
165
166		print "E> $_" if $debug;
167
168		# post-process
169
170		if ($fn =~ m,$known_bad_clause_3_wording,) {
171			while ($msg !~ /^.*``.*\n/) {
172				last if (!$msg);
173				$msg =~ s/^.*\n//o;
174			}
175			$msg =~ s/^.*``//o;
176			$msg =~ s/\n.*``//o;
177			$msg =~ s/''.*$//o;
178		}
179
180		# *roff
181		while ($msg =~ /^\.\\"\s*/) {
182			$msg =~ s/^\.\\"\s*//o;
183		}
184		while ($msg =~ /\n\.\\"\s*/) {
185			$msg =~ s/\n\.\\"\s*/\n/o;
186		}
187		$msg =~ s/\n\.\\"\s*$/\n/g;
188
189		# C++/C99
190		while ($msg =~ /^\s*\/\/\s*/) {
191			$msg =~ s/^\s*\/\/\s*//o;
192		}
193		while ($msg =~ /\n\s*\/\/\s*$/) {
194			$msg =~ s/\n\s*\/\/\s*$//o;
195		}
196		$msg =~ s/\n\s*\/\/\s*/\n/g;
197
198		# C
199		while ($msg =~ /^\s*\*\s*/) {
200			$msg =~ s/^\s*\*\s*//o;
201		}
202		while ($msg =~ /\n\s*\*\s*$/) {
203			$msg =~ s/\n\s*\*\s*$//o;
204		}
205		$msg =~ s/\n\s*\*\s*/\n/g;
206
207		# texinfo @c
208		while ($msg =~ /^\s*\@c\s+/) {
209			$msg =~ s/^\s*\@c\s+//o;
210		}
211		while ($msg =~ /\n\s*\@c\s+$/) {
212			$msg =~ s/\n\s*\@c\s+$//o;
213		}
214		$msg =~ s/\n\s*\@c\s+/\n/g;
215
216		$msg =~ s/^REM\s*//g;			# BASIC?!?
217		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
218		$msg =~ s/^dnl\s*//g;			# m4
219		$msg =~ s/\dnl\s*/\n/g;			# m4
220		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
221		$msg =~ s/\n\s+-\s+/ /g;		#
222		$msg =~ s/^[#\\\|";]+\s*//g;		# sh etc.
223		$msg =~ s/\n[#\\\|";]+\s*/\n/g;		# sh etc.
224		$msg =~ s/^[ 	*]*//g;      		# C
225		$msg =~ s/\n[ 	*]*/\n/g;    		# C
226
227		$msg =~ s/\@cartouche\n//;              # texinfo
228
229		$msg =~ s/
230//g;
231		$msg =~ s/\s*\n/\n/g;
232		$msg =~ s/^\s*//;
233		$msg =~ s/\\\@/\@/g;
234		$msg =~ s/\n\n/\n/g;
235	        $msg =~ s/^\s*``//;
236	        $msg =~ s/''\s*$//;
237		$msg =~ s/^\"//o;
238		$msg =~ s/\"$//o;
239
240		# Split up into separate paragraphs
241		#
242		$msgs=$msg;
243		$msgs=~s/(This (software|product))/|$1/g;
244		$msgs=~s,^\|,,;
245	      msg:
246		foreach $msg (split(/\|/, $msgs)) {
247		    while ($msg =~ /[\n\s]+$/) {
248			$msg =~ s/[\n\s]+$//o;
249		    }
250		    next if ($msg eq "");
251		    if ($comments) {
252			print ".\\\" File $fn:\n";
253			print "$msg";
254			print "\n\n";
255		    }
256
257		    # Figure out if there's a version w/ or w/o trailing dot
258		    #
259		    if ($msg =~ /\.$/) {
260			# check if there's a version of the same msg
261			# w/o a trailing dot
262			$msg2=$msg;
263			$msg2=~s,\.$,,;
264			if ($copyrights{"$msg2"}) {
265			    # already there - skip
266			    print "already there, w/o dot - skipping!\n"
267				if $debug;
268			    next msg;
269			}
270
271			# ... maybe with other case?
272			$lc_msg2=lc($msg2);
273			if ($lc_copyrights{$lc_msg2}) {
274			    print "already there, in different case - skipping\n"
275				if $debug;
276			    next msg;
277			}
278		    } else {
279			# check if there's a version of the same msg
280			# with a trailing dot
281			$msg2=$msg;
282			$msg2.=".";
283			if ($copyrights{"$msg2"}) {
284			    # already there - skip
285			    print "already there, w/ dot - skipping!\n"
286				if $debug;
287			    next msg;
288			}
289
290			# ... maybe with other case?
291			$lc_msg2=lc($msg2);
292			if ($lc_copyrights{$lc_msg2}) {
293			    print "already there, in different case - skipping\n"
294				if $debug;
295			    next msg;
296			}
297		    }
298
299		    $copyrights{$msg} = 1;
300		    $lc_copyrights{$lc_msg} = 1;
301		}
302
303	    } else {
304		print "?> $_" if $debug;
305
306                if ($fn !~ m,$known_bad_clause_3_wording,) {
307		    warning($fn, "bad clause 3?");
308                }
309		last line;
310	    }
311	}
312    }
313    close(F);
314}
315
316
317if ($html) {
318    print "<ul>\n";
319    foreach $msg (sort keys %copyrights) {
320	print "<li>$msg</li>\n";
321    }
322    print "</ul>\n";
323} elsif ($xml) {
324    foreach $msg (sort keys %copyrights) {
325	print "<listitem>$msg</listitem>\n";
326    }
327} else {
328    print "------------------------------------------------------------\n";
329
330    $firsttime=1;
331    foreach $msg (sort keys %copyrights) {
332	if ($firsttime) {
333	    $firsttime=0;
334	} else {
335	    print ".It\n";
336	}
337	print "$msg\n";
338    }
339}
340