1#!/usr/bin/perl -w
2# Emacs should use -*- cperl -*- mode
3#
4# Copyright (c) 2003-2006 Simon L. Nielsen <simon@FreeBSD.org>
5# All rights reserved.
6#
7# Redistribution and use in source and binary forms, with or without
8# modification, are permitted provided that the following conditions
9# are met:
10# 1. Redistributions of source code must retain the above copyright
11#    notice, this list of conditions and the following disclaimer.
12# 2. Redistributions in binary form must reproduce the above copyright
13#    notice, this list of conditions and the following disclaimer in the
14#    documentation and/or other materials provided with the distribution.
15#
16# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26# SUCH DAMAGE.
27#
28# $FreeBSD$
29#
30
31# Parse the list of supported hardware out of section 4 manual pages
32# and output it on stdout as SGML/DocBook entities.
33
34# The script will look for the following line in the manual page:
35# .Sh HARDWARE
36# and make an entity of the content until the line containing:
37# .Sh
38#
39# For Lists only the first line will be printed.  If there are
40# arguments to the .It command, only the argument will be printed.
41
42# Usage:
43# man2hwnotes.pl [-cl] [-d 0-6] [-a <archlist file>] [-o <outputfile>]
44#                <manualpage> [<manualpage> ...]
45
46use strict;
47use Getopt::Std;
48use Digest::MD5 qw(md5_hex);
49
50# Section from manual page to extract
51my $hwlist_sect = "HARDWARE";
52
53# Override default archtecture list for some devices:
54my $archlist_file = "dev.archlist.txt";
55my %archlist;
56
57# Globals
58my $compat_mode = 0; # Enable compat for old Hardware Notes style
59my $debuglevel = 0;
60my $only_list_out = 0; # Should only lists be generated in the output?
61my @out_lines; # Single lines
62my @out_dev;   # Device entities
63
64# Getopt
65my %options = ();
66if (!getopts("a:cd:lo:",\%options)) {
67    die("$!: Invalid command line arguments in ", __LINE__, "\n");
68}
69
70if (defined($options{c})) {
71    $compat_mode = 1;
72}
73if (defined($options{d})) {
74    $debuglevel = $options{d};
75}
76if (defined($options{a})) {
77    $archlist_file = $options{a};
78}
79if (defined($options{l})) {
80    $only_list_out = 1;
81}
82
83my $outputfile = $options{o};
84
85if ($debuglevel > 0) {
86    # Don't do output buffering in debug mode.
87    $| = 1;
88}
89
90load_archlist($archlist_file);
91
92if (defined($outputfile)) {
93    open(OLDOUT, ">&STDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
94    open(STDOUT, ">$outputfile") || die("$!: Could not open $outputfile in ", __LINE__, ".\n");
95}
96
97print <<EOT;
98<!--
99 These are automatically generated device lists for FreeBSD hardware notes.
100-->
101EOT
102
103if ($only_list_out) {
104    # Print the default device preamble entities
105    print "<!ENTITY hwlist.preamble.pre 'The'>\n";
106    print "<!ENTITY hwlist.preamble.post 'driver supports:'>\n";
107}
108
109foreach my $page (@ARGV) {
110    if ($page !~ m/\.4$/) {
111        dlog(2, "Skipped $page (not *.4)");
112        next;
113    }
114    dlog(2, "Parsing $page");
115    parse($page);
116
117    if (@out_lines) {
118        print join("\n", @out_lines), "\n";
119    }
120    if (@out_dev) {
121        print join("\n", @out_dev), "\n";
122    }
123
124    @out_lines = ();
125    @out_dev = ();
126}
127
128if (defined($outputfile)) {
129    open(STDOUT, ">&OLDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
130    close(OLDOUT) || die("$!: Could not close OLDOUT in ", __LINE__, ".\n");
131}
132
133sub normalize (@) {
134    my @lines = @_;
135
136    foreach my $l (@lines) {
137        $l =~ s/\\&//g;
138        $l =~ s:([\x21-\x2f\x5b-\x60\x7b-\x7f]):sprintf("&\#\%d;", ord($1)):eg;
139        # Make sure ampersand is encoded as &amp; since jade seems to
140        # be confused when it is encoded as &#38; inside an entity.
141        $l =~ s/&#38;/&amp;/g;
142    }
143    return (wantarray) ? @lines : join "", @lines;
144}
145
146sub parse {
147    my ($manpage) = @_;
148
149    my $cur_mansection;
150    my $found_hwlist = 0;
151    my %mdocvars;
152    $mdocvars{isin_hwlist} = 0;
153    $mdocvars{isin_list} = 0;
154    $mdocvars{first_para} = 1;
155    $mdocvars{parabuf} = "";
156    $mdocvars{listtype} = "";
157    $mdocvars{it_nr} = 0;
158
159    open(MANPAGE, "$manpage") || die("$!: Could not open $manpage in ", __LINE__, ".\n");
160    while(<MANPAGE>) {
161	chomp;
162	my $line = $_;
163
164	dlog(5, "Read '$line'");
165
166	# Find commands
167	if (s/^\.(.*)$/$1/) {
168	    my $cmd = $1;
169
170	    # Detect, and ignore, comment lines
171	    if (s/^\\"(.*)$/$1/) {
172		next;
173	    }
174
175	    $cmd =~ s/^([^ ]+).*$/$1/;
176
177	    if (/^Nm "?(\w+)"?/ && !defined($mdocvars{Nm})) {
178		dlog(3, "Setting Nm to $1");
179		$mdocvars{Nm} = $1;
180		# "_" cannot be used for an entity name.
181		$mdocvars{EntNm} = $1;
182		$mdocvars{EntNm} =~ s,_,.,g;
183
184	    } elsif (/^Nm$/) {
185		if (defined($mdocvars{Nm}) && $mdocvars{Nm} ne "") {
186		    parabuf_addline(\%mdocvars, "&man.".$mdocvars{EntNm}.".$cur_mansection;");
187		} else {
188		    dlog(2, "Warning: Bad Nm call in $manpage");
189		}
190
191	    } elsif (/^Sh (.+)$/) {
192		dlog(4, "Setting section to $1");
193		my $cur_section = $1;
194
195		flush_out(\%mdocvars);
196
197		if ($cur_section =~ /^${hwlist_sect}$/) {
198		    dlog(2, "Found the device section ${hwlist_sect}");
199		    $mdocvars{isin_hwlist} = 1;
200		    $found_hwlist = 1;
201		    add_sgmltag(\%mdocvars, "<!ENTITY hwlist.".$mdocvars{cur_manname}." '");
202		    if ($only_list_out) {
203			add_sgmltag("<para xmlns=\"http://docbook.org/ns/docbook\">&hwlist.preamble.pre; " .
204				    "&man.".$mdocvars{EntNm}.".$cur_mansection; " .
205				    "&hwlist.preamble.post;</para>");
206		    }
207		} elsif ($mdocvars{isin_hwlist}) {
208		    dlog(2, "Found a HWLIST STOP key!");
209		    add_sgmltag(\%mdocvars, "'>");
210		    $mdocvars{isin_hwlist} = 0;
211		}
212		if ($mdocvars{isin_list}) {
213		    dlog(1, "Warning: Still in list, but just entered new " .
214			 "section.  This is probably due to missing .El; " .
215			 "check manual page for errors.");
216		    # If we try to recover from this we will probably
217		    # just end with bad SGML output and it really
218		    # should be fixed in the manual page so we don't
219		    # even try to "fix" this.
220		}
221
222
223	    } elsif (/^Dt ([^ ]+) ([^ ]+)/) {
224		dlog(4, "Setting mansection to $2");
225		$mdocvars{cur_manname} = lc($1);
226		$cur_mansection = $2;
227
228		# "_" cannot be used for an entity name.
229		$mdocvars{cur_manname} =~ s,_,.,g;
230
231	    } elsif (/^It ?(.*)$/) {
232		my $txt = $1;
233
234		$mdocvars{it_nr}++;
235
236		# Flush last item
237		if ($mdocvars{parabuf} ne "") {
238		    add_listitem(\%mdocvars);
239		}
240
241		# Remove quotes, if any.
242		$txt =~ s/"(.*)"/$1/;
243
244		if ($mdocvars{listtype} eq "column") {
245		    # Ignore first item when it is likely to be a
246		    # header.
247		    if ($mdocvars{it_nr} == 1 && $txt =~ m/^(Em|Sy) /) {
248			dlog(2, "Skipping header line in column list");
249			next;
250		    }
251		    # Only extract the first column.
252		    $txt =~ s/ Ta /\t/g;
253		    $txt =~ s/([^\t]+)\t.*/$1/;
254		}
255
256		# Remove Li commands
257		$txt =~ s/^Li //g;
258
259		parabuf_addline(\%mdocvars, normalize($txt));
260	    } elsif (/^Bl/) {
261		$mdocvars{isin_list} = 1;
262		flush_out(\%mdocvars);
263		add_sgmltag(\%mdocvars, "<itemizedlist xmlns=\"http://docbook.org/ns/docbook\">");
264
265		if (/-tag/) {
266		    $mdocvars{listtype} = "tag";
267		    # YACK! Hack for ata(4)
268		    if ($mdocvars{Nm} eq "ata") {
269			$mdocvars{listtype} = "tagHACK";
270		    }
271		} elsif (/-bullet/) {
272		    $mdocvars{listtype} = "bullet";
273		} elsif (/-column/) {
274		    $mdocvars{listtype} = "column";
275		} else {
276		    $mdocvars{listtype} = "unknown";
277		}
278		dlog(2, "Listtype set to $mdocvars{listtype}");
279	    } elsif (/^El/) {
280		if ($mdocvars{parabuf} ne "") {
281		    add_listitem(\%mdocvars);
282		}
283
284		add_sgmltag(\%mdocvars, "</itemizedlist>");
285		$mdocvars{isin_list} = 0;
286	    } elsif (/^Tn (.+)$/) {
287		# For now we print TradeName text as regular text.
288		my ($txt, $punct_str) = split_punct_chars($1);
289
290		parabuf_addline(\%mdocvars, normalize($txt . $punct_str));
291	    } elsif (/^Xr ([^ ]+) (.+)$/) {
292		my ($xr_sect, $punct_str) = split_punct_chars($2);
293		my $txt;
294
295		# We need to check if the manual page exist to avoid
296		# breaking the doc build just because of a broken
297		# reference.
298		#$txt = "&man.$1.$xr_sect;$punct_str";
299		$txt = "$1($xr_sect)$punct_str";
300		parabuf_addline(\%mdocvars, normalize($txt));
301	    } elsif (/^Dq (.+)$/) {
302		my ($txt, $punct_str) = split_punct_chars($1);
303
304		parabuf_addline(\%mdocvars,
305				normalize("<quote xmlns=\"http://docbook.org/ns/docbook\">$txt</quote>$punct_str"));
306	    } elsif (/^Sx (.+)$/) {
307		if ($mdocvars{isin_hwlist}) {
308		    dlog(1, "Warning: Reference to another section in the " .
309			 "$hwlist_sect section in " . $mdocvars{Nm} .
310			 "(${cur_mansection})");
311		}
312		parabuf_addline(\%mdocvars, normalize($1));
313	    } elsif (/^Pa (.+)$/) {
314		my ($txt, $punct_str) = split_punct_chars($1);
315
316		$txt = make_ulink($txt) . $punct_str;
317		parabuf_addline(\%mdocvars, normalize($txt));
318	    } elsif (/^Pp/) {
319		dlog(3, "Got Pp command - forcing new para");
320		flush_out(\%mdocvars);
321	    } elsif (/^Fx (.+)/) {
322		dlog(3, "Got Fx command");
323		parabuf_addline(\%mdocvars, "FreeBSD $1");
324	    } elsif (/^Fx/) {
325		dlog(3, "Got Fx command");
326		parabuf_addline(\%mdocvars, "FreeBSD");
327	    } elsif (/^Em (.+)$/) {
328		my ($txt, $punct_str) = split_punct_chars($1);
329
330		parabuf_addline(\%mdocvars,
331				normalize("<emphasis xmlns=\"http://docbook.org/ns/docbook\">$txt</emphasis>$punct_str"));
332	    } else {
333		# Ignore all other commands.
334		dlog(3, "Ignoring unknown command $cmd");
335	    }
336	} else {
337	    # This is then regular text
338	    parabuf_addline(\%mdocvars, normalize($_));
339	}
340    }
341    close(MANPAGE) || die("$!: Could not close $manpage in ", __LINE__, ".\n");
342    if (! $found_hwlist) {
343	dlog(2, "Hardware list not found in $manpage");
344    }
345}
346
347sub dlog {
348    my ($level, $txt) = @_;
349
350    if ($level <= $debuglevel) {
351	print STDERR "$level: $txt\n";
352    }
353}
354
355# Output a SGML tag.
356sub add_sgmltag {
357    my ($mdocvars, $txt) = (@_);
358
359    # We only care about the HW list for now.
360    if (${$mdocvars}{isin_hwlist}) {
361	push(@out_dev, $txt);
362    }
363}
364
365# Add a text entity, and return the used entity name.
366sub add_txt_ent {
367    my ($itemtxt) = (@_);
368    my ($entity_name);
369
370    # Convert mdoc(7) minus
371    $itemtxt =~ s/\\-/-/g;
372
373    $itemtxt =~ s/'/&lsquo;/g;
374
375    $entity_name = "hwlist." . md5_hex($itemtxt);
376    dlog(4, "Adding '$itemtxt' as entity $entity_name");
377    push(@out_lines, "<!ENTITY $entity_name '$itemtxt'>");
378
379    return ($entity_name);
380}
381sub flush_out {
382    my ($mdocvars) = (@_);
383    my ($entity_name, $out);
384    my $para_arch = "";
385
386    if (!${$mdocvars}{isin_hwlist} || ${$mdocvars}{parabuf} eq "") {
387	return;
388    }
389
390    $entity_name = add_txt_ent(${$mdocvars}{parabuf});
391    ${$mdocvars}{parabuf} = "";
392    if(defined($archlist{${$mdocvars}{Nm}})) {
393	if ($compat_mode) {
394	    $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
395	} else {
396	    $para_arch = '[' . $archlist{${$mdocvars}{Nm}} . '] ';
397	}
398    }
399    if ($compat_mode) {
400	$out = "<para xmlns=\"http://docbook.org/ns/docbook\"".$para_arch.">&".$entity_name.";</para>";
401    } else {
402	if (${$mdocvars}{first_para}) {
403	    $out = "<para xmlns=\"http://docbook.org/ns/docbook\">".$para_arch."&".$entity_name.";</para>";
404	} else {
405	    $out = "<para xmlns=\"http://docbook.org/ns/docbook\">&".$entity_name.";</para>";
406	}
407	${$mdocvars}{first_para} = 0;
408    }
409
410    dlog(4, "Flushing parabuf");
411    add_sgmltag($mdocvars, $out);
412}
413
414# Add a new list item from the "parabuf".
415sub add_listitem {
416    my ($mdocvars) = (@_);
417    my ($listitem, $entity_name);
418    my $para_arch = "";
419
420    $entity_name = add_txt_ent(${$mdocvars}{parabuf});
421    ${$mdocvars}{parabuf} = "";
422
423    if ($compat_mode) {
424	if(defined($archlist{${$mdocvars}{Nm}})) {
425	    $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
426	}
427    }
428    $listitem = "<listitem><para".$para_arch.">&".$entity_name.";</para></listitem>";
429    dlog(4, "Adding '$listitem' to out_dev");
430    push(@out_dev, $listitem);
431
432}
433
434# Add a line to the "paragraph buffer"
435sub parabuf_addline {
436    my $mdocvars = shift;
437    my ($txt) = (@_);
438
439    dlog(5, "Now in parabuf_addline for '$txt'");
440
441    # We only care about the HW list for now.
442    if (!${$mdocvars}{isin_hwlist}) {
443	dlog(6, "Exiting parabuf_addline due to: !\${\$mdocvars}{isin_hwlist}");
444	return;
445    }
446    if ($txt eq "") {
447	dlog(6, "Exiting parabuf_addline due to: \$txt eq \"\"");
448	return;
449    }
450
451    if ($only_list_out && !${$mdocvars}{isin_list}) {
452	dlog(6, "Exiting parabuf_addline due to: ".
453	     "\$only_list_out && !\${\$mdocvars}{isin_list}");
454	return;
455    }
456
457    # We only add the first line for "tag" lists
458    if (${$mdocvars}{parabuf} ne "" && ${$mdocvars}{isin_list} &&
459	${$mdocvars}{listtype} eq "tag") {
460	dlog(6, "Exiting parabuf_addline due to: ".
461	     "\${\$mdocvars}{parabuf} ne \"\" && \${\$mdocvars}{isin_list} && ".
462	     "\${\$mdocvars}{listtype} eq \"tag\"");
463	return;
464    }
465
466    if (${$mdocvars}{parabuf} ne "") {
467	${$mdocvars}{parabuf} .= " ";
468    }
469
470    dlog(4, "Adding '$txt' to parabuf");
471
472    ${$mdocvars}{parabuf} .= $txt;
473}
474
475sub load_archlist {
476    my ($file) = (@_);
477
478    my $lineno = 0;
479
480    dlog(2, "Parsing archlist $file");
481
482    open(FILE, "$file") || die("$!: Could not open archlist $file in ", __LINE__, ".\n");
483    while(<FILE>) {
484	chomp;
485	$lineno++;
486
487	if (/^#/ || $_ eq "") {
488	    next;
489	}
490
491	if (/(\w+)\t([\w,]+)/) {
492	    dlog(4, "For driver $1 setting arch to $2");
493	    $archlist{$1} = $2;
494	} else {
495	    dlog(1, "Warning: Could not parse archlist line $lineno");
496	}
497    }
498
499    close(FILE);
500}
501
502# Check if a character is a mdoc(7) punctuation character.
503sub is_punct_char {
504    my ($str) = (@_);
505
506    return (length($str) == 1 && $str =~ /[\.,:;()\[\]\?!]/);
507}
508
509# Split out the punctuation characters of a mdoc(7) line.
510sub split_punct_chars {
511    my ($str) = (@_);
512    my (@stritems, $stritem, $punct_str);
513
514    $punct_str = "";
515    @stritems = split(/ /, $str);
516
517    while (defined($stritem = $stritems[$#stritems]) &&
518	   is_punct_char($stritem)) {
519	$punct_str = $stritem . $punct_str;
520	pop(@stritems);
521    }
522
523    return (join(' ', @stritems), $punct_str);
524}
525
526# Create a ulink, if the string contains an URL.
527sub make_ulink {
528    my ($str) = (@_);
529
530    $str =~ s,(http://[^ ]+),<link xmlns=\"http://docbook.org/ns/docbook\" xlink:href="$1"></link>,;
531
532    return $str;
533}
534