man2hwnotes.pl revision 138845
12Sjlaskey#!/usr/bin/perl -w
26Sjlaskey# Emacs should use -*- cperl -*- mode
32Sjlaskey#
4877Sattila# Copyright (c) 2003-2004 Simon L. Nielsen <simon@FreeBSD.org>
52Sjlaskey# All rights reserved.
62Sjlaskey#
72Sjlaskey# Redistribution and use in source and binary forms, with or without
8877Sattila# modification, are permitted provided that the following conditions
92Sjlaskey# are met:
102Sjlaskey# 1. Redistributions of source code must retain the above copyright
112Sjlaskey#    notice, this list of conditions and the following disclaimer.
122Sjlaskey# 2. Redistributions in binary form must reproduce the above copyright
132Sjlaskey#    notice, this list of conditions and the following disclaimer in the
14877Sattila#    documentation and/or other materials provided with the distribution.
152Sjlaskey#
162Sjlaskey# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
172Sjlaskey# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18877Sattila# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
192Sjlaskey# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
202Sjlaskey# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
212Sjlaskey# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
222Sjlaskey# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
232Sjlaskey# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
242Sjlaskey# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
252Sjlaskey# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
262Sjlaskey# SUCH DAMAGE.
272Sjlaskey#
282Sjlaskey# $FreeBSD: head/release/doc/share/misc/man2hwnotes.pl 138845 2004-12-14 13:07:58Z simon $
292Sjlaskey#
302Sjlaskey
312Sjlaskey# Parse the list of supported hardware out of section 4 manual pages
322Sjlaskey# and output it on stdout as SGML/DocBook entities.
332Sjlaskey
342Sjlaskey# The script will look for the following line in the manual page:
352Sjlaskey# .Sh HARDWARE
362Sjlaskey# and make an entity of the content until the line containing:
372Sjlaskey# .Sh
382Sjlaskey#
392Sjlaskey# For Lists only the first line will be printed.  If there are
402Sjlaskey# arguments to the .It command, only the argument will be printed.
412Sjlaskey
422Sjlaskey# Usage:
432Sjlaskey# man2hwnotes.pl [-l] [-d 0-6] [-a <archlist file>] [-o <outputfile>]
442Sjlaskey#                <manualpage> [<manualpage> ...]
452Sjlaskey
462Sjlaskeyuse strict;
472Sjlaskeyuse Getopt::Std;
482Sjlaskeyuse Digest::MD5 qw(md5_hex);
492Sjlaskey
502Sjlaskey# Section from manual page to extract
512Sjlaskeymy $hwlist_sect = "HARDWARE";
522Sjlaskey
532Sjlaskey# Override default archtecture list for some devices:
542Sjlaskeymy $archlist_file = "dev.archlist.txt";
552Sjlaskeymy %archlist;
562Sjlaskey
572Sjlaskey# Globals
582Sjlaskeymy $debuglevel = 0;
59my $only_list_out = 0; # Should only lists be generated in the output?
60my @out_lines; # Single lines
61my @out_dev;   # Device entities
62
63# Getopt
64my %options = ();
65if (!getopts("a:d:lo:",\%options)) {
66    die("$!: Invalid command line arguments in ", __LINE__, "\n");
67}
68
69if (defined($options{d})) {
70    $debuglevel = $options{d};
71}
72if (defined($options{a})) {
73    $archlist_file = $options{a};
74}
75if (defined($options{l})) {
76    $only_list_out = 1;
77}
78
79my $outputfile = $options{o};
80
81if ($debuglevel > 0) {
82    # Don't do output buffering in debug mode.
83    $| = 1;
84}
85
86load_archlist($archlist_file);
87
88if (defined($outputfile)) {
89    open(OLDOUT, ">&STDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
90    open(STDOUT, ">$outputfile") || die("$!: Could not open $outputfile in ", __LINE__, ".\n");
91}
92
93print <<EOT;
94<!--
95 These are automatically generated device lists for FreeBSD hardware notes.
96-->
97EOT
98
99if ($only_list_out) {
100    # Print the default device preamble entities
101    print "<!ENTITY hwlist.preamble.pre 'The'>\n";
102    print "<!ENTITY hwlist.preamble.post 'driver supports:'>\n";
103}
104
105foreach my $page (@ARGV) {
106    if ($page !~ m/\.4$/) {
107        dlog(2, "Skipped $page (not *.4)");
108        next;
109    }
110    dlog(2, "Parsing $page");
111    parse($page);
112
113    if (@out_lines) {
114        print join("\n", @out_lines), "\n";
115    }
116    if (@out_dev) {
117        print join("\n", @out_dev), "\n";
118    }
119
120    @out_lines = ();
121    @out_dev = ();
122}
123
124if (defined($outputfile)) {
125    open(STDOUT, ">&OLDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
126    close(OLDOUT) || die("$!: Could not close OLDOUT in ", __LINE__, ".\n");
127}
128
129sub normalize (@) {
130    my @lines = @_;
131
132    foreach my $l (@lines) {
133        $l =~ s/\\&//g;
134        $l =~ s:([\x21-\x2f\x5b-\x60\x7b-\x7f]):sprintf("&\#\%d;", ord($1)):eg;
135        # Make sure ampersand is encoded as &amp; since jade seems to
136        # be confused when it is encoded as &#38; inside an entity.
137        $l =~ s/&#38;/&amp;/g;
138    }
139    return (wantarray) ? @lines : join "", @lines;
140}
141
142sub parse {
143    my ($manpage) = @_;
144
145    my $cur_mansection;
146    my $found_hwlist = 0;
147    my %mdocvars;
148    $mdocvars{isin_hwlist} = 0;
149    $mdocvars{isin_list} = 0;
150    $mdocvars{parabuf} = "";
151    $mdocvars{listtype} = "";
152    $mdocvars{it_nr} = 0;
153
154    open(MANPAGE, "$manpage") || die("$!: Could not open $manpage in ", __LINE__, ".\n");
155    while(<MANPAGE>) {
156	chomp;
157	my $line = $_;
158
159	dlog(5, "Read '$line'");
160
161	# Find commands
162	if (s/^\.(.*)$/$1/) {
163	    my $cmd = $1;
164
165	    # Detect, and ignore, comment lines
166	    if (s/^\\"(.*)$/$1/) {
167		next;
168	    }
169
170	    $cmd =~ s/^([^ ]+).*$/$1/;
171
172	    if (/^Nm "?(\w+)"?/ && !defined($mdocvars{Nm})) {
173		dlog(3, "Setting Nm to $1");
174		$mdocvars{Nm} = $1;
175		# "_" cannot be used for an entity name.
176		$mdocvars{EntNm} = $1;
177		$mdocvars{EntNm} =~ s,_,.,g;
178
179	    } elsif (/^Nm$/) {
180		if (defined($mdocvars{Nm}) && $mdocvars{Nm} ne "") {
181		    parabuf_addline(\%mdocvars, "&man.".$mdocvars{EntNm}.".$cur_mansection;");
182		} else {
183		    dlog(2, "Warning: Bad Nm call in $manpage");
184		}
185
186	    } elsif (/^Sh (.+)$/) {
187		dlog(4, "Setting section to $1");
188		my $cur_section = $1;
189
190		flush_out(\%mdocvars);
191
192		if ($cur_section =~ /^${hwlist_sect}$/) {
193		    dlog(2, "Found the device section ${hwlist_sect}");
194		    $mdocvars{isin_hwlist} = 1;
195		    $found_hwlist = 1;
196		    add_sgmltag(\%mdocvars, "<!ENTITY hwlist.".$mdocvars{cur_manname}." '");
197		    if ($only_list_out) {
198			add_sgmltag("<para>&hwlist.preamble.pre; " .
199				    "&man.".$mdocvars{EntNm}.".$cur_mansection; " .
200				    "&hwlist.preamble.post;</para>");
201		    }
202		} elsif ($mdocvars{isin_hwlist}) {
203		    dlog(2, "Found a HWLIST STOP key!");
204		    add_sgmltag(\%mdocvars, "'>");
205		    $mdocvars{isin_hwlist} = 0;
206		}
207
208	    } elsif (/^Dt ([^ ]+) ([^ ]+)/) {
209		dlog(4, "Setting mansection to $2");
210		$mdocvars{cur_manname} = lc($1);
211		$cur_mansection = $2;
212
213		# "_" cannot be used for an entity name.
214		$mdocvars{cur_manname} =~ s,_,.,g;
215
216	    } elsif (/^It ?(.*)$/) {
217		my $txt = $1;
218
219		$mdocvars{it_nr}++;
220
221		# Flush last item
222		if ($mdocvars{parabuf} ne "") {
223		    add_listitem(\%mdocvars);
224		}
225
226		if ($mdocvars{listtype} eq "column") {
227		    # Ignore first item when it is likely to be a
228		    # header.
229		    if ($mdocvars{it_nr} == 1 && $txt =~ m/^(Em|Sy) /) {
230			dlog(2, "Skipping header line in column list");
231			next;
232		    }
233		    # Only extract the first column.
234		    $txt =~ s/ Ta /\t/g;
235		    $txt =~ s/([^\t]+)\t.*/$1/;
236		}
237		parabuf_addline(\%mdocvars, normalize($txt));
238	    } elsif (/^Bl/) {
239		$mdocvars{isin_list} = 1;
240		flush_out(\%mdocvars);
241		add_sgmltag(\%mdocvars, "<itemizedlist>");
242
243		if (/-tag/) {
244		    $mdocvars{listtype} = "tag";
245		    # YACK! Hack for ata(4)
246		    if ($mdocvars{Nm} eq "ata") {
247			$mdocvars{listtype} = "tagHACK";
248		    }
249		} elsif (/-bullet/) {
250		    $mdocvars{listtype} = "bullet";
251		} elsif (/-column/) {
252		    $mdocvars{listtype} = "column";
253		} else {
254		    $mdocvars{listtype} = "unknown";
255		}
256		dlog(2, "Listtype set to $mdocvars{listtype}");
257	    } elsif (/^El/) {
258		if ($mdocvars{parabuf} ne "") {
259		    add_listitem(\%mdocvars);
260		}
261
262		add_sgmltag(\%mdocvars, "</itemizedlist>");
263		$mdocvars{isin_list} = 0;
264	    } elsif (/^Tn (.+)$/) {
265		# For now we print TradeName text as regular text.
266		my ($txt, $punct_str) = split_punct_chars($1);
267
268		parabuf_addline(\%mdocvars, normalize($txt . $punct_str));
269	    } elsif (/^Xr ([^ ]+) (.+)$/) {
270		my ($xr_sect, $punct_str) = split_punct_chars($2);
271		my $txt;
272
273		# We need to check if the manual page exist to avoid
274		# breaking the doc build just because of a broken
275		# reference.
276		#$txt = "&man.$1.$xr_sect;$punct_str";
277		$txt = "$1($xr_sect)$punct_str";
278		parabuf_addline(\%mdocvars, normalize($txt));
279	    } elsif (/^Dq (.+)$/) {
280		my ($txt, $punct_str) = split_punct_chars($1);
281
282		parabuf_addline(\%mdocvars,
283				normalize("<quote>$txt</quote>$punct_str"));
284	    } elsif (/^Sx (.+)$/) {
285		if ($mdocvars{isin_hwlist}) {
286		    dlog(1, "Warning: Reference to another section in the " .
287			 "$hwlist_sect section in " . $mdocvars{Nm} .
288			 "(${cur_mansection})");
289		}
290		parabuf_addline(\%mdocvars, normalize($1));
291	    } elsif (/^Pa (.+)$/) {
292		my ($txt, $punct_str) = split_punct_chars($1);
293
294		$txt = make_ulink($txt) . $punct_str;
295		parabuf_addline(\%mdocvars, normalize($txt));
296	    } else {
297		# Ignore all other commands.
298		dlog(3, "Ignoring unknown command $cmd");
299	    }
300	} else {
301	    # This is then regular text
302	    parabuf_addline(\%mdocvars, normalize($_));
303	}
304    }
305    close(MANPAGE) || die("$!: Could not close $manpage in ", __LINE__, ".\n");
306    if (! $found_hwlist) {
307	dlog(2, "Hardware list not found in $manpage");
308    }
309}
310
311sub dlog {
312    my ($level, $txt) = @_;
313
314    if ($level <= $debuglevel) {
315	print STDERR "$level: $txt\n";
316    }
317}
318
319# Output a SGML tag.
320sub add_sgmltag {
321    my ($mdocvars, $txt) = (@_);
322
323    # We only care about the HW list for now.
324    if (${$mdocvars}{isin_hwlist}) {
325	push(@out_dev, $txt);
326    }
327}
328
329# Add a text entity, and return the used entity name.
330sub add_txt_ent {
331    my ($itemtxt) = (@_);
332    my ($entity_name);
333
334    # Convert mdoc(7) minus
335    $itemtxt =~ s/\\-/-/g;
336
337    $itemtxt =~ s/'/&lsquo;/g;
338
339    $entity_name = "hwlist." . md5_hex($itemtxt);
340    dlog(4, "Adding '$itemtxt' as entity $entity_name");
341    push(@out_lines, "<!ENTITY $entity_name '$itemtxt'>");
342
343    return ($entity_name);
344}
345sub flush_out {
346    my ($mdocvars) = (@_);
347    my ($entity_name, $out);
348    my $para_arch = "";
349
350    if (!${$mdocvars}{isin_hwlist} || ${$mdocvars}{parabuf} eq "") {
351	return;
352    }
353
354    $entity_name = add_txt_ent(${$mdocvars}{parabuf});
355    ${$mdocvars}{parabuf} = "";
356    if(defined($archlist{${$mdocvars}{Nm}})) {
357	$para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
358    }
359    $out = "<para".$para_arch.">&".$entity_name.";</para>";
360
361    dlog(4, "Flushing parabuf");
362    add_sgmltag($mdocvars, $out);
363}
364
365# Add a new list item from the "parabuf".
366sub add_listitem {
367    my ($mdocvars) = (@_);
368    my ($listitem, $entity_name);
369    my $para_arch = "";
370
371    $entity_name = add_txt_ent(${$mdocvars}{parabuf});
372    ${$mdocvars}{parabuf} = "";
373
374    if(defined($archlist{${$mdocvars}{Nm}})) {
375	$para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
376    }
377    $listitem = "<listitem><para".$para_arch.">&".$entity_name.";</para></listitem>";
378    dlog(4, "Adding '$listitem' to out_dev");
379    push(@out_dev, $listitem);
380
381}
382
383# Add a line to the "paragraph buffer"
384sub parabuf_addline {
385    my $mdocvars = shift;
386    my ($txt) = (@_);
387
388    dlog(5, "Now in parabuf_addline");
389
390    # We only care about the HW list for now.
391    if (!${$mdocvars}{isin_hwlist}) {
392	return;
393    }
394    if ($txt eq "") {
395	return;
396    }
397
398    if ($only_list_out && !${$mdocvars}{isin_list}) {
399	return;
400    }
401
402    # We only add the first line for "tag" lists
403    if (${$mdocvars}{parabuf} ne "" && ${$mdocvars}{isin_list} &&
404	${$mdocvars}{listtype} eq "tag") {
405	return;
406    }
407
408    if (${$mdocvars}{parabuf} ne "") {
409	${$mdocvars}{parabuf} .= " ";
410    }
411
412    dlog(4, "Adding '$txt' to parabuf");
413
414    ${$mdocvars}{parabuf} .= $txt;
415}
416
417sub load_archlist {
418    my ($file) = (@_);
419
420    my $lineno = 0;
421
422    dlog(2, "Parsing archlist $file");
423
424    open(FILE, "$file") || die("$!: Could not open archlist $file in ", __LINE__, ".\n");
425    while(<FILE>) {
426	chomp;
427	$lineno++;
428
429	if (/^#/ || $_ eq "") {
430	    next;
431	}
432
433	if (/(\w+)\t([\w,]+)/) {
434	    dlog(4, "For driver $1 setting arch to $2");
435	    $archlist{$1} = $2;
436	} else {
437	    dlog(1, "Warning: Could not parse archlist line $lineno");
438	}
439    }
440
441    close(FILE);
442}
443
444# Check if a character is a mdoc(7) punctuation character.
445sub is_punct_char {
446    my ($str) = (@_);
447
448    return (length($str) == 1 && $str =~ /[\.,:;()\[\]\?!]/);
449}
450
451# Split out the punctuation characters of a mdoc(7) line.
452sub split_punct_chars {
453    my ($str) = (@_);
454    my (@stritems, $stritem, $punct_str);
455
456    $punct_str = "";
457    @stritems = split(/ /, $str);
458
459    while (defined($stritem = $stritems[$#stritems]) &&
460	   is_punct_char($stritem)) {
461	$punct_str = $stritem . $punct_str;
462	pop(@stritems);
463    }
464
465    return (join(' ', @stritems), $punct_str);
466}
467
468# Create a ulink, if the string contains an URL.
469sub make_ulink {
470    my ($str) = (@_);
471
472    $str =~ s,(http://[^ ]+),<ulink url="$1"></ulink>,;
473
474    return $str;
475}
476