man2hwnotes.pl revision 138845
12Sjlaskey#!/usr/bin/perl -w 26Sjlaskey# Emacs should use -*- cperl -*- mode 32Sjlaskey# 4877Sattila# Copyright (c) 2003-2004 Simon L. Nielsen <simon@FreeBSD.org> 52Sjlaskey# All rights reserved. 62Sjlaskey# 72Sjlaskey# Redistribution and use in source and binary forms, with or without 8877Sattila# modification, are permitted provided that the following conditions 92Sjlaskey# are met: 102Sjlaskey# 1. Redistributions of source code must retain the above copyright 112Sjlaskey# notice, this list of conditions and the following disclaimer. 122Sjlaskey# 2. Redistributions in binary form must reproduce the above copyright 132Sjlaskey# notice, this list of conditions and the following disclaimer in the 14877Sattila# documentation and/or other materials provided with the distribution. 152Sjlaskey# 162Sjlaskey# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 172Sjlaskey# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18877Sattila# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 192Sjlaskey# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 202Sjlaskey# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 212Sjlaskey# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 222Sjlaskey# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 232Sjlaskey# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 242Sjlaskey# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 252Sjlaskey# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 262Sjlaskey# SUCH DAMAGE. 272Sjlaskey# 282Sjlaskey# $FreeBSD: head/release/doc/share/misc/man2hwnotes.pl 138845 2004-12-14 13:07:58Z simon $ 292Sjlaskey# 302Sjlaskey 312Sjlaskey# Parse the list of supported hardware out of section 4 manual pages 322Sjlaskey# and output it on stdout as SGML/DocBook entities. 332Sjlaskey 342Sjlaskey# The script will look for the following line in the manual page: 352Sjlaskey# .Sh HARDWARE 362Sjlaskey# and make an entity of the content until the line containing: 372Sjlaskey# .Sh 382Sjlaskey# 392Sjlaskey# For Lists only the first line will be printed. If there are 402Sjlaskey# arguments to the .It command, only the argument will be printed. 412Sjlaskey 422Sjlaskey# Usage: 432Sjlaskey# man2hwnotes.pl [-l] [-d 0-6] [-a <archlist file>] [-o <outputfile>] 442Sjlaskey# <manualpage> [<manualpage> ...] 452Sjlaskey 462Sjlaskeyuse strict; 472Sjlaskeyuse Getopt::Std; 482Sjlaskeyuse Digest::MD5 qw(md5_hex); 492Sjlaskey 502Sjlaskey# Section from manual page to extract 512Sjlaskeymy $hwlist_sect = "HARDWARE"; 522Sjlaskey 532Sjlaskey# Override default archtecture list for some devices: 542Sjlaskeymy $archlist_file = "dev.archlist.txt"; 552Sjlaskeymy %archlist; 562Sjlaskey 572Sjlaskey# Globals 582Sjlaskeymy $debuglevel = 0; 59my $only_list_out = 0; # Should only lists be generated in the output? 60my @out_lines; # Single lines 61my @out_dev; # Device entities 62 63# Getopt 64my %options = (); 65if (!getopts("a:d:lo:",\%options)) { 66 die("$!: Invalid command line arguments in ", __LINE__, "\n"); 67} 68 69if (defined($options{d})) { 70 $debuglevel = $options{d}; 71} 72if (defined($options{a})) { 73 $archlist_file = $options{a}; 74} 75if (defined($options{l})) { 76 $only_list_out = 1; 77} 78 79my $outputfile = $options{o}; 80 81if ($debuglevel > 0) { 82 # Don't do output buffering in debug mode. 83 $| = 1; 84} 85 86load_archlist($archlist_file); 87 88if (defined($outputfile)) { 89 open(OLDOUT, ">&STDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n"); 90 open(STDOUT, ">$outputfile") || die("$!: Could not open $outputfile in ", __LINE__, ".\n"); 91} 92 93print <<EOT; 94<!-- 95 These are automatically generated device lists for FreeBSD hardware notes. 96--> 97EOT 98 99if ($only_list_out) { 100 # Print the default device preamble entities 101 print "<!ENTITY hwlist.preamble.pre 'The'>\n"; 102 print "<!ENTITY hwlist.preamble.post 'driver supports:'>\n"; 103} 104 105foreach my $page (@ARGV) { 106 if ($page !~ m/\.4$/) { 107 dlog(2, "Skipped $page (not *.4)"); 108 next; 109 } 110 dlog(2, "Parsing $page"); 111 parse($page); 112 113 if (@out_lines) { 114 print join("\n", @out_lines), "\n"; 115 } 116 if (@out_dev) { 117 print join("\n", @out_dev), "\n"; 118 } 119 120 @out_lines = (); 121 @out_dev = (); 122} 123 124if (defined($outputfile)) { 125 open(STDOUT, ">&OLDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n"); 126 close(OLDOUT) || die("$!: Could not close OLDOUT in ", __LINE__, ".\n"); 127} 128 129sub normalize (@) { 130 my @lines = @_; 131 132 foreach my $l (@lines) { 133 $l =~ s/\\&//g; 134 $l =~ s:([\x21-\x2f\x5b-\x60\x7b-\x7f]):sprintf("&\#\%d;", ord($1)):eg; 135 # Make sure ampersand is encoded as & since jade seems to 136 # be confused when it is encoded as & inside an entity. 137 $l =~ s/&/&/g; 138 } 139 return (wantarray) ? @lines : join "", @lines; 140} 141 142sub parse { 143 my ($manpage) = @_; 144 145 my $cur_mansection; 146 my $found_hwlist = 0; 147 my %mdocvars; 148 $mdocvars{isin_hwlist} = 0; 149 $mdocvars{isin_list} = 0; 150 $mdocvars{parabuf} = ""; 151 $mdocvars{listtype} = ""; 152 $mdocvars{it_nr} = 0; 153 154 open(MANPAGE, "$manpage") || die("$!: Could not open $manpage in ", __LINE__, ".\n"); 155 while(<MANPAGE>) { 156 chomp; 157 my $line = $_; 158 159 dlog(5, "Read '$line'"); 160 161 # Find commands 162 if (s/^\.(.*)$/$1/) { 163 my $cmd = $1; 164 165 # Detect, and ignore, comment lines 166 if (s/^\\"(.*)$/$1/) { 167 next; 168 } 169 170 $cmd =~ s/^([^ ]+).*$/$1/; 171 172 if (/^Nm "?(\w+)"?/ && !defined($mdocvars{Nm})) { 173 dlog(3, "Setting Nm to $1"); 174 $mdocvars{Nm} = $1; 175 # "_" cannot be used for an entity name. 176 $mdocvars{EntNm} = $1; 177 $mdocvars{EntNm} =~ s,_,.,g; 178 179 } elsif (/^Nm$/) { 180 if (defined($mdocvars{Nm}) && $mdocvars{Nm} ne "") { 181 parabuf_addline(\%mdocvars, "&man.".$mdocvars{EntNm}.".$cur_mansection;"); 182 } else { 183 dlog(2, "Warning: Bad Nm call in $manpage"); 184 } 185 186 } elsif (/^Sh (.+)$/) { 187 dlog(4, "Setting section to $1"); 188 my $cur_section = $1; 189 190 flush_out(\%mdocvars); 191 192 if ($cur_section =~ /^${hwlist_sect}$/) { 193 dlog(2, "Found the device section ${hwlist_sect}"); 194 $mdocvars{isin_hwlist} = 1; 195 $found_hwlist = 1; 196 add_sgmltag(\%mdocvars, "<!ENTITY hwlist.".$mdocvars{cur_manname}." '"); 197 if ($only_list_out) { 198 add_sgmltag("<para>&hwlist.preamble.pre; " . 199 "&man.".$mdocvars{EntNm}.".$cur_mansection; " . 200 "&hwlist.preamble.post;</para>"); 201 } 202 } elsif ($mdocvars{isin_hwlist}) { 203 dlog(2, "Found a HWLIST STOP key!"); 204 add_sgmltag(\%mdocvars, "'>"); 205 $mdocvars{isin_hwlist} = 0; 206 } 207 208 } elsif (/^Dt ([^ ]+) ([^ ]+)/) { 209 dlog(4, "Setting mansection to $2"); 210 $mdocvars{cur_manname} = lc($1); 211 $cur_mansection = $2; 212 213 # "_" cannot be used for an entity name. 214 $mdocvars{cur_manname} =~ s,_,.,g; 215 216 } elsif (/^It ?(.*)$/) { 217 my $txt = $1; 218 219 $mdocvars{it_nr}++; 220 221 # Flush last item 222 if ($mdocvars{parabuf} ne "") { 223 add_listitem(\%mdocvars); 224 } 225 226 if ($mdocvars{listtype} eq "column") { 227 # Ignore first item when it is likely to be a 228 # header. 229 if ($mdocvars{it_nr} == 1 && $txt =~ m/^(Em|Sy) /) { 230 dlog(2, "Skipping header line in column list"); 231 next; 232 } 233 # Only extract the first column. 234 $txt =~ s/ Ta /\t/g; 235 $txt =~ s/([^\t]+)\t.*/$1/; 236 } 237 parabuf_addline(\%mdocvars, normalize($txt)); 238 } elsif (/^Bl/) { 239 $mdocvars{isin_list} = 1; 240 flush_out(\%mdocvars); 241 add_sgmltag(\%mdocvars, "<itemizedlist>"); 242 243 if (/-tag/) { 244 $mdocvars{listtype} = "tag"; 245 # YACK! Hack for ata(4) 246 if ($mdocvars{Nm} eq "ata") { 247 $mdocvars{listtype} = "tagHACK"; 248 } 249 } elsif (/-bullet/) { 250 $mdocvars{listtype} = "bullet"; 251 } elsif (/-column/) { 252 $mdocvars{listtype} = "column"; 253 } else { 254 $mdocvars{listtype} = "unknown"; 255 } 256 dlog(2, "Listtype set to $mdocvars{listtype}"); 257 } elsif (/^El/) { 258 if ($mdocvars{parabuf} ne "") { 259 add_listitem(\%mdocvars); 260 } 261 262 add_sgmltag(\%mdocvars, "</itemizedlist>"); 263 $mdocvars{isin_list} = 0; 264 } elsif (/^Tn (.+)$/) { 265 # For now we print TradeName text as regular text. 266 my ($txt, $punct_str) = split_punct_chars($1); 267 268 parabuf_addline(\%mdocvars, normalize($txt . $punct_str)); 269 } elsif (/^Xr ([^ ]+) (.+)$/) { 270 my ($xr_sect, $punct_str) = split_punct_chars($2); 271 my $txt; 272 273 # We need to check if the manual page exist to avoid 274 # breaking the doc build just because of a broken 275 # reference. 276 #$txt = "&man.$1.$xr_sect;$punct_str"; 277 $txt = "$1($xr_sect)$punct_str"; 278 parabuf_addline(\%mdocvars, normalize($txt)); 279 } elsif (/^Dq (.+)$/) { 280 my ($txt, $punct_str) = split_punct_chars($1); 281 282 parabuf_addline(\%mdocvars, 283 normalize("<quote>$txt</quote>$punct_str")); 284 } elsif (/^Sx (.+)$/) { 285 if ($mdocvars{isin_hwlist}) { 286 dlog(1, "Warning: Reference to another section in the " . 287 "$hwlist_sect section in " . $mdocvars{Nm} . 288 "(${cur_mansection})"); 289 } 290 parabuf_addline(\%mdocvars, normalize($1)); 291 } elsif (/^Pa (.+)$/) { 292 my ($txt, $punct_str) = split_punct_chars($1); 293 294 $txt = make_ulink($txt) . $punct_str; 295 parabuf_addline(\%mdocvars, normalize($txt)); 296 } else { 297 # Ignore all other commands. 298 dlog(3, "Ignoring unknown command $cmd"); 299 } 300 } else { 301 # This is then regular text 302 parabuf_addline(\%mdocvars, normalize($_)); 303 } 304 } 305 close(MANPAGE) || die("$!: Could not close $manpage in ", __LINE__, ".\n"); 306 if (! $found_hwlist) { 307 dlog(2, "Hardware list not found in $manpage"); 308 } 309} 310 311sub dlog { 312 my ($level, $txt) = @_; 313 314 if ($level <= $debuglevel) { 315 print STDERR "$level: $txt\n"; 316 } 317} 318 319# Output a SGML tag. 320sub add_sgmltag { 321 my ($mdocvars, $txt) = (@_); 322 323 # We only care about the HW list for now. 324 if (${$mdocvars}{isin_hwlist}) { 325 push(@out_dev, $txt); 326 } 327} 328 329# Add a text entity, and return the used entity name. 330sub add_txt_ent { 331 my ($itemtxt) = (@_); 332 my ($entity_name); 333 334 # Convert mdoc(7) minus 335 $itemtxt =~ s/\\-/-/g; 336 337 $itemtxt =~ s/'/‘/g; 338 339 $entity_name = "hwlist." . md5_hex($itemtxt); 340 dlog(4, "Adding '$itemtxt' as entity $entity_name"); 341 push(@out_lines, "<!ENTITY $entity_name '$itemtxt'>"); 342 343 return ($entity_name); 344} 345sub flush_out { 346 my ($mdocvars) = (@_); 347 my ($entity_name, $out); 348 my $para_arch = ""; 349 350 if (!${$mdocvars}{isin_hwlist} || ${$mdocvars}{parabuf} eq "") { 351 return; 352 } 353 354 $entity_name = add_txt_ent(${$mdocvars}{parabuf}); 355 ${$mdocvars}{parabuf} = ""; 356 if(defined($archlist{${$mdocvars}{Nm}})) { 357 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"'; 358 } 359 $out = "<para".$para_arch.">&".$entity_name.";</para>"; 360 361 dlog(4, "Flushing parabuf"); 362 add_sgmltag($mdocvars, $out); 363} 364 365# Add a new list item from the "parabuf". 366sub add_listitem { 367 my ($mdocvars) = (@_); 368 my ($listitem, $entity_name); 369 my $para_arch = ""; 370 371 $entity_name = add_txt_ent(${$mdocvars}{parabuf}); 372 ${$mdocvars}{parabuf} = ""; 373 374 if(defined($archlist{${$mdocvars}{Nm}})) { 375 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"'; 376 } 377 $listitem = "<listitem><para".$para_arch.">&".$entity_name.";</para></listitem>"; 378 dlog(4, "Adding '$listitem' to out_dev"); 379 push(@out_dev, $listitem); 380 381} 382 383# Add a line to the "paragraph buffer" 384sub parabuf_addline { 385 my $mdocvars = shift; 386 my ($txt) = (@_); 387 388 dlog(5, "Now in parabuf_addline"); 389 390 # We only care about the HW list for now. 391 if (!${$mdocvars}{isin_hwlist}) { 392 return; 393 } 394 if ($txt eq "") { 395 return; 396 } 397 398 if ($only_list_out && !${$mdocvars}{isin_list}) { 399 return; 400 } 401 402 # We only add the first line for "tag" lists 403 if (${$mdocvars}{parabuf} ne "" && ${$mdocvars}{isin_list} && 404 ${$mdocvars}{listtype} eq "tag") { 405 return; 406 } 407 408 if (${$mdocvars}{parabuf} ne "") { 409 ${$mdocvars}{parabuf} .= " "; 410 } 411 412 dlog(4, "Adding '$txt' to parabuf"); 413 414 ${$mdocvars}{parabuf} .= $txt; 415} 416 417sub load_archlist { 418 my ($file) = (@_); 419 420 my $lineno = 0; 421 422 dlog(2, "Parsing archlist $file"); 423 424 open(FILE, "$file") || die("$!: Could not open archlist $file in ", __LINE__, ".\n"); 425 while(<FILE>) { 426 chomp; 427 $lineno++; 428 429 if (/^#/ || $_ eq "") { 430 next; 431 } 432 433 if (/(\w+)\t([\w,]+)/) { 434 dlog(4, "For driver $1 setting arch to $2"); 435 $archlist{$1} = $2; 436 } else { 437 dlog(1, "Warning: Could not parse archlist line $lineno"); 438 } 439 } 440 441 close(FILE); 442} 443 444# Check if a character is a mdoc(7) punctuation character. 445sub is_punct_char { 446 my ($str) = (@_); 447 448 return (length($str) == 1 && $str =~ /[\.,:;()\[\]\?!]/); 449} 450 451# Split out the punctuation characters of a mdoc(7) line. 452sub split_punct_chars { 453 my ($str) = (@_); 454 my (@stritems, $stritem, $punct_str); 455 456 $punct_str = ""; 457 @stritems = split(/ /, $str); 458 459 while (defined($stritem = $stritems[$#stritems]) && 460 is_punct_char($stritem)) { 461 $punct_str = $stritem . $punct_str; 462 pop(@stritems); 463 } 464 465 return (join(' ', @stritems), $punct_str); 466} 467 468# Create a ulink, if the string contains an URL. 469sub make_ulink { 470 my ($str) = (@_); 471 472 $str =~ s,(http://[^ ]+),<ulink url="$1"></ulink>,; 473 474 return $str; 475} 476