pod2man.pl revision 100936
150476Speter: #!/usr/bin/perl-5.005
238410Sbde    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
3156813Sru	if $running_under_some_shell;
4156813Sru
537535Sdes$DEF_PM_SECTION = '3pm' || '3';
685093Sdes
760924Sdes=head1 NAME
860924Sdes
960924Sdespod2man - translate embedded Perl pod directives into man pages
1074870Sru
1160924Sdes=head1 SYNOPSIS
1237535Sdes
13160737SyarB<pod2man>
14160737Syar[ B<--section=>I<manext> ]
15160737Syar[ B<--release=>I<relpatch> ]
16160737Syar[ B<--center=>I<string> ]
17156813Sru[ B<--date=>I<string> ]
18110165Smarkm[ B<--fixed=>I<font> ]
19240496Sdes[ B<--official> ]
20240496Sdes[ B<--lax> ]
21202613SdesI<inputfile>
22202623Sdes
23202623Sdes=head1 DESCRIPTION
2497930Sru
2597930SruB<pod2man> converts its input file containing embedded pod directives (see
26169960SdesL<perlpod>) into nroff source suitable for viewing with nroff(1) or
27169960Sdestroff(1) using the man(7) macro set.
28114420Sdes
29106068SfennerBesides the obvious pod conversions, B<pod2man> also takes care of
30195767Skensmithfunc(), func(n), and simple variable references like $foo or @bar so
3137535Sdesyou don't have to use code escapes for them; complex expressions like
32174767SruC<$fred{'stuff'}> will still need to be escaped, though.  Other nagging
33174588Sdeslittle roffish things that it catches include translating the minus in
34174767Srusomething like foo-bar, making a long dash--like this--into a real em
3537535Sdesdash, fixing up "paired quotes", putting a little space after the
3637535Sdesparens in something like func(), making C++ and PI look right, making
3740975Sdesdouble underbars have a little tiny space between them, making ALLCAPS
3840975Sdesa teeny bit smaller in troff(1), and escaping backslashes so you don't
3937535Sdeshave to.
4040975Sdes
4137535Sdes=head1 OPTIONS
4237535Sdes
43174767Sru=over 8
44174588Sdes
45174767Sru=item center
4637535Sdes
4737535SdesSet the centered header to a specific string.  The default is
4840975Sdes"User Contributed Perl Documentation", unless the C<--official> flag is
4940975Sdesgiven, in which case the default is "Perl Programmers Reference Guide".
5037535Sdes
5141820Sdes=item date
5237535Sdes
5337535SdesSet the left-hand footer string to this value.  By default,
5485093Sdesthe modification date of the input file will be used.
5585093Sdes
5685093Sdes=item fixed
5785093Sdes
5885093SdesThe fixed font to use for code refs.  Defaults to CW.
5985093Sdes
6085093Sdes=item official
6185093Sdes
6285093SdesSet the default header to indicate that this page is of
6385093Sdesthe standard release in case C<--center> is not given.
6485093Sdes
6585093Sdes=item release
6685093Sdes
6785093SdesSet the centered footer.  By default, this is the current
6885093Sdesperl release.
6985093Sdes
7085093Sdes=item section
7185093Sdes
7285093SdesSet the section for the C<.TH> macro.  The standard conventions on
7385093Sdessections are to use 1 for user commands,  2 for system calls, 3 for
7485093Sdesfunctions, 4 for devices, 5 for file formats, 6 for games, 7 for
7585093Sdesmiscellaneous information, and 8 for administrator commands.  This works
7685093Sdesbest if you put your Perl man pages in a separate tree, like
7785093SdesF</usr/local/perl/man/>.  By default, section 1 will be used
7885093Sdesunless the file ends in F<.pm> in which case section 3 will be selected.
7985093Sdes
8085093Sdes=item lax
8185093Sdes
8267809SobrienDon't complain when required sections aren't present.
8337535Sdes
84=back
85
86=head1 Anatomy of a Proper Man Page
87
88For those not sure of the proper layout of a man page, here's
89an example of the skeleton of a proper man page.  Head of the
90major headers should be setout as a C<=head1> directive, and
91are historically written in the rather startling ALL UPPER CASE
92format, although this is not mandatory.
93Minor headers may be included using C<=head2>, and are
94typically in mixed case.
95
96=over 10
97
98=item NAME
99
100Mandatory section; should be a comma-separated list of programs or
101functions documented by this podpage, such as:
102
103    foo, bar - programs to do something
104
105=item SYNOPSIS
106
107A short usage summary for programs and functions, which
108may someday be deemed mandatory.
109
110=item DESCRIPTION
111
112Long drawn out discussion of the program.  It's a good idea to break this
113up into subsections using the C<=head2> directives, like
114
115    =head2 A Sample Subection
116
117    =head2 Yet Another Sample Subection
118
119=item OPTIONS
120
121Some people make this separate from the description.
122
123=item RETURN VALUE
124
125What the program or function returns if successful.
126
127=item ERRORS
128
129Exceptions, return codes, exit stati, and errno settings.
130
131=item EXAMPLES
132
133Give some example uses of the program.
134
135=item ENVIRONMENT
136
137Envariables this program might care about.
138
139=item FILES
140
141All files used by the program.  You should probably use the FE<lt>E<gt>
142for these.
143
144=item SEE ALSO
145
146Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
147
148=item NOTES
149
150Miscellaneous commentary.
151
152=item CAVEATS
153
154Things to take special care with; sometimes called WARNINGS.
155
156=item DIAGNOSTICS
157
158All possible messages the program can print out--and
159what they mean.
160
161=item BUGS
162
163Things that are broken or just don't work quite right.
164
165=item RESTRICTIONS
166
167Bugs you don't plan to fix :-)
168
169=item AUTHOR
170
171Who wrote it (or AUTHORS if multiple).
172
173=item HISTORY
174
175Programs derived from other sources sometimes have this, or
176you might keep a modification log here.
177
178=back
179
180=head1 EXAMPLES
181
182    pod2man program > program.1
183    pod2man some_module.pm > /usr/perl/man/man3/some_module.3
184    pod2man --section=7 note.pod > note.7
185
186=head1 DIAGNOSTICS
187
188The following diagnostics are generated by B<pod2man>.  Items
189marked "(W)" are non-fatal, whereas the "(F)" errors will cause
190B<pod2man> to immediately exit with a non-zero status.
191
192=over 4
193
194=item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
195
196(W) If you start include an option, you should set it off
197as bold, italic, or code.
198
199=item can't open %s: %s
200
201(F) The input file wasn't available for the given reason.
202
203=item Improper man page - no dash in NAME header in paragraph %d of %s
204
205(W) The NAME header did not have an isolated dash in it.  This is
206considered important.
207
208=item Invalid man page - no NAME line in %s
209
210(F) You did not include a NAME header, which is essential.
211
212=item roff font should be 1 or 2 chars, not `%s'  (F)
213
214(F) The font specified with the C<--fixed> option was not
215a one- or two-digit roff font.
216
217=item %s is missing required section: %s
218
219(W) Required sections include NAME, DESCRIPTION, and if you're
220using a section starting with a 3, also a SYNOPSIS.  Actually,
221not having a NAME is a fatal.
222
223=item Unknown escape: %s in %s
224
225(W) An unknown HTML entity (probably for an 8-bit character) was given via
226a C<EE<lt>E<gt>> directive.  Besides amp, lt, gt, and quot, recognized
227entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
228Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
229Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
230icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
231ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
232THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
233Yacute, yacute, and yuml.
234
235=item Unmatched =back
236
237(W) You have a C<=back> without a corresponding C<=over>.
238
239=item Unrecognized pod directive: %s
240
241(W) You specified a pod directive that isn't in the known list of
242C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
243
244
245=back
246
247=head1 NOTES
248
249If you would like to print out a lot of man page continuously, you
250probably want to set the C and D registers to set contiguous page
251numbering and even/odd paging, at least on some versions of man(7).
252Settting the F register will get you some additional experimental
253indexing:
254
255    troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
256
257The indexing merely outputs messages via C<.tm> for each
258major page, section, subsection, item, and any C<XE<lt>E<gt>>
259directives.
260
261
262=head1 RESTRICTIONS
263
264None at this time.
265
266=head1 BUGS
267
268The =over and =back directives don't really work right.  They
269take absolute positions instead of offsets, don't nest well, and
270making people count is suboptimal in any event.
271
272=head1 AUTHORS
273
274Original prototype by Larry Wall, but so massively hacked over by
275Tom Christiansen such that Larry probably doesn't recognize it anymore.
276
277=cut
278
279$/ = "";
280$cutting = 1;
281@Indices = ();
282
283# We try first to get the version number from a local binary, in case we're
284# running an installed version of Perl to produce documentation from an
285# uninstalled newer version's pod files.
286if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') {
287  my $perl = (-x './perl' && -f './perl' ) ?
288                 './perl' :
289                 ((-x '../perl' && -f '../perl') ?
290                      '../perl' :
291                      '');
292  ($version,$patch) = `$perl -e 'print $]'` =~ /^(\d\.\d{3})(\d{2})?/ if $perl;
293}
294# No luck; we'll just go with the running Perl's version
295($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version;
296$DEF_RELEASE  = "perl $version";
297$DEF_RELEASE .= ", patch $patch" if $patch;
298
299
300sub makedate {
301    my $secs = shift;
302    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
303    my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
304    $year += 1900;
305    return "$mday/$mname/$year";
306}
307
308use Getopt::Long;
309
310$DEF_SECTION = 1;
311$DEF_CENTER = "User Contributed Perl Documentation";
312$STD_CENTER = "Perl Programmers Reference Guide";
313$DEF_FIXED = 'CW';
314$DEF_LAX = 0;
315
316sub usage {
317    warn "$0: @_\n" if @_;
318    die <<EOF;
319usage: $0 [options] podpage
320Options are:
321	--section=manext      (default "$DEF_SECTION")
322	--release=relpatch    (default "$DEF_RELEASE")
323	--center=string       (default "$DEF_CENTER")
324	--date=string         (default "$DEF_DATE")
325	--fixed=font	      (default "$DEF_FIXED")
326	--official	      (default NOT)
327	--lax                 (default NOT)
328EOF
329}
330
331$uok = GetOptions( qw(
332	section=s
333	release=s
334	center=s
335	date=s
336	fixed=s
337	official
338	lax
339	help));
340
341$DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
342
343usage("Usage error!") unless $uok;
344usage() if $opt_help;
345usage("Need one and only one podpage argument") unless @ARGV == 1;
346
347$section = $opt_section || ($ARGV[0] =~ /\.pm$/
348				? $DEF_PM_SECTION : $DEF_SECTION);
349$RP = $opt_release || $DEF_RELEASE;
350$center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
351$lax = $opt_lax || $DEF_LAX;
352
353$CFont = $opt_fixed || $DEF_FIXED;
354
355if (length($CFont) == 2) {
356    $CFont_embed = "\\f($CFont";
357}
358elsif (length($CFont) == 1) {
359    $CFont_embed = "\\f$CFont";
360}
361else {
362    die "roff font should be 1 or 2 chars, not `$CFont_embed'";
363}
364
365$date = $opt_date || $DEF_DATE;
366
367for (qw{NAME DESCRIPTION}) {
368# for (qw{NAME DESCRIPTION AUTHOR}) {
369    $wanna_see{$_}++;
370}
371$wanna_see{SYNOPSIS}++ if $section =~ /^3/;
372
373
374$name = @ARGV ? $ARGV[0] : "<STDIN>";
375$Filename = $name;
376if ($section =~ /^1/) {
377    require File::Basename;
378    $name = uc File::Basename::basename($name);
379}
380$name =~ s/\.(pod|p[lm])$//i;
381
382# Lose everything up to the first of
383#     */lib/*perl*	standard or site_perl module
384#     */*perl*/lib	from -D prefix=/opt/perl
385#     */*perl*/		random module hierarchy
386# which works.
387$name =~ s-//+-/-g;
388if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i
389	or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i
390	or $name =~ s-^.*?/[^/]*perl[^/]*/--i) {
391    # Lose ^site(_perl)?/.
392    $name =~ s-^site(_perl)?/--;
393    # Lose ^arch/.	(XXX should we use Config? Just for archname?)
394    $name =~ s~^(.*-$^O|$^O-.*)/~~o;
395    # Lose ^version/.
396    $name =~ s-^\d+\.\d+/--;
397}
398
399# Translate Getopt/Long to Getopt::Long, etc.
400$name =~ s(/)(::)g;
401
402if ($name ne 'something') {
403    FCHECK: {
404	open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
405	while (<F>) {
406	    next unless /^=\b/;
407	    if (/^=head1\s+NAME\s*$/) {  # an /m would forgive mistakes
408		$_ = <F>;
409		unless (/\s*-+\s+/) {
410		    $oops++;
411		    warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
412                } else {
413		    my @n = split /\s+-+\s+/;
414		    if (@n != 2) {
415			$oops++;
416			warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n"
417		    }
418		    else {
419			$n[0] =~ s/\n/ /g;
420			$n[1] =~ s/\n/ /g;
421			%namedesc = @n;
422		    }
423		}
424		last FCHECK;
425	    }
426	    next if /^=cut\b/;	# DB_File and Net::Ping have =cut before NAME
427	    next if /^=pod\b/;  # It is OK to have =pod before NAME
428	    die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax;
429	}
430	die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax;
431    }
432    close F;
433}
434
435print <<"END";
436.rn '' }`
437''' \$RCSfile\$\$Revision\$\$Date\$
438'''
439''' \$Log\$
440'''
441.de Sh
442.br
443.if t .Sp
444.ne 5
445.PP
446\\fB\\\\\$1\\fR
447.PP
448..
449.de Sp
450.if t .sp .5v
451.if n .sp
452..
453.de Ip
454.br
455.ie \\\\n(.\$>=3 .ne \\\\\$3
456.el .ne 3
457.IP "\\\\\$1" \\\\\$2
458..
459.de Vb
460.ft $CFont
461.nf
462.ne \\\\\$1
463..
464.de Ve
465.ft R
466
467.fi
468..
469'''
470'''
471'''     Set up \\*(-- to give an unbreakable dash;
472'''     string Tr holds user defined translation string.
473'''     Bell System Logo is used as a dummy character.
474'''
475.tr \\(*W-|\\(bv\\*(Tr
476.ie n \\{\\
477.ds -- \\(*W-
478.ds PI pi
479.if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
480.if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
481.ds L" ""
482.ds R" ""
483'''   \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of
484'''   \\*(L" and \\*(R", except that they are used on ".xx" lines,
485'''   such as .IP and .SH, which do another additional levels of
486'''   double-quote interpretation
487.ds M" """
488.ds S" """
489.ds N" """""
490.ds T" """""
491.ds L' '
492.ds R' '
493.ds M' '
494.ds S' '
495.ds N' '
496.ds T' '
497'br\\}
498.el\\{\\
499.ds -- \\(em\\|
500.tr \\*(Tr
501.ds L" ``
502.ds R" ''
503.ds M" ``
504.ds S" ''
505.ds N" ``
506.ds T" ''
507.ds L' `
508.ds R' '
509.ds M' `
510.ds S' '
511.ds N' `
512.ds T' '
513.ds PI \\(*p
514'br\\}
515END
516
517print <<'END';
518.\"	If the F register is turned on, we'll generate
519.\"	index entries out stderr for the following things:
520.\"		TH	Title
521.\"		SH	Header
522.\"		Sh	Subsection
523.\"		Ip	Item
524.\"		X<>	Xref  (embedded
525.\"	Of course, you have to process the output yourself
526.\"	in some meaninful fashion.
527.if \nF \{
528.de IX
529.tm Index:\\$1\t\\n%\t"\\$2"
530..
531.nr % 0
532.rr F
533.\}
534END
535
536print <<"END";
537.TH $name $section "$RP" "$date" "$center"
538.UC
539END
540
541push(@Indices, qq{.IX Title "$name $section"});
542
543while (($name, $desc) = each %namedesc) {
544    for ($name, $desc) { s/^\s+//; s/\s+$//; }
545    push(@Indices, qq(.IX Name "$name - $desc"\n));
546}
547
548print <<'END';
549.if n .hy 0
550.if n .na
551.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
552.de CQ          \" put $1 in typewriter font
553END
554print ".ft $CFont\n";
555print <<'END';
556'if n "\c
557'if t \\&\\$1\c
558'if n \\&\\$1\c
559'if n \&"
560\\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
561'.ft R
562..
563.\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
564.	\" AM - accent mark definitions
565.bd B 3
566.	\" fudge factors for nroff and troff
567.if n \{\
568.	ds #H 0
569.	ds #V .8m
570.	ds #F .3m
571.	ds #[ \f1
572.	ds #] \fP
573.\}
574.if t \{\
575.	ds #H ((1u-(\\\\n(.fu%2u))*.13m)
576.	ds #V .6m
577.	ds #F 0
578.	ds #[ \&
579.	ds #] \&
580.\}
581.	\" simple accents for nroff and troff
582.if n \{\
583.	ds ' \&
584.	ds ` \&
585.	ds ^ \&
586.	ds , \&
587.	ds ~ ~
588.	ds ? ?
589.	ds ! !
590.	ds /
591.	ds q
592.\}
593.if t \{\
594.	ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
595.	ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
596.	ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
597.	ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
598.	ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
599.	ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
600.	ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
601.	ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
602.	ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
603.\}
604.	\" troff and (daisy-wheel) nroff accents
605.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
606.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
607.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
608.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
609.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
610.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
611.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
612.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
613.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
614.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
615.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
616.ds ae a\h'-(\w'a'u*4/10)'e
617.ds Ae A\h'-(\w'A'u*4/10)'E
618.ds oe o\h'-(\w'o'u*4/10)'e
619.ds Oe O\h'-(\w'O'u*4/10)'E
620.	\" corrections for vroff
621.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
622.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
623.	\" for low resolution devices (crt and lpr)
624.if \n(.H>23 .if \n(.V>19 \
625\{\
626.	ds : e
627.	ds 8 ss
628.	ds v \h'-1'\o'\(aa\(ga'
629.	ds _ \h'-1'^
630.	ds . \h'-1'.
631.	ds 3 3
632.	ds o a
633.	ds d- d\h'-1'\(ga
634.	ds D- D\h'-1'\(hy
635.	ds th \o'bp'
636.	ds Th \o'LP'
637.	ds ae ae
638.	ds Ae AE
639.	ds oe oe
640.	ds Oe OE
641.\}
642.rm #[ #] #H #V #F C
643END
644
645$indent = 0;
646
647$begun = "";
648
649# Unrolling [^A-Z>]|[A-Z](?!<) gives:    // MRE pp 165.
650my $nonest = '(?:[^A-Z>]*(?:[A-Z](?!<)[^A-Z>]*)*)';
651
652while (<>) {
653    if ($cutting) {
654	next unless /^=/;
655	$cutting = 0;
656    }
657    if ($begun) {
658	if (/^=end\s+$begun/) {
659            $begun = "";
660	}
661	elsif ($begun =~ /^(roff|man)$/) {
662	    print STDOUT $_;
663        }
664	next;
665    }
666    chomp;
667
668    # Translate verbatim paragraph
669
670    if (/^\s/) {
671	@lines = split(/\n/);
672	for (@lines) {
673	    1 while s
674		{^( [^\t]* ) \t ( \t* ) }
675		{ $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
676	    s/\\/\\e/g;
677	    s/\A/\\&/s;
678	}
679	$lines = @lines;
680	makespace() unless $verbatim++;
681	print ".Vb $lines\n";
682	print join("\n", @lines), "\n";
683	print ".Ve\n";
684	$needspace = 0;
685	next;
686    }
687
688    $verbatim = 0;
689
690    if (/^=for\s+(\S+)\s*/s) {
691	if ($1 eq "man" or $1 eq "roff") {
692	    print STDOUT $',"\n\n";
693	} else {
694	    # ignore unknown for
695	}
696	next;
697    }
698    elsif (/^=begin\s+(\S+)\s*/s) {
699	$begun = $1;
700	if ($1 eq "man" or $1 eq "roff") {
701	    print STDOUT $'."\n\n";
702	}
703	next;
704    }
705
706    # check for things that'll hosed our noremap scheme; affects $_
707    init_noremap();
708
709    if (!/^=item/) {
710
711	# trofficate backslashes; must do it before what happens below
712	s/\\/noremap('\\e')/ge;
713
714	# protect leading periods and quotes against *roff
715	# mistaking them for directives
716	s/^(?:[A-Z]<)?[.']/\\&$&/gm;
717
718	# first hide the escapes in case we need to
719	# intuit something and get it wrong due to fmting
720
721	1 while s/([A-Z]<$nonest>)/noremap($1)/ge;
722
723	# func() is a reference to a perl function
724	s{
725	    \b
726	    (
727		[:\w]+ \(\)
728	    )
729	} {I<$1>}gx;
730
731	# func(n) is a reference to a perl function or a man page
732	s{
733	    ([:\w]+)
734	    (
735		\( [^\051]+ \)
736	    )
737	} {I<$1>\\|$2}gx;
738
739	# convert simple variable references
740	s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g;
741
742	if (m{ (
743		    [\-\w]+
744		    \(
745			[^\051]*?
746			[\@\$,]
747			[^\051]*?
748		    \)
749		)
750	    }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
751	{
752	    warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
753	    $oops++;
754	}
755
756	while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
757	    warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
758	    $oops++;
759	}
760
761	# put it back so we get the <> processed again;
762	clear_noremap(0); # 0 means leave the E's
763
764    } else {
765	# trofficate backslashes
766	s/\\/noremap('\\e')/ge;
767
768    }
769
770    # need to hide E<> first; they're processed in clear_noremap
771    s/(E<[^<>]+>)/noremap($1)/ge;
772
773
774    $maxnest = 10;
775    while ($maxnest-- && /[A-Z]</) {
776
777	# can't do C font here
778	s/([BI])<($nonest)>/font($1) . $2 . font('R')/eg;
779
780	# files and filelike refs in italics
781	s/F<($nonest)>/I<$1>/g;
782
783	# no break -- usually we want C<> for this
784	s/S<($nonest)>/nobreak($1)/eg;
785
786	# LREF: a la HREF L<show this text|man/section>
787	s:L<([^|>]+)\|[^>]+>:$1:g;
788
789	# LREF: a manpage(3f)
790	s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
791
792	# LREF: an =item on another manpage
793	s{
794	    L<
795		([^/]+)
796		/
797		(
798		    [:\w]+
799		    (\(\))?
800		)
801	    >
802	} {the C<$2> entry in the I<$1> manpage}gx;
803
804	# LREF: an =item on this manpage
805	s{
806	   ((?:
807	    L<
808		/
809		(
810		    [:\w]+
811		    (\(\))?
812		)
813	    >
814	    (,?\s+(and\s+)?)?
815	  )+)
816	} { internal_lrefs($1) }gex;
817
818	# LREF: a =head2 (head1?), maybe on a manpage, maybe right here
819	# the "func" can disambiguate
820	s{
821	    L<
822		(?:
823		    ([a-zA-Z]\S+?) /
824		)?
825		"?(.*?)"?
826	    >
827	}{
828	    do {
829		$1 	# if no $1, assume it means on this page.
830		    ?  "the section on I<$2> in the I<$1> manpage"
831		    :  "the section on I<$2>"
832	    }
833	}gesx; # s in case it goes over multiple lines, so . matches \n
834
835	s/Z<>/\\&/g;
836
837	# comes last because not subject to reprocessing
838	s/C<($nonest)>/noremap("${CFont_embed}${1}\\fR")/eg;
839    }
840
841    if (s/^=//) {
842	$needspace = 0;		# Assume this.
843
844	s/\n/ /g;
845
846	($Cmd, $_) = split(' ', $_, 2);
847
848	$dotlevel = 1;
849	if ($Cmd eq 'head1') {
850	   $dotlevel = 1;
851	}
852	elsif ($Cmd eq 'head2') {
853	   $dotlevel = 1;
854	}
855	elsif ($Cmd eq 'item') {
856	   $dotlevel = 2;
857	}
858
859	if (defined $_) {
860	    &escapes($dotlevel);
861	    s/"/""/g;
862	}
863
864	clear_noremap(1);
865
866	if ($Cmd eq 'cut') {
867	    $cutting = 1;
868	}
869	elsif ($Cmd eq 'head1') {
870	    s/\s+$//;
871	    delete $wanna_see{$_} if exists $wanna_see{$_};
872	    print qq{.SH "$_"\n};
873      push(@Indices, qq{.IX Header "$_"\n});
874	}
875	elsif ($Cmd eq 'head2') {
876	    print qq{.Sh "$_"\n};
877      push(@Indices, qq{.IX Subsection "$_"\n});
878	}
879	elsif ($Cmd eq 'over') {
880	    push(@indent,$indent);
881	    $indent += ($_ + 0) || 5;
882	}
883	elsif ($Cmd eq 'back') {
884	    $indent = pop(@indent);
885	    warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
886	    $needspace = 1;
887	}
888	elsif ($Cmd eq 'item') {
889	    s/^\*( |$)/\\(bu$1/g;
890	    # if you know how to get ":s please do
891	    s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g;
892	    s/\\\*\(L"([^"]+?)""/'$1'/g;
893	    s/[^"]""([^"]+?)""[^"]/'$1'/g;
894	    # here do something about the $" in perlvar?
895	    print STDOUT qq{.Ip "$_" $indent\n};
896      push(@Indices, qq{.IX Item "$_"\n});
897	}
898	elsif ($Cmd eq 'pod') {
899	    # this is just a comment
900	}
901	else {
902	    warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
903	}
904    }
905    else {
906	if ($needspace) {
907	    &makespace;
908	}
909	&escapes(0);
910	clear_noremap(1);
911	print $_, "\n";
912	$needspace = 1;
913    }
914}
915
916print <<"END";
917
918.rn }` ''
919END
920
921if (%wanna_see && !$lax) {
922    @missing = keys %wanna_see;
923    warn "$0: $Filename is missing required section"
924	.  (@missing > 1 && "s")
925	.  ": @missing\n";
926    $oops++;
927}
928
929foreach (@Indices) { print "$_\n"; }
930
931exit;
932#exit ($oops != 0);
933
934#########################################################################
935
936sub nobreak {
937    my $string = shift;
938    $string =~ s/ /\\ /g;
939    $string;
940}
941
942sub escapes {
943    my $indot = shift;
944
945    s/X<(.*?)>/mkindex($1)/ge;
946
947    # translate the minus in foo-bar into foo\-bar for roff
948    s/([^0-9a-z-])-([^-])/$1\\-$2/g;
949
950    # make -- into the string version \*(-- (defined above)
951    s/\b--\b/\\*(--/g;
952    s/"--([^"])/"\\*(--$1/g;  # should be a better way
953    s/([^"])--"/$1\\*(--"/g;
954
955    # fix up quotes; this is somewhat tricky
956    my $dotmacroL = 'L';
957    my $dotmacroR = 'R';
958    if ( $indot == 1 ) {
959	$dotmacroL = 'M';
960	$dotmacroR = 'S';
961    }
962    elsif ( $indot >= 2 ) {
963	$dotmacroL = 'N';
964	$dotmacroR = 'T';
965    }
966    if (!/""/) {
967	s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge;
968	s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge;
969    }
970
971    #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
972    #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
973
974
975    # make sure that func() keeps a bit a space tween the parens
976    ### s/\b\(\)/\\|()/g;
977    ### s/\b\(\)/(\\|)/g;
978
979    # make C++ into \*C+, which is a squinched version (defined above)
980    s/\bC\+\+/\\*(C+/g;
981
982    # make double underbars have a little tiny space between them
983    s/__/_\\|_/g;
984
985    # PI goes to \*(PI (defined above)
986    s/\bPI\b/noremap('\\*(PI')/ge;
987
988    # make all caps a teeny bit smaller, but don't muck with embedded code literals
989    my $hidCFont = font('C');
990    if ($Cmd !~ /^head1/) { # SH already makes smaller
991	# /g isn't enough; 1 while or we'll be off
992
993#	1 while s{
994#	    (?!$hidCFont)(..|^.|^)
995#	    \b
996#	    (
997#		[A-Z][\/A-Z+:\-\d_$.]+
998#	    )
999#	    (s?)
1000#	    \b
1001#	} {$1\\s-1$2\\s0}gmox;
1002
1003	1 while s{
1004	    (?!$hidCFont)(..|^.|^)
1005	    (
1006		\b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
1007	    )
1008	} {
1009	    $1 . noremap( '\\s-1' .  $2 . '\\s0' )
1010	}egmox;
1011
1012    }
1013}
1014
1015# make troff just be normal, but make small nroff get quoted
1016# decided to just put the quotes in the text; sigh;
1017sub ccvt {
1018    local($_,$prev) = @_;
1019    noremap(qq{.CQ "$_" \n\\&});
1020}
1021
1022sub makespace {
1023    if ($indent) {
1024	print ".Sp\n";
1025    }
1026    else {
1027	print ".PP\n";
1028    }
1029}
1030
1031sub mkindex {
1032    my ($entry) = @_;
1033    my @entries = split m:\s*/\s*:, $entry;
1034    push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries;
1035    return '';
1036}
1037
1038sub font {
1039    local($font) = shift;
1040    return '\\f' . noremap($font);
1041}
1042
1043sub noremap {
1044    local($thing_to_hide) = shift;
1045    $thing_to_hide =~ tr/\000-\177/\200-\377/;
1046    return $thing_to_hide;
1047}
1048
1049sub init_noremap {
1050	# escape high bit characters in input stream
1051	s/([\200-\377])/"E<".ord($1).">"/ge;
1052}
1053
1054sub clear_noremap {
1055    my $ready_to_print = $_[0];
1056
1057    tr/\200-\377/\000-\177/;
1058
1059    # trofficate backslashes
1060    # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
1061
1062    # now for the E<>s, which have been hidden until now
1063    # otherwise the interative \w<> processing would have
1064    # been hosed by the E<gt>
1065    s {
1066	    E<
1067	    (
1068	        ( \d + )
1069	        | ( [A-Za-z]+ )
1070	    )
1071	    >
1072    } {
1073	 do {
1074	     defined $2
1075		? chr($2)
1076		:
1077	     exists $HTML_Escapes{$3}
1078		? do { $HTML_Escapes{$3} }
1079		: do {
1080		    warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
1081		    "E<$1>";
1082		}
1083	 }
1084    }egx if $ready_to_print;
1085}
1086
1087sub internal_lrefs {
1088    local($_) = shift;
1089    local $trailing_and = s/and\s+$// ? "and " : "";
1090
1091    s{L</([^>]+)>}{$1}g;
1092    my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
1093    my $retstr = "the ";
1094    my $i;
1095    for ($i = 0; $i <= $#items; $i++) {
1096	$retstr .= "C<$items[$i]>";
1097	$retstr .= ", " if @items > 2 && $i != $#items;
1098	$retstr .= " and " if $i+2 == @items;
1099    }
1100
1101    $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
1102	    .  " elsewhere in this document";
1103    # terminal space to avoid words running together (pattern used
1104    # strips terminal spaces)
1105    $retstr .= " " if length $trailing_and;
1106    $retstr .=  $trailing_and;
1107
1108    return $retstr;
1109
1110}
1111
1112BEGIN {
1113%HTML_Escapes = (
1114    'amp'	=>	'&',	#   ampersand
1115    'lt'	=>	'<',	#   left chevron, less-than
1116    'gt'	=>	'>',	#   right chevron, greater-than
1117    'quot'	=>	'"',	#   double quote
1118
1119    "Aacute"	=>	"A\\*'",	#   capital A, acute accent
1120    "aacute"	=>	"a\\*'",	#   small a, acute accent
1121    "Acirc"	=>	"A\\*^",	#   capital A, circumflex accent
1122    "acirc"	=>	"a\\*^",	#   small a, circumflex accent
1123    "AElig"	=>	'\*(AE',	#   capital AE diphthong (ligature)
1124    "aelig"	=>	'\*(ae',	#   small ae diphthong (ligature)
1125    "Agrave"	=>	"A\\*`",	#   capital A, grave accent
1126    "agrave"	=>	"A\\*`",	#   small a, grave accent
1127    "Aring"	=>	'A\\*o',	#   capital A, ring
1128    "aring"	=>	'a\\*o',	#   small a, ring
1129    "Atilde"	=>	'A\\*~',	#   capital A, tilde
1130    "atilde"	=>	'a\\*~',	#   small a, tilde
1131    "Auml"	=>	'A\\*:',	#   capital A, dieresis or umlaut mark
1132    "auml"	=>	'a\\*:',	#   small a, dieresis or umlaut mark
1133    "Ccedil"	=>	'C\\*,',	#   capital C, cedilla
1134    "ccedil"	=>	'c\\*,',	#   small c, cedilla
1135    "Eacute"	=>	"E\\*'",	#   capital E, acute accent
1136    "eacute"	=>	"e\\*'",	#   small e, acute accent
1137    "Ecirc"	=>	"E\\*^",	#   capital E, circumflex accent
1138    "ecirc"	=>	"e\\*^",	#   small e, circumflex accent
1139    "Egrave"	=>	"E\\*`",	#   capital E, grave accent
1140    "egrave"	=>	"e\\*`",	#   small e, grave accent
1141    "ETH"	=>	'\\*(D-',	#   capital Eth, Icelandic
1142    "eth"	=>	'\\*(d-',	#   small eth, Icelandic
1143    "Euml"	=>	"E\\*:",	#   capital E, dieresis or umlaut mark
1144    "euml"	=>	"e\\*:",	#   small e, dieresis or umlaut mark
1145    "Iacute"	=>	"I\\*'",	#   capital I, acute accent
1146    "iacute"	=>	"i\\*'",	#   small i, acute accent
1147    "Icirc"	=>	"I\\*^",	#   capital I, circumflex accent
1148    "icirc"	=>	"i\\*^",	#   small i, circumflex accent
1149    "Igrave"	=>	"I\\*`",	#   capital I, grave accent
1150    "igrave"	=>	"i\\*`",	#   small i, grave accent
1151    "Iuml"	=>	"I\\*:",	#   capital I, dieresis or umlaut mark
1152    "iuml"	=>	"i\\*:",	#   small i, dieresis or umlaut mark
1153    "Ntilde"	=>	'N\*~',		#   capital N, tilde
1154    "ntilde"	=>	'n\*~',		#   small n, tilde
1155    "Oacute"	=>	"O\\*'",	#   capital O, acute accent
1156    "oacute"	=>	"o\\*'",	#   small o, acute accent
1157    "Ocirc"	=>	"O\\*^",	#   capital O, circumflex accent
1158    "ocirc"	=>	"o\\*^",	#   small o, circumflex accent
1159    "Ograve"	=>	"O\\*`",	#   capital O, grave accent
1160    "ograve"	=>	"o\\*`",	#   small o, grave accent
1161    "Oslash"	=>	"O\\*/",	#   capital O, slash
1162    "oslash"	=>	"o\\*/",	#   small o, slash
1163    "Otilde"	=>	"O\\*~",	#   capital O, tilde
1164    "otilde"	=>	"o\\*~",	#   small o, tilde
1165    "Ouml"	=>	"O\\*:",	#   capital O, dieresis or umlaut mark
1166    "ouml"	=>	"o\\*:",	#   small o, dieresis or umlaut mark
1167    "szlig"	=>	'\*8',		#   small sharp s, German (sz ligature)
1168    "THORN"	=>	'\\*(Th',	#   capital THORN, Icelandic
1169    "thorn"	=>	'\\*(th',,	#   small thorn, Icelandic
1170    "Uacute"	=>	"U\\*'",	#   capital U, acute accent
1171    "uacute"	=>	"u\\*'",	#   small u, acute accent
1172    "Ucirc"	=>	"U\\*^",	#   capital U, circumflex accent
1173    "ucirc"	=>	"u\\*^",	#   small u, circumflex accent
1174    "Ugrave"	=>	"U\\*`",	#   capital U, grave accent
1175    "ugrave"	=>	"u\\*`",	#   small u, grave accent
1176    "Uuml"	=>	"U\\*:",	#   capital U, dieresis or umlaut mark
1177    "uuml"	=>	"u\\*:",	#   small u, dieresis or umlaut mark
1178    "Yacute"	=>	"Y\\*'",	#   capital Y, acute accent
1179    "yacute"	=>	"y\\*'",	#   small y, acute accent
1180    "yuml"	=>	"y\\*:",	#   small y, dieresis or umlaut mark
1181);
1182}
1183
1184