1189251Ssam=head1 NAME
2189251Ssam
3189251SsamMdoc - perl module to parse Mdoc macros
4189251Ssam
5252726Srpaulo=head1 SYNOPSIS
6252726Srpaulo
7189251Ssam    use Mdoc qw(ns pp soff son stoggle mapwords);
8189251Ssam
9189251SsamSee mdoc2man and mdoc2texi for code examples.
10189251Ssam
11189251Ssam=head1 FUNCTIONS
12189251Ssam
13189251Ssam=over 4
14189251Ssam
15189251Ssam=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
16189251Ssam
17189251SsamDefine new macro. The CODE reference will be called by call_macro(). You can
18189251Ssamhave two distinct definitions for and inline macro and for a standalone macro
19189251Ssam(i. e. 'Pa' and '.Pa').
20189251Ssam
21189251SsamThe CODE reference is passed a list of arguments and is expected to return list
22189251Ssamof strings and control characters (see C<CONSTANTS>).
23189251Ssam
24189251SsamBy default the surrouding "" from arguments to macros are removed, use C<raw>
25189251Ssamto disable this.
26189251Ssam
27189251SsamNormaly CODE reference is passed all arguments up to next nested macro. Set
28189251SsamC<greedy> to to pass everything up to the end of the line.
29189251Ssam
30189251SsamIf the concat_until is present, the line is concated until the .Xx macro is
31189251Ssamfound. For example the following macro definition
32189251Ssam
33189251Ssam    def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
34189251Ssam    def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
35189251Ssam
36189251Ssamand the following input
37189251Ssam
38189251Ssam    .Oo
39189251Ssam    .Cm foo |
40189251Ssam    .Cm bar |
41189251Ssam    .Oc
42189251Ssam
43189251Ssamresults in [(foo) | (bar)]
44189251Ssam
45189251Ssam=item get_macro( NAME )
46189251Ssam
47189251SsamReturns a hash reference like:
48189251Ssam
49189251Ssam    { run => CODE, raw => [1|0], greedy => [1|0] }
50189251Ssam
51189251SsamWhere C<CODE> is the CODE reference used to define macro called C<NAME>
52189251Ssam
53189251Ssam=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
54189251Ssam
55189251SsamParse a line from the C<INPUT> filehandle. If a macro was detected it returns a
56189251Ssamlist (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
57189251Ssamcaller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
58189251Ssamdefined it calls it prior to passing argument to a macro, giving caller a
59189251Ssamchance to alter them.  if EOF was reached undef is returned.
60189251Ssam
61189251Ssam=item call_macro( MACRO, ARGS, ... )
62189251Ssam
63189251SsamCall macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
64189251Ssamcalled and for all the nested macros. Every called macro returns a list which
65189251Ssamis appended to return value and returned when all nested macros are processed.
66189251SsamUse to_string() to produce a printable string from the list.
67189251Ssam
68189251Ssam=item to_string ( LIST )
69189251Ssam
70189251SsamProcesses C<LIST> returned from call_macro() and returns formatted string.
71189251Ssam
72189251Ssam=item mapwords BLOCK ARRAY
73189251Ssam
74189251SsamThis is like perl's map only it calls BLOCK only on elements which are not
75189251Ssampunctuation or control characters.
76189251Ssam
77189251Ssam=item space ( ['on'|'off] )
78189251Ssam
79189251SsamTurn spacing on or off. If called without argument it returns the current state.
80189251Ssam
81189251Ssam=item gen_encloser ( START, END )
82189251Ssam
83189251SsamHelper function for generating macros that enclose their arguments.
84189251Ssam    gen_encloser(qw({ }));
85189251Ssamreturns
86189251Ssam    sub { '{', ns, @_, ns, pp('}')}
87189251Ssam
88189251Ssam=item set_Bl_callback( CODE , DEFS )
89189251Ssam
90189251SsamThis module implements the Bl/El macros for you. Using set_Bl_callback you can
91189251Ssamprovide a macro definition that should be executed on a .Bl call.
92189251Ssam
93189251Ssam=item set_El_callback( CODE , DEFS )
94189251Ssam
95189251SsamThis module implements the Bl/El macros for you. Using set_El_callback you can
96189251Ssamprovide a macro definition that should be executed on a .El call.
97189251Ssam
98189251Ssam=item set_Re_callback( CODE )
99189251Ssam
100189251SsamThe C<CODE> is called after a Rs/Re block is done. With a hash reference as a
101189251Ssamparameter, describing the reference.
102189251Ssam
103189251Ssam=back
104189251Ssam
105189251Ssam=head1 CONSTANTS
106189251Ssam
107189251Ssam=over 4
108189251Ssam
109189251Ssam=item ns
110189251Ssam
111189251SsamIndicate 'no space' between to members of the list.
112189251Ssam
113189251Ssam=item pp ( STRING )
114189251Ssam
115189251SsamThe string is 'punctuation point'. It means that every punctuation
116189251Ssampreceeding that element is put behind it.
117189251Ssam
118189251Ssam=item soff
119189251Ssam
120189251SsamTurn spacing off.
121189251Ssam
122189251Ssam=item son
123189251Ssam
124189251SsamTurn spacing on.
125189251Ssam
126189251Ssam=item stoggle
127189251Ssam
128189251SsamToogle spacing.
129189251Ssam
130189251Ssam=item hs
131189251Ssam
132189251SsamPrint space no matter spacing mode.
133189251Ssam
134189251Ssam=back
135189251Ssam
136252726Srpaulo=head1 TODO
137189251Ssam
138189251Ssam* The concat_until only works with standalone macros. This means that
139189251Ssam    .Po blah Pc
140189251Ssamwill hang until .Pc in encountered.
141189251Ssam
142189251Ssam* Provide default macros for Bd/Ed
143189251Ssam
144189251Ssam* The reference implementation is uncomplete
145189251Ssam
146189251Ssam=cut
147189251Ssam
148189251Ssampackage Mdoc;
149189251Ssamuse strict;
150189251Ssamuse warnings;
151189251Ssamuse List::Util qw(reduce);
152189251Ssamuse Text::ParseWords qw(quotewords);
153189251Ssamuse Carp;
154189251Ssamuse Exporter qw(import);
155189251Ssamour @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
156189251Ssam
157189251Ssamuse constant {
158189251Ssam    ns      => ['nospace'],
159189251Ssam    soff    => ['spaceoff'],
160189251Ssam    son     => ['spaceon'],
161189251Ssam    stoggle => ['spacetoggle'],
162189251Ssam    hs      => ['hardspace'],
163189251Ssam};
164189251Ssam
165189251Ssamsub pp {
166189251Ssam    my $c = shift;
167189251Ssam    return ['pp', $c ];
168189251Ssam}
169189251Ssamsub gen_encloser {
170189251Ssam    my ($o, $c) = @_;
171189251Ssam    return sub { ($o, ns, @_, ns, pp($c)) };
172189251Ssam}
173189251Ssam
174189251Ssamsub mapwords(&@) {
175189251Ssam    my ($f, @l) = @_;
176189251Ssam    my @res;
177189251Ssam    for my $el (@l) {
178189251Ssam        local $_ = $el;
179189251Ssam        push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
180189251Ssam                    $el : $f->();
181189251Ssam    }
182189251Ssam    return @res;
183189251Ssam}
184189251Ssam
185189251Ssammy %macros;
186189251Ssam
187189251Ssam###############################################################################
188189251Ssam
189189251Ssam# Default macro definitions start
190189251Ssam
191189251Ssam###############################################################################
192189251Ssam
193189251Ssamdef_macro('Xo',  sub { @_ }, concat_until => '.Xc');
194189251Ssam
195189251Ssamdef_macro('.Ns', sub {ns, @_});
196189251Ssamdef_macro('Ns',  sub {ns, @_});
197189251Ssam
198189251Ssam{
199189251Ssam    my %reference;
200189251Ssam    def_macro('.Rs', sub { () } );
201189251Ssam    def_macro('.%A', sub {
202189251Ssam        if ($reference{authors}) {
203189251Ssam            $reference{authors} .= " and @_"
204189251Ssam        }
205189251Ssam        else {
206189251Ssam            $reference{authors} = "@_";
207189251Ssam        }
208189251Ssam        return ();
209189251Ssam    });
210189251Ssam    def_macro('.%T', sub { $reference{title} = "@_"; () } );
211189251Ssam    def_macro('.%O', sub { $reference{optional} = "@_"; () } );
212189251Ssam
213189251Ssam    sub set_Re_callback {
214189251Ssam        my ($sub) = @_;
215189251Ssam        croak 'Not a CODE reference' if not ref $sub eq 'CODE';
216189251Ssam        def_macro('.Re', sub {
217189251Ssam            my @ret = $sub->(\%reference);
218189251Ssam            %reference = (); @ret
219189251Ssam        });
220189251Ssam        return;
221189251Ssam    }
222189251Ssam}
223189251Ssam
224189251Ssamdef_macro('.Bl', sub { die '.Bl - no list callback set' });
225189251Ssamdef_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
226189251Ssamdef_macro('.El', sub { die '.El requires .Bl first' });
227189251Ssam
228189251Ssam
229189251Ssam{
230189251Ssam    my $elcb = sub { () };
231189251Ssam
232189251Ssam    sub set_El_callback {
233189251Ssam        my ($sub) = @_;
234189251Ssam        croak 'Not a CODE reference' if ref $sub ne 'CODE';
235189251Ssam        $elcb = $sub;
236189251Ssam        return;
237189251Ssam    }
238189251Ssam
239189251Ssam    sub set_Bl_callback {
240189251Ssam        my ($blcb, %defs) = @_;
241189251Ssam        croak 'Not a CODE reference' if ref $blcb ne 'CODE';
242189251Ssam        def_macro('.Bl', sub {
243189251Ssam
244189251Ssam            my $orig_it   = get_macro('.It');
245189251Ssam            my $orig_el   = get_macro('.El');
246189251Ssam            my $orig_bl   = get_macro('.Bl');
247189251Ssam            my $orig_elcb = $elcb;
248189251Ssam
249189251Ssam            # Restore previous .It and .El on each .El
250189251Ssam            def_macro('.El', sub {
251189251Ssam                    def_macro('.El', delete $orig_el->{run}, %$orig_el);
252189251Ssam                    def_macro('.It', delete $orig_it->{run}, %$orig_it);
253189251Ssam                    def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
254189251Ssam                    my @ret = $elcb->(@_);
255189251Ssam                    $elcb = $orig_elcb;
256189251Ssam                    @ret
257189251Ssam                });
258189251Ssam            $blcb->(@_)
259189251Ssam        }, %defs);
260189251Ssam        return;
261189251Ssam    }
262189251Ssam}
263189251Ssam
264189251Ssamdef_macro('.Sm', sub {
265189251Ssam    my ($arg) = @_;
266189251Ssam    if (defined $arg) {
267189251Ssam        space($arg);
268189251Ssam    } else {
269189251Ssam        space() eq 'off' ?
270189251Ssam            space('on') :
271189251Ssam            space('off');
272189251Ssam    }
273189251Ssam    ()
274189251Ssam} );
275189251Ssamdef_macro('Sm', do { my $off; sub {
276189251Ssam    my ($arg) = @_;
277189251Ssam    if (defined $arg && $arg =~ /^(on|off)$/) {
278189251Ssam        shift;
279189251Ssam        if    ($arg eq 'off') { soff, @_; }
280189251Ssam        elsif ($arg eq 'on')  { son, @_; }
281189251Ssam    }
282189251Ssam    else {
283189251Ssam        stoggle, @_;
284189251Ssam    }
285189251Ssam}} );
286189251Ssam
287189251Ssam###############################################################################
288189251Ssam
289189251Ssam# Default macro definitions end
290189251Ssam
291189251Ssam###############################################################################
292189251Ssam
293189251Ssamsub def_macro {
294189251Ssam    croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
295189251Ssam    my ($macro, $sub, %def) = @_;
296189251Ssam    croak 'Not a CODE reference' if ref $sub ne 'CODE';
297189251Ssam
298189251Ssam    $macros{ $macro } = {
299189251Ssam        run          => $sub,
300189251Ssam        greedy       => delete $def{greedy} || 0,
301189251Ssam        raw          => delete $def{raw}    || 0,
302189251Ssam        concat_until => delete $def{concat_until},
303189251Ssam    };
304189251Ssam    if ($macros{ $macro }{concat_until}) {
305189251Ssam        $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
306189251Ssam        $macros{ $macro }{greedy}                  = 1;
307189251Ssam    }
308189251Ssam    return;
309189251Ssam}
310189251Ssam
311189251Ssamsub get_macro {
312189251Ssam    my ($macro) = @_;
313189251Ssam    croak "Macro <$macro> not defined" if not exists $macros{ $macro };
314189251Ssam    +{ %{ $macros{ $macro } } }
315189251Ssam}
316189251Ssam
317189251Ssam#TODO: document this
318189251Ssamsub parse_opts {
319189251Ssam    my %args;
320189251Ssam    my $last;
321189251Ssam    for (@_) {
322189251Ssam        if ($_ =~ /^\\?-/) {
323189251Ssam            s/^\\?-//;
324189251Ssam            $args{$_} = 1;
325189251Ssam            $last = _unquote($_);
326189251Ssam        }
327189251Ssam        else {
328189251Ssam            $args{$last} = _unquote($_) if $last;
329189251Ssam            undef $last;
330189251Ssam        }
331189251Ssam    }
332189251Ssam    return %args;
333189251Ssam}
334189251Ssam
335189251Ssamsub _is_control {
336189251Ssam    my ($el, $expected) = @_;
337189251Ssam    if (defined $expected) {
338189251Ssam        ref $el eq 'ARRAY' and $el->[0] eq $expected;
339    }
340    else {
341        ref $el eq 'ARRAY';
342    }
343}
344
345{
346    my $sep = ' ';
347
348    sub to_string {
349        if (@_ > 0) {
350            # Handle punctunation
351            my ($in_brace, @punct) = '';
352            my @new = map {
353                if (/^([\[\(])$/) {
354                    ($in_brace = $1) =~ tr/([/)]/;
355                    $_, ns
356                }
357                elsif (/^([\)\]])$/ && $in_brace eq $1) {
358                    $in_brace = '';
359                    ns, $_
360                }
361                elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
362                    push @punct, ns, $_;
363                    ();
364                }
365                elsif (_is_control($_, 'pp')) {
366                    $_->[1]
367                }
368                elsif (_is_control($_)) {
369                    $_
370                }
371                else {
372                    splice (@punct), $_;
373                }
374            } @_;
375            push @new, @punct;
376
377            # Produce string out of an array dealing with the special control characters
378            # space('off') must but one character delayed
379            my ($no_space, $space_off) = 1;
380            my $res = '';
381            while (defined(my $el = shift @new)) {
382                if    (_is_control($el, 'hardspace'))   { $no_space = 1; $res .= ' ' }
383                elsif (_is_control($el, 'nospace'))     { $no_space = 1;             }
384                elsif (_is_control($el, 'spaceoff'))    { $space_off = 1;            }
385                elsif (_is_control($el, 'spaceon'))     { space('on');               }
386                elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
387                                                            $space_off = 1 :
388                                                            space('on')              }
389                else {
390                    if ($no_space) {
391                        $no_space = 0;
392                        $res .= "$el"
393                    }
394                    else {
395                        $res .= "$sep$el"
396                    }
397
398                    if ($space_off)    { space('off'); $space_off = 0; }
399                }
400            }
401            $res
402        }
403        else {
404            '';
405        }
406    }
407
408    sub space {
409        my ($arg) = @_;
410        if (defined $arg && $arg =~ /^(on|off)$/) {
411            $sep = ' ' if $arg eq 'on';
412            $sep = ''  if $arg eq 'off';
413            return;
414        }
415        else {
416            return $sep eq '' ? 'off' : 'on';
417        }
418    }
419}
420
421sub _unquote {
422    my @args = @_;
423    $_ =~ s/^"([^"]+)"$/$1/g for @args;
424    wantarray ? @args : $args[0];
425}
426
427sub call_macro {
428    my ($macro, @args) = @_;
429    my @ret;
430
431    my @newargs;
432    my $i = 0;
433
434    @args = _unquote(@args) if (!$macros{ $macro }{raw});
435
436    # Call any callable macros in the argument list
437    for (@args) {
438        if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
439            push @ret, call_macro($_, @args[$i+1 .. $#args]);
440            last;
441        } else {
442            if ($macros{ $macro }{greedy}) {
443                push @ret, $_;
444            }
445            else {
446                push @newargs, $_;
447            }
448        }
449        $i++;
450    }
451
452    if ($macros{ $macro }{concat_until}) {
453        my ($n_macro, @n_args) = ('');
454        while (1) {
455            die "EOF was reached and no $macros{ $macro }{concat_until} found"
456                if not defined $n_macro;
457            ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
458            if ($n_macro eq $macros{ $macro }{concat_until}) {
459                push @ret, call_macro($n_macro, @n_args);
460                last;
461            }
462            else {
463                $n_macro =~ s/^\.//;
464                push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
465            }
466        }
467    }
468
469    if ($macros{ $macro }{greedy}) {
470        #print "MACROG $macro (", (join ', ', @ret), ")\n";
471        return $macros{ $macro }{run}->(@ret);
472    }
473    else {
474        #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
475        return $macros{ $macro }{run}->(@newargs), @ret;
476    }
477}
478
479{
480    my ($in_fh, $out_sub, $preprocess_sub);
481    sub parse_line {
482        $in_fh          = $_[0] if defined $_[0] || !defined $in_fh;
483        $out_sub        = $_[1] if defined $_[1] || !defined $out_sub;
484        $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
485
486        croak 'out_sub not a CODE reference'
487            if not ref $out_sub eq 'CODE';
488        croak 'preprocess_sub not a CODE reference'
489            if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
490
491        while (my $line = <$in_fh>) {
492            chomp $line;
493            if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
494                $line =~ /^\.\\"/)
495            {
496                $line =~ s/ +/ /g;
497                my ($macro, @args) = quotewords(' ', 1, $line);
498                @args = grep { defined $_ } @args;
499                $preprocess_sub->(@args) if defined $preprocess_sub;
500                if ($macro && exists $macros{ $macro }) {
501                    return ($macro, @args);
502                } else {
503                    $out_sub->($line);
504                }
505            }
506            else {
507                $out_sub->($line);
508            }
509        }
510        return;
511    }
512}
513
5141;
515__END__
516