152284Sobrien=begin comment
290075Sobrien
352284Sobrien## Mdoc.pm -- Perl functions for mdoc processing
490075Sobrien##
552284Sobrien## Author:	Oliver Kindernay (GSoC project for NTP.org)
690075Sobrien##
790075Sobrien##
890075Sobrien##  This file is part of AutoOpts, a companion to AutoGen.
990075Sobrien##  AutoOpts is free software.
1052284Sobrien##  AutoOpts is Copyright (C) 1992-2015 by Bruce Korb - all rights reserved
1190075Sobrien##
1290075Sobrien##  AutoOpts is available under any one of two licenses.  The license
1390075Sobrien##  in use must be one of these two and the choice is under the control
1490075Sobrien##  of the user of the license.
1552284Sobrien##
1652284Sobrien##   The GNU Lesser General Public License, version 3 or later
1790075Sobrien##      See the files "COPYING.lgplv3" and "COPYING.gplv3"
1890075Sobrien##
1990075Sobrien##   The Modified Berkeley Software Distribution License
2052284Sobrien##      See the file "COPYING.mbsd"
2190075Sobrien##
2290075Sobrien##  These files have the following sha256 sums:
2390075Sobrien##
2452284Sobrien##  8584710e9b04216a394078dc156b781d0b47e1729104d666658aecef8ee32e95  COPYING.gplv3
2552284Sobrien##  4379e7444a0e2ce2b12dd6f5a52a27a4d02d39d247901d3285c88cf0d37f477b  COPYING.lgplv3
2652284Sobrien##  13aa749a5b0a454917a944ed8fffc530b784f5ead522b1aacaf4ec8aa55a6239  COPYING.mbsd
2752284Sobrien=end comment
2890075Sobrien=head1 NAME
2952284Sobrien
3052284SobrienMdoc - perl module to parse Mdoc macros
3190075Sobrien
3290075Sobrien=head1 SYNOPSIS
3390075Sobrien
3490075Sobrien    use Mdoc qw(ns pp soff son stoggle mapwords);
3590075Sobrien
3690075SobrienSee mdoc2man and mdoc2texi for code examples.
3752284Sobrien
3852284Sobrien=head1 FUNCTIONS
3952284Sobrien
4052284Sobrien=over 4
4152284Sobrien
4290075Sobrien=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
4352284Sobrien
4490075SobrienDefine new macro. The CODE reference will be called by call_macro(). You can
4590075Sobrienhave two distinct definitions for and inline macro and for a standalone macro
4690075Sobrien(i. e. 'Pa' and '.Pa').
4790075Sobrien
4852284SobrienThe CODE reference is passed a list of arguments and is expected to return list
4990075Sobrienof strings and control characters (see C<CONSTANTS>).
5090075Sobrien
5190075SobrienBy default the surrouding "" from arguments to macros are removed, use C<raw>
5252284Sobriento disable this.
5390075Sobrien
5490075SobrienNormaly CODE reference is passed all arguments up to next nested macro. Set
5590075SobrienC<greedy> to to pass everything up to the end of the line.
5690075Sobrien
5752284SobrienIf the concat_until is present, the line is concated until the .Xx macro is
5852284Sobrienfound. For example the following macro definition
5952284Sobrien
6052284Sobrien    def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
6190075Sobrien    def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
6290075Sobrien
6352284Sobrienand the following input
6452284Sobrien
6552284Sobrien    .Oo
6690075Sobrien    .Cm foo |
6790075Sobrien    .Cm bar |
6852284Sobrien    .Oc
6952284Sobrien
7090075Sobrienresults in [(foo) | (bar)]
7152284Sobrien
7290075Sobrien=item get_macro( NAME )
7390075Sobrien
7490075SobrienReturns a hash reference like:
7590075Sobrien
7690075Sobrien    { run => CODE, raw => [1|0], greedy => [1|0] }
7790075Sobrien
7890075SobrienWhere C<CODE> is the CODE reference used to define macro called C<NAME>
7990075Sobrien
8090075Sobrien=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
8190075Sobrien
8290075SobrienParse a line from the C<INPUT> filehandle. If a macro was detected it returns a
8390075Sobrienlist (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
8490075Sobriencaller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
8590075Sobriendefined it calls it prior to passing argument to a macro, giving caller a
8652284Sobrienchance to alter them.  if EOF was reached undef is returned.
8752284Sobrien
8890075Sobrien=item call_macro( MACRO, ARGS, ... )
8990075Sobrien
9052284SobrienCall macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
9190075Sobriencalled and for all the nested macros. Every called macro returns a list which
9252284Sobrienis appended to return value and returned when all nested macros are processed.
9390075SobrienUse to_string() to produce a printable string from the list.
9490075Sobrien
9590075Sobrien=item to_string ( LIST )
9690075Sobrien
9790075SobrienProcesses C<LIST> returned from call_macro() and returns formatted string.
9890075Sobrien
9990075Sobrien=item mapwords BLOCK ARRAY
10090075Sobrien
10190075SobrienThis is like perl's map only it calls BLOCK only on elements which are not
10290075Sobrienpunctuation or control characters.
10390075Sobrien
10490075Sobrien=item space ( ['on'|'off] )
10552284Sobrien
10690075SobrienTurn spacing on or off. If called without argument it returns the current state.
10790075Sobrien
10890075Sobrien=item gen_encloser ( START, END )
10990075Sobrien
11090075SobrienHelper function for generating macros that enclose their arguments.
11190075Sobrien    gen_encloser(qw({ }));
11290075Sobrienreturns
11390075Sobrien    sub { '{', ns, @_, ns, pp('}')}
11490075Sobrien
11590075Sobrien=item set_Bl_callback( CODE , DEFS )
11690075Sobrien
11790075SobrienThis module implements the Bl/El macros for you. Using set_Bl_callback you can
11852284Sobrienprovide a macro definition that should be executed on a .Bl call.
11990075Sobrien
12090075Sobrien=item set_El_callback( CODE , DEFS )
12152284Sobrien
12290075SobrienThis module implements the Bl/El macros for you. Using set_El_callback you can
12352284Sobrienprovide a macro definition that should be executed on a .El call.
12452284Sobrien
12552284Sobrien=item set_Re_callback( CODE )
12652284Sobrien
12790075SobrienThe C<CODE> is called after a Rs/Re block is done. With a hash reference as a
12890075Sobrienparameter, describing the reference.
12952284Sobrien
13052284Sobrien=back
13190075Sobrien
13290075Sobrien=head1 CONSTANTS
13390075Sobrien
13490075Sobrien=over 4
13590075Sobrien
13690075Sobrien=item ns
13790075Sobrien
13890075SobrienIndicate 'no space' between to members of the list.
13990075Sobrien
14090075Sobrien=item pp ( STRING )
14190075Sobrien
142The string is 'punctuation point'. It means that every punctuation
143preceeding that element is put behind it.
144
145=item soff
146
147Turn spacing off.
148
149=item son
150
151Turn spacing on.
152
153=item stoggle
154
155Toogle spacing.
156
157=item hs
158
159Print space no matter spacing mode.
160
161=back
162
163=head1 TODO
164
165* The concat_until only works with standalone macros. This means that
166    .Po blah Pc
167will hang until .Pc in encountered.
168
169* Provide default macros for Bd/Ed
170
171* The reference implementation is uncomplete
172
173=cut
174
175package Mdoc;
176use strict;
177use warnings;
178use List::Util qw(reduce);
179use Text::ParseWords qw(quotewords);
180use Carp;
181use Exporter qw(import);
182our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
183
184use constant {
185    ns      => ['nospace'],
186    soff    => ['spaceoff'],
187    son     => ['spaceon'],
188    stoggle => ['spacetoggle'],
189    hs      => ['hardspace'],
190};
191
192sub pp {
193    my $c = shift;
194    return ['pp', $c ];
195}
196sub gen_encloser {
197    my ($o, $c) = @_;
198    return sub { ($o, ns, @_, ns, pp($c)) };
199}
200
201sub mapwords(&@) {
202    my ($f, @l) = @_;
203    my @res;
204    for my $el (@l) {
205        local $_ = $el;
206        push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
207                    $el : $f->();
208    }
209    return @res;
210}
211
212my %macros;
213
214###############################################################################
215
216# Default macro definitions start
217
218###############################################################################
219
220def_macro('Xo',  sub { @_ }, concat_until => '.Xc');
221
222def_macro('.Ns', sub {ns, @_});
223def_macro('Ns',  sub {ns, @_});
224
225{
226    my %reference;
227    def_macro('.Rs', sub { () } );
228    def_macro('.%A', sub {
229        if ($reference{authors}) {
230            $reference{authors} .= " and @_"
231        }
232        else {
233            $reference{authors} = "@_";
234        }
235        return ();
236    });
237    def_macro('.%T', sub { $reference{title} = "@_"; () } );
238    def_macro('.%O', sub { $reference{optional} = "@_"; () } );
239
240    sub set_Re_callback {
241        my ($sub) = @_;
242        croak 'Not a CODE reference' if not ref $sub eq 'CODE';
243        def_macro('.Re', sub {
244            my @ret = $sub->(\%reference);
245            %reference = (); @ret
246        });
247        return;
248    }
249}
250
251def_macro('.Bl', sub { die '.Bl - no list callback set' });
252def_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
253def_macro('.El', sub { die '.El requires .Bl first' });
254
255
256{
257    my $elcb = sub { () };
258
259    sub set_El_callback {
260        my ($sub) = @_;
261        croak 'Not a CODE reference' if ref $sub ne 'CODE';
262        $elcb = $sub;
263        return;
264    }
265
266    sub set_Bl_callback {
267        my ($blcb, %defs) = @_;
268        croak 'Not a CODE reference' if ref $blcb ne 'CODE';
269        def_macro('.Bl', sub {
270
271            my $orig_it   = get_macro('.It');
272            my $orig_el   = get_macro('.El');
273            my $orig_bl   = get_macro('.Bl');
274            my $orig_elcb = $elcb;
275
276            # Restore previous .It and .El on each .El
277            def_macro('.El', sub {
278                    def_macro('.El', delete $orig_el->{run}, %$orig_el);
279                    def_macro('.It', delete $orig_it->{run}, %$orig_it);
280                    def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
281                    my @ret = $elcb->(@_);
282                    $elcb = $orig_elcb;
283                    @ret
284                });
285            $blcb->(@_)
286        }, %defs);
287        return;
288    }
289}
290
291def_macro('.Sm', sub {
292    my ($arg) = @_;
293    if (defined $arg) {
294        space($arg);
295    } else {
296        space() eq 'off' ?
297            space('on') :
298            space('off');
299    }
300    ()
301} );
302def_macro('Sm', do { my $off; sub {
303    my ($arg) = @_;
304    if (defined $arg && $arg =~ /^(on|off)$/) {
305        shift;
306        if    ($arg eq 'off') { soff, @_; }
307        elsif ($arg eq 'on')  { son, @_; }
308    }
309    else {
310        stoggle, @_;
311    }
312}} );
313
314###############################################################################
315
316# Default macro definitions end
317
318###############################################################################
319
320sub def_macro {
321    croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
322    my ($macro, $sub, %def) = @_;
323    croak 'Not a CODE reference' if ref $sub ne 'CODE';
324
325    $macros{ $macro } = {
326        run          => $sub,
327        greedy       => delete $def{greedy} || 0,
328        raw          => delete $def{raw}    || 0,
329        concat_until => delete $def{concat_until},
330    };
331    if ($macros{ $macro }{concat_until}) {
332        $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
333        $macros{ $macro }{greedy}                  = 1;
334    }
335    return;
336}
337
338sub get_macro {
339    my ($macro) = @_;
340    croak "Macro <$macro> not defined" if not exists $macros{ $macro };
341    +{ %{ $macros{ $macro } } }
342}
343
344#TODO: document this
345sub parse_opts {
346    my %args;
347    my $last;
348    for (@_) {
349        if ($_ =~ /^\\?-/) {
350            s/^\\?-//;
351            $args{$_} = 1;
352            $last = _unquote($_);
353        }
354        else {
355            $args{$last} = _unquote($_) if $last;
356            undef $last;
357        }
358    }
359    return %args;
360}
361
362sub _is_control {
363    my ($el, $expected) = @_;
364    if (defined $expected) {
365        ref $el eq 'ARRAY' and $el->[0] eq $expected;
366    }
367    else {
368        ref $el eq 'ARRAY';
369    }
370}
371
372{
373    my $sep = ' ';
374
375    sub to_string {
376        if (@_ > 0) {
377            # Handle punctunation
378            my ($in_brace, @punct) = '';
379            my @new = map {
380                if (/^([\[\(])$/) {
381                    ($in_brace = $1) =~ tr/([/)]/;
382                    $_, ns
383                }
384                elsif (/^([\)\]])$/ && $in_brace eq $1) {
385                    $in_brace = '';
386                    ns, $_
387                }
388                elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
389                    push @punct, ns, $_;
390                    ();
391                }
392                elsif (_is_control($_, 'pp')) {
393                    $_->[1]
394                }
395                elsif (_is_control($_)) {
396                    $_
397                }
398                else {
399                    splice (@punct), $_;
400                }
401            } @_;
402            push @new, @punct;
403
404            # Produce string out of an array dealing with the special control characters
405            # space('off') must but one character delayed
406            my ($no_space, $space_off) = 1;
407            my $res = '';
408            while (defined(my $el = shift @new)) {
409                if    (_is_control($el, 'hardspace'))   { $no_space = 1; $res .= ' ' }
410                elsif (_is_control($el, 'nospace'))     { $no_space = 1;             }
411                elsif (_is_control($el, 'spaceoff'))    { $space_off = 1;            }
412                elsif (_is_control($el, 'spaceon'))     { space('on');               }
413                elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
414                                                            $space_off = 1 :
415                                                            space('on')              }
416                else {
417                    if ($no_space) {
418                        $no_space = 0;
419                        $res .= "$el"
420                    }
421                    else {
422                        $res .= "$sep$el"
423                    }
424
425                    if ($space_off)    { space('off'); $space_off = 0; }
426                }
427            }
428            $res
429        }
430        else {
431            '';
432        }
433    }
434
435    sub space {
436        my ($arg) = @_;
437        if (defined $arg && $arg =~ /^(on|off)$/) {
438            $sep = ' ' if $arg eq 'on';
439            $sep = ''  if $arg eq 'off';
440            return;
441        }
442        else {
443            return $sep eq '' ? 'off' : 'on';
444        }
445    }
446}
447
448sub _unquote {
449    my @args = @_;
450    $_ =~ s/^"([^"]+)"$/$1/g for @args;
451    wantarray ? @args : $args[0];
452}
453
454sub call_macro {
455    my ($macro, @args) = @_;
456    my @ret;
457
458    my @newargs;
459    my $i = 0;
460
461    @args = _unquote(@args) if (!$macros{ $macro }{raw});
462
463    # Call any callable macros in the argument list
464    for (@args) {
465        if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
466            push @ret, call_macro($_, @args[$i+1 .. $#args]);
467            last;
468        } else {
469            if ($macros{ $macro }{greedy}) {
470                push @ret, $_;
471            }
472            else {
473                push @newargs, $_;
474            }
475        }
476        $i++;
477    }
478
479    if ($macros{ $macro }{concat_until}) {
480        my ($n_macro, @n_args) = ('');
481        while (1) {
482            die "EOF was reached and no $macros{ $macro }{concat_until} found"
483                if not defined $n_macro;
484            ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
485            if ($n_macro eq $macros{ $macro }{concat_until}) {
486                push @ret, call_macro($n_macro, @n_args);
487                last;
488            }
489            else {
490                $n_macro =~ s/^\.//;
491                push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
492            }
493        }
494    }
495
496    if ($macros{ $macro }{greedy}) {
497        #print "MACROG $macro (", (join ', ', @ret), ")\n";
498        return $macros{ $macro }{run}->(@ret);
499    }
500    else {
501        #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
502        return $macros{ $macro }{run}->(@newargs), @ret;
503    }
504}
505
506{
507    my ($in_fh, $out_sub, $preprocess_sub);
508    sub parse_line {
509        $in_fh          = $_[0] if defined $_[0] || !defined $in_fh;
510        $out_sub        = $_[1] if defined $_[1] || !defined $out_sub;
511        $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
512
513        croak 'out_sub not a CODE reference'
514            if not ref $out_sub eq 'CODE';
515        croak 'preprocess_sub not a CODE reference'
516            if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
517
518        while (my $line = <$in_fh>) {
519            chomp $line;
520            if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
521                $line =~ /^\.\\"/)
522            {
523                $line =~ s/ +/ /g;
524                my ($macro, @args) = quotewords(' ', 1, $line);
525                @args = grep { defined $_ } @args;
526                $preprocess_sub->(@args) if defined $preprocess_sub;
527                if ($macro && exists $macros{ $macro }) {
528                    return ($macro, @args);
529                } else {
530                    $out_sub->($line);
531                }
532            }
533            else {
534                $out_sub->($line);
535            }
536        }
537        return;
538    }
539}
540
5411;
542__END__
543