1189251Ssam=head1 NAME 2189251Ssam 3189251SsamMdoc - perl module to parse Mdoc macros 4189251Ssam 5252726Srpaulo=head1 SYNOPSIS 6252726Srpaulo 7189251Ssam use Mdoc qw(ns pp soff son stoggle mapwords); 8189251Ssam 9189251SsamSee mdoc2man and mdoc2texi for code examples. 10189251Ssam 11189251Ssam=head1 FUNCTIONS 12189251Ssam 13189251Ssam=over 4 14189251Ssam 15189251Ssam=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] ) 16189251Ssam 17189251SsamDefine new macro. The CODE reference will be called by call_macro(). You can 18189251Ssamhave two distinct definitions for and inline macro and for a standalone macro 19189251Ssam(i. e. 'Pa' and '.Pa'). 20189251Ssam 21189251SsamThe CODE reference is passed a list of arguments and is expected to return list 22189251Ssamof strings and control characters (see C<CONSTANTS>). 23189251Ssam 24189251SsamBy default the surrouding "" from arguments to macros are removed, use C<raw> 25189251Ssamto disable this. 26189251Ssam 27189251SsamNormaly CODE reference is passed all arguments up to next nested macro. Set 28189251SsamC<greedy> to to pass everything up to the end of the line. 29189251Ssam 30189251SsamIf the concat_until is present, the line is concated until the .Xx macro is 31189251Ssamfound. For example the following macro definition 32189251Ssam 33189251Ssam def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' } 34189251Ssam def_macro('.Cm', sub { mapwords {'($_)'} @_ } } 35189251Ssam 36189251Ssamand the following input 37189251Ssam 38189251Ssam .Oo 39189251Ssam .Cm foo | 40189251Ssam .Cm bar | 41189251Ssam .Oc 42189251Ssam 43189251Ssamresults in [(foo) | (bar)] 44189251Ssam 45189251Ssam=item get_macro( NAME ) 46189251Ssam 47189251SsamReturns a hash reference like: 48189251Ssam 49189251Ssam { run => CODE, raw => [1|0], greedy => [1|0] } 50189251Ssam 51189251SsamWhere C<CODE> is the CODE reference used to define macro called C<NAME> 52189251Ssam 53189251Ssam=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE ) 54189251Ssam 55189251SsamParse a line from the C<INPUT> filehandle. If a macro was detected it returns a 56189251Ssamlist (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving 57189251Ssamcaller a chance to modify line before printing it. If C<PREPROCESS_CODE> is 58189251Ssamdefined it calls it prior to passing argument to a macro, giving caller a 59189251Ssamchance to alter them. if EOF was reached undef is returned. 60189251Ssam 61189251Ssam=item call_macro( MACRO, ARGS, ... ) 62189251Ssam 63189251SsamCall macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is 64189251Ssamcalled and for all the nested macros. Every called macro returns a list which 65189251Ssamis appended to return value and returned when all nested macros are processed. 66189251SsamUse to_string() to produce a printable string from the list. 67189251Ssam 68189251Ssam=item to_string ( LIST ) 69189251Ssam 70189251SsamProcesses C<LIST> returned from call_macro() and returns formatted string. 71189251Ssam 72189251Ssam=item mapwords BLOCK ARRAY 73189251Ssam 74189251SsamThis is like perl's map only it calls BLOCK only on elements which are not 75189251Ssampunctuation or control characters. 76189251Ssam 77189251Ssam=item space ( ['on'|'off] ) 78189251Ssam 79189251SsamTurn spacing on or off. If called without argument it returns the current state. 80189251Ssam 81189251Ssam=item gen_encloser ( START, END ) 82189251Ssam 83189251SsamHelper function for generating macros that enclose their arguments. 84189251Ssam gen_encloser(qw({ })); 85189251Ssamreturns 86189251Ssam sub { '{', ns, @_, ns, pp('}')} 87189251Ssam 88189251Ssam=item set_Bl_callback( CODE , DEFS ) 89189251Ssam 90189251SsamThis module implements the Bl/El macros for you. Using set_Bl_callback you can 91189251Ssamprovide a macro definition that should be executed on a .Bl call. 92189251Ssam 93189251Ssam=item set_El_callback( CODE , DEFS ) 94189251Ssam 95189251SsamThis module implements the Bl/El macros for you. Using set_El_callback you can 96189251Ssamprovide a macro definition that should be executed on a .El call. 97189251Ssam 98189251Ssam=item set_Re_callback( CODE ) 99189251Ssam 100189251SsamThe C<CODE> is called after a Rs/Re block is done. With a hash reference as a 101189251Ssamparameter, describing the reference. 102189251Ssam 103189251Ssam=back 104189251Ssam 105189251Ssam=head1 CONSTANTS 106189251Ssam 107189251Ssam=over 4 108189251Ssam 109189251Ssam=item ns 110189251Ssam 111189251SsamIndicate 'no space' between to members of the list. 112189251Ssam 113189251Ssam=item pp ( STRING ) 114189251Ssam 115189251SsamThe string is 'punctuation point'. It means that every punctuation 116189251Ssampreceeding that element is put behind it. 117189251Ssam 118189251Ssam=item soff 119189251Ssam 120189251SsamTurn spacing off. 121189251Ssam 122189251Ssam=item son 123189251Ssam 124189251SsamTurn spacing on. 125189251Ssam 126189251Ssam=item stoggle 127189251Ssam 128189251SsamToogle spacing. 129189251Ssam 130189251Ssam=item hs 131189251Ssam 132189251SsamPrint space no matter spacing mode. 133189251Ssam 134189251Ssam=back 135189251Ssam 136252726Srpaulo=head1 TODO 137189251Ssam 138189251Ssam* The concat_until only works with standalone macros. This means that 139189251Ssam .Po blah Pc 140189251Ssamwill hang until .Pc in encountered. 141189251Ssam 142189251Ssam* Provide default macros for Bd/Ed 143189251Ssam 144189251Ssam* The reference implementation is uncomplete 145189251Ssam 146189251Ssam=cut 147189251Ssam 148189251Ssampackage Mdoc; 149189251Ssamuse strict; 150189251Ssamuse warnings; 151189251Ssamuse List::Util qw(reduce); 152189251Ssamuse Text::ParseWords qw(quotewords); 153189251Ssamuse Carp; 154189251Ssamuse Exporter qw(import); 155189251Ssamour @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl); 156189251Ssam 157189251Ssamuse constant { 158189251Ssam ns => ['nospace'], 159189251Ssam soff => ['spaceoff'], 160189251Ssam son => ['spaceon'], 161189251Ssam stoggle => ['spacetoggle'], 162189251Ssam hs => ['hardspace'], 163189251Ssam}; 164189251Ssam 165189251Ssamsub pp { 166189251Ssam my $c = shift; 167189251Ssam return ['pp', $c ]; 168189251Ssam} 169189251Ssamsub gen_encloser { 170189251Ssam my ($o, $c) = @_; 171189251Ssam return sub { ($o, ns, @_, ns, pp($c)) }; 172189251Ssam} 173189251Ssam 174189251Ssamsub mapwords(&@) { 175189251Ssam my ($f, @l) = @_; 176189251Ssam my @res; 177189251Ssam for my $el (@l) { 178189251Ssam local $_ = $el; 179189251Ssam push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? 180189251Ssam $el : $f->(); 181189251Ssam } 182189251Ssam return @res; 183189251Ssam} 184189251Ssam 185189251Ssammy %macros; 186189251Ssam 187189251Ssam############################################################################### 188189251Ssam 189189251Ssam# Default macro definitions start 190189251Ssam 191189251Ssam############################################################################### 192189251Ssam 193189251Ssamdef_macro('Xo', sub { @_ }, concat_until => '.Xc'); 194189251Ssam 195189251Ssamdef_macro('.Ns', sub {ns, @_}); 196189251Ssamdef_macro('Ns', sub {ns, @_}); 197189251Ssam 198189251Ssam{ 199189251Ssam my %reference; 200189251Ssam def_macro('.Rs', sub { () } ); 201189251Ssam def_macro('.%A', sub { 202189251Ssam if ($reference{authors}) { 203189251Ssam $reference{authors} .= " and @_" 204189251Ssam } 205189251Ssam else { 206189251Ssam $reference{authors} = "@_"; 207189251Ssam } 208189251Ssam return (); 209189251Ssam }); 210189251Ssam def_macro('.%T', sub { $reference{title} = "@_"; () } ); 211189251Ssam def_macro('.%O', sub { $reference{optional} = "@_"; () } ); 212189251Ssam 213189251Ssam sub set_Re_callback { 214189251Ssam my ($sub) = @_; 215189251Ssam croak 'Not a CODE reference' if not ref $sub eq 'CODE'; 216189251Ssam def_macro('.Re', sub { 217189251Ssam my @ret = $sub->(\%reference); 218189251Ssam %reference = (); @ret 219189251Ssam }); 220189251Ssam return; 221189251Ssam } 222189251Ssam} 223189251Ssam 224189251Ssamdef_macro('.Bl', sub { die '.Bl - no list callback set' }); 225189251Ssamdef_macro('.It', sub { die ".It called outside of list context - maybe near line $." }); 226189251Ssamdef_macro('.El', sub { die '.El requires .Bl first' }); 227189251Ssam 228189251Ssam 229189251Ssam{ 230189251Ssam my $elcb = sub { () }; 231189251Ssam 232189251Ssam sub set_El_callback { 233189251Ssam my ($sub) = @_; 234189251Ssam croak 'Not a CODE reference' if ref $sub ne 'CODE'; 235189251Ssam $elcb = $sub; 236189251Ssam return; 237189251Ssam } 238189251Ssam 239189251Ssam sub set_Bl_callback { 240189251Ssam my ($blcb, %defs) = @_; 241189251Ssam croak 'Not a CODE reference' if ref $blcb ne 'CODE'; 242189251Ssam def_macro('.Bl', sub { 243189251Ssam 244189251Ssam my $orig_it = get_macro('.It'); 245189251Ssam my $orig_el = get_macro('.El'); 246189251Ssam my $orig_bl = get_macro('.Bl'); 247189251Ssam my $orig_elcb = $elcb; 248189251Ssam 249189251Ssam # Restore previous .It and .El on each .El 250189251Ssam def_macro('.El', sub { 251189251Ssam def_macro('.El', delete $orig_el->{run}, %$orig_el); 252189251Ssam def_macro('.It', delete $orig_it->{run}, %$orig_it); 253189251Ssam def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl); 254189251Ssam my @ret = $elcb->(@_); 255189251Ssam $elcb = $orig_elcb; 256189251Ssam @ret 257189251Ssam }); 258189251Ssam $blcb->(@_) 259189251Ssam }, %defs); 260189251Ssam return; 261189251Ssam } 262189251Ssam} 263189251Ssam 264189251Ssamdef_macro('.Sm', sub { 265189251Ssam my ($arg) = @_; 266189251Ssam if (defined $arg) { 267189251Ssam space($arg); 268189251Ssam } else { 269189251Ssam space() eq 'off' ? 270189251Ssam space('on') : 271189251Ssam space('off'); 272189251Ssam } 273189251Ssam () 274189251Ssam} ); 275189251Ssamdef_macro('Sm', do { my $off; sub { 276189251Ssam my ($arg) = @_; 277189251Ssam if (defined $arg && $arg =~ /^(on|off)$/) { 278189251Ssam shift; 279189251Ssam if ($arg eq 'off') { soff, @_; } 280189251Ssam elsif ($arg eq 'on') { son, @_; } 281189251Ssam } 282189251Ssam else { 283189251Ssam stoggle, @_; 284189251Ssam } 285189251Ssam}} ); 286189251Ssam 287189251Ssam############################################################################### 288189251Ssam 289189251Ssam# Default macro definitions end 290189251Ssam 291189251Ssam############################################################################### 292189251Ssam 293189251Ssamsub def_macro { 294189251Ssam croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2; 295189251Ssam my ($macro, $sub, %def) = @_; 296189251Ssam croak 'Not a CODE reference' if ref $sub ne 'CODE'; 297189251Ssam 298189251Ssam $macros{ $macro } = { 299189251Ssam run => $sub, 300189251Ssam greedy => delete $def{greedy} || 0, 301189251Ssam raw => delete $def{raw} || 0, 302189251Ssam concat_until => delete $def{concat_until}, 303189251Ssam }; 304189251Ssam if ($macros{ $macro }{concat_until}) { 305189251Ssam $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } }; 306189251Ssam $macros{ $macro }{greedy} = 1; 307189251Ssam } 308189251Ssam return; 309189251Ssam} 310189251Ssam 311189251Ssamsub get_macro { 312189251Ssam my ($macro) = @_; 313189251Ssam croak "Macro <$macro> not defined" if not exists $macros{ $macro }; 314189251Ssam +{ %{ $macros{ $macro } } } 315189251Ssam} 316189251Ssam 317189251Ssam#TODO: document this 318189251Ssamsub parse_opts { 319189251Ssam my %args; 320189251Ssam my $last; 321189251Ssam for (@_) { 322189251Ssam if ($_ =~ /^\\?-/) { 323189251Ssam s/^\\?-//; 324189251Ssam $args{$_} = 1; 325189251Ssam $last = _unquote($_); 326189251Ssam } 327189251Ssam else { 328189251Ssam $args{$last} = _unquote($_) if $last; 329189251Ssam undef $last; 330189251Ssam } 331189251Ssam } 332189251Ssam return %args; 333189251Ssam} 334189251Ssam 335189251Ssamsub _is_control { 336189251Ssam my ($el, $expected) = @_; 337189251Ssam if (defined $expected) { 338189251Ssam ref $el eq 'ARRAY' and $el->[0] eq $expected; 339 } 340 else { 341 ref $el eq 'ARRAY'; 342 } 343} 344 345{ 346 my $sep = ' '; 347 348 sub to_string { 349 if (@_ > 0) { 350 # Handle punctunation 351 my ($in_brace, @punct) = ''; 352 my @new = map { 353 if (/^([\[\(])$/) { 354 ($in_brace = $1) =~ tr/([/)]/; 355 $_, ns 356 } 357 elsif (/^([\)\]])$/ && $in_brace eq $1) { 358 $in_brace = ''; 359 ns, $_ 360 } 361 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) { 362 push @punct, ns, $_; 363 (); 364 } 365 elsif (_is_control($_, 'pp')) { 366 $_->[1] 367 } 368 elsif (_is_control($_)) { 369 $_ 370 } 371 else { 372 splice (@punct), $_; 373 } 374 } @_; 375 push @new, @punct; 376 377 # Produce string out of an array dealing with the special control characters 378 # space('off') must but one character delayed 379 my ($no_space, $space_off) = 1; 380 my $res = ''; 381 while (defined(my $el = shift @new)) { 382 if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' } 383 elsif (_is_control($el, 'nospace')) { $no_space = 1; } 384 elsif (_is_control($el, 'spaceoff')) { $space_off = 1; } 385 elsif (_is_control($el, 'spaceon')) { space('on'); } 386 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? 387 $space_off = 1 : 388 space('on') } 389 else { 390 if ($no_space) { 391 $no_space = 0; 392 $res .= "$el" 393 } 394 else { 395 $res .= "$sep$el" 396 } 397 398 if ($space_off) { space('off'); $space_off = 0; } 399 } 400 } 401 $res 402 } 403 else { 404 ''; 405 } 406 } 407 408 sub space { 409 my ($arg) = @_; 410 if (defined $arg && $arg =~ /^(on|off)$/) { 411 $sep = ' ' if $arg eq 'on'; 412 $sep = '' if $arg eq 'off'; 413 return; 414 } 415 else { 416 return $sep eq '' ? 'off' : 'on'; 417 } 418 } 419} 420 421sub _unquote { 422 my @args = @_; 423 $_ =~ s/^"([^"]+)"$/$1/g for @args; 424 wantarray ? @args : $args[0]; 425} 426 427sub call_macro { 428 my ($macro, @args) = @_; 429 my @ret; 430 431 my @newargs; 432 my $i = 0; 433 434 @args = _unquote(@args) if (!$macros{ $macro }{raw}); 435 436 # Call any callable macros in the argument list 437 for (@args) { 438 if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) { 439 push @ret, call_macro($_, @args[$i+1 .. $#args]); 440 last; 441 } else { 442 if ($macros{ $macro }{greedy}) { 443 push @ret, $_; 444 } 445 else { 446 push @newargs, $_; 447 } 448 } 449 $i++; 450 } 451 452 if ($macros{ $macro }{concat_until}) { 453 my ($n_macro, @n_args) = (''); 454 while (1) { 455 die "EOF was reached and no $macros{ $macro }{concat_until} found" 456 if not defined $n_macro; 457 ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift }); 458 if ($n_macro eq $macros{ $macro }{concat_until}) { 459 push @ret, call_macro($n_macro, @n_args); 460 last; 461 } 462 else { 463 $n_macro =~ s/^\.//; 464 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro }; 465 } 466 } 467 } 468 469 if ($macros{ $macro }{greedy}) { 470 #print "MACROG $macro (", (join ', ', @ret), ")\n"; 471 return $macros{ $macro }{run}->(@ret); 472 } 473 else { 474 #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n"; 475 return $macros{ $macro }{run}->(@newargs), @ret; 476 } 477} 478 479{ 480 my ($in_fh, $out_sub, $preprocess_sub); 481 sub parse_line { 482 $in_fh = $_[0] if defined $_[0] || !defined $in_fh; 483 $out_sub = $_[1] if defined $_[1] || !defined $out_sub; 484 $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub; 485 486 croak 'out_sub not a CODE reference' 487 if not ref $out_sub eq 'CODE'; 488 croak 'preprocess_sub not a CODE reference' 489 if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE'; 490 491 while (my $line = <$in_fh>) { 492 chomp $line; 493 if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || 494 $line =~ /^\.\\"/) 495 { 496 $line =~ s/ +/ /g; 497 my ($macro, @args) = quotewords(' ', 1, $line); 498 @args = grep { defined $_ } @args; 499 $preprocess_sub->(@args) if defined $preprocess_sub; 500 if ($macro && exists $macros{ $macro }) { 501 return ($macro, @args); 502 } else { 503 $out_sub->($line); 504 } 505 } 506 else { 507 $out_sub->($line); 508 } 509 } 510 return; 511 } 512} 513 5141; 515__END__ 516