1#============================================================= -*-Perl-*- 2# 3# Template::Parser 4# 5# DESCRIPTION 6# This module implements a LALR(1) parser and assocated support 7# methods to parse template documents into the appropriate "compiled" 8# format. Much of the parser DFA code (see _parse() method) is based 9# on Francois Desarmenien's Parse::Yapp module. Kudos to him. 10# 11# AUTHOR 12# Andy Wardley <abw@wardley.org> 13# 14# COPYRIGHT 15# Copyright (C) 1996-2007 Andy Wardley. All Rights Reserved. 16# 17# This module is free software; you can redistribute it and/or 18# modify it under the same terms as Perl itself. 19# 20# The following copyright notice appears in the Parse::Yapp 21# documentation. 22# 23# The Parse::Yapp module and its related modules and shell 24# scripts are copyright (c) 1998 Francois Desarmenien, 25# France. All rights reserved. 26# 27# You may use and distribute them under the terms of either 28# the GNU General Public License or the Artistic License, as 29# specified in the Perl README file. 30# 31#============================================================================ 32 33package Template::Parser; 34 35use strict; 36use warnings; 37use base 'Template::Base'; 38 39use Template::Constants qw( :status :chomp ); 40use Template::Directive; 41use Template::Grammar; 42 43# parser state constants 44use constant CONTINUE => 0; 45use constant ACCEPT => 1; 46use constant ERROR => 2; 47use constant ABORT => 3; 48 49our $VERSION = 2.89; 50our $DEBUG = 0 unless defined $DEBUG; 51our $ERROR = ''; 52 53 54#======================================================================== 55# -- COMMON TAG STYLES -- 56#======================================================================== 57 58our $TAG_STYLE = { 59 'default' => [ '\[%', '%\]' ], 60 'template1' => [ '[\[%]%', '%[\]%]' ], 61 'metatext' => [ '%%', '%%' ], 62 'html' => [ '<!--', '-->' ], 63 'mason' => [ '<%', '>' ], 64 'asp' => [ '<%', '%>' ], 65 'php' => [ '<\?', '\?>' ], 66 'star' => [ '\[\*', '\*\]' ], 67}; 68$TAG_STYLE->{ template } = $TAG_STYLE->{ tt2 } = $TAG_STYLE->{ default }; 69 70 71our $DEFAULT_STYLE = { 72 START_TAG => $TAG_STYLE->{ default }->[0], 73 END_TAG => $TAG_STYLE->{ default }->[1], 74# TAG_STYLE => 'default', 75 ANYCASE => 0, 76 INTERPOLATE => 0, 77 PRE_CHOMP => 0, 78 POST_CHOMP => 0, 79 V1DOLLAR => 0, 80 EVAL_PERL => 0, 81}; 82 83our $QUOTED_ESCAPES = { 84 n => "\n", 85 r => "\r", 86 t => "\t", 87}; 88 89# note that '-' must come first so Perl doesn't think it denotes a range 90our $CHOMP_FLAGS = qr/[-=~+]/; 91 92 93 94#======================================================================== 95# ----- PUBLIC METHODS ----- 96#======================================================================== 97 98#------------------------------------------------------------------------ 99# new(\%config) 100# 101# Constructor method. 102#------------------------------------------------------------------------ 103 104sub new { 105 my $class = shift; 106 my $config = $_[0] && ref($_[0]) eq 'HASH' ? shift(@_) : { @_ }; 107 my ($tagstyle, $debug, $start, $end, $defaults, $grammar, $hash, $key, $udef); 108 109 my $self = bless { 110 START_TAG => undef, 111 END_TAG => undef, 112 TAG_STYLE => 'default', 113 ANYCASE => 0, 114 INTERPOLATE => 0, 115 PRE_CHOMP => 0, 116 POST_CHOMP => 0, 117 V1DOLLAR => 0, 118 EVAL_PERL => 0, 119 FILE_INFO => 1, 120 GRAMMAR => undef, 121 _ERROR => '', 122 IN_BLOCK => [ ], 123 TRACE_VARS => $config->{ TRACE_VARS }, 124 FACTORY => $config->{ FACTORY } || 'Template::Directive', 125 }, $class; 126 127 # update self with any relevant keys in config 128 foreach $key (keys %$self) { 129 $self->{ $key } = $config->{ $key } if defined $config->{ $key }; 130 } 131 $self->{ FILEINFO } = [ ]; 132 133 # DEBUG config item can be a bitmask 134 if (defined ($debug = $config->{ DEBUG })) { 135 $self->{ DEBUG } = $debug & ( Template::Constants::DEBUG_PARSER 136 | Template::Constants::DEBUG_FLAGS ); 137 $self->{ DEBUG_DIRS } = $debug & Template::Constants::DEBUG_DIRS; 138 } 139 # package variable can be set to 1 to support previous behaviour 140 elsif ($DEBUG == 1) { 141 $self->{ DEBUG } = Template::Constants::DEBUG_PARSER; 142 $self->{ DEBUG_DIRS } = 0; 143 } 144 # otherwise let $DEBUG be a bitmask 145 else { 146 $self->{ DEBUG } = $DEBUG & ( Template::Constants::DEBUG_PARSER 147 | Template::Constants::DEBUG_FLAGS ); 148 $self->{ DEBUG_DIRS } = $DEBUG & Template::Constants::DEBUG_DIRS; 149 } 150 151 $grammar = $self->{ GRAMMAR } ||= do { 152 require Template::Grammar; 153 Template::Grammar->new(); 154 }; 155 156 # instantiate a FACTORY object 157 unless (ref $self->{ FACTORY }) { 158 my $fclass = $self->{ FACTORY }; 159 $self->{ FACTORY } = $self->{ FACTORY }->new( 160 NAMESPACE => $config->{ NAMESPACE } 161 ) 162 || return $class->error($self->{ FACTORY }->error()); 163 } 164 165 # load grammar rules, states and lex table 166 @$self{ qw( LEXTABLE STATES RULES ) } 167 = @$grammar{ qw( LEXTABLE STATES RULES ) }; 168 169 $self->new_style($config) 170 || return $class->error($self->error()); 171 172 return $self; 173} 174 175#----------------------------------------------------------------------- 176# These methods are used to track nested IF and WHILE blocks. Each 177# generated if/while block is given a label indicating the directive 178# type and nesting depth, e.g. FOR0, WHILE1, FOR2, WHILE3, etc. The 179# NEXT and LAST directives use the innermost label, e.g. last WHILE3; 180#----------------------------------------------------------------------- 181 182sub enter_block { 183 my ($self, $name) = @_; 184 my $blocks = $self->{ IN_BLOCK }; 185 push(@{ $self->{ IN_BLOCK } }, $name); 186} 187 188sub leave_block { 189 my $self = shift; 190 my $label = $self->block_label; 191 pop(@{ $self->{ IN_BLOCK } }); 192 return $label; 193} 194 195sub in_block { 196 my ($self, $name) = @_; 197 my $blocks = $self->{ IN_BLOCK }; 198 return @$blocks && $blocks->[-1] eq $name; 199} 200 201sub block_label { 202 my ($self, $prefix, $suffix) = @_; 203 my $blocks = $self->{ IN_BLOCK }; 204 my $name = @$blocks 205 ? $blocks->[-1] . scalar @$blocks 206 : undef; 207 return join('', grep { defined $_ } $prefix, $name, $suffix); 208} 209 210 211 212#------------------------------------------------------------------------ 213# new_style(\%config) 214# 215# Install a new (stacked) parser style. This feature is currently 216# experimental but should mimic the previous behaviour with regard to 217# TAG_STYLE, START_TAG, END_TAG, etc. 218#------------------------------------------------------------------------ 219 220sub new_style { 221 my ($self, $config) = @_; 222 my $styles = $self->{ STYLE } ||= [ ]; 223 my ($tagstyle, $tags, $start, $end, $key); 224 225 # clone new style from previous or default style 226 my $style = { %{ $styles->[-1] || $DEFAULT_STYLE } }; 227 228 # expand START_TAG and END_TAG from specified TAG_STYLE 229 if ($tagstyle = $config->{ TAG_STYLE }) { 230 return $self->error("Invalid tag style: $tagstyle") 231 unless defined ($tags = $TAG_STYLE->{ $tagstyle }); 232 ($start, $end) = @$tags; 233 $config->{ START_TAG } ||= $start; 234 $config->{ END_TAG } ||= $end; 235 } 236 237 foreach $key (keys %$DEFAULT_STYLE) { 238 $style->{ $key } = $config->{ $key } if defined $config->{ $key }; 239 } 240 push(@$styles, $style); 241 return $style; 242} 243 244 245#------------------------------------------------------------------------ 246# old_style() 247# 248# Pop the current parser style and revert to the previous one. See 249# new_style(). ** experimental ** 250#------------------------------------------------------------------------ 251 252sub old_style { 253 my $self = shift; 254 my $styles = $self->{ STYLE }; 255 return $self->error('only 1 parser style remaining') 256 unless (@$styles > 1); 257 pop @$styles; 258 return $styles->[-1]; 259} 260 261 262#------------------------------------------------------------------------ 263# parse($text, $data) 264# 265# Parses the text string, $text and returns a hash array representing 266# the compiled template block(s) as Perl code, in the format expected 267# by Template::Document. 268#------------------------------------------------------------------------ 269 270sub parse { 271 my ($self, $text, $info) = @_; 272 my ($tokens, $block); 273 274 $info->{ DEBUG } = $self->{ DEBUG_DIRS } 275 unless defined $info->{ DEBUG }; 276 277# print "info: { ", join(', ', map { "$_ => $info->{ $_ }" } keys %$info), " }\n"; 278 279 # store for blocks defined in the template (see define_block()) 280 my $defblock = $self->{ DEFBLOCK } = { }; 281 my $metadata = $self->{ METADATA } = [ ]; 282 my $variables = $self->{ VARIABLES } = { }; 283 $self->{ DEFBLOCKS } = [ ]; 284 285 $self->{ _ERROR } = ''; 286 287 # split file into TEXT/DIRECTIVE chunks 288 $tokens = $self->split_text($text) 289 || return undef; ## RETURN ## 290 291 push(@{ $self->{ FILEINFO } }, $info); 292 293 # parse chunks 294 $block = $self->_parse($tokens, $info); 295 296 pop(@{ $self->{ FILEINFO } }); 297 298 return undef unless $block; ## RETURN ## 299 300 $self->debug("compiled main template document block:\n$block") 301 if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER; 302 303 return { 304 BLOCK => $block, 305 DEFBLOCKS => $defblock, 306 VARIABLES => $variables, 307 METADATA => { @$metadata }, 308 }; 309} 310 311 312 313#------------------------------------------------------------------------ 314# split_text($text) 315# 316# Split input template text into directives and raw text chunks. 317#------------------------------------------------------------------------ 318 319sub split_text { 320 my ($self, $text) = @_; 321 my ($pre, $dir, $prelines, $dirlines, $postlines, $chomp, $tags, @tags); 322 my $style = $self->{ STYLE }->[-1]; 323 my ($start, $end, $prechomp, $postchomp, $interp ) = 324 @$style{ qw( START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE ) }; 325 my $tags_dir = $self->{ANYCASE} ? qr<TAGS>i : qr<TAGS>; 326 327 my @tokens = (); 328 my $line = 1; 329 330 return \@tokens ## RETURN ## 331 unless defined $text && length $text; 332 333 # extract all directives from the text 334 while ($text =~ s/ 335 ^(.*?) # $1 - start of line up to directive 336 (?: 337 $start # start of tag 338 (.*?) # $2 - tag contents 339 $end # end of tag 340 ) 341 //sx) { 342 343 ($pre, $dir) = ($1, $2); 344 $pre = '' unless defined $pre; 345 $dir = '' unless defined $dir; 346 347 $prelines = ($pre =~ tr/\n//); # newlines in preceeding text 348 $dirlines = ($dir =~ tr/\n//); # newlines in directive tag 349 $postlines = 0; # newlines chomped after tag 350 351 for ($dir) { 352 if (/^\#/) { 353 # comment out entire directive except for any end chomp flag 354 $dir = ($dir =~ /($CHOMP_FLAGS)$/o) ? $1 : ''; 355 } 356 else { 357 s/^($CHOMP_FLAGS)?\s*//so; 358 # PRE_CHOMP: process whitespace before tag 359 $chomp = $1 ? $1 : $prechomp; 360 $chomp =~ tr/-=~+/1230/; 361 if ($chomp && $pre) { 362 # chomp off whitespace and newline preceding directive 363 if ($chomp == CHOMP_ALL) { 364 $pre =~ s{ (\r?\n|^) [^\S\n]* \z }{}mx; 365 } 366 elsif ($chomp == CHOMP_COLLAPSE) { 367 $pre =~ s{ (\s+) \z }{ }x; 368 } 369 elsif ($chomp == CHOMP_GREEDY) { 370 $pre =~ s{ (\s+) \z }{}x; 371 } 372 } 373 } 374 375 # POST_CHOMP: process whitespace after tag 376 s/\s*($CHOMP_FLAGS)?\s*$//so; 377 $chomp = $1 ? $1 : $postchomp; 378 $chomp =~ tr/-=~+/1230/; 379 if ($chomp) { 380 if ($chomp == CHOMP_ALL) { 381 $text =~ s{ ^ ([^\S\n]* \n) }{}x 382 && $postlines++; 383 } 384 elsif ($chomp == CHOMP_COLLAPSE) { 385 $text =~ s{ ^ (\s+) }{ }x 386 && ($postlines += $1=~y/\n//); 387 } 388 # any trailing whitespace 389 elsif ($chomp == CHOMP_GREEDY) { 390 $text =~ s{ ^ (\s+) }{}x 391 && ($postlines += $1=~y/\n//); 392 } 393 } 394 } 395 396 # any text preceding the directive can now be added 397 if (length $pre) { 398 push(@tokens, $interp 399 ? [ $pre, $line, 'ITEXT' ] 400 : ('TEXT', $pre) ); 401 } 402 $line += $prelines; 403 404 # and now the directive, along with line number information 405 if (length $dir) { 406 # the TAGS directive is a compile-time switch 407 if ($dir =~ /^$tags_dir\s+(.*)/) { 408 my @tags = split(/\s+/, $1); 409 if (scalar @tags > 1) { 410 ($start, $end) = map { quotemeta($_) } @tags; 411 } 412 elsif ($tags = $TAG_STYLE->{ $tags[0] }) { 413 ($start, $end) = @$tags; 414 } 415 else { 416 warn "invalid TAGS style: $tags[0]\n"; 417 } 418 } 419 else { 420 # DIRECTIVE is pushed as: 421 # [ $dirtext, $line_no(s), \@tokens ] 422 push(@tokens, 423 [ $dir, 424 ($dirlines 425 ? sprintf("%d-%d", $line, $line + $dirlines) 426 : $line), 427 $self->tokenise_directive($dir) ]); 428 } 429 } 430 431 # update line counter to include directive lines and any extra 432 # newline chomped off the start of the following text 433 $line += $dirlines + $postlines; 434 } 435 436 # anything remaining in the string is plain text 437 push(@tokens, $interp 438 ? [ $text, $line, 'ITEXT' ] 439 : ( 'TEXT', $text) ) 440 if length $text; 441 442 return \@tokens; ## RETURN ## 443} 444 445 446 447#------------------------------------------------------------------------ 448# interpolate_text($text, $line) 449# 450# Examines $text looking for any variable references embedded like 451# $this or like ${ this }. 452#------------------------------------------------------------------------ 453 454sub interpolate_text { 455 my ($self, $text, $line) = @_; 456 my @tokens = (); 457 my ($pre, $var, $dir); 458 459 460 while ($text =~ 461 / 462 ( (?: \\. | [^\$] ){1,3000} ) # escaped or non-'$' character [$1] 463 | 464 ( \$ (?: # embedded variable [$2] 465 (?: \{ ([^\}]*) \} ) # ${ ... } [$3] 466 | 467 ([\w\.]+) # $word [$4] 468 ) 469 ) 470 /gx) { 471 472 ($pre, $var, $dir) = ($1, $3 || $4, $2); 473 474 # preceding text 475 if (defined($pre) && length($pre)) { 476 $line += $pre =~ tr/\n//; 477 $pre =~ s/\\\$/\$/g; 478 push(@tokens, 'TEXT', $pre); 479 } 480 # $variable reference 481 if ($var) { 482 $line += $dir =~ tr/\n/ /; 483 push(@tokens, [ $dir, $line, $self->tokenise_directive($var) ]); 484 } 485 # other '$' reference - treated as text 486 elsif ($dir) { 487 $line += $dir =~ tr/\n//; 488 push(@tokens, 'TEXT', $dir); 489 } 490 } 491 492 return \@tokens; 493} 494 495 496 497#------------------------------------------------------------------------ 498# tokenise_directive($text) 499# 500# Called by the private _parse() method when it encounters a DIRECTIVE 501# token in the list provided by the split_text() or interpolate_text() 502# methods. The directive text is passed by parameter. 503# 504# The method splits the directive into individual tokens as recognised 505# by the parser grammar (see Template::Grammar for details). It 506# constructs a list of tokens each represented by 2 elements, as per 507# split_text() et al. The first element contains the token type, the 508# second the token itself. 509# 510# The method tokenises the string using a complex (but fast) regex. 511# For a deeper understanding of the regex magic at work here, see 512# Jeffrey Friedl's excellent book "Mastering Regular Expressions", 513# from O'Reilly, ISBN 1-56592-257-3 514# 515# Returns a reference to the list of chunks (each one being 2 elements) 516# identified in the directive text. On error, the internal _ERROR string 517# is set and undef is returned. 518#------------------------------------------------------------------------ 519 520sub tokenise_directive { 521 my ($self, $text, $line) = @_; 522 my ($token, $uctoken, $type, $lookup); 523 my $lextable = $self->{ LEXTABLE }; 524 my $style = $self->{ STYLE }->[-1]; 525 my ($anycase, $start, $end) = @$style{ qw( ANYCASE START_TAG END_TAG ) }; 526 my @tokens = ( ); 527 528 while ($text =~ 529 / 530 # strip out any comments 531 (\#[^\n]*) 532 | 533 # a quoted phrase matches in $3 534 (["']) # $2 - opening quote, ' or " 535 ( # $3 - quoted text buffer 536 (?: # repeat group (no backreference) 537 \\\\ # an escaped backslash \\ 538 | # ...or... 539 \\\2 # an escaped quote \" or \' (match $1) 540 | # ...or... 541 . # any other character 542 | \n 543 )*? # non-greedy repeat 544 ) # end of $3 545 \2 # match opening quote 546 | 547 # an unquoted number matches in $4 548 (-?\d+(?:\.\d+)?) # numbers 549 | 550 # filename matches in $5 551 ( \/?\w+(?:(?:\/|::?)\w*)+ | \/\w+) 552 | 553 # an identifier matches in $6 554 (\w+) # variable identifier 555 | 556 # an unquoted word or symbol matches in $7 557 ( [(){}\[\]:;,\/\\] # misc parenthesis and symbols 558# | \-> # arrow operator (for future?) 559 | [+\-*] # math operations 560 | \$\{? # dollar with option left brace 561 | => # like '=' 562 | [=!<>]?= | [!<>] # eqality tests 563 | &&? | \|\|? # boolean ops 564 | \.\.? # n..n sequence 565 | \S+ # something unquoted 566 ) # end of $7 567 /gmxo) { 568 569 # ignore comments to EOL 570 next if $1; 571 572 # quoted string 573 if (defined ($token = $3)) { 574 # double-quoted string may include $variable references 575 if ($2 eq '"') { 576 if ($token =~ /[\$\\]/) { 577 $type = 'QUOTED'; 578 # unescape " and \ but leave \$ escaped so that 579 # interpolate_text() doesn't incorrectly treat it 580 # as a variable reference 581# $token =~ s/\\([\\"])/$1/g; 582 for ($token) { 583 s/\\([^\$nrt])/$1/g; 584 s/\\([nrt])/$QUOTED_ESCAPES->{ $1 }/ge; 585 } 586 push(@tokens, ('"') x 2, 587 @{ $self->interpolate_text($token) }, 588 ('"') x 2); 589 next; 590 } 591 else { 592 $type = 'LITERAL'; 593 $token =~ s['][\\']g; 594 $token = "'$token'"; 595 } 596 } 597 else { 598 $type = 'LITERAL'; 599 $token = "'$token'"; 600 } 601 } 602 # number 603 elsif (defined ($token = $4)) { 604 $type = 'NUMBER'; 605 } 606 elsif (defined($token = $5)) { 607 $type = 'FILENAME'; 608 } 609 elsif (defined($token = $6)) { 610 # Fold potential keywords to UPPER CASE if the ANYCASE option is 611 # set, unless (we've got some preceeding tokens and) the previous 612 # token is a DOT op. This prevents the 'last' in 'data.last' 613 # from being interpreted as the LAST keyword. 614 $uctoken = 615 ($anycase && (! @tokens || $tokens[-2] ne 'DOT')) 616 ? uc $token 617 : $token; 618 if (defined ($type = $lextable->{ $uctoken })) { 619 $token = $uctoken; 620 } 621 else { 622 $type = 'IDENT'; 623 } 624 } 625 elsif (defined ($token = $7)) { 626 # reserved words may be in lower case unless case sensitive 627 $uctoken = $anycase ? uc $token : $token; 628 unless (defined ($type = $lextable->{ $uctoken })) { 629 $type = 'UNQUOTED'; 630 } 631 } 632 633 push(@tokens, $type, $token); 634 635# print(STDERR " +[ $type, $token ]\n") 636# if $DEBUG; 637 } 638 639# print STDERR "tokenise directive() returning:\n [ @tokens ]\n" 640# if $DEBUG; 641 642 return \@tokens; ## RETURN ## 643} 644 645 646#------------------------------------------------------------------------ 647# define_block($name, $block) 648# 649# Called by the parser 'defblock' rule when a BLOCK definition is 650# encountered in the template. The name of the block is passed in the 651# first parameter and a reference to the compiled block is passed in 652# the second. This method stores the block in the $self->{ DEFBLOCK } 653# hash which has been initialised by parse() and will later be used 654# by the same method to call the store() method on the calling cache 655# to define the block "externally". 656#------------------------------------------------------------------------ 657 658sub define_block { 659 my ($self, $name, $block) = @_; 660 my $defblock = $self->{ DEFBLOCK } 661 || return undef; 662 663 $self->debug("compiled block '$name':\n$block") 664 if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER; 665 666 $defblock->{ $name } = $block; 667 668 return undef; 669} 670 671sub push_defblock { 672 my $self = shift; 673 my $stack = $self->{ DEFBLOCK_STACK } ||= []; 674 push(@$stack, $self->{ DEFBLOCK } ); 675 $self->{ DEFBLOCK } = { }; 676} 677 678sub pop_defblock { 679 my $self = shift; 680 my $defs = $self->{ DEFBLOCK }; 681 my $stack = $self->{ DEFBLOCK_STACK } || return $defs; 682 return $defs unless @$stack; 683 $self->{ DEFBLOCK } = pop @$stack; 684 return $defs; 685} 686 687 688#------------------------------------------------------------------------ 689# add_metadata(\@setlist) 690#------------------------------------------------------------------------ 691 692sub add_metadata { 693 my ($self, $setlist) = @_; 694 my $metadata = $self->{ METADATA } 695 || return undef; 696 697 push(@$metadata, @$setlist); 698 699 return undef; 700} 701 702 703#------------------------------------------------------------------------ 704# location() 705# 706# Return Perl comment indicating current parser file and line 707#------------------------------------------------------------------------ 708 709sub location { 710 my $self = shift; 711 return "\n" unless $self->{ FILE_INFO }; 712 my $line = ${ $self->{ LINE } }; 713 my $info = $self->{ FILEINFO }->[-1]; 714 my $file = $info->{ path } || $info->{ name } 715 || '(unknown template)'; 716 $line =~ s/\-.*$//; # might be 'n-n' 717 $line ||= 1; 718 return "#line $line \"$file\"\n"; 719} 720 721 722#======================================================================== 723# ----- PRIVATE METHODS ----- 724#======================================================================== 725 726#------------------------------------------------------------------------ 727# _parse(\@tokens, \@info) 728# 729# Parses the list of input tokens passed by reference and returns a 730# Template::Directive::Block object which contains the compiled 731# representation of the template. 732# 733# This is the main parser DFA loop. See embedded comments for 734# further details. 735# 736# On error, undef is returned and the internal _ERROR field is set to 737# indicate the error. This can be retrieved by calling the error() 738# method. 739#------------------------------------------------------------------------ 740 741sub _parse { 742 my ($self, $tokens, $info) = @_; 743 my ($token, $value, $text, $line, $inperl); 744 my ($state, $stateno, $status, $action, $lookup, $coderet, @codevars); 745 my ($lhs, $len, $code); # rule contents 746 my $stack = [ [ 0, undef ] ]; # DFA stack 747 748# DEBUG 749# local $" = ', '; 750 751 # retrieve internal rule and state tables 752 my ($states, $rules) = @$self{ qw( STATES RULES ) }; 753 754 # If we're tracing variable usage then we need to give the factory a 755 # reference to our $self->{ VARIABLES } for it to fill in. This is a 756 # bit of a hack to back-patch this functionality into TT2. 757 $self->{ FACTORY }->trace_vars($self->{ VARIABLES }) 758 if $self->{ TRACE_VARS }; 759 760 # call the grammar set_factory method to install emitter factory 761 $self->{ GRAMMAR }->install_factory($self->{ FACTORY }); 762 763 $line = $inperl = 0; 764 $self->{ LINE } = \$line; 765 $self->{ FILE } = $info->{ name }; 766 $self->{ INPERL } = \$inperl; 767 768 $status = CONTINUE; 769 my $in_string = 0; 770 771 while(1) { 772 # get state number and state 773 $stateno = $stack->[-1]->[0]; 774 $state = $states->[$stateno]; 775 776 # see if any lookaheads exist for the current state 777 if (exists $state->{'ACTIONS'}) { 778 779 # get next token and expand any directives (i.e. token is an 780 # array ref) onto the front of the token list 781 while (! defined $token && @$tokens) { 782 $token = shift(@$tokens); 783 if (ref $token) { 784 ($text, $line, $token) = @$token; 785 if (ref $token) { 786 if ($info->{ DEBUG } && ! $in_string) { 787 # - - - - - - - - - - - - - - - - - - - - - - - - - 788 # This is gnarly. Look away now if you're easily 789 # frightened. We're pushing parse tokens onto the 790 # pending list to simulate a DEBUG directive like so: 791 # [% DEBUG msg line='20' text='INCLUDE foo' %] 792 # - - - - - - - - - - - - - - - - - - - - - - - - - 793 my $dtext = $text; 794 $dtext =~ s[(['\\])][\\$1]g; 795 unshift(@$tokens, 796 DEBUG => 'DEBUG', 797 IDENT => 'msg', 798 IDENT => 'line', 799 ASSIGN => '=', 800 LITERAL => "'$line'", 801 IDENT => 'text', 802 ASSIGN => '=', 803 LITERAL => "'$dtext'", 804 IDENT => 'file', 805 ASSIGN => '=', 806 LITERAL => "'$info->{ name }'", 807 (';') x 2, 808 @$token, 809 (';') x 2); 810 } 811 else { 812 unshift(@$tokens, @$token, (';') x 2); 813 } 814 $token = undef; # force redo 815 } 816 elsif ($token eq 'ITEXT') { 817 if ($inperl) { 818 # don't perform interpolation in PERL blocks 819 $token = 'TEXT'; 820 $value = $text; 821 } 822 else { 823 unshift(@$tokens, 824 @{ $self->interpolate_text($text, $line) }); 825 $token = undef; # force redo 826 } 827 } 828 } 829 else { 830 # toggle string flag to indicate if we're crossing 831 # a string boundary 832 $in_string = ! $in_string if $token eq '"'; 833 $value = shift(@$tokens); 834 } 835 }; 836 # clear undefined token to avoid 'undefined variable blah blah' 837 # warnings and let the parser logic pick it up in a minute 838 $token = '' unless defined $token; 839 840 # get the next state for the current lookahead token 841 $action = defined ($lookup = $state->{'ACTIONS'}->{ $token }) 842 ? $lookup 843 : defined ($lookup = $state->{'DEFAULT'}) 844 ? $lookup 845 : undef; 846 } 847 else { 848 # no lookahead actions 849 $action = $state->{'DEFAULT'}; 850 } 851 852 # ERROR: no ACTION 853 last unless defined $action; 854 855 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 856 # shift (+ive ACTION) 857 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 858 if ($action > 0) { 859 push(@$stack, [ $action, $value ]); 860 $token = $value = undef; 861 redo; 862 }; 863 864 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 865 # reduce (-ive ACTION) 866 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 867 ($lhs, $len, $code) = @{ $rules->[ -$action ] }; 868 869 # no action imples ACCEPTance 870 $action 871 or $status = ACCEPT; 872 873 # use dummy sub if code ref doesn't exist 874 $code = sub { $_[1] } 875 unless $code; 876 877 @codevars = $len 878 ? map { $_->[1] } @$stack[ -$len .. -1 ] 879 : (); 880 881 eval { 882 $coderet = &$code( $self, @codevars ); 883 }; 884 if ($@) { 885 my $err = $@; 886 chomp $err; 887 return $self->_parse_error($err); 888 } 889 890 # reduce stack by $len 891 splice(@$stack, -$len, $len); 892 893 # ACCEPT 894 return $coderet ## RETURN ## 895 if $status == ACCEPT; 896 897 # ABORT 898 return undef ## RETURN ## 899 if $status == ABORT; 900 901 # ERROR 902 last 903 if $status == ERROR; 904 } 905 continue { 906 push(@$stack, [ $states->[ $stack->[-1][0] ]->{'GOTOS'}->{ $lhs }, 907 $coderet ]), 908 } 909 910 # ERROR ## RETURN ## 911 return $self->_parse_error('unexpected end of input') 912 unless defined $value; 913 914 # munge text of last directive to make it readable 915# $text =~ s/\n/\\n/g; 916 917 return $self->_parse_error("unexpected end of directive", $text) 918 if $value eq ';'; # end of directive SEPARATOR 919 920 return $self->_parse_error("unexpected token ($value)", $text); 921} 922 923 924 925#------------------------------------------------------------------------ 926# _parse_error($msg, $dirtext) 927# 928# Method used to handle errors encountered during the parse process 929# in the _parse() method. 930#------------------------------------------------------------------------ 931 932sub _parse_error { 933 my ($self, $msg, $text) = @_; 934 my $line = $self->{ LINE }; 935 $line = ref($line) ? $$line : $line; 936 $line = 'unknown' unless $line; 937 938 $msg .= "\n [% $text %]" 939 if defined $text; 940 941 return $self->error("line $line: $msg"); 942} 943 944 945#------------------------------------------------------------------------ 946# _dump() 947# 948# Debug method returns a string representing the internal state of the 949# object. 950#------------------------------------------------------------------------ 951 952sub _dump { 953 my $self = shift; 954 my $output = "[Template::Parser] {\n"; 955 my $format = " %-16s => %s\n"; 956 my $key; 957 958 foreach $key (qw( START_TAG END_TAG TAG_STYLE ANYCASE INTERPOLATE 959 PRE_CHOMP POST_CHOMP V1DOLLAR )) { 960 my $val = $self->{ $key }; 961 $val = '<undef>' unless defined $val; 962 $output .= sprintf($format, $key, $val); 963 } 964 965 $output .= '}'; 966 return $output; 967} 968 969 9701; 971 972__END__ 973 974=head1 NAME 975 976Template::Parser - LALR(1) parser for compiling template documents 977 978=head1 SYNOPSIS 979 980 use Template::Parser; 981 982 $parser = Template::Parser->new(\%config); 983 $template = $parser->parse($text) 984 || die $parser->error(), "\n"; 985 986=head1 DESCRIPTION 987 988The C<Template::Parser> module implements a LALR(1) parser and associated 989methods for parsing template documents into Perl code. 990 991=head1 PUBLIC METHODS 992 993=head2 new(\%params) 994 995The C<new()> constructor creates and returns a reference to a new 996C<Template::Parser> object. 997 998A reference to a hash may be supplied as a parameter to provide configuration values. 999See L<CONFIGURATION OPTIONS> below for a summary of these options and 1000L<Template::Manual::Config> for full details. 1001 1002 my $parser = Template::Parser->new({ 1003 START_TAG => quotemeta('<+'), 1004 END_TAG => quotemeta('+>'), 1005 }); 1006 1007=head2 parse($text) 1008 1009The C<parse()> method parses the text passed in the first parameter and 1010returns a reference to a hash array of data defining the compiled 1011representation of the template text, suitable for passing to the 1012L<Template::Document> L<new()|Template::Document#new()> constructor method. On 1013error, undef is returned. 1014 1015 $data = $parser->parse($text) 1016 || die $parser->error(); 1017 1018The C<$data> hash reference returned contains a C<BLOCK> item containing the 1019compiled Perl code for the template, a C<DEFBLOCKS> item containing a 1020reference to a hash array of sub-template C<BLOCK>s defined within in the 1021template, and a C<METADATA> item containing a reference to a hash array 1022of metadata values defined in C<META> tags. 1023 1024=head1 CONFIGURATION OPTIONS 1025 1026The C<Template::Parser> module accepts the following configuration 1027options. Please see L<Template::Manual::Config> for futher details 1028on each option. 1029 1030=head2 START_TAG, END_TAG 1031 1032The L<START_TAG|Template::Manual::Config#START_TAG_END_TAG> and 1033L<END_TAG|Template::Manual::Config#START_TAG_END_TAG> options are used to 1034specify character sequences or regular expressions that mark the start and end 1035of a template directive. 1036 1037 my $parser = Template::Parser->new({ 1038 START_TAG => quotemeta('<+'), 1039 END_TAG => quotemeta('+>'), 1040 }); 1041 1042=head2 TAG_STYLE 1043 1044The L<TAG_STYLE|Template::Manual::Config#TAG_STYLE> option can be used to set 1045both L<START_TAG> and L<END_TAG> according to pre-defined tag styles. 1046 1047 my $parser = Template::Parser->new({ 1048 TAG_STYLE => 'star', # [* ... *] 1049 }); 1050 1051=head2 PRE_CHOMP, POST_CHOMP 1052 1053The L<PRE_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> and 1054L<POST_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> can be set to remove 1055any whitespace before or after a directive tag, respectively. 1056 1057 my $parser = Template::Parser-E<gt>new({ 1058 PRE_CHOMP => 1, 1059 POST_CHOMP => 1, 1060 }); 1061 1062=head2 INTERPOLATE 1063 1064The L<INTERPOLATE|Template::Manual::Config#INTERPOLATE> flag can be set 1065to allow variables to be embedded in plain text blocks. 1066 1067 my $parser = Template::Parser->new({ 1068 INTERPOLATE => 1, 1069 }); 1070 1071Variables should be prefixed by a C<$> to identify them, using curly braces 1072to explicitly scope the variable name where necessary. 1073 1074 Hello ${name}, 1075 1076 The day today is ${day.today}. 1077 1078=head2 ANYCASE 1079 1080The L<ANYCASE|Template::Manual::Config#ANYCASE> option can be set 1081to allow directive keywords to be specified in any case. 1082 1083 # with ANYCASE set to 1 1084 [% INCLUDE foobar %] # OK 1085 [% include foobar %] # OK 1086 [% include = 10 %] # ERROR, 'include' is a reserved word 1087 1088=head2 GRAMMAR 1089 1090The L<GRAMMAR|Template::Manual::Config#GRAMMAR> configuration item can be used 1091to specify an alternate grammar for the parser. This allows a modified or 1092entirely new template language to be constructed and used by the Template 1093Toolkit. 1094 1095 use MyOrg::Template::Grammar; 1096 1097 my $parser = Template::Parser->new({ 1098 GRAMMAR = MyOrg::Template::Grammar->new(); 1099 }); 1100 1101By default, an instance of the default L<Template::Grammar> will be 1102created and used automatically if a C<GRAMMAR> item isn't specified. 1103 1104=head2 DEBUG 1105 1106The L<DEBUG|Template::Manual::Config#DEBUG> option can be used to enable 1107various debugging features of the C<Template::Parser> module. 1108 1109 use Template::Constants qw( :debug ); 1110 1111 my $template = Template->new({ 1112 DEBUG => DEBUG_PARSER | DEBUG_DIRS, 1113 }); 1114 1115=head1 AUTHOR 1116 1117Andy Wardley E<lt>abw@wardley.orgE<gt> L<http://wardley.org/> 1118 1119=head1 COPYRIGHT 1120 1121Copyright (C) 1996-2007 Andy Wardley. All Rights Reserved. 1122 1123This module is free software; you can redistribute it and/or 1124modify it under the same terms as Perl itself. 1125 1126The main parsing loop of the C<Template::Parser> module was derived from a 1127standalone parser generated by version 0.16 of the C<Parse::Yapp> module. The 1128following copyright notice appears in the C<Parse::Yapp> documentation. 1129 1130 The Parse::Yapp module and its related modules and shell 1131 scripts are copyright (c) 1998 Francois Desarmenien, 1132 France. All rights reserved. 1133 1134 You may use and distribute them under the terms of either 1135 the GNU General Public License or the Artistic License, as 1136 specified in the Perl README file. 1137 1138=head1 SEE ALSO 1139 1140L<Template>, L<Template::Grammar>, L<Template::Directive> 1141 1142