1#============================================================= -*-Perl-*- 2# 3# Template::Parser 4# 5# DESCRIPTION 6# This module implements a LALR(1) parser and associated support 7# methods to parse template documents into the appropriate "compiled" 8# format. Much of the parser DFA code (see _parse() method) is based 9# on Francois Desarmenien's Parse::Yapp module. Kudos to him. 10# 11# AUTHOR 12# Andy Wardley <abw@wardley.org> 13# 14# COPYRIGHT 15# Copyright (C) 1996-2007 Andy Wardley. All Rights Reserved. 16# 17# This module is free software; you can redistribute it and/or 18# modify it under the same terms as Perl itself. 19# 20# The following copyright notice appears in the Parse::Yapp 21# documentation. 22# 23# The Parse::Yapp module and its related modules and shell 24# scripts are copyright (c) 1998 Francois Desarmenien, 25# France. All rights reserved. 26# 27# You may use and distribute them under the terms of either 28# the GNU General Public License or the Artistic License, as 29# specified in the Perl README file. 30# 31#============================================================================ 32 33package Template::Parser; 34 35use strict; 36use warnings; 37use base 'Template::Base'; 38 39use Template::Constants qw( :status :chomp ); 40use Template::Directive; 41use Template::Grammar; 42 43# parser state constants 44use constant CONTINUE => 0; 45use constant ACCEPT => 1; 46use constant ERROR => 2; 47use constant ABORT => 3; 48 49our $VERSION = 2.89; 50our $DEBUG = 0 unless defined $DEBUG; 51our $ERROR = ''; 52 53 54#======================================================================== 55# -- COMMON TAG STYLES -- 56#======================================================================== 57 58our $TAG_STYLE = { 59 'default' => [ '\[%', '%\]' ], 60 'template1' => [ '[\[%]%', '%[\]%]' ], 61 'metatext' => [ '%%', '%%' ], 62 'html' => [ '<!--', '-->' ], 63 'mason' => [ '<%', '>' ], 64 'asp' => [ '<%', '%>' ], 65 'php' => [ '<\?', '\?>' ], 66 'star' => [ '\[\*', '\*\]' ], 67}; 68$TAG_STYLE->{ template } = $TAG_STYLE->{ tt2 } = $TAG_STYLE->{ default }; 69 70 71our $DEFAULT_STYLE = { 72 START_TAG => $TAG_STYLE->{ default }->[0], 73 END_TAG => $TAG_STYLE->{ default }->[1], 74# TAG_STYLE => 'default', 75 ANYCASE => 0, 76 INTERPOLATE => 0, 77 PRE_CHOMP => 0, 78 POST_CHOMP => 0, 79 V1DOLLAR => 0, 80 EVAL_PERL => 0, 81}; 82 83our $QUOTED_ESCAPES = { 84 n => "\n", 85 r => "\r", 86 t => "\t", 87}; 88 89# note that '-' must come first so Perl doesn't think it denotes a range 90our $CHOMP_FLAGS = qr/[-=~+]/; 91 92 93 94#======================================================================== 95# ----- PUBLIC METHODS ----- 96#======================================================================== 97 98#------------------------------------------------------------------------ 99# new(\%config) 100# 101# Constructor method. 102#------------------------------------------------------------------------ 103 104sub new { 105 my $class = shift; 106 my $config = $_[0] && ref($_[0]) eq 'HASH' ? shift(@_) : { @_ }; 107 my ($tagstyle, $debug, $start, $end, $defaults, $grammar, $hash, $key, $udef); 108 109 my $self = bless { 110 START_TAG => undef, 111 END_TAG => undef, 112 TAG_STYLE => 'default', 113 ANYCASE => 0, 114 INTERPOLATE => 0, 115 PRE_CHOMP => 0, 116 POST_CHOMP => 0, 117 V1DOLLAR => 0, 118 EVAL_PERL => 0, 119 FILE_INFO => 1, 120 GRAMMAR => undef, 121 _ERROR => '', 122 IN_BLOCK => [ ], 123 TRACE_VARS => $config->{ TRACE_VARS }, 124 FACTORY => $config->{ FACTORY } || 'Template::Directive', 125 }, $class; 126 127 # update self with any relevant keys in config 128 foreach $key (keys %$self) { 129 $self->{ $key } = $config->{ $key } if defined $config->{ $key }; 130 } 131 $self->{ FILEINFO } = [ ]; 132 133 # DEBUG config item can be a bitmask 134 if (defined ($debug = $config->{ DEBUG })) { 135 $self->{ DEBUG } = $debug & ( Template::Constants::DEBUG_PARSER 136 | Template::Constants::DEBUG_FLAGS ); 137 $self->{ DEBUG_DIRS } = $debug & Template::Constants::DEBUG_DIRS; 138 } 139 # package variable can be set to 1 to support previous behaviour 140 elsif ($DEBUG == 1) { 141 $self->{ DEBUG } = Template::Constants::DEBUG_PARSER; 142 $self->{ DEBUG_DIRS } = 0; 143 } 144 # otherwise let $DEBUG be a bitmask 145 else { 146 $self->{ DEBUG } = $DEBUG & ( Template::Constants::DEBUG_PARSER 147 | Template::Constants::DEBUG_FLAGS ); 148 $self->{ DEBUG_DIRS } = $DEBUG & Template::Constants::DEBUG_DIRS; 149 } 150 151 $grammar = $self->{ GRAMMAR } ||= do { 152 require Template::Grammar; 153 Template::Grammar->new(); 154 }; 155 156 # instantiate a FACTORY object 157 unless (ref $self->{ FACTORY }) { 158 my $fclass = $self->{ FACTORY }; 159 $self->{ FACTORY } = $self->{ FACTORY }->new( 160 NAMESPACE => $config->{ NAMESPACE } 161 ) 162 || return $class->error($self->{ FACTORY }->error()); 163 } 164 165 # load grammar rules, states and lex table 166 @$self{ qw( LEXTABLE STATES RULES ) } 167 = @$grammar{ qw( LEXTABLE STATES RULES ) }; 168 169 $self->new_style($config) 170 || return $class->error($self->error()); 171 172 return $self; 173} 174 175#----------------------------------------------------------------------- 176# These methods are used to track nested IF and WHILE blocks. Each 177# generated if/while block is given a label indicating the directive 178# type and nesting depth, e.g. FOR0, WHILE1, FOR2, WHILE3, etc. The 179# NEXT and LAST directives use the innermost label, e.g. last WHILE3; 180#----------------------------------------------------------------------- 181 182sub enter_block { 183 my ($self, $name) = @_; 184 my $blocks = $self->{ IN_BLOCK }; 185 push(@{ $self->{ IN_BLOCK } }, $name); 186} 187 188sub leave_block { 189 my $self = shift; 190 my $label = $self->block_label; 191 pop(@{ $self->{ IN_BLOCK } }); 192 return $label; 193} 194 195sub in_block { 196 my ($self, $name) = @_; 197 my $blocks = $self->{ IN_BLOCK }; 198 return @$blocks && $blocks->[-1] eq $name; 199} 200 201sub block_label { 202 my ($self, $prefix, $suffix) = @_; 203 my $blocks = $self->{ IN_BLOCK }; 204 my $name = @$blocks 205 ? $blocks->[-1] . scalar @$blocks 206 : undef; 207 return join('', grep { defined $_ } $prefix, $name, $suffix); 208} 209 210 211 212#------------------------------------------------------------------------ 213# new_style(\%config) 214# 215# Install a new (stacked) parser style. This feature is currently 216# experimental but should mimic the previous behaviour with regard to 217# TAG_STYLE, START_TAG, END_TAG, etc. 218#------------------------------------------------------------------------ 219 220sub new_style { 221 my ($self, $config) = @_; 222 my $styles = $self->{ STYLE } ||= [ ]; 223 my ($tagstyle, $tags, $start, $end, $key); 224 225 # clone new style from previous or default style 226 my $style = { %{ $styles->[-1] || $DEFAULT_STYLE } }; 227 228 # expand START_TAG and END_TAG from specified TAG_STYLE 229 if ($tagstyle = $config->{ TAG_STYLE }) { 230 return $self->error("Invalid tag style: $tagstyle") 231 unless defined ($tags = $TAG_STYLE->{ $tagstyle }); 232 ($start, $end) = @$tags; 233 $config->{ START_TAG } ||= $start; 234 $config->{ END_TAG } ||= $end; 235 } 236 237 foreach $key (keys %$DEFAULT_STYLE) { 238 $style->{ $key } = $config->{ $key } if defined $config->{ $key }; 239 } 240 push(@$styles, $style); 241 return $style; 242} 243 244 245#------------------------------------------------------------------------ 246# old_style() 247# 248# Pop the current parser style and revert to the previous one. See 249# new_style(). ** experimental ** 250#------------------------------------------------------------------------ 251 252sub old_style { 253 my $self = shift; 254 my $styles = $self->{ STYLE }; 255 return $self->error('only 1 parser style remaining') 256 unless (@$styles > 1); 257 pop @$styles; 258 return $styles->[-1]; 259} 260 261 262#------------------------------------------------------------------------ 263# parse($text, $data) 264# 265# Parses the text string, $text and returns a hash array representing 266# the compiled template block(s) as Perl code, in the format expected 267# by Template::Document. 268#------------------------------------------------------------------------ 269 270sub parse { 271 my ($self, $text, $info) = @_; 272 my ($tokens, $block); 273 274 $info->{ DEBUG } = $self->{ DEBUG_DIRS } 275 unless defined $info->{ DEBUG }; 276 277# print "info: { ", join(', ', map { "$_ => $info->{ $_ }" } keys %$info), " }\n"; 278 279 # store for blocks defined in the template (see define_block()) 280 my $defblock = $self->{ DEFBLOCK } = { }; 281 my $metadata = $self->{ METADATA } = [ ]; 282 my $variables = $self->{ VARIABLES } = { }; 283 $self->{ DEFBLOCKS } = [ ]; 284 285 $self->{ _ERROR } = ''; 286 287 # split file into TEXT/DIRECTIVE chunks 288 $tokens = $self->split_text($text) 289 || return undef; ## RETURN ## 290 291 push(@{ $self->{ FILEINFO } }, $info); 292 293 # parse chunks 294 $block = $self->_parse($tokens, $info); 295 296 pop(@{ $self->{ FILEINFO } }); 297 298 return undef unless $block; ## RETURN ## 299 300 $self->debug("compiled main template document block:\n$block") 301 if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER; 302 303 return { 304 BLOCK => $block, 305 DEFBLOCKS => $defblock, 306 VARIABLES => $variables, 307 METADATA => { @$metadata }, 308 }; 309} 310 311 312 313#------------------------------------------------------------------------ 314# split_text($text) 315# 316# Split input template text into directives and raw text chunks. 317#------------------------------------------------------------------------ 318 319sub split_text { 320 my ($self, $text) = @_; 321 my ($pre, $dir, $prelines, $dirlines, $postlines, $chomp, $tags, @tags); 322 my $style = $self->{ STYLE }->[-1]; 323 my ($start, $end, $prechomp, $postchomp, $interp ) = 324 @$style{ qw( START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE ) }; 325 my $tags_dir = $self->{ANYCASE} ? qr<TAGS>i : qr<TAGS>; 326 327 my @tokens = (); 328 my $line = 1; 329 330 return \@tokens ## RETURN ## 331 unless defined $text && length $text; 332 333 # extract all directives from the text 334 while ($text =~ s/ 335 ^(.*?) # $1 - start of line up to directive 336 (?: 337 $start # start of tag 338 (.*?) # $2 - tag contents 339 $end # end of tag 340 ) 341 //sx) { 342 343 ($pre, $dir) = ($1, $2); 344 $pre = '' unless defined $pre; 345 $dir = '' unless defined $dir; 346 347 $prelines = ($pre =~ tr/\n//); # newlines in preceding text 348 $dirlines = ($dir =~ tr/\n//); # newlines in directive tag 349 $postlines = 0; # newlines chomped after tag 350 351 for ($dir) { 352 if (/^\#/) { 353 # comment out entire directive except for any end chomp flag 354 $dir = ($dir =~ /($CHOMP_FLAGS)$/o) ? $1 : ''; 355 } 356 else { 357 358 if(s/^($CHOMP_FLAGS)?(\s*)//so && $2) { 359 my $chomped = $2; 360 my $linecount = ($chomped =~ tr/\n//); # newlines in chomped whitespace 361 $linecount ||= 0; 362 $prelines += $linecount; 363 $dirlines -= $linecount; 364 } 365 # PRE_CHOMP: process whitespace before tag 366 $chomp = $1 ? $1 : $prechomp; 367 $chomp =~ tr/-=~+/1230/; 368 if ($chomp && $pre) { 369 # chomp off whitespace and newline preceding directive 370 if ($chomp == CHOMP_ALL) { 371 $pre =~ s{ (\r?\n|^) [^\S\n]* \z }{}mx; 372 } 373 elsif ($chomp == CHOMP_COLLAPSE) { 374 $pre =~ s{ (\s+) \z }{ }x; 375 } 376 elsif ($chomp == CHOMP_GREEDY) { 377 $pre =~ s{ (\s+) \z }{}x; 378 } 379 } 380 } 381 382 # POST_CHOMP: process whitespace after tag 383 s/\s*($CHOMP_FLAGS)?\s*$//so; 384 $chomp = $1 ? $1 : $postchomp; 385 $chomp =~ tr/-=~+/1230/; 386 if ($chomp) { 387 if ($chomp == CHOMP_ALL) { 388 $text =~ s{ ^ ([^\S\n]* \n) }{}x 389 && $postlines++; 390 } 391 elsif ($chomp == CHOMP_COLLAPSE) { 392 $text =~ s{ ^ (\s+) }{ }x 393 && ($postlines += $1=~y/\n//); 394 } 395 # any trailing whitespace 396 elsif ($chomp == CHOMP_GREEDY) { 397 $text =~ s{ ^ (\s+) }{}x 398 && ($postlines += $1=~y/\n//); 399 } 400 } 401 } 402 403 # any text preceding the directive can now be added 404 if (length $pre) { 405 push(@tokens, $interp 406 ? [ $pre, $line, 'ITEXT' ] 407 : ('TEXT', $pre) ); 408 } 409 $line += $prelines; 410 411 # and now the directive, along with line number information 412 if (length $dir) { 413 # the TAGS directive is a compile-time switch 414 if ($dir =~ /^$tags_dir\s+(.*)/) { 415 my @tags = split(/\s+/, $1); 416 if (scalar @tags > 1) { 417 ($start, $end) = map { quotemeta($_) } @tags; 418 } 419 elsif ($tags = $TAG_STYLE->{ $tags[0] }) { 420 ($start, $end) = @$tags; 421 } 422 else { 423 warn "invalid TAGS style: $tags[0]\n"; 424 } 425 } 426 else { 427 # DIRECTIVE is pushed as: 428 # [ $dirtext, $line_no(s), \@tokens ] 429 push(@tokens, 430 [ $dir, 431 ($dirlines 432 ? sprintf("%d-%d", $line, $line + $dirlines) 433 : $line), 434 $self->tokenise_directive($dir) ]); 435 } 436 } 437 438 # update line counter to include directive lines and any extra 439 # newline chomped off the start of the following text 440 $line += $dirlines + $postlines; 441 } 442 443 # anything remaining in the string is plain text 444 push(@tokens, $interp 445 ? [ $text, $line, 'ITEXT' ] 446 : ( 'TEXT', $text) ) 447 if length $text; 448 449 return \@tokens; ## RETURN ## 450} 451 452 453 454#------------------------------------------------------------------------ 455# interpolate_text($text, $line) 456# 457# Examines $text looking for any variable references embedded like 458# $this or like ${ this }. 459#------------------------------------------------------------------------ 460 461sub interpolate_text { 462 my ($self, $text, $line) = @_; 463 my @tokens = (); 464 my ($pre, $var, $dir); 465 466 467 while ($text =~ 468 / 469 ( (?: \\. | [^\$] ){1,3000} ) # escaped or non-'$' character [$1] 470 | 471 ( \$ (?: # embedded variable [$2] 472 (?: \{ ([^\}]*) \} ) # ${ ... } [$3] 473 | 474 ([\w\.]+) # $word [$4] 475 ) 476 ) 477 /gx) { 478 479 ($pre, $var, $dir) = ($1, $3 || $4, $2); 480 481 # preceding text 482 if (defined($pre) && length($pre)) { 483 $line += $pre =~ tr/\n//; 484 $pre =~ s/\\\$/\$/g; 485 push(@tokens, 'TEXT', $pre); 486 } 487 # $variable reference 488 if ($var) { 489 $line += $dir =~ tr/\n/ /; 490 push(@tokens, [ $dir, $line, $self->tokenise_directive($var) ]); 491 } 492 # other '$' reference - treated as text 493 elsif ($dir) { 494 $line += $dir =~ tr/\n//; 495 push(@tokens, 'TEXT', $dir); 496 } 497 } 498 499 return \@tokens; 500} 501 502 503 504#------------------------------------------------------------------------ 505# tokenise_directive($text) 506# 507# Called by the private _parse() method when it encounters a DIRECTIVE 508# token in the list provided by the split_text() or interpolate_text() 509# methods. The directive text is passed by parameter. 510# 511# The method splits the directive into individual tokens as recognised 512# by the parser grammar (see Template::Grammar for details). It 513# constructs a list of tokens each represented by 2 elements, as per 514# split_text() et al. The first element contains the token type, the 515# second the token itself. 516# 517# The method tokenises the string using a complex (but fast) regex. 518# For a deeper understanding of the regex magic at work here, see 519# Jeffrey Friedl's excellent book "Mastering Regular Expressions", 520# from O'Reilly, ISBN 1-56592-257-3 521# 522# Returns a reference to the list of chunks (each one being 2 elements) 523# identified in the directive text. On error, the internal _ERROR string 524# is set and undef is returned. 525#------------------------------------------------------------------------ 526 527sub tokenise_directive { 528 my ($self, $text, $line) = @_; 529 my ($token, $uctoken, $type, $lookup); 530 my $lextable = $self->{ LEXTABLE }; 531 my $style = $self->{ STYLE }->[-1]; 532 my ($anycase, $start, $end) = @$style{ qw( ANYCASE START_TAG END_TAG ) }; 533 my @tokens = ( ); 534 535 while ($text =~ 536 / 537 # strip out any comments 538 (\#[^\n]*) 539 | 540 # a quoted phrase matches in $3 541 (["']) # $2 - opening quote, ' or " 542 ( # $3 - quoted text buffer 543 (?: # repeat group (no backreference) 544 \\\\ # an escaped backslash \\ 545 | # ...or... 546 \\\2 # an escaped quote \" or \' (match $1) 547 | # ...or... 548 . # any other character 549 | \n 550 )*? # non-greedy repeat 551 ) # end of $3 552 \2 # match opening quote 553 | 554 # an unquoted number matches in $4 555 (-?\d+(?:\.\d+)?) # numbers 556 | 557 # filename matches in $5 558 ( \/?\w+(?:(?:\/|::?)\w*)+ | \/\w+) 559 | 560 # an identifier matches in $6 561 (\w+) # variable identifier 562 | 563 # an unquoted word or symbol matches in $7 564 ( [(){}\[\]:;,\/\\] # misc parenthesis and symbols 565# | \-> # arrow operator (for future?) 566 | [+\-*] # math operations 567 | \$\{? # dollar with option left brace 568 | => # like '=' 569 | [=!<>]?= | [!<>] # eqality tests 570 | &&? | \|\|? # boolean ops 571 | \.\.? # n..n sequence 572 | \S+ # something unquoted 573 ) # end of $7 574 /gmxo) { 575 576 # ignore comments to EOL 577 next if $1; 578 579 # quoted string 580 if (defined ($token = $3)) { 581 # double-quoted string may include $variable references 582 if ($2 eq '"') { 583 if ($token =~ /[\$\\]/) { 584 $type = 'QUOTED'; 585 # unescape " and \ but leave \$ escaped so that 586 # interpolate_text() doesn't incorrectly treat it 587 # as a variable reference 588# $token =~ s/\\([\\"])/$1/g; 589 for ($token) { 590 s/\\([^\$nrt])/$1/g; 591 s/\\([nrt])/$QUOTED_ESCAPES->{ $1 }/ge; 592 } 593 push(@tokens, ('"') x 2, 594 @{ $self->interpolate_text($token) }, 595 ('"') x 2); 596 next; 597 } 598 else { 599 $type = 'LITERAL'; 600 $token =~ s['][\\']g; 601 $token = "'$token'"; 602 } 603 } 604 else { 605 $type = 'LITERAL'; 606 $token = "'$token'"; 607 } 608 } 609 # number 610 elsif (defined ($token = $4)) { 611 $type = 'NUMBER'; 612 } 613 elsif (defined($token = $5)) { 614 $type = 'FILENAME'; 615 } 616 elsif (defined($token = $6)) { 617 # Fold potential keywords to UPPER CASE if the ANYCASE option is 618 # set, unless (we've got some preceding tokens and) the previous 619 # token is a DOT op. This prevents the 'last' in 'data.last' 620 # from being interpreted as the LAST keyword. 621 $uctoken = 622 ($anycase && (! @tokens || $tokens[-2] ne 'DOT')) 623 ? uc $token 624 : $token; 625 if (defined ($type = $lextable->{ $uctoken })) { 626 $token = $uctoken; 627 } 628 else { 629 $type = 'IDENT'; 630 } 631 } 632 elsif (defined ($token = $7)) { 633 # reserved words may be in lower case unless case sensitive 634 $uctoken = $anycase ? uc $token : $token; 635 unless (defined ($type = $lextable->{ $uctoken })) { 636 $type = 'UNQUOTED'; 637 } 638 } 639 640 push(@tokens, $type, $token); 641 642# print(STDERR " +[ $type, $token ]\n") 643# if $DEBUG; 644 } 645 646# print STDERR "tokenise directive() returning:\n [ @tokens ]\n" 647# if $DEBUG; 648 649 return \@tokens; ## RETURN ## 650} 651 652 653#------------------------------------------------------------------------ 654# define_block($name, $block) 655# 656# Called by the parser 'defblock' rule when a BLOCK definition is 657# encountered in the template. The name of the block is passed in the 658# first parameter and a reference to the compiled block is passed in 659# the second. This method stores the block in the $self->{ DEFBLOCK } 660# hash which has been initialised by parse() and will later be used 661# by the same method to call the store() method on the calling cache 662# to define the block "externally". 663#------------------------------------------------------------------------ 664 665sub define_block { 666 my ($self, $name, $block) = @_; 667 my $defblock = $self->{ DEFBLOCK } 668 || return undef; 669 670 $self->debug("compiled block '$name':\n$block") 671 if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER; 672 673 $defblock->{ $name } = $block; 674 675 return undef; 676} 677 678sub push_defblock { 679 my $self = shift; 680 my $stack = $self->{ DEFBLOCK_STACK } ||= []; 681 push(@$stack, $self->{ DEFBLOCK } ); 682 $self->{ DEFBLOCK } = { }; 683} 684 685sub pop_defblock { 686 my $self = shift; 687 my $defs = $self->{ DEFBLOCK }; 688 my $stack = $self->{ DEFBLOCK_STACK } || return $defs; 689 return $defs unless @$stack; 690 $self->{ DEFBLOCK } = pop @$stack; 691 return $defs; 692} 693 694 695#------------------------------------------------------------------------ 696# add_metadata(\@setlist) 697#------------------------------------------------------------------------ 698 699sub add_metadata { 700 my ($self, $setlist) = @_; 701 my $metadata = $self->{ METADATA } 702 || return undef; 703 704 push(@$metadata, @$setlist); 705 706 return undef; 707} 708 709 710#------------------------------------------------------------------------ 711# location() 712# 713# Return Perl comment indicating current parser file and line 714#------------------------------------------------------------------------ 715 716sub location { 717 my $self = shift; 718 return "\n" unless $self->{ FILE_INFO }; 719 my $line = ${ $self->{ LINE } }; 720 my $info = $self->{ FILEINFO }->[-1]; 721 my $file = $info->{ path } || $info->{ name } 722 || '(unknown template)'; 723 $line =~ s/\-.*$//; # might be 'n-n' 724 $line ||= 1; 725 return "#line $line \"$file\"\n"; 726} 727 728 729#======================================================================== 730# ----- PRIVATE METHODS ----- 731#======================================================================== 732 733#------------------------------------------------------------------------ 734# _parse(\@tokens, \@info) 735# 736# Parses the list of input tokens passed by reference and returns a 737# Template::Directive::Block object which contains the compiled 738# representation of the template. 739# 740# This is the main parser DFA loop. See embedded comments for 741# further details. 742# 743# On error, undef is returned and the internal _ERROR field is set to 744# indicate the error. This can be retrieved by calling the error() 745# method. 746#------------------------------------------------------------------------ 747 748sub _parse { 749 my ($self, $tokens, $info) = @_; 750 my ($token, $value, $text, $line, $inperl); 751 my ($state, $stateno, $status, $action, $lookup, $coderet, @codevars); 752 my ($lhs, $len, $code); # rule contents 753 my $stack = [ [ 0, undef ] ]; # DFA stack 754 755# DEBUG 756# local $" = ', '; 757 758 # retrieve internal rule and state tables 759 my ($states, $rules) = @$self{ qw( STATES RULES ) }; 760 761 # If we're tracing variable usage then we need to give the factory a 762 # reference to our $self->{ VARIABLES } for it to fill in. This is a 763 # bit of a hack to back-patch this functionality into TT2. 764 $self->{ FACTORY }->trace_vars($self->{ VARIABLES }) 765 if $self->{ TRACE_VARS }; 766 767 # call the grammar set_factory method to install emitter factory 768 $self->{ GRAMMAR }->install_factory($self->{ FACTORY }); 769 770 $line = $inperl = 0; 771 $self->{ LINE } = \$line; 772 $self->{ FILE } = $info->{ name }; 773 $self->{ INPERL } = \$inperl; 774 775 $status = CONTINUE; 776 my $in_string = 0; 777 778 while(1) { 779 # get state number and state 780 $stateno = $stack->[-1]->[0]; 781 $state = $states->[$stateno]; 782 783 # see if any lookaheads exist for the current state 784 if (exists $state->{'ACTIONS'}) { 785 786 # get next token and expand any directives (i.e. token is an 787 # array ref) onto the front of the token list 788 while (! defined $token && @$tokens) { 789 $token = shift(@$tokens); 790 if (ref $token) { 791 ($text, $line, $token) = @$token; 792 if (ref $token) { 793 if ($info->{ DEBUG } && ! $in_string) { 794 # - - - - - - - - - - - - - - - - - - - - - - - - - 795 # This is gnarly. Look away now if you're easily 796 # frightened. We're pushing parse tokens onto the 797 # pending list to simulate a DEBUG directive like so: 798 # [% DEBUG msg line='20' text='INCLUDE foo' %] 799 # - - - - - - - - - - - - - - - - - - - - - - - - - 800 my $dtext = $text; 801 $dtext =~ s[(['\\])][\\$1]g; 802 unshift(@$tokens, 803 DEBUG => 'DEBUG', 804 IDENT => 'msg', 805 IDENT => 'line', 806 ASSIGN => '=', 807 LITERAL => "'$line'", 808 IDENT => 'text', 809 ASSIGN => '=', 810 LITERAL => "'$dtext'", 811 IDENT => 'file', 812 ASSIGN => '=', 813 LITERAL => "'$info->{ name }'", 814 (';') x 2, 815 @$token, 816 (';') x 2); 817 } 818 else { 819 unshift(@$tokens, @$token, (';') x 2); 820 } 821 $token = undef; # force redo 822 } 823 elsif ($token eq 'ITEXT') { 824 if ($inperl) { 825 # don't perform interpolation in PERL blocks 826 $token = 'TEXT'; 827 $value = $text; 828 } 829 else { 830 unshift(@$tokens, 831 @{ $self->interpolate_text($text, $line) }); 832 $token = undef; # force redo 833 } 834 } 835 } 836 else { 837 # toggle string flag to indicate if we're crossing 838 # a string boundary 839 $in_string = ! $in_string if $token eq '"'; 840 $value = shift(@$tokens); 841 } 842 }; 843 # clear undefined token to avoid 'undefined variable blah blah' 844 # warnings and let the parser logic pick it up in a minute 845 $token = '' unless defined $token; 846 847 # get the next state for the current lookahead token 848 $action = defined ($lookup = $state->{'ACTIONS'}->{ $token }) 849 ? $lookup 850 : defined ($lookup = $state->{'DEFAULT'}) 851 ? $lookup 852 : undef; 853 } 854 else { 855 # no lookahead actions 856 $action = $state->{'DEFAULT'}; 857 } 858 859 # ERROR: no ACTION 860 last unless defined $action; 861 862 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 863 # shift (+ive ACTION) 864 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 865 if ($action > 0) { 866 push(@$stack, [ $action, $value ]); 867 $token = $value = undef; 868 redo; 869 }; 870 871 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 872 # reduce (-ive ACTION) 873 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - 874 ($lhs, $len, $code) = @{ $rules->[ -$action ] }; 875 876 # no action imples ACCEPTance 877 $action 878 or $status = ACCEPT; 879 880 # use dummy sub if code ref doesn't exist 881 $code = sub { $_[1] } 882 unless $code; 883 884 @codevars = $len 885 ? map { $_->[1] } @$stack[ -$len .. -1 ] 886 : (); 887 888 eval { 889 $coderet = &$code( $self, @codevars ); 890 }; 891 if ($@) { 892 my $err = $@; 893 chomp $err; 894 return $self->_parse_error($err); 895 } 896 897 # reduce stack by $len 898 splice(@$stack, -$len, $len); 899 900 # ACCEPT 901 return $coderet ## RETURN ## 902 if $status == ACCEPT; 903 904 # ABORT 905 return undef ## RETURN ## 906 if $status == ABORT; 907 908 # ERROR 909 last 910 if $status == ERROR; 911 } 912 continue { 913 push(@$stack, [ $states->[ $stack->[-1][0] ]->{'GOTOS'}->{ $lhs }, 914 $coderet ]), 915 } 916 917 # ERROR ## RETURN ## 918 return $self->_parse_error('unexpected end of input') 919 unless defined $value; 920 921 # munge text of last directive to make it readable 922# $text =~ s/\n/\\n/g; 923 924 return $self->_parse_error("unexpected end of directive", $text) 925 if $value eq ';'; # end of directive SEPARATOR 926 927 return $self->_parse_error("unexpected token ($value)", $text); 928} 929 930 931 932#------------------------------------------------------------------------ 933# _parse_error($msg, $dirtext) 934# 935# Method used to handle errors encountered during the parse process 936# in the _parse() method. 937#------------------------------------------------------------------------ 938 939sub _parse_error { 940 my ($self, $msg, $text) = @_; 941 my $line = $self->{ LINE }; 942 $line = ref($line) ? $$line : $line; 943 $line = 'unknown' unless $line; 944 945 $msg .= "\n [% $text %]" 946 if defined $text; 947 948 return $self->error("line $line: $msg"); 949} 950 951 952#------------------------------------------------------------------------ 953# _dump() 954# 955# Debug method returns a string representing the internal state of the 956# object. 957#------------------------------------------------------------------------ 958 959sub _dump { 960 my $self = shift; 961 my $output = "[Template::Parser] {\n"; 962 my $format = " %-16s => %s\n"; 963 my $key; 964 965 foreach $key (qw( START_TAG END_TAG TAG_STYLE ANYCASE INTERPOLATE 966 PRE_CHOMP POST_CHOMP V1DOLLAR )) { 967 my $val = $self->{ $key }; 968 $val = '<undef>' unless defined $val; 969 $output .= sprintf($format, $key, $val); 970 } 971 972 $output .= '}'; 973 return $output; 974} 975 976 9771; 978 979__END__ 980 981=head1 NAME 982 983Template::Parser - LALR(1) parser for compiling template documents 984 985=head1 SYNOPSIS 986 987 use Template::Parser; 988 989 $parser = Template::Parser->new(\%config); 990 $template = $parser->parse($text) 991 || die $parser->error(), "\n"; 992 993=head1 DESCRIPTION 994 995The C<Template::Parser> module implements a LALR(1) parser and associated 996methods for parsing template documents into Perl code. 997 998=head1 PUBLIC METHODS 999 1000=head2 new(\%params) 1001 1002The C<new()> constructor creates and returns a reference to a new 1003C<Template::Parser> object. 1004 1005A reference to a hash may be supplied as a parameter to provide configuration values. 1006See L<CONFIGURATION OPTIONS> below for a summary of these options and 1007L<Template::Manual::Config> for full details. 1008 1009 my $parser = Template::Parser->new({ 1010 START_TAG => quotemeta('<+'), 1011 END_TAG => quotemeta('+>'), 1012 }); 1013 1014=head2 parse($text) 1015 1016The C<parse()> method parses the text passed in the first parameter and 1017returns a reference to a hash array of data defining the compiled 1018representation of the template text, suitable for passing to the 1019L<Template::Document> L<new()|Template::Document#new()> constructor method. On 1020error, undef is returned. 1021 1022 $data = $parser->parse($text) 1023 || die $parser->error(); 1024 1025The C<$data> hash reference returned contains a C<BLOCK> item containing the 1026compiled Perl code for the template, a C<DEFBLOCKS> item containing a 1027reference to a hash array of sub-template C<BLOCK>s defined within in the 1028template, and a C<METADATA> item containing a reference to a hash array 1029of metadata values defined in C<META> tags. 1030 1031=head1 CONFIGURATION OPTIONS 1032 1033The C<Template::Parser> module accepts the following configuration 1034options. Please see L<Template::Manual::Config> for further details 1035on each option. 1036 1037=head2 START_TAG, END_TAG 1038 1039The L<START_TAG|Template::Manual::Config#START_TAG_END_TAG> and 1040L<END_TAG|Template::Manual::Config#START_TAG_END_TAG> options are used to 1041specify character sequences or regular expressions that mark the start and end 1042of a template directive. 1043 1044 my $parser = Template::Parser->new({ 1045 START_TAG => quotemeta('<+'), 1046 END_TAG => quotemeta('+>'), 1047 }); 1048 1049=head2 TAG_STYLE 1050 1051The L<TAG_STYLE|Template::Manual::Config#TAG_STYLE> option can be used to set 1052both L<START_TAG> and L<END_TAG> according to pre-defined tag styles. 1053 1054 my $parser = Template::Parser->new({ 1055 TAG_STYLE => 'star', # [* ... *] 1056 }); 1057 1058=head2 PRE_CHOMP, POST_CHOMP 1059 1060The L<PRE_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> and 1061L<POST_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> can be set to remove 1062any whitespace before or after a directive tag, respectively. 1063 1064 my $parser = Template::Parser-E<gt>new({ 1065 PRE_CHOMP => 1, 1066 POST_CHOMP => 1, 1067 }); 1068 1069=head2 INTERPOLATE 1070 1071The L<INTERPOLATE|Template::Manual::Config#INTERPOLATE> flag can be set 1072to allow variables to be embedded in plain text blocks. 1073 1074 my $parser = Template::Parser->new({ 1075 INTERPOLATE => 1, 1076 }); 1077 1078Variables should be prefixed by a C<$> to identify them, using curly braces 1079to explicitly scope the variable name where necessary. 1080 1081 Hello ${name}, 1082 1083 The day today is ${day.today}. 1084 1085=head2 ANYCASE 1086 1087The L<ANYCASE|Template::Manual::Config#ANYCASE> option can be set 1088to allow directive keywords to be specified in any case. 1089 1090 # with ANYCASE set to 1 1091 [% INCLUDE foobar %] # OK 1092 [% include foobar %] # OK 1093 [% include = 10 %] # ERROR, 'include' is a reserved word 1094 1095=head2 GRAMMAR 1096 1097The L<GRAMMAR|Template::Manual::Config#GRAMMAR> configuration item can be used 1098to specify an alternate grammar for the parser. This allows a modified or 1099entirely new template language to be constructed and used by the Template 1100Toolkit. 1101 1102 use MyOrg::Template::Grammar; 1103 1104 my $parser = Template::Parser->new({ 1105 GRAMMAR = MyOrg::Template::Grammar->new(); 1106 }); 1107 1108By default, an instance of the default L<Template::Grammar> will be 1109created and used automatically if a C<GRAMMAR> item isn't specified. 1110 1111=head2 DEBUG 1112 1113The L<DEBUG|Template::Manual::Config#DEBUG> option can be used to enable 1114various debugging features of the C<Template::Parser> module. 1115 1116 use Template::Constants qw( :debug ); 1117 1118 my $template = Template->new({ 1119 DEBUG => DEBUG_PARSER | DEBUG_DIRS, 1120 }); 1121 1122=head1 AUTHOR 1123 1124Andy Wardley E<lt>abw@wardley.orgE<gt> L<http://wardley.org/> 1125 1126=head1 COPYRIGHT 1127 1128Copyright (C) 1996-2007 Andy Wardley. All Rights Reserved. 1129 1130This module is free software; you can redistribute it and/or 1131modify it under the same terms as Perl itself. 1132 1133The main parsing loop of the C<Template::Parser> module was derived from a 1134standalone parser generated by version 0.16 of the C<Parse::Yapp> module. The 1135following copyright notice appears in the C<Parse::Yapp> documentation. 1136 1137 The Parse::Yapp module and its related modules and shell 1138 scripts are copyright (c) 1998 Francois Desarmenien, 1139 France. All rights reserved. 1140 1141 You may use and distribute them under the terms of either 1142 the GNU General Public License or the Artistic License, as 1143 specified in the Perl README file. 1144 1145=head1 SEE ALSO 1146 1147L<Template>, L<Template::Grammar>, L<Template::Directive> 1148 1149