1#============================================================= -*-Perl-*-
2#
3# Template::Parser
4#
5# DESCRIPTION
6#   This module implements a LALR(1) parser and associated support
7#   methods to parse template documents into the appropriate "compiled"
8#   format.  Much of the parser DFA code (see _parse() method) is based
9#   on Francois Desarmenien's Parse::Yapp module.  Kudos to him.
10#
11# AUTHOR
12#   Andy Wardley <abw@wardley.org>
13#
14# COPYRIGHT
15#   Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
16#
17#   This module is free software; you can redistribute it and/or
18#   modify it under the same terms as Perl itself.
19#
20#   The following copyright notice appears in the Parse::Yapp
21#   documentation.
22#
23#      The Parse::Yapp module and its related modules and shell
24#      scripts are copyright (c) 1998 Francois Desarmenien,
25#      France. All rights reserved.
26#
27#      You may use and distribute them under the terms of either
28#      the GNU General Public License or the Artistic License, as
29#      specified in the Perl README file.
30#
31#============================================================================
32
33package Template::Parser;
34
35use strict;
36use warnings;
37use base 'Template::Base';
38
39use Template::Constants qw( :status :chomp );
40use Template::Directive;
41use Template::Grammar;
42
43# parser state constants
44use constant CONTINUE => 0;
45use constant ACCEPT   => 1;
46use constant ERROR    => 2;
47use constant ABORT    => 3;
48
49our $VERSION = 2.89;
50our $DEBUG   = 0 unless defined $DEBUG;
51our $ERROR   = '';
52
53
54#========================================================================
55#                        -- COMMON TAG STYLES --
56#========================================================================
57
58our $TAG_STYLE   = {
59    'default'   => [ '\[%',    '%\]'    ],
60    'template1' => [ '[\[%]%', '%[\]%]' ],
61    'metatext'  => [ '%%',     '%%'     ],
62    'html'      => [ '<!--',   '-->'    ],
63    'mason'     => [ '<%',     '>'      ],
64    'asp'       => [ '<%',     '%>'     ],
65    'php'       => [ '<\?',    '\?>'    ],
66    'star'      => [ '\[\*',   '\*\]'   ],
67};
68$TAG_STYLE->{ template } = $TAG_STYLE->{ tt2 } = $TAG_STYLE->{ default };
69
70
71our $DEFAULT_STYLE = {
72    START_TAG   => $TAG_STYLE->{ default }->[0],
73    END_TAG     => $TAG_STYLE->{ default }->[1],
74#    TAG_STYLE   => 'default',
75    ANYCASE     => 0,
76    INTERPOLATE => 0,
77    PRE_CHOMP   => 0,
78    POST_CHOMP  => 0,
79    V1DOLLAR    => 0,
80    EVAL_PERL   => 0,
81};
82
83our $QUOTED_ESCAPES = {
84        n => "\n",
85        r => "\r",
86        t => "\t",
87};
88
89# note that '-' must come first so Perl doesn't think it denotes a range
90our $CHOMP_FLAGS  = qr/[-=~+]/;
91
92
93
94#========================================================================
95#                      -----  PUBLIC METHODS -----
96#========================================================================
97
98#------------------------------------------------------------------------
99# new(\%config)
100#
101# Constructor method.
102#------------------------------------------------------------------------
103
104sub new {
105    my $class  = shift;
106    my $config = $_[0] && ref($_[0]) eq 'HASH' ? shift(@_) : { @_ };
107    my ($tagstyle, $debug, $start, $end, $defaults, $grammar, $hash, $key, $udef);
108
109    my $self = bless {
110        START_TAG   => undef,
111        END_TAG     => undef,
112        TAG_STYLE   => 'default',
113        ANYCASE     => 0,
114        INTERPOLATE => 0,
115        PRE_CHOMP   => 0,
116        POST_CHOMP  => 0,
117        V1DOLLAR    => 0,
118        EVAL_PERL   => 0,
119        FILE_INFO   => 1,
120        GRAMMAR     => undef,
121        _ERROR      => '',
122        IN_BLOCK    => [ ],
123        TRACE_VARS  => $config->{ TRACE_VARS },
124        FACTORY     => $config->{ FACTORY } || 'Template::Directive',
125    }, $class;
126
127    # update self with any relevant keys in config
128    foreach $key (keys %$self) {
129        $self->{ $key } = $config->{ $key } if defined $config->{ $key };
130    }
131    $self->{ FILEINFO } = [ ];
132
133    # DEBUG config item can be a bitmask
134    if (defined ($debug = $config->{ DEBUG })) {
135        $self->{ DEBUG } = $debug & ( Template::Constants::DEBUG_PARSER
136                                    | Template::Constants::DEBUG_FLAGS );
137        $self->{ DEBUG_DIRS } = $debug & Template::Constants::DEBUG_DIRS;
138    }
139    # package variable can be set to 1 to support previous behaviour
140    elsif ($DEBUG == 1) {
141        $self->{ DEBUG } = Template::Constants::DEBUG_PARSER;
142        $self->{ DEBUG_DIRS } = 0;
143    }
144    # otherwise let $DEBUG be a bitmask
145    else {
146        $self->{ DEBUG } = $DEBUG & ( Template::Constants::DEBUG_PARSER
147                                    | Template::Constants::DEBUG_FLAGS );
148        $self->{ DEBUG_DIRS } = $DEBUG & Template::Constants::DEBUG_DIRS;
149    }
150
151    $grammar = $self->{ GRAMMAR } ||= do {
152        require Template::Grammar;
153        Template::Grammar->new();
154    };
155
156    # instantiate a FACTORY object
157    unless (ref $self->{ FACTORY }) {
158        my $fclass = $self->{ FACTORY };
159        $self->{ FACTORY } = $self->{ FACTORY }->new(
160             NAMESPACE => $config->{ NAMESPACE }
161        )
162        || return $class->error($self->{ FACTORY }->error());
163    }
164
165    # load grammar rules, states and lex table
166    @$self{ qw( LEXTABLE STATES RULES ) }
167        = @$grammar{ qw( LEXTABLE STATES RULES ) };
168
169    $self->new_style($config)
170        || return $class->error($self->error());
171
172    return $self;
173}
174
175#-----------------------------------------------------------------------
176# These methods are used to track nested IF and WHILE blocks.  Each
177# generated if/while block is given a label indicating the directive
178# type and nesting depth, e.g. FOR0, WHILE1, FOR2, WHILE3, etc.  The
179# NEXT and LAST directives use the innermost label, e.g. last WHILE3;
180#-----------------------------------------------------------------------
181
182sub enter_block {
183    my ($self, $name) = @_;
184    my $blocks = $self->{ IN_BLOCK };
185    push(@{ $self->{ IN_BLOCK } }, $name);
186}
187
188sub leave_block {
189    my $self = shift;
190    my $label = $self->block_label;
191    pop(@{ $self->{ IN_BLOCK } });
192    return $label;
193}
194
195sub in_block {
196    my ($self, $name) = @_;
197    my $blocks = $self->{ IN_BLOCK };
198    return @$blocks && $blocks->[-1] eq $name;
199}
200
201sub block_label {
202    my ($self, $prefix, $suffix) = @_;
203    my $blocks = $self->{ IN_BLOCK };
204    my $name   = @$blocks
205        ? $blocks->[-1] . scalar @$blocks
206        : undef;
207    return join('', grep { defined $_ } $prefix, $name, $suffix);
208}
209
210
211
212#------------------------------------------------------------------------
213# new_style(\%config)
214#
215# Install a new (stacked) parser style.  This feature is currently
216# experimental but should mimic the previous behaviour with regard to
217# TAG_STYLE, START_TAG, END_TAG, etc.
218#------------------------------------------------------------------------
219
220sub new_style {
221    my ($self, $config) = @_;
222    my $styles = $self->{ STYLE } ||= [ ];
223    my ($tagstyle, $tags, $start, $end, $key);
224
225    # clone new style from previous or default style
226    my $style  = { %{ $styles->[-1] || $DEFAULT_STYLE } };
227
228    # expand START_TAG and END_TAG from specified TAG_STYLE
229    if ($tagstyle = $config->{ TAG_STYLE }) {
230        return $self->error("Invalid tag style: $tagstyle")
231            unless defined ($tags = $TAG_STYLE->{ $tagstyle });
232        ($start, $end) = @$tags;
233        $config->{ START_TAG } ||= $start;
234        $config->{   END_TAG } ||= $end;
235    }
236
237    foreach $key (keys %$DEFAULT_STYLE) {
238        $style->{ $key } = $config->{ $key } if defined $config->{ $key };
239    }
240    push(@$styles, $style);
241    return $style;
242}
243
244
245#------------------------------------------------------------------------
246# old_style()
247#
248# Pop the current parser style and revert to the previous one.  See
249# new_style().   ** experimental **
250#------------------------------------------------------------------------
251
252sub old_style {
253    my $self = shift;
254    my $styles = $self->{ STYLE };
255    return $self->error('only 1 parser style remaining')
256        unless (@$styles > 1);
257    pop @$styles;
258    return $styles->[-1];
259}
260
261
262#------------------------------------------------------------------------
263# parse($text, $data)
264#
265# Parses the text string, $text and returns a hash array representing
266# the compiled template block(s) as Perl code, in the format expected
267# by Template::Document.
268#------------------------------------------------------------------------
269
270sub parse {
271    my ($self, $text, $info) = @_;
272    my ($tokens, $block);
273
274    $info->{ DEBUG } = $self->{ DEBUG_DIRS }
275        unless defined $info->{ DEBUG };
276
277#    print "info: { ", join(', ', map { "$_ => $info->{ $_ }" } keys %$info), " }\n";
278
279    # store for blocks defined in the template (see define_block())
280    my $defblock  = $self->{ DEFBLOCK  } = { };
281    my $metadata  = $self->{ METADATA  } = [ ];
282    my $variables = $self->{ VARIABLES } = { };
283    $self->{ DEFBLOCKS } = [ ];
284
285    $self->{ _ERROR } = '';
286
287    # split file into TEXT/DIRECTIVE chunks
288    $tokens = $self->split_text($text)
289        || return undef;                                    ## RETURN ##
290
291    push(@{ $self->{ FILEINFO } }, $info);
292
293    # parse chunks
294    $block = $self->_parse($tokens, $info);
295
296    pop(@{ $self->{ FILEINFO } });
297
298    return undef unless $block;                             ## RETURN ##
299
300    $self->debug("compiled main template document block:\n$block")
301        if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
302
303    return {
304        BLOCK     => $block,
305        DEFBLOCKS => $defblock,
306        VARIABLES => $variables,
307        METADATA  => { @$metadata },
308    };
309}
310
311
312
313#------------------------------------------------------------------------
314# split_text($text)
315#
316# Split input template text into directives and raw text chunks.
317#------------------------------------------------------------------------
318
319sub split_text {
320    my ($self, $text) = @_;
321    my ($pre, $dir, $prelines, $dirlines, $postlines, $chomp, $tags, @tags);
322    my $style = $self->{ STYLE }->[-1];
323    my ($start, $end, $prechomp, $postchomp, $interp ) =
324        @$style{ qw( START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE ) };
325    my $tags_dir = $self->{ANYCASE} ? qr<TAGS>i : qr<TAGS>;
326
327    my @tokens = ();
328    my $line = 1;
329
330    return \@tokens                                         ## RETURN ##
331        unless defined $text && length $text;
332
333    # extract all directives from the text
334    while ($text =~ s/
335           ^(.*?)               # $1 - start of line up to directive
336           (?:
337            $start          # start of tag
338            (.*?)           # $2 - tag contents
339            $end            # end of tag
340            )
341           //sx) {
342
343        ($pre, $dir) = ($1, $2);
344        $pre = '' unless defined $pre;
345        $dir = '' unless defined $dir;
346
347        $prelines  = ($pre =~ tr/\n//);  # newlines in preceding text
348        $dirlines  = ($dir =~ tr/\n//);  # newlines in directive tag
349        $postlines = 0;                  # newlines chomped after tag
350
351        for ($dir) {
352            if (/^\#/) {
353                # comment out entire directive except for any end chomp flag
354                $dir = ($dir =~ /($CHOMP_FLAGS)$/o) ? $1 : '';
355            }
356            else {
357
358                if(s/^($CHOMP_FLAGS)?(\s*)//so && $2) {
359                  my $chomped = $2;
360                  my $linecount = ($chomped =~ tr/\n//); # newlines in chomped whitespace
361                  $linecount ||= 0;
362                  $prelines += $linecount;
363                  $dirlines -= $linecount;
364                }
365                # PRE_CHOMP: process whitespace before tag
366                $chomp = $1 ? $1 : $prechomp;
367                $chomp =~ tr/-=~+/1230/;
368                if ($chomp && $pre) {
369                    # chomp off whitespace and newline preceding directive
370                    if ($chomp == CHOMP_ALL) {
371                        $pre =~ s{ (\r?\n|^) [^\S\n]* \z }{}mx;
372                    }
373                    elsif ($chomp == CHOMP_COLLAPSE) {
374                        $pre =~ s{ (\s+) \z }{ }x;
375                    }
376                    elsif ($chomp == CHOMP_GREEDY) {
377                        $pre =~ s{ (\s+) \z }{}x;
378                    }
379                }
380            }
381
382            # POST_CHOMP: process whitespace after tag
383            s/\s*($CHOMP_FLAGS)?\s*$//so;
384            $chomp = $1 ? $1 : $postchomp;
385            $chomp =~ tr/-=~+/1230/;
386            if ($chomp) {
387                if ($chomp == CHOMP_ALL) {
388                    $text =~ s{ ^ ([^\S\n]* \n) }{}x
389                        && $postlines++;
390                }
391                elsif ($chomp == CHOMP_COLLAPSE) {
392                    $text =~ s{ ^ (\s+) }{ }x
393                        && ($postlines += $1=~y/\n//);
394                }
395                # any trailing whitespace
396                elsif ($chomp == CHOMP_GREEDY) {
397                    $text =~ s{ ^ (\s+) }{}x
398                        && ($postlines += $1=~y/\n//);
399                }
400            }
401        }
402
403        # any text preceding the directive can now be added
404        if (length $pre) {
405            push(@tokens, $interp
406                 ? [ $pre, $line, 'ITEXT' ]
407                 : ('TEXT', $pre) );
408        }
409        $line += $prelines;
410
411        # and now the directive, along with line number information
412        if (length $dir) {
413            # the TAGS directive is a compile-time switch
414            if ($dir =~ /^$tags_dir\s+(.*)/) {
415                my @tags = split(/\s+/, $1);
416                if (scalar @tags > 1) {
417                    ($start, $end) = map { quotemeta($_) } @tags;
418                }
419                elsif ($tags = $TAG_STYLE->{ $tags[0] }) {
420                    ($start, $end) = @$tags;
421                }
422                else {
423                    warn "invalid TAGS style: $tags[0]\n";
424                }
425            }
426            else {
427                # DIRECTIVE is pushed as:
428                #   [ $dirtext, $line_no(s), \@tokens ]
429                push(@tokens,
430                     [ $dir,
431                       ($dirlines
432                        ? sprintf("%d-%d", $line, $line + $dirlines)
433                        : $line),
434                       $self->tokenise_directive($dir) ]);
435            }
436        }
437
438        # update line counter to include directive lines and any extra
439        # newline chomped off the start of the following text
440        $line += $dirlines + $postlines;
441    }
442
443    # anything remaining in the string is plain text
444    push(@tokens, $interp
445         ? [ $text, $line, 'ITEXT' ]
446         : ( 'TEXT', $text) )
447        if length $text;
448
449    return \@tokens;                                        ## RETURN ##
450}
451
452
453
454#------------------------------------------------------------------------
455# interpolate_text($text, $line)
456#
457# Examines $text looking for any variable references embedded like
458# $this or like ${ this }.
459#------------------------------------------------------------------------
460
461sub interpolate_text {
462    my ($self, $text, $line) = @_;
463    my @tokens  = ();
464    my ($pre, $var, $dir);
465
466
467   while ($text =~
468           /
469           ( (?: \\. | [^\$] ){1,3000} ) # escaped or non-'$' character [$1]
470           |
471           ( \$ (?:                 # embedded variable            [$2]
472             (?: \{ ([^\}]*) \} )   # ${ ... }                     [$3]
473             |
474             ([\w\.]+)              # $word                        [$4]
475             )
476           )
477        /gx) {
478
479        ($pre, $var, $dir) = ($1, $3 || $4, $2);
480
481        # preceding text
482        if (defined($pre) && length($pre)) {
483            $line += $pre =~ tr/\n//;
484            $pre =~ s/\\\$/\$/g;
485            push(@tokens, 'TEXT', $pre);
486        }
487        # $variable reference
488        if ($var) {
489            $line += $dir =~ tr/\n/ /;
490            push(@tokens, [ $dir, $line, $self->tokenise_directive($var) ]);
491        }
492        # other '$' reference - treated as text
493        elsif ($dir) {
494            $line += $dir =~ tr/\n//;
495            push(@tokens, 'TEXT', $dir);
496        }
497    }
498
499    return \@tokens;
500}
501
502
503
504#------------------------------------------------------------------------
505# tokenise_directive($text)
506#
507# Called by the private _parse() method when it encounters a DIRECTIVE
508# token in the list provided by the split_text() or interpolate_text()
509# methods.  The directive text is passed by parameter.
510#
511# The method splits the directive into individual tokens as recognised
512# by the parser grammar (see Template::Grammar for details).  It
513# constructs a list of tokens each represented by 2 elements, as per
514# split_text() et al.  The first element contains the token type, the
515# second the token itself.
516#
517# The method tokenises the string using a complex (but fast) regex.
518# For a deeper understanding of the regex magic at work here, see
519# Jeffrey Friedl's excellent book "Mastering Regular Expressions",
520# from O'Reilly, ISBN 1-56592-257-3
521#
522# Returns a reference to the list of chunks (each one being 2 elements)
523# identified in the directive text.  On error, the internal _ERROR string
524# is set and undef is returned.
525#------------------------------------------------------------------------
526
527sub tokenise_directive {
528    my ($self, $text, $line) = @_;
529    my ($token, $uctoken, $type, $lookup);
530    my $lextable = $self->{ LEXTABLE };
531    my $style    = $self->{ STYLE }->[-1];
532    my ($anycase, $start, $end) = @$style{ qw( ANYCASE START_TAG END_TAG ) };
533    my @tokens = ( );
534
535    while ($text =~
536            /
537                # strip out any comments
538                (\#[^\n]*)
539           |
540                # a quoted phrase matches in $3
541                (["'])                   # $2 - opening quote, ' or "
542                (                        # $3 - quoted text buffer
543                    (?:                  # repeat group (no backreference)
544                        \\\\             # an escaped backslash \\
545                    |                    # ...or...
546                        \\\2             # an escaped quote \" or \' (match $1)
547                    |                    # ...or...
548                        .                # any other character
549                    |   \n
550                    )*?                  # non-greedy repeat
551                )                        # end of $3
552                \2                       # match opening quote
553            |
554                # an unquoted number matches in $4
555                (-?\d+(?:\.\d+)?)       # numbers
556            |
557                # filename matches in $5
558                ( \/?\w+(?:(?:\/|::?)\w*)+ | \/\w+)
559            |
560                # an identifier matches in $6
561                (\w+)                    # variable identifier
562            |
563                # an unquoted word or symbol matches in $7
564                (   [(){}\[\]:;,\/\\]    # misc parenthesis and symbols
565#               |   \->                  # arrow operator (for future?)
566                |   [+\-*]               # math operations
567                |   \$\{?                # dollar with option left brace
568                |   =>                   # like '='
569                |   [=!<>]?= | [!<>]     # eqality tests
570                |   &&? | \|\|?          # boolean ops
571                |   \.\.?                # n..n sequence
572                |   \S+                  # something unquoted
573                )                        # end of $7
574            /gmxo) {
575
576        # ignore comments to EOL
577        next if $1;
578
579        # quoted string
580        if (defined ($token = $3)) {
581            # double-quoted string may include $variable references
582            if ($2 eq '"') {
583                if ($token =~ /[\$\\]/) {
584                    $type = 'QUOTED';
585                    # unescape " and \ but leave \$ escaped so that
586                        # interpolate_text() doesn't incorrectly treat it
587                    # as a variable reference
588#                   $token =~ s/\\([\\"])/$1/g;
589                        for ($token) {
590                                s/\\([^\$nrt])/$1/g;
591                                s/\\([nrt])/$QUOTED_ESCAPES->{ $1 }/ge;
592                        }
593                    push(@tokens, ('"') x 2,
594                                  @{ $self->interpolate_text($token) },
595                                  ('"') x 2);
596                    next;
597                }
598                else {
599                    $type = 'LITERAL';
600                    $token =~ s['][\\']g;
601                    $token = "'$token'";
602                }
603            }
604            else {
605                $type = 'LITERAL';
606                $token = "'$token'";
607            }
608        }
609        # number
610        elsif (defined ($token = $4)) {
611            $type = 'NUMBER';
612        }
613        elsif (defined($token = $5)) {
614            $type = 'FILENAME';
615        }
616        elsif (defined($token = $6)) {
617            # Fold potential keywords to UPPER CASE if the ANYCASE option is
618            # set, unless (we've got some preceding tokens and) the previous
619            # token is a DOT op.  This prevents the 'last' in 'data.last'
620            # from being interpreted as the LAST keyword.
621            $uctoken =
622                ($anycase && (! @tokens || $tokens[-2] ne 'DOT'))
623                    ? uc $token
624                    :    $token;
625            if (defined ($type = $lextable->{ $uctoken })) {
626                $token = $uctoken;
627            }
628            else {
629                $type = 'IDENT';
630            }
631        }
632        elsif (defined ($token = $7)) {
633            # reserved words may be in lower case unless case sensitive
634            $uctoken = $anycase ? uc $token : $token;
635            unless (defined ($type = $lextable->{ $uctoken })) {
636                $type = 'UNQUOTED';
637            }
638        }
639
640        push(@tokens, $type, $token);
641
642#       print(STDERR " +[ $type, $token ]\n")
643#           if $DEBUG;
644    }
645
646#    print STDERR "tokenise directive() returning:\n  [ @tokens ]\n"
647#       if $DEBUG;
648
649    return \@tokens;                                        ## RETURN ##
650}
651
652
653#------------------------------------------------------------------------
654# define_block($name, $block)
655#
656# Called by the parser 'defblock' rule when a BLOCK definition is
657# encountered in the template.  The name of the block is passed in the
658# first parameter and a reference to the compiled block is passed in
659# the second.  This method stores the block in the $self->{ DEFBLOCK }
660# hash which has been initialised by parse() and will later be used
661# by the same method to call the store() method on the calling cache
662# to define the block "externally".
663#------------------------------------------------------------------------
664
665sub define_block {
666    my ($self, $name, $block) = @_;
667    my $defblock = $self->{ DEFBLOCK }
668        || return undef;
669
670    $self->debug("compiled block '$name':\n$block")
671        if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
672
673    $defblock->{ $name } = $block;
674
675    return undef;
676}
677
678sub push_defblock {
679    my $self = shift;
680    my $stack = $self->{ DEFBLOCK_STACK } ||= [];
681    push(@$stack, $self->{ DEFBLOCK } );
682    $self->{ DEFBLOCK } = { };
683}
684
685sub pop_defblock {
686    my $self  = shift;
687    my $defs  = $self->{ DEFBLOCK };
688    my $stack = $self->{ DEFBLOCK_STACK } || return $defs;
689    return $defs unless @$stack;
690    $self->{ DEFBLOCK } = pop @$stack;
691    return $defs;
692}
693
694
695#------------------------------------------------------------------------
696# add_metadata(\@setlist)
697#------------------------------------------------------------------------
698
699sub add_metadata {
700    my ($self, $setlist) = @_;
701    my $metadata = $self->{ METADATA }
702        || return undef;
703
704    push(@$metadata, @$setlist);
705
706    return undef;
707}
708
709
710#------------------------------------------------------------------------
711# location()
712#
713# Return Perl comment indicating current parser file and line
714#------------------------------------------------------------------------
715
716sub location {
717    my $self = shift;
718    return "\n" unless $self->{ FILE_INFO };
719    my $line = ${ $self->{ LINE } };
720    my $info = $self->{ FILEINFO }->[-1];
721    my $file = $info->{ path } || $info->{ name }
722        || '(unknown template)';
723    $line =~ s/\-.*$//; # might be 'n-n'
724    $line ||= 1;
725    return "#line $line \"$file\"\n";
726}
727
728
729#========================================================================
730#                     -----  PRIVATE METHODS -----
731#========================================================================
732
733#------------------------------------------------------------------------
734# _parse(\@tokens, \@info)
735#
736# Parses the list of input tokens passed by reference and returns a
737# Template::Directive::Block object which contains the compiled
738# representation of the template.
739#
740# This is the main parser DFA loop.  See embedded comments for
741# further details.
742#
743# On error, undef is returned and the internal _ERROR field is set to
744# indicate the error.  This can be retrieved by calling the error()
745# method.
746#------------------------------------------------------------------------
747
748sub _parse {
749    my ($self, $tokens, $info) = @_;
750    my ($token, $value, $text, $line, $inperl);
751    my ($state, $stateno, $status, $action, $lookup, $coderet, @codevars);
752    my ($lhs, $len, $code);         # rule contents
753    my $stack = [ [ 0, undef ] ];   # DFA stack
754
755# DEBUG
756#   local $" = ', ';
757
758    # retrieve internal rule and state tables
759    my ($states, $rules) = @$self{ qw( STATES RULES ) };
760
761    # If we're tracing variable usage then we need to give the factory a
762    # reference to our $self->{ VARIABLES } for it to fill in.  This is a
763    # bit of a hack to back-patch this functionality into TT2.
764    $self->{ FACTORY }->trace_vars($self->{ VARIABLES })
765        if $self->{ TRACE_VARS };
766
767    # call the grammar set_factory method to install emitter factory
768    $self->{ GRAMMAR }->install_factory($self->{ FACTORY });
769
770    $line = $inperl = 0;
771    $self->{ LINE   } = \$line;
772    $self->{ FILE   } = $info->{ name };
773    $self->{ INPERL } = \$inperl;
774
775    $status = CONTINUE;
776    my $in_string = 0;
777
778    while(1) {
779        # get state number and state
780        $stateno =  $stack->[-1]->[0];
781        $state   = $states->[$stateno];
782
783        # see if any lookaheads exist for the current state
784        if (exists $state->{'ACTIONS'}) {
785
786            # get next token and expand any directives (i.e. token is an
787            # array ref) onto the front of the token list
788            while (! defined $token && @$tokens) {
789                $token = shift(@$tokens);
790                if (ref $token) {
791                    ($text, $line, $token) = @$token;
792                    if (ref $token) {
793                        if ($info->{ DEBUG } && ! $in_string) {
794                            # - - - - - - - - - - - - - - - - - - - - - - - - -
795                            # This is gnarly.  Look away now if you're easily
796                            # frightened.  We're pushing parse tokens onto the
797                            # pending list to simulate a DEBUG directive like so:
798                            # [% DEBUG msg line='20' text='INCLUDE foo' %]
799                            # - - - - - - - - - - - - - - - - - - - - - - - - -
800                            my $dtext = $text;
801                            $dtext =~ s[(['\\])][\\$1]g;
802                            unshift(@$tokens,
803                                    DEBUG   => 'DEBUG',
804                                    IDENT   => 'msg',
805                                    IDENT   => 'line',
806                                    ASSIGN  => '=',
807                                    LITERAL => "'$line'",
808                                    IDENT   => 'text',
809                                    ASSIGN  => '=',
810                                    LITERAL => "'$dtext'",
811                                    IDENT   => 'file',
812                                    ASSIGN  => '=',
813                                    LITERAL => "'$info->{ name }'",
814                                    (';') x 2,
815                                    @$token,
816                                    (';') x 2);
817                        }
818                        else {
819                            unshift(@$tokens, @$token, (';') x 2);
820                        }
821                        $token = undef;  # force redo
822                    }
823                    elsif ($token eq 'ITEXT') {
824                        if ($inperl) {
825                            # don't perform interpolation in PERL blocks
826                            $token = 'TEXT';
827                            $value = $text;
828                        }
829                        else {
830                            unshift(@$tokens,
831                                    @{ $self->interpolate_text($text, $line) });
832                            $token = undef; # force redo
833                        }
834                    }
835                }
836                else {
837                    # toggle string flag to indicate if we're crossing
838                    # a string boundary
839                    $in_string = ! $in_string if $token eq '"';
840                    $value = shift(@$tokens);
841                }
842            };
843            # clear undefined token to avoid 'undefined variable blah blah'
844            # warnings and let the parser logic pick it up in a minute
845            $token = '' unless defined $token;
846
847            # get the next state for the current lookahead token
848            $action = defined ($lookup = $state->{'ACTIONS'}->{ $token })
849                      ? $lookup
850                      : defined ($lookup = $state->{'DEFAULT'})
851                        ? $lookup
852                        : undef;
853        }
854        else {
855            # no lookahead actions
856            $action = $state->{'DEFAULT'};
857        }
858
859        # ERROR: no ACTION
860        last unless defined $action;
861
862        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
863        # shift (+ive ACTION)
864        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
865        if ($action > 0) {
866            push(@$stack, [ $action, $value ]);
867            $token = $value = undef;
868            redo;
869        };
870
871        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
872        # reduce (-ive ACTION)
873        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
874        ($lhs, $len, $code) = @{ $rules->[ -$action ] };
875
876        # no action imples ACCEPTance
877        $action
878            or $status = ACCEPT;
879
880        # use dummy sub if code ref doesn't exist
881        $code = sub { $_[1] }
882            unless $code;
883
884        @codevars = $len
885                ?   map { $_->[1] } @$stack[ -$len .. -1 ]
886                :   ();
887
888        eval {
889            $coderet = &$code( $self, @codevars );
890        };
891        if ($@) {
892            my $err = $@;
893            chomp $err;
894            return $self->_parse_error($err);
895        }
896
897        # reduce stack by $len
898        splice(@$stack, -$len, $len);
899
900        # ACCEPT
901        return $coderet                                     ## RETURN ##
902            if $status == ACCEPT;
903
904        # ABORT
905        return undef                                        ## RETURN ##
906            if $status == ABORT;
907
908        # ERROR
909        last
910            if $status == ERROR;
911    }
912    continue {
913        push(@$stack, [ $states->[ $stack->[-1][0] ]->{'GOTOS'}->{ $lhs },
914              $coderet ]),
915    }
916
917    # ERROR                                                 ## RETURN ##
918    return $self->_parse_error('unexpected end of input')
919        unless defined $value;
920
921    # munge text of last directive to make it readable
922#    $text =~ s/\n/\\n/g;
923
924    return $self->_parse_error("unexpected end of directive", $text)
925        if $value eq ';';   # end of directive SEPARATOR
926
927    return $self->_parse_error("unexpected token ($value)", $text);
928}
929
930
931
932#------------------------------------------------------------------------
933# _parse_error($msg, $dirtext)
934#
935# Method used to handle errors encountered during the parse process
936# in the _parse() method.
937#------------------------------------------------------------------------
938
939sub _parse_error {
940    my ($self, $msg, $text) = @_;
941    my $line = $self->{ LINE };
942    $line = ref($line) ? $$line : $line;
943    $line = 'unknown' unless $line;
944
945    $msg .= "\n  [% $text %]"
946        if defined $text;
947
948    return $self->error("line $line: $msg");
949}
950
951
952#------------------------------------------------------------------------
953# _dump()
954#
955# Debug method returns a string representing the internal state of the
956# object.
957#------------------------------------------------------------------------
958
959sub _dump {
960    my $self = shift;
961    my $output = "[Template::Parser] {\n";
962    my $format = "    %-16s => %s\n";
963    my $key;
964
965    foreach $key (qw( START_TAG END_TAG TAG_STYLE ANYCASE INTERPOLATE
966                      PRE_CHOMP POST_CHOMP V1DOLLAR )) {
967        my $val = $self->{ $key };
968        $val = '<undef>' unless defined $val;
969        $output .= sprintf($format, $key, $val);
970    }
971
972    $output .= '}';
973    return $output;
974}
975
976
9771;
978
979__END__
980
981=head1 NAME
982
983Template::Parser - LALR(1) parser for compiling template documents
984
985=head1 SYNOPSIS
986
987    use Template::Parser;
988
989    $parser   = Template::Parser->new(\%config);
990    $template = $parser->parse($text)
991        || die $parser->error(), "\n";
992
993=head1 DESCRIPTION
994
995The C<Template::Parser> module implements a LALR(1) parser and associated
996methods for parsing template documents into Perl code.
997
998=head1 PUBLIC METHODS
999
1000=head2 new(\%params)
1001
1002The C<new()> constructor creates and returns a reference to a new
1003C<Template::Parser> object.
1004
1005A reference to a hash may be supplied as a parameter to provide configuration values.
1006See L<CONFIGURATION OPTIONS> below for a summary of these options and
1007L<Template::Manual::Config> for full details.
1008
1009    my $parser = Template::Parser->new({
1010        START_TAG => quotemeta('<+'),
1011        END_TAG   => quotemeta('+>'),
1012    });
1013
1014=head2 parse($text)
1015
1016The C<parse()> method parses the text passed in the first parameter and
1017returns a reference to a hash array of data defining the compiled
1018representation of the template text, suitable for passing to the
1019L<Template::Document> L<new()|Template::Document#new()> constructor method. On
1020error, undef is returned.
1021
1022    $data = $parser->parse($text)
1023        || die $parser->error();
1024
1025The C<$data> hash reference returned contains a C<BLOCK> item containing the
1026compiled Perl code for the template, a C<DEFBLOCKS> item containing a
1027reference to a hash array of sub-template C<BLOCK>s defined within in the
1028template, and a C<METADATA> item containing a reference to a hash array
1029of metadata values defined in C<META> tags.
1030
1031=head1 CONFIGURATION OPTIONS
1032
1033The C<Template::Parser> module accepts the following configuration
1034options.  Please see L<Template::Manual::Config> for further details
1035on each option.
1036
1037=head2 START_TAG, END_TAG
1038
1039The L<START_TAG|Template::Manual::Config#START_TAG_END_TAG> and
1040L<END_TAG|Template::Manual::Config#START_TAG_END_TAG> options are used to
1041specify character sequences or regular expressions that mark the start and end
1042of a template directive.
1043
1044    my $parser = Template::Parser->new({
1045        START_TAG => quotemeta('<+'),
1046        END_TAG   => quotemeta('+>'),
1047    });
1048
1049=head2 TAG_STYLE
1050
1051The L<TAG_STYLE|Template::Manual::Config#TAG_STYLE> option can be used to set
1052both L<START_TAG> and L<END_TAG> according to pre-defined tag styles.
1053
1054    my $parser = Template::Parser->new({
1055        TAG_STYLE => 'star',     # [* ... *]
1056    });
1057
1058=head2 PRE_CHOMP, POST_CHOMP
1059
1060The L<PRE_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> and
1061L<POST_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> can be set to remove
1062any whitespace before or after a directive tag, respectively.
1063
1064    my $parser = Template::Parser-E<gt>new({
1065        PRE_CHOMP  => 1,
1066        POST_CHOMP => 1,
1067    });
1068
1069=head2 INTERPOLATE
1070
1071The L<INTERPOLATE|Template::Manual::Config#INTERPOLATE> flag can be set
1072to allow variables to be embedded in plain text blocks.
1073
1074    my $parser = Template::Parser->new({
1075        INTERPOLATE => 1,
1076    });
1077
1078Variables should be prefixed by a C<$> to identify them, using curly braces
1079to explicitly scope the variable name where necessary.
1080
1081    Hello ${name},
1082
1083    The day today is ${day.today}.
1084
1085=head2 ANYCASE
1086
1087The L<ANYCASE|Template::Manual::Config#ANYCASE> option can be set
1088to allow directive keywords to be specified in any case.
1089
1090    # with ANYCASE set to 1
1091    [% INCLUDE foobar %]    # OK
1092    [% include foobar %]    # OK
1093    [% include = 10   %]    # ERROR, 'include' is a reserved word
1094
1095=head2 GRAMMAR
1096
1097The L<GRAMMAR|Template::Manual::Config#GRAMMAR> configuration item can be used
1098to specify an alternate grammar for the parser. This allows a modified or
1099entirely new template language to be constructed and used by the Template
1100Toolkit.
1101
1102    use MyOrg::Template::Grammar;
1103
1104    my $parser = Template::Parser->new({
1105        GRAMMAR = MyOrg::Template::Grammar->new();
1106    });
1107
1108By default, an instance of the default L<Template::Grammar> will be
1109created and used automatically if a C<GRAMMAR> item isn't specified.
1110
1111=head2 DEBUG
1112
1113The L<DEBUG|Template::Manual::Config#DEBUG> option can be used to enable
1114various debugging features of the C<Template::Parser> module.
1115
1116    use Template::Constants qw( :debug );
1117
1118    my $template = Template->new({
1119        DEBUG => DEBUG_PARSER | DEBUG_DIRS,
1120    });
1121
1122=head1 AUTHOR
1123
1124Andy Wardley E<lt>abw@wardley.orgE<gt> L<http://wardley.org/>
1125
1126=head1 COPYRIGHT
1127
1128Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
1129
1130This module is free software; you can redistribute it and/or
1131modify it under the same terms as Perl itself.
1132
1133The main parsing loop of the C<Template::Parser> module was derived from a
1134standalone parser generated by version 0.16 of the C<Parse::Yapp> module. The
1135following copyright notice appears in the C<Parse::Yapp> documentation.
1136
1137    The Parse::Yapp module and its related modules and shell
1138    scripts are copyright (c) 1998 Francois Desarmenien,
1139    France. All rights reserved.
1140
1141    You may use and distribute them under the terms of either
1142    the GNU General Public License or the Artistic License, as
1143    specified in the Perl README file.
1144
1145=head1 SEE ALSO
1146
1147L<Template>, L<Template::Grammar>, L<Template::Directive>
1148
1149