1#============================================================= -*-Perl-*-
2#
3# Template::Parser
4#
5# DESCRIPTION
6#   This module implements a LALR(1) parser and assocated support
7#   methods to parse template documents into the appropriate "compiled"
8#   format.  Much of the parser DFA code (see _parse() method) is based
9#   on Francois Desarmenien's Parse::Yapp module.  Kudos to him.
10#
11# AUTHOR
12#   Andy Wardley <abw@wardley.org>
13#
14# COPYRIGHT
15#   Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
16#
17#   This module is free software; you can redistribute it and/or
18#   modify it under the same terms as Perl itself.
19#
20#   The following copyright notice appears in the Parse::Yapp
21#   documentation.
22#
23#      The Parse::Yapp module and its related modules and shell
24#      scripts are copyright (c) 1998 Francois Desarmenien,
25#      France. All rights reserved.
26#
27#      You may use and distribute them under the terms of either
28#      the GNU General Public License or the Artistic License, as
29#      specified in the Perl README file.
30#
31#============================================================================
32
33package Template::Parser;
34
35use strict;
36use warnings;
37use base 'Template::Base';
38
39use Template::Constants qw( :status :chomp );
40use Template::Directive;
41use Template::Grammar;
42
43# parser state constants
44use constant CONTINUE => 0;
45use constant ACCEPT   => 1;
46use constant ERROR    => 2;
47use constant ABORT    => 3;
48
49our $VERSION = 2.89;
50our $DEBUG   = 0 unless defined $DEBUG;
51our $ERROR   = '';
52
53
54#========================================================================
55#                        -- COMMON TAG STYLES --
56#========================================================================
57
58our $TAG_STYLE   = {
59    'default'   => [ '\[%',    '%\]'    ],
60    'template1' => [ '[\[%]%', '%[\]%]' ],
61    'metatext'  => [ '%%',     '%%'     ],
62    'html'      => [ '<!--',   '-->'    ],
63    'mason'     => [ '<%',     '>'      ],
64    'asp'       => [ '<%',     '%>'     ],
65    'php'       => [ '<\?',    '\?>'    ],
66    'star'      => [ '\[\*',   '\*\]'   ],
67};
68$TAG_STYLE->{ template } = $TAG_STYLE->{ tt2 } = $TAG_STYLE->{ default };
69
70
71our $DEFAULT_STYLE = {
72    START_TAG   => $TAG_STYLE->{ default }->[0],
73    END_TAG     => $TAG_STYLE->{ default }->[1],
74#    TAG_STYLE   => 'default',
75    ANYCASE     => 0,
76    INTERPOLATE => 0,
77    PRE_CHOMP   => 0,
78    POST_CHOMP  => 0,
79    V1DOLLAR    => 0,
80    EVAL_PERL   => 0,
81};
82
83our $QUOTED_ESCAPES = {
84        n => "\n",
85        r => "\r",
86        t => "\t",
87};
88
89# note that '-' must come first so Perl doesn't think it denotes a range
90our $CHOMP_FLAGS  = qr/[-=~+]/;
91
92
93
94#========================================================================
95#                      -----  PUBLIC METHODS -----
96#========================================================================
97
98#------------------------------------------------------------------------
99# new(\%config)
100#
101# Constructor method.
102#------------------------------------------------------------------------
103
104sub new {
105    my $class  = shift;
106    my $config = $_[0] && ref($_[0]) eq 'HASH' ? shift(@_) : { @_ };
107    my ($tagstyle, $debug, $start, $end, $defaults, $grammar, $hash, $key, $udef);
108
109    my $self = bless {
110        START_TAG   => undef,
111        END_TAG     => undef,
112        TAG_STYLE   => 'default',
113        ANYCASE     => 0,
114        INTERPOLATE => 0,
115        PRE_CHOMP   => 0,
116        POST_CHOMP  => 0,
117        V1DOLLAR    => 0,
118        EVAL_PERL   => 0,
119        FILE_INFO   => 1,
120        GRAMMAR     => undef,
121        _ERROR      => '',
122        IN_BLOCK    => [ ],
123        TRACE_VARS  => $config->{ TRACE_VARS },
124        FACTORY     => $config->{ FACTORY } || 'Template::Directive',
125    }, $class;
126
127    # update self with any relevant keys in config
128    foreach $key (keys %$self) {
129        $self->{ $key } = $config->{ $key } if defined $config->{ $key };
130    }
131    $self->{ FILEINFO } = [ ];
132
133    # DEBUG config item can be a bitmask
134    if (defined ($debug = $config->{ DEBUG })) {
135        $self->{ DEBUG } = $debug & ( Template::Constants::DEBUG_PARSER
136                                    | Template::Constants::DEBUG_FLAGS );
137        $self->{ DEBUG_DIRS } = $debug & Template::Constants::DEBUG_DIRS;
138    }
139    # package variable can be set to 1 to support previous behaviour
140    elsif ($DEBUG == 1) {
141        $self->{ DEBUG } = Template::Constants::DEBUG_PARSER;
142        $self->{ DEBUG_DIRS } = 0;
143    }
144    # otherwise let $DEBUG be a bitmask
145    else {
146        $self->{ DEBUG } = $DEBUG & ( Template::Constants::DEBUG_PARSER
147                                    | Template::Constants::DEBUG_FLAGS );
148        $self->{ DEBUG_DIRS } = $DEBUG & Template::Constants::DEBUG_DIRS;
149    }
150
151    $grammar = $self->{ GRAMMAR } ||= do {
152        require Template::Grammar;
153        Template::Grammar->new();
154    };
155
156    # instantiate a FACTORY object
157    unless (ref $self->{ FACTORY }) {
158        my $fclass = $self->{ FACTORY };
159        $self->{ FACTORY } = $self->{ FACTORY }->new(
160             NAMESPACE => $config->{ NAMESPACE }
161        )
162        || return $class->error($self->{ FACTORY }->error());
163    }
164
165    # load grammar rules, states and lex table
166    @$self{ qw( LEXTABLE STATES RULES ) }
167        = @$grammar{ qw( LEXTABLE STATES RULES ) };
168
169    $self->new_style($config)
170        || return $class->error($self->error());
171
172    return $self;
173}
174
175#-----------------------------------------------------------------------
176# These methods are used to track nested IF and WHILE blocks.  Each
177# generated if/while block is given a label indicating the directive
178# type and nesting depth, e.g. FOR0, WHILE1, FOR2, WHILE3, etc.  The
179# NEXT and LAST directives use the innermost label, e.g. last WHILE3;
180#-----------------------------------------------------------------------
181
182sub enter_block {
183    my ($self, $name) = @_;
184    my $blocks = $self->{ IN_BLOCK };
185    push(@{ $self->{ IN_BLOCK } }, $name);
186}
187
188sub leave_block {
189    my $self = shift;
190    my $label = $self->block_label;
191    pop(@{ $self->{ IN_BLOCK } });
192    return $label;
193}
194
195sub in_block {
196    my ($self, $name) = @_;
197    my $blocks = $self->{ IN_BLOCK };
198    return @$blocks && $blocks->[-1] eq $name;
199}
200
201sub block_label {
202    my ($self, $prefix, $suffix) = @_;
203    my $blocks = $self->{ IN_BLOCK };
204    my $name   = @$blocks
205        ? $blocks->[-1] . scalar @$blocks
206        : undef;
207    return join('', grep { defined $_ } $prefix, $name, $suffix);
208}
209
210
211
212#------------------------------------------------------------------------
213# new_style(\%config)
214#
215# Install a new (stacked) parser style.  This feature is currently
216# experimental but should mimic the previous behaviour with regard to
217# TAG_STYLE, START_TAG, END_TAG, etc.
218#------------------------------------------------------------------------
219
220sub new_style {
221    my ($self, $config) = @_;
222    my $styles = $self->{ STYLE } ||= [ ];
223    my ($tagstyle, $tags, $start, $end, $key);
224
225    # clone new style from previous or default style
226    my $style  = { %{ $styles->[-1] || $DEFAULT_STYLE } };
227
228    # expand START_TAG and END_TAG from specified TAG_STYLE
229    if ($tagstyle = $config->{ TAG_STYLE }) {
230        return $self->error("Invalid tag style: $tagstyle")
231            unless defined ($tags = $TAG_STYLE->{ $tagstyle });
232        ($start, $end) = @$tags;
233        $config->{ START_TAG } ||= $start;
234        $config->{   END_TAG } ||= $end;
235    }
236
237    foreach $key (keys %$DEFAULT_STYLE) {
238        $style->{ $key } = $config->{ $key } if defined $config->{ $key };
239    }
240    push(@$styles, $style);
241    return $style;
242}
243
244
245#------------------------------------------------------------------------
246# old_style()
247#
248# Pop the current parser style and revert to the previous one.  See
249# new_style().   ** experimental **
250#------------------------------------------------------------------------
251
252sub old_style {
253    my $self = shift;
254    my $styles = $self->{ STYLE };
255    return $self->error('only 1 parser style remaining')
256        unless (@$styles > 1);
257    pop @$styles;
258    return $styles->[-1];
259}
260
261
262#------------------------------------------------------------------------
263# parse($text, $data)
264#
265# Parses the text string, $text and returns a hash array representing
266# the compiled template block(s) as Perl code, in the format expected
267# by Template::Document.
268#------------------------------------------------------------------------
269
270sub parse {
271    my ($self, $text, $info) = @_;
272    my ($tokens, $block);
273
274    $info->{ DEBUG } = $self->{ DEBUG_DIRS }
275        unless defined $info->{ DEBUG };
276
277#    print "info: { ", join(', ', map { "$_ => $info->{ $_ }" } keys %$info), " }\n";
278
279    # store for blocks defined in the template (see define_block())
280    my $defblock  = $self->{ DEFBLOCK  } = { };
281    my $metadata  = $self->{ METADATA  } = [ ];
282    my $variables = $self->{ VARIABLES } = { };
283    $self->{ DEFBLOCKS } = [ ];
284
285    $self->{ _ERROR } = '';
286
287    # split file into TEXT/DIRECTIVE chunks
288    $tokens = $self->split_text($text)
289        || return undef;                                    ## RETURN ##
290
291    push(@{ $self->{ FILEINFO } }, $info);
292
293    # parse chunks
294    $block = $self->_parse($tokens, $info);
295
296    pop(@{ $self->{ FILEINFO } });
297
298    return undef unless $block;                             ## RETURN ##
299
300    $self->debug("compiled main template document block:\n$block")
301        if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
302
303    return {
304        BLOCK     => $block,
305        DEFBLOCKS => $defblock,
306        VARIABLES => $variables,
307        METADATA  => { @$metadata },
308    };
309}
310
311
312
313#------------------------------------------------------------------------
314# split_text($text)
315#
316# Split input template text into directives and raw text chunks.
317#------------------------------------------------------------------------
318
319sub split_text {
320    my ($self, $text) = @_;
321    my ($pre, $dir, $prelines, $dirlines, $postlines, $chomp, $tags, @tags);
322    my $style = $self->{ STYLE }->[-1];
323    my ($start, $end, $prechomp, $postchomp, $interp ) =
324        @$style{ qw( START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE ) };
325    my $tags_dir = $self->{ANYCASE} ? qr<TAGS>i : qr<TAGS>;
326
327    my @tokens = ();
328    my $line = 1;
329
330    return \@tokens                                         ## RETURN ##
331        unless defined $text && length $text;
332
333    # extract all directives from the text
334    while ($text =~ s/
335           ^(.*?)               # $1 - start of line up to directive
336           (?:
337            $start          # start of tag
338            (.*?)           # $2 - tag contents
339            $end            # end of tag
340            )
341           //sx) {
342
343        ($pre, $dir) = ($1, $2);
344        $pre = '' unless defined $pre;
345        $dir = '' unless defined $dir;
346
347        $prelines  = ($pre =~ tr/\n//);  # newlines in preceeding text
348        $dirlines  = ($dir =~ tr/\n//);  # newlines in directive tag
349        $postlines = 0;                  # newlines chomped after tag
350
351        for ($dir) {
352            if (/^\#/) {
353                # comment out entire directive except for any end chomp flag
354                $dir = ($dir =~ /($CHOMP_FLAGS)$/o) ? $1 : '';
355            }
356            else {
357                s/^($CHOMP_FLAGS)?\s*//so;
358                # PRE_CHOMP: process whitespace before tag
359                $chomp = $1 ? $1 : $prechomp;
360                $chomp =~ tr/-=~+/1230/;
361                if ($chomp && $pre) {
362                    # chomp off whitespace and newline preceding directive
363                    if ($chomp == CHOMP_ALL) {
364                        $pre =~ s{ (\r?\n|^) [^\S\n]* \z }{}mx;
365                    }
366                    elsif ($chomp == CHOMP_COLLAPSE) {
367                        $pre =~ s{ (\s+) \z }{ }x;
368                    }
369                    elsif ($chomp == CHOMP_GREEDY) {
370                        $pre =~ s{ (\s+) \z }{}x;
371                    }
372                }
373            }
374
375            # POST_CHOMP: process whitespace after tag
376            s/\s*($CHOMP_FLAGS)?\s*$//so;
377            $chomp = $1 ? $1 : $postchomp;
378            $chomp =~ tr/-=~+/1230/;
379            if ($chomp) {
380                if ($chomp == CHOMP_ALL) {
381                    $text =~ s{ ^ ([^\S\n]* \n) }{}x
382                        && $postlines++;
383                }
384                elsif ($chomp == CHOMP_COLLAPSE) {
385                    $text =~ s{ ^ (\s+) }{ }x
386                        && ($postlines += $1=~y/\n//);
387                }
388                # any trailing whitespace
389                elsif ($chomp == CHOMP_GREEDY) {
390                    $text =~ s{ ^ (\s+) }{}x
391                        && ($postlines += $1=~y/\n//);
392                }
393            }
394        }
395
396        # any text preceding the directive can now be added
397        if (length $pre) {
398            push(@tokens, $interp
399                 ? [ $pre, $line, 'ITEXT' ]
400                 : ('TEXT', $pre) );
401        }
402        $line += $prelines;
403
404        # and now the directive, along with line number information
405        if (length $dir) {
406            # the TAGS directive is a compile-time switch
407            if ($dir =~ /^$tags_dir\s+(.*)/) {
408                my @tags = split(/\s+/, $1);
409                if (scalar @tags > 1) {
410                    ($start, $end) = map { quotemeta($_) } @tags;
411                }
412                elsif ($tags = $TAG_STYLE->{ $tags[0] }) {
413                    ($start, $end) = @$tags;
414                }
415                else {
416                    warn "invalid TAGS style: $tags[0]\n";
417                }
418            }
419            else {
420                # DIRECTIVE is pushed as:
421                #   [ $dirtext, $line_no(s), \@tokens ]
422                push(@tokens,
423                     [ $dir,
424                       ($dirlines
425                        ? sprintf("%d-%d", $line, $line + $dirlines)
426                        : $line),
427                       $self->tokenise_directive($dir) ]);
428            }
429        }
430
431        # update line counter to include directive lines and any extra
432        # newline chomped off the start of the following text
433        $line += $dirlines + $postlines;
434    }
435
436    # anything remaining in the string is plain text
437    push(@tokens, $interp
438         ? [ $text, $line, 'ITEXT' ]
439         : ( 'TEXT', $text) )
440        if length $text;
441
442    return \@tokens;                                        ## RETURN ##
443}
444
445
446
447#------------------------------------------------------------------------
448# interpolate_text($text, $line)
449#
450# Examines $text looking for any variable references embedded like
451# $this or like ${ this }.
452#------------------------------------------------------------------------
453
454sub interpolate_text {
455    my ($self, $text, $line) = @_;
456    my @tokens  = ();
457    my ($pre, $var, $dir);
458
459
460   while ($text =~
461           /
462           ( (?: \\. | [^\$] ){1,3000} ) # escaped or non-'$' character [$1]
463           |
464           ( \$ (?:                 # embedded variable            [$2]
465             (?: \{ ([^\}]*) \} )   # ${ ... }                     [$3]
466             |
467             ([\w\.]+)              # $word                        [$4]
468             )
469           )
470        /gx) {
471
472        ($pre, $var, $dir) = ($1, $3 || $4, $2);
473
474        # preceding text
475        if (defined($pre) && length($pre)) {
476            $line += $pre =~ tr/\n//;
477            $pre =~ s/\\\$/\$/g;
478            push(@tokens, 'TEXT', $pre);
479        }
480        # $variable reference
481        if ($var) {
482            $line += $dir =~ tr/\n/ /;
483            push(@tokens, [ $dir, $line, $self->tokenise_directive($var) ]);
484        }
485        # other '$' reference - treated as text
486        elsif ($dir) {
487            $line += $dir =~ tr/\n//;
488            push(@tokens, 'TEXT', $dir);
489        }
490    }
491
492    return \@tokens;
493}
494
495
496
497#------------------------------------------------------------------------
498# tokenise_directive($text)
499#
500# Called by the private _parse() method when it encounters a DIRECTIVE
501# token in the list provided by the split_text() or interpolate_text()
502# methods.  The directive text is passed by parameter.
503#
504# The method splits the directive into individual tokens as recognised
505# by the parser grammar (see Template::Grammar for details).  It
506# constructs a list of tokens each represented by 2 elements, as per
507# split_text() et al.  The first element contains the token type, the
508# second the token itself.
509#
510# The method tokenises the string using a complex (but fast) regex.
511# For a deeper understanding of the regex magic at work here, see
512# Jeffrey Friedl's excellent book "Mastering Regular Expressions",
513# from O'Reilly, ISBN 1-56592-257-3
514#
515# Returns a reference to the list of chunks (each one being 2 elements)
516# identified in the directive text.  On error, the internal _ERROR string
517# is set and undef is returned.
518#------------------------------------------------------------------------
519
520sub tokenise_directive {
521    my ($self, $text, $line) = @_;
522    my ($token, $uctoken, $type, $lookup);
523    my $lextable = $self->{ LEXTABLE };
524    my $style    = $self->{ STYLE }->[-1];
525    my ($anycase, $start, $end) = @$style{ qw( ANYCASE START_TAG END_TAG ) };
526    my @tokens = ( );
527
528    while ($text =~
529            /
530                # strip out any comments
531                (\#[^\n]*)
532           |
533                # a quoted phrase matches in $3
534                (["'])                   # $2 - opening quote, ' or "
535                (                        # $3 - quoted text buffer
536                    (?:                  # repeat group (no backreference)
537                        \\\\             # an escaped backslash \\
538                    |                    # ...or...
539                        \\\2             # an escaped quote \" or \' (match $1)
540                    |                    # ...or...
541                        .                # any other character
542                    |   \n
543                    )*?                  # non-greedy repeat
544                )                        # end of $3
545                \2                       # match opening quote
546            |
547                # an unquoted number matches in $4
548                (-?\d+(?:\.\d+)?)       # numbers
549            |
550                # filename matches in $5
551                ( \/?\w+(?:(?:\/|::?)\w*)+ | \/\w+)
552            |
553                # an identifier matches in $6
554                (\w+)                    # variable identifier
555            |
556                # an unquoted word or symbol matches in $7
557                (   [(){}\[\]:;,\/\\]    # misc parenthesis and symbols
558#               |   \->                  # arrow operator (for future?)
559                |   [+\-*]               # math operations
560                |   \$\{?                # dollar with option left brace
561                |   =>                   # like '='
562                |   [=!<>]?= | [!<>]     # eqality tests
563                |   &&? | \|\|?          # boolean ops
564                |   \.\.?                # n..n sequence
565                |   \S+                  # something unquoted
566                )                        # end of $7
567            /gmxo) {
568
569        # ignore comments to EOL
570        next if $1;
571
572        # quoted string
573        if (defined ($token = $3)) {
574            # double-quoted string may include $variable references
575            if ($2 eq '"') {
576                if ($token =~ /[\$\\]/) {
577                    $type = 'QUOTED';
578                    # unescape " and \ but leave \$ escaped so that
579                        # interpolate_text() doesn't incorrectly treat it
580                    # as a variable reference
581#                   $token =~ s/\\([\\"])/$1/g;
582                        for ($token) {
583                                s/\\([^\$nrt])/$1/g;
584                                s/\\([nrt])/$QUOTED_ESCAPES->{ $1 }/ge;
585                        }
586                    push(@tokens, ('"') x 2,
587                                  @{ $self->interpolate_text($token) },
588                                  ('"') x 2);
589                    next;
590                }
591                else {
592                    $type = 'LITERAL';
593                    $token =~ s['][\\']g;
594                    $token = "'$token'";
595                }
596            }
597            else {
598                $type = 'LITERAL';
599                $token = "'$token'";
600            }
601        }
602        # number
603        elsif (defined ($token = $4)) {
604            $type = 'NUMBER';
605        }
606        elsif (defined($token = $5)) {
607            $type = 'FILENAME';
608        }
609        elsif (defined($token = $6)) {
610            # Fold potential keywords to UPPER CASE if the ANYCASE option is
611            # set, unless (we've got some preceeding tokens and) the previous
612            # token is a DOT op.  This prevents the 'last' in 'data.last'
613            # from being interpreted as the LAST keyword.
614            $uctoken =
615                ($anycase && (! @tokens || $tokens[-2] ne 'DOT'))
616                    ? uc $token
617                    :    $token;
618            if (defined ($type = $lextable->{ $uctoken })) {
619                $token = $uctoken;
620            }
621            else {
622                $type = 'IDENT';
623            }
624        }
625        elsif (defined ($token = $7)) {
626            # reserved words may be in lower case unless case sensitive
627            $uctoken = $anycase ? uc $token : $token;
628            unless (defined ($type = $lextable->{ $uctoken })) {
629                $type = 'UNQUOTED';
630            }
631        }
632
633        push(@tokens, $type, $token);
634
635#       print(STDERR " +[ $type, $token ]\n")
636#           if $DEBUG;
637    }
638
639#    print STDERR "tokenise directive() returning:\n  [ @tokens ]\n"
640#       if $DEBUG;
641
642    return \@tokens;                                        ## RETURN ##
643}
644
645
646#------------------------------------------------------------------------
647# define_block($name, $block)
648#
649# Called by the parser 'defblock' rule when a BLOCK definition is
650# encountered in the template.  The name of the block is passed in the
651# first parameter and a reference to the compiled block is passed in
652# the second.  This method stores the block in the $self->{ DEFBLOCK }
653# hash which has been initialised by parse() and will later be used
654# by the same method to call the store() method on the calling cache
655# to define the block "externally".
656#------------------------------------------------------------------------
657
658sub define_block {
659    my ($self, $name, $block) = @_;
660    my $defblock = $self->{ DEFBLOCK }
661        || return undef;
662
663    $self->debug("compiled block '$name':\n$block")
664        if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
665
666    $defblock->{ $name } = $block;
667
668    return undef;
669}
670
671sub push_defblock {
672    my $self = shift;
673    my $stack = $self->{ DEFBLOCK_STACK } ||= [];
674    push(@$stack, $self->{ DEFBLOCK } );
675    $self->{ DEFBLOCK } = { };
676}
677
678sub pop_defblock {
679    my $self  = shift;
680    my $defs  = $self->{ DEFBLOCK };
681    my $stack = $self->{ DEFBLOCK_STACK } || return $defs;
682    return $defs unless @$stack;
683    $self->{ DEFBLOCK } = pop @$stack;
684    return $defs;
685}
686
687
688#------------------------------------------------------------------------
689# add_metadata(\@setlist)
690#------------------------------------------------------------------------
691
692sub add_metadata {
693    my ($self, $setlist) = @_;
694    my $metadata = $self->{ METADATA }
695        || return undef;
696
697    push(@$metadata, @$setlist);
698
699    return undef;
700}
701
702
703#------------------------------------------------------------------------
704# location()
705#
706# Return Perl comment indicating current parser file and line
707#------------------------------------------------------------------------
708
709sub location {
710    my $self = shift;
711    return "\n" unless $self->{ FILE_INFO };
712    my $line = ${ $self->{ LINE } };
713    my $info = $self->{ FILEINFO }->[-1];
714    my $file = $info->{ path } || $info->{ name }
715        || '(unknown template)';
716    $line =~ s/\-.*$//; # might be 'n-n'
717    $line ||= 1;
718    return "#line $line \"$file\"\n";
719}
720
721
722#========================================================================
723#                     -----  PRIVATE METHODS -----
724#========================================================================
725
726#------------------------------------------------------------------------
727# _parse(\@tokens, \@info)
728#
729# Parses the list of input tokens passed by reference and returns a
730# Template::Directive::Block object which contains the compiled
731# representation of the template.
732#
733# This is the main parser DFA loop.  See embedded comments for
734# further details.
735#
736# On error, undef is returned and the internal _ERROR field is set to
737# indicate the error.  This can be retrieved by calling the error()
738# method.
739#------------------------------------------------------------------------
740
741sub _parse {
742    my ($self, $tokens, $info) = @_;
743    my ($token, $value, $text, $line, $inperl);
744    my ($state, $stateno, $status, $action, $lookup, $coderet, @codevars);
745    my ($lhs, $len, $code);         # rule contents
746    my $stack = [ [ 0, undef ] ];   # DFA stack
747
748# DEBUG
749#   local $" = ', ';
750
751    # retrieve internal rule and state tables
752    my ($states, $rules) = @$self{ qw( STATES RULES ) };
753
754    # If we're tracing variable usage then we need to give the factory a
755    # reference to our $self->{ VARIABLES } for it to fill in.  This is a
756    # bit of a hack to back-patch this functionality into TT2.
757    $self->{ FACTORY }->trace_vars($self->{ VARIABLES })
758        if $self->{ TRACE_VARS };
759
760    # call the grammar set_factory method to install emitter factory
761    $self->{ GRAMMAR }->install_factory($self->{ FACTORY });
762
763    $line = $inperl = 0;
764    $self->{ LINE   } = \$line;
765    $self->{ FILE   } = $info->{ name };
766    $self->{ INPERL } = \$inperl;
767
768    $status = CONTINUE;
769    my $in_string = 0;
770
771    while(1) {
772        # get state number and state
773        $stateno =  $stack->[-1]->[0];
774        $state   = $states->[$stateno];
775
776        # see if any lookaheads exist for the current state
777        if (exists $state->{'ACTIONS'}) {
778
779            # get next token and expand any directives (i.e. token is an
780            # array ref) onto the front of the token list
781            while (! defined $token && @$tokens) {
782                $token = shift(@$tokens);
783                if (ref $token) {
784                    ($text, $line, $token) = @$token;
785                    if (ref $token) {
786                        if ($info->{ DEBUG } && ! $in_string) {
787                            # - - - - - - - - - - - - - - - - - - - - - - - - -
788                            # This is gnarly.  Look away now if you're easily
789                            # frightened.  We're pushing parse tokens onto the
790                            # pending list to simulate a DEBUG directive like so:
791                            # [% DEBUG msg line='20' text='INCLUDE foo' %]
792                            # - - - - - - - - - - - - - - - - - - - - - - - - -
793                            my $dtext = $text;
794                            $dtext =~ s[(['\\])][\\$1]g;
795                            unshift(@$tokens,
796                                    DEBUG   => 'DEBUG',
797                                    IDENT   => 'msg',
798                                    IDENT   => 'line',
799                                    ASSIGN  => '=',
800                                    LITERAL => "'$line'",
801                                    IDENT   => 'text',
802                                    ASSIGN  => '=',
803                                    LITERAL => "'$dtext'",
804                                    IDENT   => 'file',
805                                    ASSIGN  => '=',
806                                    LITERAL => "'$info->{ name }'",
807                                    (';') x 2,
808                                    @$token,
809                                    (';') x 2);
810                        }
811                        else {
812                            unshift(@$tokens, @$token, (';') x 2);
813                        }
814                        $token = undef;  # force redo
815                    }
816                    elsif ($token eq 'ITEXT') {
817                        if ($inperl) {
818                            # don't perform interpolation in PERL blocks
819                            $token = 'TEXT';
820                            $value = $text;
821                        }
822                        else {
823                            unshift(@$tokens,
824                                    @{ $self->interpolate_text($text, $line) });
825                            $token = undef; # force redo
826                        }
827                    }
828                }
829                else {
830                    # toggle string flag to indicate if we're crossing
831                    # a string boundary
832                    $in_string = ! $in_string if $token eq '"';
833                    $value = shift(@$tokens);
834                }
835            };
836            # clear undefined token to avoid 'undefined variable blah blah'
837            # warnings and let the parser logic pick it up in a minute
838            $token = '' unless defined $token;
839
840            # get the next state for the current lookahead token
841            $action = defined ($lookup = $state->{'ACTIONS'}->{ $token })
842                      ? $lookup
843                      : defined ($lookup = $state->{'DEFAULT'})
844                        ? $lookup
845                        : undef;
846        }
847        else {
848            # no lookahead actions
849            $action = $state->{'DEFAULT'};
850        }
851
852        # ERROR: no ACTION
853        last unless defined $action;
854
855        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
856        # shift (+ive ACTION)
857        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
858        if ($action > 0) {
859            push(@$stack, [ $action, $value ]);
860            $token = $value = undef;
861            redo;
862        };
863
864        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
865        # reduce (-ive ACTION)
866        # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
867        ($lhs, $len, $code) = @{ $rules->[ -$action ] };
868
869        # no action imples ACCEPTance
870        $action
871            or $status = ACCEPT;
872
873        # use dummy sub if code ref doesn't exist
874        $code = sub { $_[1] }
875            unless $code;
876
877        @codevars = $len
878                ?   map { $_->[1] } @$stack[ -$len .. -1 ]
879                :   ();
880
881        eval {
882            $coderet = &$code( $self, @codevars );
883        };
884        if ($@) {
885            my $err = $@;
886            chomp $err;
887            return $self->_parse_error($err);
888        }
889
890        # reduce stack by $len
891        splice(@$stack, -$len, $len);
892
893        # ACCEPT
894        return $coderet                                     ## RETURN ##
895            if $status == ACCEPT;
896
897        # ABORT
898        return undef                                        ## RETURN ##
899            if $status == ABORT;
900
901        # ERROR
902        last
903            if $status == ERROR;
904    }
905    continue {
906        push(@$stack, [ $states->[ $stack->[-1][0] ]->{'GOTOS'}->{ $lhs },
907              $coderet ]),
908    }
909
910    # ERROR                                                 ## RETURN ##
911    return $self->_parse_error('unexpected end of input')
912        unless defined $value;
913
914    # munge text of last directive to make it readable
915#    $text =~ s/\n/\\n/g;
916
917    return $self->_parse_error("unexpected end of directive", $text)
918        if $value eq ';';   # end of directive SEPARATOR
919
920    return $self->_parse_error("unexpected token ($value)", $text);
921}
922
923
924
925#------------------------------------------------------------------------
926# _parse_error($msg, $dirtext)
927#
928# Method used to handle errors encountered during the parse process
929# in the _parse() method.
930#------------------------------------------------------------------------
931
932sub _parse_error {
933    my ($self, $msg, $text) = @_;
934    my $line = $self->{ LINE };
935    $line = ref($line) ? $$line : $line;
936    $line = 'unknown' unless $line;
937
938    $msg .= "\n  [% $text %]"
939        if defined $text;
940
941    return $self->error("line $line: $msg");
942}
943
944
945#------------------------------------------------------------------------
946# _dump()
947#
948# Debug method returns a string representing the internal state of the
949# object.
950#------------------------------------------------------------------------
951
952sub _dump {
953    my $self = shift;
954    my $output = "[Template::Parser] {\n";
955    my $format = "    %-16s => %s\n";
956    my $key;
957
958    foreach $key (qw( START_TAG END_TAG TAG_STYLE ANYCASE INTERPOLATE
959                      PRE_CHOMP POST_CHOMP V1DOLLAR )) {
960        my $val = $self->{ $key };
961        $val = '<undef>' unless defined $val;
962        $output .= sprintf($format, $key, $val);
963    }
964
965    $output .= '}';
966    return $output;
967}
968
969
9701;
971
972__END__
973
974=head1 NAME
975
976Template::Parser - LALR(1) parser for compiling template documents
977
978=head1 SYNOPSIS
979
980    use Template::Parser;
981
982    $parser   = Template::Parser->new(\%config);
983    $template = $parser->parse($text)
984        || die $parser->error(), "\n";
985
986=head1 DESCRIPTION
987
988The C<Template::Parser> module implements a LALR(1) parser and associated
989methods for parsing template documents into Perl code.
990
991=head1 PUBLIC METHODS
992
993=head2 new(\%params)
994
995The C<new()> constructor creates and returns a reference to a new
996C<Template::Parser> object.
997
998A reference to a hash may be supplied as a parameter to provide configuration values.
999See L<CONFIGURATION OPTIONS> below for a summary of these options and
1000L<Template::Manual::Config> for full details.
1001
1002    my $parser = Template::Parser->new({
1003        START_TAG => quotemeta('<+'),
1004        END_TAG   => quotemeta('+>'),
1005    });
1006
1007=head2 parse($text)
1008
1009The C<parse()> method parses the text passed in the first parameter and
1010returns a reference to a hash array of data defining the compiled
1011representation of the template text, suitable for passing to the
1012L<Template::Document> L<new()|Template::Document#new()> constructor method. On
1013error, undef is returned.
1014
1015    $data = $parser->parse($text)
1016        || die $parser->error();
1017
1018The C<$data> hash reference returned contains a C<BLOCK> item containing the
1019compiled Perl code for the template, a C<DEFBLOCKS> item containing a
1020reference to a hash array of sub-template C<BLOCK>s defined within in the
1021template, and a C<METADATA> item containing a reference to a hash array
1022of metadata values defined in C<META> tags.
1023
1024=head1 CONFIGURATION OPTIONS
1025
1026The C<Template::Parser> module accepts the following configuration
1027options.  Please see L<Template::Manual::Config> for futher details
1028on each option.
1029
1030=head2 START_TAG, END_TAG
1031
1032The L<START_TAG|Template::Manual::Config#START_TAG_END_TAG> and
1033L<END_TAG|Template::Manual::Config#START_TAG_END_TAG> options are used to
1034specify character sequences or regular expressions that mark the start and end
1035of a template directive.
1036
1037    my $parser = Template::Parser->new({
1038        START_TAG => quotemeta('<+'),
1039        END_TAG   => quotemeta('+>'),
1040    });
1041
1042=head2 TAG_STYLE
1043
1044The L<TAG_STYLE|Template::Manual::Config#TAG_STYLE> option can be used to set
1045both L<START_TAG> and L<END_TAG> according to pre-defined tag styles.
1046
1047    my $parser = Template::Parser->new({
1048        TAG_STYLE => 'star',     # [* ... *]
1049    });
1050
1051=head2 PRE_CHOMP, POST_CHOMP
1052
1053The L<PRE_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> and
1054L<POST_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> can be set to remove
1055any whitespace before or after a directive tag, respectively.
1056
1057    my $parser = Template::Parser-E<gt>new({
1058        PRE_CHOMP  => 1,
1059        POST_CHOMP => 1,
1060    });
1061
1062=head2 INTERPOLATE
1063
1064The L<INTERPOLATE|Template::Manual::Config#INTERPOLATE> flag can be set
1065to allow variables to be embedded in plain text blocks.
1066
1067    my $parser = Template::Parser->new({
1068        INTERPOLATE => 1,
1069    });
1070
1071Variables should be prefixed by a C<$> to identify them, using curly braces
1072to explicitly scope the variable name where necessary.
1073
1074    Hello ${name},
1075
1076    The day today is ${day.today}.
1077
1078=head2 ANYCASE
1079
1080The L<ANYCASE|Template::Manual::Config#ANYCASE> option can be set
1081to allow directive keywords to be specified in any case.
1082
1083    # with ANYCASE set to 1
1084    [% INCLUDE foobar %]    # OK
1085    [% include foobar %]    # OK
1086    [% include = 10   %]    # ERROR, 'include' is a reserved word
1087
1088=head2 GRAMMAR
1089
1090The L<GRAMMAR|Template::Manual::Config#GRAMMAR> configuration item can be used
1091to specify an alternate grammar for the parser. This allows a modified or
1092entirely new template language to be constructed and used by the Template
1093Toolkit.
1094
1095    use MyOrg::Template::Grammar;
1096
1097    my $parser = Template::Parser->new({
1098        GRAMMAR = MyOrg::Template::Grammar->new();
1099    });
1100
1101By default, an instance of the default L<Template::Grammar> will be
1102created and used automatically if a C<GRAMMAR> item isn't specified.
1103
1104=head2 DEBUG
1105
1106The L<DEBUG|Template::Manual::Config#DEBUG> option can be used to enable
1107various debugging features of the C<Template::Parser> module.
1108
1109    use Template::Constants qw( :debug );
1110
1111    my $template = Template->new({
1112        DEBUG => DEBUG_PARSER | DEBUG_DIRS,
1113    });
1114
1115=head1 AUTHOR
1116
1117Andy Wardley E<lt>abw@wardley.orgE<gt> L<http://wardley.org/>
1118
1119=head1 COPYRIGHT
1120
1121Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
1122
1123This module is free software; you can redistribute it and/or
1124modify it under the same terms as Perl itself.
1125
1126The main parsing loop of the C<Template::Parser> module was derived from a
1127standalone parser generated by version 0.16 of the C<Parse::Yapp> module. The
1128following copyright notice appears in the C<Parse::Yapp> documentation.
1129
1130    The Parse::Yapp module and its related modules and shell
1131    scripts are copyright (c) 1998 Francois Desarmenien,
1132    France. All rights reserved.
1133
1134    You may use and distribute them under the terms of either
1135    the GNU General Public License or the Artistic License, as
1136    specified in the Perl README file.
1137
1138=head1 SEE ALSO
1139
1140L<Template>, L<Template::Grammar>, L<Template::Directive>
1141
1142