1%{# (c) Copyright Francois Desarmenien 1998-2001, all rights reserved.
2# (see COPYRIGHT in Parse::Yapp.pm pod section for use and distribution rights)
3#
4# Parse/Yapp/Parser.yp: Parse::Yapp::Parser.pm source file
5#
6# Use: yapp -m 'Parse::Yapp::Parse' -o Parse/Yapp/Parse.pm YappParse.yp
7#
8# to generate the Parser module.
9# 
10%}
11
12%{
13require 5.004;
14
15use Carp;
16
17my($input,$lexlevel,@lineno,$nberr,$prec,$labelno);
18my($syms,$head,$tail,$token,$term,$nterm,$rules,$precterm,$start,$nullable);
19my($expect);
20
21%}
22
23%%
24
25# Main rule
26yapp:   head body tail ;
27
28#Common rules:
29
30symbol: LITERAL {
31                        exists($$syms{$_[1][0]})
32                    or  do {
33                        $$syms{$_[1][0]} = $_[1][1];
34                        $$term{$_[1][0]} = undef;
35                    };
36                    $_[1]
37                }
38    |   ident   #default action
39;
40
41ident:  IDENT   {
42                        exists($$syms{$_[1][0]})
43                    or  do {
44                        $$syms{$_[1][0]} = $_[1][1];
45                        $$term{$_[1][0]} = undef;
46                    };
47                    $_[1]
48                }
49;
50
51
52# Head section:
53head:   headsec '%%'
54;
55
56headsec:    #empty  #default action
57    |       decls   #default action
58;
59
60decls:  decls decl  #default action
61    |   decl        #default action
62;
63
64decl:  '\n'                 #default action
65    |   TOKEN typedecl symlist '\n'
66            {
67                for (@{$_[3]}) {
68                    my($symbol,$lineno)=@$_;
69
70                        exists($$token{$symbol})
71                    and do {
72                        _SyntaxError(0,
73                                "Token $symbol redefined: ".
74                                "Previously defined line $$syms{$symbol}",
75                                $lineno);
76                        next;
77                    };
78                    $$token{$symbol}=$lineno;
79                    $$term{$symbol} = [ ];
80                }
81                undef
82            }
83    |   ASSOC typedecl symlist '\n'  
84            {
85                for (@{$_[3]}) {
86                    my($symbol,$lineno)=@$_;
87
88                        defined($$term{$symbol}[0])
89                    and do {
90                        _SyntaxError(1,
91                            "Precedence for symbol $symbol redefined: ".
92                            "Previously defined line $$syms{$symbol}",
93                            $lineno);
94                        next;
95                    };
96                    $$token{$symbol}=$lineno;
97                    $$term{$symbol} = [ $_[1][0], $prec ];
98                }
99                ++$prec;
100                undef
101            }
102    |   START ident '\n'                { $start=$_[2][0]; undef }
103    |   HEADCODE '\n'                   { push(@$head,$_[1]); undef }
104    |   UNION CODE '\n'                 { undef } #ignore
105    |   TYPE typedecl identlist '\n'
106            {
107                for ( @{$_[3]} ) {
108                    my($symbol,$lineno)=@$_;
109
110                        exists($$nterm{$symbol})
111                    and do {
112                        _SyntaxError(0,
113                                "Non-terminal $symbol redefined: ".
114                                "Previously defined line $$syms{$symbol}",
115                                $lineno);
116                        next;
117                    };
118                    delete($$term{$symbol});   #not a terminal
119                    $$nterm{$symbol}=undef;    #is a non-terminal
120                }
121            }
122    |   EXPECT NUMBER '\n'             { $expect=$_[2][0]; undef }
123    |   error '\n'                     { $_[0]->YYErrok }
124;    
125
126typedecl:   #empty
127    |       '<' IDENT '>'
128;
129
130symlist:    symlist symbol  { push(@{$_[1]},$_[2]); $_[1] }
131    |       symbol          { [ $_[1] ] }
132;
133
134identlist:  identlist ident { push(@{$_[1]},$_[2]); $_[1] }
135    |       ident           { [ $_[1] ] }
136;
137
138# Rule section
139body:   rulesec '%%'
140            {
141                    $start
142                or  $start=$$rules[1][0];
143
144                    ref($$nterm{$start})
145                or  _SyntaxError(2,"Start symbol $start not found ".
146                                   "in rules section",$_[2][1]);
147
148                $$rules[0]=[ '$start', [ $start, chr(0) ], undef, undef ];
149            }
150    |   '%%'    { _SyntaxError(2,"No rules in input grammar",$_[1][1]); }
151;
152
153rulesec:  rulesec rules #default action
154    |     rules         #default action
155;
156
157rules:	IDENT ':' rhss ';'	{ _AddRules($_[1],$_[3]); undef }
158	|	error ';'           { $_[0]->YYErrok }
159;
160
161rhss:	rhss '|' rule   { push(@{$_[1]},$_[3]); $_[1] }
162	|	rule            { [ $_[1] ] }
163;
164
165rule:   rhs prec epscode    { push(@{$_[1]}, $_[2], $_[3]); $_[1] }
166    |   rhs                 {
167                                my($code)=undef;
168
169                                    defined($_[1])
170                                and $_[1][-1][0] eq 'CODE'
171                                and $code = ${pop(@{$_[1]})}[1];
172
173                                push(@{$_[1]}, undef, $code);
174
175                                $_[1]
176                            }
177;
178
179rhs:    #empty      #default action (will return undef)
180    |   rhselts     #default action
181;
182
183rhselts:    rhselts rhselt	{ push(@{$_[1]},$_[2]); $_[1] }
184		|   rhselt			{ [ $_[1] ] }
185;
186
187rhselt:     symbol  	{ [ 'SYMB', $_[1] ] }
188		|	code		{ [ 'CODE', $_[1] ] }
189		;
190
191prec:	PREC symbol
192				{
193                       	defined($$term{$_[2][0]})
194                    or  do {
195                        _SyntaxError(1,"No precedence for symbol $_[2][0]",
196                                         $_[2][1]);
197                        return undef;
198                    };
199
200                    ++$$precterm{$_[2][0]};
201                    $$term{$_[2][0]}[1];
202				}
203;
204
205epscode:				{ undef }
206		|	code		{ $_[1] }
207;
208
209code:		CODE	    { $_[1] }
210;
211
212# Tail section:
213
214tail:       /*empty*/
215        |   TAILCODE    { $tail=$_[1] }
216;
217
218%%
219sub _Error {
220    my($value)=$_[0]->YYCurval;
221
222    my($what)= $token ? "input: '$$value[0]'" : "end of input";
223
224    _SyntaxError(1,"Unexpected $what",$$value[1]);
225}
226
227sub _Lexer {
228 
229    #At EOF
230        pos($$input) >= length($$input)
231    and return('',[ undef, -1 ]);
232
233    #In TAIL section
234        $lexlevel > 1
235    and do {
236        my($pos)=pos($$input);
237
238        $lineno[0]=$lineno[1];
239        $lineno[1]=-1;
240        pos($$input)=length($$input);
241        return('TAILCODE',[ substr($$input,$pos), $lineno[0] ]);
242    };
243
244    #Skip blanks
245            $lexlevel == 0
246        ?   $$input=~m{\G((?:
247                                [\t\ ]+    # Any white space char but \n
248                            |   \#[^\n]*  # Perl like comments
249                            |   /\*.*?\*/ # C like comments
250                            )+)}xsgc
251        :   $$input=~m{\G((?:
252                                \s+       # any white space char
253                            |   \#[^\n]*  # Perl like comments
254                            |   /\*.*?\*/ # C like comments
255                            )+)}xsgc
256    and do {
257        my($blanks)=$1;
258
259        #Maybe At EOF
260            pos($$input) >= length($$input)
261        and return('',[ undef, -1 ]);
262
263        $lineno[1]+= $blanks=~tr/\n//;
264    };
265
266    $lineno[0]=$lineno[1];
267
268        $$input=~/\G([A-Za-z_][A-Za-z0-9_]*)/gc
269    and return('IDENT',[ $1, $lineno[0] ]);
270
271        $$input=~/\G('(?:[^'\\]|\\\\|\\'|\\)+?')/gc
272    and do {
273            $1 eq "'error'"
274        and do {
275            _SyntaxError(0,"Literal 'error' ".
276                           "will be treated as error token",$lineno[0]);
277            return('IDENT',[ 'error', $lineno[0] ]);
278        };
279        return('LITERAL',[ $1, $lineno[0] ]);
280    };
281
282        $$input=~/\G(%%)/gc
283    and do {
284        ++$lexlevel;
285        return($1, [ $1, $lineno[0] ]);
286    };
287
288        $$input=~/\G{/gc
289    and do {
290        my($level,$from,$code);
291
292        $from=pos($$input);
293
294        $level=1;
295        while($$input=~/([{}])/gc) {
296                substr($$input,pos($$input)-1,1) eq '\\' #Quoted
297            and next;
298                $level += ($1 eq '{' ? 1 : -1)
299            or last;
300        }
301            $level
302        and  _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1);
303        $code = substr($$input,$from,pos($$input)-$from-1);
304        $lineno[1]+= $code=~tr/\n//;
305        return('CODE',[ $code, $lineno[0] ]);
306    };
307
308    if($lexlevel == 0) {# In head section
309            $$input=~/\G%(left|right|nonassoc)/gc
310        and return('ASSOC',[ uc($1), $lineno[0] ]);
311            $$input=~/\G%(start)/gc
312        and return('START',[ undef, $lineno[0] ]);
313            $$input=~/\G%(expect)/gc
314        and return('EXPECT',[ undef, $lineno[0] ]);
315            $$input=~/\G%{/gc
316        and do {
317            my($code);
318
319                $$input=~/\G(.*?)%}/sgc
320            or  _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1);
321
322            $code=$1;
323            $lineno[1]+= $code=~tr/\n//;
324            return('HEADCODE',[ $code, $lineno[0] ]);
325        };
326            $$input=~/\G%(token)/gc
327        and return('TOKEN',[ undef, $lineno[0] ]);
328            $$input=~/\G%(type)/gc
329        and return('TYPE',[ undef, $lineno[0] ]);
330            $$input=~/\G%(union)/gc
331        and return('UNION',[ undef, $lineno[0] ]);
332            $$input=~/\G([0-9]+)/gc
333        and return('NUMBER',[ $1, $lineno[0] ]);
334
335    }
336    else {# In rule section
337            $$input=~/\G%(prec)/gc
338        and return('PREC',[ undef, $lineno[0] ]);
339    }
340
341    #Always return something
342        $$input=~/\G(.)/sg
343    or  die "Parse::Yapp::Grammar::Parse: Match (.) failed: report as a BUG";
344
345        $1 eq "\n"
346    and ++$lineno[1];
347
348    ( $1 ,[ $1, $lineno[0] ]);
349
350}
351
352sub _SyntaxError {
353    my($level,$message,$lineno)=@_;
354
355    $message= "*".
356              [ 'Warning', 'Error', 'Fatal' ]->[$level].
357              "* $message, at ".
358              ($lineno < 0 ? "eof" : "line $lineno").
359              ".\n";
360
361        $level > 1
362    and die $message;
363
364    warn $message;
365
366        $level > 0
367    and ++$nberr;
368
369        $nberr == 20 
370    and die "*Fatal* Too many errors detected.\n"
371}
372
373sub _AddRules {
374    my($lhs,$lineno)=@{$_[0]};
375    my($rhss)=$_[1];
376
377        ref($$nterm{$lhs})
378    and do {
379        _SyntaxError(1,"Non-terminal $lhs redefined: ".
380                       "Previously declared line $$syms{$lhs}",$lineno);
381        return;
382    };
383
384        ref($$term{$lhs})
385    and do {
386        my($where) = exists($$token{$lhs}) ? $$token{$lhs} : $$syms{$lhs};
387        _SyntaxError(1,"Non-terminal $lhs previously ".
388                       "declared as token line $where",$lineno);
389        return;
390    };
391
392        ref($$nterm{$lhs})      #declared through %type
393    or  do {
394            $$syms{$lhs}=$lineno;   #Say it's declared here
395            delete($$term{$lhs});   #No more a terminal
396    };
397    $$nterm{$lhs}=[];       #It's a non-terminal now
398
399    my($epsrules)=0;        #To issue a warning if more than one epsilon rule
400
401    for my $rhs (@$rhss) {
402        my($tmprule)=[ $lhs, [ ], splice(@$rhs,-2) ]; #Init rule
403
404            @$rhs
405        or  do {
406            ++$$nullable{$lhs};
407            ++$epsrules;
408        };
409
410        for (0..$#$rhs) {
411            my($what,$value)=@{$$rhs[$_]};
412
413                $what eq 'CODE'
414            and do {
415                my($name)='@'.++$labelno."-$_";
416                push(@$rules,[ $name, [], undef, $value ]);
417                push(@{$$tmprule[1]},$name);
418                next;
419            };
420            push(@{$$tmprule[1]},$$value[0]);
421        }
422        push(@$rules,$tmprule);
423        push(@{$$nterm{$lhs}},$#$rules);
424    }
425
426        $epsrules > 1
427    and _SyntaxError(0,"More than one empty rule for symbol $lhs",$lineno);
428}
429
430sub Parse {
431    my($self)=shift;
432
433        @_ > 0
434    or  croak("No input grammar\n");
435
436    my($parsed)={};
437
438    $input=\$_[0];
439
440    $lexlevel=0;
441    @lineno=(1,1);
442    $nberr=0;
443    $prec=0;
444    $labelno=0;
445
446    $head=();
447    $tail="";
448
449    $syms={};
450    $token={};
451    $term={};
452    $nterm={};
453    $rules=[ undef ];   #reserve slot 0 for start rule
454    $precterm={};
455
456    $start="";
457    $nullable={};
458    $expect=0;
459
460    pos($$input)=0;
461
462
463    $self->YYParse(yylex => \&_Lexer, yyerror => \&_Error);
464
465        $nberr
466    and _SyntaxError(2,"Errors detected: No output",-1);
467
468    @$parsed{ 'HEAD', 'TAIL', 'RULES', 'NTERM', 'TERM',
469              'NULL', 'PREC', 'SYMS',  'START', 'EXPECT' }
470    =       (  $head,  $tail,  $rules,  $nterm,  $term,
471               $nullable, $precterm, $syms, $start, $expect);
472
473    undef($input);
474    undef($lexlevel);
475    undef(@lineno);
476    undef($nberr);
477    undef($prec);
478    undef($labelno);
479
480    undef($head);
481    undef($tail);
482
483    undef($syms);
484    undef($token);
485    undef($term);
486    undef($nterm);
487    undef($rules);
488    undef($precterm);
489
490    undef($start);
491    undef($nullable);
492    undef($expect);
493
494    $parsed
495}
496
497