1%{# (c) Copyright Francois Desarmenien 1998-2001, all rights reserved. 2# (see COPYRIGHT in Parse::Yapp.pm pod section for use and distribution rights) 3# 4# Parse/Yapp/Parser.yp: Parse::Yapp::Parser.pm source file 5# 6# Use: yapp -m 'Parse::Yapp::Parse' -o Parse/Yapp/Parse.pm YappParse.yp 7# 8# to generate the Parser module. 9# 10%} 11 12%{ 13require 5.004; 14 15use Carp; 16 17my($input,$lexlevel,@lineno,$nberr,$prec,$labelno); 18my($syms,$head,$tail,$token,$term,$nterm,$rules,$precterm,$start,$nullable); 19my($expect); 20 21%} 22 23%% 24 25# Main rule 26yapp: head body tail ; 27 28#Common rules: 29 30symbol: LITERAL { 31 exists($$syms{$_[1][0]}) 32 or do { 33 $$syms{$_[1][0]} = $_[1][1]; 34 $$term{$_[1][0]} = undef; 35 }; 36 $_[1] 37 } 38 | ident #default action 39; 40 41ident: IDENT { 42 exists($$syms{$_[1][0]}) 43 or do { 44 $$syms{$_[1][0]} = $_[1][1]; 45 $$term{$_[1][0]} = undef; 46 }; 47 $_[1] 48 } 49; 50 51 52# Head section: 53head: headsec '%%' 54; 55 56headsec: #empty #default action 57 | decls #default action 58; 59 60decls: decls decl #default action 61 | decl #default action 62; 63 64decl: '\n' #default action 65 | TOKEN typedecl symlist '\n' 66 { 67 for (@{$_[3]}) { 68 my($symbol,$lineno)=@$_; 69 70 exists($$token{$symbol}) 71 and do { 72 _SyntaxError(0, 73 "Token $symbol redefined: ". 74 "Previously defined line $$syms{$symbol}", 75 $lineno); 76 next; 77 }; 78 $$token{$symbol}=$lineno; 79 $$term{$symbol} = [ ]; 80 } 81 undef 82 } 83 | ASSOC typedecl symlist '\n' 84 { 85 for (@{$_[3]}) { 86 my($symbol,$lineno)=@$_; 87 88 defined($$term{$symbol}[0]) 89 and do { 90 _SyntaxError(1, 91 "Precedence for symbol $symbol redefined: ". 92 "Previously defined line $$syms{$symbol}", 93 $lineno); 94 next; 95 }; 96 $$token{$symbol}=$lineno; 97 $$term{$symbol} = [ $_[1][0], $prec ]; 98 } 99 ++$prec; 100 undef 101 } 102 | START ident '\n' { $start=$_[2][0]; undef } 103 | HEADCODE '\n' { push(@$head,$_[1]); undef } 104 | UNION CODE '\n' { undef } #ignore 105 | TYPE typedecl identlist '\n' 106 { 107 for ( @{$_[3]} ) { 108 my($symbol,$lineno)=@$_; 109 110 exists($$nterm{$symbol}) 111 and do { 112 _SyntaxError(0, 113 "Non-terminal $symbol redefined: ". 114 "Previously defined line $$syms{$symbol}", 115 $lineno); 116 next; 117 }; 118 delete($$term{$symbol}); #not a terminal 119 $$nterm{$symbol}=undef; #is a non-terminal 120 } 121 } 122 | EXPECT NUMBER '\n' { $expect=$_[2][0]; undef } 123 | error '\n' { $_[0]->YYErrok } 124; 125 126typedecl: #empty 127 | '<' IDENT '>' 128; 129 130symlist: symlist symbol { push(@{$_[1]},$_[2]); $_[1] } 131 | symbol { [ $_[1] ] } 132; 133 134identlist: identlist ident { push(@{$_[1]},$_[2]); $_[1] } 135 | ident { [ $_[1] ] } 136; 137 138# Rule section 139body: rulesec '%%' 140 { 141 $start 142 or $start=$$rules[1][0]; 143 144 ref($$nterm{$start}) 145 or _SyntaxError(2,"Start symbol $start not found ". 146 "in rules section",$_[2][1]); 147 148 $$rules[0]=[ '$start', [ $start, chr(0) ], undef, undef ]; 149 } 150 | '%%' { _SyntaxError(2,"No rules in input grammar",$_[1][1]); } 151; 152 153rulesec: rulesec rules #default action 154 | rules #default action 155; 156 157rules: IDENT ':' rhss ';' { _AddRules($_[1],$_[3]); undef } 158 | error ';' { $_[0]->YYErrok } 159; 160 161rhss: rhss '|' rule { push(@{$_[1]},$_[3]); $_[1] } 162 | rule { [ $_[1] ] } 163; 164 165rule: rhs prec epscode { push(@{$_[1]}, $_[2], $_[3]); $_[1] } 166 | rhs { 167 my($code)=undef; 168 169 defined($_[1]) 170 and $_[1][-1][0] eq 'CODE' 171 and $code = ${pop(@{$_[1]})}[1]; 172 173 push(@{$_[1]}, undef, $code); 174 175 $_[1] 176 } 177; 178 179rhs: #empty #default action (will return undef) 180 | rhselts #default action 181; 182 183rhselts: rhselts rhselt { push(@{$_[1]},$_[2]); $_[1] } 184 | rhselt { [ $_[1] ] } 185; 186 187rhselt: symbol { [ 'SYMB', $_[1] ] } 188 | code { [ 'CODE', $_[1] ] } 189 ; 190 191prec: PREC symbol 192 { 193 defined($$term{$_[2][0]}) 194 or do { 195 _SyntaxError(1,"No precedence for symbol $_[2][0]", 196 $_[2][1]); 197 return undef; 198 }; 199 200 ++$$precterm{$_[2][0]}; 201 $$term{$_[2][0]}[1]; 202 } 203; 204 205epscode: { undef } 206 | code { $_[1] } 207; 208 209code: CODE { $_[1] } 210; 211 212# Tail section: 213 214tail: /*empty*/ 215 | TAILCODE { $tail=$_[1] } 216; 217 218%% 219sub _Error { 220 my($value)=$_[0]->YYCurval; 221 222 my($what)= $token ? "input: '$$value[0]'" : "end of input"; 223 224 _SyntaxError(1,"Unexpected $what",$$value[1]); 225} 226 227sub _Lexer { 228 229 #At EOF 230 pos($$input) >= length($$input) 231 and return('',[ undef, -1 ]); 232 233 #In TAIL section 234 $lexlevel > 1 235 and do { 236 my($pos)=pos($$input); 237 238 $lineno[0]=$lineno[1]; 239 $lineno[1]=-1; 240 pos($$input)=length($$input); 241 return('TAILCODE',[ substr($$input,$pos), $lineno[0] ]); 242 }; 243 244 #Skip blanks 245 $lexlevel == 0 246 ? $$input=~m{\G((?: 247 [\t\ ]+ # Any white space char but \n 248 | \#[^\n]* # Perl like comments 249 | /\*.*?\*/ # C like comments 250 )+)}xsgc 251 : $$input=~m{\G((?: 252 \s+ # any white space char 253 | \#[^\n]* # Perl like comments 254 | /\*.*?\*/ # C like comments 255 )+)}xsgc 256 and do { 257 my($blanks)=$1; 258 259 #Maybe At EOF 260 pos($$input) >= length($$input) 261 and return('',[ undef, -1 ]); 262 263 $lineno[1]+= $blanks=~tr/\n//; 264 }; 265 266 $lineno[0]=$lineno[1]; 267 268 $$input=~/\G([A-Za-z_][A-Za-z0-9_]*)/gc 269 and return('IDENT',[ $1, $lineno[0] ]); 270 271 $$input=~/\G('(?:[^'\\]|\\\\|\\'|\\)+?')/gc 272 and do { 273 $1 eq "'error'" 274 and do { 275 _SyntaxError(0,"Literal 'error' ". 276 "will be treated as error token",$lineno[0]); 277 return('IDENT',[ 'error', $lineno[0] ]); 278 }; 279 return('LITERAL',[ $1, $lineno[0] ]); 280 }; 281 282 $$input=~/\G(%%)/gc 283 and do { 284 ++$lexlevel; 285 return($1, [ $1, $lineno[0] ]); 286 }; 287 288 $$input=~/\G{/gc 289 and do { 290 my($level,$from,$code); 291 292 $from=pos($$input); 293 294 $level=1; 295 while($$input=~/([{}])/gc) { 296 substr($$input,pos($$input)-1,1) eq '\\' #Quoted 297 and next; 298 $level += ($1 eq '{' ? 1 : -1) 299 or last; 300 } 301 $level 302 and _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1); 303 $code = substr($$input,$from,pos($$input)-$from-1); 304 $lineno[1]+= $code=~tr/\n//; 305 return('CODE',[ $code, $lineno[0] ]); 306 }; 307 308 if($lexlevel == 0) {# In head section 309 $$input=~/\G%(left|right|nonassoc)/gc 310 and return('ASSOC',[ uc($1), $lineno[0] ]); 311 $$input=~/\G%(start)/gc 312 and return('START',[ undef, $lineno[0] ]); 313 $$input=~/\G%(expect)/gc 314 and return('EXPECT',[ undef, $lineno[0] ]); 315 $$input=~/\G%{/gc 316 and do { 317 my($code); 318 319 $$input=~/\G(.*?)%}/sgc 320 or _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1); 321 322 $code=$1; 323 $lineno[1]+= $code=~tr/\n//; 324 return('HEADCODE',[ $code, $lineno[0] ]); 325 }; 326 $$input=~/\G%(token)/gc 327 and return('TOKEN',[ undef, $lineno[0] ]); 328 $$input=~/\G%(type)/gc 329 and return('TYPE',[ undef, $lineno[0] ]); 330 $$input=~/\G%(union)/gc 331 and return('UNION',[ undef, $lineno[0] ]); 332 $$input=~/\G([0-9]+)/gc 333 and return('NUMBER',[ $1, $lineno[0] ]); 334 335 } 336 else {# In rule section 337 $$input=~/\G%(prec)/gc 338 and return('PREC',[ undef, $lineno[0] ]); 339 } 340 341 #Always return something 342 $$input=~/\G(.)/sg 343 or die "Parse::Yapp::Grammar::Parse: Match (.) failed: report as a BUG"; 344 345 $1 eq "\n" 346 and ++$lineno[1]; 347 348 ( $1 ,[ $1, $lineno[0] ]); 349 350} 351 352sub _SyntaxError { 353 my($level,$message,$lineno)=@_; 354 355 $message= "*". 356 [ 'Warning', 'Error', 'Fatal' ]->[$level]. 357 "* $message, at ". 358 ($lineno < 0 ? "eof" : "line $lineno"). 359 ".\n"; 360 361 $level > 1 362 and die $message; 363 364 warn $message; 365 366 $level > 0 367 and ++$nberr; 368 369 $nberr == 20 370 and die "*Fatal* Too many errors detected.\n" 371} 372 373sub _AddRules { 374 my($lhs,$lineno)=@{$_[0]}; 375 my($rhss)=$_[1]; 376 377 ref($$nterm{$lhs}) 378 and do { 379 _SyntaxError(1,"Non-terminal $lhs redefined: ". 380 "Previously declared line $$syms{$lhs}",$lineno); 381 return; 382 }; 383 384 ref($$term{$lhs}) 385 and do { 386 my($where) = exists($$token{$lhs}) ? $$token{$lhs} : $$syms{$lhs}; 387 _SyntaxError(1,"Non-terminal $lhs previously ". 388 "declared as token line $where",$lineno); 389 return; 390 }; 391 392 ref($$nterm{$lhs}) #declared through %type 393 or do { 394 $$syms{$lhs}=$lineno; #Say it's declared here 395 delete($$term{$lhs}); #No more a terminal 396 }; 397 $$nterm{$lhs}=[]; #It's a non-terminal now 398 399 my($epsrules)=0; #To issue a warning if more than one epsilon rule 400 401 for my $rhs (@$rhss) { 402 my($tmprule)=[ $lhs, [ ], splice(@$rhs,-2) ]; #Init rule 403 404 @$rhs 405 or do { 406 ++$$nullable{$lhs}; 407 ++$epsrules; 408 }; 409 410 for (0..$#$rhs) { 411 my($what,$value)=@{$$rhs[$_]}; 412 413 $what eq 'CODE' 414 and do { 415 my($name)='@'.++$labelno."-$_"; 416 push(@$rules,[ $name, [], undef, $value ]); 417 push(@{$$tmprule[1]},$name); 418 next; 419 }; 420 push(@{$$tmprule[1]},$$value[0]); 421 } 422 push(@$rules,$tmprule); 423 push(@{$$nterm{$lhs}},$#$rules); 424 } 425 426 $epsrules > 1 427 and _SyntaxError(0,"More than one empty rule for symbol $lhs",$lineno); 428} 429 430sub Parse { 431 my($self)=shift; 432 433 @_ > 0 434 or croak("No input grammar\n"); 435 436 my($parsed)={}; 437 438 $input=\$_[0]; 439 440 $lexlevel=0; 441 @lineno=(1,1); 442 $nberr=0; 443 $prec=0; 444 $labelno=0; 445 446 $head=(); 447 $tail=""; 448 449 $syms={}; 450 $token={}; 451 $term={}; 452 $nterm={}; 453 $rules=[ undef ]; #reserve slot 0 for start rule 454 $precterm={}; 455 456 $start=""; 457 $nullable={}; 458 $expect=0; 459 460 pos($$input)=0; 461 462 463 $self->YYParse(yylex => \&_Lexer, yyerror => \&_Error); 464 465 $nberr 466 and _SyntaxError(2,"Errors detected: No output",-1); 467 468 @$parsed{ 'HEAD', 'TAIL', 'RULES', 'NTERM', 'TERM', 469 'NULL', 'PREC', 'SYMS', 'START', 'EXPECT' } 470 = ( $head, $tail, $rules, $nterm, $term, 471 $nullable, $precterm, $syms, $start, $expect); 472 473 undef($input); 474 undef($lexlevel); 475 undef(@lineno); 476 undef($nberr); 477 undef($prec); 478 undef($labelno); 479 480 undef($head); 481 undef($tail); 482 483 undef($syms); 484 undef($token); 485 undef($term); 486 undef($nterm); 487 undef($rules); 488 undef($precterm); 489 490 undef($start); 491 undef($nullable); 492 undef($expect); 493 494 $parsed 495} 496 497