1#-- 2# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $ 3# 4# Copyright (c) 1999-2006 Minero Aoki 5# 6# This program is free software. 7# You can distribute/modify this program under the same terms of ruby. 8# 9# As a special exception, when this code is copied by Racc 10# into a Racc output file, you may use that output file 11# without restriction. 12#++ 13 14module Racc 15 class ParseError < StandardError; end 16end 17unless defined?(::ParseError) 18 ParseError = Racc::ParseError 19end 20 21# Racc is a LALR(1) parser generator. 22# It is written in Ruby itself, and generates Ruby programs. 23# 24# == Command-line Reference 25# 26# racc [-o<var>filename</var>] [--output-file=<var>filename</var>] 27# [-e<var>rubypath</var>] [--embedded=<var>rubypath</var>] 28# [-v] [--verbose] 29# [-O<var>filename</var>] [--log-file=<var>filename</var>] 30# [-g] [--debug] 31# [-E] [--embedded] 32# [-l] [--no-line-convert] 33# [-c] [--line-convert-all] 34# [-a] [--no-omit-actions] 35# [-C] [--check-only] 36# [-S] [--output-status] 37# [--version] [--copyright] [--help] <var>grammarfile</var> 38# 39# [+filename+] 40# Racc grammar file. Any extention is permitted. 41# [-o+outfile+, --output-file=+outfile+] 42# A filename for output. default is <+filename+>.tab.rb 43# [-O+filename+, --log-file=+filename+] 44# Place logging output in file +filename+. 45# Default log file name is <+filename+>.output. 46# [-e+rubypath+, --executable=+rubypath+] 47# output executable file(mode 755). where +path+ is the ruby interpreter. 48# [-v, --verbose] 49# verbose mode. create +filename+.output file, like yacc's y.output file. 50# [-g, --debug] 51# add debug code to parser class. To display debuggin information, 52# use this '-g' option and set @yydebug true in parser class. 53# [-E, --embedded] 54# Output parser which doesn't need runtime files (racc/parser.rb). 55# [-C, --check-only] 56# Check syntax of racc grammer file and quit. 57# [-S, --output-status] 58# Print messages time to time while compiling. 59# [-l, --no-line-convert] 60# turns off line number converting. 61# [-c, --line-convert-all] 62# Convert line number of actions, inner, header and footer. 63# [-a, --no-omit-actions] 64# Call all actions, even if an action is empty. 65# [--version] 66# print Racc version and quit. 67# [--copyright] 68# Print copyright and quit. 69# [--help] 70# Print usage and quit. 71# 72# == Generating Parser Using Racc 73# 74# To compile Racc grammar file, simply type: 75# 76# $ racc parse.y 77# 78# This creates ruby script file "parse.tab.y". The -o option can change the output filename. 79# 80# == Writing A Racc Grammar File 81# 82# If you want your own parser, you have to write a grammar file. 83# A grammar file contains the name of your parser class, grammar for the parser, 84# user code, and anything else. 85# When writing a grammar file, yacc's knowledge is helpful. 86# If you have not used yacc before, Racc is not too difficult. 87# 88# Here's an example Racc grammar file. 89# 90# class Calcparser 91# rule 92# target: exp { print val[0] } 93# 94# exp: exp '+' exp 95# | exp '*' exp 96# | '(' exp ')' 97# | NUMBER 98# end 99# 100# Racc grammar files resemble yacc files. 101# But (of course), this is Ruby code. 102# yacc's $$ is the 'result', $0, $1... is 103# an array called 'val', and $-1, $-2... is an array called '_values'. 104# 105# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for 106# more information on grammar files. 107# 108# == Parser 109# 110# Then you must prepare the parse entry method. There are two types of 111# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse 112# 113# Racc::Parser#do_parse is simple. 114# 115# It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). 116# This method must returns an array like [TOKENSYMBOL, ITS_VALUE]. 117# EOF is [false, false]. 118# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. 119# If you want to change this, see the grammar reference. 120# 121# Racc::Parser#yyparse is little complicated, but useful. 122# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator. 123# 124# For example, <code>yyparse(obj, :scan)</code> causes 125# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+. 126# 127# == Debugging 128# 129# When debugging, "-v" or/and the "-g" option is helpful. 130# 131# "-v" creates verbose log file (.output). 132# "-g" creates a "Verbose Parser". 133# Verbose Parser prints the internal status when parsing. 134# But it's _not_ automatic. 135# You must use -g option and set +@yydebug+ to +true+ in order to get output. 136# -g option only creates the verbose parser. 137# 138# === Racc reported syntax error. 139# 140# Isn't there too many "end"? 141# grammar of racc file is changed in v0.10. 142# 143# Racc does not use '%' mark, while yacc uses huge number of '%' marks.. 144# 145# === Racc reported "XXXX conflicts". 146# 147# Try "racc -v xxxx.y". 148# It causes producing racc's internal log file, xxxx.output. 149# 150# === Generated parsers does not work correctly 151# 152# Try "racc -g xxxx.y". 153# This command let racc generate "debugging parser". 154# Then set @yydebug=true in your parser. 155# It produces a working log of your parser. 156# 157# == Re-distributing Racc runtime 158# 159# A parser, which is created by Racc, requires the Racc runtime module; 160# racc/parser.rb. 161# 162# Ruby 1.8.x comes with Racc runtime module, 163# you need NOT distribute Racc runtime files. 164# 165# If you want to include the Racc runtime module with your parser. 166# This can be done by using '-E' option: 167# 168# $ racc -E -omyparser.rb myparser.y 169# 170# This command creates myparser.rb which `includes' Racc runtime. 171# Only you must do is to distribute your parser file (myparser.rb). 172# 173# Note: parser.rb is LGPL, but your parser is not. 174# Your own parser is completely yours. 175module Racc 176 177 unless defined?(Racc_No_Extentions) 178 Racc_No_Extentions = false # :nodoc: 179 end 180 181 class Parser 182 183 Racc_Runtime_Version = '1.4.6' 184 Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1] 185 186 Racc_Runtime_Core_Version_R = '1.4.6' 187 Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1] 188 begin 189 require 'racc/cparse' 190 # Racc_Runtime_Core_Version_C = (defined in extention) 191 Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] 192 unless new.respond_to?(:_racc_do_parse_c, true) 193 raise LoadError, 'old cparse.so' 194 end 195 if Racc_No_Extentions 196 raise LoadError, 'selecting ruby version of racc runtime core' 197 end 198 199 Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc: 200 Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc: 201 Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc: 202 Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc: 203 Racc_Runtime_Type = 'c' # :nodoc: 204 rescue LoadError 205 Racc_Main_Parsing_Routine = :_racc_do_parse_rb 206 Racc_YY_Parse_Method = :_racc_yyparse_rb 207 Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R 208 Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_R 209 Racc_Runtime_Type = 'ruby' 210 end 211 212 def Parser.racc_runtime_type # :nodoc: 213 Racc_Runtime_Type 214 end 215 216 def _racc_setup 217 @yydebug = false unless self.class::Racc_debug_parser 218 @yydebug = false unless defined?(@yydebug) 219 if @yydebug 220 @racc_debug_out = $stderr unless defined?(@racc_debug_out) 221 @racc_debug_out ||= $stderr 222 end 223 arg = self.class::Racc_arg 224 arg[13] = true if arg.size < 14 225 arg 226 end 227 228 def _racc_init_sysvars 229 @racc_state = [0] 230 @racc_tstack = [] 231 @racc_vstack = [] 232 233 @racc_t = nil 234 @racc_val = nil 235 236 @racc_read_next = true 237 238 @racc_user_yyerror = false 239 @racc_error_status = 0 240 end 241 242 # The entry point of the parser. This method is used with #next_token. 243 # If Racc wants to get token (and its value), calls next_token. 244 # 245 # Example: 246 # def parse 247 # @q = [[1,1], 248 # [2,2], 249 # [3,3], 250 # [false, '$']] 251 # do_parse 252 # end 253 # 254 # def next_token 255 # @q.shift 256 # end 257 def do_parse 258 __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) 259 end 260 261 # The method to fetch next token. 262 # If you use #do_parse method, you must implement #next_token. 263 # 264 # The format of return value is [TOKEN_SYMBOL, VALUE]. 265 # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT 266 # for 'IDENT'. ";" (String) for ';'. 267 # 268 # The final symbol (End of file) must be false. 269 def next_token 270 raise NotImplementedError, "#{self.class}\#next_token is not defined" 271 end 272 273 def _racc_do_parse_rb(arg, in_debug) 274 action_table, action_check, action_default, action_pointer, 275 _, _, _, _, 276 _, _, token_table, _, 277 _, _, * = arg 278 279 _racc_init_sysvars 280 tok = act = i = nil 281 282 catch(:racc_end_parse) { 283 while true 284 if i = action_pointer[@racc_state[-1]] 285 if @racc_read_next 286 if @racc_t != 0 # not EOF 287 tok, @racc_val = next_token() 288 unless tok # EOF 289 @racc_t = 0 290 else 291 @racc_t = (token_table[tok] or 1) # error token 292 end 293 racc_read_token(@racc_t, tok, @racc_val) if @yydebug 294 @racc_read_next = false 295 end 296 end 297 i += @racc_t 298 unless i >= 0 and 299 act = action_table[i] and 300 action_check[i] == @racc_state[-1] 301 act = action_default[@racc_state[-1]] 302 end 303 else 304 act = action_default[@racc_state[-1]] 305 end 306 while act = _racc_evalact(act, arg) 307 ; 308 end 309 end 310 } 311 end 312 313 # Another entry point for the parser. 314 # If you use this method, you must implement RECEIVER#METHOD_ID method. 315 # 316 # RECEIVER#METHOD_ID is a method to get next token. 317 # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. 318 def yyparse(recv, mid) 319 __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true) 320 end 321 322 def _racc_yyparse_rb(recv, mid, arg, c_debug) 323 action_table, action_check, action_default, action_pointer, 324 _, _, _, _, 325 _, _, token_table, _, 326 _, _, * = arg 327 328 _racc_init_sysvars 329 act = nil 330 i = nil 331 332 catch(:racc_end_parse) { 333 until i = action_pointer[@racc_state[-1]] 334 while act = _racc_evalact(action_default[@racc_state[-1]], arg) 335 ; 336 end 337 end 338 recv.__send__(mid) do |tok, val| 339 unless tok 340 @racc_t = 0 341 else 342 @racc_t = (token_table[tok] or 1) # error token 343 end 344 @racc_val = val 345 @racc_read_next = false 346 347 i += @racc_t 348 unless i >= 0 and 349 act = action_table[i] and 350 action_check[i] == @racc_state[-1] 351 act = action_default[@racc_state[-1]] 352 end 353 while act = _racc_evalact(act, arg) 354 ; 355 end 356 357 while not(i = action_pointer[@racc_state[-1]]) or 358 not @racc_read_next or 359 @racc_t == 0 # $ 360 unless i and i += @racc_t and 361 i >= 0 and 362 act = action_table[i] and 363 action_check[i] == @racc_state[-1] 364 act = action_default[@racc_state[-1]] 365 end 366 while act = _racc_evalact(act, arg) 367 ; 368 end 369 end 370 end 371 } 372 end 373 374 ### 375 ### common 376 ### 377 378 def _racc_evalact(act, arg) 379 action_table, action_check, _, action_pointer, 380 _, _, _, _, 381 _, _, _, shift_n, reduce_n, 382 _, _, * = arg 383 nerr = 0 # tmp 384 385 if act > 0 and act < shift_n 386 # 387 # shift 388 # 389 if @racc_error_status > 0 390 @racc_error_status -= 1 unless @racc_t == 1 # error token 391 end 392 @racc_vstack.push @racc_val 393 @racc_state.push act 394 @racc_read_next = true 395 if @yydebug 396 @racc_tstack.push @racc_t 397 racc_shift @racc_t, @racc_tstack, @racc_vstack 398 end 399 400 elsif act < 0 and act > -reduce_n 401 # 402 # reduce 403 # 404 code = catch(:racc_jump) { 405 @racc_state.push _racc_do_reduce(arg, act) 406 false 407 } 408 if code 409 case code 410 when 1 # yyerror 411 @racc_user_yyerror = true # user_yyerror 412 return -reduce_n 413 when 2 # yyaccept 414 return shift_n 415 else 416 raise '[Racc Bug] unknown jump code' 417 end 418 end 419 420 elsif act == shift_n 421 # 422 # accept 423 # 424 racc_accept if @yydebug 425 throw :racc_end_parse, @racc_vstack[0] 426 427 elsif act == -reduce_n 428 # 429 # error 430 # 431 case @racc_error_status 432 when 0 433 unless arg[21] # user_yyerror 434 nerr += 1 435 on_error @racc_t, @racc_val, @racc_vstack 436 end 437 when 3 438 if @racc_t == 0 # is $ 439 throw :racc_end_parse, nil 440 end 441 @racc_read_next = true 442 end 443 @racc_user_yyerror = false 444 @racc_error_status = 3 445 while true 446 if i = action_pointer[@racc_state[-1]] 447 i += 1 # error token 448 if i >= 0 and 449 (act = action_table[i]) and 450 action_check[i] == @racc_state[-1] 451 break 452 end 453 end 454 throw :racc_end_parse, nil if @racc_state.size <= 1 455 @racc_state.pop 456 @racc_vstack.pop 457 if @yydebug 458 @racc_tstack.pop 459 racc_e_pop @racc_state, @racc_tstack, @racc_vstack 460 end 461 end 462 return act 463 464 else 465 raise "[Racc Bug] unknown action #{act.inspect}" 466 end 467 468 racc_next_state(@racc_state[-1], @racc_state) if @yydebug 469 470 nil 471 end 472 473 def _racc_do_reduce(arg, act) 474 _, _, _, _, 475 goto_table, goto_check, goto_default, goto_pointer, 476 nt_base, reduce_table, _, _, 477 _, use_result, * = arg 478 state = @racc_state 479 vstack = @racc_vstack 480 tstack = @racc_tstack 481 482 i = act * -3 483 len = reduce_table[i] 484 reduce_to = reduce_table[i+1] 485 method_id = reduce_table[i+2] 486 void_array = [] 487 488 tmp_t = tstack[-len, len] if @yydebug 489 tmp_v = vstack[-len, len] 490 tstack[-len, len] = void_array if @yydebug 491 vstack[-len, len] = void_array 492 state[-len, len] = void_array 493 494 # tstack must be updated AFTER method call 495 if use_result 496 vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) 497 else 498 vstack.push __send__(method_id, tmp_v, vstack) 499 end 500 tstack.push reduce_to 501 502 racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug 503 504 k1 = reduce_to - nt_base 505 if i = goto_pointer[k1] 506 i += state[-1] 507 if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 508 return curstate 509 end 510 end 511 goto_default[k1] 512 end 513 514 # This method is called when a parse error is found. 515 # 516 # ERROR_TOKEN_ID is an internal ID of token which caused error. 517 # You can get string representation of this ID by calling 518 # #token_to_str. 519 # 520 # ERROR_VALUE is a value of error token. 521 # 522 # value_stack is a stack of symbol values. 523 # DO NOT MODIFY this object. 524 # 525 # This method raises ParseError by default. 526 # 527 # If this method returns, parsers enter "error recovering mode". 528 def on_error(t, val, vstack) 529 raise ParseError, sprintf("\nparse error on value %s (%s)", 530 val.inspect, token_to_str(t) || '?') 531 end 532 533 # Enter error recovering mode. 534 # This method does not call #on_error. 535 def yyerror 536 throw :racc_jump, 1 537 end 538 539 # Exit parser. 540 # Return value is Symbol_Value_Stack[0]. 541 def yyaccept 542 throw :racc_jump, 2 543 end 544 545 # Leave error recovering mode. 546 def yyerrok 547 @racc_error_status = 0 548 end 549 550 # For debugging output 551 def racc_read_token(t, tok, val) 552 @racc_debug_out.print 'read ' 553 @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' 554 @racc_debug_out.puts val.inspect 555 @racc_debug_out.puts 556 end 557 558 def racc_shift(tok, tstack, vstack) 559 @racc_debug_out.puts "shift #{racc_token2str tok}" 560 racc_print_stacks tstack, vstack 561 @racc_debug_out.puts 562 end 563 564 def racc_reduce(toks, sim, tstack, vstack) 565 out = @racc_debug_out 566 out.print 'reduce ' 567 if toks.empty? 568 out.print ' <none>' 569 else 570 toks.each {|t| out.print ' ', racc_token2str(t) } 571 end 572 out.puts " --> #{racc_token2str(sim)}" 573 574 racc_print_stacks tstack, vstack 575 @racc_debug_out.puts 576 end 577 578 def racc_accept 579 @racc_debug_out.puts 'accept' 580 @racc_debug_out.puts 581 end 582 583 def racc_e_pop(state, tstack, vstack) 584 @racc_debug_out.puts 'error recovering mode: pop token' 585 racc_print_states state 586 racc_print_stacks tstack, vstack 587 @racc_debug_out.puts 588 end 589 590 def racc_next_state(curstate, state) 591 @racc_debug_out.puts "goto #{curstate}" 592 racc_print_states state 593 @racc_debug_out.puts 594 end 595 596 def racc_print_stacks(t, v) 597 out = @racc_debug_out 598 out.print ' [' 599 t.each_index do |i| 600 out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' 601 end 602 out.puts ' ]' 603 end 604 605 def racc_print_states(s) 606 out = @racc_debug_out 607 out.print ' [' 608 s.each {|st| out.print ' ', st } 609 out.puts ' ]' 610 end 611 612 def racc_token2str(tok) 613 self.class::Racc_token_to_s_table[tok] or 614 raise "[Racc Bug] can't convert token #{tok} to string" 615 end 616 617 # Convert internal ID of token symbol to the string. 618 def token_to_str(t) 619 self.class::Racc_token_to_s_table[t] 620 end 621 622 end 623 624end 625