1#--
2# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $
3#
4# Copyright (c) 1999-2006 Minero Aoki
5#
6# This program is free software.
7# You can distribute/modify this program under the same terms of ruby.
8#
9# As a special exception, when this code is copied by Racc
10# into a Racc output file, you may use that output file
11# without restriction.
12#++
13
14module Racc
15  class ParseError < StandardError; end
16end
17unless defined?(::ParseError)
18  ParseError = Racc::ParseError
19end
20
21# Racc is a LALR(1) parser generator.
22# It is written in Ruby itself, and generates Ruby programs.
23#
24# == Command-line Reference
25#
26#     racc [-o<var>filename</var>] [--output-file=<var>filename</var>]
27#          [-e<var>rubypath</var>] [--embedded=<var>rubypath</var>]
28#          [-v] [--verbose]
29#          [-O<var>filename</var>] [--log-file=<var>filename</var>]
30#          [-g] [--debug]
31#          [-E] [--embedded]
32#          [-l] [--no-line-convert]
33#          [-c] [--line-convert-all]
34#          [-a] [--no-omit-actions]
35#          [-C] [--check-only]
36#          [-S] [--output-status]
37#          [--version] [--copyright] [--help] <var>grammarfile</var>
38#
39# [+filename+]
40#   Racc grammar file. Any extention is permitted.
41# [-o+outfile+, --output-file=+outfile+]
42#   A filename for output. default is <+filename+>.tab.rb
43# [-O+filename+, --log-file=+filename+]
44#   Place logging output in file +filename+.
45#   Default log file name is <+filename+>.output.
46# [-e+rubypath+, --executable=+rubypath+]
47#   output executable file(mode 755). where +path+ is the ruby interpreter.
48# [-v, --verbose]
49#   verbose mode. create +filename+.output file, like yacc's y.output file.
50# [-g, --debug]
51#   add debug code to parser class. To display debuggin information,
52#   use this '-g' option and set @yydebug true in parser class.
53# [-E, --embedded]
54#   Output parser which doesn't need runtime files (racc/parser.rb).
55# [-C, --check-only]
56#   Check syntax of racc grammer file and quit.
57# [-S, --output-status]
58#   Print messages time to time while compiling.
59# [-l, --no-line-convert]
60#   turns off line number converting.
61# [-c, --line-convert-all]
62#   Convert line number of actions, inner, header and footer.
63# [-a, --no-omit-actions]
64#   Call all actions, even if an action is empty.
65# [--version]
66#   print Racc version and quit.
67# [--copyright]
68#   Print copyright and quit.
69# [--help]
70#   Print usage and quit.
71#
72# == Generating Parser Using Racc
73#
74# To compile Racc grammar file, simply type:
75#
76#   $ racc parse.y
77#
78# This creates ruby script file "parse.tab.y". The -o option can change the output filename.
79#
80# == Writing A Racc Grammar File
81#
82# If you want your own parser, you have to write a grammar file.
83# A grammar file contains the name of your parser class, grammar for the parser,
84# user code, and anything else.
85# When writing a grammar file, yacc's knowledge is helpful.
86# If you have not used yacc before, Racc is not too difficult.
87#
88# Here's an example Racc grammar file.
89#
90#   class Calcparser
91#   rule
92#     target: exp { print val[0] }
93#
94#     exp: exp '+' exp
95#        | exp '*' exp
96#        | '(' exp ')'
97#        | NUMBER
98#   end
99#
100# Racc grammar files resemble yacc files.
101# But (of course), this is Ruby code.
102# yacc's $$ is the 'result', $0, $1... is
103# an array called 'val', and $-1, $-2... is an array called '_values'.
104#
105# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for
106# more information on grammar files.
107#
108# == Parser
109#
110# Then you must prepare the parse entry method. There are two types of
111# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse
112#
113# Racc::Parser#do_parse is simple.
114#
115# It's yyparse() of yacc, and Racc::Parser#next_token is yylex().
116# This method must returns an array like [TOKENSYMBOL, ITS_VALUE].
117# EOF is [false, false].
118# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default.
119# If you want to change this, see the grammar reference.
120#
121# Racc::Parser#yyparse is little complicated, but useful.
122# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator.
123#
124# For example, <code>yyparse(obj, :scan)</code> causes
125# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+.
126#
127# == Debugging
128#
129# When debugging, "-v" or/and the "-g" option is helpful.
130#
131# "-v" creates verbose log file (.output).
132# "-g" creates a "Verbose Parser".
133# Verbose Parser prints the internal status when parsing.
134# But it's _not_ automatic.
135# You must use -g option and set +@yydebug+ to +true+ in order to get output.
136# -g option only creates the verbose parser.
137#
138# === Racc reported syntax error.
139#
140# Isn't there too many "end"?
141# grammar of racc file is changed in v0.10.
142#
143# Racc does not use '%' mark, while yacc uses huge number of '%' marks..
144#
145# === Racc reported "XXXX conflicts".
146#
147# Try "racc -v xxxx.y".
148# It causes producing racc's internal log file, xxxx.output.
149#
150# === Generated parsers does not work correctly
151#
152# Try "racc -g xxxx.y".
153# This command let racc generate "debugging parser".
154# Then set @yydebug=true in your parser.
155# It produces a working log of your parser.
156#
157# == Re-distributing Racc runtime
158#
159# A parser, which is created by Racc, requires the Racc runtime module;
160# racc/parser.rb.
161#
162# Ruby 1.8.x comes with Racc runtime module,
163# you need NOT distribute Racc runtime files.
164#
165# If you want to include the Racc runtime module with your parser.
166# This can be done by using '-E' option:
167#
168#   $ racc -E -omyparser.rb myparser.y
169#
170# This command creates myparser.rb which `includes' Racc runtime.
171# Only you must do is to distribute your parser file (myparser.rb).
172#
173# Note: parser.rb is LGPL, but your parser is not.
174# Your own parser is completely yours.
175module Racc
176
177  unless defined?(Racc_No_Extentions)
178    Racc_No_Extentions = false # :nodoc:
179  end
180
181  class Parser
182
183    Racc_Runtime_Version = '1.4.6'
184    Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1]
185
186    Racc_Runtime_Core_Version_R = '1.4.6'
187    Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1]
188    begin
189      require 'racc/cparse'
190    # Racc_Runtime_Core_Version_C  = (defined in extention)
191      Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2]
192      unless new.respond_to?(:_racc_do_parse_c, true)
193        raise LoadError, 'old cparse.so'
194      end
195      if Racc_No_Extentions
196        raise LoadError, 'selecting ruby version of racc runtime core'
197      end
198
199      Racc_Main_Parsing_Routine    = :_racc_do_parse_c # :nodoc:
200      Racc_YY_Parse_Method         = :_racc_yyparse_c # :nodoc:
201      Racc_Runtime_Core_Version    = Racc_Runtime_Core_Version_C # :nodoc:
202      Racc_Runtime_Core_Revision   = Racc_Runtime_Core_Revision_C # :nodoc:
203      Racc_Runtime_Type            = 'c' # :nodoc:
204    rescue LoadError
205      Racc_Main_Parsing_Routine    = :_racc_do_parse_rb
206      Racc_YY_Parse_Method         = :_racc_yyparse_rb
207      Racc_Runtime_Core_Version    = Racc_Runtime_Core_Version_R
208      Racc_Runtime_Core_Revision   = Racc_Runtime_Core_Revision_R
209      Racc_Runtime_Type            = 'ruby'
210    end
211
212    def Parser.racc_runtime_type # :nodoc:
213      Racc_Runtime_Type
214    end
215
216    def _racc_setup
217      @yydebug = false unless self.class::Racc_debug_parser
218      @yydebug = false unless defined?(@yydebug)
219      if @yydebug
220        @racc_debug_out = $stderr unless defined?(@racc_debug_out)
221        @racc_debug_out ||= $stderr
222      end
223      arg = self.class::Racc_arg
224      arg[13] = true if arg.size < 14
225      arg
226    end
227
228    def _racc_init_sysvars
229      @racc_state  = [0]
230      @racc_tstack = []
231      @racc_vstack = []
232
233      @racc_t = nil
234      @racc_val = nil
235
236      @racc_read_next = true
237
238      @racc_user_yyerror = false
239      @racc_error_status = 0
240    end
241
242    # The entry point of the parser. This method is used with #next_token.
243    # If Racc wants to get token (and its value), calls next_token.
244    #
245    # Example:
246    #     def parse
247    #       @q = [[1,1],
248    #             [2,2],
249    #             [3,3],
250    #             [false, '$']]
251    #       do_parse
252    #     end
253    #
254    #     def next_token
255    #       @q.shift
256    #     end
257    def do_parse
258      __send__(Racc_Main_Parsing_Routine, _racc_setup(), false)
259    end
260
261    # The method to fetch next token.
262    # If you use #do_parse method, you must implement #next_token.
263    #
264    # The format of return value is [TOKEN_SYMBOL, VALUE].
265    # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT
266    # for 'IDENT'.  ";" (String) for ';'.
267    #
268    # The final symbol (End of file) must be false.
269    def next_token
270      raise NotImplementedError, "#{self.class}\#next_token is not defined"
271    end
272
273    def _racc_do_parse_rb(arg, in_debug)
274      action_table, action_check, action_default, action_pointer,
275      _,            _,            _,              _,
276      _,            _,            token_table,    _,
277      _,            _,            * = arg
278
279      _racc_init_sysvars
280      tok = act = i = nil
281
282      catch(:racc_end_parse) {
283        while true
284          if i = action_pointer[@racc_state[-1]]
285            if @racc_read_next
286              if @racc_t != 0   # not EOF
287                tok, @racc_val = next_token()
288                unless tok      # EOF
289                  @racc_t = 0
290                else
291                  @racc_t = (token_table[tok] or 1)   # error token
292                end
293                racc_read_token(@racc_t, tok, @racc_val) if @yydebug
294                @racc_read_next = false
295              end
296            end
297            i += @racc_t
298            unless i >= 0 and
299                   act = action_table[i] and
300                   action_check[i] == @racc_state[-1]
301              act = action_default[@racc_state[-1]]
302            end
303          else
304            act = action_default[@racc_state[-1]]
305          end
306          while act = _racc_evalact(act, arg)
307            ;
308          end
309        end
310      }
311    end
312
313    # Another entry point for the parser.
314    # If you use this method, you must implement RECEIVER#METHOD_ID method.
315    #
316    # RECEIVER#METHOD_ID is a method to get next token.
317    # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE].
318    def yyparse(recv, mid)
319      __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true)
320    end
321
322    def _racc_yyparse_rb(recv, mid, arg, c_debug)
323      action_table, action_check, action_default, action_pointer,
324      _,             _,            _,              _,
325      _,            _,            token_table,    _,
326      _,            _,            * = arg
327
328      _racc_init_sysvars
329      act = nil
330      i = nil
331
332      catch(:racc_end_parse) {
333        until i = action_pointer[@racc_state[-1]]
334          while act = _racc_evalact(action_default[@racc_state[-1]], arg)
335            ;
336          end
337        end
338        recv.__send__(mid) do |tok, val|
339          unless tok
340            @racc_t = 0
341          else
342            @racc_t = (token_table[tok] or 1)   # error token
343          end
344          @racc_val = val
345          @racc_read_next = false
346
347          i += @racc_t
348          unless i >= 0 and
349                 act = action_table[i] and
350                 action_check[i] == @racc_state[-1]
351            act = action_default[@racc_state[-1]]
352          end
353          while act = _racc_evalact(act, arg)
354            ;
355          end
356
357          while not(i = action_pointer[@racc_state[-1]]) or
358                not @racc_read_next or
359                @racc_t == 0   # $
360            unless i and i += @racc_t and
361                   i >= 0 and
362                   act = action_table[i] and
363                   action_check[i] == @racc_state[-1]
364              act = action_default[@racc_state[-1]]
365            end
366            while act = _racc_evalact(act, arg)
367              ;
368            end
369          end
370        end
371      }
372    end
373
374    ###
375    ### common
376    ###
377
378    def _racc_evalact(act, arg)
379      action_table, action_check, _, action_pointer,
380      _,   _, _, _,
381      _,   _, _, shift_n,  reduce_n,
382      _,   _, * = arg
383      nerr = 0   # tmp
384
385      if act > 0 and act < shift_n
386        #
387        # shift
388        #
389        if @racc_error_status > 0
390          @racc_error_status -= 1 unless @racc_t == 1   # error token
391        end
392        @racc_vstack.push @racc_val
393        @racc_state.push act
394        @racc_read_next = true
395        if @yydebug
396          @racc_tstack.push @racc_t
397          racc_shift @racc_t, @racc_tstack, @racc_vstack
398        end
399
400      elsif act < 0 and act > -reduce_n
401        #
402        # reduce
403        #
404        code = catch(:racc_jump) {
405          @racc_state.push _racc_do_reduce(arg, act)
406          false
407        }
408        if code
409          case code
410          when 1 # yyerror
411            @racc_user_yyerror = true   # user_yyerror
412            return -reduce_n
413          when 2 # yyaccept
414            return shift_n
415          else
416            raise '[Racc Bug] unknown jump code'
417          end
418        end
419
420      elsif act == shift_n
421        #
422        # accept
423        #
424        racc_accept if @yydebug
425        throw :racc_end_parse, @racc_vstack[0]
426
427      elsif act == -reduce_n
428        #
429        # error
430        #
431        case @racc_error_status
432        when 0
433          unless arg[21]    # user_yyerror
434            nerr += 1
435            on_error @racc_t, @racc_val, @racc_vstack
436          end
437        when 3
438          if @racc_t == 0   # is $
439            throw :racc_end_parse, nil
440          end
441          @racc_read_next = true
442        end
443        @racc_user_yyerror = false
444        @racc_error_status = 3
445        while true
446          if i = action_pointer[@racc_state[-1]]
447            i += 1   # error token
448            if  i >= 0 and
449                (act = action_table[i]) and
450                action_check[i] == @racc_state[-1]
451              break
452            end
453          end
454          throw :racc_end_parse, nil if @racc_state.size <= 1
455          @racc_state.pop
456          @racc_vstack.pop
457          if @yydebug
458            @racc_tstack.pop
459            racc_e_pop @racc_state, @racc_tstack, @racc_vstack
460          end
461        end
462        return act
463
464      else
465        raise "[Racc Bug] unknown action #{act.inspect}"
466      end
467
468      racc_next_state(@racc_state[-1], @racc_state) if @yydebug
469
470      nil
471    end
472
473    def _racc_do_reduce(arg, act)
474      _, _, _, _,
475      goto_table,   goto_check,   goto_default,   goto_pointer,
476      nt_base,      reduce_table, _,    _,
477      _,     use_result,   * = arg
478      state = @racc_state
479      vstack = @racc_vstack
480      tstack = @racc_tstack
481
482      i = act * -3
483      len       = reduce_table[i]
484      reduce_to = reduce_table[i+1]
485      method_id = reduce_table[i+2]
486      void_array = []
487
488      tmp_t = tstack[-len, len] if @yydebug
489      tmp_v = vstack[-len, len]
490      tstack[-len, len] = void_array if @yydebug
491      vstack[-len, len] = void_array
492      state[-len, len]  = void_array
493
494      # tstack must be updated AFTER method call
495      if use_result
496        vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0])
497      else
498        vstack.push __send__(method_id, tmp_v, vstack)
499      end
500      tstack.push reduce_to
501
502      racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug
503
504      k1 = reduce_to - nt_base
505      if i = goto_pointer[k1]
506        i += state[-1]
507        if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1
508          return curstate
509        end
510      end
511      goto_default[k1]
512    end
513
514    # This method is called when a parse error is found.
515    #
516    # ERROR_TOKEN_ID is an internal ID of token which caused error.
517    # You can get string representation of this ID by calling
518    # #token_to_str.
519    #
520    # ERROR_VALUE is a value of error token.
521    #
522    # value_stack is a stack of symbol values.
523    # DO NOT MODIFY this object.
524    #
525    # This method raises ParseError by default.
526    #
527    # If this method returns, parsers enter "error recovering mode".
528    def on_error(t, val, vstack)
529      raise ParseError, sprintf("\nparse error on value %s (%s)",
530                                val.inspect, token_to_str(t) || '?')
531    end
532
533    # Enter error recovering mode.
534    # This method does not call #on_error.
535    def yyerror
536      throw :racc_jump, 1
537    end
538
539    # Exit parser.
540    # Return value is Symbol_Value_Stack[0].
541    def yyaccept
542      throw :racc_jump, 2
543    end
544
545    # Leave error recovering mode.
546    def yyerrok
547      @racc_error_status = 0
548    end
549
550    # For debugging output
551    def racc_read_token(t, tok, val)
552      @racc_debug_out.print 'read    '
553      @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') '
554      @racc_debug_out.puts val.inspect
555      @racc_debug_out.puts
556    end
557
558    def racc_shift(tok, tstack, vstack)
559      @racc_debug_out.puts "shift   #{racc_token2str tok}"
560      racc_print_stacks tstack, vstack
561      @racc_debug_out.puts
562    end
563
564    def racc_reduce(toks, sim, tstack, vstack)
565      out = @racc_debug_out
566      out.print 'reduce '
567      if toks.empty?
568        out.print ' <none>'
569      else
570        toks.each {|t| out.print ' ', racc_token2str(t) }
571      end
572      out.puts " --> #{racc_token2str(sim)}"
573
574      racc_print_stacks tstack, vstack
575      @racc_debug_out.puts
576    end
577
578    def racc_accept
579      @racc_debug_out.puts 'accept'
580      @racc_debug_out.puts
581    end
582
583    def racc_e_pop(state, tstack, vstack)
584      @racc_debug_out.puts 'error recovering mode: pop token'
585      racc_print_states state
586      racc_print_stacks tstack, vstack
587      @racc_debug_out.puts
588    end
589
590    def racc_next_state(curstate, state)
591      @racc_debug_out.puts  "goto    #{curstate}"
592      racc_print_states state
593      @racc_debug_out.puts
594    end
595
596    def racc_print_stacks(t, v)
597      out = @racc_debug_out
598      out.print '        ['
599      t.each_index do |i|
600        out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')'
601      end
602      out.puts ' ]'
603    end
604
605    def racc_print_states(s)
606      out = @racc_debug_out
607      out.print '        ['
608      s.each {|st| out.print ' ', st }
609      out.puts ' ]'
610    end
611
612    def racc_token2str(tok)
613      self.class::Racc_token_to_s_table[tok] or
614          raise "[Racc Bug] can't convert token #{tok} to string"
615    end
616
617    # Convert internal ID of token symbol to the string.
618    def token_to_str(t)
619      self.class::Racc_token_to_s_table[t]
620    end
621
622  end
623
624end
625