1#
#   irb/ruby-lex.rb - ruby lexical analyzer
3#   	$Release Version: 0.9.6$
4#   	$Revision: 38632 $
5#   	by Keiju ISHITSUKA(keiju@ruby-lang.org)
6#
7# --
8#
9#
10#
11
12require "e2mmap"
13require "irb/slex"
14require "irb/ruby-token"
15
16# :stopdoc:
17class RubyLex
# RCS keyword string recording the revision of this file.
@RCS_ID = '-$Id: ruby-lex.rb 38632 2012-12-27 14:48:42Z keiju $-'

# def_exception comes from Exception2MessageMapper; each entry below pairs an
# exception name with its message format, declared in the original order.
extend Exception2MessageMapper
[
  [:AlreadyDefinedToken, "Already defined token(%s)"],
  [:TkReading2TokenNoKey, "key nothing(key='%s')"],
  [:TkSymbol2TokenNoKey, "key nothing(key='%s')"],
  [:TkReading2TokenDuplicateError, "key duplicate(token_n='%s', key='%s')"],
  [:SyntaxError, "%s"],
  [:TerminateLineInput, "Terminate Line Input"]
].each do |exception_name, message_format|
  def_exception(exception_name, message_format)
end

# Token classes and helpers used throughout the lexer (from irb/ruby-token).
include RubyToken
31
# Class-level debug verbosity; 0 (the default) disables debug output.
@debug_level = 0

class << self
  attr_accessor :debug_level

  # True when the debug level has been raised above zero.
  def debug?
    0 < @debug_level
  end
end
39
# Builds the operator rule table, attaches STDIN as the default input and
# resets every piece of lexer state to its starting value.
def initialize
  lex_init
  set_input(STDIN)

  # Position counters.
  @seek = @base_char_no = @char_no = 0
  @exp_line_no = @line_no = 1

  # Character buffers: not yet consumed, consumed, consumed while in a
  # here-document header line.
  @rests, @readed, @here_readed = [], [], []

  # Nesting and parser-state tracking.
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG
  @space_seen = @here_header = @post_symbeg = false

  # Statement being accumulated by each_top_level_statement.
  @continue = false
  @line = ""

  # Behaviour switches exposed through accessors.
  @skip_space = false
  @readed_auto_clean_up = false
  @exception_on_syntax_error = true

  @prompt = nil
end
68
# Behaviour switches callers may change.
attr_accessor :skip_space, :readed_auto_clean_up, :exception_on_syntax_error

# Read-only view of the current position and nesting depth.
attr_reader :seek, :char_no, :line_no, :indent
77
78  # io functions
# Selects where lines come from.
#
# io    - stored as @io (used by eof? and for the encoding of yielded lines).
# p     - optional callable that returns the next line, or nil at end of input.
# block - used as the line source when +p+ is not callable.
#
# Without a callable or a block, lines are read with @io.gets.
# Returns the callable that was installed.
def set_input(io, p = nil, &block)
  @io = io
  @input =
    if p.respond_to?(:call)
      p
    elsif block
      block
    else
      Proc.new { @io.gets }
    end
end
89
# Returns everything consumed since the previous call as one string and
# empties the consumed-character buffer.
#
# @base_char_no is moved to the column at which the next buffer starts: the
# number of characters after the last newline when the buffer contains one,
# otherwise the previous base plus the buffer length.
def get_readed
  last_newline = @readed.rindex("\n")
  if last_newline
    @base_char_no = @readed.size - (last_newline + 1)
  else
    @base_char_no += @readed.size
  end

  consumed, @readed = @readed, []
  consumed.join("")
end
101
# Consumes and returns the next character, or nil at end of input.
#
# The character is recorded in @here_readed while a here-document header is
# active and in @readed otherwise, and the seek / line / column counters are
# advanced.
def getc
  # Refill from the input; when it is exhausted queue a nil so that the end
  # of input is handed out like any other character.
  while @rests.empty?
    buf_input || @rests.push(nil)
  end

  ch = @rests.shift
  (@here_header ? @here_readed : @readed).push ch

  @seek += 1
  if ch == "\n"
    @line_no += 1
    @char_no = 0
  else
    @char_no += 1
  end
  ch
end
122
# Reads characters through getc up to and including the next newline.
# Returns the line, or nil when the input ended before any character was read.
def gets
  line = ""
  ch = nil
  while true
    ch = getc
    break unless ch
    line.concat(ch)
    break if ch == "\n"
  end
  line == "" && ch.nil? ? nil : line
end
132
# Delegates to the underlying io object: true when it reports end of file.
def eof?
  @io.eof?
end
136
# Like getc, but only serves characters that are already buffered; returns
# nil instead of asking the input for more.
def getc_of_rests
  return nil if @rests.empty?

  getc
end
144
# Pushes one character back onto the front of the pending input.
#
# c - character to push back; defaults to the most recently consumed one,
#     taken from @here_readed when that buffer is non-empty and from @readed
#     otherwise. The consumed-character buffer is popped in either case.
#
# The seek, line and column counters are rewound to match.
def ungetc(c = nil)
  source = @here_readed.empty? ? @readed : @here_readed
  last_consumed = source.pop
  c ||= last_consumed

  @rests.unshift c
  @seek -= 1
  if c == "\n"
    @line_no -= 1
    # Back at the end of the previous line: the column is the number of
    # characters consumed after the preceding newline, the same quantity
    # get_readed computes for @base_char_no. The earlier `idx + 1` was the
    # position of that newline in the buffer, not a column.
    last_newline = @readed.rindex("\n")
    @char_no =
      if last_newline
        @readed.size - (last_newline + 1)
      else
        @base_char_no + @readed.size
      end
  else
    @char_no -= 1
  end
end
165
# True when the pending input starts with +str+. More input is requested as
# needed; false is returned if the input ends before enough is available.
def peek_equal?(str)
  wanted = str.split(//)
  while @rests.size < wanted.size
    return false unless buf_input
  end
  @rests.first(wanted.size) == wanted
end
173
# Matches +regexp+ against all currently buffered input without consuming it.
# An empty buffer is refilled first; false is returned when the input has
# ended. Otherwise returns the match position, or nil when nothing matches.
def peek_match?(regexp)
  while @rests.empty?
    return false unless buf_input
  end
  pending = @rests.join("")
  regexp =~ pending
end
180
# Returns the pending character +i+ positions ahead (0 is the next one)
# without consuming it, requesting more input when the buffer is too short.
# Returns nil when the input ends first.
def peek(i = 0)
  until i < @rests.size
    return nil unless buf_input
  end
  @rests[i]
end
187
# Shows the prompt, fetches one more line from the input callable and appends
# its characters to the pending buffer.
# Returns true when a line was read and nil when the input is exhausted.
def buf_input
  prompt
  fetched = @input.call
  if fetched
    @rests.concat(fetched.chars.to_a)
    true
  end
end
private :buf_input
196
# Installs the prompt callback. A block takes precedence over +p+. A callable
# is stored as-is; any other value is wrapped in a proc that prints it.
# Returns the installed callable.
def set_prompt(p = nil, &block)
  p = block if block
  @prompt = p.respond_to?(:call) ? p : Proc.new { print p }
end
205
# Invokes the prompt callback, if one is installed, with the current literal
# type, indent level, continuation flag and line number.
def prompt
  @prompt.call(@ltype, @indent, @continue, @line_no) if @prompt
end
211
# Resets the per-statement lexer state, shows the prompt, then starts a fresh
# statement buffer beginning at the current line number.
def initialize_input
  @ltype = @quoted = nil
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG
  @space_seen = @here_header = @continue = @post_symbeg = false

  # The prompt is shown after the state reset and before the buffer reset.
  prompt

  @line = ""
  @exp_line_no = @line_no
end
229
230  def each_top_level_statement
231    initialize_input
232    catch(:TERM_INPUT) do
233      loop do
234	begin
235	  @continue = false
236	  prompt
237	  unless l = lex
238	    throw :TERM_INPUT if @line == ''
239	  else
240	    @line.concat l
241	    if @ltype or @continue or @indent > 0
242	      next
243	    end
244	  end
245	  if @line != "\n"
246            @line.force_encoding(@io.encoding)
247	    yield @line, @exp_line_no
248	  end
249	  break unless l
250	  @line = ''
251	  @exp_line_no = @line_no
252
253	  @indent = 0
254	  @indent_stack = []
255	  prompt
256	rescue TerminateLineInput
257	  initialize_input
258	  prompt
259	  get_readed
260	end
261      end
262    end
263  end
264
# Consumes tokens up to the end of the current logical line and returns the
# text read, or nil when nothing was read and the input has ended.
#
# Scanning stops at a newline or end-of-script token that is not marked as a
# continuation, or as soon as no token is produced.
def lex
  tk = token
  tk = token until tk.nil? ||
                   ((tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) && !@continue)

  text = get_readed
  input_ended = tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
  text == "" && input_ended ? nil : text
end
281
# Produces the next token from the rule table.
#
# The starting position is remembered in @prev_seek / @prev_line_no /
# @prev_char_no. Whitespace tokens are skipped when @skip_space is set. A
# SyntaxError is re-raised when @exception_on_syntax_error is set and turned
# into a TkError token otherwise. With @readed_auto_clean_up set, the consumed
# text is dropped after every token.
def token
  @prev_seek, @prev_line_no, @prev_char_no = @seek, @line_no, @char_no

  tk = nil
  while true
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
      # an operator directly after a symbol start is the symbol's name
      @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
      @post_symbeg = tk.kind_of?(TkSYMBEG)
    rescue SyntaxError
      raise if @exception_on_syntax_error
      tk = TkError.new(@seek, @line_no, @char_no)
    end
    break unless @skip_space && tk.kind_of?(TkSPACE)
  end

  get_readed if @readed_auto_clean_up
  tk
end
305
# Keywords that open a construct closed by "end" ("when" intentionally absent).
ENINDENT_CLAUSE = %w[case class def do for if module unless until while begin]

# Keywords that close such a construct.
DEINDENT_CLAUSE = %w[end]

# %-literal type letter => literal type marker used for @ltype.
PERCENT_LTYPE = {
  "q" => "'", "Q" => '"', "x" => "`", "r" => "/",
  "w" => "]", "W" => "]", "i" => "]", "I" => "]",
  "s" => ":"
}

# Opening bracket of a %-literal => its closing bracket.
PERCENT_PAREN = { "{" => "}", "[" => "]", "<" => ">", "(" => ")" }

# Literal type marker => token class.
Ltype2Token = {
  "'" => TkSTRING,
  '"' => TkSTRING,
  "`" => TkXSTRING,
  "/" => TkREGEXP,
  "]" => TkDSTRING,
  ":" => TkSYMBOL
}

# Literal type marker => token class used when identify_string saw a "#"
# inside the literal.
DLtype2Token = {
  '"' => TkDSTRING,
  "`" => TkDXSTRING,
  "/" => TkDREGEXP
}
345
# Creates the rule table (@OP) and registers the first half of the operator
# rules; lex_int2 registers the rest. Each rule block receives the matched
# operator text and the io object and returns the token it produced.
def lex_init()
  @OP = IRB::SLex.new

  # NUL, ^D and ^Z end the script.
  @OP.def_rules("\0", "\004", "\032") { |op, io| Token(TkEND_OF_SCRIPT) }

  # A run of blanks becomes one space token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
    @space_seen = true
    nil while getc =~ /[ \t\f\r\13]/
    ungetc
    Token(TkSPACE)
  end

  @OP.def_rule("#") { |op, io| identify_comment }

  # "=begin" only opens an embedded document at the start of a line and when
  # followed by whitespace.
  starts_rd_block = proc { |op, io| @prev_char_no == 0 && peek(0) =~ /\s/ }
  @OP.def_rule("=begin", starts_rd_block) do |op, io|
    @ltype = "="
    # getc keeps returning nil once the input is exhausted, so every loop
    # below must also stop on nil; otherwise an unterminated block never
    # returns.
    input_ended = false
    skip_line = lambda do
      c = getc
      c = getc until c.nil? || c == "\n"
      input_ended = c.nil?
    end

    skip_line.call
    until input_ended || (peek_equal?("=end") && peek(4) =~ /\s/)
      skip_line.call
    end
    gets unless input_ended # consume the "=end" line itself
    @ltype = nil
    Token(TkRD_COMMENT)
  end

  @OP.def_rule("\n") do |op, io|
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # the expression is still open: the statement continues on the next line
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
      # drop keyword entries down to the innermost open bracket
      openers = [TkLPAREN, TkLBRACK, TkLBRACE, TkfLPAREN, TkfLBRACK, TkfLBRACE]
      @indent_stack.pop until @indent_stack.empty? || openers.include?(@indent_stack.last)
    end
    @here_header = false
    @here_readed = []
    Token(TkNL)
  end

  @OP.def_rules("*", "**",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>",
                "!", "!=", "!~") do |op, io|
    @lex_state =
      case @lex_state
      when EXPR_FNAME, EXPR_DOT then EXPR_ARG
      else EXPR_BEG
      end
    Token(op)
  end

  # "<<" is either a here-document start or the shift/append operator.
  @OP.def_rules("<<") do |op, io|
    tk = nil
    heredoc_allowed = @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
                      (@lex_state != EXPR_ARG || @space_seen)
    if heredoc_allowed
      c = peek(0)
      if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
        tk = identify_here_document
      end
    end

    if tk
      tk
    else
      tk = Token(op)
      @lex_state =
        case @lex_state
        when EXPR_FNAME, EXPR_DOT then EXPR_ARG
        else EXPR_BEG
        end
      tk
    end
  end

  @OP.def_rules("'", '"') { |op, io| identify_string(op) }

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      # a method named "`"
      @lex_state = EXPR_END
      Token(op)
    else
      identify_string(op)
    end
  end

  # "?" is the ternary operator or the start of a character literal.
  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch =~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION)
      else
        read_escape if ch == '\\'
        @lex_state = EXPR_END
        Token(TkINTEGER)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # Operator assignment: the token carries the operator without its "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    Token(TkOPASGN, op[/^(.*)=$/, 1])
  end

  # Unary operator method names, only recognised where a method name is
  # expected.
  ["+@", "-@"].each do |unary_name|
    @OP.def_rule(unary_name, proc { |op, io| @lex_state == EXPR_FNAME }) do |op, io|
      @lex_state = EXPR_ARG
      Token(op)
    end
  end

  # A sign directly followed by a digit is read as part of a number unless an
  # operator is expected at this point.
  @OP.def_rules("+", "-") do |op, io|
    signed_number =
      if @lex_state == EXPR_ARG
        @space_seen && peek(0) =~ /[0-9]/
      else
        @lex_state != EXPR_END && peek(0) =~ /[0-9]/
      end

    if signed_number
      identify_number
    else
      @lex_state = EXPR_BEG
      Token(op)
    end
  end

  @OP.def_rule(".") do |op, io|
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number
    else
      # for "obj.if" etc.
      @lex_state = EXPR_DOT
      Token(TkDOT)
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  lex_int2
end
534
# Registers the second half of the operator rules on @OP: brackets, colons,
# "/" and "%" (operator versus literal), variable sigils and the catch-all
# rule for numbers and identifiers. Each rule block receives the matched
# operator text and the io object and returns the token it produced.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    @indent_stack.pop
    Token(op)
  end

  @OP.def_rule(":") do |op, io|
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      Token(TkCOLON)
    else
      # start of a symbol: what follows is treated as a name
      @lex_state = EXPR_FNAME
      Token(TkSYMBEG)
    end
  end

  @OP.def_rule("::") do |op, io|
    if @lex_state == EXPR_BEG || (@lex_state == EXPR_ARG && @space_seen)
      @lex_state = EXPR_BEG
      Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      Token(TkCOLON2)
    end
  end

  # "/" starts a regexp where an operand is expected, otherwise it is "/=" or
  # division.
  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "/")
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/")
    end
  end

  @OP.def_rules("^") do |op, io|
    @lex_state = EXPR_BEG
    Token("^")
  end

  @OP.def_rules(",") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  @OP.def_rules(";") do |op, io|
    @lex_state = EXPR_BEG
    # drop keyword entries down to the innermost open bracket
    openers = [TkLPAREN, TkLBRACK, TkLBRACE, TkfLPAREN, TkfLBRACK, TkfLBRACE]
    @indent_stack.pop until @indent_stack.empty? || openers.include?(@indent_stack.last)
    Token(op)
  end

  @OP.def_rule("~") do |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("~@", proc { |op, io| @lex_state == EXPR_FNAME }) do |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("(") do |op, io|
    @indent += 1
    tk_c = (@lex_state == EXPR_BEG || @lex_state == EXPR_MID) ? TkfLPAREN : TkLPAREN
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  # "[]" and "[]=" are method names where a method name is expected.
  @OP.def_rule("[]", proc { |op, io| @lex_state == EXPR_FNAME }) do |op, io|
    @lex_state = EXPR_ARG
    Token("[]")
  end

  @OP.def_rule("[]=", proc { |op, io| @lex_state == EXPR_FNAME }) do |op, io|
    @lex_state = EXPR_ARG
    Token("[]=")
  end

  @OP.def_rule("[") do |op, io|
    @indent += 1
    if @lex_state == EXPR_FNAME
      # the state is deliberately left untouched in this case
      tk_c = TkfLBRACK
    else
      operand_position = @lex_state == EXPR_BEG || @lex_state == EXPR_MID ||
                         (@lex_state == EXPR_ARG && @space_seen)
      tk_c = operand_position ? TkLBRACK : TkfLBRACK
      @lex_state = EXPR_BEG
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("{") do |op, io|
    @indent += 1
    tk_c = (@lex_state != EXPR_END && @lex_state != EXPR_ARG) ? TkLBRACE : TkfLBRACE
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule('\\') do |op, io|
    if getc == "\n"
      # backslash-newline joins the next line to this one
      @space_seen = true
      @continue = true
      Token(TkSPACE)
    else
      read_escape
      Token("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, otherwise it is "%="
  # or the modulo operator.
  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation
    elsif peek(0) == '='
      getc
      # An operand follows an assignment operator, exactly as for "/=" and the
      # other op-assign rules; without this the state stayed as it was.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :%)
    elsif @lex_state == EXPR_ARG && @space_seen && peek(0) !~ /\s/
      identify_quotation
    else
      @lex_state = EXPR_BEG
      Token("%")
    end
  end

  @OP.def_rule('$') { |op, io| identify_gvar }

  @OP.def_rule('@') do |op, io|
    if peek(0) =~ /[\w@]/
      ungetc
      identify_identifier
    else
      Token("@")
    end
  end

  # Catch-all: numbers and identifiers. Returns nil for any other character.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number
    elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end
747
# Reads what follows a "$" and returns the matching token: a special global
# ($!, $_ ...), an option global ($-w), a back reference ($&), a numbered
# reference ($1) or a named global (delegated to identify_identifier).
# A lone "$" yields a "$" token.
def identify_gvar
  @lex_state = EXPR_END

  case ch = getc
  when /[~_*$?!@\/\\;,=:<>".]/   #"
    Token(TkGVAR, "$" + ch)
  when "-"
    # getc is nil when the input ends right after "$-"; to_s keeps the
    # concatenation from raising TypeError in that case.
    Token(TkGVAR, "$-" + getc.to_s)
  when "&", "`", "'", "+"
    Token(TkBACK_REF, "$" + ch)
  when /[1-9]/
    nil while getc =~ /[0-9]/
    ungetc
    Token(TkNTH_REF)
  when /\w/
    # give back both the character and the "$" so the identifier reader sees
    # the whole name
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$")
  end
end
771
# Reads an identifier-like word (local name, constant, keyword, or a name
# prefixed with "$", "@" or "@@") and returns its token.
#
# For keywords the lexer state comes from the TkReading2Token entry, and the
# indent level / indent stack are adjusted for block-opening keywords and
# "end". For ordinary names the state becomes EXPR_ARG or EXPR_END depending
# on where the name appeared.
def identify_identifier
  token = ""
  if peek(0) =~ /[$@]/
    token.concat(c = getc)
    if c == "@" and peek(0) == "@"
      token.concat getc
    end
  end

  while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
    print ":", ch, ":" if RubyLex.debug?
    token.concat ch
  end
  ungetc

  # A trailing "!" or "?" belongs to the name unless it begins "!=" / "?=".
  # The terminating character was just pushed back, so it sits at peek(0) and
  # the character to test is peek(1); testing peek(0) compared the "!" or "?"
  # with "=" and therefore always accepted the suffix.
  if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(1) != "="
    token.concat getc
  end

  # almost fix token

  case token
  when /^\$/
    return Token(TkGVAR, token)
  when /^\@\@/
    @lex_state = EXPR_END
    return Token(TkCVAR, token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token)
  end

  # After a "." every word is a method name, never a keyword.
  if @lex_state != EXPR_DOT
    print token, "\n" if RubyLex.debug?

    token_c, *trans = TkReading2Token[token]
    if token_c
      # reserved word?

      if (@lex_state != EXPR_BEG &&
          @lex_state != EXPR_FNAME &&
          trans[1])
        # modifier form (e.g. trailing if/unless/while)
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      else
        if @lex_state != EXPR_FNAME
          if ENINDENT_CLAUSE.include?(token)
            # check for ``class = val'' etc.
            valid = true
            case token
            when "class"
              valid = false unless peek_match?(/^\s*(<<|\w|::)/)
            when "def"
              valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
            when "do"
              valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
            when *ENINDENT_CLAUSE
              valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
            else
              # no nothing
            end
            if valid
              if token == "do"
                # the "do" of for/while/until does not open another level
                if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
                  @indent += 1
                  @indent_stack.push token_c
                end
              else
                @indent += 1
                @indent_stack.push token_c
              end
            end

          elsif DEINDENT_CLAUSE.include?(token)
            @indent -= 1
            @indent_stack.pop
          end
          @lex_state = trans[0]
        else
          # keyword used as a method name (after "def" or ":")
          @lex_state = EXPR_END
        end
      end
      return Token(token_c, token)
    end
  end

  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # setter name such as "value="
    if peek(0) == '='
      token.concat getc
    end
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    return Token(TkCONSTANT, token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    return Token(TkFID, token)
  else
    return Token(TkIDENTIFIER, token)
  end
end
880
# Reads a here-document whose "<<" has just been consumed.
#
# The terminator name is read first (optionally preceded by "-" and/or
# quoted), then the rest of the header line is set aside, the body is consumed
# up to the terminator line, and finally the set-aside header text is pushed
# back so that lexing resumes right after the here-document introducer.
# Returns the token for the literal type selected by the quote character.
def identify_here_document
  ch = getc
  # "<<-" allows the terminator line to be indented
  if ch == "-"
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch
    # quoted terminator: everything up to the matching quote
    lt = ch
    quoted = ""
    while (name_ch = getc) && name_ch != lt
      quoted.concat name_ch
    end
  else
    # bare terminator: a run of word characters, treated as double-quoted
    lt = '"'
    quoted = ch.dup
    while (name_ch = getc) && name_ch =~ /\w/
      quoted.concat name_ch
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  # Set aside the remainder of the header line; a character following a
  # backslash is taken along without being examined.
  reserve = []
  while (ch = getc)
    reserve.push ch
    case ch
    when "\\" then reserve.push(getc)
    when "\n" then break
    end
  end

  @here_header = false

  # Consume body lines until one equals the terminator.
  line = ""
  while (ch = getc)
    if ch == "\n"
      break if line == quoted
      line = ""
    else
      # with "<<-", whitespace at the start of a line is not part of it
      line.concat ch unless indent && line == "" && /\s/ =~ ch
      if @ltype != "'" && ch == "#" && peek(0) == "{"
        identify_string_dvar
      end
    end
  end

  # Hand the header remainder back, last character first.
  @here_header = true
  @here_readed.concat reserve
  while (pending = reserve.pop)
    ungetc pending
  end

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt])
end
947
# Reads the introducer of a %-literal (the "%" has already been consumed):
# an optional type letter followed by the opening delimiter. A non-word
# character directly after "%" means a double-quoted string. The literal body
# is then read by identify_string.
#
# Raises SyntaxError (through RubyLex.fail) for an unknown type letter.
def identify_quotation
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    # a type letter was given; the delimiter is the next character
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    RubyLex.fail SyntaxError, "unknown type of %string"
  end

  # bracket delimiters close with their counterpart, others with themselves
  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted)
end
965
# Reads a numeric literal and returns a TkINTEGER or TkFLOAT token.
#
# A leading "0" not followed by ".", "e" or "E" selects the prefixed forms
# (0x, 0b, 0o, 0d and plain octal). Everything else is read as a decimal
# number with an optional fraction and exponent.
#
# Raises SyntaxError (through RubyLex.fail) for invalid octal digits, a radix
# prefix without digits and misplaced underscores.
def identify_number
  @lex_state = EXPR_END

  if peek(0) == "0" && peek(1) !~ /[.eE]/
    getc
    case peek(0)
    when /[xX]/
      getc
      match = /[0-9a-fA-F_]/
    when /[bB]/
      getc
      match = /[01_]/
    when /[oO]/
      getc
      match = /[0-7_]/
    when /[dD]/
      getc
      match = /[0-9_]/
    when /[0-7]/
      match = /[0-7_]/
    when /[89]/
      RubyLex.fail SyntaxError, "Invalid octal digit"
    else
      # a lone zero
      return Token(TkINTEGER)
    end

    no_digit_yet = true
    non_digit = false
    while (ch = getc)
      unless match =~ ch
        # first character that is not part of the number
        ungetc
        RubyLex.fail SyntaxError, "numeric literal without digits" if no_digit_yet
        RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" if non_digit
        break
      end

      if ch != "_"
        non_digit = false
        no_digit_yet = false
      elsif non_digit
        # two underscores in a row
        RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
      else
        non_digit = ch
      end
    end
    return Token(TkINTEGER)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  non_digit = false
  while (ch = getc)
    if ch =~ /[0-9]/
      non_digit = false
    elsif ch == "_"
      non_digit = ch
    elsif allow_point && ch == "."
      RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" if non_digit
      type = TkFLOAT
      if peek(0) !~ /[0-9]/
        # the dot is a method call on an integer, not a fraction
        type = TkINTEGER
        ungetc
        break
      end
      allow_point = false
    elsif allow_e && (ch == "e" || ch == "E")
      RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" if non_digit
      type = TkFLOAT
      getc if peek(0) =~ /[+-]/
      allow_e = false
      allow_point = false
      # a digit has to follow the exponent marker
      non_digit = ch
    else
      RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number" if non_digit
      ungetc
      break
    end
  end
  Token(type)
end
1062
1063  def identify_string(ltype, quoted = ltype)
1064    @ltype = ltype
1065    @quoted = quoted
1066    subtype = nil
1067    begin
1068      nest = 0
1069      while ch = getc
1070	if @quoted == ch and nest == 0
1071	  break
1072	elsif @ltype != "'" && ch == "#" && peek(0) == "{"
1073	  identify_string_dvar
1074	elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
1075	  subtype = true
1076	elsif ch == '\\' and @ltype == "'" #'
1077	  case ch = getc
1078	  when "\\", "\n", "'"
1079	  else
1080	    ungetc
1081	  end
1082	elsif ch == '\\' #'
1083	  read_escape
1084	end
1085	if PERCENT_PAREN.values.include?(@quoted)
1086	  if PERCENT_PAREN[ch] == @quoted
1087	    nest += 1
1088	  elsif ch == @quoted
1089	    nest -= 1
1090	  end
1091	end
1092      end
1093      if @ltype == "/"
1094        while /[imxoesun]/ =~ peek(0)
1095	  getc
1096	end
1097      end
1098      if subtype
1099	Token(DLtype2Token[ltype])
1100      else
1101	Token(Ltype2Token[ltype])
1102      end
1103    ensure
1104      @ltype = nil
1105      @quoted = nil
1106      @lex_state = EXPR_END
1107    end
1108  end
1109
1110  def identify_string_dvar
1111    begin
1112      getc
1113
1114      reserve_continue = @continue
1115      reserve_ltype = @ltype
1116      reserve_indent = @indent
1117      reserve_indent_stack = @indent_stack
1118      reserve_state = @lex_state
1119      reserve_quoted = @quoted
1120
1121      @ltype = nil
1122      @quoted = nil
1123      @indent = 0
1124      @indent_stack = []
1125      @lex_state = EXPR_BEG
1126
1127      loop do
1128	@continue = false
1129	prompt
1130	tk = token
1131	if @ltype or @continue or @indent > 0
1132	  next
1133	end
1134	break if tk.kind_of?(TkRBRACE)
1135      end
1136    ensure
1137      @continue = reserve_continue
1138      @ltype = reserve_ltype
1139      @indent = reserve_indent
1140      @indent_stack = reserve_indent_stack
1141      @lex_state = reserve_state
1142      @quoted = reserve_quoted
1143    end
1144  end
1145
# Consumes a "#" comment up to, but not including, the end of the line and
# returns a TkCOMMENT token. @ltype is "#" while the comment is being read.
def identify_comment
  @ltype = "#"

  while ch = getc
    if ch == "\n"
      # leave the newline for the newline rule
      ungetc
      break
    end
  end

  # Reset unconditionally: a comment that ends with the input (no trailing
  # newline) previously left @ltype stuck at "#".
  @ltype = nil
  Token(TkCOMMENT)
end
1161
# Consumes the remainder of a backslash escape (the backslash itself has
# already been read): single-character escapes, up to three octal digits,
# "x" plus up to two hex digits, and the \M- / \C- / \c prefixes, which may
# themselves be followed by another escape.
def read_escape
  ch = getc
  case ch
  when "\n", "\r", "\f",
       "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s"
    # single-character escape: nothing more to read
  when /[0-7]/
    ungetc ch
    skip_escape_digits(3, /[0-7]/)
  when "x"
    skip_escape_digits(2, /[0-9a-fA-F]/)
  when "M"
    if getc != '-'
      ungetc
    elsif getc == "\\"
      read_escape
    end
  when "C", "c"
    # "\C" needs a "-" first; "\c" takes the next character directly
    if ch == "C" && getc != "-"
      ungetc
    elsif getc == "\\"
      read_escape
    end
  else
    # other characters
  end
end

# Consumes at most +limit+ characters matching +digit+, pushing back the first
# one that does not match. Stops quietly at end of input.
def skip_escape_digits(limit, digit)
  limit.times do
    ch = getc
    next if digit =~ ch
    ungetc unless ch.nil?
    break
  end
end
private :skip_escape_digits
1210end
1211# :startdoc:
1212