#
#   irb/ruby-lex.rb - ruby lexical analyzer
#   	$Release Version: 0.9.6$
#   	$Revision: 38632 $
#   	by Keiju ISHITSUKA(keiju@ruby-lang.org)
#
# --
#
#
#

require "e2mmap"
require "irb/slex"
require "irb/ruby-token"

# :stopdoc:

# Hand-written, incremental lexer for Ruby source text.
#
# Characters are pulled on demand from an input proc (see #set_input), turned
# into RubyToken tokens by a rule table held in @OP (an IRB::SLex), and
# grouped into complete top-level statements by #each_top_level_statement.
# While scanning, the lexer tracks the current literal type (@ltype), the
# nesting depth (@indent / @indent_stack) and whether the current line is
# continued (@continue); these values are also handed to the prompt proc.
class RubyLex
  @RCS_ID='-$Id: ruby-lex.rb 38632 2012-12-27 14:48:42Z keiju $-'

  extend Exception2MessageMapper
  def_exception(:AlreadyDefinedToken, "Already defined token(%s)")
  def_exception(:TkReading2TokenNoKey, "key nothing(key='%s')")
  def_exception(:TkSymbol2TokenNoKey, "key nothing(key='%s')")
  def_exception(:TkReading2TokenDuplicateError,
                "key duplicate(token_n='%s', key='%s')")
  def_exception(:SyntaxError, "%s")

  def_exception(:TerminateLineInput, "Terminate Line Input")

  include RubyToken

  class << self
    attr_accessor :debug_level

    # True when debug tracing output is enabled (debug_level > 0).
    def debug?
      @debug_level > 0
    end
  end
  @debug_level = 0

  # Builds the rule table, attaches STDIN as the default input and resets
  # all position, buffer and lexer-state bookkeeping.
  def initialize
    lex_init
    set_input(STDIN)

    @seek = 0
    @exp_line_no = @line_no = 1
    @base_char_no = 0
    @char_no = 0
    @rests = []        # characters read from the input but not yet consumed
    @readed = []       # characters consumed since the last #get_readed
    @here_readed = []  # characters consumed while re-reading a heredoc header

    @indent = 0
    @indent_stack = []
    @lex_state = EXPR_BEG
    @space_seen = false
    @here_header = false
    @post_symbeg = false

    @continue = false
    @line = ""

    @skip_space = false
    @readed_auto_clean_up = false
    @exception_on_syntax_error = true

    @prompt = nil
  end

  attr_accessor :skip_space
  attr_accessor :readed_auto_clean_up
  attr_accessor :exception_on_syntax_error

  attr_reader :seek
  attr_reader :char_no
  attr_reader :line_no
  attr_reader :indent

  # io functions

  # Sets the input source. Lines are obtained from +p+ when it responds to
  # #call, otherwise from the given block, otherwise from <tt>io.gets</tt>.
  # The reader must return nil at end of input.
  def set_input(io, p = nil, &block)
    @io = io
    if p.respond_to?(:call)
      @input = p
    elsif block_given?
      @input = block
    else
      @input = Proc.new{@io.gets}
    end
  end

  # Returns everything consumed since the previous call as one String and
  # clears the consumed-character buffer. @base_char_no is updated so that
  # column tracking stays correct across the flush.
  def get_readed
    if idx = @readed.rindex("\n")
      @base_char_no = @readed.size - (idx + 1)
    else
      @base_char_no += @readed.size
    end

    readed = @readed.join("")
    @readed = []
    readed
  end

  # Consumes and returns the next character, reading more input when the
  # pending buffer is empty. Returns nil at end of input. Updates @seek,
  # @line_no and @char_no.
  def getc
    while @rests.empty?
      # At end of input queue a nil so the caller sees nil instead of looping.
      @rests.push nil unless buf_input
    end
    c = @rests.shift
    if @here_header
      @here_readed.push c
    else
      @readed.push c
    end
    @seek += 1
    if c == "\n"
      @line_no += 1
      @char_no = 0
    else
      @char_no += 1
    end
    c
  end

  # Consumes characters up to and including the next newline and returns
  # them. Returns nil when nothing could be read before end of input.
  def gets
    l = ""
    while c = getc
      l.concat(c)
      break if c == "\n"
    end
    return nil if l == "" and c.nil?
    l
  end

  # Delegates to the underlying io's #eof?.
  def eof?
    @io.eof?
  end

  # Like #getc, but never triggers a read: returns nil when no characters
  # are pending.
  def getc_of_rests
    if @rests.empty?
      nil
    else
      getc
    end
  end

  # Pushes a character back onto the pending buffer. The most recently
  # consumed character is always removed from the consumed buffer; +c+, when
  # given, is what gets pushed back instead of that character.
  def ungetc(c = nil)
    if @here_readed.empty?
      c2 = @readed.pop
    else
      c2 = @here_readed.pop
    end
    c = c2 unless c
    @rests.unshift c
    @seek -= 1
    if c == "\n"
      @line_no -= 1
      if idx = @readed.rindex("\n")
        # Column = number of characters consumed after the previous newline
        # (same computation as in #get_readed); +idx + 1+ would be the
        # newline's offset inside the buffer, not a column.
        @char_no = @readed.size - (idx + 1)
      else
        @char_no = @base_char_no + @readed.size
      end
    else
      @char_no -= 1
    end
  end

  # True when the upcoming characters are exactly +str+. Reads more input
  # if fewer characters than +str+ are pending; false at end of input.
  def peek_equal?(str)
    chrs = str.split(//)
    until @rests.size >= chrs.size
      return false unless buf_input
    end
    @rests[0, chrs.size] == chrs
  end

  # Matches +regexp+ against all pending characters (reading one chunk of
  # input if none are pending). Returns the match position or nil/false.
  def peek_match?(regexp)
    while @rests.empty?
      return false unless buf_input
    end
    regexp =~ @rests.join("")
  end

  # Returns the character +i+ positions ahead without consuming it, or nil
  # when the input ends first.
  def peek(i = 0)
    while @rests.size <= i
      return nil unless buf_input
    end
    @rests[i]
  end

  # Emits the prompt, reads one chunk from the input proc and appends its
  # characters to the pending buffer. Returns nil at end of input.
  def buf_input
    prompt
    line = @input.call
    return nil unless line
    @rests.concat line.chars.to_a
    true
  end
  private :buf_input

  # Installs the prompt. A callable (or block) is used as is; anything else
  # is wrapped in a proc that prints it.
  def set_prompt(p = nil, &block)
    p = block if block_given?
    if p.respond_to?(:call)
      @prompt = p
    else
      @prompt = Proc.new{print p}
    end
  end

  # Invokes the prompt proc, if any, with the current literal type, indent
  # level, continuation flag and line number.
  def prompt
    if @prompt
      @prompt.call(@ltype, @indent, @continue, @line_no)
    end
  end

  # Resets the per-statement lexer state and starts a fresh statement buffer.
  def initialize_input
    @ltype = nil
    @quoted = nil
    @indent = 0
    @indent_stack = []
    @lex_state = EXPR_BEG
    @space_seen = false
    @here_header = false

    @continue = false
    @post_symbeg = false

    prompt

    @line = ""
    @exp_line_no = @line_no
  end

  # Yields each complete top-level statement as (source_text, first_line_no)
  # until the input is exhausted. Lines are accumulated while a literal is
  # open, the line is continued, or the nesting depth is positive.
  # A TerminateLineInput raised during lexing discards the partial statement.
  def each_top_level_statement
    initialize_input
    catch(:TERM_INPUT) do
      loop do
        begin
          @continue = false
          prompt
          if l = lex
            @line.concat l
            # Statement not finished yet: keep reading lines.
            next if @ltype or @continue or @indent > 0
          else
            throw :TERM_INPUT if @line == ''
          end
          if @line != "\n"
            @line.force_encoding(@io.encoding)
            yield @line, @exp_line_no
          end
          break unless l
          @line = ''
          @exp_line_no = @line_no

          @indent = 0
          @indent_stack = []
          prompt
        rescue TerminateLineInput
          initialize_input
          prompt
          get_readed
        end
      end
    end
  end

  # Scans tokens up to the end of a logical line and returns the consumed
  # source text, or nil when nothing was read before the input ended.
  def lex
    until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
             !@continue or
           tk.nil?)
    end
    line = get_readed
    if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
      nil
    else
      line
    end
  end

  # Returns the next token. Whitespace tokens are skipped when #skip_space
  # is set. A lexer SyntaxError is re-raised when #exception_on_syntax_error
  # is set, otherwise it is turned into a TkError token.
  def token
    @prev_seek = @seek
    @prev_line_no = @line_no
    @prev_char_no = @char_no
    begin
      begin
        tk = @OP.match(self)
        @space_seen = tk.kind_of?(TkSPACE)
        # An operator directly after a symbol start (e.g. ":+") ends the symbol.
        @lex_state = EXPR_END if @post_symbeg && tk.kind_of?(TkOp)
        @post_symbeg = tk.kind_of?(TkSYMBEG)
      rescue SyntaxError
        raise if @exception_on_syntax_error
        tk = TkError.new(@seek, @line_no, @char_no)
      end
    end while @skip_space and tk.kind_of?(TkSPACE)
    if @readed_auto_clean_up
      get_readed
    end
    tk
  end

  # Keywords that open a nesting level.
  ENINDENT_CLAUSE = [
    "case", "class", "def", "do", "for", "if",
    "module", "unless", "until", "while", "begin" #, "when"
  ]
  # Keywords that close a nesting level.
  DEINDENT_CLAUSE = ["end" #, "when"
  ]

  # Maps the type letter of a %-literal to the literal type code (@ltype).
  PERCENT_LTYPE = {
    "q" => "\'",
    "Q" => "\"",
    "x" => "\`",
    "r" => "/",
    "w" => "]",
    "W" => "]",
    "i" => "]",
    "I" => "]",
    "s" => ":"
  }

  # Maps an opening bracket delimiter of a %-literal to its closing one.
  PERCENT_PAREN = {
    "{" => "}",
    "[" => "]",
    "<" => ">",
    "(" => ")"
  }

  # Token class for each literal type code.
  Ltype2Token = {
    "\'" => TkSTRING,
    "\"" => TkSTRING,
    "\`" => TkXSTRING,
    "/" => TkREGEXP,
    "]" => TkDSTRING,
    ":" => TkSYMBOL
  }
  # Token class for literal types in which a "#" was seen (see
  # #identify_string).
  DLtype2Token = {
    "\"" => TkDSTRING,
    "\`" => TkDXSTRING,
    "/" => TkDREGEXP,
  }

  # Creates the rule table and registers the first half of the lexing rules
  # (end-of-script markers, whitespace, comments, newline, operators, quotes).
  # The remaining rules are registered by #lex_int2.
  # NOTE(review): the proc passed as the second argument of def_rule appears
  # to be a precondition evaluated by SLex before the rule fires - confirm
  # against IRB::SLex.
  def lex_init()
    @OP = IRB::SLex.new
    @OP.def_rules("\0", "\004", "\032") do |op, io|
      Token(TkEND_OF_SCRIPT)
    end

    # A run of blanks collapses into a single TkSPACE.
    @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
      @space_seen = true
      while getc =~ /[ \t\f\r\13]/; end
      ungetc
      Token(TkSPACE)
    end

    @OP.def_rule("#") do |op, io|
      identify_comment
    end

    # Block comment: "=begin" at column 0 up to the line starting with "=end".
    @OP.def_rule("=begin",
                 proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
      @ltype = "="
      # Skip the rest of the "=begin" line. getc returns nil at end of input,
      # so the loops below must also stop on nil or they would never end.
      while (c = getc) && c != "\n"; end
      until peek_equal?("=end") && peek(4) =~ /\s/
        break if peek(0).nil? # unterminated block comment at end of input
        while (c = getc) && c != "\n"; end
      end
      gets
      @ltype = nil
      Token(TkRD_COMMENT)
    end

    @OP.def_rule("\n") do |op, io|
      print "\\n\n" if RubyLex.debug?
      case @lex_state
      when EXPR_BEG, EXPR_FNAME, EXPR_DOT
        # The expression cannot end here: the next line continues it.
        @continue = true
      else
        @continue = false
        @lex_state = EXPR_BEG
        # Drop keyword entries down to the innermost open bracket.
        until (@indent_stack.empty? ||
               [TkLPAREN, TkLBRACK, TkLBRACE,
                TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
          @indent_stack.pop
        end
      end
      @here_header = false
      @here_readed = []
      Token(TkNL)
    end

    @OP.def_rules("*", "**",
                  "=", "==", "===",
                  "=~", "<=>",
                  "<", "<=",
                  ">", ">=", ">>",
                  "!", "!=", "!~") do |op, io|
      case @lex_state
      when EXPR_FNAME, EXPR_DOT
        @lex_state = EXPR_ARG
      else
        @lex_state = EXPR_BEG
      end
      Token(op)
    end

    # "<<" is either the start of a here document or the shift operator.
    @OP.def_rules("<<") do |op, io|
      tk = nil
      if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
         (@lex_state != EXPR_ARG || @space_seen)
        c = peek(0)
        if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
          tk = identify_here_document
        end
      end
      unless tk
        tk = Token(op)
        case @lex_state
        when EXPR_FNAME, EXPR_DOT
          @lex_state = EXPR_ARG
        else
          @lex_state = EXPR_BEG
        end
      end
      tk
    end

    @OP.def_rules("'", '"') do |op, io|
      identify_string(op)
    end

    @OP.def_rules("`") do |op, io|
      if @lex_state == EXPR_FNAME
        # "def `" - the backquote is a method name here.
        @lex_state = EXPR_END
        Token(op)
      else
        identify_string(op)
      end
    end

    # "?" is the ternary operator after an expression, otherwise it starts a
    # character literal (emitted as TkINTEGER).
    @OP.def_rules('?') do |op, io|
      if @lex_state == EXPR_END
        @lex_state = EXPR_BEG
        Token(TkQUESTION)
      else
        ch = getc
        if @lex_state == EXPR_ARG && ch =~ /\s/
          ungetc
          @lex_state = EXPR_BEG
          Token(TkQUESTION)
        else
          if (ch == '\\')
            read_escape
          end
          @lex_state = EXPR_END
          Token(TkINTEGER)
        end
      end
    end

    @OP.def_rules("&", "&&", "|", "||") do |op, io|
      @lex_state = EXPR_BEG
      Token(op)
    end

    # Assignment operators; "/=" and "%=" are handled by the "/" and "%" rules.
    @OP.def_rules("+=", "-=", "*=", "**=",
                  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
      @lex_state = EXPR_BEG
      op =~ /^(.*)=$/
      Token(TkOPASGN, $1)
    end

    @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
      @lex_state = EXPR_ARG
      Token(op)
    end

    @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
      @lex_state = EXPR_ARG
      Token(op)
    end

    # "+" / "-" directly in front of a digit may be the sign of a number.
    @OP.def_rules("+", "-") do |op, io|
      catch(:RET) do
        if @lex_state == EXPR_ARG
          if @space_seen and peek(0) =~ /[0-9]/
            throw :RET, identify_number
          else
            @lex_state = EXPR_BEG
          end
        elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
          throw :RET, identify_number
        else
          @lex_state = EXPR_BEG
        end
        Token(op)
      end
    end

    @OP.def_rule(".") do |op, io|
      @lex_state = EXPR_BEG
      if peek(0) =~ /[0-9]/
        ungetc
        identify_number
      else
        # for "obj.if" etc.
        @lex_state = EXPR_DOT
        Token(TkDOT)
      end
    end

    @OP.def_rules("..", "...") do |op, io|
      @lex_state = EXPR_BEG
      Token(op)
    end

    lex_int2
  end

  # Registers the second half of the lexing rules (brackets, colons, "/",
  # "%", variables sigils and the fallback rule for numbers/identifiers).
  def lex_int2
    @OP.def_rules("]", "}", ")") do |op, io|
      @lex_state = EXPR_END
      @indent -= 1
      @indent_stack.pop
      Token(op)
    end

    @OP.def_rule(":") do |op, io|
      if @lex_state == EXPR_END || peek(0) =~ /\s/
        @lex_state = EXPR_BEG
        Token(TkCOLON)
      else
        @lex_state = EXPR_FNAME
        Token(TkSYMBEG)
      end
    end

    @OP.def_rule("::") do |op, io|
      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
        @lex_state = EXPR_BEG
        Token(TkCOLON3)
      else
        @lex_state = EXPR_DOT
        Token(TkCOLON2)
      end
    end

    # "/" starts a regexp where an expression is expected, otherwise it is
    # the division operator (or "/=").
    @OP.def_rule("/") do |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_string(op)
      elsif peek(0) == '='
        getc
        @lex_state = EXPR_BEG
        Token(TkOPASGN, "/")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_string(op)
      else
        @lex_state = EXPR_BEG
        Token("/")
      end
    end

    @OP.def_rules("^") do |op, io|
      @lex_state = EXPR_BEG
      Token("^")
    end

    @OP.def_rules(",") do |op, io|
      @lex_state = EXPR_BEG
      Token(op)
    end

    @OP.def_rules(";") do |op, io|
      @lex_state = EXPR_BEG
      # Same unwinding as for a newline: drop keyword entries down to the
      # innermost open bracket.
      until (@indent_stack.empty? ||
             [TkLPAREN, TkLBRACK, TkLBRACE,
              TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
        @indent_stack.pop
      end
      Token(op)
    end

    @OP.def_rule("~") do |op, io|
      @lex_state = EXPR_BEG
      Token("~")
    end

    @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
      @lex_state = EXPR_BEG
      Token("~")
    end

    @OP.def_rule("(") do |op, io|
      @indent += 1
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        @lex_state = EXPR_BEG
        tk_c = TkfLPAREN
      else
        @lex_state = EXPR_BEG
        tk_c = TkLPAREN
      end
      @indent_stack.push tk_c
      Token(tk_c)
    end

    @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
      @lex_state = EXPR_ARG
      Token("[]")
    end

    @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
      @lex_state = EXPR_ARG
      Token("[]=")
    end

    @OP.def_rule("[") do |op, io|
      @indent += 1
      if @lex_state == EXPR_FNAME
        tk_c = TkfLBRACK
      else
        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
          tk_c = TkLBRACK
        elsif @lex_state == EXPR_ARG && @space_seen
          tk_c = TkLBRACK
        else
          tk_c = TkfLBRACK
        end
        @lex_state = EXPR_BEG
      end
      @indent_stack.push tk_c
      Token(tk_c)
    end

    @OP.def_rule("{") do |op, io|
      @indent += 1
      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
        tk_c = TkLBRACE
      else
        tk_c = TkfLBRACE
      end
      @lex_state = EXPR_BEG
      @indent_stack.push tk_c
      Token(tk_c)
    end

    # Backslash-newline is a line continuation and counts as whitespace.
    @OP.def_rule('\\') do |op, io|
      if getc == "\n"
        @space_seen = true
        @continue = true
        Token(TkSPACE)
      else
        read_escape
        Token("\\")
      end
    end

    # "%" starts a %-literal where an expression is expected, otherwise it
    # is the modulo operator (or "%=").
    @OP.def_rule('%') do |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_quotation
      elsif peek(0) == '='
        getc
        # An operand must follow, exactly as after "/=" and the other
        # assignment operators; without this a newline after "a %=" was not
        # recognised as a continuation.
        @lex_state = EXPR_BEG
        Token(TkOPASGN, :%)
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_quotation
      else
        @lex_state = EXPR_BEG
        Token("%")
      end
    end

    @OP.def_rule('$') do |op, io|
      identify_gvar
    end

    @OP.def_rule('@') do |op, io|
      if peek(0) =~ /[\w@]/
        ungetc
        identify_identifier
      else
        Token("@")
      end
    end

    # Fallback rule: numbers and identifiers. Yields nil when the next
    # character starts neither.
    @OP.def_rule("") do |op, io|
      printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
      if peek(0) =~ /[0-9]/
        t = identify_number
      elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
        t = identify_identifier
      end
      printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
      t
    end

    p @OP if RubyLex.debug?
  end

  # Scans a token that starts with "$" (the "$" itself is already consumed):
  # special globals, "$-x" option variables, back references, nth references
  # or an ordinary global variable name.
  def identify_gvar
    @lex_state = EXPR_END

    case ch = getc
    when /[~_*$?!@\/\\;,=:<>".]/
      Token(TkGVAR, "$" + ch)
    when "-"
      # getc is nil when the input ends right after "$-"; to_s avoids a
      # TypeError from String#+ in that case.
      Token(TkGVAR, "$-" + getc.to_s)
    when "&", "`", "'", "+"
      Token(TkBACK_REF, "$"+ch)
    when /[1-9]/
      while getc =~ /[0-9]/; end
      ungetc
      Token(TkNTH_REF)
    when /\w/
      # Push back both the name character and the "$" and rescan the whole
      # thing as an identifier.
      ungetc
      ungetc
      identify_identifier
    else
      ungetc
      Token("$")
    end
  end

  # Scans an identifier, keyword, constant, or a $/@/@@ variable and returns
  # the matching token. Keywords update the lexer state and, for block
  # opening/closing keywords, the nesting depth.
  def identify_identifier
    token = ""
    if peek(0) =~ /[$@]/
      token.concat(c = getc)
      if c == "@" and peek(0) == "@"
        token.concat getc
      end
    end

    # Everything that is not ASCII punctuation/control is an identifier
    # character (this includes non-ASCII bytes).
    while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
      print ":", ch, ":" if RubyLex.debug?
      token.concat ch
    end
    ungetc

    # Trailing "!" / "?" belongs to the name unless it starts "!=" / "?=".
    if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
      token.concat getc
    end

    # almost fix token

    case token
    when /^\$/
      return Token(TkGVAR, token)
    when /^\@\@/
      @lex_state = EXPR_END
      return Token(TkCVAR, token)
    when /^\@/
      @lex_state = EXPR_END
      return Token(TkIVAR, token)
    end

    if @lex_state != EXPR_DOT
      print token, "\n" if RubyLex.debug?

      token_c, *trans = TkReading2Token[token]
      if token_c
        # reserved word?

        if (@lex_state != EXPR_BEG &&
            @lex_state != EXPR_FNAME &&
            trans[1])
          # modifiers
          token_c = TkSymbol2Token[trans[1]]
          @lex_state = trans[0]
        else
          if @lex_state != EXPR_FNAME
            if ENINDENT_CLAUSE.include?(token)
              # check for ``class = val'' etc.
              valid = true
              case token
              when "class"
                valid = false unless peek_match?(/^\s*(<<|\w|::)/)
              when "def"
                valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
              when "do"
                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
              when *ENINDENT_CLAUSE
                valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
              else
                # no nothing
              end
              if valid
                if token == "do"
                  # The "do" of for/while/until does not open another level.
                  if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
                    @indent += 1
                    @indent_stack.push token_c
                  end
                else
                  @indent += 1
                  @indent_stack.push token_c
                end
              end

            elsif DEINDENT_CLAUSE.include?(token)
              @indent -= 1
              @indent_stack.pop
            end
            @lex_state = trans[0]
          else
            @lex_state = EXPR_END
          end
        end
        return Token(token_c, token)
      end
    end

    if @lex_state == EXPR_FNAME
      @lex_state = EXPR_END
      # Setter method name: "def foo=".
      if peek(0) == '='
        token.concat getc
      end
    elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
      @lex_state = EXPR_ARG
    else
      @lex_state = EXPR_END
    end

    if token[0, 1] =~ /[A-Z]/
      return Token(TkCONSTANT, token)
    elsif token[token.size - 1, 1] =~ /[!?]/
      return Token(TkFID, token)
    else
      return Token(TkIDENTIFIER, token)
    end
  end

  # Scans a here document (the "<<" is already consumed). The remainder of
  # the header line is set aside, the body is consumed up to the terminator
  # line, and the header remainder is then pushed back so it is lexed next.
  def identify_here_document
    ch = getc
    if ch == "-"
      ch = getc
      indent = true
    end
    if /['"`]/ =~ ch
      lt = ch
      quoted = ""
      while (c = getc) && c != lt
        quoted.concat c
      end
    else
      lt = '"'
      quoted = ch.dup
      while (c = getc) && c =~ /\w/
        quoted.concat c
      end
      ungetc
    end

    ltback, @ltype = @ltype, lt
    # Remember the rest of the header line (up to and including the newline).
    reserve = []
    while ch = getc
      reserve.push ch
      if ch == "\\"
        reserve.push ch = getc
      elsif ch == "\n"
        break
      end
    end

    @here_header = false

    # Consume body lines until one equals the terminator; with "<<-" leading
    # whitespace of each line is ignored for the comparison.
    line = ""
    while ch = getc
      if ch == "\n"
        if line == quoted
          break
        end
        line = ""
      else
        line.concat ch unless indent && line == "" && /\s/ =~ ch
        if @ltype != "'" && ch == "#" && peek(0) == "{"
          identify_string_dvar
        end
      end
    end

    @here_header = true
    @here_readed.concat reserve
    while ch = reserve.pop
      ungetc ch
    end

    @ltype = ltback
    @lex_state = EXPR_END
    Token(Ltype2Token[lt])
  end

  # Scans a %-literal (the "%" is already consumed): determines the literal
  # type and closing delimiter, then delegates to #identify_string.
  # Raises SyntaxError for an unknown type letter.
  def identify_quotation
    ch = getc
    if lt = PERCENT_LTYPE[ch]
      ch = getc
    elsif ch =~ /\W/
      lt = "\""
    else
      RubyLex.fail SyntaxError, "unknown type of %string"
    end
    # Bracket delimiters close with their counterpart, others with themselves.
    @quoted = ch unless @quoted = PERCENT_PAREN[ch]
    identify_string(lt, @quoted)
  end

  # Scans a numeric literal and returns a TkINTEGER or TkFLOAT token.
  # Raises SyntaxError for malformed literals (bad octal digit, missing
  # digits after a radix prefix, misplaced "_").
  def identify_number
    @lex_state = EXPR_END

    # Leading "0": radix-prefixed or octal integer.
    if peek(0) == "0" && peek(1) !~ /[.eE]/
      getc
      case peek(0)
      when /[xX]/
        ch = getc
        match = /[0-9a-fA-F_]/
      when /[bB]/
        ch = getc
        match = /[01_]/
      when /[oO]/
        ch = getc
        match = /[0-7_]/
      when /[dD]/
        ch = getc
        match = /[0-9_]/
      when /[0-7]/
        match = /[0-7_]/
      when /[89]/
        RubyLex.fail SyntaxError, "Invalid octal digit"
      else
        return Token(TkINTEGER)
      end

      len0 = true        # no digit seen yet after the prefix
      non_digit = false  # last character was "_"
      while ch = getc
        if match =~ ch
          if ch == "_"
            if non_digit
              RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
            else
              non_digit = ch
            end
          else
            non_digit = false
            len0 = false
          end
        else
          ungetc
          if len0
            RubyLex.fail SyntaxError, "numeric literal without digits"
          end
          if non_digit
            RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
          end
          break
        end
      end
      return Token(TkINTEGER)
    end

    type = TkINTEGER
    allow_point = true
    allow_e = true
    non_digit = false
    while ch = getc
      case ch
      when /[0-9]/
        non_digit = false
      when "_"
        non_digit = ch
      # "allow_point && '.'" is false once a point/exponent was seen, so the
      # branch can no longer match.
      when allow_point && "."
        if non_digit
          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
        end
        type = TkFLOAT
        if peek(0) !~ /[0-9]/
          # "1.foo": the point is a method call, not a decimal point.
          type = TkINTEGER
          ungetc
          break
        end
        allow_point = false
      when allow_e && "e", allow_e && "E"
        if non_digit
          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
        end
        type = TkFLOAT
        if peek(0) =~ /[+-]/
          getc
        end
        allow_e = false
        allow_point = false
        non_digit = ch
      else
        if non_digit
          RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
        end
        ungetc
        break
      end
    end
    Token(type)
  end

  # Scans a string-like literal of type +ltype+ up to the closing delimiter
  # +quoted+ (the opening delimiter is already consumed), honouring nested
  # bracket delimiters, escapes and "#{...}" interpolation. Regexp option
  # letters after the closing "/" are consumed too.
  def identify_string(ltype, quoted = ltype)
    @ltype = ltype
    @quoted = quoted
    subtype = nil
    begin
      nest = 0
      while ch = getc
        if @quoted == ch and nest == 0
          break
        elsif @ltype != "'" && ch == "#" && peek(0) == "{"
          identify_string_dvar
        elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
          subtype = true
        elsif ch == '\\' and @ltype == "'"
          # In single-quoted strings only \\, \' and \<newline> are escapes.
          case ch = getc
          when "\\", "\n", "'"
          else
            ungetc
          end
        elsif ch == '\\'
          read_escape
        end
        if PERCENT_PAREN.values.include?(@quoted)
          if PERCENT_PAREN[ch] == @quoted
            nest += 1
          elsif ch == @quoted
            nest -= 1
          end
        end
      end
      if @ltype == "/"
        while /[imxoesun]/ =~ peek(0)
          getc
        end
      end
      if subtype
        Token(DLtype2Token[ltype])
      else
        Token(Ltype2Token[ltype])
      end
    ensure
      @ltype = nil
      @quoted = nil
      @lex_state = EXPR_END
    end
  end

  # Lexes the code inside a "#{...}" interpolation ("#" consumed, "{" next)
  # with a fresh lexer state, then restores the state of the enclosing
  # literal.
  def identify_string_dvar
    begin
      getc

      reserve_continue = @continue
      reserve_ltype = @ltype
      reserve_indent = @indent
      reserve_indent_stack = @indent_stack
      reserve_state = @lex_state
      reserve_quoted = @quoted

      @ltype = nil
      @quoted = nil
      @indent = 0
      @indent_stack = []
      @lex_state = EXPR_BEG

      loop do
        @continue = false
        prompt
        tk = token
        if @ltype or @continue or @indent > 0
          next
        end
        break if tk.kind_of?(TkRBRACE)
      end
    ensure
      @continue = reserve_continue
      @ltype = reserve_ltype
      @indent = reserve_indent
      @indent_stack = reserve_indent_stack
      @lex_state = reserve_state
      @quoted = reserve_quoted
    end
  end

  # Consumes a "#" comment up to, but not including, the newline.
  def identify_comment
    @ltype = "#"

    while ch = getc
      if ch == "\n"
        @ltype = nil
        ungetc
        break
      end
    end
    return Token(TkCOMMENT)
  end

  # Consumes the remainder of a backslash escape sequence (the backslash is
  # already consumed): simple escapes, up to three octal digits, "x" plus up
  # to two hex digits, and the \M- / \C- / \c forms.
  def read_escape
    case ch = getc
    when "\n", "\r", "\f"
    when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s"
    when /[0-7]/
      ungetc ch
      3.times do
        case ch = getc
        when /[0-7]/
        when nil
          break
        else
          ungetc
          break
        end
      end

    when "x"
      2.times do
        case ch = getc
        when /[0-9a-fA-F]/
        when nil
          break
        else
          ungetc
          break
        end
      end

    when "M"
      if (ch = getc) != '-'
        ungetc
      else
        if (ch = getc) == "\\"
          read_escape
        end
      end

    when "C", "c" #, "^"
      if ch == "C" and (ch = getc) != "-"
        ungetc
      elsif (ch = getc) == "\\"
        read_escape
      end
    else
      # other characters
    end
  end
end
# :startdoc: