#
# $Id: lexer.rb 44985 2014-02-15 16:01:02Z nagachika $
#
# Copyright (c) 2004,2005 Minero Aoki
#
# This program is free software.
# You can distribute and/or modify this program under the Ruby License.
# For details of Ruby License, see ruby/COPYING.
#

require 'ripper/core'

class Ripper

  # Tokenizes the Ruby program and returns an array of strings.
  #
  #   p Ripper.tokenize("def m(a) nil end")
  #      # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
  #
  def Ripper.tokenize(src, filename = '-', lineno = 1)
    Lexer.new(src, filename, lineno).tokenize
  end

  # Tokenizes the Ruby program and returns an array of an array,
  # which is formatted like <code>[[lineno, column], type, token]</code>.
  #
  #   require 'ripper'
  #   require 'pp'
  #
  #   pp Ripper.lex("def m(a) nil end")
  #     #=> [[[1,  0], :on_kw,     "def"],
  #          [[1,  3], :on_sp,     " "  ],
  #          [[1,  4], :on_ident,  "m"  ],
  #          [[1,  5], :on_lparen, "("  ],
  #          [[1,  6], :on_ident,  "a"  ],
  #          [[1,  7], :on_rparen, ")"  ],
  #          [[1,  8], :on_sp,     " "  ],
  #          [[1,  9], :on_kw,     "nil"],
  #          [[1, 12], :on_sp,     " "  ],
  #          [[1, 13], :on_kw,     "end"]]
  #
  def Ripper.lex(src, filename = '-', lineno = 1)
    Lexer.new(src, filename, lineno).lex
  end

  class Lexer < ::Ripper #:nodoc: internal use only
    # Returns just the token strings, in source order.
    def tokenize
      lex().map {|pos, event, tok| tok }
    end

    # Returns [[lineno, column], event, token] triples, sorted by position.
    # Sorting is necessary because scanner events are not always dispatched
    # in source order (heredoc bodies, for example).
    def lex
      parse().sort_by {|pos, event, tok| pos }
    end

    # Runs the parser; every scanner event handler below appends its token
    # to @buf, which is returned instead of the (nil) parse result.
    def parse
      @buf = []
      super
      @buf
    end

    private

    # Generate an on_* handler for every scanner event.  Each handler simply
    # records the current position, the event name and the token text.
    SCANNER_EVENTS.each do |event|
      module_eval(<<-End, __FILE__+'/module_eval', __LINE__ + 1)
        def on_#{event}(tok)
          @buf.push [[lineno(), column()], :on_#{event}, tok]
        end
      End
    end
  end

  # [EXPERIMENTAL]
  # Parses +src+ and return a string which was matched to +pattern+.
  # +pattern+ should be described as Regexp.
  #
  #   require 'ripper'
  #
  #   p Ripper.slice('def m(a) nil end', 'ident')                   #=> "m"
  #   p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+')  #=> "m(a)"
  #   p Ripper.slice("<<EOS\nstring\nEOS",
  #                  'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
  #     #=> "string\n"
  #
  def Ripper.slice(src, pattern, n = 0)
    if m = token_match(src, pattern)
    then m.string(n)
    else nil
    end
  end

  def Ripper.token_match(src, pattern)   #:nodoc:
    TokenPattern.compile(pattern).match(src)
  end

  # Matches a pattern of token-type names (e.g. 'ident lparen') against a
  # token stream by mapping each scanner event to a unique character and
  # running an ordinary Regexp over the resulting string.
  class TokenPattern   #:nodoc:

    class Error < ::StandardError # :nodoc:
    end
    class CompileError < Error # :nodoc:
    end
    class MatchError < Error # :nodoc:
    end

    class << self
      alias compile new
    end

    def initialize(pattern)
      @source = pattern
      @re = compile(pattern)
    end

    # Lexes +str+ and matches the compiled pattern against its tokens.
    # Returns a TokenPattern::MatchData or nil.
    def match(str)
      match_list(::Ripper.lex(str))
    end

    # Matches against an already-lexed token list
    # ([[lineno, column], type, str] triples).
    def match_list(tokens)
      if m = @re.match(map_tokens(tokens))
      then MatchData.new(tokens, m)
      else nil
      end
    end

    private

    # Translates a token pattern into a Regexp: event names become their
    # single-character codes, $( ... ) becomes a capturing group, ( ... )
    # a non-capturing group; quantifiers and brackets pass through.
    def compile(pattern)
      if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern)
        raise CompileError, "invalid char in pattern: #{m[0].inspect}"
      end
      buf = ''
      pattern.scan(/(?:\w+|\$\(|[()\[\]\{\}?*+\.]+)/) do |tok|
        case tok
        when /\w/
          buf.concat map_token(tok)
        when '$('
          buf.concat '('
        when '('
          buf.concat '(?:'
        when /[?*+\[\])\.]/
          # Quantifiers, character-class brackets, closing parens and dots
          # pass through unchanged.  ('+' was previously missing from this
          # class, so a pattern like 'ident+' hit the branch below.)
          buf.concat tok
        else
          # NOTE: '{'/'}' survive the validation above but are not
          # translated (and '{2}' would mis-map the digits via map_token),
          # so brace quantifiers are unsupported and end up here.
          raise 'must not happen'
        end
      end
      Regexp.compile(buf)
    rescue RegexpError => err
      raise CompileError, err.message
    end

    # Renders a token list as a String, one mapped character per token.
    def map_tokens(tokens)
      tokens.map {|pos,type,str| map_token(type.to_s.sub(/\Aon_/,'')) }.join
    end

    # Event name (without the on_ prefix) => unique single character.
    # Built once at load time; raises if there are more scanner events
    # than available characters.
    MAP = {}
    seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a
    SCANNER_EVENT_TABLE.each do |ev, |
      raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty?
      MAP[ev.to_s.sub(/\Aon_/,'')] = seed.shift
    end

    def map_token(tok)
      MAP[tok]  or raise CompileError, "unknown token: #{tok}"
    end

    # Wraps a Regexp MatchData together with the token list it was matched
    # against, translating character offsets back into token strings.
    class MatchData # :nodoc:
      def initialize(tokens, match)
        @tokens = tokens
        @match = match
      end

      # Joined source text of the tokens covered by group +n+
      # (0 = whole match), or nil if there was no match.
      def string(n = 0)
        return nil unless @match
        match(n).join
      end

      private

      # Token strings covered by group +n+; char offsets in the mapped
      # string correspond 1:1 to token indexes.
      def match(n = 0)
        return [] unless @match
        @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str }
      end
    end

  end

end