1#
2# $Id: lexer.rb 44985 2014-02-15 16:01:02Z nagachika $
3#
4# Copyright (c) 2004,2005 Minero Aoki
5#
6# This program is free software.
7# You can distribute and/or modify this program under the Ruby License.
8# For details of Ruby License, see ruby/COPYING.
9#
10
11require 'ripper/core'
12
class Ripper

  # Tokenizes the Ruby program +src+ and returns an array of strings,
  # one per token, in source order.
  #
  # +filename+ and +lineno+ are handed to the lexer unchanged.
  #
  #   p Ripper.tokenize("def m(a) nil end")
  #      # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
  #
  def Ripper.tokenize(src, filename = '-', lineno = 1)
    Lexer.new(src, filename, lineno).tokenize
  end

  # Tokenizes the Ruby program +src+ and returns an array of arrays,
  # each formatted like <code>[[lineno, column], type, token]</code>.
  #
  # +filename+ and +lineno+ are handed to the lexer unchanged.
  #
  #   require 'ripper'
  #   require 'pp'
  #
  #   pp Ripper.lex("def m(a) nil end")
  #     #=> [[[1,  0], :on_kw,     "def"],
  #          [[1,  3], :on_sp,     " "  ],
  #          [[1,  4], :on_ident,  "m"  ],
  #          [[1,  5], :on_lparen, "("  ],
  #          [[1,  6], :on_ident,  "a"  ],
  #          [[1,  7], :on_rparen, ")"  ],
  #          [[1,  8], :on_sp,     " "  ],
  #          [[1,  9], :on_kw,     "nil"],
  #          [[1, 12], :on_sp,     " "  ],
  #          [[1, 13], :on_kw,     "end"]]
  #
  def Ripper.lex(src, filename = '-', lineno = 1)
    Lexer.new(src, filename, lineno).lex
  end

  # Ripper subclass that records every scanner event as
  # <code>[[lineno, column], :on_event, token]</code>.
  class Lexer < ::Ripper   #:nodoc: internal use only
    # Returns only the token strings, in source order.
    def tokenize
      lex.map {|_pos, _event, tok| tok }
    end

    # Returns the recorded scanner events sorted by their
    # <code>[lineno, column]</code> position, so the result is in source
    # order regardless of the order in which the events were dispatched.
    def lex
      parse.sort_by {|pos, _event, _tok| pos }
    end

    # Runs the parser with a fresh event buffer and returns that buffer
    # (events in dispatch order, not sorted).
    def parse
      @buf = []
      super
      @buf
    end

    private

    # Define one handler per scanner event; each handler appends the
    # current position, the event name and the token text to @buf.
    SCANNER_EVENTS.each do |event|
      module_eval(<<-End, __FILE__+'/module_eval', __LINE__ + 1)
        def on_#{event}(tok)
          @buf.push [[lineno(), column()], :on_#{event}, tok]
        end
      End
    end
  end

  # [EXPERIMENTAL]
  # Parses +src+ and returns the string which was matched by +pattern+,
  # or +nil+ when the pattern does not match.
  # +pattern+ is a token pattern written in a Regexp-like notation;
  # +n+ selects a <code>$(...)</code> capture group (0 is the whole match).
  # +nil+ is also returned when group +n+ did not take part in the match.
  #
  #   require 'ripper'
  #
  #   p Ripper.slice('def m(a) nil end', 'ident')                   #=> "m"
  #   p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+')  #=> "m(a)"
  #   p Ripper.slice("<<EOS\nstring\nEOS",
  #                  'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
  #       #=> "string\n"
  #
  def Ripper.slice(src, pattern, n = 0)
    m = token_match(src, pattern)
    m ? m.string(n) : nil
  end

  # Compiles +pattern+ and matches it against the tokens of +src+.
  # Returns a TokenPattern::MatchData, or +nil+ when nothing matches.
  def Ripper.token_match(src, pattern)   #:nodoc:
    TokenPattern.compile(pattern).match(src)
  end

  # A pattern over token types.  Every scanner event is mapped to a single
  # character (see MAP), a token list becomes a string of those characters,
  # and the pattern is translated to an ordinary Regexp over that string.
  # Offsets in the Regexp match are therefore token indexes.
  #
  # Pattern notation:
  #
  # name::       a scanner event name without the +on_+ prefix (e.g. +ident+)
  # $( ... )::   capturing group
  # ( ... )::    non-capturing group
  # [ ... ]::    set of token types
  # ? * + {n}::  repetition
  # .::          any single token
  #
  # Whitespace between elements is ignored.
  class TokenPattern   #:nodoc:

    class Error < ::StandardError # :nodoc:
    end
    class CompileError < Error # :nodoc:
    end
    class MatchError < Error # :nodoc:
    end

    class << self
      alias compile new
    end

    # Raises CompileError when +pattern+ contains an invalid character,
    # an unknown token name, or translates to an invalid Regexp.
    def initialize(pattern)
      @source = pattern
      @re = compile(pattern)
    end

    # Lexes +str+ with Ripper.lex and matches the pattern against the
    # resulting tokens.
    def match(str)
      match_list(::Ripper.lex(str))
    end

    # Matches the pattern against +tokens+, a list shaped like the result
    # of Ripper.lex.  Returns a MatchData or +nil+.
    def match_list(tokens)
      m = @re.match(map_tokens(tokens))
      m ? MatchData.new(tokens, m) : nil
    end

    private

    # Translates +pattern+ into a Regexp over MAP characters.
    def compile(pattern)
      if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern)
        raise CompileError, "invalid char in pattern: #{m[0].inspect}"
      end
      # Punctuation is scanned one character at a time (apart from the
      # two-character "$(" and a "{n}" count) so that every "(" is
      # translated individually; scanning runs such as "*(" or "((" would
      # leave "(" as an unintended capturing group and shift the numbering
      # of the "$(" groups.  A "$" that is not followed by "(" is not
      # matched by the scan and is therefore ignored.
      source = pattern.scan(/\{\d+\}|\w+|\$\(|[()\[\]{}?*+.]/).map {|tok|
        case tok
        when '$('   then '('
        when '('    then '(?:'
        when /\A\w/ then map_token(tok)
        else tok    # ) [ ] { } ? * + . and {n} are passed through verbatim
        end
      }.join
      Regexp.compile(source)
    rescue RegexpError => err
      raise CompileError, err.message
    end

    # Converts a token list into the string the compiled Regexp runs on:
    # one MAP character per token.
    def map_tokens(tokens)
      tokens.map {|_pos, type, _str| map_token(type.to_s.sub(/\Aon_/, '')) }.join
    end

    # Scanner event name (without "on_") => the single character that
    # stands for it in compiled patterns and mapped token strings.
    MAP = {}
    seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a
    SCANNER_EVENT_TABLE.each do |ev, |
      raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty?
      MAP[ev.to_s.sub(/\Aon_/, '')] = seed.shift
    end

    # Returns the MAP character for the token name +tok+; raises
    # CompileError for a name that is not a scanner event.
    def map_token(tok)
      MAP[tok] or raise CompileError, "unknown token: #{tok}"
    end

    # Result of a successful TokenPattern match: the original tokens plus
    # the Regexp match over their mapped form.
    class MatchData # :nodoc:
      def initialize(tokens, match)
        @tokens = tokens
        @match = match
      end

      # Returns the source text covered by group +n+ (0 is the whole
      # match), or +nil+ when there is no match or group +n+ did not take
      # part in it.
      def string(n = 0)
        return nil unless participated?(n)
        match(n).join
      end

      private

      # True when there is a match and group +n+ took part in it.
      # ::MatchData#begin returns nil for a group that was skipped; using
      # that nil as a range bound would select every token.
      def participated?(n)
        @match && @match.begin(n) ? true : false
      end

      # Returns the token strings covered by group +n+.
      def match(n = 0)
        return [] unless participated?(n)
        @tokens[@match.begin(n)...@match.end(n)].map {|_pos, _type, str| str }
      end
    end

  end

end
187