1# -*- coding: us-ascii -*-
2
3##
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
5# to fill in an RDoc::TopLevel with parsed data.
6#
7# The initialize method takes an RDoc::TopLevel to fill with parsed content,
8# the name of the file to be parsed, the content of the file, an RDoc::Options
9# object and an RDoc::Stats object to inform the user of parsed items.  The
10# scan method is then called to parse the file and must return the
11# RDoc::TopLevel object.  By calling super these items will be set for you.
12#
13# In order to be used by RDoc the parser needs to register the file extensions
14# it can parse.  Use ::parse_files_matching to register extensions.
15#
16#   require 'rdoc'
17#
18#   class RDoc::Parser::Xyz < RDoc::Parser
19#     parse_files_matching /\.xyz$/
20#
21#     def initialize top_level, file_name, content, options, stats
22#       super
23#
24#       # extra initialization if needed
25#     end
26#
27#     def scan
28#       # parse file and fill in @top_level
29#     end
30#   end
31
32class RDoc::Parser
33
34  @parsers = []
35
  class << self

    ##
    # An Array of arrays that maps file extension (or name) regular
    # expressions to parser classes that will parse matching filenames.
    #
    # Each entry is a two-element Array of <tt>[regexp, parser_class]</tt>.
    # New entries are added to the front of the list and lookups take the
    # first entry whose regexp matches, so the most recent registration for
    # a file name wins.
    #
    # Use parse_files_matching to register a parser's file extensions.

    attr_reader :parsers

  end
47
48  ##
49  # The name of the file being parsed
50
51  attr_reader :file_name
52
53  ##
54  # Alias an extension to another extension. After this call, files ending
55  # "new_ext" will be parsed using the same parser as "old_ext"
56
57  def self.alias_extension(old_ext, new_ext)
58    old_ext = old_ext.sub(/^\.(.*)/, '\1')
59    new_ext = new_ext.sub(/^\.(.*)/, '\1')
60
61    parser = can_parse_by_name "xxx.#{old_ext}"
62    return false unless parser
63
64    RDoc::Parser.parsers.unshift [/\.#{new_ext}$/, parser]
65
66    true
67  end
68
69  ##
70  # Determines if the file is a "binary" file which basically means it has
71  # content that an RDoc parser shouldn't try to consume.
72
73  def self.binary?(file)
74    return false if file =~ /\.(rdoc|txt)$/
75
76    s = File.read(file, 1024) or return false
77
78    have_encoding = s.respond_to? :encoding
79
80    return true if s[0, 2] == Marshal.dump('')[0, 2] or s.index("\x00")
81
82    if have_encoding then
83      mode = "r"
84      s.sub!(/\A#!.*\n/, '')     # assume shebang line isn't longer than 1024.
85      encoding = s[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
86      mode = "r:#{encoding}" if encoding
87      s = File.open(file, mode) {|f| f.gets(nil, 1024)}
88
89      not s.valid_encoding?
90    else
91      if 0.respond_to? :fdiv then
92        s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3
93      else # HACK 1.8.6
94        (s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3
95      end
96    end
97  end
98
99  ##
100  # Processes common directives for CodeObjects for the C and Ruby parsers.
101  #
102  # Applies +directive+'s +value+ to +code_object+, if appropriate
103
104  def self.process_directive code_object, directive, value
105    warn "RDoc::Parser::process_directive is deprecated and wil be removed in RDoc 4.  Use RDoc::Markup::PreProcess#handle_directive instead" if $-w
106
107    case directive
108    when 'nodoc' then
109      code_object.document_self = nil # notify nodoc
110      code_object.document_children = value.downcase != 'all'
111    when 'doc' then
112      code_object.document_self = true
113      code_object.force_documentation = true
114    when 'yield', 'yields' then
115      # remove parameter &block
116      code_object.params.sub!(/,?\s*&\w+/, '') if code_object.params
117
118      code_object.block_params = value
119    when 'arg', 'args' then
120      code_object.params = value
121    end
122  end
123
124  ##
125  # Checks if +file+ is a zip file in disguise.  Signatures from
126  # http://www.garykessler.net/library/file_sigs.html
127
128  def self.zip? file
129    zip_signature = File.read file, 4
130
131    zip_signature == "PK\x03\x04" or
132      zip_signature == "PK\x05\x06" or
133      zip_signature == "PK\x07\x08"
134  rescue
135    false
136  end
137
138  ##
139  # Return a parser that can handle a particular extension
140
141  def self.can_parse file_name
142    parser = can_parse_by_name file_name
143
144    # HACK Selenium hides a jar file using a .txt extension
145    return if parser == RDoc::Parser::Simple and zip? file_name
146
147    parser
148  end
149
150  ##
151  # Returns a parser that can handle the extension for +file_name+.  This does
152  # not depend upon the file being readable.
153
154  def self.can_parse_by_name file_name
155    _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }
156
157    # The default parser must not parse binary files
158    ext_name = File.extname file_name
159    return parser if ext_name.empty?
160
161    if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/ then
162      case check_modeline file_name
163      when nil, 'rdoc' then # continue
164      else return nil
165      end
166    end
167
168    parser
169  rescue Errno::EACCES
170  end
171
172  ##
173  # Returns the file type from the modeline in +file_name+
174
175  def self.check_modeline file_name
176    line = open file_name do |io|
177      io.gets
178    end
179
180    /-\*-\s*(.*?\S)\s*-\*-/ =~ line
181
182    return nil unless type = $1
183
184    if /;/ =~ type then
185      return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
186      type = $1
187    end
188
189    return nil if /coding:/i =~ type
190
191    type.downcase
192  rescue ArgumentError # invalid byte sequence, etc.
193  end
194
195  ##
196  # Finds and instantiates the correct parser for the given +file_name+ and
197  # +content+.
198
199  def self.for top_level, file_name, content, options, stats
200    return if binary? file_name
201
202    parser = use_markup content
203
204    unless parser then
205      parse_name = file_name
206
207      # If no extension, look for shebang
208      if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
209        shebang = $1
210        case shebang
211        when %r{env\s+ruby}, %r{/ruby}
212          parse_name = 'dummy.rb'
213        end
214      end
215
216      parser = can_parse parse_name
217    end
218
219    return unless parser
220
221    parser.new top_level, file_name, content, options, stats
222  rescue SystemCallError
223    nil
224  end
225
226  ##
227  # Record which file types this parser can understand.
228  #
229  # It is ok to call this multiple times.
230
231  def self.parse_files_matching(regexp)
232    RDoc::Parser.parsers.unshift [regexp, self]
233  end
234
235  ##
236  # If there is a <tt>markup: parser_name</tt> comment at the front of the
237  # file, use it to determine the parser.  For example:
238  #
239  #   # markup: rdoc
240  #   # Class comment can go here
241  #
242  #   class C
243  #   end
244  #
245  # The comment should appear as the first line of the +content+.
246  #
247  # If the content contains a shebang or editor modeline the comment may
248  # appear on the second or third line.
249  #
250  # Any comment style may be used to hide the markup comment.
251
252  def self.use_markup content
253    markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first
254
255    return unless markup
256
257    # TODO Ruby should be returned only when the filename is correct
258    return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup
259
260    markup = Regexp.escape markup
261
262    RDoc::Parser.parsers.find do |_, parser|
263      /^#{markup}$/i =~ parser.name.sub(/.*:/, '')
264    end.last
265  end
266
267  ##
268  # Creates a new Parser storing +top_level+, +file_name+, +content+,
269  # +options+ and +stats+ in instance variables.  In +@preprocess+ an
270  # RDoc::Markup::PreProcess object is created which allows processing of
271  # directives.
272
273  def initialize top_level, file_name, content, options, stats
274    @top_level = top_level
275    @top_level.parser = self.class
276    @store = @top_level.store
277
278    @file_name = file_name
279    @content = content
280    @options = options
281    @stats = stats
282
283    @preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include
284    @preprocess.options = @options
285  end
286
287  autoload :RubyTools, 'rdoc/parser/ruby_tools'
288  autoload :Text,      'rdoc/parser/text'
289
290end
291
292# simple must come first in order to show up last in the parsers list
293require 'rdoc/parser/simple'
294require 'rdoc/parser/c'
295require 'rdoc/parser/changelog'
296require 'rdoc/parser/markdown'
297require 'rdoc/parser/rd'
298require 'rdoc/parser/ruby'
299
300