# -*- coding: us-ascii -*-

##
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
# to fill in an RDoc::TopLevel with parsed data.
#
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
# the name of the file to be parsed, the content of the file, an RDoc::Options
# object and an RDoc::Stats object to inform the user of parsed items. The
# scan method is then called to parse the file and must return the
# RDoc::TopLevel object. By calling super these items will be set for you.
#
# In order to be used by RDoc the parser needs to register the file extensions
# it can parse. Use ::parse_files_matching to register extensions.
#
#   require 'rdoc'
#
#   class RDoc::Parser::Xyz < RDoc::Parser
#     parse_files_matching /\.xyz$/
#
#     def initialize top_level, file_name, content, options, stats
#       super
#
#       # extra initialization if needed
#     end
#
#     def scan
#       # parse file and fill in @top_level
#     end
#   end

class RDoc::Parser

  @parsers = []

  class << self

    ##
    # An Array of arrays that maps file extension (or name) regular
    # expressions to parser classes that will parse matching filenames.
    #
    # Use parse_files_matching to register a parser's file extensions.

    attr_reader :parsers

  end

  ##
  # The name of the file being parsed

  attr_reader :file_name

  ##
  # Alias an extension to another extension. After this call, files ending
  # "new_ext" will be parsed using the same parser as "old_ext"
  #
  # A leading "." on either extension is ignored. Returns false when no
  # parser is registered for +old_ext+, true once the alias is registered.

  def self.alias_extension(old_ext, new_ext)
    old_ext = old_ext.sub(/^\.(.*)/, '\1')
    new_ext = new_ext.sub(/^\.(.*)/, '\1')

    # probe the registry with a made-up file name carrying the old extension
    parser = can_parse_by_name "xxx.#{old_ext}"
    return false unless parser

    RDoc::Parser.parsers.unshift [/\.#{new_ext}$/, parser]

    true
  end

  ##
  # Determines if the file is a "binary" file which basically means it has
  # content that an RDoc parser shouldn't try to consume.
  #
  # Files named <tt>.rdoc</tt> or <tt>.txt</tt> are never binary, and neither
  # is a file from which nothing can be read. Otherwise the first 1024 bytes
  # are inspected: a Marshal header or a NUL byte means binary. On rubies
  # with String#encoding the sample is re-read using the encoding named by a
  # magic coding comment (if any) and is binary when it is not valid in that
  # encoding. On older rubies the file is binary when more than 30% of the
  # sample is non-printable 7-bit characters.

  def self.binary?(file)
    return false if file =~ /\.(rdoc|txt)$/

    s = File.read(file, 1024) or return false

    have_encoding = s.respond_to? :encoding

    return true if s[0, 2] == Marshal.dump('')[0, 2] or s.index("\x00")

    if have_encoding then
      mode = "r"
      s.sub!(/\A#!.*\n/, '')     # assume shebang line isn't longer than 1024.
      encoding = s[/^\s*\#\s*(?:-\*-\s*)?(?:en)?coding:\s*([^\s;]+?)(?:-\*-|[\s;])/, 1]
      mode = "r:#{encoding}" if encoding
      s = File.open(file, mode) {|f| f.gets(nil, 1024)}

      not s.valid_encoding?
    else
      if 0.respond_to? :fdiv then
        s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3
      else # HACK 1.8.6
        (s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3
      end
    end
  end

  ##
  # Processes common directives for CodeObjects for the C and Ruby parsers.
  #
  # Applies +directive+'s +value+ to +code_object+, if appropriate
  #
  # Deprecated: a warning is printed when ruby runs with warnings enabled.
  # Directives other than nodoc, doc, yield, yields, arg and args are
  # ignored.

  def self.process_directive code_object, directive, value
    warn "RDoc::Parser::process_directive is deprecated and wil be removed in RDoc 4.  Use RDoc::Markup::PreProcess#handle_directive instead" if $-w

    case directive
    when 'nodoc' then
      code_object.document_self = nil # notify nodoc
      code_object.document_children = value.downcase != 'all'
    when 'doc' then
      code_object.document_self = true
      code_object.force_documentation = true
    when 'yield', 'yields' then
      # remove parameter &block
      code_object.params.sub!(/,?\s*&\w+/, '') if code_object.params

      code_object.block_params = value
    when 'arg', 'args' then
      code_object.params = value
    end
  end

  ##
  # Checks if +file+ is a zip file in disguise. Signatures from
  # http://www.garykessler.net/library/file_sigs.html
  #
  # Only the first four bytes are read. Any StandardError raised while
  # reading (for example a missing or unreadable file) yields false.

  def self.zip? file
    zip_signature = File.read file, 4

    zip_signature == "PK\x03\x04" or
      zip_signature == "PK\x05\x06" or
      zip_signature == "PK\x07\x08"
  rescue
    false
  end

  ##
  # Return a parser that can handle a particular extension
  #
  # Returns nil when no parser matches +file_name+ or when the file would be
  # handled by RDoc::Parser::Simple but is really a zip archive.

  def self.can_parse file_name
    parser = can_parse_by_name file_name

    # HACK Selenium hides a jar file using a .txt extension
    return if parser == RDoc::Parser::Simple and zip? file_name

    parser
  end

  ##
  # Returns a parser that can handle the extension for +file_name+. This does
  # not depend upon the file being readable.
  #
  # The first registered regexp matching +file_name+ selects the parser. When
  # that is RDoc::Parser::Simple and the extension is something other than
  # txt or rdoc, the file's modeline is consulted and nil is returned unless
  # the modeline is absent or names rdoc. An Errno::EACCES raised while
  # reading the modeline also results in nil.

  def self.can_parse_by_name file_name
    _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }

    # The default parser must not parse binary files
    ext_name = File.extname file_name
    return parser if ext_name.empty?

    if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/ then
      case check_modeline file_name
      when nil, 'rdoc' then # continue
      else return nil
      end
    end

    parser
  rescue Errno::EACCES
  end

  ##
  # Returns the file type from the modeline in +file_name+
  #
  # Only the first line of the file is examined. The result is the
  # downcased type found between <tt>-*-</tt> markers, or the value of the
  # <tt>mode:</tt> entry when the modeline holds several
  # semicolon-separated entries. Returns nil when there is no modeline, when
  # it only declares a coding, or when matching raises ArgumentError (such
  # as an invalid byte sequence).

  def self.check_modeline file_name
    # File.open rather than Kernel#open: Kernel#open treats a name that
    # starts with "|" as a command to run in a subprocess.
    line = File.open file_name do |io|
      io.gets
    end

    /-\*-\s*(.*?\S)\s*-\*-/ =~ line

    return nil unless type = $1

    if /;/ =~ type then
      return nil unless /(?:\s|\A)mode:\s*([^\s;]+)/i =~ type
      type = $1
    end

    return nil if /coding:/i =~ type

    type.downcase
  rescue ArgumentError # invalid byte sequence, etc.
  end

  ##
  # Finds and instantiates the correct parser for the given +file_name+ and
  # +content+.
  #
  # Returns nil for binary files, when no parser can be found, and when a
  # SystemCallError is raised along the way. A <tt>markup:</tt> comment in
  # +content+ takes precedence over the file name; a file without an
  # extension whose shebang mentions ruby is parsed as Ruby.

  def self.for top_level, file_name, content, options, stats
    return if binary? file_name

    parser = use_markup content

    unless parser then
      parse_name = file_name

      # If no extension, look for shebang
      if file_name !~ /\.\w+$/ && content =~ %r{\A#!(.+)} then
        shebang = $1
        case shebang
        when %r{env\s+ruby}, %r{/ruby}
          parse_name = 'dummy.rb'
        end
      end

      parser = can_parse parse_name
    end

    return unless parser

    parser.new top_level, file_name, content, options, stats
  rescue SystemCallError
    nil
  end

  ##
  # Record which file types this parser can understand.
  #
  # It is ok to call this multiple times.
  #
  # The newest registration is placed at the front of the parsers list and so
  # is consulted first.

  def self.parse_files_matching(regexp)
    RDoc::Parser.parsers.unshift [regexp, self]
  end

  ##
  # If there is a <tt>markup: parser_name</tt> comment at the front of the
  # file, use it to determine the parser. For example:
  #
  #   # markup: rdoc
  #   # Class comment can go here
  #
  #   class C
  #   end
  #
  # The comment should appear as the first line of the +content+.
  #
  # If the content contains a shebang or editor modeline the comment may
  # appear on the second or third line.
  #
  # Any comment style may be used to hide the markup comment.
  #
  # Returns nil when there is no markup comment or when no registered parser
  # has a class name matching the requested markup.

  def self.use_markup content
    markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first

    return unless markup

    # TODO Ruby should be returned only when the filename is correct
    return RDoc::Parser::Ruby if %w[tomdoc markdown].include? markup

    markup = Regexp.escape markup

    # find returns nil when nothing matches; destructuring turns that into a
    # nil parser instead of calling a method on nil
    _, selected = RDoc::Parser.parsers.find do |_, parser|
      /^#{markup}$/i =~ parser.name.sub(/.*:/, '')
    end

    selected
  end

  ##
  # Creates a new Parser storing +top_level+, +file_name+, +content+,
  # +options+ and +stats+ in instance variables. In +@preprocess+ an
  # RDoc::Markup::PreProcess object is created which allows processing of
  # directives.
  #
  # The parser's class is recorded on +top_level+ and the store of
  # +top_level+ is kept in +@store+.

  def initialize top_level, file_name, content, options, stats
    @top_level = top_level
    @top_level.parser = self.class
    @store = @top_level.store

    @file_name = file_name
    @content = content
    @options = options
    @stats = stats

    @preprocess = RDoc::Markup::PreProcess.new @file_name, @options.rdoc_include
    @preprocess.options = @options
  end

  autoload :RubyTools, 'rdoc/parser/ruby_tools'
  autoload :Text,      'rdoc/parser/text'

end

# simple must come first in order to show up last in the parsers list
require 'rdoc/parser/simple'
require 'rdoc/parser/c'
require 'rdoc/parser/changelog'
require 'rdoc/parser/markdown'
require 'rdoc/parser/rd'
require 'rdoc/parser/ruby'