# coding: US-ASCII

##
# This class is a wrapper around File IO and Encoding that helps RDoc load
# files and convert them to the correct encoding.
#--
# The compact <tt>module RDoc::Encoding</tt> form is deliberate: it keeps RDoc
# out of the lexical scope so a bare +Encoding+ inside this module resolves to
# the core ::Encoding class instead of to this module.
#++

module RDoc::Encoding

  ##
  # Reads the contents of +filename+ and handles any encoding directives in
  # the file.
  #
  # The file is read in binary mode.  A leading UTF-8 byte order mark is
  # stripped and a magic encoding comment (see ::set_encoding) is removed
  # from the returned content.
  #
  # The content will be converted to the +encoding+ (or to
  # Encoding.default_external when +encoding+ is nil).  If the file cannot be
  # converted a warning will be printed and nil will be returned.
  #
  # If +force_transcode+ is true the document will be transcoded and any
  # unknown character in the target encoding will be replaced with '?'
  #
  # Returns nil, without a warning, when +filename+ is a directory or does not
  # exist.  Returns nil with a warning when the file names an unknown
  # encoding.  Any other ArgumentError is re-raised.

  def self.read_file filename, encoding, force_transcode = false
    # File.open rather than Kernel#open: Kernel#open treats a name starting
    # with "|" as a command to run, which must never happen for a file name.
    content = File.open filename, "rb" do |f| f.read end
    content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

    # sub! returns nil when there was no BOM, so utf8 doubles as a flag.
    # The /n flag keeps this a binary regexp regardless of the source
    # encoding, so it can always be matched against the binary content.
    utf8 = content.sub!(/\A\xef\xbb\xbf/n, '')

    RDoc::Encoding.set_encoding content

    # Rubies without the Encoding class get the raw bytes back unchanged.
    if Object.const_defined? :Encoding then
      begin
        encoding ||= Encoding.default_external
        orig_encoding = content.encoding

        if utf8 then
          # a BOM was present, the bytes are UTF-8
          content.force_encoding Encoding::UTF_8
          content.encode! encoding
        else
          # assume the content is in our output encoding
          content.force_encoding encoding
        end

        unless content.valid_encoding? then
          # revert and try to transcode
          content.force_encoding orig_encoding
          content.encode! encoding
        end

        unless content.valid_encoding? then
          warn "unable to convert #{filename} to #{encoding}, skipping"
          content = nil
        end
      rescue Encoding::InvalidByteSequenceError,
             Encoding::UndefinedConversionError => e
        if force_transcode then
          # transcode from the encoding the file declared, replacing anything
          # that cannot be represented in the target encoding
          content.force_encoding orig_encoding
          content.encode!(encoding,
                          :invalid => :replace, :undef => :replace,
                          :replace => '?')
          return content
        else
          warn "unable to convert #{e.message} for #{filename}, skipping"
          return nil
        end
      end
    end

    content
  rescue ArgumentError => e
    # Encoding.find (via ::set_encoding) raises ArgumentError for names it
    # does not know; anything else is not ours to swallow.
    raise unless e.message =~ /unknown encoding name - (.*)/
    warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
    nil
  rescue Errno::EISDIR, Errno::ENOENT
    nil
  end

  ##
  # Sets the encoding of +string+ based on the magic comment
  #
  # Only the first line is examined, or the second line when the first is a
  # <tt>#!</tt> line.  Both an XML declaration with an +encoding+ attribute
  # and a <tt>coding:</tt> / <tt>encoding=</tt> style directive are
  # recognized.  When a directive is found that line is removed from
  # +string+, which is modified in place.
  #
  # Does nothing when no directive is found.  Raises ArgumentError (from
  # Encoding.find) when the directive names an unknown encoding.

  def self.set_encoding string
    string =~ /\A(?:#!.*\n)?(.*\n)/

    # nil when the string contains no newline-terminated line to inspect
    first_line = $1

    name = case first_line
           when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
           when /\b(?:en)?coding[=:]\s*([^\s;]+)/i   then $1
           else return
           end

    # A String pattern is matched literally, so this drops exactly the
    # directive line and leaves any #! line in place.
    string.sub! first_line, ''

    return unless Object.const_defined? :Encoding

    enc = Encoding.find name
    string.force_encoding enc if enc
  end

end