1# coding: US-ASCII
2
3##
# This module is a wrapper around File IO and Encoding that helps RDoc load
# files and convert them to the correct encoding.
6
module RDoc::Encoding

  ##
  # Reads the contents of +filename+ and handles any encoding directives in
  # the file.
  #
  # The file is read in binary mode.  A leading UTF-8 byte order mark is
  # stripped, and an encoding directive on the first line (see
  # ::set_encoding) is applied to the content and removed from it.
  #
  # The content will be converted to the +encoding+, or to
  # Encoding.default_external when +encoding+ is nil.  If the file cannot be
  # converted a warning will be printed and nil will be returned.
  #
  # If +force_transcode+ is true the document will be transcoded and any
  # unknown character in the target encoding will be replaced with '?'
  #
  # Returns nil without a warning when +filename+ is a directory or does not
  # exist.  An encoding directive naming an unknown encoding prints a warning
  # and returns nil.

  def self.read_file filename, encoding, force_transcode = false
    # File.open, unlike Kernel#open, never treats a leading "|" in the
    # filename as a command to run.
    content = File.open(filename, "rb") { |io| io.read }
    content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

    # The /n flag keeps this a binary regexp whatever the source encoding of
    # this file is, so it can always be matched against the binary content.
    utf8 = content.sub!(/\A\xef\xbb\xbf/n, '')

    RDoc::Encoding.set_encoding content

    if Object.const_defined? :Encoding then
      begin
        encoding ||= Encoding.default_external
        orig_encoding = content.encoding

        # A byte order mark says the bytes are UTF-8, so that is the encoding
        # to transcode from if the strict conversion below fails.
        source_encoding = utf8 ? Encoding::UTF_8 : orig_encoding

        if utf8 then
          content.force_encoding Encoding::UTF_8
          content.encode! encoding
        else
          # assume the content is in our output encoding
          content.force_encoding encoding
        end

        unless content.valid_encoding? then
          # revert and try to transcode
          content.force_encoding orig_encoding
          content.encode! encoding
        end

        unless content.valid_encoding? then
          warn "unable to convert #{filename} to #{encoding}, skipping"
          content = nil
        end
      rescue Encoding::InvalidByteSequenceError,
             Encoding::UndefinedConversionError => e
        if force_transcode then
          # encode! leaves the string untouched when it raises, so the
          # original bytes are still available to be transcoded leniently.
          content.force_encoding source_encoding
          content.encode!(encoding,
                          :invalid => :replace, :undef => :replace,
                          :replace => '?')
          return content
        else
          warn "unable to convert #{e.message} for #{filename}, skipping"
          return nil
        end
      end
    end

    content
  rescue ArgumentError => e
    # Only unknown encoding names are handled here; anything else is a bug
    # that must propagate.
    raise unless e.message =~ /unknown encoding name - (.*)/
    warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
    nil
  rescue Errno::EISDIR, Errno::ENOENT
    nil
  end

  ##
  # Sets the encoding of +string+ based on the magic comment
  #
  # The first line of +string+ (or the second line when the first is a
  # shebang) is searched for an XML declaration with an +encoding+ attribute
  # or for a <tt>coding:</tt>/<tt>encoding:</tt> style directive.  When one
  # is found that line is removed from +string+ in place and +string+ is
  # tagged with the named encoding.
  #
  # Returns nil and leaves +string+ untouched when no directive is found.
  # Encoding.find raises ArgumentError for an unknown encoding name; that
  # happens after the directive line has already been removed.

  def self.set_encoding string
    # Skip an optional shebang line and capture the line that follows it.
    # This is nil when +string+ contains no complete line.
    first_line = string[/\A(?:#!.*\n)?(.*\n)/, 1]

    name = case first_line
           when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
           when /\b(?:en)?coding[=:]\s*([^\s;]+)/i   then $1
           else                                           return
           end

    string.sub! first_line, ''

    return unless Object.const_defined? :Encoding

    enc = Encoding.find name
    string.force_encoding enc if enc
  end

end
97
98