1# -*- coding: utf-8 -*-
2
3require 'psych/helper'
4
5module Psych
6  class TestEncoding < TestCase
7    class EncodingCatcher < Handler
8      attr_reader :strings
9      def initialize
10        @strings = []
11      end
12
13      (Handler.instance_methods(true) -
14       Object.instance_methods).each do |m|
15        class_eval %{
16          def #{m} *args
17            @strings += args.flatten.find_all { |a|
18              String === a
19            }
20          end
21        }
22      end
23    end
24
25    def setup
26      super
27      @buffer  = StringIO.new
28      @handler = EncodingCatcher.new
29      @parser  = Psych::Parser.new @handler
30      @utf8    = Encoding.find('UTF-8')
31      @emitter = Psych::Emitter.new @buffer
32    end
33
34    def test_transcode_shiftjis
35      str = "こんにちは!"
36      loaded = Psych.load("--- こんにちは!".encode('SHIFT_JIS'))
37      assert_equal str, loaded
38    end
39
40    def test_transcode_utf16le
41      str = "こんにちは!"
42      loaded = Psych.load("--- こんにちは!".encode('UTF-16LE'))
43      assert_equal str, loaded
44    end
45
46    def test_transcode_utf16be
47      str = "こんにちは!"
48      loaded = Psych.load("--- こんにちは!".encode('UTF-16BE'))
49      assert_equal str, loaded
50    end
51
52    def test_io_shiftjis
53      t = Tempfile.new(['shiftjis', 'yml'], :encoding => 'SHIFT_JIS')
54      t.write '--- こんにちは!'
55      t.close
56
57      # If the external encoding isn't utf8, utf16le, or utf16be, we cannot
58      # process the file.
59      File.open(t.path, 'r', :encoding => 'SHIFT_JIS') do |f|
60        assert_raises Psych::SyntaxError do
61          Psych.load(f)
62        end
63      end
64
65      t.close(true)
66    end
67
68    def test_io_utf16le
69      t = Tempfile.new(['utf16le', 'yml'])
70      t.binmode
71      t.write '--- こんにちは!'.encode('UTF-16LE')
72      t.close
73
74      File.open(t.path, 'rb', :encoding => 'UTF-16LE') do |f|
75        assert_equal "こんにちは!", Psych.load(f)
76      end
77
78      t.close(true)
79    end
80
81    def test_io_utf16be
82      t = Tempfile.new(['utf16be', 'yml'])
83      t.binmode
84      t.write '--- こんにちは!'.encode('UTF-16BE')
85      t.close
86
87      File.open(t.path, 'rb', :encoding => 'UTF-16BE') do |f|
88        assert_equal "こんにちは!", Psych.load(f)
89      end
90
91      t.close(true)
92    end
93
94    def test_io_utf8
95      t = Tempfile.new(['utf8', 'yml'])
96      t.binmode
97      t.write '--- こんにちは!'.encode('UTF-8')
98      t.close
99
100      File.open(t.path, 'rb', :encoding => 'UTF-8') do |f|
101        assert_equal "こんにちは!", Psych.load(f)
102      end
103
104      t.close(true)
105    end
106
107    def test_emit_alias
108      @emitter.start_stream Psych::Parser::UTF8
109      @emitter.start_document [], [], true
110      e = assert_raises(RuntimeError) do
111        @emitter.alias 'ドラえもん'.encode('EUC-JP')
112      end
113      assert_match(/alias value/, e.message)
114    end
115
116    def test_to_yaml_is_valid
117      ext_before = Encoding.default_external
118      int_before = Encoding.default_internal
119
120      Encoding.default_external = Encoding::US_ASCII
121      Encoding.default_internal = nil
122
123      s = "こんにちは!"
124      # If no encoding is specified, use UTF-8
125      assert_equal Encoding::UTF_8, Psych.dump(s).encoding
126      assert_equal s, Psych.load(Psych.dump(s))
127    ensure
128      Encoding.default_external = ext_before
129      Encoding.default_internal = int_before
130    end
131
132    def test_start_mapping
133      foo = 'foo'
134      bar = 'バー'
135
136      @emitter.start_stream Psych::Parser::UTF8
137      @emitter.start_document [], [], true
138      @emitter.start_mapping(
139        foo.encode('Shift_JIS'),
140        bar.encode('UTF-16LE'),
141        false, Nodes::Sequence::ANY)
142      @emitter.end_mapping
143      @emitter.end_document false
144      @emitter.end_stream
145
146      @parser.parse @buffer.string
147      assert_encodings @utf8, @handler.strings
148      assert_equal [foo, bar], @handler.strings
149    end
150
151    def test_start_sequence
152      foo = 'foo'
153      bar = 'バー'
154
155      @emitter.start_stream Psych::Parser::UTF8
156      @emitter.start_document [], [], true
157      @emitter.start_sequence(
158        foo.encode('Shift_JIS'),
159        bar.encode('UTF-16LE'),
160        false, Nodes::Sequence::ANY)
161      @emitter.end_sequence
162      @emitter.end_document false
163      @emitter.end_stream
164
165      @parser.parse @buffer.string
166      assert_encodings @utf8, @handler.strings
167      assert_equal [foo, bar], @handler.strings
168    end
169
170    def test_doc_tag_encoding
171      key = '鍵'
172      @emitter.start_stream Psych::Parser::UTF8
173      @emitter.start_document(
174        [1, 1],
175        [['!'.encode('EUC-JP'), key.encode('EUC-JP')]],
176        true
177      )
178      @emitter.scalar 'foo', nil, nil, true, false, Nodes::Scalar::ANY
179      @emitter.end_document false
180      @emitter.end_stream
181
182      @parser.parse @buffer.string
183      assert_encodings @utf8, @handler.strings
184      assert_equal key, @handler.strings[1]
185    end
186
187    def test_emitter_encoding
188      str  = "壁に耳あり、障子に目あり"
189      thing = Psych.load Psych.dump str.encode('EUC-JP')
190      assert_equal str, thing
191    end
192
193    def test_default_internal
194      before = Encoding.default_internal
195
196      Encoding.default_internal = 'EUC-JP'
197
198      str  = "壁に耳あり、障子に目あり"
199      yaml = "--- #{str}"
200      assert_equal @utf8, str.encoding
201
202      @parser.parse str
203      assert_encodings Encoding.find('EUC-JP'), @handler.strings
204      assert_equal str, @handler.strings.first.encode('UTF-8')
205    ensure
206      Encoding.default_internal = before
207    end
208
209    def test_scalar
210      @parser.parse("--- a")
211      assert_encodings @utf8, @handler.strings
212    end
213
214    def test_alias
215      @parser.parse(<<-eoyml)
216%YAML 1.1
217---
218!!seq [
219  !!str "Without properties",
220  &A !!str "Anchored",
221  !!str "Tagged",
222  *A,
223  !!str "",
224]
225      eoyml
226      assert_encodings @utf8, @handler.strings
227    end
228
229    def test_list_anchor
230      list = %w{ a b }
231      list << list
232      @parser.parse(Psych.dump(list))
233      assert_encodings @utf8, @handler.strings
234    end
235
236    def test_map_anchor
237      h = {}
238      h['a'] = h
239      @parser.parse(Psych.dump(h))
240      assert_encodings @utf8, @handler.strings
241    end
242
243    def test_map_tag
244      @parser.parse(<<-eoyml)
245%YAML 1.1
246---
247!!map { a : b }
248      eoyml
249      assert_encodings @utf8, @handler.strings
250    end
251
252    def test_doc_tag
253      @parser.parse(<<-eoyml)
254%YAML 1.1
255%TAG ! tag:tenderlovemaking.com,2009:
256--- !fun
257      eoyml
258      assert_encodings @utf8, @handler.strings
259    end
260
261    private
262    def assert_encodings encoding, strings
263      strings.each do |str|
264        assert_equal encoding, str.encoding, str
265      end
266    end
267  end
268end
269