1#!/usr/bin/env ruby -w
2# encoding: UTF-8
3
4# tc_csv_parsing.rb
5#
6#  Created by James Edward Gray II on 2005-10-31.
7#  Copyright 2005 James Edward Gray II. You can redistribute or modify this code
8#  under the terms of Ruby's license.
9
10require "timeout"
11
12require_relative "base"
13
#
# The following tests are my interpretation of the
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt].  I only deviate from that
# document in one place (intentionally) and that is to make the default row
# separator <tt>$/</tt>.
#
class TestCSV::Parsing < TestCSV
  extend DifferentOFS

  # Filler appended to (or embedded in) malformed input by the fail-fast
  # tests at the bottom of this class.
  BIG_DATA = ("123456789\n" * 1024).freeze

  # A single line mixing unquoted, padded, empty, quoted and quote-escaped
  # fields.
  def test_mastering_regex_example
    ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
    assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                    "It's \"10 Grand\", baby", "10K" ],
                  CSV.parse_line(ex) )
  end

  # Old Ruby 1.8 CSV library tests.
  #
  # Each entry is <tt>[input, expected_fields]</tt>.  Every entry of the second
  # table also appears in the first one; both tables are kept as they were.
  def test_std_lib_csv
    [ ["\t", ["\t"]],
      ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["\",\"", [","]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
      ["\",\",\",\"", [",", ","]],
      ["foo,bar,", ["foo", "bar", nil]],
      [",foo,bar", [nil, "foo", "bar"]],
      ["foo,bar", ["foo", "bar"]],
      [";", [";"]],
      ["\t,\t", ["\t", "\t"]],
      ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
      [";,;", [";", ";"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end

    [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["foo,bar", ["foo", "bar"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  # From:  http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
  #
  # Each entry is <tt>[input, expected_fields]</tt>.
  def test_aras_edge_cases
    [ [%Q{a,b},               ["a", "b"]],
      [%Q{a,"""b"""},         ["a", "\"b\""]],
      [%Q{a,"""b"},           ["a", "\"b"]],
      [%Q{a,"b"""},           ["a", "b\""]],
      [%Q{a,"\nb"""},         ["a", "\nb\""]],
      [%Q{a,"""\nb"},         ["a", "\"\nb"]],
      [%Q{a,"""\nb\n"""},     ["a", "\"\nb\n\""]],
      [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
      [%Q{a,,,},              ["a", nil, nil, nil]],
      [%Q{,},                 [nil, nil]],
      [%Q{"",""},             ["", ""]],
      [%Q{""""},              ["\""]],
      [%Q{"""",""},           ["\"",""]],
      [%Q{,""},               [nil,""]],
      [%Q{,"\r"},             [nil,"\r"]],
      [%Q{"\r\n,"},           ["\r\n,"]],
      [%Q{"\r\n,",},          ["\r\n,", nil]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  def test_james_edge_cases
    # A read at eof? should return nil.
    assert_nil(CSV.parse_line(""))
    #
    # With Ruby 1.8 CSV it's impossible to tell an empty line from a line
    # containing a single +nil+ field.  The old CSV library returns
    # <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
    # me.
    #
    assert_equal(Array.new, CSV.parse_line("\n1,2,3\n"))
  end

  # Quoted fields containing bare newlines and CRLF pairs.  Each entry is
  # <tt>[input, expected_fields]</tt>.
  def test_rob_edge_cases
    [ [%Q{"a\nb"},                         ["a\nb"]],
      [%Q{"\n\n\n"},                       ["\n\n\n"]],
      [%Q{a,"b\n\nc"},                     ['a', "b\n\nc"]],
      [%Q{,"\r\n"},                        [nil,"\r\n"]],
      [%Q{,"\r\n."},                       [nil,"\r\n."]],
      [%Q{"a\na","one newline"},           ["a\na", 'one newline']],
      [%Q{"a\n\na","two newlines"},        ["a\n\na", 'two newlines']],
      [%Q{"a\r\na","one CRLF"},            ["a\r\na", 'one CRLF']],
      [%Q{"a\r\n\r\na","two CRLFs"},       ["a\r\n\r\na", 'two CRLFs']],
      [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
    ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  def test_non_regex_edge_cases
    # An early version of the non-regex parser fails this test
    [ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
        ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end

    # Quotes in the middle of a quoted field must be rejected.
    assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line("1,\"23\"4\"5\", 6")
    end
  end

  # Malformed input must raise CSV::MalformedCSVError, and when reading row by
  # row the error message must name the offending line.
  def test_malformed_csv
    # A bare \r in an unquoted field while the row separator is "\n".
    assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line("1,2\r,3", row_sep: "\n")
    end

    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,some\rjunk
    line,5,jkl
    END_DATA
    # Sanity-check the fixture itself: six physical lines, with the bad field
    # on the one starting with "line,4".
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })

    csv = CSV.new(bad_data)
    begin
      # Every row read before the error must be non-nil and leave lineno
      # below 4; running off the end without an error fails assert_not_nil.
      loop do
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError => error
      assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
                    error.message )
    end

    # An unterminated quoted field.
    assert_raise(CSV::MalformedCSVError) { CSV.parse_line('1,2,"3...') }

    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,8'10"
    line,5,jkl
    END_DATA
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })

    csv = CSV.new(bad_data)
    begin
      loop do
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError => error
      assert_equal("Illegal quoting in line 4.", error.message)
    end
  end

  # A quote at the end of an unquoted field, followed by BIG_DATA.
  def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
    assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
  end

  # Text directly after a closing quote, followed by BIG_DATA.
  def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
    assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
  end

  # A quoted field holding all of BIG_DATA, parsed with a
  # +field_size_limit+ of 2048.
  def test_field_size_limit_controls_lookahead
    assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
                             field_size_limit: 2048 )
  end

  private

  #
  # Asserts that <tt>CSV.parse(*args, **options)</tt> raises
  # CSV::MalformedCSVError within 0.2 seconds.
  #
  # Keyword options are captured and forwarded explicitly.  Collecting them in
  # <tt>*args</tt> turns them into a trailing positional Hash, which Ruby 3.0+
  # no longer converts back into keywords when splatted into CSV.parse.
  #
  # A parse that returns normally hits +fail+, and a parse that is too slow
  # hits Timeout::Error; neither is a CSV::MalformedCSVError, so both make the
  # assertion fail.
  #
  def assert_parse_errors_out(*args, **options)
    assert_raise(CSV::MalformedCSVError) do
      Timeout.timeout(0.2) do
        CSV.parse(*args, **options)
        fail("Parse didn't error out")
      end
    end
  end
end
222