#!/usr/bin/env ruby -w
# encoding: UTF-8

# tc_csv_parsing.rb
#
#  Created by James Edward Gray II on 2005-10-31.
#  Copyright 2005 James Edward Gray II. You can redistribute or modify this code
#  under the terms of Ruby's license.

require "timeout"

require_relative "base"

#
# Following tests are my interpretation of the
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt].  I only deviate from that
# document in one place (intentionally) and that is to make the default row
# separator <tt>$/</tt>.
#
class TestCSV::Parsing < TestCSV
  extend DifferentOFS

  # 1024 ten-byte rows (10,240 bytes).  Appended to malformed input by the
  # "fails fast" tests below so that a parser which keeps reading instead of
  # raising has a large amount of data to chew through.
  BIG_DATA = "123456789\n" * 1024

  # A single line mixing unquoted fields, padded fields, an empty field,
  # quoted fields containing commas and doubled (escaped) quotes.
  def test_mastering_regex_example
    ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
    assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                    "It's \"10 Grand\", baby", "10K" ],
                  CSV.parse_line(ex) )
  end

  # Old Ruby 1.8 CSV library tests.
  #
  # Each row is an [input, expected_fields] pair fed to CSV.parse_line.
  def test_std_lib_csv
    [ ["\t", ["\t"]],
      ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["\",\"", [","]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
      ["\",\",\",\"", [",", ","]],
      ["foo,bar,", ["foo", "bar", nil]],
      [",foo,bar", [nil, "foo", "bar"]],
      ["foo,bar", ["foo", "bar"]],
      [";", [";"]],
      ["\t,\t", ["\t", "\t"]],
      ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
      [";,;", [";", ";"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end

    # NOTE(review): every pair in this second table also appears in the table
    # above, so this loop adds no new coverage.  It is kept as-is rather than
    # removed; confirm with the maintainers before dropping it.
    [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["foo,bar", ["foo", "bar"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  # From:  http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
  #
  # Escaped quotes at field boundaries, newlines inside quoted fields, and
  # empty versus nil fields.  Only the first row of each input is compared,
  # because CSV.parse_line returns a single row.
  def test_aras_edge_cases
    [ [%Q{a,b}, ["a", "b"]],
      [%Q{a,"""b"""}, ["a", "\"b\""]],
      [%Q{a,"""b"}, ["a", "\"b"]],
      [%Q{a,"b"""}, ["a", "b\""]],
      [%Q{a,"\nb"""}, ["a", "\nb\""]],
      [%Q{a,"""\nb"}, ["a", "\"\nb"]],
      [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
      [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
      [%Q{a,,,}, ["a", nil, nil, nil]],
      [%Q{,}, [nil, nil]],
      [%Q{"",""}, ["", ""]],
      [%Q{""""}, ["\""]],
      [%Q{"""",""}, ["\"", ""]],
      [%Q{,""}, [nil, ""]],
      [%Q{,"\r"}, [nil, "\r"]],
      [%Q{"\r\n,"}, ["\r\n,"]],
      [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  def test_james_edge_cases
    # A read at eof? should return nil.
    assert_nil(CSV.parse_line(""))
    #
    # With Ruby 1.8 CSV it's impossible to tell an empty line from a line
    # containing a single +nil+ field.  The old CSV library returns
    # <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
    # me.
    #
    assert_equal([], CSV.parse_line("\n1,2,3\n"))
  end

  # Quoted fields containing one or more LF / CRLF line endings.
  def test_rob_edge_cases
    [ [%Q{"a\nb"}, ["a\nb"]],
      [%Q{"\n\n\n"}, ["\n\n\n"]],
      [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
      [%Q{,"\r\n"}, [nil, "\r\n"]],
      [%Q{,"\r\n."}, [nil, "\r\n."]],
      [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
      [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
      [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
      [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
      [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
    ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  def test_non_regex_edge_cases
    # An early version of the non-regex parser fails this test
    [ [ "foo,\"foo,bar,baz,foo\",\"foo\"",
        ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end

    # A quote appearing in the middle of a quoted field without being doubled
    # must be rejected.
    assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line("1,\"23\"4\"5\", 6")
    end
  end

  # Malformed input must raise CSV::MalformedCSVError, and the message must
  # name the physical line (4) on which the bad data sits.
  def test_malformed_csv
    # A bare \r inside an unquoted field when the row separator is "\n".
    assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line("1,2\r,3", row_sep: "\n")
    end

    # The heredoc is interpolating, so the "\n" in row 2 is a real newline
    # inside a quoted field and "\r" in row 4 is a real carriage return.
    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,some\rjunk
    line,5,jkl
    END_DATA
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })

    csv = CSV.new(bad_data)
    begin
      loop do
        # Every row read before the error must be a real row from a line
        # before 4; running off the end (nil) means the error never fired.
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError => error
      assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
                    error.message )
    end

    # A quoted field that is never closed.
    assert_raise(CSV::MalformedCSVError) { CSV.parse_line('1,2,"3...') }

    # Same layout as above, but row 4 has a stray quote in an unquoted field.
    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,8'10"
    line,5,jkl
    END_DATA
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })

    csv = CSV.new(bad_data)
    begin
      loop do
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError => error
      assert_equal("Illegal quoting in line 4.", error.message)
    end
  end

  # A quote appearing at the end of an unquoted field, followed by BIG_DATA.
  def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
    assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
  end

  # Text following a closing quote, followed by BIG_DATA.
  def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
    assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
  end

  # A quoted field holding all of BIG_DATA (10,240 bytes) parsed with a
  # 2,048 field_size_limit.
  def test_field_size_limit_controls_lookahead
    assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
                             field_size_limit: 2048 )
  end

  private

  # Asserts that <tt>CSV.parse(*args)</tt> raises CSV::MalformedCSVError
  # within 0.2 seconds.
  #
  # +args+ are forwarded to CSV.parse unchanged (data string, then optional
  # options).  If the parse returns normally, +fail+ is called so that the
  # surrounding assert_raise does not see the expected error; a timeout
  # likewise surfaces as something other than CSV::MalformedCSVError.
  def assert_parse_errors_out(*args)
    assert_raise(CSV::MalformedCSVError) do
      Timeout.timeout(0.2) do
        CSV.parse(*args)
        fail("Parse didn't error out")
      end
    end
  end
end