1# coding: US-ASCII 2require 'test/unit' 3require 'tmpdir' 4require 'timeout' 5require_relative 'envutil' 6 7class TestIO_M17N < Test::Unit::TestCase 8 ENCS = [ 9 Encoding::ASCII_8BIT, 10 Encoding::EUC_JP, 11 Encoding::Shift_JIS, 12 Encoding::UTF_8 13 ] 14 15 def with_tmpdir 16 Dir.mktmpdir {|dir| 17 Dir.chdir(dir) { 18 yield dir 19 } 20 } 21 end 22 23 def pipe(*args, wp, rp) 24 re, we = nil, nil 25 r, w = IO.pipe(*args) 26 rt = Thread.new do 27 begin 28 rp.call(r) 29 rescue Exception 30 r.close 31 re = $! 32 end 33 end 34 wt = Thread.new do 35 begin 36 wp.call(w) 37 rescue Exception 38 w.close 39 we = $! 40 end 41 end 42 flunk("timeout") unless wt.join(10) && rt.join(10) 43 ensure 44 w.close unless !w || w.closed? 45 r.close unless !r || r.closed? 46 (wt.kill; wt.join) if wt 47 (rt.kill; rt.join) if rt 48 raise we if we 49 raise re if re 50 end 51 52 def with_pipe(*args) 53 r, w = IO.pipe(*args) 54 begin 55 yield r, w 56 ensure 57 r.close if !r.closed? 58 w.close if !w.closed? 59 end 60 end 61 62 def generate_file(path, content) 63 open(path, "wb") {|f| f.write content } 64 end 65 66 def encdump(str) 67 "#{str.dump}.force_encoding(#{str.encoding.name.dump})" 68 end 69 70 def assert_str_equal(expected, actual, message=nil) 71 full_message = build_message(message, <<EOT) 72#{encdump expected} expected but not equal to 73#{encdump actual}. 
74EOT 75 assert_equal(expected, actual, full_message) 76 end 77 78 def test_open_r 79 with_tmpdir { 80 generate_file('tmp', "") 81 open("tmp", "r") {|f| 82 assert_equal(Encoding.default_external, f.external_encoding) 83 assert_equal(nil, f.internal_encoding) 84 } 85 } 86 end 87 88 def test_open_rb 89 with_tmpdir { 90 generate_file('tmp', "") 91 open("tmp", "rb") {|f| 92 assert_equal(Encoding.find("ASCII-8BIT"), f.external_encoding) 93 assert_equal(nil, f.internal_encoding) 94 } 95 } 96 end 97 98 def test_open_r_enc 99 with_tmpdir { 100 generate_file('tmp', "") 101 open("tmp", "r:euc-jp") {|f| 102 assert_equal(Encoding::EUC_JP, f.external_encoding) 103 assert_equal(nil, f.internal_encoding) 104 } 105 } 106 end 107 108 def test_open_r_enc_in_opt 109 with_tmpdir { 110 generate_file('tmp', "") 111 open("tmp", "r", encoding: "euc-jp") {|f| 112 assert_equal(Encoding::EUC_JP, f.external_encoding) 113 assert_equal(nil, f.internal_encoding) 114 } 115 } 116 end 117 118 def test_open_r_encname_in_opt 119 with_tmpdir { 120 generate_file('tmp', "") 121 open("tmp", "r", encoding: Encoding::EUC_JP) {|f| 122 assert_equal(Encoding::EUC_JP, f.external_encoding) 123 assert_equal(nil, f.internal_encoding) 124 } 125 } 126 end 127 128 def test_open_r_ext_enc_in_opt 129 with_tmpdir { 130 generate_file('tmp', "") 131 open("tmp", "r", external_encoding: Encoding::EUC_JP) {|f| 132 assert_equal(Encoding::EUC_JP, f.external_encoding) 133 assert_equal(nil, f.internal_encoding) 134 } 135 } 136 end 137 138 def test_open_r_ext_encname_in_opt 139 with_tmpdir { 140 generate_file('tmp', "") 141 open("tmp", "r", external_encoding: "euc-jp") {|f| 142 assert_equal(Encoding::EUC_JP, f.external_encoding) 143 assert_equal(nil, f.internal_encoding) 144 } 145 } 146 end 147 148 def test_open_r_enc_enc 149 with_tmpdir { 150 generate_file('tmp', "") 151 open("tmp", "r", external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8) {|f| 152 assert_equal(Encoding::EUC_JP, f.external_encoding) 153 
assert_equal(Encoding::UTF_8, f.internal_encoding) 154 } 155 } 156 end 157 158 def test_open_r_encname_encname 159 with_tmpdir { 160 generate_file('tmp', "") 161 open("tmp", "r:euc-jp:utf-8") {|f| 162 assert_equal(Encoding::EUC_JP, f.external_encoding) 163 assert_equal(Encoding::UTF_8, f.internal_encoding) 164 } 165 } 166 end 167 168 def test_open_r_encname_encname_in_opt 169 with_tmpdir { 170 generate_file('tmp', "") 171 open("tmp", "r", encoding: "euc-jp:utf-8") {|f| 172 assert_equal(Encoding::EUC_JP, f.external_encoding) 173 assert_equal(Encoding::UTF_8, f.internal_encoding) 174 } 175 } 176 end 177 178 def test_open_r_enc_enc_in_opt 179 with_tmpdir { 180 generate_file('tmp', "") 181 open("tmp", "r", external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8) {|f| 182 assert_equal(Encoding::EUC_JP, f.external_encoding) 183 assert_equal(Encoding::UTF_8, f.internal_encoding) 184 } 185 } 186 end 187 188 def test_open_r_externalencname_internalencname_in_opt 189 with_tmpdir { 190 generate_file('tmp', "") 191 open("tmp", "r", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f| 192 assert_equal(Encoding::EUC_JP, f.external_encoding) 193 assert_equal(Encoding::UTF_8, f.internal_encoding) 194 } 195 } 196 end 197 198 def test_open_w 199 with_tmpdir { 200 open("tmp", "w") {|f| 201 assert_equal(nil, f.external_encoding) 202 assert_equal(nil, f.internal_encoding) 203 } 204 } 205 end 206 207 def test_open_wb 208 with_tmpdir { 209 open("tmp", "wb") {|f| 210 assert_equal(Encoding.find("ASCII-8BIT"), f.external_encoding) 211 assert_equal(nil, f.internal_encoding) 212 } 213 } 214 end 215 216 def test_open_w_enc 217 with_tmpdir { 218 open("tmp", "w:euc-jp") {|f| 219 assert_equal(Encoding::EUC_JP, f.external_encoding) 220 assert_equal(nil, f.internal_encoding) 221 } 222 } 223 end 224 225 def test_open_w_enc_in_opt 226 with_tmpdir { 227 open("tmp", "w", encoding: "euc-jp") {|f| 228 assert_equal(Encoding::EUC_JP, f.external_encoding) 229 assert_equal(nil, 
f.internal_encoding) 230 } 231 } 232 end 233 234 def test_open_w_enc_in_opt2 235 with_tmpdir { 236 open("tmp", "w", external_encoding: "euc-jp") {|f| 237 assert_equal(Encoding::EUC_JP, f.external_encoding) 238 assert_equal(nil, f.internal_encoding) 239 } 240 } 241 end 242 243 def test_open_w_enc_enc 244 with_tmpdir { 245 open("tmp", "w:euc-jp:utf-8") {|f| 246 assert_equal(Encoding::EUC_JP, f.external_encoding) 247 assert_equal(Encoding::UTF_8, f.internal_encoding) 248 } 249 } 250 end 251 252 def test_open_w_enc_enc_in_opt 253 with_tmpdir { 254 open("tmp", "w", encoding: "euc-jp:utf-8") {|f| 255 assert_equal(Encoding::EUC_JP, f.external_encoding) 256 assert_equal(Encoding::UTF_8, f.internal_encoding) 257 } 258 } 259 end 260 261 def test_open_w_enc_enc_in_opt2 262 with_tmpdir { 263 open("tmp", "w", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f| 264 assert_equal(Encoding::EUC_JP, f.external_encoding) 265 assert_equal(Encoding::UTF_8, f.internal_encoding) 266 } 267 } 268 end 269 270 def test_open_w_enc_enc_perm 271 with_tmpdir { 272 open("tmp", "w:euc-jp:utf-8", 0600) {|f| 273 assert_equal(Encoding::EUC_JP, f.external_encoding) 274 assert_equal(Encoding::UTF_8, f.internal_encoding) 275 } 276 } 277 end 278 279 def test_io_new_enc 280 with_tmpdir { 281 generate_file("tmp", "\xa1") 282 fd = IO.sysopen("tmp") 283 f = IO.new(fd, "r:sjis") 284 begin 285 assert_equal(Encoding::Windows_31J, f.read.encoding) 286 ensure 287 f.close 288 end 289 } 290 end 291 292 def test_s_pipe_invalid 293 pipe("utf-8", "euc-jp", { :invalid=>:replace }, 294 proc do |w| 295 w << "\x80" 296 w.close 297 end, 298 proc do |r| 299 assert_equal("?", r.read) 300 end) 301 end 302 303 def test_s_pipe_undef 304 pipe("utf-8:euc-jp", { :undef=>:replace }, 305 proc do |w| 306 w << "\ufffd" 307 w.close 308 end, 309 proc do |r| 310 assert_equal("?", r.read) 311 end) 312 end 313 314 def test_s_pipe_undef_replace_string 315 pipe("utf-8:euc-jp", { :undef=>:replace, :replace=>"X" }, 316 proc do |w| 
317 w << "\ufffd" 318 w.close 319 end, 320 proc do |r| 321 assert_equal("X", r.read) 322 end) 323 end 324 325 def test_dup 326 pipe("utf-8:euc-jp", 327 proc do |w| 328 w << "\u3042" 329 w.close 330 end, 331 proc do |r| 332 r2 = r.dup 333 begin 334 assert_equal("\xA4\xA2".force_encoding("euc-jp"), r2.read) 335 ensure 336 r2.close 337 end 338 end) 339 end 340 341 def test_dup_undef 342 pipe("utf-8:euc-jp", { :undef=>:replace }, 343 proc do |w| 344 w << "\uFFFD" 345 w.close 346 end, 347 proc do |r| 348 r2 = r.dup 349 begin 350 assert_equal("?", r2.read) 351 ensure 352 r2.close 353 end 354 end) 355 end 356 357 def test_stdin 358 assert_equal(Encoding.default_external, STDIN.external_encoding) 359 assert_equal(nil, STDIN.internal_encoding) 360 end 361 362 def test_stdout 363 assert_equal(nil, STDOUT.external_encoding) 364 assert_equal(nil, STDOUT.internal_encoding) 365 end 366 367 def test_stderr 368 assert_equal(nil, STDERR.external_encoding) 369 assert_equal(nil, STDERR.internal_encoding) 370 end 371 372 def test_terminator_conversion 373 with_tmpdir { 374 generate_file('tmp', "before \u00FF after") 375 s = open("tmp", "r:utf-8:iso-8859-1") {|f| 376 f.gets("\xFF".force_encoding("iso-8859-1")) 377 } 378 assert_equal(Encoding.find("iso-8859-1"), s.encoding) 379 assert_str_equal("before \xFF".force_encoding("iso-8859-1"), s, '[ruby-core:14288]') 380 } 381 end 382 383 def test_terminator_conversion2 384 with_tmpdir { 385 generate_file('tmp', "before \xA1\xA2\xA2\xA3 after") 386 s = open("tmp", "r:euc-jp:utf-8") {|f| 387 f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8")) 388 } 389 assert_equal(Encoding.find("utf-8"), s.encoding) 390 assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), s, '[ruby-core:14319]') 391 } 392 end 393 394 def test_terminator_stateful_conversion 395 with_tmpdir { 396 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp") 397 generate_file('tmp', src) 398 s = open("tmp", 
"r:iso-2022-jp:euc-jp") {|f| 399 f.gets("0".force_encoding("euc-jp")) 400 } 401 assert_equal(Encoding.find("euc-jp"), s.encoding) 402 assert_str_equal(src.encode("euc-jp"), s) 403 } 404 end 405 406 def test_nonascii_terminator 407 with_tmpdir { 408 generate_file('tmp', "before \xA2\xA2 after") 409 open("tmp", "r:euc-jp") {|f| 410 assert_raise(ArgumentError) { 411 f.gets("\xA2\xA2".force_encoding("utf-8")) 412 } 413 } 414 } 415 end 416 417 def test_pipe_terminator_conversion 418 rs = "\xA2\xA2".encode("utf-8", "euc-jp") 419 pipe("euc-jp:utf-8", 420 proc do |w| 421 w.write "before \xa2\xa2 after" 422 w.close 423 end, 424 proc do |r| 425 timeout(1) { 426 assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"), 427 r.gets(rs)) 428 } 429 end) 430 end 431 432 def test_pipe_conversion 433 pipe("euc-jp:utf-8", 434 proc do |w| 435 w.write "\xa1\xa1" 436 end, 437 proc do |r| 438 assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc) 439 end) 440 end 441 442 def test_pipe_convert_partial_read 443 pipe("euc-jp:utf-8", 444 proc do |w| 445 w.write "\xa1" 446 sleep 0.1 447 w.write "\xa1" 448 end, 449 proc do |r| 450 assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc) 451 end) 452 end 453 454 def test_getc_invalid 455 pipe("euc-jp:utf-8", 456 proc do |w| 457 w << "\xa1xyz" 458 w.close 459 end, 460 proc do |r| 461 err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc } 462 assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes) 463 assert_equal("xyz", r.read(10)) 464 end) 465 end 466 467 def test_getc_stateful_conversion 468 with_tmpdir { 469 src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp") 470 generate_file('tmp', src) 471 open("tmp", "r:iso-2022-jp:euc-jp") {|f| 472 assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc) 473 assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc) 474 } 475 } 476 end 477 478 def test_getc_newlineconv 479 with_tmpdir { 480 src = "\u3042" 481 generate_file('tmp', src) 482 
EnvUtil.with_default_external(Encoding::UTF_8) do 483 open("tmp", "rt") {|f| 484 s = f.getc 485 assert_equal(true, s.valid_encoding?) 486 assert_equal("\u3042", s) 487 } 488 end 489 } 490 end 491 492 def test_getc_newlineconv_invalid 493 with_tmpdir { 494 src = "\xE3\x81" 495 generate_file('tmp', src) 496 EnvUtil.with_default_external(Encoding::UTF_8) do 497 open("tmp", "rt") {|f| 498 s = f.getc 499 assert_equal(false, s.valid_encoding?) 500 assert_equal("\xE3".force_encoding("UTF-8"), s) 501 s = f.getc 502 assert_equal(false, s.valid_encoding?) 503 assert_equal("\x81".force_encoding("UTF-8"), s) 504 } 505 end 506 } 507 end 508 509 def test_ungetc_int 510 with_tmpdir { 511 generate_file('tmp', "A") 512 s = open("tmp", "r:GB18030") {|f| 513 f.ungetc(0x8431A439) 514 f.read 515 } 516 assert_equal(Encoding::GB18030, s.encoding) 517 assert_str_equal(0x8431A439.chr("GB18030")+"A", s) 518 } 519 end 520 521 def test_ungetc_str 522 with_tmpdir { 523 generate_file('tmp', "A") 524 s = open("tmp", "r:GB18030") {|f| 525 f.ungetc(0x8431A439.chr("GB18030")) 526 f.read 527 } 528 assert_equal(Encoding::GB18030, s.encoding) 529 assert_str_equal(0x8431A439.chr("GB18030")+"A", s) 530 } 531 end 532 533 def test_ungetc_stateful_conversion 534 with_tmpdir { 535 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp") 536 generate_file('tmp', src) 537 s = open("tmp", "r:iso-2022-jp:euc-jp") {|f| 538 f.ungetc("0".force_encoding("euc-jp")) 539 f.read 540 } 541 assert_equal(Encoding.find("euc-jp"), s.encoding) 542 assert_str_equal("0" + src.encode("euc-jp"), s) 543 } 544 end 545 546 def test_ungetc_stateful_conversion2 547 with_tmpdir { 548 src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp") 549 former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp") 550 rs = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp") 551 latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp") 552 generate_file('tmp', src) 553 s = open("tmp", 
"r:iso-2022-jp:euc-jp") {|f| 554 assert_equal(former.encode("euc-jp", "iso-2022-jp"), 555 f.gets(rs.encode("euc-jp", "iso-2022-jp"))) 556 f.ungetc("0") 557 f.read 558 } 559 assert_equal(Encoding.find("euc-jp"), s.encoding) 560 assert_str_equal("0" + latter.encode("euc-jp"), s) 561 } 562 end 563 564 def test_open_ascii 565 with_tmpdir { 566 src = "abc\n" 567 generate_file('tmp', "abc\n") 568 ENCS.each {|enc| 569 s = open('tmp', "r:#{enc}") {|f| f.gets } 570 assert_equal(enc, s.encoding) 571 assert_str_equal(src, s) 572 } 573 } 574 end 575 576 def test_open_nonascii 577 with_tmpdir { 578 src = "\xc2\xa1\n" 579 generate_file('tmp', src) 580 ENCS.each {|enc| 581 content = src.dup.force_encoding(enc) 582 s = open('tmp', "r:#{enc}") {|f| f.gets } 583 assert_equal(enc, s.encoding) 584 assert_str_equal(content, s) 585 } 586 } 587 end 588 589 def test_read_encoding 590 with_tmpdir { 591 src = "\xc2\xa1\n".force_encoding("ASCII-8BIT") 592 generate_file('tmp', "\xc2\xa1\n") 593 ENCS.each {|enc| 594 content = src.dup.force_encoding(enc) 595 open('tmp', "r:#{enc}") {|f| 596 s = f.getc 597 assert_equal(enc, s.encoding) 598 assert_str_equal(content[0], s) 599 } 600 open('tmp', "r:#{enc}") {|f| 601 s = f.readchar 602 assert_equal(enc, s.encoding) 603 assert_str_equal(content[0], s) 604 } 605 open('tmp', "r:#{enc}") {|f| 606 s = f.gets 607 assert_equal(enc, s.encoding) 608 assert_str_equal(content, s) 609 } 610 open('tmp', "r:#{enc}") {|f| 611 s = f.readline 612 assert_equal(enc, s.encoding) 613 assert_str_equal(content, s) 614 } 615 open('tmp', "r:#{enc}") {|f| 616 lines = f.readlines 617 assert_equal(1, lines.length) 618 s = lines[0] 619 assert_equal(enc, s.encoding) 620 assert_str_equal(content, s) 621 } 622 open('tmp', "r:#{enc}") {|f| 623 f.each_line {|s| 624 assert_equal(enc, s.encoding) 625 assert_str_equal(content, s) 626 } 627 } 628 open('tmp', "r:#{enc}") {|f| 629 s = f.read 630 assert_equal(enc, s.encoding) 631 assert_str_equal(content, s) 632 } 633 open('tmp', 
"r:#{enc}") {|f| 634 s = f.read(1) 635 assert_equal(Encoding::ASCII_8BIT, s.encoding) 636 assert_str_equal(src[0], s) 637 } 638 open('tmp', "r:#{enc}") {|f| 639 s = f.readpartial(1) 640 assert_equal(Encoding::ASCII_8BIT, s.encoding) 641 assert_str_equal(src[0], s) 642 } 643 open('tmp', "r:#{enc}") {|f| 644 s = f.sysread(1) 645 assert_equal(Encoding::ASCII_8BIT, s.encoding) 646 assert_str_equal(src[0], s) 647 } 648 } 649 } 650 end 651 652 def test_write_noenc 653 src = "\xc2\xa1\n".force_encoding("ascii-8bit") 654 with_tmpdir { 655 open('tmp', "w") {|f| 656 ENCS.each {|enc| 657 f.write src.dup.force_encoding(enc) 658 } 659 } 660 open('tmp', 'r:ascii-8bit') {|f| 661 assert_equal(src*ENCS.length, f.read) 662 } 663 } 664 end 665 666 def test_write_conversion 667 utf8 = "\u6666" 668 eucjp = "\xb3\xa2".force_encoding("EUC-JP") 669 with_tmpdir { 670 open('tmp', "w:EUC-JP") {|f| 671 assert_equal(Encoding::EUC_JP, f.external_encoding) 672 assert_equal(nil, f.internal_encoding) 673 f.print utf8 674 } 675 assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP")) 676 open('tmp', 'r:EUC-JP:UTF-8') {|f| 677 assert_equal(Encoding::EUC_JP, f.external_encoding) 678 assert_equal(Encoding::UTF_8, f.internal_encoding) 679 assert_equal(utf8, f.read) 680 } 681 } 682 end 683 684 def test_pipe 685 utf8 = "\u6666" 686 eucjp = "\xb3\xa2".force_encoding("EUC-JP") 687 688 pipe(proc do |w| 689 w << utf8 690 w.close 691 end, proc do |r| 692 assert_equal(Encoding.default_external, r.external_encoding) 693 assert_equal(nil, r.internal_encoding) 694 s = r.read 695 assert_equal(Encoding.default_external, s.encoding) 696 assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s) 697 end) 698 699 pipe("EUC-JP", 700 proc do |w| 701 w << eucjp 702 w.close 703 end, 704 proc do |r| 705 assert_equal(Encoding::EUC_JP, r.external_encoding) 706 assert_equal(nil, r.internal_encoding) 707 assert_equal(eucjp, r.read) 708 end) 709 710 pipe("UTF-8", 711 proc do |w| 712 w << "a" * 1023 + "\u3042" 
+ "a" * 1022 713 w.close 714 end, 715 proc do |r| 716 assert_equal(true, r.read.valid_encoding?) 717 end) 718 719 pipe("UTF-8:EUC-JP", 720 proc do |w| 721 w << utf8 722 w.close 723 end, 724 proc do |r| 725 assert_equal(Encoding::UTF_8, r.external_encoding) 726 assert_equal(Encoding::EUC_JP, r.internal_encoding) 727 assert_equal(eucjp, r.read) 728 end) 729 730 e = assert_raise(ArgumentError) {with_pipe("UTF-8", "UTF-8".encode("UTF-32BE")) {}} 731 assert_match(/invalid name encoding/, e.message) 732 e = assert_raise(ArgumentError) {with_pipe("UTF-8".encode("UTF-32BE")) {}} 733 assert_match(/invalid name encoding/, e.message) 734 735 ENCS.each {|enc| 736 pipe(enc, 737 proc do |w| 738 w << "\xc2\xa1" 739 w.close 740 end, 741 proc do |r| 742 s = r.getc 743 assert_equal(enc, s.encoding) 744 end) 745 } 746 747 ENCS.each {|enc| 748 next if enc == Encoding::ASCII_8BIT 749 next if enc == Encoding::UTF_8 750 pipe("#{enc}:UTF-8", 751 proc do |w| 752 w << "\xc2\xa1" 753 w.close 754 end, 755 proc do |r| 756 s = r.read 757 assert_equal(Encoding::UTF_8, s.encoding) 758 assert_equal(s.encode("UTF-8"), s) 759 end) 760 } 761 762 end 763 764 def test_marshal 765 data = 56225 766 pipe("EUC-JP", 767 proc do |w| 768 Marshal.dump(data, w) 769 w.close 770 end, 771 proc do |r| 772 result = nil 773 assert_nothing_raised("[ruby-dev:33264]") { result = Marshal.load(r) } 774 assert_equal(data, result) 775 end) 776 end 777 778 def test_gets_nil 779 pipe("UTF-8:EUC-JP", 780 proc do |w| 781 w << "\u{3042}" 782 w.close 783 end, 784 proc do |r| 785 result = r.gets(nil) 786 assert_equal("\u{3042}".encode("euc-jp"), result) 787 end) 788 end 789 790 def test_gets_limit 791 pipe("euc-jp", 792 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 793 proc {|r| assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.gets(1)) }) 794 pipe("euc-jp", 795 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 796 proc {|r| assert_equal("\xa4\xa2".force_encoding("euc-jp"), 
r.gets(2)) }) 797 pipe("euc-jp", 798 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 799 proc {|r| assert_equal("\xa4\xa2\xa4\xa4".force_encoding("euc-jp"), r.gets(3)) }) 800 pipe("euc-jp", 801 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 802 proc {|r| assert_equal("\xa4\xa2\xa4\xa4".force_encoding("euc-jp"), r.gets(4)) }) 803 pipe("euc-jp", 804 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 805 proc {|r| assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6".force_encoding("euc-jp"), r.gets(5)) }) 806 pipe("euc-jp", 807 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 808 proc {|r| assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6".force_encoding("euc-jp"), r.gets(6)) }) 809 pipe("euc-jp", 810 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 811 proc {|r| assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(7)) }) 812 pipe("euc-jp", 813 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 814 proc {|r| assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(8)) }) 815 pipe("euc-jp", 816 proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }, 817 proc {|r| assert_equal("\xa4\xa2\xa4\xa4\xa4\xa6\n".force_encoding("euc-jp"), r.gets(9)) }) 818 end 819 820 def test_gets_invalid 821 before = "\u{3042}\u{3044}" 822 invalid = "\x80".force_encoding("utf-8") 823 after = "\u{3046}\u{3048}" 824 pipe("utf-8:euc-jp", 825 proc do |w| 826 w << before + invalid + after 827 w.close 828 end, 829 proc do |r| 830 err = assert_raise(Encoding::InvalidByteSequenceError) { r.gets } 831 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) 832 assert_equal(after.encode("euc-jp"), r.gets) 833 end) 834 end 835 836 def test_getc_invalid2 837 before1 = "\u{3042}" 838 before2 = "\u{3044}" 839 invalid = "\x80".force_encoding("utf-8") 840 after1 = "\u{3046}" 841 after2 = "\u{3048}" 842 pipe("utf-8:euc-jp", 843 
proc do |w| 844 w << before1 + before2 + invalid + after1 + after2 845 w.close 846 end, 847 proc do |r| 848 assert_equal(before1.encode("euc-jp"), r.getc) 849 assert_equal(before2.encode("euc-jp"), r.getc) 850 err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc } 851 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) 852 assert_equal(after1.encode("euc-jp"), r.getc) 853 assert_equal(after2.encode("euc-jp"), r.getc) 854 end) 855 end 856 857 def test_getc_invalid3 858 before1 = "\x42\x30".force_encoding("utf-16le") 859 before2 = "\x44\x30".force_encoding("utf-16le") 860 invalid = "\x00\xd8".force_encoding("utf-16le") 861 after1 = "\x46\x30".force_encoding("utf-16le") 862 after2 = "\x48\x30".force_encoding("utf-16le") 863 pipe("utf-16le:euc-jp", { :binmode => true }, 864 proc do |w| 865 w << before1 + before2 + invalid + after1 + after2 866 w.close 867 end, 868 proc do |r| 869 assert_equal(before1.encode("euc-jp"), r.getc) 870 assert_equal(before2.encode("euc-jp"), r.getc) 871 err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc } 872 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) 873 assert_equal(after1.encode("euc-jp"), r.getc) 874 assert_equal(after2.encode("euc-jp"), r.getc) 875 end) 876 end 877 878 def test_read_all 879 str = "\u3042\u3044" 880 pipe("utf-8:euc-jp", 881 proc do |w| 882 w << str 883 w.close 884 end, 885 proc do |r| 886 assert_equal(str.encode("euc-jp"), r.read) 887 end) 888 end 889 890 def test_read_all_invalid 891 before = "\u{3042}\u{3044}" 892 invalid = "\x80".force_encoding("utf-8") 893 after = "\u{3046}\u{3048}" 894 pipe("utf-8:euc-jp", 895 proc do |w| 896 w << before + invalid + after 897 w.close 898 end, 899 proc do |r| 900 err = assert_raise(Encoding::InvalidByteSequenceError) { r.read } 901 assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes) 902 assert_equal(after.encode("euc-jp"), r.read) 903 end) 904 end 905 906 def test_file_foreach 907 with_tmpdir { 908 
generate_file('tst', 'a' * 8191 + "\xa1\xa1") 909 assert_nothing_raised { 910 File.foreach('tst', :encoding=>"euc-jp") {|line| line.inspect } 911 } 912 } 913 end 914 915 def test_set_encoding 916 pipe("utf-8:euc-jp", 917 proc do |w| 918 s = "\u3042".force_encoding("ascii-8bit") 919 s << "\x82\xa0".force_encoding("ascii-8bit") 920 w << s 921 w.close 922 end, 923 proc do |r| 924 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 925 r.set_encoding("shift_jis:euc-jp") 926 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 927 end) 928 end 929 930 def test_set_encoding2 931 pipe("utf-8:euc-jp", 932 proc do |w| 933 s = "\u3042".force_encoding("ascii-8bit") 934 s << "\x82\xa0".force_encoding("ascii-8bit") 935 w << s 936 w.close 937 end, 938 proc do |r| 939 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 940 r.set_encoding("shift_jis", "euc-jp") 941 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 942 end) 943 end 944 945 def test_set_encoding_nil 946 pipe("utf-8:euc-jp", 947 proc do |w| 948 s = "\u3042".force_encoding("ascii-8bit") 949 s << "\x82\xa0".force_encoding("ascii-8bit") 950 w << s 951 w.close 952 end, 953 proc do |r| 954 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 955 r.set_encoding(nil) 956 assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read) 957 end) 958 end 959 960 def test_set_encoding_enc 961 pipe("utf-8:euc-jp", 962 proc do |w| 963 s = "\u3042".force_encoding("ascii-8bit") 964 s << "\x82\xa0".force_encoding("ascii-8bit") 965 w << s 966 w.close 967 end, 968 proc do |r| 969 assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc) 970 r.set_encoding(Encoding::Shift_JIS) 971 assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc) 972 end) 973 end 974 975 def test_set_encoding_invalid 976 pipe(proc do |w| 977 w << "\x80" 978 w.close 979 end, 980 proc do |r| 981 r.set_encoding("utf-8:euc-jp", :invalid=>:replace) 982 assert_equal("?", r.read) 983 end) 984 end 985 986 def 
test_set_encoding_identical 987 #bug5568 = '[ruby-core:40727]' 988 bug6324 = '[ruby-core:44455]' 989 open(__FILE__, "r") do |f| 990 assert_warning('', bug6324) { 991 f.set_encoding("eucjp:euc-jp") 992 } 993 assert_warning('', bug6324) { 994 f.set_encoding("eucjp", "euc-jp") 995 } 996 assert_warning('', bug6324) { 997 f.set_encoding(Encoding::EUC_JP, "euc-jp") 998 } 999 assert_warning('', bug6324) { 1000 f.set_encoding("eucjp", Encoding::EUC_JP) 1001 } 1002 assert_warning('', bug6324) { 1003 f.set_encoding(Encoding::EUC_JP, Encoding::EUC_JP) 1004 } 1005 nonstr = Object.new 1006 def nonstr.to_str; "eucjp"; end 1007 assert_warning('', bug6324) { 1008 f.set_encoding(nonstr, nonstr) 1009 } 1010 end 1011 end 1012 1013 def test_set_encoding_undef 1014 pipe(proc do |w| 1015 w << "\ufffd" 1016 w.close 1017 end, 1018 proc do |r| 1019 r.set_encoding("utf-8", "euc-jp", :undef=>:replace) 1020 assert_equal("?", r.read) 1021 end) 1022 end 1023 1024 def test_set_encoding_undef_replace 1025 pipe(proc do |w| 1026 w << "\ufffd" 1027 w.close 1028 end, 1029 proc do |r| 1030 r.set_encoding("utf-8", "euc-jp", :undef=>:replace, :replace=>"ZZZ") 1031 assert_equal("ZZZ", r.read) 1032 end) 1033 pipe(proc do |w| 1034 w << "\ufffd" 1035 w.close 1036 end, 1037 proc do |r| 1038 r.set_encoding("utf-8:euc-jp", :undef=>:replace, :replace=>"ZZZ") 1039 assert_equal("ZZZ", r.read) 1040 end) 1041 end 1042 1043 def test_set_encoding_binmode 1044 assert_raise(ArgumentError) { 1045 open(__FILE__, "rt") {|f| 1046 f.set_encoding("iso-2022-jp") 1047 } 1048 } 1049 assert_raise(ArgumentError) { 1050 open(__FILE__, "r") {|f| 1051 f.set_encoding("iso-2022-jp") 1052 } 1053 } 1054 assert_nothing_raised { 1055 open(__FILE__, "rb") {|f| 1056 f.set_encoding("iso-2022-jp") 1057 } 1058 } 1059 assert_nothing_raised { 1060 open(__FILE__, "r") {|f| 1061 f.binmode 1062 f.set_encoding("iso-2022-jp") 1063 } 1064 } 1065 assert_nothing_raised { 1066 open(__FILE__, "rt") {|f| 1067 f.binmode 1068 f.set_encoding("iso-2022-jp") 
1069 } 1070 } 1071 assert_nothing_raised { 1072 open(__FILE__, "r", binmode: true) {|f| 1073 assert_equal(Encoding::ASCII_8BIT, f.external_encoding) 1074 f.set_encoding("iso-2022-jp") 1075 } 1076 } 1077 assert_raise(ArgumentError) { 1078 open(__FILE__, "rb", binmode: true) {|f| 1079 f.set_encoding("iso-2022-jp") 1080 } 1081 } 1082 assert_raise(ArgumentError) { 1083 open(__FILE__, "rb", binmode: false) {|f| 1084 f.set_encoding("iso-2022-jp") 1085 } 1086 } 1087 end 1088 1089 def test_set_encoding_unsupported 1090 bug5567 = '[ruby-core:40726]' 1091 IO.pipe do |r, w| 1092 assert_nothing_raised(bug5567) do 1093 assert_warning(/Unsupported/, bug5567) {r.set_encoding("fffffffffffxx")} 1094 assert_warning(/Unsupported/, bug5567) {r.set_encoding("fffffffffffxx", "us-ascii")} 1095 assert_warning(/Unsupported/, bug5567) {r.set_encoding("us-ascii", "fffffffffffxx")} 1096 end 1097 end 1098 end 1099 1100 def test_textmode_twice 1101 assert_raise(ArgumentError) { 1102 open(__FILE__, "rt", textmode: true) {|f| 1103 f.set_encoding("iso-2022-jp") 1104 } 1105 } 1106 assert_raise(ArgumentError) { 1107 open(__FILE__, "rt", textmode: false) {|f| 1108 f.set_encoding("iso-2022-jp") 1109 } 1110 } 1111 end 1112 1113 def test_write_conversion_fixenc 1114 pipe(proc do |w| 1115 w.set_encoding("iso-2022-jp:utf-8") 1116 w << "\u3042" 1117 w << "\u3044" 1118 w.close 1119 end, 1120 proc do |r| 1121 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), 1122 r.read.force_encoding("ascii-8bit")) 1123 end) 1124 end 1125 1126 def test_write_conversion_anyenc_stateful 1127 pipe(proc do |w| 1128 w.set_encoding("iso-2022-jp") 1129 w << "\u3042" 1130 w << "\x82\xa2".force_encoding("sjis") 1131 w.close 1132 end, 1133 proc do |r| 1134 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), 1135 r.read.force_encoding("ascii-8bit")) 1136 end) 1137 end 1138 1139 def test_write_conversion_anyenc_stateless 1140 pipe(proc do |w| 1141 w.set_encoding("euc-jp") 1142 w << "\u3042" 1143 w << 
"\x82\xa2".force_encoding("sjis") 1144 w.close 1145 end, 1146 proc do |r| 1147 assert_equal("\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit"), 1148 r.read.force_encoding("ascii-8bit")) 1149 end) 1150 end 1151 1152 def test_write_conversion_anyenc_stateful_nosync 1153 pipe(proc do |w| 1154 w.sync = false 1155 w.set_encoding("iso-2022-jp") 1156 w << "\u3042" 1157 w << "\x82\xa2".force_encoding("sjis") 1158 w.close 1159 end, 1160 proc do |r| 1161 assert_equal("\e$B$\"$$\e(B".force_encoding("ascii-8bit"), 1162 r.read.force_encoding("ascii-8bit")) 1163 end) 1164 end 1165 1166 def test_read_stateful 1167 pipe("euc-jp:iso-2022-jp", 1168 proc do |w| 1169 w << "\xA4\xA2" 1170 w.close 1171 end, 1172 proc do |r| 1173 assert_equal("\e$B$\"\e(B".force_encoding("iso-2022-jp"), r.read) 1174 end) 1175 end 1176 1177 def test_stdin_external_encoding_with_reopen 1178 skip "passing non-stdio fds is not supported" if /mswin|mingw/ =~ RUBY_PLATFORM 1179 with_tmpdir { 1180 open("tst", "w+") {|f| 1181 pid = spawn(EnvUtil.rubybin, '-e', <<-'End', 10=>f) 1182 io = IO.new(10, "r+") 1183 STDIN.reopen(io) 1184 STDIN.external_encoding 1185 STDIN.write "\u3042" 1186 STDIN.flush 1187 End 1188 Process.wait pid 1189 f.rewind 1190 result = f.read.force_encoding("ascii-8bit") 1191 assert_equal("\u3042".force_encoding("ascii-8bit"), result) 1192 } 1193 } 1194 end 1195 1196 def test_popen_r_enc 1197 IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r:ascii-8bit") {|f| 1198 assert_equal(Encoding::ASCII_8BIT, f.external_encoding) 1199 assert_equal(nil, f.internal_encoding) 1200 s = f.read 1201 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1202 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1203 } 1204 end 1205 1206 def test_popen_r_enc_in_opt 1207 IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r", encoding: "ascii-8bit") {|f| 1208 assert_equal(Encoding::ASCII_8BIT, f.external_encoding) 1209 assert_equal(nil, f.internal_encoding) 1210 s = f.read 1211 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1212 
assert_equal("\xff".force_encoding("ascii-8bit"), s) 1213 } 1214 end 1215 1216 def test_popen_r_enc_in_opt2 1217 IO.popen("#{EnvUtil.rubybin} -e 'putc 255'", "r", external_encoding: "ascii-8bit") {|f| 1218 assert_equal(Encoding::ASCII_8BIT, f.external_encoding) 1219 assert_equal(nil, f.internal_encoding) 1220 s = f.read 1221 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1222 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1223 } 1224 end 1225 1226 def test_popen_r_enc_enc 1227 IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r:shift_jis:euc-jp") {|f| 1228 assert_equal(Encoding::Shift_JIS, f.external_encoding) 1229 assert_equal(Encoding::EUC_JP, f.internal_encoding) 1230 s = f.read 1231 assert_equal(Encoding::EUC_JP, s.encoding) 1232 assert_equal("\x8e\xa1".force_encoding("euc-jp"), s) 1233 } 1234 end 1235 1236 def test_popen_r_enc_enc_in_opt 1237 IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", encoding: "shift_jis:euc-jp") {|f| 1238 assert_equal(Encoding::Shift_JIS, f.external_encoding) 1239 assert_equal(Encoding::EUC_JP, f.internal_encoding) 1240 s = f.read 1241 assert_equal(Encoding::EUC_JP, s.encoding) 1242 assert_equal("\x8e\xa1".force_encoding("euc-jp"), s) 1243 } 1244 end 1245 1246 def test_popen_r_enc_enc_in_opt2 1247 IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f| 1248 assert_equal(Encoding::Shift_JIS, f.external_encoding) 1249 assert_equal(Encoding::EUC_JP, f.internal_encoding) 1250 s = f.read 1251 assert_equal(Encoding::EUC_JP, s.encoding) 1252 assert_equal("\x8e\xa1".force_encoding("euc-jp"), s) 1253 } 1254 end 1255 1256 def test_popenv_r_enc_enc_in_opt2 1257 IO.popen([EnvUtil.rubybin, "-e", "putc 0xa1"], "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f| 1258 assert_equal(Encoding::Shift_JIS, f.external_encoding) 1259 assert_equal(Encoding::EUC_JP, f.internal_encoding) 1260 s = f.read 1261 assert_equal(Encoding::EUC_JP, s.encoding) 1262 
assert_equal("\x8e\xa1".force_encoding("euc-jp"), s) 1263 } 1264 end 1265 1266 def test_open_pipe_r_enc 1267 open("|#{EnvUtil.rubybin} -e 'putc 255'", "r:ascii-8bit") {|f| 1268 assert_equal(Encoding::ASCII_8BIT, f.external_encoding) 1269 assert_equal(nil, f.internal_encoding) 1270 s = f.read 1271 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1272 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1273 } 1274 end 1275 1276 def test_open_pipe_r_enc2 1277 open("|#{EnvUtil.rubybin} -e 'putc \"\\u3042\"'", "r:UTF-8") {|f| 1278 assert_equal(Encoding::UTF_8, f.external_encoding) 1279 assert_equal(nil, f.internal_encoding) 1280 s = f.read 1281 assert_equal(Encoding::UTF_8, s.encoding) 1282 assert_equal("\u3042", s) 1283 } 1284 end 1285 1286 def test_s_foreach_enc 1287 with_tmpdir { 1288 generate_file("t", "\xff") 1289 IO.foreach("t", :mode => "r:ascii-8bit") {|s| 1290 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1291 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1292 } 1293 } 1294 end 1295 1296 def test_s_foreach_enc_in_opt 1297 with_tmpdir { 1298 generate_file("t", "\xff") 1299 IO.foreach("t", :encoding => "ascii-8bit") {|s| 1300 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1301 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1302 } 1303 } 1304 end 1305 1306 def test_s_foreach_enc_in_opt2 1307 with_tmpdir { 1308 generate_file("t", "\xff") 1309 IO.foreach("t", :external_encoding => "ascii-8bit") {|s| 1310 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1311 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1312 } 1313 } 1314 end 1315 1316 def test_s_foreach_enc_enc 1317 with_tmpdir { 1318 generate_file("t", "\u3042") 1319 IO.foreach("t", :mode => "r:utf-8:euc-jp") {|s| 1320 assert_equal(Encoding::EUC_JP, s.encoding) 1321 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1322 } 1323 } 1324 end 1325 1326 def test_s_foreach_enc_enc_in_opt 1327 with_tmpdir { 1328 generate_file("t", "\u3042") 1329 IO.foreach("t", :mode => "r", :encoding => 
"utf-8:euc-jp") {|s| 1330 assert_equal(Encoding::EUC_JP, s.encoding) 1331 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1332 } 1333 } 1334 end 1335 1336 def test_s_foreach_enc_enc_in_opt2 1337 with_tmpdir { 1338 generate_file("t", "\u3042") 1339 IO.foreach("t", :mode => "r", :external_encoding => "utf-8", :internal_encoding => "euc-jp") {|s| 1340 assert_equal(Encoding::EUC_JP, s.encoding) 1341 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1342 } 1343 } 1344 end 1345 1346 def test_s_foreach_open_args_enc 1347 with_tmpdir { 1348 generate_file("t", "\xff") 1349 IO.foreach("t", :open_args => ["r:ascii-8bit"]) {|s| 1350 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1351 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1352 } 1353 } 1354 end 1355 1356 def test_s_foreach_open_args_enc_in_opt 1357 with_tmpdir { 1358 generate_file("t", "\xff") 1359 IO.foreach("t", :open_args => ["r", encoding: "ascii-8bit"]) {|s| 1360 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1361 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1362 } 1363 } 1364 end 1365 1366 def test_s_foreach_open_args_enc_in_opt2 1367 with_tmpdir { 1368 generate_file("t", "\xff") 1369 IO.foreach("t", :open_args => ["r", external_encoding: "ascii-8bit"]) {|s| 1370 assert_equal(Encoding::ASCII_8BIT, s.encoding) 1371 assert_equal("\xff".force_encoding("ascii-8bit"), s) 1372 } 1373 } 1374 end 1375 1376 def test_s_foreach_open_args_enc_enc 1377 with_tmpdir { 1378 generate_file("t", "\u3042") 1379 IO.foreach("t", :open_args => ["r:utf-8:euc-jp"]) {|s| 1380 assert_equal(Encoding::EUC_JP, s.encoding) 1381 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1382 } 1383 } 1384 end 1385 1386 def test_s_foreach_open_args_enc_enc_in_opt 1387 with_tmpdir { 1388 generate_file("t", "\u3042") 1389 IO.foreach("t", :open_args => ["r", encoding: "utf-8:euc-jp"]) {|s| 1390 assert_equal(Encoding::EUC_JP, s.encoding) 1391 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1392 } 1393 } 1394 end 1395 1396 def 
test_s_foreach_open_args_enc_enc_in_opt2 1397 with_tmpdir { 1398 generate_file("t", "\u3042") 1399 IO.foreach("t", :open_args => ["r", external_encoding: "utf-8", internal_encoding: "euc-jp"]) {|s| 1400 assert_equal(Encoding::EUC_JP, s.encoding) 1401 assert_equal("\xa4\xa2".force_encoding("euc-jp"), s) 1402 } 1403 } 1404 end 1405 1406 def test_both_textmode_binmode 1407 assert_raise(ArgumentError) { open("not-exist", "r", :textmode=>true, :binmode=>true) } 1408 assert_raise(ArgumentError) { open("not-exist", "rt", :binmode=>true) } 1409 assert_raise(ArgumentError) { open("not-exist", "rb", :textmode=>true) } 1410 end 1411 1412 def test_textmode_decode_universal_newline_read 1413 with_tmpdir { 1414 generate_file("t.crlf", "a\r\nb\r\nc\r\n") 1415 assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8")) 1416 assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt")) 1417 open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) } 1418 open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) } 1419 open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) } 1420 open("t.crlf", "r", textmode: true, universal_newline: false) {|f| 1421 assert_equal("a\r\nb\r\nc\r\n", f.read) 1422 } 1423 1424 generate_file("t.cr", "a\rb\rc\r") 1425 assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8")) 1426 assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt")) 1427 1428 generate_file("t.lf", "a\nb\nc\n") 1429 assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8")) 1430 assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt")) 1431 } 1432 end 1433 1434 def test_textmode_decode_universal_newline_getc 1435 with_tmpdir { 1436 generate_file("t.crlf", "a\r\nb\r\nc\r\n") 1437 open("t.crlf", "rt") {|f| 1438 assert_equal("a", f.getc) 1439 assert_equal("\n", f.getc) 1440 assert_equal("b", f.getc) 1441 assert_equal("\n", f.getc) 1442 assert_equal("c", f.getc) 1443 assert_equal("\n", f.getc) 1444 assert_equal(nil, f.getc) 1445 
} 1446 1447 generate_file("t.cr", "a\rb\rc\r") 1448 open("t.cr", "rt") {|f| 1449 assert_equal("a", f.getc) 1450 assert_equal("\n", f.getc) 1451 assert_equal("b", f.getc) 1452 assert_equal("\n", f.getc) 1453 assert_equal("c", f.getc) 1454 assert_equal("\n", f.getc) 1455 assert_equal(nil, f.getc) 1456 } 1457 1458 generate_file("t.lf", "a\nb\nc\n") 1459 open("t.lf", "rt") {|f| 1460 assert_equal("a", f.getc) 1461 assert_equal("\n", f.getc) 1462 assert_equal("b", f.getc) 1463 assert_equal("\n", f.getc) 1464 assert_equal("c", f.getc) 1465 assert_equal("\n", f.getc) 1466 assert_equal(nil, f.getc) 1467 } 1468 } 1469 end 1470 1471 def test_textmode_decode_universal_newline_gets 1472 with_tmpdir { 1473 generate_file("t.crlf", "a\r\nb\r\nc\r\n") 1474 open("t.crlf", "rt") {|f| 1475 assert_equal("a\n", f.gets) 1476 assert_equal("b\n", f.gets) 1477 assert_equal("c\n", f.gets) 1478 assert_equal(nil, f.gets) 1479 } 1480 1481 generate_file("t.cr", "a\rb\rc\r") 1482 open("t.cr", "rt") {|f| 1483 assert_equal("a\n", f.gets) 1484 assert_equal("b\n", f.gets) 1485 assert_equal("c\n", f.gets) 1486 assert_equal(nil, f.gets) 1487 } 1488 1489 generate_file("t.lf", "a\nb\nc\n") 1490 open("t.lf", "rt") {|f| 1491 assert_equal("a\n", f.gets) 1492 assert_equal("b\n", f.gets) 1493 assert_equal("c\n", f.gets) 1494 assert_equal(nil, f.gets) 1495 } 1496 } 1497 end 1498 1499 def test_textmode_decode_universal_newline_utf16 1500 with_tmpdir { 1501 generate_file("t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n") 1502 assert_equal("a\nb\nc\n", File.read("t.utf16be.crlf", mode:"rt:utf-16be:utf-8")) 1503 1504 generate_file("t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0") 1505 assert_equal("a\nb\nc\n", File.read("t.utf16le.crlf", mode:"rt:utf-16le:utf-8")) 1506 1507 generate_file("t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r") 1508 assert_equal("a\nb\nc\n", File.read("t.utf16be.cr", mode:"rt:utf-16be:utf-8")) 1509 1510 generate_file("t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0") 1511 assert_equal("a\nb\nc\n", 
File.read("t.utf16le.cr", mode:"rt:utf-16le:utf-8")) 1512 1513 generate_file("t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n") 1514 assert_equal("a\nb\nc\n", File.read("t.utf16be.lf", mode:"rt:utf-16be:utf-8")) 1515 1516 generate_file("t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0") 1517 assert_equal("a\nb\nc\n", File.read("t.utf16le.lf", mode:"rt:utf-16le:utf-8")) 1518 } 1519 end 1520 1521 SYSTEM_NEWLINE = [] 1522 def system_newline 1523 return SYSTEM_NEWLINE.first if !SYSTEM_NEWLINE.empty? 1524 with_tmpdir { 1525 open("newline", "wt") {|f| 1526 f.print "\n" 1527 } 1528 open("newline", "rb") {|f| 1529 SYSTEM_NEWLINE << f.read 1530 } 1531 } 1532 SYSTEM_NEWLINE.first 1533 end 1534 1535 def test_textmode_encode_newline 1536 with_tmpdir { 1537 open("t.txt", "wt") {|f| 1538 f.puts "abc" 1539 f.puts "def" 1540 } 1541 content = File.read("t.txt", :mode=>"rb") 1542 nl = system_newline 1543 assert_equal("abc#{nl}def#{nl}", content) 1544 } 1545 end 1546 1547 def test_textmode_encode_newline_enc 1548 with_tmpdir { 1549 open("t.txt", "wt:euc-jp") {|f| 1550 f.puts "abc\u3042" 1551 f.puts "def\u3044" 1552 } 1553 content = File.read("t.txt", :mode=>"rb:ascii-8bit") 1554 nl = system_newline 1555 assert_equal("abc\xA4\xA2#{nl}def\xA4\xA4#{nl}", content) 1556 } 1557 end 1558 1559 def test_read_newline_conversion_with_encoding_conversion 1560 with_tmpdir { 1561 generate_file("t.utf8.crlf", "a\r\nb\r\n") 1562 open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f| 1563 content = f.read 1564 assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content) 1565 } 1566 open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| 1567 content = f.read 1568 assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content) 1569 } 1570 open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| 1571 content = f.read 1572 if system_newline == "\n" 1573 assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content) 1574 else 1575 assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content) 1576 end 1577 } 1578 } 1579 
end 1580 1581 def test_read_newline_conversion_without_encoding_conversion 1582 with_tmpdir { 1583 generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n") 1584 open("t.utf16.crlf", "rb:utf-16be") {|f| 1585 content = f.read 1586 assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), 1587 content) 1588 } 1589 } 1590 end 1591 1592 def test_read_newline_conversion_error 1593 with_tmpdir { 1594 generate_file("empty.txt", "") 1595 # ascii incompatible encoding without conversion needs binmode. 1596 assert_raise(ArgumentError) { 1597 open("empty.txt", "rt:utf-16be") {|f| } 1598 } 1599 assert_raise(ArgumentError) { 1600 open("empty.txt", "r:utf-16be") {|f| } 1601 } 1602 } 1603 end 1604 1605 def test_read_mode 1606 with_tmpdir { 1607 generate_file("t", "a\rb\r\nc\n\xc2\xa2") 1608 generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B") 1609 generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B") 1610 generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35") 1611 generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2") 1612 # "\xc2\xa2" is valid as EUC-JP and UTF-8 1613 # EUC-JP UTF-8 Unicode 1614 # 0xC2A2 0xE894B5 U+8535 1615 # 0xA1F1 0xC2A2 U+00A2 1616 1617 open("t","rt") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) } 1618 open("t","rb") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding::ASCII_8BIT), f.read) } 1619 1620 open("t","rt:euc-jp") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } 1621 open("t","rb:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) } 1622 open("t","rt:utf-8") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } 1623 open("t","rb:utf-8") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) } 1624 assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } } 1625 open("t","rb:iso-2022-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"), f.read) } 1626 1627 
open("t","rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n\u8535", f.read) }
      open("t","rt:utf-8:euc-jp") {|f| assert_equal("a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }
      open("t","rb:euc-jp:utf-8") {|f| assert_equal("a\rb\r\nc\n\u8535", f.read) }
      open("t","rb:utf-8:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }

      # ASCII-compatible source converted to stateful / ASCII-incompatible targets.
      open("t","rt:euc-jp:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"), f.read) }
      open("t","rt:utf-8:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"), f.read) }
      open("t","rt:euc-jp:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"), f.read) }
      open("t","rt:utf-8:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"), f.read) }
      open("t","rb:euc-jp:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
      open("t","rb:utf-8:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"),f.read)}
      open("t","rb:euc-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
      open("t","rb:utf-8:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"),f.read)}

      # Stateful / ASCII-incompatible source converted to ASCII-compatible targets.
      open("ie","rt:iso-2022-jp:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
      open("iu","rt:iso-2022-jp:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
      open("be","rt:utf-16be:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
      open("bu","rt:utf-16be:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
      open("ie","rb:iso-2022-jp:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)}
open("iu","rb:iso-2022-jp:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)} 1647 open("be","rb:utf-16be:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)} 1648 open("bu","rb:utf-16be:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)} 1649 1650 open("ie","rt:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)} 1651 open("be","rt:utf-16be:iso-2022-jp"){|f|assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)} 1652 open("ie","rb:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)} 1653 open("be","rb:utf-16be:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)} 1654 } 1655 end 1656 1657 def assert_write(expected, mode, *args) 1658 with_tmpdir { 1659 open("t", mode) {|f| 1660 args.each {|arg| f.print arg } 1661 } 1662 content = File.read("t", :mode=>"rb:ascii-8bit") 1663 assert_equal(expected.dup.force_encoding("ascii-8bit"), 1664 content.force_encoding("ascii-8bit")) 1665 } 1666 end 1667 1668 def test_write_mode 1669 # "\xc2\xa2" is valid as EUC-JP and UTF-8 1670 # EUC-JP UTF-8 Unicode 1671 # 0xC2A2 0xE894B5 U+8535 1672 # 0xA1F1 0xC2A2 U+00A2 1673 a = "a\rb\r\nc\n" 1674 e = "\xc2\xa2".force_encoding("euc-jp") 1675 u8 = "\xc2\xa2".force_encoding("utf-8") 1676 u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be") 1677 i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp") 1678 n = system_newline 1679 n.encode("utf-16be").force_encoding("ascii-8bit") 1680 1681 assert_write("a\rb\r#{n}c#{n}", "wt", a) 1682 assert_write("\xc2\xa2", "wt", e) 1683 assert_write("\xc2\xa2", "wt", u8) 1684 1685 assert_write("a\rb\r\nc\n", "wb", a) 1686 assert_write("\xc2\xa2", "wb", e) 1687 assert_write("\xc2\xa2", "wb", u8) 1688 1689 #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) 
should raise 1690 #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise 1691 assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16) 1692 assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i) 1693 1694 t_write_mode_enc 1695 t_write_mode_enc(":utf-8") 1696 end 1697 1698 def t_write_mode_enc(enc="") 1699 # "\xc2\xa2" is valid as EUC-JP and UTF-8 1700 # EUC-JP UTF-8 Unicode 1701 # 0xC2A2 0xE894B5 U+8535 1702 # 0xA1F1 0xC2A2 U+00A2 1703 a = "a\rb\r\nc\n" 1704 e = "\xc2\xa2".force_encoding("euc-jp") 1705 u8 = "\xc2\xa2".force_encoding("utf-8") 1706 u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be") 1707 i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp") 1708 n = system_newline 1709 un = n.encode("utf-16be").force_encoding("ascii-8bit") 1710 1711 assert_write("a\rb\r#{n}c#{n}", "wt:euc-jp#{enc}", a) 1712 assert_write("\xc2\xa2", "wt:euc-jp#{enc}", e) 1713 assert_write("\xa1\xf1", "wt:euc-jp#{enc}", u8) 1714 1715 assert_write("a\rb\r\nc\n", "wb:euc-jp#{enc}", a) 1716 assert_write("\xc2\xa2", "wb:euc-jp#{enc}", e) 1717 assert_write("\xa1\xf1", "wb:euc-jp#{enc}", u8) 1718 1719 assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", u16) 1720 assert_write("\xc2\xa2\r\xa1\xf1\r#{n}#{n}", "wt:euc-jp#{enc}", i) 1721 assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", u16) 1722 assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp#{enc}", i) 1723 1724 assert_write("\0a\0\r\0b\0\r#{un}\0c#{un}", "wt:utf-16be#{enc}", a) 1725 assert_write("\x85\x35", "wt:utf-16be#{enc}", e) 1726 assert_write("\x00\xa2", "wt:utf-16be#{enc}", u8) 1727 assert_write("a\rb\r#{n}c#{n}", "wt:iso-2022-jp#{enc}", a) 1728 assert_write("\e$B\x42\x22\e(B", "wt:iso-2022-jp#{enc}", e) 1729 assert_write("\e$B\x21\x71\e(B", "wt:iso-2022-jp#{enc}", u8) 1730 1731 assert_write("\0a\0\r\0b\0\r\0\n\0c\0\n", "wb:utf-16be#{enc}", a) 1732 assert_write("\x85\x35", "wb:utf-16be#{enc}", e) 1733 assert_write("\x00\xa2", 
"wb:utf-16be#{enc}", u8) 1734 assert_write("a\rb\r\nc\n", "wb:iso-2022-jp#{enc}", a) 1735 assert_write("\e$B\x42\x22\e(B", "wb:iso-2022-jp#{enc}", e) 1736 assert_write("\e$B\x21\x71\e(B", "wb:iso-2022-jp#{enc}", u8) 1737 1738 assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", u16) 1739 assert_write("\x85\x35\0\r\x00\xa2\0\r#{un}#{un}", "wt:utf-16be#{enc}", i) 1740 assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", u16) 1741 assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be#{enc}", i) 1742 assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", u16) 1743 assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{n}#{n}", "wt:iso-2022-jp#{enc}", i) 1744 assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", u16) 1745 assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp#{enc}", i) 1746 end 1747 1748 def test_write_mode_fail 1749 return if system_newline == "\n" 1750 with_tmpdir { 1751 open("t", "wt") {|f| 1752 assert_raise(ArgumentError) { f.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be") } 1753 } 1754 } 1755 end 1756 1757 def test_write_ascii_incompat 1758 with_tmpdir { 1759 open("t.utf8", "wb:utf-8:utf-16be") {|f| } 1760 open("t.utf8", "wt:utf-8:utf-16be") {|f| } 1761 open("t.utf8", "w:utf-8:utf-16be") {|f| } 1762 open("t.utf16", "wb:utf-16be") {|f| } 1763 open("t.utf16", "wt:utf-16be") {|f| } 1764 open("t.utf16", "w:utf-16be") {|f| } 1765 } 1766 end 1767 1768 def test_binmode_write_ascii_incompat_internal 1769 with_tmpdir { 1770 open("t.utf8.lf", "wb:utf-8:utf-16be") {|f| 1771 f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE") 1772 } 1773 content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit") 1774 assert_equal("a\nb\n", content) 1775 1776 open("t.utf8.lf", "wb:utf-16be") {|f| 1777 f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE") 1778 } 1779 content = File.read("t.utf8.lf", :mode=>"rb:ascii-8bit") 1780 assert_equal("\0a\0\n\0b\0\n", 
content) 1781 } 1782 end 1783 1784 def test_binary 1785 with_tmpdir { 1786 src = "a\nb\rc\r\nd\n" 1787 generate_file("t.txt", src) 1788 open("t.txt", "rb") {|f| 1789 assert_equal(src, f.read) 1790 } 1791 open("t.txt", "r", :binmode=>true) {|f| 1792 assert_equal(src, f.read) 1793 } 1794 if system_newline == "\n" 1795 open("t.txt", "r") {|f| 1796 assert_equal(src, f.read) 1797 } 1798 end 1799 } 1800 end 1801 1802 def test_binmode 1803 with_tmpdir { 1804 src = "a\r\nb\r\nc\r\n" 1805 generate_file("t.txt", src) 1806 open("t.txt", "rt") {|f| 1807 assert_equal("a", f.getc) 1808 assert_equal("\n", f.getc) 1809 f.binmode 1810 assert_equal("b", f.getc) 1811 assert_equal("\r", f.getc) 1812 assert_equal("\n", f.getc) 1813 assert_equal("c", f.getc) 1814 assert_equal("\r", f.getc) 1815 assert_equal("\n", f.getc) 1816 assert_equal(nil, f.getc) 1817 } 1818 } 1819 end 1820 1821 def test_binmode2 1822 with_tmpdir { 1823 src = "a\r\nb\r\nc\r\n" 1824 generate_file("t.txt", src) 1825 open("t.txt", "rt:euc-jp:utf-8") {|f| 1826 assert_equal("a", f.getc) 1827 assert_equal("\n", f.getc) 1828 f.binmode 1829 assert_equal("b", f.getc) 1830 assert_equal("\r", f.getc) 1831 assert_equal("\n", f.getc) 1832 assert_equal("c", f.getc) 1833 assert_equal("\r", f.getc) 1834 assert_equal("\n", f.getc) 1835 assert_equal(nil, f.getc) 1836 } 1837 } 1838 end 1839 1840 def test_binmode3 1841 with_tmpdir { 1842 src = "\u3042\r\n" 1843 generate_file("t.txt", src) 1844 srcbin = src.dup.force_encoding("ascii-8bit") 1845 open("t.txt", "rt:utf-8:euc-jp") {|f| 1846 f.binmode 1847 result = f.read 1848 assert_str_equal(srcbin, result) 1849 assert_equal(Encoding::ASCII_8BIT, result.encoding) 1850 } 1851 } 1852 end 1853 1854 def test_invalid_r 1855 with_tmpdir { 1856 generate_file("t.txt", "a\x80b") 1857 open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| 1858 assert_equal("a?b", f.read) 1859 } 1860 open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| 1861 assert_equal("ab", f.read) 1862 } 
1863 open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| 1864 assert_raise(Encoding::InvalidByteSequenceError) { f.read } 1865 assert_equal("b", f.read) 1866 } 1867 open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| 1868 assert_raise(Encoding::InvalidByteSequenceError) { f.read } 1869 assert_equal("b", f.read) 1870 } 1871 } 1872 end 1873 1874 def test_undef_r 1875 with_tmpdir { 1876 generate_file("t.txt", "a\uFFFDb") 1877 open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| 1878 assert_equal("a?b", f.read) 1879 } 1880 open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| 1881 assert_equal("ab", f.read) 1882 } 1883 open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| 1884 assert_raise(Encoding::UndefinedConversionError) { f.read } 1885 assert_equal("b", f.read) 1886 } 1887 open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| 1888 assert_raise(Encoding::UndefinedConversionError) { f.read } 1889 assert_equal("b", f.read) 1890 } 1891 } 1892 end 1893 1894 def test_invalid_w 1895 with_tmpdir { 1896 invalid_utf8 = "a\x80b".force_encoding("utf-8") 1897 open("t.txt", "w:euc-jp", :invalid => :replace) {|f| 1898 assert_nothing_raised { f.write invalid_utf8 } 1899 } 1900 assert_equal("a?b", File.read("t.txt")) 1901 1902 open("t.txt", "w:euc-jp", :invalid => :replace, :replace => "") {|f| 1903 assert_nothing_raised { f.write invalid_utf8 } 1904 } 1905 assert_equal("ab", File.read("t.txt")) 1906 1907 open("t.txt", "w:euc-jp", :undef => :replace) {|f| 1908 assert_raise(Encoding::InvalidByteSequenceError) { f.write invalid_utf8 } 1909 } 1910 open("t.txt", "w:euc-jp", :undef => :replace, :replace => "") {|f| 1911 assert_raise(Encoding::InvalidByteSequenceError) { f.write invalid_utf8 } 1912 } 1913 } 1914 end 1915 1916 def test_undef_w_stateless 1917 with_tmpdir { 1918 generate_file("t.txt", "a\uFFFDb") 1919 open("t.txt", "w:euc-jp:utf-8", :undef => :replace) {|f| 1920 assert_nothing_raised { f.write 
"a\uFFFDb" } 1921 } 1922 assert_equal("a?b", File.read("t.txt")) 1923 open("t.txt", "w:euc-jp:utf-8", :undef => :replace, :replace => "") {|f| 1924 assert_nothing_raised { f.write "a\uFFFDb" } 1925 } 1926 assert_equal("ab", File.read("t.txt")) 1927 open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f| 1928 assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" } 1929 } 1930 open("t.txt", "w:euc-jp:utf-8", :invalid => :replace, :replace => "") {|f| 1931 assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" } 1932 } 1933 } 1934 end 1935 1936 def test_undef_w_stateful 1937 with_tmpdir { 1938 generate_file("t.txt", "a\uFFFDb") 1939 open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace) {|f| 1940 assert_nothing_raised { f.write "a\uFFFDb" } 1941 } 1942 assert_equal("a?b", File.read("t.txt")) 1943 open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace, :replace => "") {|f| 1944 assert_nothing_raised { f.write "a\uFFFDb" } 1945 } 1946 assert_equal("ab", File.read("t.txt")) 1947 open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f| 1948 assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" } 1949 } 1950 open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace, :replace => "") {|f| 1951 assert_raise(Encoding::UndefinedConversionError) { f.write "a\uFFFDb" } 1952 } 1953 } 1954 end 1955 1956 def test_w_xml_attr 1957 with_tmpdir { 1958 open("raw.txt", "wb", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } 1959 content = File.read("raw.txt", :mode=>"rb:ascii-8bit") 1960 assert_equal("\"&<>"'\u4E02\u3042\n\"".force_encoding("ascii-8bit"), content) 1961 1962 open("ascii.txt", "wb:us-ascii", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } 1963 content = File.read("ascii.txt", :mode=>"rb:ascii-8bit") 1964 assert_equal("\"&<>"'丂あ\n\"".force_encoding("ascii-8bit"), content) 1965 1966 open("iso-2022-jp.txt", "wb:iso-2022-jp", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } 1967 content = 
File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit") 1968 assert_equal("\"&<>"'丂\e$B$\"\e(B\n\"".force_encoding("ascii-8bit"), content) 1969 1970 open("utf-16be.txt", "wb:utf-16be", xml: :attr) {|f| f.print '&<>"\''; f.puts "\u4E02\u3042" } 1971 content = File.read("utf-16be.txt", :mode=>"rb:ascii-8bit") 1972 assert_equal("\0\"\0&\0a\0m\0p\0;\0&\0l\0t\0;\0&\0g\0t\0;\0&\0q\0u\0o\0t\0;\0'\x4E\x02\x30\x42\0\n\0\"".force_encoding("ascii-8bit"), content) 1973 1974 open("eucjp.txt", "w:euc-jp:utf-8", xml: :attr) {|f| 1975 f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212 1976 } 1977 content = File.read("eucjp.txt", :mode=>"rb:ascii-8bit") 1978 assert_equal("\"\x8F\xB0\xA1\"".force_encoding("ascii-8bit"), content) 1979 1980 open("sjis.txt", "w:sjis:utf-8", xml: :attr) {|f| 1981 f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212 1982 } 1983 content = File.read("sjis.txt", :mode=>"rb:ascii-8bit") 1984 assert_equal("\"丂\"".force_encoding("ascii-8bit"), content) 1985 1986 open("iso-2022-jp.txt", "w:iso-2022-jp:utf-8", xml: :attr) {|f| 1987 f.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212 1988 } 1989 content = File.read("iso-2022-jp.txt", :mode=>"rb:ascii-8bit") 1990 assert_equal("\"丂\"".force_encoding("ascii-8bit"), content) 1991 } 1992 end 1993 1994 def test_strip_bom 1995 with_tmpdir { 1996 text = "\uFEFFa" 1997 stripped = "a" 1998 %w/UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE/.each do |name| 1999 path = '%s-bom.txt' % name 2000 content = text.encode(name) 2001 generate_file(path, content) 2002 result = File.read(path, mode: 'rb:BOM|UTF-8') 2003 assert_equal(content[1].force_encoding("ascii-8bit"), 2004 result.force_encoding("ascii-8bit")) 2005 result = File.read(path, mode: 'rb:BOM|UTF-8:UTF-8') 2006 assert_equal(Encoding::UTF_8, result.encoding) 2007 assert_equal(stripped, result) 2008 end 2009 2010 bug3407 = '[ruby-core:30641]' 2011 path = 'UTF-8-bom.txt' 2012 result = File.read(path, encoding: 'BOM|UTF-8') 2013 assert_equal("a", result.force_encoding("ascii-8bit"), 
bug3407) 2014 2015 bug8323 = '[ruby-core:54563] [Bug #8323]' 2016 expected = "a\xff".force_encoding("utf-8") 2017 open(path, 'ab') {|f| f.write("\xff")} 2018 result = File.read(path, encoding: 'BOM|UTF-8') 2019 assert_not_predicate(result, :valid_encoding?, bug8323) 2020 assert_equal(expected, result, bug8323) 2021 result = File.read(path, encoding: 'BOM|UTF-8:UTF-8') 2022 assert_not_predicate(result, :valid_encoding?, bug8323) 2023 assert_equal(expected, result, bug8323) 2024 2025 path = 'ascii.txt' 2026 generate_file(path, stripped) 2027 result = File.read(path, encoding: 'BOM|UTF-8') 2028 assert_equal(stripped, result, bug8323) 2029 result = File.read(path, encoding: 'BOM|UTF-8:UTF-8') 2030 assert_equal(stripped, result, bug8323) 2031 } 2032 end 2033 2034 def test_cbuf 2035 with_tmpdir { 2036 fn = "tst" 2037 open(fn, "w") {|f| f.print "foo" } 2038 open(fn, "r+t") {|f| 2039 f.ungetc(f.getc) 2040 assert_raise(IOError, "[ruby-dev:40493]") { f.readpartial(2) } 2041 assert_raise(IOError) { f.read(2) } 2042 assert_raise(IOError) { f.each_byte {|c| } } 2043 assert_raise(IOError) { f.getbyte } 2044 assert_raise(IOError) { f.ungetbyte(0) } 2045 assert_raise(IOError) { f.sysread(2) } 2046 assert_raise(IOError) { IO.copy_stream(f, "tmpout") } 2047 assert_raise(IOError) { f.sysseek(2) } 2048 } 2049 open(fn, "r+t") {|f| 2050 f.ungetc(f.getc) 2051 assert_equal("foo", f.read) 2052 } 2053 } 2054 end 2055 2056 def test_text_mode_ungetc_eof 2057 with_tmpdir { 2058 open("ff", "w") {|f| } 2059 open("ff", "rt") {|f| 2060 f.ungetc "a" 2061 assert(!f.eof?, "[ruby-dev:40506] (3)") 2062 } 2063 } 2064 end 2065 2066 def test_cbuf_select 2067 pipe("US-ASCII:UTF-8", { :universal_newline => true }, 2068 proc do |w| 2069 w << "\r\n" 2070 end, 2071 proc do |r| 2072 r.ungetc(r.getc) 2073 assert_equal([[r],[],[]], IO.select([r], nil, nil, 1)) 2074 end) 2075 end 2076 2077 def test_textmode_paragraphmode 2078 pipe("US-ASCII:UTF-8", { :universal_newline => true }, 2079 proc do |w| 2080 w << 
"a\n\n\nc".gsub(/\n/, "\r\n") 2081 w.close 2082 end, 2083 proc do |r| 2084 assert_equal("a\n\n", r.gets("")) 2085 assert_equal("c", r.gets(""), "[ruby-core:23723] (18)") 2086 end) 2087 end 2088 2089 def test_textmode_paragraph_binaryread 2090 pipe("US-ASCII:UTF-8", { :universal_newline => true }, 2091 proc do |w| 2092 w << "a\n\n\ncdefgh".gsub(/\n/, "\r\n") 2093 w.close 2094 end, 2095 proc do |r| 2096 assert_equal("a\n\n", r.gets("")) 2097 assert_equal("c", r.getc) 2098 assert_equal("defgh", r.readpartial(10)) 2099 end) 2100 end 2101 2102 def test_textmode_paragraph_nonasciicompat 2103 bug3534 = ['[ruby-dev:41803]', '[Bug #3534]'] 2104 r, w = IO.pipe 2105 [Encoding::UTF_32BE, Encoding::UTF_32LE, 2106 Encoding::UTF_16BE, Encoding::UTF_16LE, 2107 Encoding::UTF_8].each do |e| 2108 r.set_encoding(Encoding::US_ASCII, e) 2109 wthr = Thread.new{ w.print(bug3534[0], "\n\n\n\n", bug3534[1], "\n") } 2110 assert_equal((bug3534[0]+"\n\n").encode(e), r.gets(""), bug3534[0]) 2111 assert_equal((bug3534[1]+"\n").encode(e), r.gets(), bug3534[1]) 2112 wthr.join 2113 end 2114 end 2115 2116 def test_binmode_paragraph_nonasciicompat 2117 bug3534 = ['[ruby-dev:41803]', '[Bug #3534]'] 2118 r, w = IO.pipe 2119 r.binmode 2120 w.binmode 2121 [Encoding::UTF_32BE, Encoding::UTF_32LE, 2122 Encoding::UTF_16BE, Encoding::UTF_16LE, 2123 Encoding::UTF_8].each do |e| 2124 r.set_encoding(Encoding::US_ASCII, e) 2125 wthr = Thread.new{ w.print(bug3534[0], "\n\n\n\n", bug3534[1], "\n") } 2126 assert_equal((bug3534[0]+"\n\n").encode(e), r.gets(""), bug3534[0]) 2127 assert_equal((bug3534[1]+"\n").encode(e), r.gets(), bug3534[1]) 2128 wthr.join 2129 end 2130 end 2131 2132 def test_puts_widechar 2133 bug = '[ruby-dev:42212]' 2134 pipe(Encoding::ASCII_8BIT, 2135 proc do |w| 2136 w.binmode 2137 w.puts(0x010a.chr(Encoding::UTF_32BE)) 2138 w.puts(0x010a.chr(Encoding::UTF_16BE)) 2139 w.puts(0x0a010000.chr(Encoding::UTF_32LE)) 2140 w.puts(0x0a01.chr(Encoding::UTF_16LE)) 2141 w.close 2142 end, 2143 proc do |r| 
2144 r.binmode 2145 assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug) 2146 assert_equal("\x01\x0a\n", r.read(3), bug) 2147 assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug) 2148 assert_equal("\x01\x0a\n", r.read(3), bug) 2149 assert_equal("", r.read, bug) 2150 r.close 2151 end) 2152 end 2153 2154 def test_getc_ascii_only 2155 bug4557 = '[ruby-core:35630]' 2156 c = with_tmpdir { 2157 open("a", "wb") {|f| f.puts "a"} 2158 open("a", "rt") {|f| f.getc} 2159 } 2160 assert_predicate(c, :ascii_only?, bug4557) 2161 end 2162 2163 def test_getc_conversion 2164 bug8516 = '[ruby-core:55444] [Bug #8516]' 2165 c = with_tmpdir { 2166 open("a", "wb") {|f| f.putc "\xe1"} 2167 open("a", "r:iso-8859-1:utf-8") {|f| f.getc} 2168 } 2169 assert_not_predicate(c, :ascii_only?, bug8516) 2170 assert_equal(1, c.size, bug8516) 2171 end 2172 2173 def test_default_mode_on_dosish 2174 with_tmpdir { 2175 open("a", "w") {|f| f.write "\n"} 2176 assert_equal("\r\n", IO.binread("a")) 2177 } 2178 end if /mswin|mingw/ =~ RUBY_PLATFORM 2179 2180 def test_default_mode_on_unix 2181 with_tmpdir { 2182 open("a", "w") {|f| f.write "\n"} 2183 assert_equal("\n", IO.binread("a")) 2184 } 2185 end unless /mswin|mingw/ =~ RUBY_PLATFORM 2186 2187 def test_text_mode 2188 with_tmpdir { 2189 open("a", "wb") {|f| f.write "\r\n"} 2190 assert_equal("\n", open("a", "rt"){|f| f.read}) 2191 } 2192 end 2193 2194 def test_binary_mode 2195 with_tmpdir { 2196 open("a", "wb") {|f| f.write "\r\n"} 2197 assert_equal("\r\n", open("a", "rb"){|f| f.read}) 2198 } 2199 end 2200 2201 def test_default_stdout_stderr_mode 2202 with_pipe do |in_r, in_w| 2203 with_pipe do |out_r, out_w| 2204 pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w, err: out_w) 2205 in_r.close 2206 out_w.close 2207 in_w.write <<-EOS 2208 STDOUT.puts "abc" 2209 STDOUT.flush 2210 STDERR.puts "def" 2211 STDERR.flush 2212 EOS 2213 in_w.close 2214 Process.wait pid 2215 assert_equal "abc\r\ndef\r\n", out_r.binmode.read 2216 out_r.close 2217 end 2218 end 2219 
end if /mswin|mingw/ =~ RUBY_PLATFORM 2220 2221 def test_cr_decorator_on_stdout 2222 with_pipe do |in_r, in_w| 2223 with_pipe do |out_r, out_w| 2224 pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) 2225 in_r.close 2226 out_w.close 2227 in_w.write <<-EOS 2228 STDOUT.set_encoding('locale', nil, newline: :cr) 2229 STDOUT.puts "abc" 2230 STDOUT.flush 2231 EOS 2232 in_w.close 2233 Process.wait pid 2234 assert_equal "abc\r", out_r.binmode.read 2235 out_r.close 2236 end 2237 end 2238 end 2239 2240 def test_lf_decorator_on_stdout 2241 with_pipe do |in_r, in_w| 2242 with_pipe do |out_r, out_w| 2243 pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) 2244 in_r.close 2245 out_w.close 2246 in_w.write <<-EOS 2247 STDOUT.set_encoding('locale', nil, newline: :lf) 2248 STDOUT.puts "abc" 2249 STDOUT.flush 2250 EOS 2251 in_w.close 2252 Process.wait pid 2253 assert_equal "abc\n", out_r.binmode.read 2254 out_r.close 2255 end 2256 end 2257 end 2258 2259 def test_crlf_decorator_on_stdout 2260 with_pipe do |in_r, in_w| 2261 with_pipe do |out_r, out_w| 2262 pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) 2263 in_r.close 2264 out_w.close 2265 in_w.write <<-EOS 2266 STDOUT.set_encoding('locale', nil, newline: :crlf) 2267 STDOUT.puts "abc" 2268 STDOUT.flush 2269 EOS 2270 in_w.close 2271 Process.wait pid 2272 assert_equal "abc\r\n", out_r.binmode.read 2273 out_r.close 2274 end 2275 end 2276 end 2277 2278 def test_binmode_with_pipe 2279 with_pipe do |r, w| 2280 src = "a\r\nb\r\nc\r\n" 2281 w.binmode.write src 2282 w.close 2283 2284 assert_equal("a", r.getc) 2285 assert_equal("\n", r.getc) 2286 r.binmode 2287 assert_equal("b", r.getc) 2288 assert_equal("\r", r.getc) 2289 assert_equal("\n", r.getc) 2290 assert_equal("c", r.getc) 2291 assert_equal("\r", r.getc) 2292 assert_equal("\n", r.getc) 2293 assert_equal(nil, r.getc) 2294 r.close 2295 end 2296 end if /mswin|mingw/ =~ RUBY_PLATFORM 2297 2298 def test_stdin_binmode 2299 with_pipe do |in_r, in_w| 2300 
with_pipe do |out_r, out_w| 2301 pid = Process.spawn({}, EnvUtil.rubybin, '-e', <<-'End', in: in_r, out: out_w) 2302 STDOUT.binmode 2303 STDOUT.write STDIN.getc 2304 STDOUT.write STDIN.getc 2305 STDIN.binmode 2306 STDOUT.write STDIN.getc 2307 STDOUT.write STDIN.getc 2308 STDOUT.write STDIN.getc 2309 STDOUT.write STDIN.getc 2310 STDOUT.write STDIN.getc 2311 STDOUT.write STDIN.getc 2312 STDOUT.write STDIN.getc 2313 End 2314 in_r.close 2315 out_w.close 2316 src = "a\r\nb\r\nc\r\n" 2317 in_w.binmode.write src 2318 in_w.close 2319 Process.wait pid 2320 assert_equal "a\nb\r\nc\r\n", out_r.binmode.read 2321 out_r.close 2322 end 2323 end 2324 end if /mswin|mingw/ =~ RUBY_PLATFORM 2325 2326 def test_read_with_length 2327 with_tmpdir { 2328 str = "a\nb" 2329 generate_file("tmp", str) 2330 open("tmp", "r") do |f| 2331 assert_equal(str, f.read(3)) 2332 end 2333 } 2334 end if /mswin|mingw/ =~ RUBY_PLATFORM 2335 2336 def test_read_with_length_binmode 2337 with_tmpdir { 2338 str = "a\r\nb\r\nc\r\n\r\n" 2339 generate_file("tmp", str) 2340 open("tmp", "r") do |f| 2341 # read with length should be binary mode 2342 assert_equal("a\r\n", f.read(3)) # binary 2343 assert_equal("b\nc\n\n", f.read) # text 2344 end 2345 } 2346 end if /mswin|mingw/ =~ RUBY_PLATFORM 2347 2348 def test_gets_and_read_with_binmode 2349 with_tmpdir { 2350 str = "a\r\nb\r\nc\r\n\n\r\n" 2351 generate_file("tmp", str) 2352 open("tmp", "r") do |f| 2353 assert_equal("a\n", f.gets) # text 2354 assert_equal("b\r\n", f.read(3)) # binary 2355 assert_equal("c\r\n", f.read(3)) # binary 2356 assert_equal("\n\n", f.read) # text 2357 end 2358 } 2359 end if /mswin|mingw/ =~ RUBY_PLATFORM 2360 2361 def test_getc_and_read_with_binmode 2362 with_tmpdir { 2363 str = "a\r\nb\r\nc\n\n\r\n\r\n" 2364 generate_file("tmp", str) 2365 open("tmp", "r") do |f| 2366 assert_equal("a", f.getc) # text 2367 assert_equal("\n", f.getc) # text 2368 assert_equal("b\r\n", f.read(3)) # binary 2369 assert_equal("c\n\n\n\n", f.read) # text 2370 end 2371 
} 2372 end if /mswin|mingw/ =~ RUBY_PLATFORM 2373 2374 def test_read_with_binmode_and_gets 2375 with_tmpdir { 2376 str = "a\r\nb\r\nc\r\n\r\n" 2377 open("tmp", "wb") { |f| f.write str } 2378 open("tmp", "r") do |f| 2379 assert_equal("a", f.getc) # text 2380 assert_equal("\n", f.getc) # text 2381 assert_equal("b\r\n", f.read(3)) # binary 2382 assert_equal("c\n", f.gets) # text 2383 assert_equal("\n", f.gets) # text 2384 end 2385 } 2386 end if /mswin|mingw/ =~ RUBY_PLATFORM 2387 2388 def test_read_with_binmode_and_getc 2389 with_tmpdir { 2390 str = "a\r\nb\r\nc\r\n\r\n" 2391 open("tmp", "wb") { |f| f.write str } 2392 open("tmp", "r") do |f| 2393 assert_equal("a", f.getc) # text 2394 assert_equal("\n", f.getc) # text 2395 assert_equal("b\r\n", f.read(3)) # binary 2396 assert_equal("c", f.getc) # text 2397 assert_equal("\n", f.getc) # text 2398 assert_equal("\n", f.getc) # text 2399 end 2400 } 2401 end if /mswin|mingw/ =~ RUBY_PLATFORM 2402 2403 def test_read_write_with_binmode 2404 with_tmpdir { 2405 str = "a\r\n" 2406 generate_file("tmp", str) 2407 open("tmp", "r+") do |f| 2408 assert_equal("a\r\n", f.read(3)) # binary 2409 f.write("b\n\n"); # text 2410 f.rewind 2411 assert_equal("a\nb\n\n", f.read) # text 2412 f.rewind 2413 assert_equal("a\r\nb\r\n\r\n", f.binmode.read) # binary 2414 end 2415 } 2416 end if /mswin|mingw/ =~ RUBY_PLATFORM 2417 2418 def test_seek_with_setting_binmode 2419 with_tmpdir { 2420 str = "a\r\nb\r\nc\r\n\r\n\n\n\n\n\n\n\n" 2421 generate_file("tmp", str) 2422 open("tmp", "r") do |f| 2423 assert_equal("a\n", f.gets) # text 2424 assert_equal("b\r\n", f.read(3)) # binary 2425 end 2426 } 2427 end if /mswin|mingw/ =~ RUBY_PLATFORM 2428 2429 def test_error_nonascii 2430 bug6071 = '[ruby-dev:45279]' 2431 paths = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")] 2432 encs = with_tmpdir { 2433 paths.map {|path| 2434 open(path) rescue $!.message.encoding 2435 } 2436 } 2437 assert_equal(paths.map(&:encoding), encs, bug6071) 2438 end 2439 2440 def 
test_inspect_nonascii 2441 bug6072 = '[ruby-dev:45280]' 2442 paths = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")] 2443 encs = with_tmpdir { 2444 paths.map {|path| 2445 open(path, "wb") {|f| f.inspect.encoding} 2446 } 2447 } 2448 assert_equal(paths.map(&:encoding), encs, bug6072) 2449 end 2450 2451 def test_pos_dont_move_cursor_position 2452 bug6179 = '[ruby-core:43497]' 2453 with_tmpdir { 2454 str = "line one\r\nline two\r\nline three\r\n" 2455 generate_file("tmp", str) 2456 open("tmp", "r") do |f| 2457 assert_equal("line one\n", f.readline) 2458 assert_equal(10, f.pos, bug6179) 2459 assert_equal("line two\n", f.readline, bug6179) 2460 assert_equal(20, f.pos, bug6179) 2461 assert_equal("line three\n", f.readline, bug6179) 2462 end 2463 } 2464 end if /mswin|mingw/ =~ RUBY_PLATFORM 2465 2466 def test_pos_with_buffer_end_cr 2467 bug6401 = '[ruby-core:44874]' 2468 with_tmpdir { 2469 # Read buffer size is 8191. This generates '\r' at 8191. 2470 lines = ["X" * 8187, "X"] 2471 generate_file("tmp", lines.join("\r\n") + "\r\n") 2472 2473 open("tmp", "r") do |f| 2474 lines.each do |line| 2475 f.pos 2476 assert_equal(line, f.readline.chomp, bug6401) 2477 end 2478 end 2479 } 2480 end if /mswin|mingw/ =~ RUBY_PLATFORM 2481 2482 def test_read_crlf_and_eof 2483 bug6271 = '[ruby-core:44189]' 2484 with_tmpdir { 2485 str = "a\r\nb\r\nc\r\n" 2486 generate_file("tmp", str) 2487 open("tmp", "r") do |f| 2488 i = 0 2489 until f.eof? 2490 assert_equal(str[i], f.read(1), bug6271) 2491 i += 1 2492 end 2493 assert_equal(str.size, i, bug6271) 2494 end 2495 } 2496 end if /mswin|mingw/ =~ RUBY_PLATFORM 2497end 2498