test_pax_filename_encoding.c revision 248616
1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26__FBSDID("$FreeBSD: head/contrib/libarchive/libarchive/test/test_pax_filename_encoding.c 248616 2013-03-22 13:36:03Z mm $"); 27 28#include <locale.h> 29 30/* 31 * Pax interchange is supposed to encode filenames into 32 * UTF-8. Of course, that's not always possible. This 33 * test is intended to verify that filenames always get 34 * stored and restored correctly, regardless of the encodings. 35 */ 36 37/* 38 * Read a manually-created archive that has filenames that are 39 * stored in binary instead of UTF-8 and verify that we get 40 * the right filename returned and that we get a warning only 41 * if the header isn't marked as binary. 42 */ 43static void 44test_pax_filename_encoding_1(void) 45{ 46 static const char testname[] = "test_pax_filename_encoding.tar"; 47 /* 48 * \314\214 is a valid 2-byte UTF-8 sequence. 49 * \374 is invalid in UTF-8. 50 */ 51 char filename[] = "abc\314\214mno\374xyz"; 52 struct archive *a; 53 struct archive_entry *entry; 54 55 /* 56 * Read an archive that has non-UTF8 pax filenames in it. 57 */ 58 extract_reference_file(testname); 59 a = archive_read_new(); 60 assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a)); 61 assertEqualInt(ARCHIVE_OK, archive_read_support_filter_all(a)); 62 assertEqualInt(ARCHIVE_OK, 63 archive_read_open_filename(a, testname, 10240)); 64 /* 65 * First entry in this test archive has an invalid UTF-8 sequence 66 * in it, but the header is not marked as hdrcharset=BINARY, so that 67 * requires a warning. 68 */ 69 failure("Invalid UTF8 in a pax archive pathname should cause a warning"); 70 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 71 assertEqualString(filename, archive_entry_pathname(entry)); 72 /* 73 * Second entry is identical except that it does have 74 * hdrcharset=BINARY, so no warning should be generated. 75 */ 76 failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n" 77 " characters in it without generating a warning"); 78 assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); 79 assertEqualString(filename, archive_entry_pathname(entry)); 80 archive_read_free(a); 81} 82 83/* 84 * Set the locale and write a pathname containing invalid characters. 85 * This should work; the underlying implementation should automatically 86 * fall back to storing the pathname in binary. 87 */ 88static void 89test_pax_filename_encoding_2(void) 90{ 91 char filename[] = "abc\314\214mno\374xyz"; 92 struct archive *a; 93 struct archive_entry *entry; 94 char buff[65536]; 95 char longname[] = "abc\314\214mno\374xyz" 96 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 97 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 98 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 99 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 100 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 101 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 102 ; 103 size_t used; 104 105 /* 106 * We need a starting locale which has invalid sequences. 107 * en_US.UTF-8 seems to be commonly supported. 108 */ 109 /* If it doesn't exist, just warn and return. */ 110 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 111 skipping("invalid encoding tests require a suitable locale;" 112 " en_US.UTF-8 not available on this system"); 113 return; 114 } 115 116 assert((a = archive_write_new()) != NULL); 117 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 118 assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 119 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 120 assertEqualInt(0, 121 archive_write_open_memory(a, buff, sizeof(buff), &used)); 122 123 assert((entry = archive_entry_new()) != NULL); 124 /* Set pathname, gname, uname, hardlink to nonconvertible values. */ 125 archive_entry_copy_pathname(entry, filename); 126 archive_entry_copy_gname(entry, filename); 127 archive_entry_copy_uname(entry, filename); 128 archive_entry_copy_hardlink(entry, filename); 129 archive_entry_set_filetype(entry, AE_IFREG); 130 failure("This should generate a warning for nonconvertible names."); 131 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 132 archive_entry_free(entry); 133 134 assert((entry = archive_entry_new()) != NULL); 135 /* Set path, gname, uname, and symlink to nonconvertible values. */ 136 archive_entry_copy_pathname(entry, filename); 137 archive_entry_copy_gname(entry, filename); 138 archive_entry_copy_uname(entry, filename); 139 archive_entry_copy_symlink(entry, filename); 140 archive_entry_set_filetype(entry, AE_IFLNK); 141 failure("This should generate a warning for nonconvertible names."); 142 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 143 archive_entry_free(entry); 144 145 assert((entry = archive_entry_new()) != NULL); 146 /* Set pathname to a very long nonconvertible value. */ 147 archive_entry_copy_pathname(entry, longname); 148 archive_entry_set_filetype(entry, AE_IFREG); 149 failure("This should generate a warning for nonconvertible names."); 150 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 151 archive_entry_free(entry); 152 153 assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 154 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 155 156 /* 157 * Now read the entries back. 158 */ 159 160 assert((a = archive_read_new()) != NULL); 161 assertEqualInt(0, archive_read_support_format_tar(a)); 162 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 163 164 assertEqualInt(0, archive_read_next_header(a, &entry)); 165 assertEqualString(filename, archive_entry_pathname(entry)); 166 assertEqualString(filename, archive_entry_gname(entry)); 167 assertEqualString(filename, archive_entry_uname(entry)); 168 assertEqualString(filename, archive_entry_hardlink(entry)); 169 170 assertEqualInt(0, archive_read_next_header(a, &entry)); 171 assertEqualString(filename, archive_entry_pathname(entry)); 172 assertEqualString(filename, archive_entry_gname(entry)); 173 assertEqualString(filename, archive_entry_uname(entry)); 174 assertEqualString(filename, archive_entry_symlink(entry)); 175 176 assertEqualInt(0, archive_read_next_header(a, &entry)); 177 assertEqualString(longname, archive_entry_pathname(entry)); 178 179 assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 180 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 181} 182 183#if 0 /* Disable this until Tim check out it. */ 184 185/* 186 * Create an entry starting from a wide-character Unicode pathname, 187 * read it back into "C" locale, which doesn't support the name. 188 * TODO: Figure out the "right" behavior here. 189 */ 190static void 191test_pax_filename_encoding_3(void) 192{ 193 wchar_t badname[] = L"xxxAyyyBzzz"; 194 const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; 195 struct archive *a; 196 struct archive_entry *entry; 197 char buff[65536]; 198 size_t used; 199 200 badname[3] = 0x1234; 201 badname[7] = 0x5678; 202 203 /* If it doesn't exist, just warn and return. */ 204 if (NULL == setlocale(LC_ALL, "C")) { 205 skipping("Can't set \"C\" locale, so can't exercise " 206 "certain character-conversion failures"); 207 return; 208 } 209 210 /* If wctomb is broken, warn and return. */ 211 if (wctomb(buff, 0x1234) > 0) { 212 skipping("Cannot test conversion failures because \"C\" " 213 "locale on this system has no invalid characters."); 214 return; 215 } 216 217 /* If wctomb is broken, warn and return. */ 218 if (wctomb(buff, 0x1234) > 0) { 219 skipping("Cannot test conversion failures because \"C\" " 220 "locale on this system has no invalid characters."); 221 return; 222 } 223 224 /* Skip test if archive_entry_update_pathname_utf8() is broken. */ 225 /* In particular, this is currently broken on Win32 because 226 * setlocale() does not set the default encoding for CP_ACP. */ 227 entry = archive_entry_new(); 228 if (archive_entry_update_pathname_utf8(entry, badname_utf8)) { 229 archive_entry_free(entry); 230 skipping("Cannot test conversion failures."); 231 return; 232 } 233 archive_entry_free(entry); 234 235 assert((a = archive_write_new()) != NULL); 236 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 237 assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 238 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 239 assertEqualInt(0, 240 archive_write_open_memory(a, buff, sizeof(buff), &used)); 241 242 assert((entry = archive_entry_new()) != NULL); 243 /* Set pathname to non-convertible wide value. */ 244 archive_entry_copy_pathname_w(entry, badname); 245 archive_entry_set_filetype(entry, AE_IFREG); 246 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 247 archive_entry_free(entry); 248 249 assert((entry = archive_entry_new()) != NULL); 250 archive_entry_copy_pathname_w(entry, L"abc"); 251 /* Set gname to non-convertible wide value. */ 252 archive_entry_copy_gname_w(entry, badname); 253 archive_entry_set_filetype(entry, AE_IFREG); 254 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 255 archive_entry_free(entry); 256 257 assert((entry = archive_entry_new()) != NULL); 258 archive_entry_copy_pathname_w(entry, L"abc"); 259 /* Set uname to non-convertible wide value. */ 260 archive_entry_copy_uname_w(entry, badname); 261 archive_entry_set_filetype(entry, AE_IFREG); 262 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 263 archive_entry_free(entry); 264 265 assert((entry = archive_entry_new()) != NULL); 266 archive_entry_copy_pathname_w(entry, L"abc"); 267 /* Set hardlink to non-convertible wide value. */ 268 archive_entry_copy_hardlink_w(entry, badname); 269 archive_entry_set_filetype(entry, AE_IFREG); 270 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 271 archive_entry_free(entry); 272 273 assert((entry = archive_entry_new()) != NULL); 274 archive_entry_copy_pathname_w(entry, L"abc"); 275 /* Set symlink to non-convertible wide value. */ 276 archive_entry_copy_symlink_w(entry, badname); 277 archive_entry_set_filetype(entry, AE_IFLNK); 278 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 279 archive_entry_free(entry); 280 281 assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 282 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 283 284 /* 285 * Now read the entries back. 286 */ 287 288 assert((a = archive_read_new()) != NULL); 289 assertEqualInt(0, archive_read_support_format_tar(a)); 290 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 291 292 failure("A non-convertible pathname should cause a warning."); 293 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 294 assertEqualWString(badname, archive_entry_pathname_w(entry)); 295 failure("If native locale can't convert, we should get UTF-8 back."); 296 assertEqualString(badname_utf8, archive_entry_pathname(entry)); 297 298 failure("A non-convertible gname should cause a warning."); 299 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 300 assertEqualWString(badname, archive_entry_gname_w(entry)); 301 failure("If native locale can't convert, we should get UTF-8 back."); 302 assertEqualString(badname_utf8, archive_entry_gname(entry)); 303 304 failure("A non-convertible uname should cause a warning."); 305 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 306 assertEqualWString(badname, archive_entry_uname_w(entry)); 307 failure("If native locale can't convert, we should get UTF-8 back."); 308 assertEqualString(badname_utf8, archive_entry_uname(entry)); 309 310 failure("A non-convertible hardlink should cause a warning."); 311 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 312 assertEqualWString(badname, archive_entry_hardlink_w(entry)); 313 failure("If native locale can't convert, we should get UTF-8 back."); 314 assertEqualString(badname_utf8, archive_entry_hardlink(entry)); 315 316 failure("A non-convertible symlink should cause a warning."); 317 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 318 assertEqualWString(badname, archive_entry_symlink_w(entry)); 319 assertEqualWString(NULL, archive_entry_hardlink_w(entry)); 320 failure("If native locale can't convert, we should get UTF-8 back."); 321 assertEqualString(badname_utf8, archive_entry_symlink(entry)); 322 323 assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); 324 325 assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 326 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 327} 328#else 329static void 330test_pax_filename_encoding_3(void) 331{ 332} 333#endif 334 335/* 336 * Verify that KOI8-R filenames are correctly translated to Unicode and UTF-8. 337 */ 338static void 339test_pax_filename_encoding_KOI8R(void) 340{ 341 struct archive *a; 342 struct archive_entry *entry; 343 char buff[4096]; 344 size_t used; 345 346 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 347 skipping("KOI8-R locale not available on this system."); 348 return; 349 } 350 351 /* Check if the paltform completely supports the string conversion. */ 352 a = archive_write_new(); 353 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 354 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 355 skipping("This system cannot convert character-set" 356 " from KOI8-R to UTF-8."); 357 archive_write_free(a); 358 return; 359 } 360 archive_write_free(a); 361 362 /* Re-create a write archive object since filenames should be written 363 * in UTF-8 by default. */ 364 a = archive_write_new(); 365 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 366 assertEqualInt(ARCHIVE_OK, 367 archive_write_open_memory(a, buff, sizeof(buff), &used)); 368 369 entry = archive_entry_new2(a); 370 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 371 archive_entry_set_filetype(entry, AE_IFREG); 372 archive_entry_set_size(entry, 0); 373 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 374 archive_entry_free(entry); 375 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 376 377 /* Above three characters in KOI8-R should translate to the following 378 * three characters (two bytes each) in UTF-8. */ 379 assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 380} 381 382/* 383 * Verify that CP1251 filenames are correctly translated to Unicode and UTF-8. 384 */ 385static void 386test_pax_filename_encoding_CP1251(void) 387{ 388 struct archive *a; 389 struct archive_entry *entry; 390 char buff[4096]; 391 size_t used; 392 393 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 394 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 395 skipping("KOI8-R locale not available on this system."); 396 return; 397 } 398 399 /* Check if the paltform completely supports the string conversion. */ 400 a = archive_write_new(); 401 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 402 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 403 skipping("This system cannot convert character-set" 404 " from KOI8-R to UTF-8."); 405 archive_write_free(a); 406 return; 407 } 408 archive_write_free(a); 409 410 /* Re-create a write archive object since filenames should be written 411 * in UTF-8 by default. */ 412 a = archive_write_new(); 413 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 414 assertEqualInt(ARCHIVE_OK, 415 archive_write_open_memory(a, buff, sizeof(buff), &used)); 416 417 entry = archive_entry_new2(a); 418 archive_entry_set_pathname(entry, "\xef\xf0\xe8"); 419 archive_entry_set_filetype(entry, AE_IFREG); 420 archive_entry_set_size(entry, 0); 421 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 422 archive_entry_free(entry); 423 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 424 425 /* Above three characters in KOI8-R should translate to the following 426 * three characters (two bytes each) in UTF-8. */ 427 assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 428} 429 430/* 431 * Verify that EUC-JP filenames are correctly translated to Unicode and UTF-8. 432 */ 433static void 434test_pax_filename_encoding_EUCJP(void) 435{ 436 struct archive *a; 437 struct archive_entry *entry; 438 char buff[4096]; 439 size_t used; 440 441 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 442 skipping("eucJP locale not available on this system."); 443 return; 444 } 445 446 /* Check if the paltform completely supports the string conversion. */ 447 a = archive_write_new(); 448 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 449 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 450 skipping("This system cannot convert character-set" 451 " from eucJP to UTF-8."); 452 archive_write_free(a); 453 return; 454 } 455 archive_write_free(a); 456 457 /* Re-create a write archive object since filenames should be written 458 * in UTF-8 by default. */ 459 a = archive_write_new(); 460 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 461 assertEqualInt(ARCHIVE_OK, 462 archive_write_open_memory(a, buff, sizeof(buff), &used)); 463 464 entry = archive_entry_new2(a); 465 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 466 /* Check the Unicode version. */ 467 archive_entry_set_filetype(entry, AE_IFREG); 468 archive_entry_set_size(entry, 0); 469 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 470 archive_entry_free(entry); 471 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 472 473 /* Check UTF-8 version. */ 474 assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 475 476} 477 478/* 479 * Verify that CP932/SJIS filenames are correctly translated to Unicode and UTF-8. 480 */ 481static void 482test_pax_filename_encoding_CP932(void) 483{ 484 struct archive *a; 485 struct archive_entry *entry; 486 char buff[4096]; 487 size_t used; 488 489 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 490 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 491 skipping("eucJP locale not available on this system."); 492 return; 493 } 494 495 /* Check if the paltform completely supports the string conversion. */ 496 a = archive_write_new(); 497 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 498 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 499 skipping("This system cannot convert character-set" 500 " from CP932/SJIS to UTF-8."); 501 archive_write_free(a); 502 return; 503 } 504 archive_write_free(a); 505 506 /* Re-create a write archive object since filenames should be written 507 * in UTF-8 by default. */ 508 a = archive_write_new(); 509 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 510 assertEqualInt(ARCHIVE_OK, 511 archive_write_open_memory(a, buff, sizeof(buff), &used)); 512 513 entry = archive_entry_new2(a); 514 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 515 /* Check the Unicode version. */ 516 archive_entry_set_filetype(entry, AE_IFREG); 517 archive_entry_set_size(entry, 0); 518 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 519 archive_entry_free(entry); 520 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 521 522 /* Check UTF-8 version. */ 523 assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 524 525} 526 527/* 528 * Verify that KOI8-R filenames are not translated to Unicode and UTF-8 529 * when using hdrcharset=BINARY option. 530 */ 531static void 532test_pax_filename_encoding_KOI8R_BINARY(void) 533{ 534 struct archive *a; 535 struct archive_entry *entry; 536 char buff[4096]; 537 size_t used; 538 539 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 540 skipping("KOI8-R locale not available on this system."); 541 return; 542 } 543 544 a = archive_write_new(); 545 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 546 /* BINARY mode should be accepted. */ 547 assertEqualInt(ARCHIVE_OK, 548 archive_write_set_options(a, "hdrcharset=BINARY")); 549 assertEqualInt(ARCHIVE_OK, 550 archive_write_open_memory(a, buff, sizeof(buff), &used)); 551 552 entry = archive_entry_new2(a); 553 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 554 archive_entry_set_filetype(entry, AE_IFREG); 555 archive_entry_set_size(entry, 0); 556 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 557 archive_entry_free(entry); 558 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 559 560 /* "hdrcharset=BINARY" pax attribute should be written. */ 561 assertEqualMem(buff + 512, "21 hdrcharset=BINARY\x0A", 21); 562 /* Above three characters in KOI8-R should not translate to any 563 * character-set. */ 564 assertEqualMem(buff + 512+21, "12 path=\xD0\xD2\xC9\x0A", 12); 565} 566 567/* 568 * Pax format writer only accepts both BINARY and UTF-8. 569 * If other character-set name is specified, you will get ARCHIVE_FAILED. 570 */ 571static void 572test_pax_filename_encoding_KOI8R_CP1251(void) 573{ 574 struct archive *a; 575 576 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 577 skipping("KOI8-R locale not available on this system."); 578 return; 579 } 580 581 a = archive_write_new(); 582 assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 583 /* pax format writer only accepts both BINARY and UTF-8. */ 584 assertEqualInt(ARCHIVE_FAILED, 585 archive_write_set_options(a, "hdrcharset=CP1251")); 586 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 587} 588 589 590DEFINE_TEST(test_pax_filename_encoding) 591{ 592 test_pax_filename_encoding_1(); 593 test_pax_filename_encoding_2(); 594 test_pax_filename_encoding_3(); 595 test_pax_filename_encoding_KOI8R(); 596 test_pax_filename_encoding_CP1251(); 597 test_pax_filename_encoding_EUCJP(); 598 test_pax_filename_encoding_CP932(); 599 test_pax_filename_encoding_KOI8R_BINARY(); 600 test_pax_filename_encoding_KOI8R_CP1251(); 601} 602