1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm#include "test.h" 26228763Smm__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/test/test_pax_filename_encoding.c 311041 2017-01-02 01:41:31Z mm $"); 27228753Smm 28228753Smm#include <locale.h> 29228753Smm 30228753Smm/* 31228753Smm * Pax interchange is supposed to encode filenames into 32228753Smm * UTF-8. Of course, that's not always possible. This 33228753Smm * test is intended to verify that filenames always get 34228753Smm * stored and restored correctly, regardless of the encodings. 35228753Smm */ 36228753Smm 37228753Smm/* 38228753Smm * Read a manually-created archive that has filenames that are 39228753Smm * stored in binary instead of UTF-8 and verify that we get 40228753Smm * the right filename returned and that we get a warning only 41228753Smm * if the header isn't marked as binary. 42228753Smm */ 43228753Smmstatic void 44228753Smmtest_pax_filename_encoding_1(void) 45228753Smm{ 46228753Smm static const char testname[] = "test_pax_filename_encoding.tar"; 47228753Smm /* 48228753Smm * \314\214 is a valid 2-byte UTF-8 sequence. 49228753Smm * \374 is invalid in UTF-8. 50228753Smm */ 51228753Smm char filename[] = "abc\314\214mno\374xyz"; 52228753Smm struct archive *a; 53228753Smm struct archive_entry *entry; 54228753Smm 55228753Smm /* 56228753Smm * Read an archive that has non-UTF8 pax filenames in it. 57228753Smm */ 58228753Smm extract_reference_file(testname); 59228753Smm a = archive_read_new(); 60228753Smm assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a)); 61232153Smm assertEqualInt(ARCHIVE_OK, archive_read_support_filter_all(a)); 62228753Smm assertEqualInt(ARCHIVE_OK, 63228753Smm archive_read_open_filename(a, testname, 10240)); 64228753Smm /* 65228753Smm * First entry in this test archive has an invalid UTF-8 sequence 66228753Smm * in it, but the header is not marked as hdrcharset=BINARY, so that 67228753Smm * requires a warning. 68228753Smm */ 69228753Smm failure("Invalid UTF8 in a pax archive pathname should cause a warning"); 70228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 71228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 72228753Smm /* 73228753Smm * Second entry is identical except that it does have 74228753Smm * hdrcharset=BINARY, so no warning should be generated. 75228753Smm */ 76228753Smm failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n" 77228753Smm " characters in it without generating a warning"); 78228753Smm assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); 79228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 80232153Smm archive_read_free(a); 81228753Smm} 82228753Smm 83228753Smm/* 84228753Smm * Set the locale and write a pathname containing invalid characters. 85228753Smm * This should work; the underlying implementation should automatically 86228753Smm * fall back to storing the pathname in binary. 87228753Smm */ 88228753Smmstatic void 89228753Smmtest_pax_filename_encoding_2(void) 90228753Smm{ 91228753Smm char filename[] = "abc\314\214mno\374xyz"; 92228753Smm struct archive *a; 93228753Smm struct archive_entry *entry; 94228753Smm char buff[65536]; 95228753Smm char longname[] = "abc\314\214mno\374xyz" 96228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 97228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 98228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 99228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 100228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 101228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 102228753Smm ; 103228753Smm size_t used; 104228753Smm 105228753Smm /* 106228753Smm * We need a starting locale which has invalid sequences. 107232153Smm * en_US.UTF-8 seems to be commonly supported. 108228753Smm */ 109228753Smm /* If it doesn't exist, just warn and return. */ 110232153Smm if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 111228753Smm skipping("invalid encoding tests require a suitable locale;" 112232153Smm " en_US.UTF-8 not available on this system"); 113228753Smm return; 114228753Smm } 115228753Smm 116228753Smm assert((a = archive_write_new()) != NULL); 117228753Smm assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 118248616Smm assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 119228753Smm assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 120228753Smm assertEqualInt(0, 121228753Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 122228753Smm 123228753Smm assert((entry = archive_entry_new()) != NULL); 124228753Smm /* Set pathname, gname, uname, hardlink to nonconvertible values. */ 125228753Smm archive_entry_copy_pathname(entry, filename); 126228753Smm archive_entry_copy_gname(entry, filename); 127228753Smm archive_entry_copy_uname(entry, filename); 128228753Smm archive_entry_copy_hardlink(entry, filename); 129228753Smm archive_entry_set_filetype(entry, AE_IFREG); 130228753Smm failure("This should generate a warning for nonconvertible names."); 131228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 132228753Smm archive_entry_free(entry); 133228753Smm 134228753Smm assert((entry = archive_entry_new()) != NULL); 135228753Smm /* Set path, gname, uname, and symlink to nonconvertible values. */ 136228753Smm archive_entry_copy_pathname(entry, filename); 137228753Smm archive_entry_copy_gname(entry, filename); 138228753Smm archive_entry_copy_uname(entry, filename); 139228753Smm archive_entry_copy_symlink(entry, filename); 140228753Smm archive_entry_set_filetype(entry, AE_IFLNK); 141228753Smm failure("This should generate a warning for nonconvertible names."); 142228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 143228753Smm archive_entry_free(entry); 144228753Smm 145228753Smm assert((entry = archive_entry_new()) != NULL); 146228753Smm /* Set pathname to a very long nonconvertible value. */ 147228753Smm archive_entry_copy_pathname(entry, longname); 148228753Smm archive_entry_set_filetype(entry, AE_IFREG); 149228753Smm failure("This should generate a warning for nonconvertible names."); 150228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 151228753Smm archive_entry_free(entry); 152228753Smm 153232153Smm assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 154232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 155228753Smm 156228753Smm /* 157228753Smm * Now read the entries back. 158228753Smm */ 159228753Smm 160228753Smm assert((a = archive_read_new()) != NULL); 161228753Smm assertEqualInt(0, archive_read_support_format_tar(a)); 162228753Smm assertEqualInt(0, archive_read_open_memory(a, buff, used)); 163228753Smm 164228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 165228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 166228753Smm assertEqualString(filename, archive_entry_gname(entry)); 167228753Smm assertEqualString(filename, archive_entry_uname(entry)); 168228753Smm assertEqualString(filename, archive_entry_hardlink(entry)); 169228753Smm 170228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 171228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 172228753Smm assertEqualString(filename, archive_entry_gname(entry)); 173228753Smm assertEqualString(filename, archive_entry_uname(entry)); 174228753Smm assertEqualString(filename, archive_entry_symlink(entry)); 175228753Smm 176228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 177228753Smm assertEqualString(longname, archive_entry_pathname(entry)); 178228753Smm 179232153Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 180232153Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 181228753Smm} 182228753Smm 183232153Smm#if 0 /* Disable this until Tim check out it. */ 184232153Smm 185228753Smm/* 186228753Smm * Create an entry starting from a wide-character Unicode pathname, 187228753Smm * read it back into "C" locale, which doesn't support the name. 188228753Smm * TODO: Figure out the "right" behavior here. 189228753Smm */ 190228753Smmstatic void 191228753Smmtest_pax_filename_encoding_3(void) 192228753Smm{ 193228753Smm wchar_t badname[] = L"xxxAyyyBzzz"; 194228753Smm const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; 195228753Smm struct archive *a; 196228753Smm struct archive_entry *entry; 197228753Smm char buff[65536]; 198228753Smm size_t used; 199228753Smm 200228753Smm badname[3] = 0x1234; 201228753Smm badname[7] = 0x5678; 202228753Smm 203228753Smm /* If it doesn't exist, just warn and return. */ 204228753Smm if (NULL == setlocale(LC_ALL, "C")) { 205228753Smm skipping("Can't set \"C\" locale, so can't exercise " 206228753Smm "certain character-conversion failures"); 207228753Smm return; 208228753Smm } 209228753Smm 210228753Smm /* If wctomb is broken, warn and return. */ 211228753Smm if (wctomb(buff, 0x1234) > 0) { 212228753Smm skipping("Cannot test conversion failures because \"C\" " 213228753Smm "locale on this system has no invalid characters."); 214228753Smm return; 215228753Smm } 216228753Smm 217228753Smm /* If wctomb is broken, warn and return. */ 218228753Smm if (wctomb(buff, 0x1234) > 0) { 219228753Smm skipping("Cannot test conversion failures because \"C\" " 220228753Smm "locale on this system has no invalid characters."); 221228753Smm return; 222228753Smm } 223228753Smm 224228753Smm /* Skip test if archive_entry_update_pathname_utf8() is broken. */ 225228753Smm /* In particular, this is currently broken on Win32 because 226228753Smm * setlocale() does not set the default encoding for CP_ACP. */ 227228753Smm entry = archive_entry_new(); 228228753Smm if (archive_entry_update_pathname_utf8(entry, badname_utf8)) { 229228753Smm archive_entry_free(entry); 230228753Smm skipping("Cannot test conversion failures."); 231228753Smm return; 232228753Smm } 233228753Smm archive_entry_free(entry); 234228753Smm 235228753Smm assert((a = archive_write_new()) != NULL); 236228753Smm assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 237248616Smm assertEqualIntA(a, 0, archive_write_add_filter_none(a)); 238228753Smm assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 239228753Smm assertEqualInt(0, 240228753Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 241228753Smm 242228753Smm assert((entry = archive_entry_new()) != NULL); 243228753Smm /* Set pathname to non-convertible wide value. */ 244228753Smm archive_entry_copy_pathname_w(entry, badname); 245228753Smm archive_entry_set_filetype(entry, AE_IFREG); 246228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 247228753Smm archive_entry_free(entry); 248228753Smm 249228753Smm assert((entry = archive_entry_new()) != NULL); 250228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 251228753Smm /* Set gname to non-convertible wide value. */ 252228753Smm archive_entry_copy_gname_w(entry, badname); 253228753Smm archive_entry_set_filetype(entry, AE_IFREG); 254228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 255228753Smm archive_entry_free(entry); 256228753Smm 257228753Smm assert((entry = archive_entry_new()) != NULL); 258228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 259228753Smm /* Set uname to non-convertible wide value. */ 260228753Smm archive_entry_copy_uname_w(entry, badname); 261228753Smm archive_entry_set_filetype(entry, AE_IFREG); 262228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 263228753Smm archive_entry_free(entry); 264228753Smm 265228753Smm assert((entry = archive_entry_new()) != NULL); 266228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 267228753Smm /* Set hardlink to non-convertible wide value. */ 268228753Smm archive_entry_copy_hardlink_w(entry, badname); 269228753Smm archive_entry_set_filetype(entry, AE_IFREG); 270228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 271228753Smm archive_entry_free(entry); 272228753Smm 273228753Smm assert((entry = archive_entry_new()) != NULL); 274228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 275228753Smm /* Set symlink to non-convertible wide value. */ 276228753Smm archive_entry_copy_symlink_w(entry, badname); 277228753Smm archive_entry_set_filetype(entry, AE_IFLNK); 278228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 279228753Smm archive_entry_free(entry); 280228753Smm 281232153Smm assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); 282232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 283228753Smm 284228753Smm /* 285228753Smm * Now read the entries back. 286228753Smm */ 287228753Smm 288228753Smm assert((a = archive_read_new()) != NULL); 289228753Smm assertEqualInt(0, archive_read_support_format_tar(a)); 290228753Smm assertEqualInt(0, archive_read_open_memory(a, buff, used)); 291228753Smm 292228753Smm failure("A non-convertible pathname should cause a warning."); 293228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 294228753Smm assertEqualWString(badname, archive_entry_pathname_w(entry)); 295228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 296228753Smm assertEqualString(badname_utf8, archive_entry_pathname(entry)); 297228753Smm 298228753Smm failure("A non-convertible gname should cause a warning."); 299228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 300228753Smm assertEqualWString(badname, archive_entry_gname_w(entry)); 301228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 302228753Smm assertEqualString(badname_utf8, archive_entry_gname(entry)); 303228753Smm 304228753Smm failure("A non-convertible uname should cause a warning."); 305228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 306228753Smm assertEqualWString(badname, archive_entry_uname_w(entry)); 307228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 308228753Smm assertEqualString(badname_utf8, archive_entry_uname(entry)); 309228753Smm 310228753Smm failure("A non-convertible hardlink should cause a warning."); 311228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 312228753Smm assertEqualWString(badname, archive_entry_hardlink_w(entry)); 313228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 314228753Smm assertEqualString(badname_utf8, archive_entry_hardlink(entry)); 315228753Smm 316228753Smm failure("A non-convertible symlink should cause a warning."); 317228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 318228753Smm assertEqualWString(badname, archive_entry_symlink_w(entry)); 319228753Smm assertEqualWString(NULL, archive_entry_hardlink_w(entry)); 320228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 321228753Smm assertEqualString(badname_utf8, archive_entry_symlink(entry)); 322228753Smm 323228753Smm assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); 324228753Smm 325232153Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); 326232153Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 327228753Smm} 328232153Smm#else 329232153Smmstatic void 330232153Smmtest_pax_filename_encoding_3(void) 331232153Smm{ 332232153Smm} 333232153Smm#endif 334228753Smm 335232153Smm/* 336232153Smm * Verify that KOI8-R filenames are correctly translated to Unicode and UTF-8. 337232153Smm */ 338299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R) 339232153Smm{ 340232153Smm struct archive *a; 341232153Smm struct archive_entry *entry; 342232153Smm char buff[4096]; 343232153Smm size_t used; 344232153Smm 345232153Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 346232153Smm skipping("KOI8-R locale not available on this system."); 347232153Smm return; 348232153Smm } 349232153Smm 350311041Smm /* Check if the platform completely supports the string conversion. */ 351232153Smm a = archive_write_new(); 352232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 353232153Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 354232153Smm skipping("This system cannot convert character-set" 355232153Smm " from KOI8-R to UTF-8."); 356232153Smm archive_write_free(a); 357232153Smm return; 358232153Smm } 359232153Smm archive_write_free(a); 360232153Smm 361232153Smm /* Re-create a write archive object since filenames should be written 362232153Smm * in UTF-8 by default. */ 363232153Smm a = archive_write_new(); 364232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 365232153Smm assertEqualInt(ARCHIVE_OK, 366232153Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 367232153Smm 368232153Smm entry = archive_entry_new2(a); 369232153Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 370232153Smm archive_entry_set_filetype(entry, AE_IFREG); 371232153Smm archive_entry_set_size(entry, 0); 372232153Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 373232153Smm archive_entry_free(entry); 374232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 375232153Smm 376232153Smm /* Above three characters in KOI8-R should translate to the following 377232153Smm * three characters (two bytes each) in UTF-8. */ 378232153Smm assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 379232153Smm} 380232153Smm 381232153Smm/* 382232153Smm * Verify that CP1251 filenames are correctly translated to Unicode and UTF-8. 383232153Smm */ 384299529SmmDEFINE_TEST(test_pax_filename_encoding_CP1251) 385232153Smm{ 386232153Smm struct archive *a; 387232153Smm struct archive_entry *entry; 388232153Smm char buff[4096]; 389232153Smm size_t used; 390232153Smm 391232153Smm if (NULL == setlocale(LC_ALL, "Russian_Russia") && 392232153Smm NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 393232153Smm skipping("KOI8-R locale not available on this system."); 394232153Smm return; 395232153Smm } 396232153Smm 397311041Smm /* Check if the platform completely supports the string conversion. */ 398232153Smm a = archive_write_new(); 399232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 400232153Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 401232153Smm skipping("This system cannot convert character-set" 402232153Smm " from KOI8-R to UTF-8."); 403232153Smm archive_write_free(a); 404232153Smm return; 405232153Smm } 406232153Smm archive_write_free(a); 407232153Smm 408232153Smm /* Re-create a write archive object since filenames should be written 409232153Smm * in UTF-8 by default. */ 410232153Smm a = archive_write_new(); 411232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 412232153Smm assertEqualInt(ARCHIVE_OK, 413232153Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 414232153Smm 415232153Smm entry = archive_entry_new2(a); 416232153Smm archive_entry_set_pathname(entry, "\xef\xf0\xe8"); 417232153Smm archive_entry_set_filetype(entry, AE_IFREG); 418232153Smm archive_entry_set_size(entry, 0); 419232153Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 420232153Smm archive_entry_free(entry); 421232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 422232153Smm 423232153Smm /* Above three characters in KOI8-R should translate to the following 424232153Smm * three characters (two bytes each) in UTF-8. */ 425232153Smm assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15); 426232153Smm} 427232153Smm 428232153Smm/* 429232153Smm * Verify that EUC-JP filenames are correctly translated to Unicode and UTF-8. 430232153Smm */ 431299529SmmDEFINE_TEST(test_pax_filename_encoding_EUCJP) 432232153Smm{ 433232153Smm struct archive *a; 434232153Smm struct archive_entry *entry; 435232153Smm char buff[4096]; 436232153Smm size_t used; 437232153Smm 438232153Smm if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 439232153Smm skipping("eucJP locale not available on this system."); 440232153Smm return; 441232153Smm } 442232153Smm 443311041Smm /* Check if the platform completely supports the string conversion. */ 444232153Smm a = archive_write_new(); 445232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 446232153Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 447232153Smm skipping("This system cannot convert character-set" 448232153Smm " from eucJP to UTF-8."); 449232153Smm archive_write_free(a); 450232153Smm return; 451232153Smm } 452232153Smm archive_write_free(a); 453232153Smm 454232153Smm /* Re-create a write archive object since filenames should be written 455232153Smm * in UTF-8 by default. */ 456232153Smm a = archive_write_new(); 457232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 458232153Smm assertEqualInt(ARCHIVE_OK, 459232153Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 460232153Smm 461232153Smm entry = archive_entry_new2(a); 462232153Smm archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 463232153Smm /* Check the Unicode version. */ 464232153Smm archive_entry_set_filetype(entry, AE_IFREG); 465232153Smm archive_entry_set_size(entry, 0); 466232153Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 467232153Smm archive_entry_free(entry); 468232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 469232153Smm 470232153Smm /* Check UTF-8 version. */ 471232153Smm assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 472232153Smm 473232153Smm} 474232153Smm 475232153Smm/* 476232153Smm * Verify that CP932/SJIS filenames are correctly translated to Unicode and UTF-8. 477232153Smm */ 478299529SmmDEFINE_TEST(test_pax_filename_encoding_CP932) 479232153Smm{ 480232153Smm struct archive *a; 481232153Smm struct archive_entry *entry; 482232153Smm char buff[4096]; 483232153Smm size_t used; 484232153Smm 485232153Smm if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 486232153Smm NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 487232153Smm skipping("eucJP locale not available on this system."); 488232153Smm return; 489232153Smm } 490232153Smm 491311041Smm /* Check if the platform completely supports the string conversion. */ 492232153Smm a = archive_write_new(); 493232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 494232153Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 495232153Smm skipping("This system cannot convert character-set" 496232153Smm " from CP932/SJIS to UTF-8."); 497232153Smm archive_write_free(a); 498232153Smm return; 499232153Smm } 500232153Smm archive_write_free(a); 501232153Smm 502232153Smm /* Re-create a write archive object since filenames should be written 503232153Smm * in UTF-8 by default. */ 504232153Smm a = archive_write_new(); 505232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 506232153Smm assertEqualInt(ARCHIVE_OK, 507232153Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 508232153Smm 509232153Smm entry = archive_entry_new2(a); 510232153Smm archive_entry_set_pathname(entry, "\x95\x5C.txt"); 511232153Smm /* Check the Unicode version. */ 512232153Smm archive_entry_set_filetype(entry, AE_IFREG); 513232153Smm archive_entry_set_size(entry, 0); 514232153Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 515232153Smm archive_entry_free(entry); 516232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 517232153Smm 518232153Smm /* Check UTF-8 version. */ 519232153Smm assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16); 520232153Smm 521232153Smm} 522232153Smm 523232153Smm/* 524232153Smm * Verify that KOI8-R filenames are not translated to Unicode and UTF-8 525232153Smm * when using hdrcharset=BINARY option. 526232153Smm */ 527299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R_BINARY) 528232153Smm{ 529232153Smm struct archive *a; 530232153Smm struct archive_entry *entry; 531232153Smm char buff[4096]; 532232153Smm size_t used; 533232153Smm 534232153Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 535232153Smm skipping("KOI8-R locale not available on this system."); 536232153Smm return; 537232153Smm } 538232153Smm 539232153Smm a = archive_write_new(); 540232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 541232153Smm /* BINARY mode should be accepted. */ 542232153Smm assertEqualInt(ARCHIVE_OK, 543232153Smm archive_write_set_options(a, "hdrcharset=BINARY")); 544232153Smm assertEqualInt(ARCHIVE_OK, 545232153Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 546232153Smm 547232153Smm entry = archive_entry_new2(a); 548232153Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 549232153Smm archive_entry_set_filetype(entry, AE_IFREG); 550232153Smm archive_entry_set_size(entry, 0); 551232153Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 552232153Smm archive_entry_free(entry); 553232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 554232153Smm 555232153Smm /* "hdrcharset=BINARY" pax attribute should be written. */ 556232153Smm assertEqualMem(buff + 512, "21 hdrcharset=BINARY\x0A", 21); 557232153Smm /* Above three characters in KOI8-R should not translate to any 558232153Smm * character-set. */ 559232153Smm assertEqualMem(buff + 512+21, "12 path=\xD0\xD2\xC9\x0A", 12); 560232153Smm} 561232153Smm 562232153Smm/* 563232153Smm * Pax format writer only accepts both BINARY and UTF-8. 564232153Smm * If other character-set name is specified, you will get ARCHIVE_FAILED. 565232153Smm */ 566299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251) 567232153Smm{ 568232153Smm struct archive *a; 569232153Smm 570232153Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 571232153Smm skipping("KOI8-R locale not available on this system."); 572232153Smm return; 573232153Smm } 574232153Smm 575232153Smm a = archive_write_new(); 576232153Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a)); 577232153Smm /* pax format writer only accepts both BINARY and UTF-8. */ 578232153Smm assertEqualInt(ARCHIVE_FAILED, 579232153Smm archive_write_set_options(a, "hdrcharset=CP1251")); 580232153Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 581232153Smm} 582232153Smm 583232153Smm 584228753SmmDEFINE_TEST(test_pax_filename_encoding) 585228753Smm{ 586228753Smm test_pax_filename_encoding_1(); 587228753Smm test_pax_filename_encoding_2(); 588228753Smm test_pax_filename_encoding_3(); 589228753Smm} 590