1231200Smm/*- 2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA 3231200Smm * All rights reserved. 4231200Smm * 5231200Smm * Redistribution and use in source and binary forms, with or without 6231200Smm * modification, are permitted provided that the following conditions 7231200Smm * are met: 8231200Smm * 1. Redistributions of source code must retain the above copyright 9231200Smm * notice, this list of conditions and the following disclaimer. 10231200Smm * 2. Redistributions in binary form must reproduce the above copyright 11231200Smm * notice, this list of conditions and the following disclaimer in the 12231200Smm * documentation and/or other materials provided with the distribution. 13231200Smm * 14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24231200Smm */ 25231200Smm#include "test.h" 26231200Smm__FBSDID("$FreeBSD"); 27231200Smm 28231200Smm#include <locale.h> 29231200Smm 30231200Smm/* 31231200Smm * The sample tar file was made in LANG=KOI8-R and it contains two 32231200Smm * files the charset of which are different. 33231200Smm * - the filename of first file is stored in BINARY mode. 34231200Smm * - the filename of second file is stored in UTF-8. 35231200Smm * 36231200Smm * Whenever hdrcharset option is specified, we will correctly read the 37238856Smm * filename of second file, which is stored in UTF-8 by default. 38231200Smm */ 39231200Smm 40231200Smmstatic void 41231200Smmtest_read_format_tar_filename_KOI8R_CP866(const char *refname) 42231200Smm{ 43231200Smm struct archive *a; 44231200Smm struct archive_entry *ae; 45231200Smm 46231200Smm /* 47231200Smm * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option. 48231200Smm * We should correctly read two filenames. 49231200Smm */ 50231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia.866") && 51231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP866")) { 52231200Smm skipping("ru_RU.CP866 locale not available on this system."); 53231200Smm return; 54231200Smm } 55231200Smm 56231200Smm /* Test if the platform can convert from UTF-8. */ 57231200Smm assert((a = archive_read_new()) != NULL); 58231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 59231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 60231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 61231200Smm skipping("This system cannot convert character-set" 62231200Smm " from UTF-8 to CP866."); 63231200Smm return; 64231200Smm } 65231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 66231200Smm 67231200Smm assert((a = archive_read_new()) != NULL); 68231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 69231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 70231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 71231200Smm skipping("This system cannot convert character-set" 72231200Smm " from KOI8-R to CP866."); 73231200Smm goto next_test; 74231200Smm } 75231200Smm assertEqualIntA(a, ARCHIVE_OK, 76231200Smm archive_read_open_filename(a, refname, 10240)); 77231200Smm 78231200Smm /* Verify regular first file. */ 79231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 80231200Smm assertEqualString("\x8f\x90\x88\x82\x85\x92", 81231200Smm archive_entry_pathname(ae)); 82231200Smm assertEqualInt(6, archive_entry_size(ae)); 83299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 84299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 85231200Smm 86231200Smm /* Verify regular second file. */ 87231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 88231200Smm assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 89231200Smm archive_entry_pathname(ae)); 90231200Smm assertEqualInt(6, archive_entry_size(ae)); 91299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 92299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 93231200Smm 94231200Smm 95231200Smm /* End of archive. */ 96231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 97231200Smm 98231200Smm /* Verify archive format. */ 99231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 100231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 101231200Smm archive_format(a)); 102231200Smm 103231200Smm /* Close the archive. */ 104231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 105231200Smmnext_test: 106231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 107231200Smm 108231200Smm 109231200Smm /* 110231200Smm * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option. 111231200Smm * The filename we can properly read is only second file. 112231200Smm */ 113231200Smm 114231200Smm assert((a = archive_read_new()) != NULL); 115231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 116231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 117231200Smm assertEqualIntA(a, ARCHIVE_OK, 118231200Smm archive_read_open_filename(a, refname, 10240)); 119231200Smm 120231200Smm /* 121231200Smm * Verify regular first file. 122231200Smm * The filename is not translated to CP866 because hdrcharset 123231200Smm * attribute is BINARY and there is not way to know its charset. 124231200Smm */ 125231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 126231200Smm /* A filename is in KOI8-R. */ 127231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 128231200Smm archive_entry_pathname(ae)); 129231200Smm assertEqualInt(6, archive_entry_size(ae)); 130299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 131299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 132231200Smm 133231200Smm /* 134231200Smm * Verify regular second file. 135231200Smm * The filename is translated from UTF-8 to CP866 136231200Smm */ 137231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 138231200Smm assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 139231200Smm archive_entry_pathname(ae)); 140231200Smm assertEqualInt(6, archive_entry_size(ae)); 141299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 142299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 143231200Smm 144231200Smm 145231200Smm /* End of archive. */ 146231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 147231200Smm 148231200Smm /* Verify archive format. */ 149231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 150231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 151231200Smm archive_format(a)); 152231200Smm 153231200Smm /* Close the archive. */ 154231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 155231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 156231200Smm} 157231200Smm 158231200Smmstatic void 159231200Smmtest_read_format_tar_filename_KOI8R_UTF8(const char *refname) 160231200Smm{ 161231200Smm struct archive *a; 162231200Smm struct archive_entry *ae; 163231200Smm 164231200Smm /* 165231200Smm * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option. 166231200Smm * We should correctly read two filenames. 167231200Smm */ 168231200Smm if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 169231200Smm skipping("en_US.UTF-8 locale not available on this system."); 170231200Smm return; 171231200Smm } 172231200Smm 173231200Smm assert((a = archive_read_new()) != NULL); 174231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 175231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 176231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 177231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 178231200Smm skipping("This system cannot convert character-set" 179231200Smm " from KOI8-R to UTF-8."); 180231200Smm return; 181231200Smm } 182231200Smm assertEqualIntA(a, ARCHIVE_OK, 183231200Smm archive_read_open_filename(a, refname, 10240)); 184231200Smm 185231200Smm /* Verify regular file. */ 186231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 187231200Smm assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2", 188231200Smm archive_entry_pathname(ae)); 189231200Smm assertEqualInt(6, archive_entry_size(ae)); 190299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 191299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 192231200Smm 193231200Smm /* Verify regular file. */ 194231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 195231200Smm assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 196231200Smm archive_entry_pathname(ae)); 197231200Smm assertEqualInt(6, archive_entry_size(ae)); 198299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 199299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 200231200Smm 201299529Smm /* Verify encryption status */ 202299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 203299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 204231200Smm 205231200Smm /* End of archive. */ 206231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 207231200Smm 208231200Smm /* Verify archive format. */ 209231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 210231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 211231200Smm archive_format(a)); 212299529Smm 213299529Smm /* Verify encryption status */ 214299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 215299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 216231200Smm 217231200Smm /* Close the archive. */ 218231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 219231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 220231200Smm 221231200Smm /* 222231200Smm * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option. 223231200Smm * The filename we can properly read is only second file. 224231200Smm */ 225231200Smm 226231200Smm assert((a = archive_read_new()) != NULL); 227231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 228231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 229231200Smm assertEqualIntA(a, ARCHIVE_OK, 230231200Smm archive_read_open_filename(a, refname, 10240)); 231231200Smm 232231200Smm /* 233231200Smm * Verify regular first file. 234231200Smm * The filename is not translated to UTF-8 because hdrcharset 235231200Smm * attribute is BINARY and there is not way to know its charset. 236231200Smm */ 237231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 238231200Smm /* A filename is in KOI8-R. */ 239231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 240231200Smm archive_entry_pathname(ae)); 241231200Smm assertEqualInt(6, archive_entry_size(ae)); 242299529Smm 243299529Smm /* Verify encryption status */ 244299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 245299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 246231200Smm 247231200Smm /* 248231200Smm * Verify regular second file. 249231200Smm */ 250231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 251231200Smm assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 252231200Smm archive_entry_pathname(ae)); 253231200Smm assertEqualInt(6, archive_entry_size(ae)); 254231200Smm 255231200Smm 256231200Smm /* End of archive. */ 257231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 258231200Smm 259231200Smm /* Verify archive format. */ 260231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 261231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 262231200Smm archive_format(a)); 263231200Smm 264231200Smm /* Close the archive. */ 265231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 266231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 267231200Smm} 268231200Smm 269231200Smmstatic void 270231200Smmtest_read_format_tar_filename_KOI8R_CP1251(const char *refname) 271231200Smm{ 272231200Smm struct archive *a; 273231200Smm struct archive_entry *ae; 274231200Smm 275231200Smm /* 276231200Smm * Read filename in CP1251 with "hdrcharset=KOI8-R" option. 277231200Smm * We should correctly read two filenames. 278231200Smm */ 279231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia") && 280231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 281231200Smm skipping("CP1251 locale not available on this system."); 282231200Smm return; 283231200Smm } 284231200Smm 285231200Smm /* Test if the platform can convert from UTF-8. */ 286231200Smm assert((a = archive_read_new()) != NULL); 287231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 288231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 289231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 290231200Smm skipping("This system cannot convert character-set" 291231200Smm " from UTF-8 to CP1251."); 292231200Smm return; 293231200Smm } 294231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 295231200Smm 296231200Smm assert((a = archive_read_new()) != NULL); 297231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 298231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 299231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 300231200Smm skipping("This system cannot convert character-set" 301231200Smm " from KOI8-R to CP1251."); 302231200Smm goto next_test; 303231200Smm } 304231200Smm assertEqualIntA(a, ARCHIVE_OK, 305231200Smm archive_read_open_filename(a, refname, 10240)); 306231200Smm 307231200Smm /* Verify regular first file. */ 308231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 309231200Smm assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2", 310231200Smm archive_entry_pathname(ae)); 311231200Smm assertEqualInt(6, archive_entry_size(ae)); 312299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 313299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 314231200Smm 315231200Smm /* Verify regular second file. */ 316231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 317231200Smm assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 318231200Smm archive_entry_pathname(ae)); 319231200Smm assertEqualInt(6, archive_entry_size(ae)); 320299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 321299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 322231200Smm 323231200Smm 324231200Smm /* End of archive. */ 325231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 326231200Smm 327231200Smm /* Verify archive format. */ 328231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 329231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 330231200Smm archive_format(a)); 331231200Smm 332231200Smm /* Close the archive. */ 333231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 334231200Smmnext_test: 335231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 336231200Smm 337231200Smm /* 338231200Smm * Read filename in CP1251 without "hdrcharset=KOI8-R" option. 339231200Smm * The filename we can properly read is only second file. 340231200Smm */ 341231200Smm 342231200Smm assert((a = archive_read_new()) != NULL); 343231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 344231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 345231200Smm assertEqualIntA(a, ARCHIVE_OK, 346231200Smm archive_read_open_filename(a, refname, 10240)); 347231200Smm 348231200Smm /* 349231200Smm * Verify regular first file. 350231200Smm * The filename is not translated to CP1251 because hdrcharset 351231200Smm * attribute is BINARY and there is not way to know its charset. 352231200Smm */ 353231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 354231200Smm /* A filename is in KOI8-R. */ 355231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 356231200Smm archive_entry_pathname(ae)); 357231200Smm assertEqualInt(6, archive_entry_size(ae)); 358299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 359299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 360231200Smm 361231200Smm /* 362231200Smm * Verify regular second file. 363231200Smm */ 364231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 365231200Smm assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 366231200Smm archive_entry_pathname(ae)); 367231200Smm assertEqualInt(6, archive_entry_size(ae)); 368299529Smm assertEqualInt(archive_entry_is_encrypted(ae), 0); 369299529Smm assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 370231200Smm 371231200Smm 372231200Smm /* End of archive. */ 373231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 374231200Smm 375231200Smm /* Verify archive format. */ 376231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 377231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 378231200Smm archive_format(a)); 379231200Smm 380231200Smm /* Close the archive. */ 381231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 382231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 383231200Smm} 384231200Smm 385231200Smm 386231200SmmDEFINE_TEST(test_read_format_tar_filename) 387231200Smm{ 388231200Smm const char *refname = "test_read_format_tar_filename_koi8r.tar.Z"; 389231200Smm 390231200Smm extract_reference_file(refname); 391231200Smm test_read_format_tar_filename_KOI8R_CP866(refname); 392231200Smm test_read_format_tar_filename_KOI8R_UTF8(refname); 393231200Smm test_read_format_tar_filename_KOI8R_CP1251(refname); 394231200Smm} 395