test_read_format_tar_filename.c revision 238856
1231200Smm/*- 2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA 3231200Smm * All rights reserved. 4231200Smm * 5231200Smm * Redistribution and use in source and binary forms, with or without 6231200Smm * modification, are permitted provided that the following conditions 7231200Smm * are met: 8231200Smm * 1. Redistributions of source code must retain the above copyright 9231200Smm * notice, this list of conditions and the following disclaimer. 10231200Smm * 2. Redistributions in binary form must reproduce the above copyright 11231200Smm * notice, this list of conditions and the following disclaimer in the 12231200Smm * documentation and/or other materials provided with the distribution. 13231200Smm * 14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24231200Smm */ 25231200Smm#include "test.h" 26231200Smm__FBSDID("$FreeBSD"); 27231200Smm 28231200Smm#include <locale.h> 29231200Smm 30231200Smm/* 31231200Smm * The sample tar file was made in LANG=KOI8-R and it contains two 32231200Smm * files the charset of which are different. 33231200Smm * - the filename of first file is stored in BINARY mode. 34231200Smm * - the filename of second file is stored in UTF-8. 35231200Smm * 36231200Smm * Whenever hdrcharset option is specified, we will correctly read the 37238856Smm * filename of second file, which is stored in UTF-8 by default. 38231200Smm */ 39231200Smm 40231200Smmstatic void 41231200Smmtest_read_format_tar_filename_KOI8R_CP866(const char *refname) 42231200Smm{ 43231200Smm struct archive *a; 44231200Smm struct archive_entry *ae; 45231200Smm 46231200Smm /* 47231200Smm * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option. 48231200Smm * We should correctly read two filenames. 49231200Smm */ 50231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia.866") && 51231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP866")) { 52231200Smm skipping("ru_RU.CP866 locale not available on this system."); 53231200Smm return; 54231200Smm } 55231200Smm 56231200Smm /* Test if the platform can convert from UTF-8. */ 57231200Smm assert((a = archive_read_new()) != NULL); 58231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 59231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 60231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 61231200Smm skipping("This system cannot convert character-set" 62231200Smm " from UTF-8 to CP866."); 63231200Smm return; 64231200Smm } 65231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 66231200Smm 67231200Smm assert((a = archive_read_new()) != NULL); 68231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 69231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 70231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 71231200Smm skipping("This system cannot convert character-set" 72231200Smm " from KOI8-R to CP866."); 73231200Smm goto next_test; 74231200Smm } 75231200Smm assertEqualIntA(a, ARCHIVE_OK, 76231200Smm archive_read_open_filename(a, refname, 10240)); 77231200Smm 78231200Smm /* Verify regular first file. */ 79231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 80231200Smm assertEqualString("\x8f\x90\x88\x82\x85\x92", 81231200Smm archive_entry_pathname(ae)); 82231200Smm assertEqualInt(6, archive_entry_size(ae)); 83231200Smm 84231200Smm /* Verify regular second file. */ 85231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 86231200Smm assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 87231200Smm archive_entry_pathname(ae)); 88231200Smm assertEqualInt(6, archive_entry_size(ae)); 89231200Smm 90231200Smm 91231200Smm /* End of archive. */ 92231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 93231200Smm 94231200Smm /* Verify archive format. */ 95231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 96231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 97231200Smm archive_format(a)); 98231200Smm 99231200Smm /* Close the archive. */ 100231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 101231200Smmnext_test: 102231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 103231200Smm 104231200Smm 105231200Smm /* 106231200Smm * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option. 107231200Smm * The filename we can properly read is only second file. 108231200Smm */ 109231200Smm 110231200Smm assert((a = archive_read_new()) != NULL); 111231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 112231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 113231200Smm assertEqualIntA(a, ARCHIVE_OK, 114231200Smm archive_read_open_filename(a, refname, 10240)); 115231200Smm 116231200Smm /* 117231200Smm * Verify regular first file. 118231200Smm * The filename is not translated to CP866 because hdrcharset 119231200Smm * attribute is BINARY and there is not way to know its charset. 120231200Smm */ 121231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 122231200Smm /* A filename is in KOI8-R. */ 123231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 124231200Smm archive_entry_pathname(ae)); 125231200Smm assertEqualInt(6, archive_entry_size(ae)); 126231200Smm 127231200Smm /* 128231200Smm * Verify regular second file. 129231200Smm * The filename is translated from UTF-8 to CP866 130231200Smm */ 131231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 132231200Smm assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 133231200Smm archive_entry_pathname(ae)); 134231200Smm assertEqualInt(6, archive_entry_size(ae)); 135231200Smm 136231200Smm 137231200Smm /* End of archive. */ 138231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 139231200Smm 140231200Smm /* Verify archive format. */ 141231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 142231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 143231200Smm archive_format(a)); 144231200Smm 145231200Smm /* Close the archive. */ 146231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 147231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 148231200Smm} 149231200Smm 150231200Smmstatic void 151231200Smmtest_read_format_tar_filename_KOI8R_UTF8(const char *refname) 152231200Smm{ 153231200Smm struct archive *a; 154231200Smm struct archive_entry *ae; 155231200Smm 156231200Smm /* 157231200Smm * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option. 158231200Smm * We should correctly read two filenames. 159231200Smm */ 160231200Smm if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 161231200Smm skipping("en_US.UTF-8 locale not available on this system."); 162231200Smm return; 163231200Smm } 164231200Smm 165231200Smm assert((a = archive_read_new()) != NULL); 166231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 167231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 168231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 169231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 170231200Smm skipping("This system cannot convert character-set" 171231200Smm " from KOI8-R to UTF-8."); 172231200Smm return; 173231200Smm } 174231200Smm assertEqualIntA(a, ARCHIVE_OK, 175231200Smm archive_read_open_filename(a, refname, 10240)); 176231200Smm 177231200Smm /* Verify regular file. */ 178231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 179231200Smm assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2", 180231200Smm archive_entry_pathname(ae)); 181231200Smm assertEqualInt(6, archive_entry_size(ae)); 182231200Smm 183231200Smm /* Verify regular file. */ 184231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 185231200Smm assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 186231200Smm archive_entry_pathname(ae)); 187231200Smm assertEqualInt(6, archive_entry_size(ae)); 188231200Smm 189231200Smm 190231200Smm /* End of archive. */ 191231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 192231200Smm 193231200Smm /* Verify archive format. */ 194231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 195231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 196231200Smm archive_format(a)); 197231200Smm 198231200Smm /* Close the archive. */ 199231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 200231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 201231200Smm 202231200Smm /* 203231200Smm * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option. 204231200Smm * The filename we can properly read is only second file. 205231200Smm */ 206231200Smm 207231200Smm assert((a = archive_read_new()) != NULL); 208231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 209231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 210231200Smm assertEqualIntA(a, ARCHIVE_OK, 211231200Smm archive_read_open_filename(a, refname, 10240)); 212231200Smm 213231200Smm /* 214231200Smm * Verify regular first file. 215231200Smm * The filename is not translated to UTF-8 because hdrcharset 216231200Smm * attribute is BINARY and there is not way to know its charset. 217231200Smm */ 218231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 219231200Smm /* A filename is in KOI8-R. */ 220231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 221231200Smm archive_entry_pathname(ae)); 222231200Smm assertEqualInt(6, archive_entry_size(ae)); 223231200Smm 224231200Smm /* 225231200Smm * Verify regular second file. 226231200Smm */ 227231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 228231200Smm assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 229231200Smm archive_entry_pathname(ae)); 230231200Smm assertEqualInt(6, archive_entry_size(ae)); 231231200Smm 232231200Smm 233231200Smm /* End of archive. */ 234231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 235231200Smm 236231200Smm /* Verify archive format. */ 237231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 238231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 239231200Smm archive_format(a)); 240231200Smm 241231200Smm /* Close the archive. */ 242231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 243231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 244231200Smm} 245231200Smm 246231200Smmstatic void 247231200Smmtest_read_format_tar_filename_KOI8R_CP1251(const char *refname) 248231200Smm{ 249231200Smm struct archive *a; 250231200Smm struct archive_entry *ae; 251231200Smm 252231200Smm /* 253231200Smm * Read filename in CP1251 with "hdrcharset=KOI8-R" option. 254231200Smm * We should correctly read two filenames. 255231200Smm */ 256231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia") && 257231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 258231200Smm skipping("CP1251 locale not available on this system."); 259231200Smm return; 260231200Smm } 261231200Smm 262231200Smm /* Test if the platform can convert from UTF-8. */ 263231200Smm assert((a = archive_read_new()) != NULL); 264231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 265231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 266231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 267231200Smm skipping("This system cannot convert character-set" 268231200Smm " from UTF-8 to CP1251."); 269231200Smm return; 270231200Smm } 271231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 272231200Smm 273231200Smm assert((a = archive_read_new()) != NULL); 274231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 275231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 276231200Smm if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 277231200Smm skipping("This system cannot convert character-set" 278231200Smm " from KOI8-R to CP1251."); 279231200Smm goto next_test; 280231200Smm } 281231200Smm assertEqualIntA(a, ARCHIVE_OK, 282231200Smm archive_read_open_filename(a, refname, 10240)); 283231200Smm 284231200Smm /* Verify regular first file. */ 285231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 286231200Smm assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2", 287231200Smm archive_entry_pathname(ae)); 288231200Smm assertEqualInt(6, archive_entry_size(ae)); 289231200Smm 290231200Smm /* Verify regular second file. */ 291231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 292231200Smm assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 293231200Smm archive_entry_pathname(ae)); 294231200Smm assertEqualInt(6, archive_entry_size(ae)); 295231200Smm 296231200Smm 297231200Smm /* End of archive. */ 298231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 299231200Smm 300231200Smm /* Verify archive format. */ 301231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 302231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 303231200Smm archive_format(a)); 304231200Smm 305231200Smm /* Close the archive. */ 306231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 307231200Smmnext_test: 308231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 309231200Smm 310231200Smm /* 311231200Smm * Read filename in CP1251 without "hdrcharset=KOI8-R" option. 312231200Smm * The filename we can properly read is only second file. 313231200Smm */ 314231200Smm 315231200Smm assert((a = archive_read_new()) != NULL); 316231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 317231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 318231200Smm assertEqualIntA(a, ARCHIVE_OK, 319231200Smm archive_read_open_filename(a, refname, 10240)); 320231200Smm 321231200Smm /* 322231200Smm * Verify regular first file. 323231200Smm * The filename is not translated to CP1251 because hdrcharset 324231200Smm * attribute is BINARY and there is not way to know its charset. 325231200Smm */ 326231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 327231200Smm /* A filename is in KOI8-R. */ 328231200Smm assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 329231200Smm archive_entry_pathname(ae)); 330231200Smm assertEqualInt(6, archive_entry_size(ae)); 331231200Smm 332231200Smm /* 333231200Smm * Verify regular second file. 334231200Smm */ 335231200Smm assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 336231200Smm assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 337231200Smm archive_entry_pathname(ae)); 338231200Smm assertEqualInt(6, archive_entry_size(ae)); 339231200Smm 340231200Smm 341231200Smm /* End of archive. */ 342231200Smm assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 343231200Smm 344231200Smm /* Verify archive format. */ 345231200Smm assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 346231200Smm assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 347231200Smm archive_format(a)); 348231200Smm 349231200Smm /* Close the archive. */ 350231200Smm assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 351231200Smm assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 352231200Smm} 353231200Smm 354231200Smm 355231200SmmDEFINE_TEST(test_read_format_tar_filename) 356231200Smm{ 357231200Smm const char *refname = "test_read_format_tar_filename_koi8r.tar.Z"; 358231200Smm 359231200Smm extract_reference_file(refname); 360231200Smm test_read_format_tar_filename_KOI8R_CP866(refname); 361231200Smm test_read_format_tar_filename_KOI8R_UTF8(refname); 362231200Smm test_read_format_tar_filename_KOI8R_CP1251(refname); 363231200Smm} 364