1231200Smm/*- 2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA 3231200Smm * All rights reserved. 4231200Smm * 5231200Smm * Redistribution and use in source and binary forms, with or without 6231200Smm * modification, are permitted provided that the following conditions 7231200Smm * are met: 8231200Smm * 1. Redistributions of source code must retain the above copyright 9231200Smm * notice, this list of conditions and the following disclaimer. 10231200Smm * 2. Redistributions in binary form must reproduce the above copyright 11231200Smm * notice, this list of conditions and the following disclaimer in the 12231200Smm * documentation and/or other materials provided with the distribution. 13231200Smm * 14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24231200Smm */ 25231200Smm#include "test.h" 26231200Smm__FBSDID("$FreeBSD$"); 27231200Smm 28231200Smm#include <locale.h> 29231200Smm 30299529SmmDEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866) 31231200Smm{ 32231200Smm struct archive *a; 33231200Smm struct archive_entry *entry; 34231200Smm char buff[4096]; 35231200Smm size_t used; 36231200Smm 37231200Smm if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 38231200Smm skipping("en_US.UTF-8 locale not available on this system."); 39231200Smm return; 40231200Smm } 41231200Smm 42231200Smm /* 43231200Smm * Verify that UTF-8 filenames are correctly translated into CP866 44231200Smm * and stored with hdrcharset=CP866 option. 45231200Smm */ 46231200Smm a = archive_write_new(); 47231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 48231200Smm if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 49231200Smm skipping("This system cannot convert character-set" 50231200Smm " from UTF-8 to CP866."); 51231200Smm archive_write_free(a); 52231200Smm return; 53231200Smm } 54231200Smm assertEqualInt(ARCHIVE_OK, 55231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 56231200Smm 57231200Smm entry = archive_entry_new2(a); 58231200Smm /* Set a UTF-8 filename. */ 59231200Smm archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 60231200Smm archive_entry_set_filetype(entry, AE_IFREG); 61231200Smm archive_entry_set_size(entry, 0); 62231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 63231200Smm archive_entry_free(entry); 64231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 65231200Smm 66231200Smm /* Above three characters in UTF-8 should translate to the following 67231200Smm * three characters in CP866. */ 68231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 69231200Smm} 70231200Smm 71299529SmmDEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8) 72231200Smm{ 73231200Smm struct archive *a; 74231200Smm struct archive_entry *entry; 75231200Smm char buff[4096]; 76231200Smm size_t used; 77231200Smm 78231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 79231200Smm skipping("KOI8-R locale not available on this system."); 80231200Smm return; 81231200Smm } 82231200Smm 83231200Smm /* 84231200Smm * Verify that KOI8-R filenames are correctly translated into UTF-8 85231200Smm * and stored with hdrcharset=UTF-8 option. 86231200Smm */ 87231200Smm a = archive_write_new(); 88231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 89231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 90231200Smm skipping("This system cannot convert character-set" 91231200Smm " from KOI8-R to UTF-8."); 92231200Smm archive_write_free(a); 93231200Smm return; 94231200Smm } 95231200Smm assertEqualInt(ARCHIVE_OK, 96231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 97231200Smm 98231200Smm entry = archive_entry_new2(a); 99231200Smm /* Set a KOI8-R filename. */ 100231200Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 101231200Smm archive_entry_set_filetype(entry, AE_IFREG); 102231200Smm archive_entry_set_size(entry, 0); 103231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 104231200Smm archive_entry_free(entry); 105231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 106231200Smm 107231200Smm /* Above three characters in KOI8-R should translate to the following 108231200Smm * three characters (two bytes each) in UTF-8. */ 109231200Smm assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 110231200Smm} 111231200Smm 112299529SmmDEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866) 113231200Smm{ 114231200Smm struct archive *a; 115231200Smm struct archive_entry *entry; 116231200Smm char buff[4096]; 117231200Smm size_t used; 118231200Smm 119231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 120231200Smm skipping("KOI8-R locale not available on this system."); 121231200Smm return; 122231200Smm } 123231200Smm 124231200Smm /* 125231200Smm * Verify that KOI8-R filenames are correctly translated into CP866 126231200Smm * and stored with hdrcharset=CP866 option. 127231200Smm */ 128231200Smm a = archive_write_new(); 129231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 130231200Smm if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 131231200Smm skipping("This system cannot convert character-set" 132231200Smm " from KOI8-R to CP866."); 133231200Smm archive_write_free(a); 134231200Smm return; 135231200Smm } 136231200Smm assertEqualInt(ARCHIVE_OK, 137231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 138231200Smm 139231200Smm entry = archive_entry_new2(a); 140231200Smm /* Set a KOI8-R filename. */ 141231200Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 142231200Smm archive_entry_set_filetype(entry, AE_IFREG); 143231200Smm archive_entry_set_size(entry, 0); 144231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 145231200Smm archive_entry_free(entry); 146231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 147231200Smm 148231200Smm /* Above three characters in KOI8-R should translate to the following 149231200Smm * three characters in CP866. */ 150231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 151231200Smm} 152231200Smm 153299529SmmDEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8) 154231200Smm{ 155231200Smm struct archive *a; 156231200Smm struct archive_entry *entry; 157231200Smm char buff[4096]; 158231200Smm size_t used; 159231200Smm 160231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia") && 161231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 162231200Smm skipping("KOI8-R locale not available on this system."); 163231200Smm return; 164231200Smm } 165231200Smm 166231200Smm /* 167231200Smm * Verify that CP1251 filenames are correctly translated into UTF-8 168231200Smm * and stored with hdrcharset=UTF-8 option. 169231200Smm */ 170231200Smm a = archive_write_new(); 171231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 172231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 173231200Smm skipping("This system cannot convert character-set" 174231200Smm " from KOI8-R to UTF-8."); 175231200Smm archive_write_free(a); 176231200Smm return; 177231200Smm } 178231200Smm assertEqualInt(ARCHIVE_OK, 179231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 180231200Smm 181231200Smm entry = archive_entry_new2(a); 182231200Smm /* Set a KOI8-R filename. */ 183231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 184231200Smm archive_entry_set_filetype(entry, AE_IFREG); 185231200Smm archive_entry_set_size(entry, 0); 186231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 187231200Smm archive_entry_free(entry); 188231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 189231200Smm 190231200Smm /* Above three characters in CP1251 should translate to the following 191231200Smm * three characters (two bytes each) in UTF-8. */ 192231200Smm assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 193231200Smm} 194231200Smm 195231200Smm/* 196231200Smm * Do not translate CP1251 into CP866 if non Windows platform. 197231200Smm */ 198299529SmmDEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251) 199231200Smm{ 200231200Smm struct archive *a; 201231200Smm struct archive_entry *entry; 202231200Smm char buff[4096]; 203231200Smm size_t used; 204231200Smm 205231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 206231200Smm skipping("KOI8-R locale not available on this system."); 207231200Smm return; 208231200Smm } 209231200Smm 210231200Smm /* 211231200Smm * Verify that CP1251 filenames are not translated into any 212231200Smm * other character-set, in particular, CP866. 213231200Smm */ 214231200Smm a = archive_write_new(); 215231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 216231200Smm assertEqualInt(ARCHIVE_OK, 217231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 218231200Smm 219231200Smm entry = archive_entry_new2(a); 220231200Smm /* Set a KOI8-R filename. */ 221231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 222231200Smm archive_entry_set_filetype(entry, AE_IFREG); 223231200Smm archive_entry_set_size(entry, 0); 224231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 225231200Smm archive_entry_free(entry); 226231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 227231200Smm 228231200Smm /* Above three characters in CP1251 should not translate to 229231200Smm * any other character-set. */ 230231200Smm assertEqualMem(buff, "\xEF\xF0\xE8", 3); 231231200Smm} 232231200Smm 233231200Smm/* 234231200Smm * Other archiver applications on Windows translate CP1251 filenames 235231200Smm * into CP866 filenames and store it in the ustar file. 236231200Smm * Test above behavior works well. 237231200Smm */ 238299529SmmDEFINE_TEST(test_ustar_filename_encoding_Russian_Russia) 239231200Smm{ 240231200Smm struct archive *a; 241231200Smm struct archive_entry *entry; 242231200Smm char buff[4096]; 243231200Smm size_t used; 244231200Smm 245231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 246231200Smm skipping("Russian_Russia locale not available on this system."); 247231200Smm return; 248231200Smm } 249231200Smm 250231200Smm /* 251231200Smm * Verify that Russian_Russia(CP1251) filenames are correctly translated 252231200Smm * to CP866. 253231200Smm */ 254231200Smm a = archive_write_new(); 255231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 256231200Smm assertEqualInt(ARCHIVE_OK, 257231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 258231200Smm 259231200Smm entry = archive_entry_new2(a); 260231200Smm /* Set a CP1251 filename. */ 261231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 262231200Smm archive_entry_set_filetype(entry, AE_IFREG); 263231200Smm archive_entry_set_size(entry, 0); 264231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 265231200Smm archive_entry_free(entry); 266231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 267231200Smm 268231200Smm /* Above three characters in CP1251 should translate to the following 269231200Smm * three characters in CP866. */ 270231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 271231200Smm} 272231200Smm 273299529SmmDEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8) 274231200Smm{ 275231200Smm struct archive *a; 276231200Smm struct archive_entry *entry; 277231200Smm char buff[4096]; 278231200Smm size_t used; 279231200Smm 280231200Smm if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 281231200Smm skipping("eucJP locale not available on this system."); 282231200Smm return; 283231200Smm } 284231200Smm 285231200Smm /* 286231200Smm * Verify that EUC-JP filenames are correctly translated to UTF-8. 287231200Smm */ 288231200Smm a = archive_write_new(); 289231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 290231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 291231200Smm skipping("This system cannot convert character-set" 292231200Smm " from eucJP to UTF-8."); 293231200Smm archive_write_free(a); 294231200Smm return; 295231200Smm } 296231200Smm assertEqualInt(ARCHIVE_OK, 297231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 298231200Smm 299231200Smm entry = archive_entry_new2(a); 300231200Smm /* Set an EUC-JP filename. */ 301231200Smm archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 302231200Smm /* Check the Unicode version. */ 303231200Smm archive_entry_set_filetype(entry, AE_IFREG); 304231200Smm archive_entry_set_size(entry, 0); 305231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 306231200Smm archive_entry_free(entry); 307231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 308231200Smm 309231200Smm /* Check UTF-8 version. */ 310231200Smm assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 311231200Smm} 312231200Smm 313299529SmmDEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932) 314231200Smm{ 315231200Smm struct archive *a; 316231200Smm struct archive_entry *entry; 317231200Smm char buff[4096]; 318231200Smm size_t used; 319231200Smm 320231200Smm if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 321231200Smm skipping("eucJP locale not available on this system."); 322231200Smm return; 323231200Smm } 324231200Smm 325231200Smm /* 326231200Smm * Verify that EUC-JP filenames are correctly translated to CP932. 327231200Smm */ 328231200Smm a = archive_write_new(); 329231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 330231200Smm if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 331231200Smm skipping("This system cannot convert character-set" 332231200Smm " from eucJP to CP932."); 333231200Smm archive_write_free(a); 334231200Smm return; 335231200Smm } 336231200Smm assertEqualInt(ARCHIVE_OK, 337231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 338231200Smm 339231200Smm entry = archive_entry_new2(a); 340231200Smm /* Set an EUC-JP filename. */ 341231200Smm archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 342231200Smm /* Check the Unicode version. */ 343231200Smm archive_entry_set_filetype(entry, AE_IFREG); 344231200Smm archive_entry_set_size(entry, 0); 345231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 346231200Smm archive_entry_free(entry); 347231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 348231200Smm 349231200Smm /* Check CP932 version. */ 350231200Smm assertEqualMem(buff, "\x95\x5C.txt", 6); 351231200Smm} 352231200Smm 353299529SmmDEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8) 354231200Smm{ 355231200Smm struct archive *a; 356231200Smm struct archive_entry *entry; 357231200Smm char buff[4096]; 358231200Smm size_t used; 359231200Smm 360231200Smm if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 361231200Smm NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 362231200Smm skipping("CP932/SJIS locale not available on this system."); 363231200Smm return; 364231200Smm } 365231200Smm 366231200Smm /* 367231200Smm * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 368231200Smm */ 369231200Smm a = archive_write_new(); 370231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 371231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 372231200Smm skipping("This system cannot convert character-set" 373231200Smm " from CP932/SJIS to UTF-8."); 374231200Smm archive_write_free(a); 375231200Smm return; 376231200Smm } 377231200Smm assertEqualInt(ARCHIVE_OK, 378231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 379231200Smm 380231200Smm entry = archive_entry_new2(a); 381231200Smm /* Set a CP932/SJIS filename. */ 382231200Smm archive_entry_set_pathname(entry, "\x95\x5C.txt"); 383231200Smm /* Check the Unicode version. */ 384231200Smm archive_entry_set_filetype(entry, AE_IFREG); 385231200Smm archive_entry_set_size(entry, 0); 386231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 387231200Smm archive_entry_free(entry); 388231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 389231200Smm 390231200Smm /* Check UTF-8 version. */ 391231200Smm assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 392231200Smm} 393231200Smm 394