/* test_ustar_filename_encoding.c revision 231200 */
1231200Smm/*- 2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA 3231200Smm * All rights reserved. 4231200Smm * 5231200Smm * Redistribution and use in source and binary forms, with or without 6231200Smm * modification, are permitted provided that the following conditions 7231200Smm * are met: 8231200Smm * 1. Redistributions of source code must retain the above copyright 9231200Smm * notice, this list of conditions and the following disclaimer. 10231200Smm * 2. Redistributions in binary form must reproduce the above copyright 11231200Smm * notice, this list of conditions and the following disclaimer in the 12231200Smm * documentation and/or other materials provided with the distribution. 13231200Smm * 14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24231200Smm */ 25231200Smm#include "test.h" 26231200Smm__FBSDID("$FreeBSD$"); 27231200Smm 28231200Smm#include <locale.h> 29231200Smm 30231200Smmstatic void 31231200Smmtest_ustar_filename_encoding_UTF8_CP866(void) 32231200Smm{ 33231200Smm struct archive *a; 34231200Smm struct archive_entry *entry; 35231200Smm char buff[4096]; 36231200Smm size_t used; 37231200Smm 38231200Smm if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 39231200Smm skipping("en_US.UTF-8 locale not available on this system."); 40231200Smm return; 41231200Smm } 42231200Smm 43231200Smm /* 44231200Smm * Verify that UTF-8 filenames are correctly translated into CP866 45231200Smm * and stored with hdrcharset=CP866 option. 46231200Smm */ 47231200Smm a = archive_write_new(); 48231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 49231200Smm if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 50231200Smm skipping("This system cannot convert character-set" 51231200Smm " from UTF-8 to CP866."); 52231200Smm archive_write_free(a); 53231200Smm return; 54231200Smm } 55231200Smm assertEqualInt(ARCHIVE_OK, 56231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 57231200Smm 58231200Smm entry = archive_entry_new2(a); 59231200Smm /* Set a UTF-8 filename. */ 60231200Smm archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 61231200Smm archive_entry_set_filetype(entry, AE_IFREG); 62231200Smm archive_entry_set_size(entry, 0); 63231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 64231200Smm archive_entry_free(entry); 65231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 66231200Smm 67231200Smm /* Above three characters in UTF-8 should translate to the following 68231200Smm * three characters in CP866. 
*/ 69231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 70231200Smm} 71231200Smm 72231200Smmstatic void 73231200Smmtest_ustar_filename_encoding_KOI8R_UTF8(void) 74231200Smm{ 75231200Smm struct archive *a; 76231200Smm struct archive_entry *entry; 77231200Smm char buff[4096]; 78231200Smm size_t used; 79231200Smm 80231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 81231200Smm skipping("KOI8-R locale not available on this system."); 82231200Smm return; 83231200Smm } 84231200Smm 85231200Smm /* 86231200Smm * Verify that KOI8-R filenames are correctly translated into UTF-8 87231200Smm * and stored with hdrcharset=UTF-8 option. 88231200Smm */ 89231200Smm a = archive_write_new(); 90231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 91231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 92231200Smm skipping("This system cannot convert character-set" 93231200Smm " from KOI8-R to UTF-8."); 94231200Smm archive_write_free(a); 95231200Smm return; 96231200Smm } 97231200Smm assertEqualInt(ARCHIVE_OK, 98231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 99231200Smm 100231200Smm entry = archive_entry_new2(a); 101231200Smm /* Set a KOI8-R filename. */ 102231200Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 103231200Smm archive_entry_set_filetype(entry, AE_IFREG); 104231200Smm archive_entry_set_size(entry, 0); 105231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 106231200Smm archive_entry_free(entry); 107231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 108231200Smm 109231200Smm /* Above three characters in KOI8-R should translate to the following 110231200Smm * three characters (two bytes each) in UTF-8. 
*/ 111231200Smm assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 112231200Smm} 113231200Smm 114231200Smmstatic void 115231200Smmtest_ustar_filename_encoding_KOI8R_CP866(void) 116231200Smm{ 117231200Smm struct archive *a; 118231200Smm struct archive_entry *entry; 119231200Smm char buff[4096]; 120231200Smm size_t used; 121231200Smm 122231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 123231200Smm skipping("KOI8-R locale not available on this system."); 124231200Smm return; 125231200Smm } 126231200Smm 127231200Smm /* 128231200Smm * Verify that KOI8-R filenames are correctly translated into CP866 129231200Smm * and stored with hdrcharset=CP866 option. 130231200Smm */ 131231200Smm a = archive_write_new(); 132231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 133231200Smm if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 134231200Smm skipping("This system cannot convert character-set" 135231200Smm " from KOI8-R to CP866."); 136231200Smm archive_write_free(a); 137231200Smm return; 138231200Smm } 139231200Smm assertEqualInt(ARCHIVE_OK, 140231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 141231200Smm 142231200Smm entry = archive_entry_new2(a); 143231200Smm /* Set a KOI8-R filename. */ 144231200Smm archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 145231200Smm archive_entry_set_filetype(entry, AE_IFREG); 146231200Smm archive_entry_set_size(entry, 0); 147231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 148231200Smm archive_entry_free(entry); 149231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 150231200Smm 151231200Smm /* Above three characters in KOI8-R should translate to the following 152231200Smm * three characters in CP866. 
*/ 153231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 154231200Smm} 155231200Smm 156231200Smmstatic void 157231200Smmtest_ustar_filename_encoding_CP1251_UTF8(void) 158231200Smm{ 159231200Smm struct archive *a; 160231200Smm struct archive_entry *entry; 161231200Smm char buff[4096]; 162231200Smm size_t used; 163231200Smm 164231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia") && 165231200Smm NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 166231200Smm skipping("KOI8-R locale not available on this system."); 167231200Smm return; 168231200Smm } 169231200Smm 170231200Smm /* 171231200Smm * Verify that CP1251 filenames are correctly translated into UTF-8 172231200Smm * and stored with hdrcharset=UTF-8 option. 173231200Smm */ 174231200Smm a = archive_write_new(); 175231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 176231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 177231200Smm skipping("This system cannot convert character-set" 178231200Smm " from KOI8-R to UTF-8."); 179231200Smm archive_write_free(a); 180231200Smm return; 181231200Smm } 182231200Smm assertEqualInt(ARCHIVE_OK, 183231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 184231200Smm 185231200Smm entry = archive_entry_new2(a); 186231200Smm /* Set a KOI8-R filename. */ 187231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 188231200Smm archive_entry_set_filetype(entry, AE_IFREG); 189231200Smm archive_entry_set_size(entry, 0); 190231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 191231200Smm archive_entry_free(entry); 192231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 193231200Smm 194231200Smm /* Above three characters in CP1251 should translate to the following 195231200Smm * three characters (two bytes each) in UTF-8. 
*/ 196231200Smm assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 197231200Smm} 198231200Smm 199231200Smm/* 200231200Smm * Do not translate CP1251 into CP866 if non Windows platform. 201231200Smm */ 202231200Smmstatic void 203231200Smmtest_ustar_filename_encoding_ru_RU_CP1251(void) 204231200Smm{ 205231200Smm struct archive *a; 206231200Smm struct archive_entry *entry; 207231200Smm char buff[4096]; 208231200Smm size_t used; 209231200Smm 210231200Smm if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 211231200Smm skipping("KOI8-R locale not available on this system."); 212231200Smm return; 213231200Smm } 214231200Smm 215231200Smm /* 216231200Smm * Verify that CP1251 filenames are not translated into any 217231200Smm * other character-set, in particular, CP866. 218231200Smm */ 219231200Smm a = archive_write_new(); 220231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 221231200Smm assertEqualInt(ARCHIVE_OK, 222231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 223231200Smm 224231200Smm entry = archive_entry_new2(a); 225231200Smm /* Set a KOI8-R filename. */ 226231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 227231200Smm archive_entry_set_filetype(entry, AE_IFREG); 228231200Smm archive_entry_set_size(entry, 0); 229231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 230231200Smm archive_entry_free(entry); 231231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 232231200Smm 233231200Smm /* Above three characters in CP1251 should not translate to 234231200Smm * any other character-set. */ 235231200Smm assertEqualMem(buff, "\xEF\xF0\xE8", 3); 236231200Smm} 237231200Smm 238231200Smm/* 239231200Smm * Other archiver applications on Windows translate CP1251 filenames 240231200Smm * into CP866 filenames and store it in the ustar file. 241231200Smm * Test above behavior works well. 
242231200Smm */ 243231200Smmstatic void 244231200Smmtest_ustar_filename_encoding_Russian_Russia(void) 245231200Smm{ 246231200Smm struct archive *a; 247231200Smm struct archive_entry *entry; 248231200Smm char buff[4096]; 249231200Smm size_t used; 250231200Smm 251231200Smm if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 252231200Smm skipping("Russian_Russia locale not available on this system."); 253231200Smm return; 254231200Smm } 255231200Smm 256231200Smm /* 257231200Smm * Verify that Russian_Russia(CP1251) filenames are correctly translated 258231200Smm * to CP866. 259231200Smm */ 260231200Smm a = archive_write_new(); 261231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 262231200Smm assertEqualInt(ARCHIVE_OK, 263231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 264231200Smm 265231200Smm entry = archive_entry_new2(a); 266231200Smm /* Set a CP1251 filename. */ 267231200Smm archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 268231200Smm archive_entry_set_filetype(entry, AE_IFREG); 269231200Smm archive_entry_set_size(entry, 0); 270231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 271231200Smm archive_entry_free(entry); 272231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 273231200Smm 274231200Smm /* Above three characters in CP1251 should translate to the following 275231200Smm * three characters in CP866. */ 276231200Smm assertEqualMem(buff, "\xAF\xE0\xA8", 3); 277231200Smm} 278231200Smm 279231200Smmstatic void 280231200Smmtest_ustar_filename_encoding_EUCJP_UTF8(void) 281231200Smm{ 282231200Smm struct archive *a; 283231200Smm struct archive_entry *entry; 284231200Smm char buff[4096]; 285231200Smm size_t used; 286231200Smm 287231200Smm if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 288231200Smm skipping("eucJP locale not available on this system."); 289231200Smm return; 290231200Smm } 291231200Smm 292231200Smm /* 293231200Smm * Verify that EUC-JP filenames are correctly translated to UTF-8. 
294231200Smm */ 295231200Smm a = archive_write_new(); 296231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 297231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 298231200Smm skipping("This system cannot convert character-set" 299231200Smm " from eucJP to UTF-8."); 300231200Smm archive_write_free(a); 301231200Smm return; 302231200Smm } 303231200Smm assertEqualInt(ARCHIVE_OK, 304231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 305231200Smm 306231200Smm entry = archive_entry_new2(a); 307231200Smm /* Set an EUC-JP filename. */ 308231200Smm archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 309231200Smm /* Check the Unicode version. */ 310231200Smm archive_entry_set_filetype(entry, AE_IFREG); 311231200Smm archive_entry_set_size(entry, 0); 312231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 313231200Smm archive_entry_free(entry); 314231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 315231200Smm 316231200Smm /* Check UTF-8 version. */ 317231200Smm assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 318231200Smm} 319231200Smm 320231200Smmstatic void 321231200Smmtest_ustar_filename_encoding_EUCJP_CP932(void) 322231200Smm{ 323231200Smm struct archive *a; 324231200Smm struct archive_entry *entry; 325231200Smm char buff[4096]; 326231200Smm size_t used; 327231200Smm 328231200Smm if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 329231200Smm skipping("eucJP locale not available on this system."); 330231200Smm return; 331231200Smm } 332231200Smm 333231200Smm /* 334231200Smm * Verify that EUC-JP filenames are correctly translated to CP932. 
335231200Smm */ 336231200Smm a = archive_write_new(); 337231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 338231200Smm if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 339231200Smm skipping("This system cannot convert character-set" 340231200Smm " from eucJP to CP932."); 341231200Smm archive_write_free(a); 342231200Smm return; 343231200Smm } 344231200Smm assertEqualInt(ARCHIVE_OK, 345231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 346231200Smm 347231200Smm entry = archive_entry_new2(a); 348231200Smm /* Set an EUC-JP filename. */ 349231200Smm archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 350231200Smm /* Check the Unicode version. */ 351231200Smm archive_entry_set_filetype(entry, AE_IFREG); 352231200Smm archive_entry_set_size(entry, 0); 353231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 354231200Smm archive_entry_free(entry); 355231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 356231200Smm 357231200Smm /* Check CP932 version. */ 358231200Smm assertEqualMem(buff, "\x95\x5C.txt", 6); 359231200Smm} 360231200Smm 361231200Smmstatic void 362231200Smmtest_ustar_filename_encoding_CP932_UTF8(void) 363231200Smm{ 364231200Smm struct archive *a; 365231200Smm struct archive_entry *entry; 366231200Smm char buff[4096]; 367231200Smm size_t used; 368231200Smm 369231200Smm if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 370231200Smm NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 371231200Smm skipping("CP932/SJIS locale not available on this system."); 372231200Smm return; 373231200Smm } 374231200Smm 375231200Smm /* 376231200Smm * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 
377231200Smm */ 378231200Smm a = archive_write_new(); 379231200Smm assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 380231200Smm if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 381231200Smm skipping("This system cannot convert character-set" 382231200Smm " from CP932/SJIS to UTF-8."); 383231200Smm archive_write_free(a); 384231200Smm return; 385231200Smm } 386231200Smm assertEqualInt(ARCHIVE_OK, 387231200Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 388231200Smm 389231200Smm entry = archive_entry_new2(a); 390231200Smm /* Set a CP932/SJIS filename. */ 391231200Smm archive_entry_set_pathname(entry, "\x95\x5C.txt"); 392231200Smm /* Check the Unicode version. */ 393231200Smm archive_entry_set_filetype(entry, AE_IFREG); 394231200Smm archive_entry_set_size(entry, 0); 395231200Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 396231200Smm archive_entry_free(entry); 397231200Smm assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 398231200Smm 399231200Smm /* Check UTF-8 version. */ 400231200Smm assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 401231200Smm} 402231200Smm 403231200SmmDEFINE_TEST(test_ustar_filename_encoding) 404231200Smm{ 405231200Smm test_ustar_filename_encoding_UTF8_CP866(); 406231200Smm test_ustar_filename_encoding_KOI8R_UTF8(); 407231200Smm test_ustar_filename_encoding_KOI8R_CP866(); 408231200Smm test_ustar_filename_encoding_CP1251_UTF8(); 409231200Smm test_ustar_filename_encoding_ru_RU_CP1251(); 410231200Smm test_ustar_filename_encoding_Russian_Russia(); 411231200Smm test_ustar_filename_encoding_EUCJP_UTF8(); 412231200Smm test_ustar_filename_encoding_EUCJP_CP932(); 413231200Smm test_ustar_filename_encoding_CP932_UTF8(); 414231200Smm} 415