1/*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26__FBSDID("$FreeBSD$"); 27 28#include <locale.h> 29 30DEFINE_TEST(test_zip_filename_encoding_UTF8) 31{ 32 struct archive *a; 33 struct archive_entry *entry; 34 char buff[4096]; 35 size_t used; 36 37 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 38 skipping("en_US.UTF-8 locale not available on this system."); 39 return; 40 } 41 42 /* 43 * Verify that UTF-8 filenames are correctly stored with 44 * hdrcharset=UTF-8 option. 45 */ 46 a = archive_write_new(); 47 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 48 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 49 skipping("This system cannot convert character-set" 50 " for UTF-8."); 51 archive_write_free(a); 52 return; 53 } 54 assertEqualInt(ARCHIVE_OK, 55 archive_write_open_memory(a, buff, sizeof(buff), &used)); 56 57 entry = archive_entry_new2(a); 58 /* Set a UTF-8 filename. */ 59 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 60 archive_entry_set_filetype(entry, AE_IFREG); 61 archive_entry_set_size(entry, 0); 62 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 63 archive_entry_free(entry); 64 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 65 66 /* A bit 11 of general purpose flag should be 0x08, 67 * which indicates the filename charset is UTF-8. */ 68 assertEqualInt(0x08, buff[7]); 69 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 70 71 /* 72 * Verify that UTF-8 filenames are correctly stored without 73 * hdrcharset=UTF-8 option. 74 */ 75 a = archive_write_new(); 76 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 77 assertEqualInt(ARCHIVE_OK, 78 archive_write_open_memory(a, buff, sizeof(buff), &used)); 79 80 entry = archive_entry_new2(a); 81 /* Set a UTF-8 filename. */ 82 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 83 archive_entry_set_filetype(entry, AE_IFREG); 84 archive_entry_set_size(entry, 0); 85 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 86 archive_entry_free(entry); 87 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 88 89 /* A bit 11 of general purpose flag should be 0x08, 90 * which indicates the filename charset is UTF-8. */ 91 assertEqualInt(0x08, buff[7]); 92 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 93 94 /* 95 * Verify that A bit 11 of general purpose flag is not set 96 * when ASCII filenames are stored. 97 */ 98 a = archive_write_new(); 99 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 100 assertEqualInt(ARCHIVE_OK, 101 archive_write_open_memory(a, buff, sizeof(buff), &used)); 102 103 entry = archive_entry_new2(a); 104 /* Set an ASCII filename. */ 105 archive_entry_set_pathname(entry, "abcABC"); 106 archive_entry_set_filetype(entry, AE_IFREG); 107 archive_entry_set_size(entry, 0); 108 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 109 archive_entry_free(entry); 110 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 111 112 /* A bit 11 of general purpose flag should be 0, 113 * which indicates the filename charset is unknown. */ 114 assertEqualInt(0, buff[7]); 115 assertEqualMem(buff + 30, "abcABC", 6); 116} 117 118DEFINE_TEST(test_zip_filename_encoding_KOI8R) 119{ 120 struct archive *a; 121 struct archive_entry *entry; 122 char buff[4096]; 123 size_t used; 124 125 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 126 skipping("KOI8-R locale not available on this system."); 127 return; 128 } 129 130 /* 131 * Verify that KOI8-R filenames are correctly translated to UTF-8. 132 */ 133 a = archive_write_new(); 134 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 135 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 136 skipping("This system cannot convert character-set" 137 " from KOI8-R to UTF-8."); 138 archive_write_free(a); 139 return; 140 } 141 assertEqualInt(ARCHIVE_OK, 142 archive_write_open_memory(a, buff, sizeof(buff), &used)); 143 144 entry = archive_entry_new2(a); 145 /* Set a KOI8-R filename. */ 146 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 147 archive_entry_set_filetype(entry, AE_IFREG); 148 archive_entry_set_size(entry, 0); 149 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 150 archive_entry_free(entry); 151 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 152 153 /* A bit 11 of general purpose flag should be 0x08, 154 * which indicates the filename charset is UTF-8. */ 155 assertEqualInt(0x08, buff[7]); 156 /* Above three characters in KOI8-R should translate to the following 157 * three characters (two bytes each) in UTF-8. */ 158 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 159 160 /* 161 * Verify that KOI8-R filenames are not translated to UTF-8. 162 */ 163 a = archive_write_new(); 164 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 165 assertEqualInt(ARCHIVE_OK, 166 archive_write_open_memory(a, buff, sizeof(buff), &used)); 167 168 entry = archive_entry_new2(a); 169 /* Set a KOI8-R filename. */ 170 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 171 archive_entry_set_filetype(entry, AE_IFREG); 172 archive_entry_set_size(entry, 0); 173 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 174 archive_entry_free(entry); 175 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 176 177 /* A bit 11 of general purpose flag should be 0, 178 * which indicates the filename charset is unknown. */ 179 assertEqualInt(0, buff[7]); 180 /* Above three characters in KOI8-R should not translate to 181 * any character-set. */ 182 assertEqualMem(buff + 30, "\xD0\xD2\xC9", 3); 183 184 /* 185 * Verify that A bit 11 of general purpose flag is not set 186 * when ASCII filenames are stored even if hdrcharset=UTF-8 187 * is specified. 188 */ 189 a = archive_write_new(); 190 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 191 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 192 skipping("This system cannot convert character-set" 193 " from KOI8-R to UTF-8."); 194 archive_write_free(a); 195 return; 196 } 197 assertEqualInt(ARCHIVE_OK, 198 archive_write_open_memory(a, buff, sizeof(buff), &used)); 199 200 entry = archive_entry_new2(a); 201 /* Set an ASCII filename. */ 202 archive_entry_set_pathname(entry, "abcABC"); 203 archive_entry_set_filetype(entry, AE_IFREG); 204 archive_entry_set_size(entry, 0); 205 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 206 archive_entry_free(entry); 207 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 208 209 /* A bit 11 of general purpose flag should be 0, 210 * which indicates the filename charset is unknown. */ 211 assertEqualInt(0, buff[7]); 212 assertEqualMem(buff + 30, "abcABC", 6); 213} 214 215/* 216 * Do not translate CP1251 into CP866 if non Windows platform. 217 */ 218DEFINE_TEST(test_zip_filename_encoding_ru_RU_CP1251) 219{ 220 struct archive *a; 221 struct archive_entry *entry; 222 char buff[4096]; 223 size_t used; 224 225 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 226 skipping("Russian_Russia locale not available on this system."); 227 return; 228 } 229 230 /* 231 * Verify that CP1251 filenames are not translated into any 232 * other character-set, in particular, CP866. 233 */ 234 a = archive_write_new(); 235 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 236 assertEqualInt(ARCHIVE_OK, 237 archive_write_open_memory(a, buff, sizeof(buff), &used)); 238 239 entry = archive_entry_new2(a); 240 /* Set a CP1251 filename. */ 241 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 242 archive_entry_set_filetype(entry, AE_IFREG); 243 archive_entry_set_size(entry, 0); 244 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 245 archive_entry_free(entry); 246 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 247 248 /* A bit 11 of general purpose flag should be 0, 249 * which indicates the filename charset is unknown. */ 250 assertEqualInt(0, buff[7]); 251 /* Above three characters in CP1251 should not translate into 252 * any other character-set. */ 253 assertEqualMem(buff + 30, "\xEF\xF0\xE8", 3); 254} 255 256/* 257 * Other archiver applications on Windows translate CP1251 filenames 258 * into CP866 filenames and store it in the zip file. 259 * Test above behavior works well. 260 */ 261DEFINE_TEST(test_zip_filename_encoding_Russian_Russia) 262{ 263 struct archive *a; 264 struct archive_entry *entry; 265 char buff[4096]; 266 size_t used; 267 268 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 269 skipping("Russian_Russia locale not available on this system."); 270 return; 271 } 272 273 /* 274 * Verify that Russian_Russia(CP1251) filenames are correctly translated 275 * to UTF-8. 276 */ 277 a = archive_write_new(); 278 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 279 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 280 skipping("This system cannot convert character-set" 281 " from Russian_Russia.CP1251 to UTF-8."); 282 archive_write_free(a); 283 return; 284 } 285 assertEqualInt(ARCHIVE_OK, 286 archive_write_open_memory(a, buff, sizeof(buff), &used)); 287 288 entry = archive_entry_new2(a); 289 /* Set a CP1251 filename. */ 290 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 291 archive_entry_set_filetype(entry, AE_IFREG); 292 archive_entry_set_size(entry, 0); 293 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 294 archive_entry_free(entry); 295 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 296 297 /* A bit 11 of general purpose flag should be 0x08, 298 * which indicates the filename charset is UTF-8. */ 299 assertEqualInt(0x08, buff[7]); 300 /* Above three characters in CP1251 should translate to the following 301 * three characters (two bytes each) in UTF-8. */ 302 assertEqualMem(buff + 30, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 303 304 /* 305 * Verify that Russian_Russia(CP1251) filenames are correctly translated 306 * to CP866. 307 */ 308 a = archive_write_new(); 309 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 310 assertEqualInt(ARCHIVE_OK, 311 archive_write_open_memory(a, buff, sizeof(buff), &used)); 312 313 entry = archive_entry_new2(a); 314 /* Set a CP1251 filename. */ 315 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 316 archive_entry_set_filetype(entry, AE_IFREG); 317 archive_entry_set_size(entry, 0); 318 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 319 archive_entry_free(entry); 320 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 321 322 /* A bit 11 of general purpose flag should be 0, 323 * which indicates the filename charset is unknown. */ 324 assertEqualInt(0, buff[7]); 325 /* Above three characters in CP1251 should translate to the following 326 * three characters in CP866. */ 327 assertEqualMem(buff + 30, "\xAF\xE0\xA8", 3); 328} 329 330DEFINE_TEST(test_zip_filename_encoding_EUCJP) 331{ 332 struct archive *a; 333 struct archive_entry *entry; 334 char buff[4096]; 335 size_t used; 336 337 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 338 skipping("eucJP locale not available on this system."); 339 return; 340 } 341 342 /* 343 * Verify that EUC-JP filenames are correctly translated to UTF-8. 344 */ 345 a = archive_write_new(); 346 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 347 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 348 skipping("This system cannot convert character-set" 349 " from eucJP to UTF-8."); 350 archive_write_free(a); 351 return; 352 } 353 assertEqualInt(ARCHIVE_OK, 354 archive_write_open_memory(a, buff, sizeof(buff), &used)); 355 356 entry = archive_entry_new2(a); 357 /* Set an EUC-JP filename. */ 358 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 359 /* Check the Unicode version. */ 360 archive_entry_set_filetype(entry, AE_IFREG); 361 archive_entry_set_size(entry, 0); 362 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 363 archive_entry_free(entry); 364 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 365 366 /* A bit 11 of general purpose flag should be 0x08, 367 * which indicates the filename charset is UTF-8. */ 368 assertEqualInt(0x08, buff[7]); 369 /* Check UTF-8 version. */ 370 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); 371 372 /* 373 * Verify that EUC-JP filenames are not translated to UTF-8. 374 */ 375 a = archive_write_new(); 376 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 377 assertEqualInt(ARCHIVE_OK, 378 archive_write_open_memory(a, buff, sizeof(buff), &used)); 379 380 entry = archive_entry_new2(a); 381 /* Set an EUC-JP filename. */ 382 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 383 /* Check the Unicode version. */ 384 archive_entry_set_filetype(entry, AE_IFREG); 385 archive_entry_set_size(entry, 0); 386 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 387 archive_entry_free(entry); 388 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 389 390 /* A bit 11 of general purpose flag should be 0, 391 * which indicates the filename charset is unknown. */ 392 assertEqualInt(0, buff[7]); 393 /* Above three characters in EUC-JP should not translate to 394 * any character-set. */ 395 assertEqualMem(buff + 30, "\xC9\xBD.txt", 6); 396 397 /* 398 * Verify that A bit 11 of general purpose flag is not set 399 * when ASCII filenames are stored even if hdrcharset=UTF-8 400 * is specified. 401 */ 402 a = archive_write_new(); 403 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 404 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 405 skipping("This system cannot convert character-set" 406 " from eucJP to UTF-8."); 407 archive_write_free(a); 408 return; 409 } 410 assertEqualInt(ARCHIVE_OK, 411 archive_write_open_memory(a, buff, sizeof(buff), &used)); 412 413 entry = archive_entry_new2(a); 414 /* Set an ASCII filename. */ 415 archive_entry_set_pathname(entry, "abcABC"); 416 /* Check the Unicode version. */ 417 archive_entry_set_filetype(entry, AE_IFREG); 418 archive_entry_set_size(entry, 0); 419 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 420 archive_entry_free(entry); 421 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 422 423 /* A bit 11 of general purpose flag should be 0, 424 * which indicates the filename charset is unknown. */ 425 assertEqualInt(0, buff[7]); 426 assertEqualMem(buff + 30, "abcABC", 6); 427} 428 429DEFINE_TEST(test_zip_filename_encoding_CP932) 430{ 431 struct archive *a; 432 struct archive_entry *entry; 433 char buff[4096]; 434 size_t used; 435 436 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 437 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 438 skipping("CP932/SJIS locale not available on this system."); 439 return; 440 } 441 442 /* 443 * Verify that EUC-JP filenames are correctly translated to UTF-8. 444 */ 445 a = archive_write_new(); 446 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 447 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 448 skipping("This system cannot convert character-set" 449 " from CP932/SJIS to UTF-8."); 450 archive_write_free(a); 451 return; 452 } 453 assertEqualInt(ARCHIVE_OK, 454 archive_write_open_memory(a, buff, sizeof(buff), &used)); 455 456 entry = archive_entry_new2(a); 457 /* Set a CP932/SJIS filename. */ 458 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 459 /* Check the Unicode version. */ 460 archive_entry_set_filetype(entry, AE_IFREG); 461 archive_entry_set_size(entry, 0); 462 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 463 archive_entry_free(entry); 464 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 465 466 /* A bit 11 of general purpose flag should be 0x08, 467 * which indicates the filename charset is UTF-8. */ 468 assertEqualInt(0x08, buff[7]); 469 /* Check UTF-8 version. */ 470 assertEqualMem(buff + 30, "\xE8\xA1\xA8.txt", 7); 471 472 /* 473 * Verify that CP932/SJIS filenames are not translated to UTF-8. 474 */ 475 a = archive_write_new(); 476 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 477 assertEqualInt(ARCHIVE_OK, 478 archive_write_open_memory(a, buff, sizeof(buff), &used)); 479 480 entry = archive_entry_new2(a); 481 /* Set a CP932/SJIS filename. */ 482 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 483 /* Check the Unicode version. */ 484 archive_entry_set_filetype(entry, AE_IFREG); 485 archive_entry_set_size(entry, 0); 486 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 487 archive_entry_free(entry); 488 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 489 490 /* A bit 11 of general purpose flag should be 0, 491 * which indicates the filename charset is unknown. */ 492 assertEqualInt(0, buff[7]); 493 /* Above three characters in CP932/SJIS should not translate to 494 * any character-set. */ 495 assertEqualMem(buff + 30, "\x95\x5C.txt", 6); 496 497 /* 498 * Verify that A bit 11 of general purpose flag is not set 499 * when ASCII filenames are stored even if hdrcharset=UTF-8 500 * is specified. 501 */ 502 a = archive_write_new(); 503 assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a)); 504 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 505 skipping("This system cannot convert character-set" 506 " from CP932/SJIS to UTF-8."); 507 archive_write_free(a); 508 return; 509 } 510 assertEqualInt(ARCHIVE_OK, 511 archive_write_open_memory(a, buff, sizeof(buff), &used)); 512 513 entry = archive_entry_new2(a); 514 /* Set an ASCII filename. */ 515 archive_entry_set_pathname(entry, "abcABC"); 516 /* Check the Unicode version. */ 517 archive_entry_set_filetype(entry, AE_IFREG); 518 archive_entry_set_size(entry, 0); 519 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 520 archive_entry_free(entry); 521 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 522 523 /* A bit 11 of general purpose flag should be 0, 524 * which indicates the filename charset is unknown. */ 525 assertEqualInt(0, buff[7]); 526 assertEqualMem(buff + 30, "abcABC", 6); 527} 528