1/*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26__FBSDID("$FreeBSD$"); 27 28#include <locale.h> 29 30DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866) 31{ 32 struct archive *a; 33 struct archive_entry *entry; 34 char buff[4096]; 35 size_t used; 36 37 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 38 skipping("en_US.UTF-8 locale not available on this system."); 39 return; 40 } 41 42 /* 43 * Verify that UTF-8 filenames are correctly translated into CP866 44 * and stored with hdrcharset=CP866 option. 45 */ 46 a = archive_write_new(); 47 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 48 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 49 skipping("This system cannot convert character-set" 50 " from UTF-8 to CP866."); 51 archive_write_free(a); 52 return; 53 } 54 assertEqualInt(ARCHIVE_OK, 55 archive_write_open_memory(a, buff, sizeof(buff), &used)); 56 57 entry = archive_entry_new2(a); 58 /* Set a UTF-8 filename. */ 59 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 60 archive_entry_set_filetype(entry, AE_IFREG); 61 archive_entry_set_size(entry, 0); 62 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 63 archive_entry_free(entry); 64 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 65 66 /* Above three characters in UTF-8 should translate to the following 67 * three characters in CP866. */ 68 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 69} 70 71DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8) 72{ 73 struct archive *a; 74 struct archive_entry *entry; 75 char buff[4096]; 76 size_t used; 77 78 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 79 skipping("KOI8-R locale not available on this system."); 80 return; 81 } 82 83 /* 84 * Verify that KOI8-R filenames are correctly translated into UTF-8 85 * and stored with hdrcharset=UTF-8 option. 86 */ 87 a = archive_write_new(); 88 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 89 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 90 skipping("This system cannot convert character-set" 91 " from KOI8-R to UTF-8."); 92 archive_write_free(a); 93 return; 94 } 95 assertEqualInt(ARCHIVE_OK, 96 archive_write_open_memory(a, buff, sizeof(buff), &used)); 97 98 entry = archive_entry_new2(a); 99 /* Set a KOI8-R filename. */ 100 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 101 archive_entry_set_filetype(entry, AE_IFREG); 102 archive_entry_set_size(entry, 0); 103 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 104 archive_entry_free(entry); 105 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 106 107 /* Above three characters in KOI8-R should translate to the following 108 * three characters (two bytes each) in UTF-8. */ 109 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 110} 111 112DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866) 113{ 114 struct archive *a; 115 struct archive_entry *entry; 116 char buff[4096]; 117 size_t used; 118 119 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 120 skipping("KOI8-R locale not available on this system."); 121 return; 122 } 123 124 /* 125 * Verify that KOI8-R filenames are correctly translated into CP866 126 * and stored with hdrcharset=CP866 option. 127 */ 128 a = archive_write_new(); 129 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 130 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 131 skipping("This system cannot convert character-set" 132 " from KOI8-R to CP866."); 133 archive_write_free(a); 134 return; 135 } 136 assertEqualInt(ARCHIVE_OK, 137 archive_write_open_memory(a, buff, sizeof(buff), &used)); 138 139 entry = archive_entry_new2(a); 140 /* Set a KOI8-R filename. */ 141 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 142 archive_entry_set_filetype(entry, AE_IFREG); 143 archive_entry_set_size(entry, 0); 144 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 145 archive_entry_free(entry); 146 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 147 148 /* Above three characters in KOI8-R should translate to the following 149 * three characters in CP866. */ 150 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 151} 152 153DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8) 154{ 155 struct archive *a; 156 struct archive_entry *entry; 157 char buff[4096]; 158 size_t used; 159 160 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 161 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 162 skipping("KOI8-R locale not available on this system."); 163 return; 164 } 165 166 /* 167 * Verify that CP1251 filenames are correctly translated into UTF-8 168 * and stored with hdrcharset=UTF-8 option. 169 */ 170 a = archive_write_new(); 171 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 172 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 173 skipping("This system cannot convert character-set" 174 " from KOI8-R to UTF-8."); 175 archive_write_free(a); 176 return; 177 } 178 assertEqualInt(ARCHIVE_OK, 179 archive_write_open_memory(a, buff, sizeof(buff), &used)); 180 181 entry = archive_entry_new2(a); 182 /* Set a KOI8-R filename. */ 183 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 184 archive_entry_set_filetype(entry, AE_IFREG); 185 archive_entry_set_size(entry, 0); 186 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 187 archive_entry_free(entry); 188 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 189 190 /* Above three characters in CP1251 should translate to the following 191 * three characters (two bytes each) in UTF-8. */ 192 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 193} 194 195/* 196 * Do not translate CP1251 into CP866 if non Windows platform. 197 */ 198DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251) 199{ 200 struct archive *a; 201 struct archive_entry *entry; 202 char buff[4096]; 203 size_t used; 204 205 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 206 skipping("KOI8-R locale not available on this system."); 207 return; 208 } 209 210 /* 211 * Verify that CP1251 filenames are not translated into any 212 * other character-set, in particular, CP866. 213 */ 214 a = archive_write_new(); 215 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 216 assertEqualInt(ARCHIVE_OK, 217 archive_write_open_memory(a, buff, sizeof(buff), &used)); 218 219 entry = archive_entry_new2(a); 220 /* Set a KOI8-R filename. */ 221 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 222 archive_entry_set_filetype(entry, AE_IFREG); 223 archive_entry_set_size(entry, 0); 224 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 225 archive_entry_free(entry); 226 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 227 228 /* Above three characters in CP1251 should not translate to 229 * any other character-set. */ 230 assertEqualMem(buff, "\xEF\xF0\xE8", 3); 231} 232 233/* 234 * Other archiver applications on Windows translate CP1251 filenames 235 * into CP866 filenames and store it in the gnutar file. 236 * Test above behavior works well. 237 */ 238DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia) 239{ 240 struct archive *a; 241 struct archive_entry *entry; 242 char buff[4096]; 243 size_t used; 244 245 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 246 skipping("Russian_Russia locale not available on this system."); 247 return; 248 } 249 250 /* 251 * Verify that Russian_Russia(CP1251) filenames are correctly translated 252 * to CP866. 253 */ 254 a = archive_write_new(); 255 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 256 assertEqualInt(ARCHIVE_OK, 257 archive_write_open_memory(a, buff, sizeof(buff), &used)); 258 259 entry = archive_entry_new2(a); 260 /* Set a CP1251 filename. */ 261 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 262 archive_entry_set_filetype(entry, AE_IFREG); 263 archive_entry_set_size(entry, 0); 264 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 265 archive_entry_free(entry); 266 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 267 268 /* Above three characters in CP1251 should translate to the following 269 * three characters in CP866. */ 270 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 271} 272 273DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8) 274{ 275 struct archive *a; 276 struct archive_entry *entry; 277 char buff[4096]; 278 size_t used; 279 280 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 281 skipping("eucJP locale not available on this system."); 282 return; 283 } 284 285 /* 286 * Verify that EUC-JP filenames are correctly translated to UTF-8. 287 */ 288 a = archive_write_new(); 289 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 290 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 291 skipping("This system cannot convert character-set" 292 " from eucJP to UTF-8."); 293 archive_write_free(a); 294 return; 295 } 296 assertEqualInt(ARCHIVE_OK, 297 archive_write_open_memory(a, buff, sizeof(buff), &used)); 298 299 entry = archive_entry_new2(a); 300 /* Set an EUC-JP filename. */ 301 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 302 /* Check the Unicode version. */ 303 archive_entry_set_filetype(entry, AE_IFREG); 304 archive_entry_set_size(entry, 0); 305 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 306 archive_entry_free(entry); 307 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 308 309 /* Check UTF-8 version. */ 310 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 311} 312 313DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932) 314{ 315 struct archive *a; 316 struct archive_entry *entry; 317 char buff[4096]; 318 size_t used; 319 320 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 321 skipping("eucJP locale not available on this system."); 322 return; 323 } 324 325 /* 326 * Verify that EUC-JP filenames are correctly translated to CP932. 327 */ 328 a = archive_write_new(); 329 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 330 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 331 skipping("This system cannot convert character-set" 332 " from eucJP to CP932."); 333 archive_write_free(a); 334 return; 335 } 336 assertEqualInt(ARCHIVE_OK, 337 archive_write_open_memory(a, buff, sizeof(buff), &used)); 338 339 entry = archive_entry_new2(a); 340 /* Set an EUC-JP filename. */ 341 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 342 /* Check the Unicode version. */ 343 archive_entry_set_filetype(entry, AE_IFREG); 344 archive_entry_set_size(entry, 0); 345 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 346 archive_entry_free(entry); 347 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 348 349 /* Check CP932 version. */ 350 assertEqualMem(buff, "\x95\x5C.txt", 6); 351} 352 353DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8) 354{ 355 struct archive *a; 356 struct archive_entry *entry; 357 char buff[4096]; 358 size_t used; 359 360 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 361 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 362 skipping("CP932/SJIS locale not available on this system."); 363 return; 364 } 365 366 /* 367 * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 368 */ 369 a = archive_write_new(); 370 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 371 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 372 skipping("This system cannot convert character-set" 373 " from CP932/SJIS to UTF-8."); 374 archive_write_free(a); 375 return; 376 } 377 assertEqualInt(ARCHIVE_OK, 378 archive_write_open_memory(a, buff, sizeof(buff), &used)); 379 380 entry = archive_entry_new2(a); 381 /* Set an CP932/SJIS filename. */ 382 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 383 /* Check the Unicode version. */ 384 archive_entry_set_filetype(entry, AE_IFREG); 385 archive_entry_set_size(entry, 0); 386 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 387 archive_entry_free(entry); 388 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 389 390 /* Check UTF-8 version. */ 391 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 392} 393 394