test_gnutar_filename_encoding.c revision 232153
1/*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26__FBSDID("$FreeBSD$"); 27 28#include <locale.h> 29 30static void 31test_gnutar_filename_encoding_UTF8_CP866(void) 32{ 33 struct archive *a; 34 struct archive_entry *entry; 35 char buff[4096]; 36 size_t used; 37 38 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 39 skipping("en_US.UTF-8 locale not available on this system."); 40 return; 41 } 42 43 /* 44 * Verify that UTF-8 filenames are correctly translated into CP866 45 * and stored with hdrcharset=CP866 option. 46 */ 47 a = archive_write_new(); 48 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 49 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 50 skipping("This system cannot convert character-set" 51 " from UTF-8 to CP866."); 52 archive_write_free(a); 53 return; 54 } 55 assertEqualInt(ARCHIVE_OK, 56 archive_write_open_memory(a, buff, sizeof(buff), &used)); 57 58 entry = archive_entry_new2(a); 59 /* Set a UTF-8 filename. */ 60 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 61 archive_entry_set_filetype(entry, AE_IFREG); 62 archive_entry_set_size(entry, 0); 63 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 64 archive_entry_free(entry); 65 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 66 67 /* Above three characters in UTF-8 should translate to the following 68 * three characters in CP866. */ 69 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 70} 71 72static void 73test_gnutar_filename_encoding_KOI8R_UTF8(void) 74{ 75 struct archive *a; 76 struct archive_entry *entry; 77 char buff[4096]; 78 size_t used; 79 80 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 81 skipping("KOI8-R locale not available on this system."); 82 return; 83 } 84 85 /* 86 * Verify that KOI8-R filenames are correctly translated into UTF-8 87 * and stored with hdrcharset=UTF-8 option. 88 */ 89 a = archive_write_new(); 90 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 91 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 92 skipping("This system cannot convert character-set" 93 " from KOI8-R to UTF-8."); 94 archive_write_free(a); 95 return; 96 } 97 assertEqualInt(ARCHIVE_OK, 98 archive_write_open_memory(a, buff, sizeof(buff), &used)); 99 100 entry = archive_entry_new2(a); 101 /* Set a KOI8-R filename. */ 102 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 103 archive_entry_set_filetype(entry, AE_IFREG); 104 archive_entry_set_size(entry, 0); 105 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 106 archive_entry_free(entry); 107 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 108 109 /* Above three characters in KOI8-R should translate to the following 110 * three characters (two bytes each) in UTF-8. */ 111 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 112} 113 114static void 115test_gnutar_filename_encoding_KOI8R_CP866(void) 116{ 117 struct archive *a; 118 struct archive_entry *entry; 119 char buff[4096]; 120 size_t used; 121 122 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 123 skipping("KOI8-R locale not available on this system."); 124 return; 125 } 126 127 /* 128 * Verify that KOI8-R filenames are correctly translated into CP866 129 * and stored with hdrcharset=CP866 option. 130 */ 131 a = archive_write_new(); 132 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 133 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 134 skipping("This system cannot convert character-set" 135 " from KOI8-R to CP866."); 136 archive_write_free(a); 137 return; 138 } 139 assertEqualInt(ARCHIVE_OK, 140 archive_write_open_memory(a, buff, sizeof(buff), &used)); 141 142 entry = archive_entry_new2(a); 143 /* Set a KOI8-R filename. */ 144 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 145 archive_entry_set_filetype(entry, AE_IFREG); 146 archive_entry_set_size(entry, 0); 147 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 148 archive_entry_free(entry); 149 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 150 151 /* Above three characters in KOI8-R should translate to the following 152 * three characters in CP866. */ 153 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 154} 155 156static void 157test_gnutar_filename_encoding_CP1251_UTF8(void) 158{ 159 struct archive *a; 160 struct archive_entry *entry; 161 char buff[4096]; 162 size_t used; 163 164 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 165 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 166 skipping("KOI8-R locale not available on this system."); 167 return; 168 } 169 170 /* 171 * Verify that CP1251 filenames are correctly translated into UTF-8 172 * and stored with hdrcharset=UTF-8 option. 173 */ 174 a = archive_write_new(); 175 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 176 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 177 skipping("This system cannot convert character-set" 178 " from KOI8-R to UTF-8."); 179 archive_write_free(a); 180 return; 181 } 182 assertEqualInt(ARCHIVE_OK, 183 archive_write_open_memory(a, buff, sizeof(buff), &used)); 184 185 entry = archive_entry_new2(a); 186 /* Set a KOI8-R filename. */ 187 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 188 archive_entry_set_filetype(entry, AE_IFREG); 189 archive_entry_set_size(entry, 0); 190 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 191 archive_entry_free(entry); 192 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 193 194 /* Above three characters in CP1251 should translate to the following 195 * three characters (two bytes each) in UTF-8. */ 196 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 197} 198 199/* 200 * Do not translate CP1251 into CP866 if non Windows platform. 201 */ 202static void 203test_gnutar_filename_encoding_ru_RU_CP1251(void) 204{ 205 struct archive *a; 206 struct archive_entry *entry; 207 char buff[4096]; 208 size_t used; 209 210 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 211 skipping("KOI8-R locale not available on this system."); 212 return; 213 } 214 215 /* 216 * Verify that CP1251 filenames are not translated into any 217 * other character-set, in particular, CP866. 218 */ 219 a = archive_write_new(); 220 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 221 assertEqualInt(ARCHIVE_OK, 222 archive_write_open_memory(a, buff, sizeof(buff), &used)); 223 224 entry = archive_entry_new2(a); 225 /* Set a KOI8-R filename. */ 226 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 227 archive_entry_set_filetype(entry, AE_IFREG); 228 archive_entry_set_size(entry, 0); 229 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 230 archive_entry_free(entry); 231 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 232 233 /* Above three characters in CP1251 should not translate to 234 * any other character-set. */ 235 assertEqualMem(buff, "\xEF\xF0\xE8", 3); 236} 237 238/* 239 * Other archiver applications on Windows translate CP1251 filenames 240 * into CP866 filenames and store it in the gnutar file. 241 * Test above behavior works well. 242 */ 243static void 244test_gnutar_filename_encoding_Russian_Russia(void) 245{ 246 struct archive *a; 247 struct archive_entry *entry; 248 char buff[4096]; 249 size_t used; 250 251 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 252 skipping("Russian_Russia locale not available on this system."); 253 return; 254 } 255 256 /* 257 * Verify that Russian_Russia(CP1251) filenames are correctly translated 258 * to CP866. 259 */ 260 a = archive_write_new(); 261 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 262 assertEqualInt(ARCHIVE_OK, 263 archive_write_open_memory(a, buff, sizeof(buff), &used)); 264 265 entry = archive_entry_new2(a); 266 /* Set a CP1251 filename. */ 267 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 268 archive_entry_set_filetype(entry, AE_IFREG); 269 archive_entry_set_size(entry, 0); 270 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 271 archive_entry_free(entry); 272 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 273 274 /* Above three characters in CP1251 should translate to the following 275 * three characters in CP866. */ 276 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 277} 278 279static void 280test_gnutar_filename_encoding_EUCJP_UTF8(void) 281{ 282 struct archive *a; 283 struct archive_entry *entry; 284 char buff[4096]; 285 size_t used; 286 287 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 288 skipping("eucJP locale not available on this system."); 289 return; 290 } 291 292 /* 293 * Verify that EUC-JP filenames are correctly translated to UTF-8. 294 */ 295 a = archive_write_new(); 296 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 297 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 298 skipping("This system cannot convert character-set" 299 " from eucJP to UTF-8."); 300 archive_write_free(a); 301 return; 302 } 303 assertEqualInt(ARCHIVE_OK, 304 archive_write_open_memory(a, buff, sizeof(buff), &used)); 305 306 entry = archive_entry_new2(a); 307 /* Set an EUC-JP filename. */ 308 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 309 /* Check the Unicode version. */ 310 archive_entry_set_filetype(entry, AE_IFREG); 311 archive_entry_set_size(entry, 0); 312 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 313 archive_entry_free(entry); 314 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 315 316 /* Check UTF-8 version. */ 317 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 318} 319 320static void 321test_gnutar_filename_encoding_EUCJP_CP932(void) 322{ 323 struct archive *a; 324 struct archive_entry *entry; 325 char buff[4096]; 326 size_t used; 327 328 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 329 skipping("eucJP locale not available on this system."); 330 return; 331 } 332 333 /* 334 * Verify that EUC-JP filenames are correctly translated to CP932. 335 */ 336 a = archive_write_new(); 337 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 338 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 339 skipping("This system cannot convert character-set" 340 " from eucJP to CP932."); 341 archive_write_free(a); 342 return; 343 } 344 assertEqualInt(ARCHIVE_OK, 345 archive_write_open_memory(a, buff, sizeof(buff), &used)); 346 347 entry = archive_entry_new2(a); 348 /* Set an EUC-JP filename. */ 349 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 350 /* Check the Unicode version. */ 351 archive_entry_set_filetype(entry, AE_IFREG); 352 archive_entry_set_size(entry, 0); 353 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 354 archive_entry_free(entry); 355 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 356 357 /* Check CP932 version. */ 358 assertEqualMem(buff, "\x95\x5C.txt", 6); 359} 360 361static void 362test_gnutar_filename_encoding_CP932_UTF8(void) 363{ 364 struct archive *a; 365 struct archive_entry *entry; 366 char buff[4096]; 367 size_t used; 368 369 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 370 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 371 skipping("CP932/SJIS locale not available on this system."); 372 return; 373 } 374 375 /* 376 * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 377 */ 378 a = archive_write_new(); 379 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a)); 380 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 381 skipping("This system cannot convert character-set" 382 " from CP932/SJIS to UTF-8."); 383 archive_write_free(a); 384 return; 385 } 386 assertEqualInt(ARCHIVE_OK, 387 archive_write_open_memory(a, buff, sizeof(buff), &used)); 388 389 entry = archive_entry_new2(a); 390 /* Set an CP932/SJIS filename. */ 391 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 392 /* Check the Unicode version. */ 393 archive_entry_set_filetype(entry, AE_IFREG); 394 archive_entry_set_size(entry, 0); 395 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 396 archive_entry_free(entry); 397 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 398 399 /* Check UTF-8 version. */ 400 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 401} 402 403DEFINE_TEST(test_gnutar_filename_encoding) 404{ 405 test_gnutar_filename_encoding_UTF8_CP866(); 406 test_gnutar_filename_encoding_KOI8R_UTF8(); 407 test_gnutar_filename_encoding_KOI8R_CP866(); 408 test_gnutar_filename_encoding_CP1251_UTF8(); 409 test_gnutar_filename_encoding_ru_RU_CP1251(); 410 test_gnutar_filename_encoding_Russian_Russia(); 411 test_gnutar_filename_encoding_EUCJP_UTF8(); 412 test_gnutar_filename_encoding_EUCJP_CP932(); 413 test_gnutar_filename_encoding_CP932_UTF8(); 414} 415