1/*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25#include "test.h" 26__FBSDID("$FreeBSD: head/lib/libarchive/test/test_pax_filename_encoding.c 201247 2009-12-30 05:59:21Z kientzle $"); 27 28#include <locale.h> 29 30/* 31 * Pax interchange is supposed to encode filenames into 32 * UTF-8. Of course, that's not always possible. This 33 * test is intended to verify that filenames always get 34 * stored and restored correctly, regardless of the encodings. 35 */ 36 37/* 38 * Read a manually-created archive that has filenames that are 39 * stored in binary instead of UTF-8 and verify that we get 40 * the right filename returned and that we get a warning only 41 * if the header isn't marked as binary. 42 */ 43static void 44test_pax_filename_encoding_1(void) 45{ 46 static const char testname[] = "test_pax_filename_encoding.tar"; 47 /* 48 * \314\214 is a valid 2-byte UTF-8 sequence. 49 * \374 is invalid in UTF-8. 50 */ 51 char filename[] = "abc\314\214mno\374xyz"; 52 struct archive *a; 53 struct archive_entry *entry; 54 55 /* 56 * Read an archive that has non-UTF8 pax filenames in it. 57 */ 58 extract_reference_file(testname); 59 a = archive_read_new(); 60 assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a)); 61 assertEqualInt(ARCHIVE_OK, archive_read_support_compression_all(a)); 62 assertEqualInt(ARCHIVE_OK, 63 archive_read_open_filename(a, testname, 10240)); 64 /* 65 * First entry in this test archive has an invalid UTF-8 sequence 66 * in it, but the header is not marked as hdrcharset=BINARY, so that 67 * requires a warning. 68 */ 69 failure("Invalid UTF8 in a pax archive pathname should cause a warning"); 70 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 71 assertEqualString(filename, archive_entry_pathname(entry)); 72 /* 73 * Second entry is identical except that it does have 74 * hdrcharset=BINARY, so no warning should be generated. 75 */ 76 failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n" 77 " characters in it without generating a warning"); 78 assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); 79 assertEqualString(filename, archive_entry_pathname(entry)); 80 archive_read_finish(a); 81} 82 83/* 84 * Set the locale and write a pathname containing invalid characters. 85 * This should work; the underlying implementation should automatically 86 * fall back to storing the pathname in binary. 87 */ 88static void 89test_pax_filename_encoding_2(void) 90{ 91 char filename[] = "abc\314\214mno\374xyz"; 92 struct archive *a; 93 struct archive_entry *entry; 94 char buff[65536]; 95 char longname[] = "abc\314\214mno\374xyz" 96 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 97 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 98 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 99 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 100 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 101 "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 102 ; 103 size_t used; 104 105 /* 106 * We need a starting locale which has invalid sequences. 107 * de_DE.UTF-8 seems to be commonly supported. 108 */ 109 /* If it doesn't exist, just warn and return. */ 110 if (LOCALE_UTF8 == NULL 111 || NULL == setlocale(LC_ALL, LOCALE_UTF8)) { 112 skipping("invalid encoding tests require a suitable locale;" 113 " %s not available on this system", LOCALE_UTF8); 114 return; 115 } 116 117 assert((a = archive_write_new()) != NULL); 118 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 119 assertEqualIntA(a, 0, archive_write_set_compression_none(a)); 120 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 121 assertEqualInt(0, 122 archive_write_open_memory(a, buff, sizeof(buff), &used)); 123 124 assert((entry = archive_entry_new()) != NULL); 125 /* Set pathname, gname, uname, hardlink to nonconvertible values. */ 126 archive_entry_copy_pathname(entry, filename); 127 archive_entry_copy_gname(entry, filename); 128 archive_entry_copy_uname(entry, filename); 129 archive_entry_copy_hardlink(entry, filename); 130 archive_entry_set_filetype(entry, AE_IFREG); 131 failure("This should generate a warning for nonconvertible names."); 132 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 133 archive_entry_free(entry); 134 135 assert((entry = archive_entry_new()) != NULL); 136 /* Set path, gname, uname, and symlink to nonconvertible values. */ 137 archive_entry_copy_pathname(entry, filename); 138 archive_entry_copy_gname(entry, filename); 139 archive_entry_copy_uname(entry, filename); 140 archive_entry_copy_symlink(entry, filename); 141 archive_entry_set_filetype(entry, AE_IFLNK); 142 failure("This should generate a warning for nonconvertible names."); 143 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 144 archive_entry_free(entry); 145 146 assert((entry = archive_entry_new()) != NULL); 147 /* Set pathname to a very long nonconvertible value. */ 148 archive_entry_copy_pathname(entry, longname); 149 archive_entry_set_filetype(entry, AE_IFREG); 150 failure("This should generate a warning for nonconvertible names."); 151 assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 152 archive_entry_free(entry); 153 154 assertEqualInt(0, archive_write_close(a)); 155 assertEqualInt(0, archive_write_finish(a)); 156 157 /* 158 * Now read the entries back. 159 */ 160 161 assert((a = archive_read_new()) != NULL); 162 assertEqualInt(0, archive_read_support_format_tar(a)); 163 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 164 165 assertEqualInt(0, archive_read_next_header(a, &entry)); 166 assertEqualString(filename, archive_entry_pathname(entry)); 167 assertEqualString(filename, archive_entry_gname(entry)); 168 assertEqualString(filename, archive_entry_uname(entry)); 169 assertEqualString(filename, archive_entry_hardlink(entry)); 170 171 assertEqualInt(0, archive_read_next_header(a, &entry)); 172 assertEqualString(filename, archive_entry_pathname(entry)); 173 assertEqualString(filename, archive_entry_gname(entry)); 174 assertEqualString(filename, archive_entry_uname(entry)); 175 assertEqualString(filename, archive_entry_symlink(entry)); 176 177 assertEqualInt(0, archive_read_next_header(a, &entry)); 178 assertEqualString(longname, archive_entry_pathname(entry)); 179 180 assertEqualInt(0, archive_read_close(a)); 181 assertEqualInt(0, archive_read_finish(a)); 182} 183 184/* 185 * Create an entry starting from a wide-character Unicode pathname, 186 * read it back into "C" locale, which doesn't support the name. 187 * TODO: Figure out the "right" behavior here. 188 */ 189static void 190test_pax_filename_encoding_3(void) 191{ 192 wchar_t badname[] = L"xxxAyyyBzzz"; 193 const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; 194 struct archive *a; 195 struct archive_entry *entry; 196 char buff[65536]; 197 size_t used; 198 199 badname[3] = 0x1234; 200 badname[7] = 0x5678; 201 202 /* If it doesn't exist, just warn and return. */ 203 if (NULL == setlocale(LC_ALL, "C")) { 204 skipping("Can't set \"C\" locale, so can't exercise " 205 "certain character-conversion failures"); 206 return; 207 } 208 209 /* If wctomb is broken, warn and return. */ 210 if (wctomb(buff, 0x1234) > 0) { 211 skipping("Cannot test conversion failures because \"C\" " 212 "locale on this system has no invalid characters."); 213 return; 214 } 215 216 /* If wctomb is broken, warn and return. */ 217 if (wctomb(buff, 0x1234) > 0) { 218 skipping("Cannot test conversion failures because \"C\" " 219 "locale on this system has no invalid characters."); 220 return; 221 } 222 223 /* Skip test if archive_entry_update_pathname_utf8() is broken. */ 224 /* In particular, this is currently broken on Win32 because 225 * setlocale() does not set the default encoding for CP_ACP. */ 226 entry = archive_entry_new(); 227 if (archive_entry_update_pathname_utf8(entry, badname_utf8)) { 228 archive_entry_free(entry); 229 skipping("Cannot test conversion failures."); 230 return; 231 } 232 archive_entry_free(entry); 233 234 assert((a = archive_write_new()) != NULL); 235 assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 236 assertEqualIntA(a, 0, archive_write_set_compression_none(a)); 237 assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 238 assertEqualInt(0, 239 archive_write_open_memory(a, buff, sizeof(buff), &used)); 240 241 assert((entry = archive_entry_new()) != NULL); 242 /* Set pathname to non-convertible wide value. */ 243 archive_entry_copy_pathname_w(entry, badname); 244 archive_entry_set_filetype(entry, AE_IFREG); 245 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 246 archive_entry_free(entry); 247 248 assert((entry = archive_entry_new()) != NULL); 249 archive_entry_copy_pathname_w(entry, L"abc"); 250 /* Set gname to non-convertible wide value. */ 251 archive_entry_copy_gname_w(entry, badname); 252 archive_entry_set_filetype(entry, AE_IFREG); 253 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 254 archive_entry_free(entry); 255 256 assert((entry = archive_entry_new()) != NULL); 257 archive_entry_copy_pathname_w(entry, L"abc"); 258 /* Set uname to non-convertible wide value. */ 259 archive_entry_copy_uname_w(entry, badname); 260 archive_entry_set_filetype(entry, AE_IFREG); 261 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 262 archive_entry_free(entry); 263 264 assert((entry = archive_entry_new()) != NULL); 265 archive_entry_copy_pathname_w(entry, L"abc"); 266 /* Set hardlink to non-convertible wide value. */ 267 archive_entry_copy_hardlink_w(entry, badname); 268 archive_entry_set_filetype(entry, AE_IFREG); 269 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 270 archive_entry_free(entry); 271 272 assert((entry = archive_entry_new()) != NULL); 273 archive_entry_copy_pathname_w(entry, L"abc"); 274 /* Set symlink to non-convertible wide value. */ 275 archive_entry_copy_symlink_w(entry, badname); 276 archive_entry_set_filetype(entry, AE_IFLNK); 277 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 278 archive_entry_free(entry); 279 280 assertEqualInt(0, archive_write_close(a)); 281 assertEqualInt(0, archive_write_finish(a)); 282 283 /* 284 * Now read the entries back. 285 */ 286 287 assert((a = archive_read_new()) != NULL); 288 assertEqualInt(0, archive_read_support_format_tar(a)); 289 assertEqualInt(0, archive_read_open_memory(a, buff, used)); 290 291 failure("A non-convertible pathname should cause a warning."); 292 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 293 assertEqualWString(badname, archive_entry_pathname_w(entry)); 294 failure("If native locale can't convert, we should get UTF-8 back."); 295 assertEqualString(badname_utf8, archive_entry_pathname(entry)); 296 297 failure("A non-convertible gname should cause a warning."); 298 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 299 assertEqualWString(badname, archive_entry_gname_w(entry)); 300 failure("If native locale can't convert, we should get UTF-8 back."); 301 assertEqualString(badname_utf8, archive_entry_gname(entry)); 302 303 failure("A non-convertible uname should cause a warning."); 304 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 305 assertEqualWString(badname, archive_entry_uname_w(entry)); 306 failure("If native locale can't convert, we should get UTF-8 back."); 307 assertEqualString(badname_utf8, archive_entry_uname(entry)); 308 309 failure("A non-convertible hardlink should cause a warning."); 310 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 311 assertEqualWString(badname, archive_entry_hardlink_w(entry)); 312 failure("If native locale can't convert, we should get UTF-8 back."); 313 assertEqualString(badname_utf8, archive_entry_hardlink(entry)); 314 315 failure("A non-convertible symlink should cause a warning."); 316 assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 317 assertEqualWString(badname, archive_entry_symlink_w(entry)); 318 assertEqualWString(NULL, archive_entry_hardlink_w(entry)); 319 failure("If native locale can't convert, we should get UTF-8 back."); 320 assertEqualString(badname_utf8, archive_entry_symlink(entry)); 321 322 assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); 323 324 assertEqualInt(0, archive_read_close(a)); 325 assertEqualInt(0, archive_read_finish(a)); 326} 327 328DEFINE_TEST(test_pax_filename_encoding) 329{ 330 test_pax_filename_encoding_1(); 331 test_pax_filename_encoding_2(); 332 test_pax_filename_encoding_3(); 333} 334