/* test_pax_filename_encoding.c revision 228763 */
1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm#include "test.h" 26228763Smm__FBSDID("$FreeBSD: head/contrib/libarchive/libarchive/test/test_pax_filename_encoding.c 228763 2011-12-21 11:13:29Z mm $"); 27228753Smm 28228753Smm#include <locale.h> 29228753Smm 30228753Smm/* 31228753Smm * Pax interchange is supposed to encode filenames into 32228753Smm * UTF-8. Of course, that's not always possible. 
This 33228753Smm * test is intended to verify that filenames always get 34228753Smm * stored and restored correctly, regardless of the encodings. 35228753Smm */ 36228753Smm 37228753Smm/* 38228753Smm * Read a manually-created archive that has filenames that are 39228753Smm * stored in binary instead of UTF-8 and verify that we get 40228753Smm * the right filename returned and that we get a warning only 41228753Smm * if the header isn't marked as binary. 42228753Smm */ 43228753Smmstatic void 44228753Smmtest_pax_filename_encoding_1(void) 45228753Smm{ 46228753Smm static const char testname[] = "test_pax_filename_encoding.tar"; 47228753Smm /* 48228753Smm * \314\214 is a valid 2-byte UTF-8 sequence. 49228753Smm * \374 is invalid in UTF-8. 50228753Smm */ 51228753Smm char filename[] = "abc\314\214mno\374xyz"; 52228753Smm struct archive *a; 53228753Smm struct archive_entry *entry; 54228753Smm 55228753Smm /* 56228753Smm * Read an archive that has non-UTF8 pax filenames in it. 57228753Smm */ 58228753Smm extract_reference_file(testname); 59228753Smm a = archive_read_new(); 60228753Smm assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a)); 61228753Smm assertEqualInt(ARCHIVE_OK, archive_read_support_compression_all(a)); 62228753Smm assertEqualInt(ARCHIVE_OK, 63228753Smm archive_read_open_filename(a, testname, 10240)); 64228753Smm /* 65228753Smm * First entry in this test archive has an invalid UTF-8 sequence 66228753Smm * in it, but the header is not marked as hdrcharset=BINARY, so that 67228753Smm * requires a warning. 68228753Smm */ 69228753Smm failure("Invalid UTF8 in a pax archive pathname should cause a warning"); 70228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 71228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 72228753Smm /* 73228753Smm * Second entry is identical except that it does have 74228753Smm * hdrcharset=BINARY, so no warning should be generated. 
75228753Smm */ 76228753Smm failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n" 77228753Smm " characters in it without generating a warning"); 78228753Smm assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); 79228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 80228753Smm archive_read_finish(a); 81228753Smm} 82228753Smm 83228753Smm/* 84228753Smm * Set the locale and write a pathname containing invalid characters. 85228753Smm * This should work; the underlying implementation should automatically 86228753Smm * fall back to storing the pathname in binary. 87228753Smm */ 88228753Smmstatic void 89228753Smmtest_pax_filename_encoding_2(void) 90228753Smm{ 91228753Smm char filename[] = "abc\314\214mno\374xyz"; 92228753Smm struct archive *a; 93228753Smm struct archive_entry *entry; 94228753Smm char buff[65536]; 95228753Smm char longname[] = "abc\314\214mno\374xyz" 96228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 97228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 98228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 99228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 100228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 101228753Smm "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" 102228753Smm ; 103228753Smm size_t used; 104228753Smm 105228753Smm /* 106228753Smm * We need a starting locale which has invalid sequences. 107228753Smm * de_DE.UTF-8 seems to be commonly supported. 108228753Smm */ 109228753Smm /* If it doesn't exist, just warn and return. 
*/ 110228753Smm if (LOCALE_UTF8 == NULL 111228753Smm || NULL == setlocale(LC_ALL, LOCALE_UTF8)) { 112228753Smm skipping("invalid encoding tests require a suitable locale;" 113228753Smm " %s not available on this system", LOCALE_UTF8); 114228753Smm return; 115228753Smm } 116228753Smm 117228753Smm assert((a = archive_write_new()) != NULL); 118228753Smm assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 119228753Smm assertEqualIntA(a, 0, archive_write_set_compression_none(a)); 120228753Smm assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 121228753Smm assertEqualInt(0, 122228753Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 123228753Smm 124228753Smm assert((entry = archive_entry_new()) != NULL); 125228753Smm /* Set pathname, gname, uname, hardlink to nonconvertible values. */ 126228753Smm archive_entry_copy_pathname(entry, filename); 127228753Smm archive_entry_copy_gname(entry, filename); 128228753Smm archive_entry_copy_uname(entry, filename); 129228753Smm archive_entry_copy_hardlink(entry, filename); 130228753Smm archive_entry_set_filetype(entry, AE_IFREG); 131228753Smm failure("This should generate a warning for nonconvertible names."); 132228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 133228753Smm archive_entry_free(entry); 134228753Smm 135228753Smm assert((entry = archive_entry_new()) != NULL); 136228753Smm /* Set path, gname, uname, and symlink to nonconvertible values. 
*/ 137228753Smm archive_entry_copy_pathname(entry, filename); 138228753Smm archive_entry_copy_gname(entry, filename); 139228753Smm archive_entry_copy_uname(entry, filename); 140228753Smm archive_entry_copy_symlink(entry, filename); 141228753Smm archive_entry_set_filetype(entry, AE_IFLNK); 142228753Smm failure("This should generate a warning for nonconvertible names."); 143228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 144228753Smm archive_entry_free(entry); 145228753Smm 146228753Smm assert((entry = archive_entry_new()) != NULL); 147228753Smm /* Set pathname to a very long nonconvertible value. */ 148228753Smm archive_entry_copy_pathname(entry, longname); 149228753Smm archive_entry_set_filetype(entry, AE_IFREG); 150228753Smm failure("This should generate a warning for nonconvertible names."); 151228753Smm assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry)); 152228753Smm archive_entry_free(entry); 153228753Smm 154228753Smm assertEqualInt(0, archive_write_close(a)); 155228753Smm assertEqualInt(0, archive_write_finish(a)); 156228753Smm 157228753Smm /* 158228753Smm * Now read the entries back. 
159228753Smm */ 160228753Smm 161228753Smm assert((a = archive_read_new()) != NULL); 162228753Smm assertEqualInt(0, archive_read_support_format_tar(a)); 163228753Smm assertEqualInt(0, archive_read_open_memory(a, buff, used)); 164228753Smm 165228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 166228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 167228753Smm assertEqualString(filename, archive_entry_gname(entry)); 168228753Smm assertEqualString(filename, archive_entry_uname(entry)); 169228753Smm assertEqualString(filename, archive_entry_hardlink(entry)); 170228753Smm 171228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 172228753Smm assertEqualString(filename, archive_entry_pathname(entry)); 173228753Smm assertEqualString(filename, archive_entry_gname(entry)); 174228753Smm assertEqualString(filename, archive_entry_uname(entry)); 175228753Smm assertEqualString(filename, archive_entry_symlink(entry)); 176228753Smm 177228753Smm assertEqualInt(0, archive_read_next_header(a, &entry)); 178228753Smm assertEqualString(longname, archive_entry_pathname(entry)); 179228753Smm 180228753Smm assertEqualInt(0, archive_read_close(a)); 181228753Smm assertEqualInt(0, archive_read_finish(a)); 182228753Smm} 183228753Smm 184228753Smm/* 185228753Smm * Create an entry starting from a wide-character Unicode pathname, 186228753Smm * read it back into "C" locale, which doesn't support the name. 187228753Smm * TODO: Figure out the "right" behavior here. 
188228753Smm */ 189228753Smmstatic void 190228753Smmtest_pax_filename_encoding_3(void) 191228753Smm{ 192228753Smm wchar_t badname[] = L"xxxAyyyBzzz"; 193228753Smm const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; 194228753Smm struct archive *a; 195228753Smm struct archive_entry *entry; 196228753Smm char buff[65536]; 197228753Smm size_t used; 198228753Smm 199228753Smm badname[3] = 0x1234; 200228753Smm badname[7] = 0x5678; 201228753Smm 202228753Smm /* If it doesn't exist, just warn and return. */ 203228753Smm if (NULL == setlocale(LC_ALL, "C")) { 204228753Smm skipping("Can't set \"C\" locale, so can't exercise " 205228753Smm "certain character-conversion failures"); 206228753Smm return; 207228753Smm } 208228753Smm 209228753Smm /* If wctomb is broken, warn and return. */ 210228753Smm if (wctomb(buff, 0x1234) > 0) { 211228753Smm skipping("Cannot test conversion failures because \"C\" " 212228753Smm "locale on this system has no invalid characters."); 213228753Smm return; 214228753Smm } 215228753Smm 216228753Smm /* If wctomb is broken, warn and return. */ 217228753Smm if (wctomb(buff, 0x1234) > 0) { 218228753Smm skipping("Cannot test conversion failures because \"C\" " 219228753Smm "locale on this system has no invalid characters."); 220228753Smm return; 221228753Smm } 222228753Smm 223228753Smm /* Skip test if archive_entry_update_pathname_utf8() is broken. */ 224228753Smm /* In particular, this is currently broken on Win32 because 225228753Smm * setlocale() does not set the default encoding for CP_ACP. 
*/ 226228753Smm entry = archive_entry_new(); 227228753Smm if (archive_entry_update_pathname_utf8(entry, badname_utf8)) { 228228753Smm archive_entry_free(entry); 229228753Smm skipping("Cannot test conversion failures."); 230228753Smm return; 231228753Smm } 232228753Smm archive_entry_free(entry); 233228753Smm 234228753Smm assert((a = archive_write_new()) != NULL); 235228753Smm assertEqualIntA(a, 0, archive_write_set_format_pax(a)); 236228753Smm assertEqualIntA(a, 0, archive_write_set_compression_none(a)); 237228753Smm assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); 238228753Smm assertEqualInt(0, 239228753Smm archive_write_open_memory(a, buff, sizeof(buff), &used)); 240228753Smm 241228753Smm assert((entry = archive_entry_new()) != NULL); 242228753Smm /* Set pathname to non-convertible wide value. */ 243228753Smm archive_entry_copy_pathname_w(entry, badname); 244228753Smm archive_entry_set_filetype(entry, AE_IFREG); 245228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 246228753Smm archive_entry_free(entry); 247228753Smm 248228753Smm assert((entry = archive_entry_new()) != NULL); 249228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 250228753Smm /* Set gname to non-convertible wide value. */ 251228753Smm archive_entry_copy_gname_w(entry, badname); 252228753Smm archive_entry_set_filetype(entry, AE_IFREG); 253228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 254228753Smm archive_entry_free(entry); 255228753Smm 256228753Smm assert((entry = archive_entry_new()) != NULL); 257228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 258228753Smm /* Set uname to non-convertible wide value. 
*/ 259228753Smm archive_entry_copy_uname_w(entry, badname); 260228753Smm archive_entry_set_filetype(entry, AE_IFREG); 261228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 262228753Smm archive_entry_free(entry); 263228753Smm 264228753Smm assert((entry = archive_entry_new()) != NULL); 265228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 266228753Smm /* Set hardlink to non-convertible wide value. */ 267228753Smm archive_entry_copy_hardlink_w(entry, badname); 268228753Smm archive_entry_set_filetype(entry, AE_IFREG); 269228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 270228753Smm archive_entry_free(entry); 271228753Smm 272228753Smm assert((entry = archive_entry_new()) != NULL); 273228753Smm archive_entry_copy_pathname_w(entry, L"abc"); 274228753Smm /* Set symlink to non-convertible wide value. */ 275228753Smm archive_entry_copy_symlink_w(entry, badname); 276228753Smm archive_entry_set_filetype(entry, AE_IFLNK); 277228753Smm assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 278228753Smm archive_entry_free(entry); 279228753Smm 280228753Smm assertEqualInt(0, archive_write_close(a)); 281228753Smm assertEqualInt(0, archive_write_finish(a)); 282228753Smm 283228753Smm /* 284228753Smm * Now read the entries back. 
285228753Smm */ 286228753Smm 287228753Smm assert((a = archive_read_new()) != NULL); 288228753Smm assertEqualInt(0, archive_read_support_format_tar(a)); 289228753Smm assertEqualInt(0, archive_read_open_memory(a, buff, used)); 290228753Smm 291228753Smm failure("A non-convertible pathname should cause a warning."); 292228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 293228753Smm assertEqualWString(badname, archive_entry_pathname_w(entry)); 294228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 295228753Smm assertEqualString(badname_utf8, archive_entry_pathname(entry)); 296228753Smm 297228753Smm failure("A non-convertible gname should cause a warning."); 298228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 299228753Smm assertEqualWString(badname, archive_entry_gname_w(entry)); 300228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 301228753Smm assertEqualString(badname_utf8, archive_entry_gname(entry)); 302228753Smm 303228753Smm failure("A non-convertible uname should cause a warning."); 304228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 305228753Smm assertEqualWString(badname, archive_entry_uname_w(entry)); 306228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 307228753Smm assertEqualString(badname_utf8, archive_entry_uname(entry)); 308228753Smm 309228753Smm failure("A non-convertible hardlink should cause a warning."); 310228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 311228753Smm assertEqualWString(badname, archive_entry_hardlink_w(entry)); 312228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 313228753Smm assertEqualString(badname_utf8, archive_entry_hardlink(entry)); 314228753Smm 315228753Smm failure("A non-convertible symlink should cause a warning."); 316228753Smm assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); 
317228753Smm assertEqualWString(badname, archive_entry_symlink_w(entry)); 318228753Smm assertEqualWString(NULL, archive_entry_hardlink_w(entry)); 319228753Smm failure("If native locale can't convert, we should get UTF-8 back."); 320228753Smm assertEqualString(badname_utf8, archive_entry_symlink(entry)); 321228753Smm 322228753Smm assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); 323228753Smm 324228753Smm assertEqualInt(0, archive_read_close(a)); 325228753Smm assertEqualInt(0, archive_read_finish(a)); 326228753Smm} 327228753Smm 328228753SmmDEFINE_TEST(test_pax_filename_encoding) 329228753Smm{ 330228753Smm test_pax_filename_encoding_1(); 331228753Smm test_pax_filename_encoding_2(); 332228753Smm test_pax_filename_encoding_3(); 333228753Smm} 334