1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm#include "test.h"
26228763Smm__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/test/test_pax_filename_encoding.c 311041 2017-01-02 01:41:31Z mm $");
27228753Smm
28228753Smm#include <locale.h>
29228753Smm
30228753Smm/*
31228753Smm * Pax interchange is supposed to encode filenames into
32228753Smm * UTF-8.  Of course, that's not always possible.  This
33228753Smm * test is intended to verify that filenames always get
34228753Smm * stored and restored correctly, regardless of the encodings.
35228753Smm */
36228753Smm
37228753Smm/*
38228753Smm * Read a manually-created archive that has filenames that are
39228753Smm * stored in binary instead of UTF-8 and verify that we get
40228753Smm * the right filename returned and that we get a warning only
41228753Smm * if the header isn't marked as binary.
42228753Smm */
43228753Smmstatic void
44228753Smmtest_pax_filename_encoding_1(void)
45228753Smm{
46228753Smm	static const char testname[] = "test_pax_filename_encoding.tar";
47228753Smm	/*
48228753Smm	 * \314\214 is a valid 2-byte UTF-8 sequence.
49228753Smm	 * \374 is invalid in UTF-8.
50228753Smm	 */
51228753Smm	char filename[] = "abc\314\214mno\374xyz";
52228753Smm	struct archive *a;
53228753Smm	struct archive_entry *entry;
54228753Smm
55228753Smm	/*
56228753Smm	 * Read an archive that has non-UTF8 pax filenames in it.
57228753Smm	 */
58228753Smm	extract_reference_file(testname);
59228753Smm	a = archive_read_new();
60228753Smm	assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
61232153Smm	assertEqualInt(ARCHIVE_OK, archive_read_support_filter_all(a));
62228753Smm	assertEqualInt(ARCHIVE_OK,
63228753Smm	    archive_read_open_filename(a, testname, 10240));
64228753Smm	/*
65228753Smm	 * First entry in this test archive has an invalid UTF-8 sequence
66228753Smm	 * in it, but the header is not marked as hdrcharset=BINARY, so that
67228753Smm	 * requires a warning.
68228753Smm	 */
69228753Smm	failure("Invalid UTF8 in a pax archive pathname should cause a warning");
70228753Smm	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
71228753Smm	assertEqualString(filename, archive_entry_pathname(entry));
72228753Smm	/*
73228753Smm	 * Second entry is identical except that it does have
74228753Smm	 * hdrcharset=BINARY, so no warning should be generated.
75228753Smm	 */
76228753Smm	failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
77228753Smm	    " characters in it without generating a warning");
78228753Smm	assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
79228753Smm	assertEqualString(filename, archive_entry_pathname(entry));
80232153Smm	archive_read_free(a);
81228753Smm}
82228753Smm
83228753Smm/*
84228753Smm * Set the locale and write a pathname containing invalid characters.
85228753Smm * This should work; the underlying implementation should automatically
86228753Smm * fall back to storing the pathname in binary.
87228753Smm */
88228753Smmstatic void
89228753Smmtest_pax_filename_encoding_2(void)
90228753Smm{
91228753Smm	char filename[] = "abc\314\214mno\374xyz";
92228753Smm	struct archive *a;
93228753Smm	struct archive_entry *entry;
94228753Smm	char buff[65536];
95228753Smm	char longname[] = "abc\314\214mno\374xyz"
96228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
100228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
101228753Smm	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
102228753Smm	    ;
103228753Smm	size_t used;
104228753Smm
105228753Smm	/*
106228753Smm	 * We need a starting locale which has invalid sequences.
107232153Smm	 * en_US.UTF-8 seems to be commonly supported.
108228753Smm	 */
109228753Smm	/* If it doesn't exist, just warn and return. */
110232153Smm	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
111228753Smm		skipping("invalid encoding tests require a suitable locale;"
112232153Smm		    " en_US.UTF-8 not available on this system");
113228753Smm		return;
114228753Smm	}
115228753Smm
116228753Smm	assert((a = archive_write_new()) != NULL);
117228753Smm	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
118248616Smm	assertEqualIntA(a, 0, archive_write_add_filter_none(a));
119228753Smm	assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
120228753Smm	assertEqualInt(0,
121228753Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
122228753Smm
123228753Smm	assert((entry = archive_entry_new()) != NULL);
124228753Smm	/* Set pathname, gname, uname, hardlink to nonconvertible values. */
125228753Smm	archive_entry_copy_pathname(entry, filename);
126228753Smm	archive_entry_copy_gname(entry, filename);
127228753Smm	archive_entry_copy_uname(entry, filename);
128228753Smm	archive_entry_copy_hardlink(entry, filename);
129228753Smm	archive_entry_set_filetype(entry, AE_IFREG);
130228753Smm	failure("This should generate a warning for nonconvertible names.");
131228753Smm	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
132228753Smm	archive_entry_free(entry);
133228753Smm
134228753Smm	assert((entry = archive_entry_new()) != NULL);
135228753Smm	/* Set path, gname, uname, and symlink to nonconvertible values. */
136228753Smm	archive_entry_copy_pathname(entry, filename);
137228753Smm	archive_entry_copy_gname(entry, filename);
138228753Smm	archive_entry_copy_uname(entry, filename);
139228753Smm	archive_entry_copy_symlink(entry, filename);
140228753Smm	archive_entry_set_filetype(entry, AE_IFLNK);
141228753Smm	failure("This should generate a warning for nonconvertible names.");
142228753Smm	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
143228753Smm	archive_entry_free(entry);
144228753Smm
145228753Smm	assert((entry = archive_entry_new()) != NULL);
146228753Smm	/* Set pathname to a very long nonconvertible value. */
147228753Smm	archive_entry_copy_pathname(entry, longname);
148228753Smm	archive_entry_set_filetype(entry, AE_IFREG);
149228753Smm	failure("This should generate a warning for nonconvertible names.");
150228753Smm	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
151228753Smm	archive_entry_free(entry);
152228753Smm
153232153Smm	assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a));
154232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
155228753Smm
156228753Smm	/*
157228753Smm	 * Now read the entries back.
158228753Smm	 */
159228753Smm
160228753Smm	assert((a = archive_read_new()) != NULL);
161228753Smm	assertEqualInt(0, archive_read_support_format_tar(a));
162228753Smm	assertEqualInt(0, archive_read_open_memory(a, buff, used));
163228753Smm
164228753Smm	assertEqualInt(0, archive_read_next_header(a, &entry));
165228753Smm	assertEqualString(filename, archive_entry_pathname(entry));
166228753Smm	assertEqualString(filename, archive_entry_gname(entry));
167228753Smm	assertEqualString(filename, archive_entry_uname(entry));
168228753Smm	assertEqualString(filename, archive_entry_hardlink(entry));
169228753Smm
170228753Smm	assertEqualInt(0, archive_read_next_header(a, &entry));
171228753Smm	assertEqualString(filename, archive_entry_pathname(entry));
172228753Smm	assertEqualString(filename, archive_entry_gname(entry));
173228753Smm	assertEqualString(filename, archive_entry_uname(entry));
174228753Smm	assertEqualString(filename, archive_entry_symlink(entry));
175228753Smm
176228753Smm	assertEqualInt(0, archive_read_next_header(a, &entry));
177228753Smm	assertEqualString(longname, archive_entry_pathname(entry));
178228753Smm
179232153Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
180232153Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
181228753Smm}
182228753Smm
#if 0 /* Disabled until Tim has had a chance to check it. */

/*
 * Create an entry starting from a wide-character Unicode pathname,
 * read it back into "C" locale, which doesn't support the name.
 * TODO: Figure out the "right" behavior here.
 */
static void
test_pax_filename_encoding_3(void)
{
	wchar_t badname[] = L"xxxAyyyBzzz";
	const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
	struct archive *a;
	struct archive_entry *entry;
	char buff[65536];
	size_t used;

	/* Replace the 'A' and 'B' placeholders with U+1234 and U+5678;
	 * badname_utf8 above holds the same string encoded as UTF-8. */
	badname[3] = 0x1234;
	badname[7] = 0x5678;

	/* If it doesn't exist, just warn and return. */
	if (NULL == setlocale(LC_ALL, "C")) {
		skipping("Can't set \"C\" locale, so can't exercise "
		    "certain character-conversion failures");
		return;
	}

	/* If the "C" locale can convert U+1234 to multibyte after all,
	 * there is no conversion failure to test; warn and return. */
	if (wctomb(buff, 0x1234) > 0) {
		skipping("Cannot test conversion failures because \"C\" "
		    "locale on this system has no invalid characters.");
		return;
	}

	/* Skip test if archive_entry_update_pathname_utf8() is broken. */
	/* In particular, this is currently broken on Win32 because
	 * setlocale() does not set the default encoding for CP_ACP. */
	entry = archive_entry_new();
	if (archive_entry_update_pathname_utf8(entry, badname_utf8)) {
		archive_entry_free(entry);
		skipping("Cannot test conversion failures.");
		return;
	}
	archive_entry_free(entry);

	assert((a = archive_write_new()) != NULL);
	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
	assertEqualIntA(a, 0, archive_write_add_filter_none(a));
	assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
	assertEqualInt(0,
	    archive_write_open_memory(a, buff, sizeof(buff), &used));

	assert((entry = archive_entry_new()) != NULL);
	/* Set pathname to non-convertible wide value. */
	archive_entry_copy_pathname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set gname to non-convertible wide value. */
	archive_entry_copy_gname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set uname to non-convertible wide value. */
	archive_entry_copy_uname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set hardlink to non-convertible wide value. */
	archive_entry_copy_hardlink_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set symlink to non-convertible wide value. */
	archive_entry_copy_symlink_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFLNK);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a));
	assertEqualInt(ARCHIVE_OK, archive_write_free(a));

	/*
	 * Now read the entries back.
	 */

	assert((a = archive_read_new()) != NULL);
	assertEqualInt(0, archive_read_support_format_tar(a));
	assertEqualInt(0, archive_read_open_memory(a, buff, used));

	failure("A non-convertible pathname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_pathname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_pathname(entry));

	failure("A non-convertible gname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_gname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_gname(entry));

	failure("A non-convertible uname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_uname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_uname(entry));

	failure("A non-convertible hardlink should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_hardlink_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_hardlink(entry));

	failure("A non-convertible symlink should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_symlink_w(entry));
	assertEqualWString(NULL, archive_entry_hardlink_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_symlink(entry));

	assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));

	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
}
#else
/*
 * Empty stand-in used while the real test above is disabled, so that
 * the call from test_pax_filename_encoding() still resolves.
 */
static void
test_pax_filename_encoding_3(void)
{
}
#endif
334228753Smm
335232153Smm/*
336232153Smm * Verify that KOI8-R filenames are correctly translated to Unicode and UTF-8.
337232153Smm */
338299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R)
339232153Smm{
340232153Smm  	struct archive *a;
341232153Smm  	struct archive_entry *entry;
342232153Smm	char buff[4096];
343232153Smm	size_t used;
344232153Smm
345232153Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
346232153Smm		skipping("KOI8-R locale not available on this system.");
347232153Smm		return;
348232153Smm	}
349232153Smm
350311041Smm	/* Check if the platform completely supports the string conversion. */
351232153Smm	a = archive_write_new();
352232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
353232153Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
354232153Smm		skipping("This system cannot convert character-set"
355232153Smm		    " from KOI8-R to UTF-8.");
356232153Smm		archive_write_free(a);
357232153Smm		return;
358232153Smm	}
359232153Smm	archive_write_free(a);
360232153Smm
361232153Smm	/* Re-create a write archive object since filenames should be written
362232153Smm	 * in UTF-8 by default. */
363232153Smm	a = archive_write_new();
364232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
365232153Smm	assertEqualInt(ARCHIVE_OK,
366232153Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
367232153Smm
368232153Smm	entry = archive_entry_new2(a);
369232153Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
370232153Smm	archive_entry_set_filetype(entry, AE_IFREG);
371232153Smm	archive_entry_set_size(entry, 0);
372232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
373232153Smm	archive_entry_free(entry);
374232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
375232153Smm
376232153Smm	/* Above three characters in KOI8-R should translate to the following
377232153Smm	 * three characters (two bytes each) in UTF-8. */
378232153Smm	assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15);
379232153Smm}
380232153Smm
381232153Smm/*
382232153Smm * Verify that CP1251 filenames are correctly translated to Unicode and UTF-8.
383232153Smm */
384299529SmmDEFINE_TEST(test_pax_filename_encoding_CP1251)
385232153Smm{
386232153Smm  	struct archive *a;
387232153Smm  	struct archive_entry *entry;
388232153Smm	char buff[4096];
389232153Smm	size_t used;
390232153Smm
391232153Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
392232153Smm	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
393232153Smm		skipping("KOI8-R locale not available on this system.");
394232153Smm		return;
395232153Smm	}
396232153Smm
397311041Smm	/* Check if the platform completely supports the string conversion. */
398232153Smm	a = archive_write_new();
399232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
400232153Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
401232153Smm		skipping("This system cannot convert character-set"
402232153Smm		    " from KOI8-R to UTF-8.");
403232153Smm		archive_write_free(a);
404232153Smm		return;
405232153Smm	}
406232153Smm	archive_write_free(a);
407232153Smm
408232153Smm	/* Re-create a write archive object since filenames should be written
409232153Smm	 * in UTF-8 by default. */
410232153Smm	a = archive_write_new();
411232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
412232153Smm	assertEqualInt(ARCHIVE_OK,
413232153Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
414232153Smm
415232153Smm	entry = archive_entry_new2(a);
416232153Smm	archive_entry_set_pathname(entry, "\xef\xf0\xe8");
417232153Smm	archive_entry_set_filetype(entry, AE_IFREG);
418232153Smm	archive_entry_set_size(entry, 0);
419232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
420232153Smm	archive_entry_free(entry);
421232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
422232153Smm
423232153Smm	/* Above three characters in KOI8-R should translate to the following
424232153Smm	 * three characters (two bytes each) in UTF-8. */
425232153Smm	assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15);
426232153Smm}
427232153Smm
428232153Smm/*
429232153Smm * Verify that EUC-JP filenames are correctly translated to Unicode and UTF-8.
430232153Smm */
431299529SmmDEFINE_TEST(test_pax_filename_encoding_EUCJP)
432232153Smm{
433232153Smm  	struct archive *a;
434232153Smm  	struct archive_entry *entry;
435232153Smm	char buff[4096];
436232153Smm	size_t used;
437232153Smm
438232153Smm	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
439232153Smm		skipping("eucJP locale not available on this system.");
440232153Smm		return;
441232153Smm	}
442232153Smm
443311041Smm	/* Check if the platform completely supports the string conversion. */
444232153Smm	a = archive_write_new();
445232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
446232153Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
447232153Smm		skipping("This system cannot convert character-set"
448232153Smm		    " from eucJP to UTF-8.");
449232153Smm		archive_write_free(a);
450232153Smm		return;
451232153Smm	}
452232153Smm	archive_write_free(a);
453232153Smm
454232153Smm	/* Re-create a write archive object since filenames should be written
455232153Smm	 * in UTF-8 by default. */
456232153Smm	a = archive_write_new();
457232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
458232153Smm	assertEqualInt(ARCHIVE_OK,
459232153Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
460232153Smm
461232153Smm	entry = archive_entry_new2(a);
462232153Smm	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
463232153Smm	/* Check the Unicode version. */
464232153Smm	archive_entry_set_filetype(entry, AE_IFREG);
465232153Smm	archive_entry_set_size(entry, 0);
466232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
467232153Smm	archive_entry_free(entry);
468232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
469232153Smm
470232153Smm	/* Check UTF-8 version. */
471232153Smm	assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16);
472232153Smm
473232153Smm}
474232153Smm
475232153Smm/*
476232153Smm * Verify that CP932/SJIS filenames are correctly translated to Unicode and UTF-8.
477232153Smm */
478299529SmmDEFINE_TEST(test_pax_filename_encoding_CP932)
479232153Smm{
480232153Smm  	struct archive *a;
481232153Smm  	struct archive_entry *entry;
482232153Smm	char buff[4096];
483232153Smm	size_t used;
484232153Smm
485232153Smm	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
486232153Smm	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
487232153Smm		skipping("eucJP locale not available on this system.");
488232153Smm		return;
489232153Smm	}
490232153Smm
491311041Smm	/* Check if the platform completely supports the string conversion. */
492232153Smm	a = archive_write_new();
493232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
494232153Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
495232153Smm		skipping("This system cannot convert character-set"
496232153Smm		    " from CP932/SJIS to UTF-8.");
497232153Smm		archive_write_free(a);
498232153Smm		return;
499232153Smm	}
500232153Smm	archive_write_free(a);
501232153Smm
502232153Smm	/* Re-create a write archive object since filenames should be written
503232153Smm	 * in UTF-8 by default. */
504232153Smm	a = archive_write_new();
505232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
506232153Smm	assertEqualInt(ARCHIVE_OK,
507232153Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
508232153Smm
509232153Smm	entry = archive_entry_new2(a);
510232153Smm	archive_entry_set_pathname(entry, "\x95\x5C.txt");
511232153Smm	/* Check the Unicode version. */
512232153Smm	archive_entry_set_filetype(entry, AE_IFREG);
513232153Smm	archive_entry_set_size(entry, 0);
514232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
515232153Smm	archive_entry_free(entry);
516232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
517232153Smm
518232153Smm	/* Check UTF-8 version. */
519232153Smm	assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16);
520232153Smm
521232153Smm}
522232153Smm
523232153Smm/*
524232153Smm * Verify that KOI8-R filenames are not translated to Unicode and UTF-8
525232153Smm * when using hdrcharset=BINARY option.
526232153Smm */
527299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R_BINARY)
528232153Smm{
529232153Smm  	struct archive *a;
530232153Smm  	struct archive_entry *entry;
531232153Smm	char buff[4096];
532232153Smm	size_t used;
533232153Smm
534232153Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
535232153Smm		skipping("KOI8-R locale not available on this system.");
536232153Smm		return;
537232153Smm	}
538232153Smm
539232153Smm	a = archive_write_new();
540232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
541232153Smm	/* BINARY mode should be accepted. */
542232153Smm	assertEqualInt(ARCHIVE_OK,
543232153Smm	    archive_write_set_options(a, "hdrcharset=BINARY"));
544232153Smm	assertEqualInt(ARCHIVE_OK,
545232153Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
546232153Smm
547232153Smm	entry = archive_entry_new2(a);
548232153Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
549232153Smm	archive_entry_set_filetype(entry, AE_IFREG);
550232153Smm	archive_entry_set_size(entry, 0);
551232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
552232153Smm	archive_entry_free(entry);
553232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
554232153Smm
555232153Smm	/* "hdrcharset=BINARY" pax attribute should be written. */
556232153Smm	assertEqualMem(buff + 512, "21 hdrcharset=BINARY\x0A", 21);
557232153Smm	/* Above three characters in KOI8-R should not translate to any
558232153Smm	 * character-set. */
559232153Smm	assertEqualMem(buff + 512+21, "12 path=\xD0\xD2\xC9\x0A", 12);
560232153Smm}
561232153Smm
562232153Smm/*
563232153Smm * Pax format writer only accepts both BINARY and UTF-8.
564232153Smm * If other character-set name is specified, you will get ARCHIVE_FAILED.
565232153Smm */
566299529SmmDEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251)
567232153Smm{
568232153Smm  	struct archive *a;
569232153Smm
570232153Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
571232153Smm		skipping("KOI8-R locale not available on this system.");
572232153Smm		return;
573232153Smm	}
574232153Smm
575232153Smm	a = archive_write_new();
576232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
577232153Smm	/* pax format writer only accepts both BINARY and UTF-8. */
578232153Smm	assertEqualInt(ARCHIVE_FAILED,
579232153Smm	    archive_write_set_options(a, "hdrcharset=CP1251"));
580232153Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
581232153Smm}
582232153Smm
583232153Smm
584228753SmmDEFINE_TEST(test_pax_filename_encoding)
585228753Smm{
586228753Smm	test_pax_filename_encoding_1();
587228753Smm	test_pax_filename_encoding_2();
588228753Smm	test_pax_filename_encoding_3();
589228753Smm}
590