test_read_format_tar_filename.c revision 238856
1231200Smm/*-
2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA
3231200Smm * All rights reserved.
4231200Smm *
5231200Smm * Redistribution and use in source and binary forms, with or without
6231200Smm * modification, are permitted provided that the following conditions
7231200Smm * are met:
8231200Smm * 1. Redistributions of source code must retain the above copyright
9231200Smm *    notice, this list of conditions and the following disclaimer.
10231200Smm * 2. Redistributions in binary form must reproduce the above copyright
11231200Smm *    notice, this list of conditions and the following disclaimer in the
12231200Smm *    documentation and/or other materials provided with the distribution.
13231200Smm *
14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24231200Smm */
25231200Smm#include "test.h"
26231200Smm__FBSDID("$FreeBSD");
27231200Smm
28231200Smm#include <locale.h>
29231200Smm
/*
 * The sample tar file was made in LANG=KOI8-R and it contains two
 * files whose filenames are stored in different charsets.
 * - the filename of the first file is stored in BINARY mode.
 * - the filename of the second file is stored in UTF-8.
 *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of the second file, which is stored in UTF-8 by
 * default.
 */
39231200Smm
40231200Smmstatic void
41231200Smmtest_read_format_tar_filename_KOI8R_CP866(const char *refname)
42231200Smm{
43231200Smm	struct archive *a;
44231200Smm	struct archive_entry *ae;
45231200Smm
46231200Smm	/*
47231200Smm 	* Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
48231200Smm 	* We should correctly read two filenames.
49231200Smm	*/
50231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
51231200Smm	    NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
52231200Smm		skipping("ru_RU.CP866 locale not available on this system.");
53231200Smm		return;
54231200Smm	}
55231200Smm
56231200Smm	/* Test if the platform can convert from UTF-8. */
57231200Smm	assert((a = archive_read_new()) != NULL);
58231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
59231200Smm	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
60231200Smm		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
61231200Smm		skipping("This system cannot convert character-set"
62231200Smm		    " from UTF-8 to CP866.");
63231200Smm		return;
64231200Smm	}
65231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
66231200Smm
67231200Smm	assert((a = archive_read_new()) != NULL);
68231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
69231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
70231200Smm	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
71231200Smm		skipping("This system cannot convert character-set"
72231200Smm		    " from KOI8-R to CP866.");
73231200Smm		goto next_test;
74231200Smm	}
75231200Smm	assertEqualIntA(a, ARCHIVE_OK,
76231200Smm	    archive_read_open_filename(a, refname, 10240));
77231200Smm
78231200Smm	/* Verify regular first file. */
79231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
80231200Smm	assertEqualString("\x8f\x90\x88\x82\x85\x92",
81231200Smm	    archive_entry_pathname(ae));
82231200Smm	assertEqualInt(6, archive_entry_size(ae));
83231200Smm
84231200Smm	/* Verify regular second file. */
85231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
86231200Smm	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
87231200Smm	    archive_entry_pathname(ae));
88231200Smm	assertEqualInt(6, archive_entry_size(ae));
89231200Smm
90231200Smm
91231200Smm	/* End of archive. */
92231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
93231200Smm
94231200Smm	/* Verify archive format. */
95231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
96231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
97231200Smm	    archive_format(a));
98231200Smm
99231200Smm	/* Close the archive. */
100231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
101231200Smmnext_test:
102231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
103231200Smm
104231200Smm
105231200Smm	/*
106231200Smm	 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
107231200Smm	 * The filename we can properly read is only second file.
108231200Smm	 */
109231200Smm
110231200Smm	assert((a = archive_read_new()) != NULL);
111231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
112231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
113231200Smm	assertEqualIntA(a, ARCHIVE_OK,
114231200Smm	    archive_read_open_filename(a, refname, 10240));
115231200Smm
116231200Smm	/*
117231200Smm	 * Verify regular first file.
118231200Smm	 * The filename is not translated to CP866 because hdrcharset
119231200Smm	 * attribute is BINARY and there is not way to know its charset.
120231200Smm	 */
121231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
122231200Smm	/* A filename is in KOI8-R. */
123231200Smm	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
124231200Smm	    archive_entry_pathname(ae));
125231200Smm	assertEqualInt(6, archive_entry_size(ae));
126231200Smm
127231200Smm	/*
128231200Smm	 * Verify regular second file.
129231200Smm	 * The filename is translated from UTF-8 to CP866
130231200Smm	 */
131231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
132231200Smm	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
133231200Smm	    archive_entry_pathname(ae));
134231200Smm	assertEqualInt(6, archive_entry_size(ae));
135231200Smm
136231200Smm
137231200Smm	/* End of archive. */
138231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
139231200Smm
140231200Smm	/* Verify archive format. */
141231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
142231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
143231200Smm	    archive_format(a));
144231200Smm
145231200Smm	/* Close the archive. */
146231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
147231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
148231200Smm}
149231200Smm
150231200Smmstatic void
151231200Smmtest_read_format_tar_filename_KOI8R_UTF8(const char *refname)
152231200Smm{
153231200Smm	struct archive *a;
154231200Smm	struct archive_entry *ae;
155231200Smm
156231200Smm	/*
157231200Smm	 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
158231200Smm	 * We should correctly read two filenames.
159231200Smm	 */
160231200Smm	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
161231200Smm		skipping("en_US.UTF-8 locale not available on this system.");
162231200Smm		return;
163231200Smm	}
164231200Smm
165231200Smm	assert((a = archive_read_new()) != NULL);
166231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
167231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
168231200Smm	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
169231200Smm		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
170231200Smm		skipping("This system cannot convert character-set"
171231200Smm		    " from KOI8-R to UTF-8.");
172231200Smm		return;
173231200Smm	}
174231200Smm	assertEqualIntA(a, ARCHIVE_OK,
175231200Smm	    archive_read_open_filename(a, refname, 10240));
176231200Smm
177231200Smm	/* Verify regular file. */
178231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
179231200Smm	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
180231200Smm	    archive_entry_pathname(ae));
181231200Smm	assertEqualInt(6, archive_entry_size(ae));
182231200Smm
183231200Smm	/* Verify regular file. */
184231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
185231200Smm	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
186231200Smm	    archive_entry_pathname(ae));
187231200Smm	assertEqualInt(6, archive_entry_size(ae));
188231200Smm
189231200Smm
190231200Smm	/* End of archive. */
191231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
192231200Smm
193231200Smm	/* Verify archive format. */
194231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
195231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
196231200Smm	    archive_format(a));
197231200Smm
198231200Smm	/* Close the archive. */
199231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
200231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
201231200Smm
202231200Smm	/*
203231200Smm	 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
204231200Smm	 * The filename we can properly read is only second file.
205231200Smm	 */
206231200Smm
207231200Smm	assert((a = archive_read_new()) != NULL);
208231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
209231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
210231200Smm	assertEqualIntA(a, ARCHIVE_OK,
211231200Smm	    archive_read_open_filename(a, refname, 10240));
212231200Smm
213231200Smm	/*
214231200Smm	 * Verify regular first file.
215231200Smm	 * The filename is not translated to UTF-8 because hdrcharset
216231200Smm	 * attribute is BINARY and there is not way to know its charset.
217231200Smm	 */
218231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
219231200Smm	/* A filename is in KOI8-R. */
220231200Smm	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
221231200Smm	    archive_entry_pathname(ae));
222231200Smm	assertEqualInt(6, archive_entry_size(ae));
223231200Smm
224231200Smm	/*
225231200Smm	 * Verify regular second file.
226231200Smm	 */
227231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
228231200Smm	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
229231200Smm	    archive_entry_pathname(ae));
230231200Smm	assertEqualInt(6, archive_entry_size(ae));
231231200Smm
232231200Smm
233231200Smm	/* End of archive. */
234231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
235231200Smm
236231200Smm	/* Verify archive format. */
237231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
238231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
239231200Smm	    archive_format(a));
240231200Smm
241231200Smm	/* Close the archive. */
242231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
243231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
244231200Smm}
245231200Smm
246231200Smmstatic void
247231200Smmtest_read_format_tar_filename_KOI8R_CP1251(const char *refname)
248231200Smm{
249231200Smm	struct archive *a;
250231200Smm	struct archive_entry *ae;
251231200Smm
252231200Smm	/*
253231200Smm 	* Read filename in CP1251 with "hdrcharset=KOI8-R" option.
254231200Smm 	* We should correctly read two filenames.
255231200Smm	*/
256231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
257231200Smm	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
258231200Smm		skipping("CP1251 locale not available on this system.");
259231200Smm		return;
260231200Smm	}
261231200Smm
262231200Smm	/* Test if the platform can convert from UTF-8. */
263231200Smm	assert((a = archive_read_new()) != NULL);
264231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
265231200Smm	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
266231200Smm		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
267231200Smm		skipping("This system cannot convert character-set"
268231200Smm		    " from UTF-8 to CP1251.");
269231200Smm		return;
270231200Smm	}
271231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
272231200Smm
273231200Smm	assert((a = archive_read_new()) != NULL);
274231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
275231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
276231200Smm	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
277231200Smm		skipping("This system cannot convert character-set"
278231200Smm		    " from KOI8-R to CP1251.");
279231200Smm		goto next_test;
280231200Smm	}
281231200Smm	assertEqualIntA(a, ARCHIVE_OK,
282231200Smm	    archive_read_open_filename(a, refname, 10240));
283231200Smm
284231200Smm	/* Verify regular first file. */
285231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
286231200Smm	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
287231200Smm	    archive_entry_pathname(ae));
288231200Smm	assertEqualInt(6, archive_entry_size(ae));
289231200Smm
290231200Smm	/* Verify regular second file. */
291231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
292231200Smm	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
293231200Smm	    archive_entry_pathname(ae));
294231200Smm	assertEqualInt(6, archive_entry_size(ae));
295231200Smm
296231200Smm
297231200Smm	/* End of archive. */
298231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
299231200Smm
300231200Smm	/* Verify archive format. */
301231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
302231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
303231200Smm	    archive_format(a));
304231200Smm
305231200Smm	/* Close the archive. */
306231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
307231200Smmnext_test:
308231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
309231200Smm
310231200Smm	/*
311231200Smm	 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
312231200Smm	 * The filename we can properly read is only second file.
313231200Smm	 */
314231200Smm
315231200Smm	assert((a = archive_read_new()) != NULL);
316231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
317231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
318231200Smm	assertEqualIntA(a, ARCHIVE_OK,
319231200Smm	    archive_read_open_filename(a, refname, 10240));
320231200Smm
321231200Smm	/*
322231200Smm	 * Verify regular first file.
323231200Smm	 * The filename is not translated to CP1251 because hdrcharset
324231200Smm	 * attribute is BINARY and there is not way to know its charset.
325231200Smm	 */
326231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
327231200Smm	/* A filename is in KOI8-R. */
328231200Smm	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
329231200Smm	    archive_entry_pathname(ae));
330231200Smm	assertEqualInt(6, archive_entry_size(ae));
331231200Smm
332231200Smm	/*
333231200Smm	 * Verify regular second file.
334231200Smm	 */
335231200Smm	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
336231200Smm	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
337231200Smm	    archive_entry_pathname(ae));
338231200Smm	assertEqualInt(6, archive_entry_size(ae));
339231200Smm
340231200Smm
341231200Smm	/* End of archive. */
342231200Smm	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
343231200Smm
344231200Smm	/* Verify archive format. */
345231200Smm	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
346231200Smm	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
347231200Smm	    archive_format(a));
348231200Smm
349231200Smm	/* Close the archive. */
350231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
351231200Smm	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
352231200Smm}
353231200Smm
354231200Smm
/*
 * Test entry point: extract the KOI8-R reference archive once, then read
 * it under the CP866, UTF-8 and CP1251 locales in turn.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	const char *refname = "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(refname);
	test_read_format_tar_filename_KOI8R_CP866(refname);
	test_read_format_tar_filename_KOI8R_UTF8(refname);
	test_read_format_tar_filename_KOI8R_CP1251(refname);
}
364