test_read_format_gtar_filename.c revision 232153
1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD");
27
28#include <locale.h>
29
30static void
31test_read_format_gtar_filename_eucJP_UTF8(const char *refname)
32{
33	struct archive *a;
34	struct archive_entry *ae;
35
36	/*
37	 * Read eucJP filename in en_US.UTF-8 with "hdrcharset=eucJP" option.
38	 */
39	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
40		skipping("en_US.UTF-8 locale not available on this system.");
41		return;
42	}
43
44	assert((a = archive_read_new()) != NULL);
45	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
46	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
47	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=eucJP")) {
48		skipping("This system cannot convert character-set"
49		    " from eucJP to UTF-8.");
50		goto cleanup;
51	}
52	assertEqualIntA(a, ARCHIVE_OK,
53	    archive_read_open_filename(a, refname, 10240));
54
55	/* Verify regular file. */
56	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
57	assertEqualString("\xe6\xbc\xa2\xe5\xad\x97.txt",
58	    archive_entry_pathname(ae));
59	assertEqualInt(8, archive_entry_size(ae));
60
61	/* Verify regular file. */
62	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
63	assertEqualString("\xe8\xa1\xa8.txt", archive_entry_pathname(ae));
64	assertEqualInt(4, archive_entry_size(ae));
65
66
67	/* End of archive. */
68	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
69
70	/* Verify archive format. */
71	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
72	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
73
74	/* Close the archive. */
75	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
76cleanup:
77	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
78}
79
80static void
81test_read_format_gtar_filename_CP866_KOI8R(const char *refname)
82{
83	struct archive *a;
84	struct archive_entry *ae;
85
86	/*
87	 * Read CP866 filename in ru_RU.KOI8-R with "hdrcharset=CP866" option.
88	 */
89	if (NULL == setlocale(LC_ALL, "Russian_Russia.20866") &&
90	    NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
91		skipping("ru_RU.KOI8-R locale not available on this system.");
92		return;
93	}
94
95	assert((a = archive_read_new()) != NULL);
96	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
97	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
98	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=CP866")) {
99		skipping("This system cannot convert character-set"
100		    " from CP866 to KOI8-R.");
101		goto cleanup;
102	}
103	assertEqualIntA(a, ARCHIVE_OK,
104	    archive_read_open_filename(a, refname, 10240));
105
106	/* Verify regular file. */
107	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
108	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
109	    archive_entry_pathname(ae));
110	assertEqualInt(6, archive_entry_size(ae));
111
112	/* Verify regular file. */
113	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
114	assertEqualString("\xd0\xd2\xc9\xd7\xc5\xd4",
115	    archive_entry_pathname(ae));
116	assertEqualInt(6, archive_entry_size(ae));
117
118
119	/* End of archive. */
120	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
121
122	/* Verify archive format. */
123	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
124	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
125
126	/* Close the archive. */
127	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
128cleanup:
129	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
130}
131
132static void
133test_read_format_gtar_filename_CP866_UTF8(const char *refname)
134{
135	struct archive *a;
136	struct archive_entry *ae;
137
138	/*
139	 * Read CP866 filename in en_US.UTF-8 with "hdrcharset=CP866" option.
140	 */
141	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
142		skipping("en_US.UTF-8 locale not available on this system.");
143		return;
144	}
145
146	assert((a = archive_read_new()) != NULL);
147	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
148	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
149	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=CP866")) {
150		skipping("This system cannot convert character-set"
151		    " from CP866 to UTF-8.");
152		goto cleanup;
153	}
154	assertEqualIntA(a, ARCHIVE_OK,
155	    archive_read_open_filename(a, refname, 10240));
156
157	/* Verify regular file. */
158	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
159	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
160	    archive_entry_pathname(ae));
161	assertEqualInt(6, archive_entry_size(ae));
162
163	/* Verify regular file. */
164	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
165	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
166	    archive_entry_pathname(ae));
167	assertEqualInt(6, archive_entry_size(ae));
168
169
170	/* End of archive. */
171	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
172
173	/* Verify archive format. */
174	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
175	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
176
177	/* Close the archive. */
178	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
179cleanup:
180	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
181}
182
183static void
184test_read_format_gtar_filename_KOI8R_CP866(const char *refname)
185{
186	struct archive *a;
187	struct archive_entry *ae;
188
189	/*
190	 * Read KOI8-R filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
191	 */
192	if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
193	    NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
194		skipping("ru_RU.CP866 locale not available on this system.");
195		return;
196	}
197
198	assert((a = archive_read_new()) != NULL);
199	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
200	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
201	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
202		skipping("This system cannot convert character-set"
203		    " from KOI8-R to CP866.");
204		goto cleanup;
205	}
206	assertEqualIntA(a, ARCHIVE_OK,
207	    archive_read_open_filename(a, refname, 10240));
208
209	/* Verify regular file. */
210	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
211	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
212	    archive_entry_pathname(ae));
213	assertEqualInt(6, archive_entry_size(ae));
214
215	/* Verify regular file. */
216	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
217	assertEqualString("\x8f\x90\x88\x82\x85\x92",
218	    archive_entry_pathname(ae));
219	assertEqualInt(6, archive_entry_size(ae));
220
221
222	/* End of archive. */
223	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
224
225	/* Verify archive format. */
226	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
227	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
228
229	/* Close the archive. */
230	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
231cleanup:
232	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
233}
234
235static void
236test_read_format_gtar_filename_KOI8R_UTF8(const char *refname)
237{
238	struct archive *a;
239	struct archive_entry *ae;
240
241	/*
242	 * Read KOI8-R filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
243	 */
244	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
245		skipping("en_US.UTF-8 locale not available on this system.");
246		return;
247	}
248
249	assert((a = archive_read_new()) != NULL);
250	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
251	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
252	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
253		skipping("This system cannot convert character-set"
254		    " from KOI8-R to UTF-8.");
255		goto cleanup;
256	}
257	assertEqualIntA(a, ARCHIVE_OK,
258	    archive_read_open_filename(a, refname, 10240));
259
260	/* Verify regular file. */
261	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
262	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
263	    archive_entry_pathname(ae));
264	assertEqualInt(6, archive_entry_size(ae));
265
266	/* Verify regular file. */
267	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
268	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
269	    archive_entry_pathname(ae));
270	assertEqualInt(6, archive_entry_size(ae));
271
272
273	/* End of archive. */
274	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
275
276	/* Verify archive format. */
277	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
278	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
279
280	/* Close the archive. */
281	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
282cleanup:
283	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
284}
285
286static void
287test_read_format_gtar_filename_eucJP_CP932(const char *refname)
288{
289	struct archive *a;
290	struct archive_entry *ae;
291
292	/*
293	 * Read eucJP filename in CP932/SJIS with "hdrcharset=eucJP" option.
294	 */
295	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
296	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
297		skipping("CP932 locale not available on this system.");
298		return;
299	}
300
301	assert((a = archive_read_new()) != NULL);
302	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
303	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
304	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=eucJP")) {
305		skipping("This system cannot convert character-set"
306		    " from eucJP.");
307		goto cleanup;
308	}
309	assertEqualIntA(a, ARCHIVE_OK,
310	    archive_read_open_filename(a, refname, 10240));
311
312	/* Verify regular file. */
313	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
314	assertEqualString("\x8a\xbf\x8e\x9a.txt", archive_entry_pathname(ae));
315	assertEqualInt(8, archive_entry_size(ae));
316
317	/* Verify regular file. */
318	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
319	assertEqualString("\x95\x5c.txt", archive_entry_pathname(ae));
320	assertEqualInt(4, archive_entry_size(ae));
321
322
323	/* End of archive. */
324	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
325
326	/* Verify archive format. */
327	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
328	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
329
330	/* Close the archive. */
331	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
332cleanup:
333	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
334}
335
336static void
337test_read_format_gtar_filename_CP866_CP1251(const char *refname)
338{
339	struct archive *a;
340	struct archive_entry *ae;
341
342	/*
343	 * Read CP866 filename in CP1251 with "hdrcharset=CP866" option.
344	 */
345	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
346	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
347		skipping("CP1251 locale not available on this system.");
348		return;
349	}
350
351	assert((a = archive_read_new()) != NULL);
352	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
353	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
354	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=CP866")) {
355		skipping("This system cannot convert character-set"
356		    " from CP866 to CP1251.");
357		goto cleanup;
358	}
359	assertEqualIntA(a, ARCHIVE_OK,
360	    archive_read_open_filename(a, refname, 10240));
361
362	/* Verify regular file. */
363	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
364	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
365	    archive_entry_pathname(ae));
366	assertEqualInt(6, archive_entry_size(ae));
367
368	/* Verify regular file. */
369	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
370	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
371	    archive_entry_pathname(ae));
372	assertEqualInt(6, archive_entry_size(ae));
373
374
375	/* End of archive. */
376	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
377
378	/* Verify archive format. */
379	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
380	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
381
382	/* Close the archive. */
383	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
384cleanup:
385	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
386}
387
388/*
389 * This test only for Windows platform because other archiver
390 * applications on Windows translate CP1251 filenames into CP866
391 * filenames and store it in the gtar file and so we should read
392 * it by default on Windows.
393 */
394static void
395test_read_format_gtar_filename_CP866_CP1251_win(const char *refname)
396{
397	struct archive *a;
398	struct archive_entry *ae;
399
400	/*
401	 * Read CP866 filename in CP1251 without "hdrcharset=CP866" option.
402	 */
403	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
404		skipping("Russian_Russia locale not available on this system.");
405		return;
406	}
407
408	assert((a = archive_read_new()) != NULL);
409	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
410	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
411	assertEqualIntA(a, ARCHIVE_OK,
412	    archive_read_open_filename(a, refname, 10240));
413
414	/* Verify regular file. */
415	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
416	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
417	    archive_entry_pathname(ae));
418	assertEqualInt(6, archive_entry_size(ae));
419
420	/* Verify regular file. */
421	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
422	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
423	    archive_entry_pathname(ae));
424	assertEqualInt(6, archive_entry_size(ae));
425
426
427	/* End of archive. */
428	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
429
430	/* Verify archive format. */
431	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
432	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
433
434	/* Close the archive. */
435	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
436	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
437}
438
439static void
440test_read_format_gtar_filename_KOI8R_CP1251(const char *refname)
441{
442	struct archive *a;
443	struct archive_entry *ae;
444
445	/*
446	 * Read KOI8-R filename in CP1251 with "hdrcharset=KOI8-R" option.
447	 */
448	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
449	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
450		skipping("CP1251 locale not available on this system.");
451		return;
452	}
453
454	assert((a = archive_read_new()) != NULL);
455	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
456	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
457	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
458		skipping("This system cannot convert character-set"
459		    " from KOI8-R to CP1251.");
460		goto cleanup;
461	}
462	assertEqualIntA(a, ARCHIVE_OK,
463	    archive_read_open_filename(a, refname, 10240));
464
465	/* Verify regular file. */
466	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
467	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
468	    archive_entry_pathname(ae));
469	assertEqualInt(6, archive_entry_size(ae));
470
471	/* Verify regular file. */
472	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
473	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
474	    archive_entry_pathname(ae));
475	assertEqualInt(6, archive_entry_size(ae));
476
477
478	/* End of archive. */
479	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
480
481	/* Verify archive format. */
482	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
483	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_GNUTAR, archive_format(a));
484
485	/* Close the archive. */
486	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
487cleanup:
488	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
489}
490
491
/*
 * Test entry point: extract each reference archive in turn and run every
 * locale variant that reads it.
 */
DEFINE_TEST(test_read_format_gtar_filename)
{
	const char *eucjp_ref = "test_read_format_gtar_filename_eucjp.tar.Z";
	const char *cp866_ref = "test_read_format_gtar_filename_cp866.tar.Z";
	const char *koi8r_ref = "test_read_format_gtar_filename_koi8r.tar.Z";

	/* Names stored in eucJP. */
	extract_reference_file(eucjp_ref);
	test_read_format_gtar_filename_eucJP_UTF8(eucjp_ref);
	test_read_format_gtar_filename_eucJP_CP932(eucjp_ref);

	/* Names stored in CP866. */
	extract_reference_file(cp866_ref);
	test_read_format_gtar_filename_CP866_KOI8R(cp866_ref);
	test_read_format_gtar_filename_CP866_UTF8(cp866_ref);
	test_read_format_gtar_filename_CP866_CP1251(cp866_ref);
	test_read_format_gtar_filename_CP866_CP1251_win(cp866_ref);

	/* Names stored in KOI8-R. */
	extract_reference_file(koi8r_ref);
	test_read_format_gtar_filename_KOI8R_CP866(koi8r_ref);
	test_read_format_gtar_filename_KOI8R_UTF8(koi8r_ref);
	test_read_format_gtar_filename_KOI8R_CP1251(koi8r_ref);
}
513