test_ustar_filename_encoding.c revision 231200
1231200Smm/*-
2231200Smm * Copyright (c) 2011 Michihiro NAKAJIMA
3231200Smm * All rights reserved.
4231200Smm *
5231200Smm * Redistribution and use in source and binary forms, with or without
6231200Smm * modification, are permitted provided that the following conditions
7231200Smm * are met:
8231200Smm * 1. Redistributions of source code must retain the above copyright
9231200Smm *    notice, this list of conditions and the following disclaimer.
10231200Smm * 2. Redistributions in binary form must reproduce the above copyright
11231200Smm *    notice, this list of conditions and the following disclaimer in the
12231200Smm *    documentation and/or other materials provided with the distribution.
13231200Smm *
14231200Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15231200Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16231200Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17231200Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18231200Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19231200Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20231200Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21231200Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22231200Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23231200Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24231200Smm */
25231200Smm#include "test.h"
26231200Smm__FBSDID("$FreeBSD$");
27231200Smm
28231200Smm#include <locale.h>
29231200Smm
30231200Smmstatic void
31231200Smmtest_ustar_filename_encoding_UTF8_CP866(void)
32231200Smm{
33231200Smm  	struct archive *a;
34231200Smm  	struct archive_entry *entry;
35231200Smm	char buff[4096];
36231200Smm	size_t used;
37231200Smm
38231200Smm	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
39231200Smm		skipping("en_US.UTF-8 locale not available on this system.");
40231200Smm		return;
41231200Smm	}
42231200Smm
43231200Smm	/*
44231200Smm	 * Verify that UTF-8 filenames are correctly translated into CP866
45231200Smm	 * and stored with hdrcharset=CP866 option.
46231200Smm	 */
47231200Smm	a = archive_write_new();
48231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
49231200Smm	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
50231200Smm		skipping("This system cannot convert character-set"
51231200Smm		    " from UTF-8 to CP866.");
52231200Smm		archive_write_free(a);
53231200Smm		return;
54231200Smm	}
55231200Smm	assertEqualInt(ARCHIVE_OK,
56231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
57231200Smm
58231200Smm	entry = archive_entry_new2(a);
59231200Smm	/* Set a UTF-8 filename. */
60231200Smm	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
61231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
62231200Smm	archive_entry_set_size(entry, 0);
63231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
64231200Smm	archive_entry_free(entry);
65231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
66231200Smm
67231200Smm	/* Above three characters in UTF-8 should translate to the following
68231200Smm	 * three characters in CP866. */
69231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
70231200Smm}
71231200Smm
72231200Smmstatic void
73231200Smmtest_ustar_filename_encoding_KOI8R_UTF8(void)
74231200Smm{
75231200Smm  	struct archive *a;
76231200Smm  	struct archive_entry *entry;
77231200Smm	char buff[4096];
78231200Smm	size_t used;
79231200Smm
80231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
81231200Smm		skipping("KOI8-R locale not available on this system.");
82231200Smm		return;
83231200Smm	}
84231200Smm
85231200Smm	/*
86231200Smm	 * Verify that KOI8-R filenames are correctly translated into UTF-8
87231200Smm	 * and stored with hdrcharset=UTF-8 option.
88231200Smm	 */
89231200Smm	a = archive_write_new();
90231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
91231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
92231200Smm		skipping("This system cannot convert character-set"
93231200Smm		    " from KOI8-R to UTF-8.");
94231200Smm		archive_write_free(a);
95231200Smm		return;
96231200Smm	}
97231200Smm	assertEqualInt(ARCHIVE_OK,
98231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
99231200Smm
100231200Smm	entry = archive_entry_new2(a);
101231200Smm	/* Set a KOI8-R filename. */
102231200Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
103231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
104231200Smm	archive_entry_set_size(entry, 0);
105231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
106231200Smm	archive_entry_free(entry);
107231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
108231200Smm
109231200Smm	/* Above three characters in KOI8-R should translate to the following
110231200Smm	 * three characters (two bytes each) in UTF-8. */
111231200Smm	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
112231200Smm}
113231200Smm
114231200Smmstatic void
115231200Smmtest_ustar_filename_encoding_KOI8R_CP866(void)
116231200Smm{
117231200Smm  	struct archive *a;
118231200Smm  	struct archive_entry *entry;
119231200Smm	char buff[4096];
120231200Smm	size_t used;
121231200Smm
122231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
123231200Smm		skipping("KOI8-R locale not available on this system.");
124231200Smm		return;
125231200Smm	}
126231200Smm
127231200Smm	/*
128231200Smm	 * Verify that KOI8-R filenames are correctly translated into CP866
129231200Smm	 * and stored with hdrcharset=CP866 option.
130231200Smm	 */
131231200Smm	a = archive_write_new();
132231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
133231200Smm	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
134231200Smm		skipping("This system cannot convert character-set"
135231200Smm		    " from KOI8-R to CP866.");
136231200Smm		archive_write_free(a);
137231200Smm		return;
138231200Smm	}
139231200Smm	assertEqualInt(ARCHIVE_OK,
140231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
141231200Smm
142231200Smm	entry = archive_entry_new2(a);
143231200Smm	/* Set a KOI8-R filename. */
144231200Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
145231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
146231200Smm	archive_entry_set_size(entry, 0);
147231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
148231200Smm	archive_entry_free(entry);
149231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
150231200Smm
151231200Smm	/* Above three characters in KOI8-R should translate to the following
152231200Smm	 * three characters in CP866. */
153231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
154231200Smm}
155231200Smm
156231200Smmstatic void
157231200Smmtest_ustar_filename_encoding_CP1251_UTF8(void)
158231200Smm{
159231200Smm  	struct archive *a;
160231200Smm  	struct archive_entry *entry;
161231200Smm	char buff[4096];
162231200Smm	size_t used;
163231200Smm
164231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
165231200Smm	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
166231200Smm		skipping("KOI8-R locale not available on this system.");
167231200Smm		return;
168231200Smm	}
169231200Smm
170231200Smm	/*
171231200Smm	 * Verify that CP1251 filenames are correctly translated into UTF-8
172231200Smm	 * and stored with hdrcharset=UTF-8 option.
173231200Smm	 */
174231200Smm	a = archive_write_new();
175231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
176231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
177231200Smm		skipping("This system cannot convert character-set"
178231200Smm		    " from KOI8-R to UTF-8.");
179231200Smm		archive_write_free(a);
180231200Smm		return;
181231200Smm	}
182231200Smm	assertEqualInt(ARCHIVE_OK,
183231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
184231200Smm
185231200Smm	entry = archive_entry_new2(a);
186231200Smm	/* Set a KOI8-R filename. */
187231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
188231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
189231200Smm	archive_entry_set_size(entry, 0);
190231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
191231200Smm	archive_entry_free(entry);
192231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
193231200Smm
194231200Smm	/* Above three characters in CP1251 should translate to the following
195231200Smm	 * three characters (two bytes each) in UTF-8. */
196231200Smm	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
197231200Smm}
198231200Smm
199231200Smm/*
200231200Smm * Do not translate CP1251 into CP866 if non Windows platform.
201231200Smm */
202231200Smmstatic void
203231200Smmtest_ustar_filename_encoding_ru_RU_CP1251(void)
204231200Smm{
205231200Smm  	struct archive *a;
206231200Smm  	struct archive_entry *entry;
207231200Smm	char buff[4096];
208231200Smm	size_t used;
209231200Smm
210231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
211231200Smm		skipping("KOI8-R locale not available on this system.");
212231200Smm		return;
213231200Smm	}
214231200Smm
215231200Smm	/*
216231200Smm	 * Verify that CP1251 filenames are not translated into any
217231200Smm	 * other character-set, in particular, CP866.
218231200Smm	 */
219231200Smm	a = archive_write_new();
220231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
221231200Smm	assertEqualInt(ARCHIVE_OK,
222231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
223231200Smm
224231200Smm	entry = archive_entry_new2(a);
225231200Smm	/* Set a KOI8-R filename. */
226231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
227231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
228231200Smm	archive_entry_set_size(entry, 0);
229231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
230231200Smm	archive_entry_free(entry);
231231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
232231200Smm
233231200Smm	/* Above three characters in CP1251 should not translate to
234231200Smm	 * any other character-set. */
235231200Smm	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
236231200Smm}
237231200Smm
238231200Smm/*
239231200Smm * Other archiver applications on Windows translate CP1251 filenames
240231200Smm * into CP866 filenames and store it in the ustar file.
241231200Smm * Test above behavior works well.
242231200Smm */
243231200Smmstatic void
244231200Smmtest_ustar_filename_encoding_Russian_Russia(void)
245231200Smm{
246231200Smm  	struct archive *a;
247231200Smm  	struct archive_entry *entry;
248231200Smm	char buff[4096];
249231200Smm	size_t used;
250231200Smm
251231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
252231200Smm		skipping("Russian_Russia locale not available on this system.");
253231200Smm		return;
254231200Smm	}
255231200Smm
256231200Smm	/*
257231200Smm	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
258231200Smm	 * to CP866.
259231200Smm	 */
260231200Smm	a = archive_write_new();
261231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
262231200Smm	assertEqualInt(ARCHIVE_OK,
263231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
264231200Smm
265231200Smm	entry = archive_entry_new2(a);
266231200Smm	/* Set a CP1251 filename. */
267231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
268231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
269231200Smm	archive_entry_set_size(entry, 0);
270231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
271231200Smm	archive_entry_free(entry);
272231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
273231200Smm
274231200Smm	/* Above three characters in CP1251 should translate to the following
275231200Smm	 * three characters in CP866. */
276231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
277231200Smm}
278231200Smm
279231200Smmstatic void
280231200Smmtest_ustar_filename_encoding_EUCJP_UTF8(void)
281231200Smm{
282231200Smm  	struct archive *a;
283231200Smm  	struct archive_entry *entry;
284231200Smm	char buff[4096];
285231200Smm	size_t used;
286231200Smm
287231200Smm	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
288231200Smm		skipping("eucJP locale not available on this system.");
289231200Smm		return;
290231200Smm	}
291231200Smm
292231200Smm	/*
293231200Smm	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
294231200Smm	 */
295231200Smm	a = archive_write_new();
296231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
297231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
298231200Smm		skipping("This system cannot convert character-set"
299231200Smm		    " from eucJP to UTF-8.");
300231200Smm		archive_write_free(a);
301231200Smm		return;
302231200Smm	}
303231200Smm	assertEqualInt(ARCHIVE_OK,
304231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
305231200Smm
306231200Smm	entry = archive_entry_new2(a);
307231200Smm	/* Set an EUC-JP filename. */
308231200Smm	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
309231200Smm	/* Check the Unicode version. */
310231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
311231200Smm	archive_entry_set_size(entry, 0);
312231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
313231200Smm	archive_entry_free(entry);
314231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
315231200Smm
316231200Smm	/* Check UTF-8 version. */
317231200Smm	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
318231200Smm}
319231200Smm
320231200Smmstatic void
321231200Smmtest_ustar_filename_encoding_EUCJP_CP932(void)
322231200Smm{
323231200Smm  	struct archive *a;
324231200Smm  	struct archive_entry *entry;
325231200Smm	char buff[4096];
326231200Smm	size_t used;
327231200Smm
328231200Smm	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
329231200Smm		skipping("eucJP locale not available on this system.");
330231200Smm		return;
331231200Smm	}
332231200Smm
333231200Smm	/*
334231200Smm	 * Verify that EUC-JP filenames are correctly translated to CP932.
335231200Smm	 */
336231200Smm	a = archive_write_new();
337231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
338231200Smm	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
339231200Smm		skipping("This system cannot convert character-set"
340231200Smm		    " from eucJP to CP932.");
341231200Smm		archive_write_free(a);
342231200Smm		return;
343231200Smm	}
344231200Smm	assertEqualInt(ARCHIVE_OK,
345231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
346231200Smm
347231200Smm	entry = archive_entry_new2(a);
348231200Smm	/* Set an EUC-JP filename. */
349231200Smm	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
350231200Smm	/* Check the Unicode version. */
351231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
352231200Smm	archive_entry_set_size(entry, 0);
353231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
354231200Smm	archive_entry_free(entry);
355231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
356231200Smm
357231200Smm	/* Check CP932 version. */
358231200Smm	assertEqualMem(buff, "\x95\x5C.txt", 6);
359231200Smm}
360231200Smm
361231200Smmstatic void
362231200Smmtest_ustar_filename_encoding_CP932_UTF8(void)
363231200Smm{
364231200Smm  	struct archive *a;
365231200Smm  	struct archive_entry *entry;
366231200Smm	char buff[4096];
367231200Smm	size_t used;
368231200Smm
369231200Smm	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
370231200Smm	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
371231200Smm		skipping("CP932/SJIS locale not available on this system.");
372231200Smm		return;
373231200Smm	}
374231200Smm
375231200Smm	/*
376231200Smm	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
377231200Smm	 */
378231200Smm	a = archive_write_new();
379231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
380231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
381231200Smm		skipping("This system cannot convert character-set"
382231200Smm		    " from CP932/SJIS to UTF-8.");
383231200Smm		archive_write_free(a);
384231200Smm		return;
385231200Smm	}
386231200Smm	assertEqualInt(ARCHIVE_OK,
387231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
388231200Smm
389231200Smm	entry = archive_entry_new2(a);
390231200Smm	/* Set a CP932/SJIS filename. */
391231200Smm	archive_entry_set_pathname(entry, "\x95\x5C.txt");
392231200Smm	/* Check the Unicode version. */
393231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
394231200Smm	archive_entry_set_size(entry, 0);
395231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
396231200Smm	archive_entry_free(entry);
397231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
398231200Smm
399231200Smm	/* Check UTF-8 version. */
400231200Smm	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
401231200Smm}
402231200Smm
403231200SmmDEFINE_TEST(test_ustar_filename_encoding)
404231200Smm{
405231200Smm	test_ustar_filename_encoding_UTF8_CP866();
406231200Smm	test_ustar_filename_encoding_KOI8R_UTF8();
407231200Smm	test_ustar_filename_encoding_KOI8R_CP866();
408231200Smm	test_ustar_filename_encoding_CP1251_UTF8();
409231200Smm	test_ustar_filename_encoding_ru_RU_CP1251();
410231200Smm	test_ustar_filename_encoding_Russian_Russia();
411231200Smm	test_ustar_filename_encoding_EUCJP_UTF8();
412231200Smm	test_ustar_filename_encoding_EUCJP_CP932();
413231200Smm	test_ustar_filename_encoding_CP932_UTF8();
414231200Smm}
415