/*-
 * Copyright (c) 2011 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25231200Smm#include "test.h"
26231200Smm__FBSDID("$FreeBSD$");
27231200Smm
28231200Smm#include <locale.h>
29231200Smm
30299529SmmDEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
31231200Smm{
32231200Smm  	struct archive *a;
33231200Smm  	struct archive_entry *entry;
34231200Smm	char buff[4096];
35231200Smm	size_t used;
36231200Smm
37231200Smm	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38231200Smm		skipping("en_US.UTF-8 locale not available on this system.");
39231200Smm		return;
40231200Smm	}
41231200Smm
42231200Smm	/*
43231200Smm	 * Verify that UTF-8 filenames are correctly translated into CP866
44231200Smm	 * and stored with hdrcharset=CP866 option.
45231200Smm	 */
46231200Smm	a = archive_write_new();
47231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
48231200Smm	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
49231200Smm		skipping("This system cannot convert character-set"
50231200Smm		    " from UTF-8 to CP866.");
51231200Smm		archive_write_free(a);
52231200Smm		return;
53231200Smm	}
54231200Smm	assertEqualInt(ARCHIVE_OK,
55231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
56231200Smm
57231200Smm	entry = archive_entry_new2(a);
58231200Smm	/* Set a UTF-8 filename. */
59231200Smm	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
61231200Smm	archive_entry_set_size(entry, 0);
62231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63231200Smm	archive_entry_free(entry);
64231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65231200Smm
66231200Smm	/* Above three characters in UTF-8 should translate to the following
67231200Smm	 * three characters in CP866. */
68231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
69231200Smm}
70231200Smm
71299529SmmDEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
72231200Smm{
73231200Smm  	struct archive *a;
74231200Smm  	struct archive_entry *entry;
75231200Smm	char buff[4096];
76231200Smm	size_t used;
77231200Smm
78231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
79231200Smm		skipping("KOI8-R locale not available on this system.");
80231200Smm		return;
81231200Smm	}
82231200Smm
83231200Smm	/*
84231200Smm	 * Verify that KOI8-R filenames are correctly translated into UTF-8
85231200Smm	 * and stored with hdrcharset=UTF-8 option.
86231200Smm	 */
87231200Smm	a = archive_write_new();
88231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
89231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
90231200Smm		skipping("This system cannot convert character-set"
91231200Smm		    " from KOI8-R to UTF-8.");
92231200Smm		archive_write_free(a);
93231200Smm		return;
94231200Smm	}
95231200Smm	assertEqualInt(ARCHIVE_OK,
96231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
97231200Smm
98231200Smm	entry = archive_entry_new2(a);
99231200Smm	/* Set a KOI8-R filename. */
100231200Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
101231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
102231200Smm	archive_entry_set_size(entry, 0);
103231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
104231200Smm	archive_entry_free(entry);
105231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
106231200Smm
107231200Smm	/* Above three characters in KOI8-R should translate to the following
108231200Smm	 * three characters (two bytes each) in UTF-8. */
109231200Smm	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
110231200Smm}
111231200Smm
112299529SmmDEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
113231200Smm{
114231200Smm  	struct archive *a;
115231200Smm  	struct archive_entry *entry;
116231200Smm	char buff[4096];
117231200Smm	size_t used;
118231200Smm
119231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
120231200Smm		skipping("KOI8-R locale not available on this system.");
121231200Smm		return;
122231200Smm	}
123231200Smm
124231200Smm	/*
125231200Smm	 * Verify that KOI8-R filenames are correctly translated into CP866
126231200Smm	 * and stored with hdrcharset=CP866 option.
127231200Smm	 */
128231200Smm	a = archive_write_new();
129231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
130231200Smm	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
131231200Smm		skipping("This system cannot convert character-set"
132231200Smm		    " from KOI8-R to CP866.");
133231200Smm		archive_write_free(a);
134231200Smm		return;
135231200Smm	}
136231200Smm	assertEqualInt(ARCHIVE_OK,
137231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
138231200Smm
139231200Smm	entry = archive_entry_new2(a);
140231200Smm	/* Set a KOI8-R filename. */
141231200Smm	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
142231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
143231200Smm	archive_entry_set_size(entry, 0);
144231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
145231200Smm	archive_entry_free(entry);
146231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
147231200Smm
148231200Smm	/* Above three characters in KOI8-R should translate to the following
149231200Smm	 * three characters in CP866. */
150231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
151231200Smm}
152231200Smm
153299529SmmDEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
154231200Smm{
155231200Smm  	struct archive *a;
156231200Smm  	struct archive_entry *entry;
157231200Smm	char buff[4096];
158231200Smm	size_t used;
159231200Smm
160231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
161231200Smm	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
162231200Smm		skipping("KOI8-R locale not available on this system.");
163231200Smm		return;
164231200Smm	}
165231200Smm
166231200Smm	/*
167231200Smm	 * Verify that CP1251 filenames are correctly translated into UTF-8
168231200Smm	 * and stored with hdrcharset=UTF-8 option.
169231200Smm	 */
170231200Smm	a = archive_write_new();
171231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
172231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
173231200Smm		skipping("This system cannot convert character-set"
174231200Smm		    " from KOI8-R to UTF-8.");
175231200Smm		archive_write_free(a);
176231200Smm		return;
177231200Smm	}
178231200Smm	assertEqualInt(ARCHIVE_OK,
179231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
180231200Smm
181231200Smm	entry = archive_entry_new2(a);
182231200Smm	/* Set a KOI8-R filename. */
183231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
184231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
185231200Smm	archive_entry_set_size(entry, 0);
186231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
187231200Smm	archive_entry_free(entry);
188231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
189231200Smm
190231200Smm	/* Above three characters in CP1251 should translate to the following
191231200Smm	 * three characters (two bytes each) in UTF-8. */
192231200Smm	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
193231200Smm}
194231200Smm
195231200Smm/*
196231200Smm * Do not translate CP1251 into CP866 if non Windows platform.
197231200Smm */
198299529SmmDEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
199231200Smm{
200231200Smm  	struct archive *a;
201231200Smm  	struct archive_entry *entry;
202231200Smm	char buff[4096];
203231200Smm	size_t used;
204231200Smm
205231200Smm	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
206231200Smm		skipping("KOI8-R locale not available on this system.");
207231200Smm		return;
208231200Smm	}
209231200Smm
210231200Smm	/*
211231200Smm	 * Verify that CP1251 filenames are not translated into any
212231200Smm	 * other character-set, in particular, CP866.
213231200Smm	 */
214231200Smm	a = archive_write_new();
215231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
216231200Smm	assertEqualInt(ARCHIVE_OK,
217231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
218231200Smm
219231200Smm	entry = archive_entry_new2(a);
220231200Smm	/* Set a KOI8-R filename. */
221231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
222231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
223231200Smm	archive_entry_set_size(entry, 0);
224231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225231200Smm	archive_entry_free(entry);
226231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
227231200Smm
228231200Smm	/* Above three characters in CP1251 should not translate to
229231200Smm	 * any other character-set. */
230231200Smm	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
231231200Smm}
232231200Smm
233231200Smm/*
234231200Smm * Other archiver applications on Windows translate CP1251 filenames
235231200Smm * into CP866 filenames and store it in the ustar file.
236231200Smm * Test above behavior works well.
237231200Smm */
238299529SmmDEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
239231200Smm{
240231200Smm  	struct archive *a;
241231200Smm  	struct archive_entry *entry;
242231200Smm	char buff[4096];
243231200Smm	size_t used;
244231200Smm
245231200Smm	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
246231200Smm		skipping("Russian_Russia locale not available on this system.");
247231200Smm		return;
248231200Smm	}
249231200Smm
250231200Smm	/*
251231200Smm	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
252231200Smm	 * to CP866.
253231200Smm	 */
254231200Smm	a = archive_write_new();
255231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
256231200Smm	assertEqualInt(ARCHIVE_OK,
257231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
258231200Smm
259231200Smm	entry = archive_entry_new2(a);
260231200Smm	/* Set a CP1251 filename. */
261231200Smm	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
262231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
263231200Smm	archive_entry_set_size(entry, 0);
264231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
265231200Smm	archive_entry_free(entry);
266231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
267231200Smm
268231200Smm	/* Above three characters in CP1251 should translate to the following
269231200Smm	 * three characters in CP866. */
270231200Smm	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
271231200Smm}
272231200Smm
273299529SmmDEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
274231200Smm{
275231200Smm  	struct archive *a;
276231200Smm  	struct archive_entry *entry;
277231200Smm	char buff[4096];
278231200Smm	size_t used;
279231200Smm
280231200Smm	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
281231200Smm		skipping("eucJP locale not available on this system.");
282231200Smm		return;
283231200Smm	}
284231200Smm
285231200Smm	/*
286231200Smm	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
287231200Smm	 */
288231200Smm	a = archive_write_new();
289231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
290231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
291231200Smm		skipping("This system cannot convert character-set"
292231200Smm		    " from eucJP to UTF-8.");
293231200Smm		archive_write_free(a);
294231200Smm		return;
295231200Smm	}
296231200Smm	assertEqualInt(ARCHIVE_OK,
297231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
298231200Smm
299231200Smm	entry = archive_entry_new2(a);
300231200Smm	/* Set an EUC-JP filename. */
301231200Smm	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
302231200Smm	/* Check the Unicode version. */
303231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
304231200Smm	archive_entry_set_size(entry, 0);
305231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
306231200Smm	archive_entry_free(entry);
307231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
308231200Smm
309231200Smm	/* Check UTF-8 version. */
310231200Smm	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
311231200Smm}
312231200Smm
313299529SmmDEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
314231200Smm{
315231200Smm  	struct archive *a;
316231200Smm  	struct archive_entry *entry;
317231200Smm	char buff[4096];
318231200Smm	size_t used;
319231200Smm
320231200Smm	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
321231200Smm		skipping("eucJP locale not available on this system.");
322231200Smm		return;
323231200Smm	}
324231200Smm
325231200Smm	/*
326231200Smm	 * Verify that EUC-JP filenames are correctly translated to CP932.
327231200Smm	 */
328231200Smm	a = archive_write_new();
329231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
330231200Smm	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
331231200Smm		skipping("This system cannot convert character-set"
332231200Smm		    " from eucJP to CP932.");
333231200Smm		archive_write_free(a);
334231200Smm		return;
335231200Smm	}
336231200Smm	assertEqualInt(ARCHIVE_OK,
337231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
338231200Smm
339231200Smm	entry = archive_entry_new2(a);
340231200Smm	/* Set an EUC-JP filename. */
341231200Smm	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
342231200Smm	/* Check the Unicode version. */
343231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
344231200Smm	archive_entry_set_size(entry, 0);
345231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
346231200Smm	archive_entry_free(entry);
347231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
348231200Smm
349231200Smm	/* Check CP932 version. */
350231200Smm	assertEqualMem(buff, "\x95\x5C.txt", 6);
351231200Smm}
352231200Smm
353299529SmmDEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
354231200Smm{
355231200Smm  	struct archive *a;
356231200Smm  	struct archive_entry *entry;
357231200Smm	char buff[4096];
358231200Smm	size_t used;
359231200Smm
360231200Smm	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
361231200Smm	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
362231200Smm		skipping("CP932/SJIS locale not available on this system.");
363231200Smm		return;
364231200Smm	}
365231200Smm
366231200Smm	/*
367231200Smm	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
368231200Smm	 */
369231200Smm	a = archive_write_new();
370231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
371231200Smm	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
372231200Smm		skipping("This system cannot convert character-set"
373231200Smm		    " from CP932/SJIS to UTF-8.");
374231200Smm		archive_write_free(a);
375231200Smm		return;
376231200Smm	}
377231200Smm	assertEqualInt(ARCHIVE_OK,
378231200Smm	    archive_write_open_memory(a, buff, sizeof(buff), &used));
379231200Smm
380231200Smm	entry = archive_entry_new2(a);
381231200Smm	/* Set a CP932/SJIS filename. */
382231200Smm	archive_entry_set_pathname(entry, "\x95\x5C.txt");
383231200Smm	/* Check the Unicode version. */
384231200Smm	archive_entry_set_filetype(entry, AE_IFREG);
385231200Smm	archive_entry_set_size(entry, 0);
386231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387231200Smm	archive_entry_free(entry);
388231200Smm	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389231200Smm
390231200Smm	/* Check UTF-8 version. */
391231200Smm	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
392231200Smm}
393231200Smm
394