1/*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD$");
27
28#include <locale.h>
29
30DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
31{
32  	struct archive *a;
33  	struct archive_entry *entry;
34	char buff[4096];
35	size_t used;
36
37	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38		skipping("en_US.UTF-8 locale not available on this system.");
39		return;
40	}
41
42	/*
43	 * Verify that UTF-8 filenames are correctly translated into CP866
44	 * and stored with hdrcharset=CP866 option.
45	 */
46	a = archive_write_new();
47	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
48	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
49		skipping("This system cannot convert character-set"
50		    " from UTF-8 to CP866.");
51		archive_write_free(a);
52		return;
53	}
54	assertEqualInt(ARCHIVE_OK,
55	    archive_write_open_memory(a, buff, sizeof(buff), &used));
56
57	entry = archive_entry_new2(a);
58	/* Set a UTF-8 filename. */
59	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60	archive_entry_set_filetype(entry, AE_IFREG);
61	archive_entry_set_size(entry, 0);
62	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63	archive_entry_free(entry);
64	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65
66	/* Above three characters in UTF-8 should translate to the following
67	 * three characters in CP866. */
68	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
69}
70
71DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
72{
73  	struct archive *a;
74  	struct archive_entry *entry;
75	char buff[4096];
76	size_t used;
77
78	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
79		skipping("KOI8-R locale not available on this system.");
80		return;
81	}
82
83	/*
84	 * Verify that KOI8-R filenames are correctly translated into UTF-8
85	 * and stored with hdrcharset=UTF-8 option.
86	 */
87	a = archive_write_new();
88	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
89	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
90		skipping("This system cannot convert character-set"
91		    " from KOI8-R to UTF-8.");
92		archive_write_free(a);
93		return;
94	}
95	assertEqualInt(ARCHIVE_OK,
96	    archive_write_open_memory(a, buff, sizeof(buff), &used));
97
98	entry = archive_entry_new2(a);
99	/* Set a KOI8-R filename. */
100	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
101	archive_entry_set_filetype(entry, AE_IFREG);
102	archive_entry_set_size(entry, 0);
103	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
104	archive_entry_free(entry);
105	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
106
107	/* Above three characters in KOI8-R should translate to the following
108	 * three characters (two bytes each) in UTF-8. */
109	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
110}
111
112DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
113{
114  	struct archive *a;
115  	struct archive_entry *entry;
116	char buff[4096];
117	size_t used;
118
119	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
120		skipping("KOI8-R locale not available on this system.");
121		return;
122	}
123
124	/*
125	 * Verify that KOI8-R filenames are correctly translated into CP866
126	 * and stored with hdrcharset=CP866 option.
127	 */
128	a = archive_write_new();
129	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
130	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
131		skipping("This system cannot convert character-set"
132		    " from KOI8-R to CP866.");
133		archive_write_free(a);
134		return;
135	}
136	assertEqualInt(ARCHIVE_OK,
137	    archive_write_open_memory(a, buff, sizeof(buff), &used));
138
139	entry = archive_entry_new2(a);
140	/* Set a KOI8-R filename. */
141	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
142	archive_entry_set_filetype(entry, AE_IFREG);
143	archive_entry_set_size(entry, 0);
144	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
145	archive_entry_free(entry);
146	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
147
148	/* Above three characters in KOI8-R should translate to the following
149	 * three characters in CP866. */
150	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
151}
152
153DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
154{
155  	struct archive *a;
156  	struct archive_entry *entry;
157	char buff[4096];
158	size_t used;
159
160	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
161	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
162		skipping("KOI8-R locale not available on this system.");
163		return;
164	}
165
166	/*
167	 * Verify that CP1251 filenames are correctly translated into UTF-8
168	 * and stored with hdrcharset=UTF-8 option.
169	 */
170	a = archive_write_new();
171	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
172	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
173		skipping("This system cannot convert character-set"
174		    " from KOI8-R to UTF-8.");
175		archive_write_free(a);
176		return;
177	}
178	assertEqualInt(ARCHIVE_OK,
179	    archive_write_open_memory(a, buff, sizeof(buff), &used));
180
181	entry = archive_entry_new2(a);
182	/* Set a KOI8-R filename. */
183	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
184	archive_entry_set_filetype(entry, AE_IFREG);
185	archive_entry_set_size(entry, 0);
186	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
187	archive_entry_free(entry);
188	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
189
190	/* Above three characters in CP1251 should translate to the following
191	 * three characters (two bytes each) in UTF-8. */
192	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
193}
194
/*
 * Do not translate CP1251 into CP866 on non-Windows platforms.
 */
198DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
199{
200  	struct archive *a;
201  	struct archive_entry *entry;
202	char buff[4096];
203	size_t used;
204
205	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
206		skipping("KOI8-R locale not available on this system.");
207		return;
208	}
209
210	/*
211	 * Verify that CP1251 filenames are not translated into any
212	 * other character-set, in particular, CP866.
213	 */
214	a = archive_write_new();
215	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
216	assertEqualInt(ARCHIVE_OK,
217	    archive_write_open_memory(a, buff, sizeof(buff), &used));
218
219	entry = archive_entry_new2(a);
220	/* Set a KOI8-R filename. */
221	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
222	archive_entry_set_filetype(entry, AE_IFREG);
223	archive_entry_set_size(entry, 0);
224	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225	archive_entry_free(entry);
226	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
227
228	/* Above three characters in CP1251 should not translate to
229	 * any other character-set. */
230	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
231}
232
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 filenames and store them in the ustar file.
 * Verify that this behavior works.
 */
238DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
239{
240  	struct archive *a;
241  	struct archive_entry *entry;
242	char buff[4096];
243	size_t used;
244
245	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
246		skipping("Russian_Russia locale not available on this system.");
247		return;
248	}
249
250	/*
251	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
252	 * to CP866.
253	 */
254	a = archive_write_new();
255	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
256	assertEqualInt(ARCHIVE_OK,
257	    archive_write_open_memory(a, buff, sizeof(buff), &used));
258
259	entry = archive_entry_new2(a);
260	/* Set a CP1251 filename. */
261	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
262	archive_entry_set_filetype(entry, AE_IFREG);
263	archive_entry_set_size(entry, 0);
264	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
265	archive_entry_free(entry);
266	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
267
268	/* Above three characters in CP1251 should translate to the following
269	 * three characters in CP866. */
270	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
271}
272
273DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
274{
275  	struct archive *a;
276  	struct archive_entry *entry;
277	char buff[4096];
278	size_t used;
279
280	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
281		skipping("eucJP locale not available on this system.");
282		return;
283	}
284
285	/*
286	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
287	 */
288	a = archive_write_new();
289	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
290	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
291		skipping("This system cannot convert character-set"
292		    " from eucJP to UTF-8.");
293		archive_write_free(a);
294		return;
295	}
296	assertEqualInt(ARCHIVE_OK,
297	    archive_write_open_memory(a, buff, sizeof(buff), &used));
298
299	entry = archive_entry_new2(a);
300	/* Set an EUC-JP filename. */
301	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
302	/* Check the Unicode version. */
303	archive_entry_set_filetype(entry, AE_IFREG);
304	archive_entry_set_size(entry, 0);
305	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
306	archive_entry_free(entry);
307	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
308
309	/* Check UTF-8 version. */
310	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
311}
312
313DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
314{
315  	struct archive *a;
316  	struct archive_entry *entry;
317	char buff[4096];
318	size_t used;
319
320	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
321		skipping("eucJP locale not available on this system.");
322		return;
323	}
324
325	/*
326	 * Verify that EUC-JP filenames are correctly translated to CP932.
327	 */
328	a = archive_write_new();
329	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
330	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
331		skipping("This system cannot convert character-set"
332		    " from eucJP to CP932.");
333		archive_write_free(a);
334		return;
335	}
336	assertEqualInt(ARCHIVE_OK,
337	    archive_write_open_memory(a, buff, sizeof(buff), &used));
338
339	entry = archive_entry_new2(a);
340	/* Set an EUC-JP filename. */
341	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
342	/* Check the Unicode version. */
343	archive_entry_set_filetype(entry, AE_IFREG);
344	archive_entry_set_size(entry, 0);
345	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
346	archive_entry_free(entry);
347	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
348
349	/* Check CP932 version. */
350	assertEqualMem(buff, "\x95\x5C.txt", 6);
351}
352
353DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
354{
355  	struct archive *a;
356  	struct archive_entry *entry;
357	char buff[4096];
358	size_t used;
359
360	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
361	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
362		skipping("CP932/SJIS locale not available on this system.");
363		return;
364	}
365
366	/*
367	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
368	 */
369	a = archive_write_new();
370	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
371	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
372		skipping("This system cannot convert character-set"
373		    " from CP932/SJIS to UTF-8.");
374		archive_write_free(a);
375		return;
376	}
377	assertEqualInt(ARCHIVE_OK,
378	    archive_write_open_memory(a, buff, sizeof(buff), &used));
379
380	entry = archive_entry_new2(a);
381	/* Set a CP932/SJIS filename. */
382	archive_entry_set_pathname(entry, "\x95\x5C.txt");
383	/* Check the Unicode version. */
384	archive_entry_set_filetype(entry, AE_IFREG);
385	archive_entry_set_size(entry, 0);
386	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387	archive_entry_free(entry);
388	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389
390	/* Check UTF-8 version. */
391	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
392}
393
394