test_read_format_lha_filename_utf16.c revision 358090
1/*-
2 * Copyright (c) 2019 Martin Matuska
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25#include "test.h"
26__FBSDID("$FreeBSD");
27
28#include <locale.h>
29
30static void
31test_read_format_lha_filename_UTF16_UTF8(const char *refname)
32{
33	struct archive *a;
34	struct archive_entry *ae;
35
36	/*
37	 * Read LHA filename in en_US.UTF-8.
38	 */
39	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
40		skipping("en_US.UTF-8 locale not available on this system.");
41		return;
42	}
43	/*
44	 * Create a read object only for a test that platform support
45	 * a character-set conversion because we can read a character-set
46	 * of filenames from the header of an lha archive file and so we
47	 * want to test that it works well.
48	 */
49	assert((a = archive_read_new()) != NULL);
50	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
51    if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=CP932")) {
52        assertEqualInt(ARCHIVE_OK, archive_read_free(a));
53        skipping("This system cannot convert character-set"
54            " from CP932 to UTF-8.");
55        return;
56    }
57	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-16")) {
58		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
59		skipping("This system cannot convert character-set"
60		    " from UTF-16 to UTF-8.");
61		return;
62	}
63	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
64	assert((a = archive_read_new()) != NULL);
65	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
66	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
67	assertEqualIntA(a, ARCHIVE_OK,
68	    archive_read_open_filename(a, refname, 10240));
69
70	/* Note that usual Japanese filenames are tested in other cases */
71#if defined(__APPLE__)
72 /* NFD normalization */
73 /* U:O:A:u:o:a: */
74 #define UMLAUT_DIRNAME "\x55\xcc\x88\x4f\xcc\x88\x41\xcc\x88\x75\xcc\x88\x6f"\
75	    "\xcc\x88\x61\xcc\x88/"
76 /* a:o:u:A:O:U:.txt */
77 #define UMLAUT_FNAME "\x61\xcc\x88\x6f\xcc\x88\x75\xcc\x88\x41\xcc\x88"\
78	    "\x4f\xcc\x88\x55\xcc\x88.txt"
79#else
80 /* NFC normalization */
81 /* U:O:A:u:o:a: */
82 #define UMLAUT_DIRNAME "\xc3\x9c\xc3\x96\xc3\x84\xc3\xbc\xc3\xb6\xc3\xa4/"
83 /* a:o:u:A:O:U:.txt */
84 #define UMLAUT_FNAME "\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c.txt"
85#endif
86
87/* "Test" in Japanese Katakana */
88#define KATAKANA_FNAME "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88.txt"
89#define KATAKANA_DIRNAME "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88/"
90
91	/* Verify regular file. U:O:A:u:o:a:/a:o:u:A:O:U:.txt */
92	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
93	assertEqualString(UMLAUT_DIRNAME UMLAUT_FNAME, archive_entry_pathname(ae));
94	assertEqualInt(12, archive_entry_size(ae));
95
96	/* Verify directory. U:O:A:u:o:a:/ */
97	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
98	assertEqualString(UMLAUT_DIRNAME, archive_entry_pathname(ae));
99	assertEqualInt(0, archive_entry_size(ae));
100
101	/* Verify regular file. U:O:A:u:o:a:/("Test" in Japanese).txt */
102	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
103	assertEqualString(UMLAUT_DIRNAME KATAKANA_FNAME,
104	    archive_entry_pathname(ae));
105	assertEqualInt(25, archive_entry_size(ae));
106
107	/* Verify regular file. ("Test" in Japanese)/a:o:u:A:O:U:.txt */
108	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
109	assertEqualString(KATAKANA_DIRNAME UMLAUT_FNAME,
110	    archive_entry_pathname(ae));
111	assertEqualInt(12, archive_entry_size(ae));
112
113	/* Verify directory. ("Test" in Japanese)/ */
114	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
115	assertEqualString(KATAKANA_DIRNAME, archive_entry_pathname(ae));
116	assertEqualInt(0, archive_entry_size(ae));
117
118	/* Verify regular file. a:o:u:A:O:U:.txt */
119	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
120	assertEqualString(UMLAUT_FNAME, archive_entry_pathname(ae));
121	assertEqualInt(12, archive_entry_size(ae));
122
123	/* End of archive. */
124	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
125
126	/* Verify archive format. */
127	assertEqualIntA(a, ARCHIVE_FILTER_NONE, archive_filter_code(a, 0));
128	assertEqualIntA(a, ARCHIVE_FORMAT_LHA, archive_format(a));
129
130	/* Close the archive. */
131	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
132	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
133}
134
/*
 * Test entry point: extract the reference archive, then run the
 * UTF-16 to UTF-8 filename check against it.
 */
DEFINE_TEST(test_read_format_lha_filename_UTF16)
{
	/* The sample archive was created with Unlha32.dll. */
	const char *lzh_name = "test_read_format_lha_filename_utf16.lzh";

	extract_reference_file(lzh_name);
	test_read_format_lha_filename_UTF16_UTF8(lzh_name);
}
143
144