1/* Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA.  */
19
20/* We always want assert to be fully defined.  */
21#undef NDEBUG
22#include <assert.h>
23#include <locale.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <wchar.h>
28
/* Forward declaration: main() calls check_ascii, which is defined after it.
   The function runs mbrtowc() over the byte values 0..127 and returns
   nonzero if any of them is converted incorrectly; LOCNAME is only used to
   label the printed messages.  */
static int check_ascii(const char *locname);
30
31
/* Feed UTF-8 sequences to mbrtowc() in pieces and verify both the return
   values and the decoded wide characters.  The byte-at-a-time part was
   contributed by Markus Kuhn <mkuhn@acm.org>.

   The caller (utf8_test) selects a UTF-8 locale before calling this.
   Always returns 0; any mismatch aborts through assert().  */
static int
utf8_test_1(void)
{
	static const char u0800[] = "\xe0\xa0\x80";	/* expected to decode to 0x800 */
	mbstate_t state;
	wchar_t decoded;
	size_t rc;

	/* E2 89 A0 = U+2260 (not equal), delivered one byte per call.  */
	decoded = 42;				/* arbitrary marker value */
	memset(&state, 0, sizeof state);	/* put state into the initial state */
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&decoded, "\x89", 1, &state);
	assert (rc == (size_t) -2);		/* 2nd byte processed */
	assert (decoded == 42);			/* nothing has been stored yet */
	rc = mbrtowc(&decoded, "\xA0", 1, &state);
	assert (rc == 1);			/* 3rd byte processed */
	assert (decoded == 0x2260);		/* character decoded correctly */
	rc = mbrtowc(&decoded, "", 1, &state);
	assert (rc == 0);			/* terminating NUL processed */
	assert (decoded == 0);			/* terminating NUL decoded */

	/* The following test is by Al Viro <aviro@redhat.com>: first byte
	   alone, then the remaining two bytes in a single call.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, u0800, 1, &state);
	assert (rc == (size_t) -2);
	rc = mbrtowc(&decoded, u0800 + 1, 2, &state);
	assert (rc == 2);
	assert (decoded == 0x800);

	/* The same character handed over in one piece.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, u0800, 3, &state);
	assert (rc == 3);
	assert (decoded == 0x800);

	return 0;
}
66
67
/* Check how mbrtowc() handles a NUL byte, passed as the empty string "",
   at each position relative to the three-byte sequence E2 89 A0: the NUL
   is accepted in the initial state and after the complete character, and
   rejected while the character is still incomplete.

   Always returns 0; any mismatch aborts through assert().  */
static int
utf8_test_2(void)
{
	mbstate_t state;
	wchar_t decoded;
	size_t rc;

	/* NUL in the initial state: valid terminator.  */
	decoded = 42;				/* arbitrary marker value */
	memset(&state, 0, sizeof state);	/* put state into the initial state */
	rc = mbrtowc(NULL, "", 1, &state);
	assert (rc == 0);
	assert (mbsinit(&state));

	/* NUL after one byte of the sequence: invalid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert (rc == (size_t) -1);

	/* NUL after two bytes of the sequence: invalid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&decoded, "\x89", 1, &state);
	assert (rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert (rc == (size_t) -1);

	/* NUL after the complete character: valid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&decoded, "\x89", 1, &state);
	assert (rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(&decoded, "\xA0", 1, &state);
	assert (rc == 1);			/* 3rd byte processed */
	rc = mbrtowc(NULL, "", 1, &state);
	assert (rc == 0);
	assert (mbsinit(&state));

	return 0;
}
101
102
/* Same scenarios as the empty-string NUL test, but the terminator is
   requested with a NULL string pointer: mbrtowc(NULL, NULL, 0, &state).
   The call must succeed in the initial state and after the complete
   character E2 89 A0, and fail while the character is incomplete.

   Always returns 0; any mismatch aborts through assert().  */
static int
utf8_test_3(void)
{
	mbstate_t state;
	wchar_t decoded;
	size_t rc;

	/* NULL string in the initial state: valid terminator.  */
	decoded = 42;				/* arbitrary marker value */
	memset(&state, 0, sizeof state);	/* put state into the initial state */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert (rc == 0);
	assert (mbsinit(&state));

	/* NULL string after one byte of the sequence: invalid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert (rc == (size_t) -1);

	/* NULL string after two bytes of the sequence: invalid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&decoded, "\x89", 1, &state);
	assert (rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert (rc == (size_t) -1);

	/* NULL string after the complete character: valid terminator.  */
	decoded = 42;
	memset(&state, 0, sizeof state);
	rc = mbrtowc(&decoded, "\xE2", 1, &state);
	assert (rc == (size_t) -2);		/* 1st byte processed */
	rc = mbrtowc(&decoded, "\x89", 1, &state);
	assert (rc == (size_t) -2);		/* 2nd byte processed */
	rc = mbrtowc(&decoded, "\xA0", 1, &state);
	assert (rc == 1);			/* 3rd byte processed */
	rc = mbrtowc(NULL, NULL, 0, &state);
	assert (rc == 0);
	assert (mbsinit(&state));

	return 0;
}
136
137
/* Select the de_DE.UTF-8 locale for LC_CTYPE and run the three UTF-8
   mbrtowc() tests in order.  If the locale cannot be selected, a message
   is written to stderr and the process exits with status 1.  Returns the
   bitwise OR of the individual test results.  */
static int
utf8_test(void)
{
	static const char utf8_locale[] = "de_DE.UTF-8";
	int failed;

	if (setlocale(LC_CTYPE, utf8_locale) == NULL) {
		fprintf(stderr, "locale '%s' not available!\n", utf8_locale);
		exit(1);
	}

	/* Separate statements keep the tests running in a fixed order.  */
	failed = utf8_test_1();
	failed |= utf8_test_2();
	failed |= utf8_test_3();

	return failed;
}
155
156
/* Switch all locale categories to NAME and run check_ascii there.
   Returns nonzero if the locale cannot be selected or if check_ascii
   reports a problem.  Without the setlocale check an unavailable locale
   would leave the previous one active, and the ASCII check would silently
   re-test that one instead.  */
static int
check_ascii_in_locale(const char *name)
{
	if (setlocale(LC_ALL, name) == NULL) {
		fprintf(stderr, "locale '%s' not available!\n", name);
		return 1;
	}

	/* Label the output with the name the C library reports for the
	   locale that is now active.  */
	return check_ascii(setlocale(LC_ALL, NULL));
}


/* Test driver.  Returns 0 when every check passed, nonzero otherwise.  */
int
main(void)
{
	int result = 0;

	/* Check mapping of ASCII range for some character sets which have
	   ASCII as a subset.  For those the wide char generated must have
	   the same value.  */
	result |= check_ascii_in_locale("C");

	result |= check_ascii_in_locale("de_DE.UTF-8");
	/* utf8_test selects the UTF-8 locale itself and exits if it is
	   missing, so it is called regardless of the result above.  */
	result |= utf8_test();

	result |= check_ascii_in_locale("ja_JP.EUC-JP");

	return result;
}
177
178
/* Convert each byte value 0..127 with mbrtowc() in the locale that is
   currently active and verify that it yields the wide character with the
   same value: return value 0 for the NUL byte, 1 for every other byte.

   LOCNAME is used only to label the printed messages; this function does
   not change the locale.  One line is printed to stdout per failing byte,
   followed by the error count.  Returns 1 if any byte failed, else 0.  */
static int
check_ascii(const char *locname)
{
	int c;
	int res = 0;

	printf("Testing locale \"%s\":\n", locname);

	for (c = 0; c <= 127; ++c) {
		char buf[MB_CUR_MAX];
		/* Preset to a value no byte under test can produce.  */
		wchar_t wc = (wchar_t) 0xffffffff;
		const char *problem = NULL;
		mbstate_t s;
		size_t n, i;

		/* Fill the whole buffer with consecutive values starting at c;
		   only the first byte is expected to be consumed.  */
		for (i = 0; i < MB_CUR_MAX; ++i)
			buf[i] = (char) (c + i);

		memset(&s, '\0', sizeof(s));

		n = mbrtowc(&wc, buf, MB_CUR_MAX, &s);

		/* Classify the return value; the first matching rule wins.  */
		if (n == (size_t) -1)
			problem = "encoding error";
		else if (n == (size_t) -2)
			problem = "incomplete character";
		else if (n == 0 && c != 0)
			problem = "0 returned";
		else if (n != 0 && c == 0)
			problem = "not 0 returned";
		else if (c != 0 && n != 1)
			problem = "not 1 returned";

		if (problem != NULL) {
			printf("%s: '\\x%x': %s\n", locname, (unsigned int) c,
			       problem);
			++res;
		} else if (wc != (wchar_t) c) {
			/* %x takes an unsigned int; wchar_t need not be one, so
			   convert explicitly.  */
			printf("%s: '\\x%x' != wc != L'\\x%x'\n", locname,
			       (unsigned int) c, (unsigned int) wc);
			++res;
		}
	}

	printf(res == 1 ? "%d error\n" : "%d errors\n", res);

	return res != 0;
}
224