1/* Test of conversion of multibyte character to wide character.
2   Copyright (C) 2008-2011 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18
19#undef NDEBUG
20#include <assert.h>
21#include <locale.h>
22#include <stdio.h>
23#include <string.h>
24#include <wchar.h>
25
26#include <Debug.h>
27
/* Exercise mbrtowc() on zero-length, NUL-byte, single-byte and NULL-source
   input in the environment's locale, then on multibyte strings in the
   ISO-8859-1, UTF-8, EUC-JP and GB18030 locales.

   A specific locale that cannot be selected is reported on stderr and its
   checks are skipped.  Returns 0 when all executed checks hold and 1 when
   the environment's locale cannot be set; a failing check terminates the
   program through assert() (NDEBUG is undefined above).

   argc and argv are not used.  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  int i;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    {
      fprintf (stderr, "unable to set standard locale\n");
      return 1;
    }

  /* Test zero-length input.  */
  printf ("zero-length input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    assert (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    assert (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  printf ("NUL byte input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    assert (ret == 0);
    assert (wc == 0);
    assert (mbsinit (&state));
    /* Same conversion, but without a destination for the result.  */
    ret = mbrtowc (NULL, "", 1, &state);
    assert (ret == 0);
    assert (mbsinit (&state));
  }

  /* Test single-byte input.  */
  printf ("single-byte input ...\n");
  {
    char buf[1];
    int c;

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          assert (ret == 1);
          assert (wc == c);
          assert (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          assert (ret == 1);
          assert (mbsinit (&state));
          break;

        default:
          /* All other byte values are not checked here.  */
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  printf ("special calling convention, passing NULL ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    assert (ret == 0);
    /* With a NULL source the destination must be left untouched.  */
    assert (wc == (wchar_t) 0xBADFACE);
    assert (mbsinit (&state));
  }

  /* Run the four locale-specific test groups in sequence.  In every group,
     bytes that have been consumed are overwritten with NUL afterwards --
     presumably so that a later call cannot succeed by re-reading them.
     A 'break' inside a failed setlocale() check leaves the switch and
     thereby skips the remainder of that group.  */
  for (i = '1'; i <= '4'; ++i)
    {
      switch (i)
        {
        case '1':
          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
          printf ("ISO8859-1 ...\n");
          {
            /* "B", U+00FC (u with diaeresis), U+00DF (sharp s), "er".  */
            char input[] = "B\374\337er";
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL)
              {
                fprintf (stderr, "unable to set ISO8859-1 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\374');
            assert (mbsinit (&state));
            input[1] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 2, 3, &state);
            assert (ret == 1);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 3, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\337');
            assert (mbsinit (&state));
            input[2] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[3] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        case '2':
          /* Locale encoding is UTF-8.  */
          printf ("UTF-8 ...\n");
          {
            /* "B", U+00FC (u with diaeresis), U+00DF (sharp s), "er";
               the two non-ASCII characters take two bytes each.  */
            char input[] = "B\303\274\303\237er";
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
              {
                fprintf (stderr, "unable to set UTF-8 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* Only the first byte of a two-byte character is supplied:
               the call must report an incomplete character, leave wc
               alone and keep the partial character in the state.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            /* Supplying the second byte completes the character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 5, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 4, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 4, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[5] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 6, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));

            /* reproduce a valid use case from readline (as used in our
               bash): */
            {
              char tooShort[] = "\303";
              char ok[] = "\303\274";
              /* make a backup of the state */
              mbstate_t stateBackup = state;
              /* try with a source that's too short */
              ret = mbrtowc (&wc, tooShort, 1, &state);
              assert (ret == (size_t)-2);
              /* restore the state from the backup */
              state = stateBackup;
              /* retry with enough source */
              ret = mbrtowc (&wc, ok, 2, &state);
              assert (ret == 2);
            }
          }
          break;

        case '3':
          /* Locale encoding is EUC-JP.  */
          printf ("EUC-JP ...\n");
          {
            /* "<", three two-byte characters (U+65E5 U+672C U+8A9E, the
               word "nihongo"), ">".  */
            char input[] = "<\306\374\313\334\270\354>";
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL)
              {
                fprintf (stderr, "unable to set EUC-JP locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == '<');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 2, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[1] = '\0';
            input[2] = '\0';

            /* First byte only: incomplete character, state not initial.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[3] = '\0';

            /* The second byte completes the character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 4, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[4] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 5, 3, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 3, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 1, &state);
            assert (ret == 1);
            assert (wc == '>');
            assert (mbsinit (&state));
          }
          break;

        case '4':
          /* Locale encoding is GB18030.  */
          printf ("GB18030 ...\n");
          {
            /* "B", U+00FC (u with diaeresis) as two bytes, U+00DF (sharp s)
               as four bytes, "er".  */
            char input[] = "B\250\271\201\060\211\070er";
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.GB18030") == NULL)
              {
                fprintf (stderr, "unable to set GB18030 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* First byte only: incomplete character, state not initial.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            /* The second byte completes the character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 7, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 6, &state);
            assert (ret == 4);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 6, &state);
            assert (ret == 4);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            /* Clear the byte just consumed (index 7, not the already
               cleared index 5).  */
            input[7] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 8, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        default:
          /* Not reachable: i only takes the values '1' through '4'.  */
          break;
        }
    }

  return 0;
}
387