1/* Test of conversion of multibyte character to wide character.
2   Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18
19#include <config.h>
20
21#include <wchar.h>
22
23#include "signature.h"
/* Check the declaration of mbrtowc against the expected prototype:
   return type size_t, parameters (wchar_t *, char const *, size_t,
   mbstate_t *).  SIGNATURE_CHECK is provided by "signature.h"; presumably
   the check happens at compile time -- confirm against that header.  */
SIGNATURE_CHECK (mbrtowc, size_t, (wchar_t *, char const *, size_t,
                                   mbstate_t *));
26
27#include <locale.h>
28#include <stdio.h>
29#include <string.h>
30
31#include "macros.h"
32
/* Exercise mbrtowc in the locale selected by the environment.

   The locale-independent checks (zero-length input, NUL byte input, the
   characters of the ISO C basic character set listed below, and a NULL
   input pointer) always run.  If a command-line argument is given, its
   first character selects one additional, encoding-specific test:
     '1'  ISO-8859-1 or ISO-8859-15
     '2'  UTF-8
     '3'  EUC-JP
     '4'  GB18030

   Returns 0 after an encoding-specific test has run to completion.
   Returns 1 if the locale cannot be set, if no argument is given, or if
   the argument does not select one of the tests above.  Every individual
   check is made with ASSERT (from "macros.h").  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    return 1;

  /* Test zero-length input.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    ASSERT (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test NUL byte input, both with and without a destination for the
     wide character.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (wc == 0);
    ASSERT (mbsinit (&state));
    ret = mbrtowc (NULL, "", 1, &state);
    ASSERT (ret == 0);
    ASSERT (mbsinit (&state));
  }

  /* Test single-byte input.  */
  {
    int c;
    char buf[1];

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == c);
          ASSERT (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          ASSERT (ret == 1);
          ASSERT (mbsinit (&state));
          break;

        default:
          /* All other byte values are not tested here.  */
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.
     The wide character destination must be left untouched.  */
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    ASSERT (ret == 0);
    ASSERT (wc == (wchar_t) 0xBADFACE);
    ASSERT (mbsinit (&state));
  }

  /* Encoding-specific tests.  In each of them, the bytes that a call has
     just consumed are overwritten with NUL before the next call --
     presumably so that a later conversion cannot succeed by looking back
     at input it was already given.  */
  if (argc > 1)
    switch (argv[1][0])
      {
      case '1':
        /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
        {
          char input[] = "B\374\337er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == (unsigned char) '\374');
          ASSERT (mbsinit (&state));
          input[1] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 3, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == (unsigned char) '\337');
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          input[3] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 4, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '2':
        /* Locale encoding is UTF-8.  */
        {
          char input[] = "B\303\274\303\237er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          /* Only the first byte of a two-byte character: incomplete.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[1] = '\0';

          /* The second byte completes the character held in STATE.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 5, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 4, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[3] = '\0';
          input[4] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 5, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          input[5] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 6, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '3':
        /* Locale encoding is EUC-JP.  */
        {
          char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == '<');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 2, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[1] = '\0';
          input[2] = '\0';

          /* Only the first byte of a two-byte character: incomplete.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[3] = '\0';

          /* The second byte completes the character held in STATE.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 4, 4, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[4] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 5, 3, &state);
          ASSERT (ret == 2);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[5] = '\0';
          input[6] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 7, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == '>');
          ASSERT (mbsinit (&state));
        }
        return 0;

      case '4':
        /* Locale encoding is GB18030.  */
        {
          char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
          memset (&state, '\0', sizeof (mbstate_t));

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'B');
          ASSERT (mbsinit (&state));
          input[0] = '\0';

          /* Only the first byte of a two-byte character: incomplete.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 1, 1, &state);
          ASSERT (ret == (size_t)(-2));
          ASSERT (wc == (wchar_t) 0xBADFACE);
          ASSERT (!mbsinit (&state));
          input[1] = '\0';

          /* The second byte completes the character held in STATE.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 2, 7, &state);
          ASSERT (ret == 1);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[2] = '\0';

          /* A four-byte character, converted in one call.  */
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 3, 6, &state);
          ASSERT (ret == 4);
          ASSERT (wctob (wc) == EOF);
          ASSERT (mbsinit (&state));
          input[3] = '\0';
          input[4] = '\0';
          input[5] = '\0';
          input[6] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 7, 2, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'e');
          ASSERT (mbsinit (&state));
          /* Clear the byte just consumed ('e' is at index 7), as every
             other step does.  */
          input[7] = '\0';

          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, input + 8, 1, &state);
          ASSERT (ret == 1);
          ASSERT (wc == 'r');
          ASSERT (mbsinit (&state));
        }
        return 0;

      default:
        /* Unknown test selector: fall through to the failure return.  */
        break;
      }

  return 1;
}
324