1/*
2 * Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 */
18
19#if HAVE_CONFIG_H
20#  include <config.h>
21#endif
22
23#include <assert.h>
24#include <string.h>
25
26#include "charset.h"
27
/*
 * Checks applied to every charset under test, independent of its
 * encoding: degenerate decoder arguments, single ASCII bytes fed to
 * the decoder, and NUL / 'x' pushed through the encoder.
 *
 * charset: the charset handle to exercise; must not be null.
 */
void test_any(struct charset *charset)
{
  char out[2];
  int decoded;

  assert(charset != NULL);

  /* Decoder */

  /* Null input, and a bogus pointer paired with a zero length. */
  assert(charset_mbtowc(charset, NULL, NULL, 0) == 0);
  assert(charset_mbtowc(charset, NULL, NULL, 1) == 0);
  assert(charset_mbtowc(charset, NULL, (char *)(-1), 0) == 0);

  /* No destination for the decoded character. */
  assert(charset_mbtowc(charset, NULL, "a", 0) == 0);
  assert(charset_mbtowc(charset, NULL, "", 1) == 0);
  assert(charset_mbtowc(charset, NULL, "b", 1) == 1);
  assert(charset_mbtowc(charset, NULL, "", 2) == 0);
  assert(charset_mbtowc(charset, NULL, "c", 2) == 1);

  /* Same inputs again, this time checking the value stored. */
  decoded = 'x';
  assert(charset_mbtowc(charset, &decoded, "a", 0) == 0);
  assert(decoded == 'x');	/* zero-length input leaves it alone */
  assert(charset_mbtowc(charset, &decoded, "", 1) == 0);
  assert(decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "b", 1) == 1);
  assert(decoded == 'b');
  assert(charset_mbtowc(charset, &decoded, "", 2) == 0);
  assert(decoded == 0);
  assert(charset_mbtowc(charset, &decoded, "c", 2) == 1);
  assert(decoded == 'c');

  /* Encoder */

  assert(charset_wctomb(charset, NULL, 0) == 0);

  /* out[1] acts as a sentinel: exactly one byte may be written. */
  out[0] = '.';
  out[1] = '.';
  assert(charset_wctomb(charset, out, 0) == 1);
  assert(out[0] == '\0');
  assert(out[1] == '.');
  assert(charset_wctomb(charset, out, 'x') == 1);
  assert(out[0] == 'x');
  assert(out[1] == '.');
}
64
/*
 * UTF-8 specific checks: runs the generic test_any() suite on the
 * "UTF-8" charset, then probes the decoder with boundary values,
 * overlong forms and malformed sequences, and the encoder with the
 * smallest and largest value of each encoded length.
 */
void test_utf8(void)
{
  struct charset *charset;
  int wc;
  char s[8];

  charset = charset_find("UTF-8");
  test_any(charset);

  /* Decoder */
  wc = 0;

  /*
   * Boundaries of each encoded length.  For every length the overlong
   * form just below the boundary must be rejected and the shortest
   * valid form must be accepted.
   */
  assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127);
  assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 &&
	 wc == 1 << 11);
  assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 &&
	 wc == 1 << 16);
  assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 &&
	 wc == 1 << 21);
  assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 &&
	 wc == 1 << 26);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 &&
	 wc == 0x7fffffff);

  /*
   * Malformed trailing bytes: each case replaces one continuation
   * byte with a byte whose top two bits are not 10.
   */
  assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1);

  /* 0xfe and 0xff are rejected as lead bytes. */
  assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1);

  /* Encoder */

  /*
   * The buffer is filled with '.' once.  The cases run in order of
   * non-decreasing encoded length, so each result overwrites the
   * previous one and the untouched tail is still all dots.
   */
  strcpy(s, ".......");
  /*
   * Out-of-range value with bit 31 set.  Written as -0x7fffffff - 1
   * because 1 << 31 overflows a 32-bit int, which is undefined
   * behaviour.
   */
  assert(charset_wctomb(charset, s, -0x7fffffff - 1) == -1 &&
	 !strcmp(s, "......."));
  assert(charset_wctomb(charset, s, 127) == 1 &&
	 !strcmp(s, "\177......"));
  assert(charset_wctomb(charset, s, 128) == 2 &&
	 !strcmp(s, "\302\200....."));
  assert(charset_wctomb(charset, s, 0x7ff) == 2 &&
	 !strcmp(s, "\337\277....."));
  assert(charset_wctomb(charset, s, 0x800) == 3 &&
	 !strcmp(s, "\340\240\200...."));
  assert(charset_wctomb(charset, s, 0xffff) == 3 &&
	 !strcmp(s, "\357\277\277...."));
  assert(charset_wctomb(charset, s, 0x10000) == 4 &&
	 !strcmp(s, "\360\220\200\200..."));
  assert(charset_wctomb(charset, s, 0x1fffff) == 4 &&
	 !strcmp(s, "\367\277\277\277..."));
  assert(charset_wctomb(charset, s, 0x200000) == 5 &&
	 !strcmp(s, "\370\210\200\200\200.."));
  assert(charset_wctomb(charset, s, 0x3ffffff) == 5 &&
	 !strcmp(s, "\373\277\277\277\277.."));
  assert(charset_wctomb(charset, s, 0x4000000) == 6 &&
	 !strcmp(s, "\374\204\200\200\200\200."));
  assert(charset_wctomb(charset, s, 0x7fffffff) == 6 &&
	 !strcmp(s, "\375\277\277\277\277\277."));
}
148
/*
 * US-ASCII specific checks: runs the generic test_any() suite on the
 * "us-ascii" charset, then verifies that 127 is the last value the
 * decoder and the encoder accept.
 */
void test_ascii()
{
  struct charset *cs = charset_find("us-ascii");
  char out[3] = "..";
  int decoded = 0;

  test_any(cs);

  /* Decoder */
  assert(charset_mbtowc(cs, &decoded, "\177", 2) == 1);
  assert(decoded == 0x7f);
  assert(charset_mbtowc(cs, &decoded, "\200", 2) == -1);

  /* Encoder */
  assert(charset_wctomb(cs, out, 0x100) == -1);
  assert(strcmp(out, "..") == 0);	/* a failed encode leaves out untouched */
  assert(charset_wctomb(cs, out, 0xff) == -1);
  assert(charset_wctomb(cs, out, 0x80) == -1);
  assert(charset_wctomb(cs, out, 0x7f) == 1);
  assert(strcmp(out, "\177.") == 0);
}
170
/*
 * ISO-8859-1 specific checks: runs the generic test_any() suite on the
 * "iso-8859-1" charset, then verifies that a high byte decodes to the
 * same value and that the encoder accepts values up to 255 only.
 */
void test_iso1()
{
  struct charset *cs = charset_find("iso-8859-1");
  char out[3] = "..";
  int decoded = 0;

  test_any(cs);

  /* Decoder: only the first byte of the input is expected to be used. */
  assert(charset_mbtowc(cs, &decoded, "\302\200", 9) == 1);
  assert(decoded == 0xc2);

  /* Encoder */
  assert(charset_wctomb(cs, out, 0x100) == -1);
  assert(strcmp(out, "..") == 0);	/* a failed encode leaves out untouched */
  assert(charset_wctomb(cs, out, 0xff) == 1);
  assert(strcmp(out, "\377.") == 0);
  assert(charset_wctomb(cs, out, 0x80) == 1);
  assert(strcmp(out, "\200.") == 0);
}
190
/*
 * ISO-8859-2 specific checks: runs the generic test_any() suite on the
 * "iso-8859-2" charset, then verifies a few byte <-> code point
 * mappings, including values that differ from ISO-8859-1.
 */
void test_iso2()
{
  struct charset *cs = charset_find("iso-8859-2");
  char out[3] = "..";
  int decoded = 0;

  test_any(cs);

  /* Decoder */
  assert(charset_mbtowc(cs, &decoded, "\302\200", 9) == 1);
  assert(decoded == 0xc2);
  assert(charset_mbtowc(cs, &decoded, "\377", 2) == 1);
  assert(decoded == 0x2d9);

  /* Encoder */
  assert(charset_wctomb(cs, out, 256) == -1);
  assert(strcmp(out, "..") == 0);	/* rejected: out stays untouched */
  assert(charset_wctomb(cs, out, 255) == -1);
  assert(strcmp(out, "..") == 0);
  assert(charset_wctomb(cs, out, 258) == 1);
  assert(strcmp(out, "\303.") == 0);
  assert(charset_wctomb(cs, out, 128) == 1);
  assert(strcmp(out, "\200.") == 0);
}
212
/*
 * Exercises charset_convert() between UTF-8, ISO-8859-1 and ISO-8859-2:
 * identity conversion, replacement of invalid input and of
 * unrepresentable characters, optional output arguments, and a full
 * 256-byte ISO-8859-2 -> UTF-8 -> ISO-8859-2 round trip.
 *
 * NOTE(review): the buffers returned through q and r are never released
 * here.  Presumably charset_convert() allocates them and the caller
 * owns them -- confirm against charset.h and free them if so.
 */
void test_convert(void)
{
  const char *p;
  char *q, *r;
  char s[256];
  size_t n, n2;
  int i;

  /*
   * p starts with a NUL byte, so strcmp() would stop after one byte
   * and check almost nothing.  Compare all 10 bytes with memcmp().
   */
  p = "\000x\302\200\375\277\277\277\277\277";
  assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 &&
	 n == 10 && !memcmp(p, q, 10));

  /* Invalid input bytes are expected to come out as '#'. */
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 &&
	 n == 4 && !strcmp(q, "x##y"));
  /* Either output argument may be null. */
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 &&
	 n == 4);
  assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 &&
	 !strcmp(q, "x##y"));

  /* A character the target cannot represent is expected to become '?'. */
  assert(charset_convert("UTF-8", "iso-8859-1",
			 "\302\200\304\200x", 5, &q, &n) == 1 &&
	 n == 3 && !strcmp(q, "\200?x"));
  assert(charset_convert("iso-8859-1", "UTF-8",
			 "\000\200\377", 3, &q, &n) == 0 &&
	 n == 5 && !memcmp(q, "\000\302\200\303\277", 5));
  assert(charset_convert("iso-8859-1", "iso-8859-1",
			 "\000\200\377", 3, &q, &n) == 0 &&
	 n == 3 && !memcmp(q, "\000\200\377", 3));

  assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 &&
	 n == 2 && !strcmp(q, "\305\224"));
  assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 &&
	 n == 1 && !strcmp(q, "\300"));

  /* Round trip of every byte value through UTF-8 and back. */
  for (i = 0; i < 256; i++)
    s[i] = (char)i;

  assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0);
  assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0);
  assert(n2 == 256 && !memcmp(r, s, n2));
}
252
/*
 * Runs the per-charset suites followed by the conversion suite.
 *
 * Every check is an assert(), so a failure aborts the program and
 * reaching the end means all checks passed.  Building with NDEBUG
 * defined compiles all checks away.
 *
 * Returns 0.
 */
int main(void)
{
  test_utf8();
  test_ascii();
  test_iso1();
  test_iso2();

  test_convert();

  return 0;
}
264