• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src/router/samba-3.5.8/lib/util/charset/tests/
1/*
2   Unix SMB/CIFS implementation.
3
4   local testing of iconv routines. This tests the system iconv code against
5   the built-in iconv code
6
7   Copyright (C) Andrew Tridgell 2004
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 3 of the License, or
12   (at your option) any later version.
13
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program.  If not, see <http://www.gnu.org/licenses/>.
21*/
22
23#include "includes.h"
24#include "torture/torture.h"
25#include "system/iconv.h"
26#include "system/time.h"
27#include "libcli/raw/libcliraw.h"
28#include "param/param.h"
29#include "torture/util.h"
30#include "talloc.h"
31
32#if HAVE_NATIVE_ICONV
33
34static bool iconv_untestable(struct torture_context *tctx)
35{
36	iconv_t cd;
37
38	if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true))
39		torture_skip(tctx, "system iconv disabled - skipping test");
40
41	cd = iconv_open("UTF-16LE", "UCS-4LE");
42	if (cd == (iconv_t)-1)
43		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE");
44	iconv_close(cd);
45
46	cd = iconv_open("UTF-16LE", "CP850");
47	if (cd == (iconv_t)-1)
48		torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n");
49	iconv_close(cd);
50
51	return false;
52}
53
/*
  generate a UTF-16LE buffer for a given unicode codepoint

  The codepoint is serialised as 4 little-endian bytes (UCS-4LE) and run
  through the system iconv. "buf" must have room for at least 8 bytes;
  "*size" receives the number of bytes written to it.

  Returns -1 if the converter cannot be opened ("*size" is not touched in
  that case), otherwise the iconv() result converted to int.
*/
static int gen_codepoint_utf16(unsigned int codepoint,
			       char *buf, size_t *size)
{
	/* opened on first use and then kept for the life of the process */
	static iconv_t ucs4_to_utf16;
	const size_t out_capacity = 8;
	uint8_t ucs4[4];
	char *src = (char *)ucs4;
	size_t src_left = sizeof(ucs4);
	size_t dst_left = out_capacity;
	size_t rc;
	unsigned int i;

	if (!ucs4_to_utf16) {
		ucs4_to_utf16 = iconv_open("UTF-16LE", "UCS-4LE");
		if (ucs4_to_utf16 == (iconv_t)-1) {
			/* forget the invalid handle so the next call retries */
			ucs4_to_utf16 = NULL;
			return -1;
		}
	}

	/* least significant byte first */
	for (i = 0; i < sizeof(ucs4); i++) {
		ucs4[i] = (codepoint >> (8 * i)) & 0xFF;
	}

	rc = iconv(ucs4_to_utf16, &src, &src_left, &buf, &dst_left);

	*size = out_capacity - dst_left;

	return rc;
}
87
88
/*
  work out the unicode codepoint of the first character in the buffer

  "buf" holds "size" bytes encoded in "charset". The data is converted to
  UCS-4LE with the system iconv into a 4 byte output buffer, so only the
  first character fits.

  Returns the codepoint, or 0 if the converter could not be opened or
  nothing could be converted.
*/
static unsigned int get_codepoint(char *buf, size_t size, const char *charset)
{
	iconv_t cd;
	uint8_t out[4] = { 0, 0, 0, 0 };
	char *ptr_out = (char *)out;
	size_t size_in = size;
	size_t size_out = sizeof(out);

	cd = iconv_open("UCS-4LE", charset);
	if (cd == (iconv_t)-1) {
		/* never hand an invalid descriptor to iconv()/iconv_close() */
		return 0;
	}

	/*
	 * The return value is deliberately ignored: when the input holds
	 * more than one character the call stops once "out" is full, and on
	 * a failed conversion "out" simply stays zeroed.
	 */
	(void)iconv(cd, &buf, &size_in, &ptr_out, &size_out);

	iconv_close(cd);

	/* assemble as unsigned so that the top byte never shifts into a sign bit */
	return (unsigned int)out[0] |
	       ((unsigned int)out[1] << 8) |
	       ((unsigned int)out[2] << 16) |
	       ((unsigned int)out[3] << 24);
}
112
/*
  display a buffer with name prefix

  Prints "name", then each of the "size" bytes of "buf" as two hex digits
  followed by a space, then a newline.
*/
static void show_buf(const char *name, uint8_t *buf, size_t size)
{
	size_t i;	/* same type as "size": no signed/unsigned comparison */

	printf("%s ", name);
	for (i = 0; i < size; i++) {
		printf("%02x ", buf[i]);
	}
	printf("\n");
}
125
126/*
127  given a UTF-16LE buffer, test the system and built-in iconv code to
128  make sure they do exactly the same thing in converting the buffer to
129  "charset", then convert it back again and ensure we get the same
130  buffer back
131*/
132static bool test_buffer(struct torture_context *test,
133			uint8_t *inbuf, size_t size, const char *charset)
134{
135	uint8_t buf1[1000], buf2[1000], buf3[1000];
136	size_t outsize1, outsize2, outsize3;
137	const char *ptr_in;
138	char *ptr_out;
139	size_t size_in1, size_in2, size_in3;
140	size_t ret1, ret2, ret3, len1, len2;
141	int errno1, errno2;
142	static iconv_t cd;
143	static smb_iconv_t cd2, cd3;
144	static const char *last_charset;
145
146	if (cd && last_charset) {
147		iconv_close(cd);
148		smb_iconv_close(cd2);
149		smb_iconv_close(cd3);
150		cd = NULL;
151	}
152
153	if (!cd) {
154		cd = iconv_open(charset, "UTF-16LE");
155		if (cd == (iconv_t)-1) {
156			torture_fail(test,
157				     talloc_asprintf(test,
158						     "failed to open %s to UTF-16LE",
159						     charset));
160		}
161		cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
162		cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true));
163		last_charset = charset;
164	}
165
166	/* internal convert to charset - placing result in buf1 */
167	ptr_in = (const char *)inbuf;
168	ptr_out = (char *)buf1;
169	size_in1 = size;
170	outsize1 = sizeof(buf1);
171
172	memset(ptr_out, 0, outsize1);
173	errno = 0;
174	ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1);
175	errno1 = errno;
176
177	/* system convert to charset - placing result in buf2 */
178	ptr_in = (const char *)inbuf;
179	ptr_out = (char *)buf2;
180	size_in2 = size;
181	outsize2 = sizeof(buf2);
182
183	memset(ptr_out, 0, outsize2);
184	errno = 0;
185	ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2);
186	errno2 = errno;
187
188	len1 = sizeof(buf1) - outsize1;
189	len2 = sizeof(buf2) - outsize2;
190
191	/* codepoints above 1M are not interesting for now */
192	if (len2 > len1 &&
193	    memcmp(buf1, buf2, len1) == 0 &&
194	    get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) {
195		return true;
196	}
197	if (len1 > len2 &&
198	    memcmp(buf1, buf2, len2) == 0 &&
199	    get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) {
200		return true;
201	}
202
203	torture_assert_int_equal(test, ret1, ret2, "ret mismatch");
204
205	if (errno1 != errno2) {
206		show_buf(" rem1:", inbuf+(size-size_in1), size_in1);
207		show_buf(" rem2:", inbuf+(size-size_in2), size_in2);
208		torture_fail(test, talloc_asprintf(test,
209						   "e1=%d/%s e2=%d/%s",
210						   errno1, strerror(errno1),
211						   errno2, strerror(errno2)));
212	}
213
214	torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch");
215
216	torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch");
217
218	if (len1 != len2 ||
219	    memcmp(buf1, buf2, len1) != 0) {
220		torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2);
221		show_buf(" IN1:", inbuf, size-size_in1);
222		show_buf(" IN2:", inbuf, size-size_in2);
223		show_buf("OUT1:", buf1, len1);
224		show_buf("OUT2:", buf2, len2);
225		if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) {
226			torture_comment(test, "next codepoint is %u",
227			       get_codepoint((char *)(buf2+len1), len2-len1, charset));
228		}
229		if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) {
230			torture_comment(test, "next codepoint is %u",
231			       get_codepoint((char *)(buf1+len2),len1-len2, charset));
232		}
233
234		torture_fail(test, "failed");
235	}
236
237	/* convert back to UTF-16, putting result in buf3 */
238	size = size - size_in1;
239	ptr_in = (const char *)buf1;
240	ptr_out = (char *)buf3;
241	size_in3 = len1;
242	outsize3 = sizeof(buf3);
243
244	memset(ptr_out, 0, outsize3);
245	ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3);
246
247	/* we only internally support the first 1M codepoints */
248	if (outsize3 != sizeof(buf3) - size &&
249	    get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
250			  size - (sizeof(buf3) - outsize3),
251			  "UTF-16LE") >= (1<<20)) {
252		return true;
253	}
254
255	torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test,
256								"pull failed - %s", strerror(errno)));
257
258	if (strncmp(charset, "UTF", 3) != 0) {
259		/* don't expect perfect mappings for non UTF charsets */
260		return true;
261	}
262
263
264	torture_assert_int_equal(test, outsize3, sizeof(buf3) - size,
265		"wrong outsize3");
266
267	if (memcmp(buf3, inbuf, size) != 0) {
268		torture_comment(test, "pull bytes mismatch:");
269		show_buf("inbuf", inbuf, size);
270		show_buf(" buf3", buf3, sizeof(buf3) - outsize3);
271		torture_comment(test, "next codepoint is %u\n",
272		       get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3),
273				     size - (sizeof(buf3) - outsize3),
274				     "UTF-16LE"));
275		torture_fail(test, "");
276	}
277
278	return true;
279}
280
281
282/*
283  test the push_codepoint() and next_codepoint() functions for a given
284  codepoint
285*/
286static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint)
287{
288	uint8_t buf[10];
289	size_t size, size2;
290	codepoint_t c;
291
292	size = push_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint);
293	torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000),
294		       "Invalid Codepoint range");
295
296	if (size == -1) return true;
297
298	buf[size] = random();
299	buf[size+1] = random();
300	buf[size+2] = random();
301	buf[size+3] = random();
302
303	c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2);
304
305	torture_assert(tctx, c == codepoint,
306		       talloc_asprintf(tctx,
307				       "next_codepoint(%u) failed - gave %u", codepoint, c));
308
309	torture_assert(tctx, size2 == size,
310			talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n",
311		       codepoint, (int)size2, (int)size));
312
313	return true;
314}
315
/*
  run test_codepoint() over every codepoint below 1<<20, stopping at the
  first failure. Returns true straight away when iconv_untestable()
  reports that the tests cannot run.
*/
static bool test_next_codepoint(struct torture_context *tctx)
{
	const unsigned int limit = 1<<20;
	unsigned int cp = 0;

	if (iconv_untestable(tctx)) {
		return true;
	}

	while (cp < limit) {
		if (!test_codepoint(tctx, cp)) {
			return false;
		}
		cp++;
	}

	return true;
}
328
/*
  convert every codepoint below 1<<20 to UTF-16LE and pass it through
  test_buffer() with the "UTF-8" charset. Codepoints that
  gen_codepoint_utf16() cannot encode are skipped. Returns false at the
  first test_buffer() failure.
*/
static bool test_first_1m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	size_t utf16_len;
	unsigned int cp;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (cp = 0; cp < (1<<20); cp++) {
		if (gen_codepoint_utf16(cp, (char *)inbuf, &utf16_len) != 0) {
			continue;
		}

		/* progress line every 1000 codepoints, if enabled */
		if (cp % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "codepoint=%u   \r", cp);
			fflush(stdout);
		}

		if (!test_buffer(tctx, inbuf, utf16_len, "UTF-8")) {
			return false;
		}
	}

	return true;
}
355
/*
  feed randomly generated buffers through test_buffer() for both the
  "UTF-8" and "CP850" charsets.

  Each buffer is 0-99 bytes long. Most bytes are drawn from 0-127, and
  some get 0xd8 and/or 0xdc or-ed in. Note that despite the "5m" in the
  name the loop runs 500000 iterations.

  Returns false at the first test_buffer() failure.
*/
static bool test_random_5m(struct torture_context *tctx)
{
	unsigned char inbuf[1000];
	unsigned int i;

	if (iconv_untestable(tctx)) {
		return true;
	}

	for (i = 0; i < 500000; i++) {
		size_t size;
		unsigned int c;

		/* progress line every 1000 iterations, if enabled */
		if (i % 1000 == 0 &&
		    torture_setting_bool(tctx, "progress", true)) {
			torture_comment(tctx, "i=%u              \r", i);
			fflush(stdout);
		}

		size = random() % 100;
		for (c = 0; c < size; c++) {
			if (random() % 100 < 80) {
				inbuf[c] = random() % 128;
			} else {
				inbuf[c] = random();
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xd8;
			}
			if (random() % 10 == 0) {
				inbuf[c] |= 0xdc;
			}
		}

		/* "i" is unsigned, so it is printed with %u */
		if (!test_buffer(tctx, inbuf, size, "UTF-8")) {
			printf("i=%u failed UTF-8\n", i);
			return false;
		}

		if (!test_buffer(tctx, inbuf, size, "CP850")) {
			printf("i=%u failed CP850\n", i);
			return false;
		}
	}

	return true;
}
401
402
403static bool test_string2key(struct torture_context *tctx)
404{
405	uint16_t *buf;
406	char *dest = NULL;
407	TALLOC_CTX *mem_ctx = talloc_new(tctx);
408	size_t len = (random()%1000)+1;
409	const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' };
410	uint8_t le1[20];
411	uint8_t *munged1;
412	uint8_t *out1;
413	size_t ret;
414	int i;
415	const char *correct = "a\357\277\275b\357\277\275c\001defg";
416
417	buf = talloc_size(mem_ctx, len*2);
418	generate_random_buffer((uint8_t *)buf, len*2);
419
420	torture_comment(tctx, "converting random buffer\n");
421
422	if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) {
423		torture_fail(tctx, "Failed to convert random buffer\n");
424	}
425
426	for (i=0;i<10;i++) {
427		SSVAL(&le1[2*i], 0, in1[i]);
428	}
429
430	torture_comment(tctx, "converting fixed buffer to UTF16\n");
431
432	if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) {
433		torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n");
434	}
435
436	torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n");
437
438	torture_comment(tctx, "converting fixed buffer to UTF8\n");
439
440	if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) {
441		torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n");
442	}
443
444	torture_assert(tctx, strcmp(correct, (const char *) out1) == 0,
445		"conversion gave incorrect result\n");
446
447	talloc_free(mem_ctx);
448
449	return true;
450}
451
452struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
453{
454	struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV");
455
456	torture_suite_add_simple_test(suite, "string2key",
457				      test_string2key);
458
459	torture_suite_add_simple_test(suite, "next_codepoint()",
460				      test_next_codepoint);
461
462	torture_suite_add_simple_test(suite, "first 1M codepoints",
463				      test_first_1m);
464
465	torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences",
466				      test_random_5m);
467
468	torture_suite_add_simple_test(suite, "string2key",
469				      test_string2key);
470	return suite;
471}
472
473#else
474
475struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx)
476{
477	printf("No native iconv library - can't run iconv test\n");
478	return NULL;
479}
480
481#endif
482