1/* 2 Unix SMB/CIFS implementation. 3 4 local testing of iconv routines. This tests the system iconv code against 5 the built-in iconv code 6 7 Copyright (C) Andrew Tridgell 2004 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or 12 (at your option) any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program. If not, see <http://www.gnu.org/licenses/>. 21*/ 22 23#include "includes.h" 24#include "torture/torture.h" 25#include "system/iconv.h" 26#include "system/time.h" 27#include "libcli/raw/libcliraw.h" 28#include "param/param.h" 29#include "torture/util.h" 30#include "talloc.h" 31 32#if HAVE_NATIVE_ICONV 33 34static bool iconv_untestable(struct torture_context *tctx) 35{ 36 iconv_t cd; 37 38 if (!lp_parm_bool(tctx->lp_ctx, NULL, "iconv", "native", true)) 39 torture_skip(tctx, "system iconv disabled - skipping test"); 40 41 cd = iconv_open("UTF-16LE", "UCS-4LE"); 42 if (cd == (iconv_t)-1) 43 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE"); 44 iconv_close(cd); 45 46 cd = iconv_open("UTF-16LE", "CP850"); 47 if (cd == (iconv_t)-1) 48 torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n"); 49 iconv_close(cd); 50 51 return false; 52} 53 54/* 55 generate a UTF-16LE buffer for a given unicode codepoint 56*/ 57static int gen_codepoint_utf16(unsigned int codepoint, 58 char *buf, size_t *size) 59{ 60 static iconv_t cd; 61 uint8_t in[4]; 62 char *ptr_in; 63 size_t size_in, size_out, ret; 64 if (!cd) { 65 cd = iconv_open("UTF-16LE", "UCS-4LE"); 66 if (cd == (iconv_t)-1) { 67 cd = NULL; 68 return -1; 69 } 70 } 71 72 in[0] = codepoint & 0xFF; 73 in[1] = (codepoint>>8) & 0xFF; 74 in[2] = (codepoint>>16) & 0xFF; 75 in[3] = (codepoint>>24) & 0xFF; 76 77 ptr_in = (char *)in; 78 size_in = 4; 79 size_out = 8; 80 81 ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out); 82 83 *size = 8 - size_out; 84 85 return ret; 86} 87 88 89/* 90 work out the unicode codepoint of the first UTF-8 character in the buffer 91*/ 92static unsigned int get_codepoint(char *buf, size_t size, const char *charset) 93{ 94 iconv_t cd; 95 uint8_t out[4]; 96 char *ptr_out; 97 size_t size_out, size_in, ret; 98 99 cd = iconv_open("UCS-4LE", charset); 100 101 size_in = size; 102 ptr_out = (char *)out; 103 size_out = sizeof(out); 104 memset(out, 0, sizeof(out)); 105 106 ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out); 107 108 iconv_close(cd); 109 110 return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24); 111} 112 113/* 114 display a buffer with name prefix 115*/ 116static void show_buf(const char *name, uint8_t *buf, size_t size) 117{ 118 int i; 119 printf("%s ", name); 120 for (i=0;i<size;i++) { 121 printf("%02x ", buf[i]); 122 } 123 printf("\n"); 124} 125 126/* 127 given a UTF-16LE buffer, test the system and built-in iconv code to 128 make sure they do exactly the same thing in converting the buffer to 129 "charset", then convert it back again and ensure we get the same 130 buffer back 131*/ 132static bool test_buffer(struct torture_context *test, 133 uint8_t *inbuf, size_t size, const char *charset) 134{ 135 uint8_t buf1[1000], buf2[1000], buf3[1000]; 136 size_t outsize1, outsize2, outsize3; 137 const char *ptr_in; 138 char *ptr_out; 139 size_t size_in1, size_in2, size_in3; 140 size_t ret1, ret2, ret3, len1, len2; 141 int errno1, errno2; 142 static iconv_t cd; 143 static smb_iconv_t cd2, cd3; 144 static const char *last_charset; 145 146 if (cd && last_charset) { 147 iconv_close(cd); 148 smb_iconv_close(cd2); 149 smb_iconv_close(cd3); 150 cd = NULL; 151 } 152 153 if (!cd) { 154 cd = iconv_open(charset, "UTF-16LE"); 155 if (cd == (iconv_t)-1) { 156 torture_fail(test, 157 talloc_asprintf(test, 158 "failed to open %s to UTF-16LE", 159 charset)); 160 } 161 cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true)); 162 cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lp_parm_bool(test->lp_ctx, NULL, "iconv", "native", true)); 163 last_charset = charset; 164 } 165 166 /* internal convert to charset - placing result in buf1 */ 167 ptr_in = (const char *)inbuf; 168 ptr_out = (char *)buf1; 169 size_in1 = size; 170 outsize1 = sizeof(buf1); 171 172 memset(ptr_out, 0, outsize1); 173 errno = 0; 174 ret1 = smb_iconv(cd2, &ptr_in, &size_in1, &ptr_out, &outsize1); 175 errno1 = errno; 176 177 /* system convert to charset - placing result in buf2 */ 178 ptr_in = (const char *)inbuf; 179 ptr_out = (char *)buf2; 180 size_in2 = size; 181 outsize2 = sizeof(buf2); 182 183 memset(ptr_out, 0, outsize2); 184 errno = 0; 185 ret2 = iconv(cd, discard_const_p(char *, &ptr_in), &size_in2, &ptr_out, &outsize2); 186 errno2 = errno; 187 188 len1 = sizeof(buf1) - outsize1; 189 len2 = sizeof(buf2) - outsize2; 190 191 /* codepoints above 1M are not interesting for now */ 192 if (len2 > len1 && 193 memcmp(buf1, buf2, len1) == 0 && 194 get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) { 195 return true; 196 } 197 if (len1 > len2 && 198 memcmp(buf1, buf2, len2) == 0 && 199 get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) { 200 return true; 201 } 202 203 torture_assert_int_equal(test, ret1, ret2, "ret mismatch"); 204 205 if (errno1 != errno2) { 206 show_buf(" rem1:", inbuf+(size-size_in1), size_in1); 207 show_buf(" rem2:", inbuf+(size-size_in2), size_in2); 208 torture_fail(test, talloc_asprintf(test, 209 "e1=%d/%s e2=%d/%s", 210 errno1, strerror(errno1), 211 errno2, strerror(errno2))); 212 } 213 214 torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch"); 215 216 torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch"); 217 218 if (len1 != len2 || 219 memcmp(buf1, buf2, len1) != 0) { 220 torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2); 221 show_buf(" IN1:", inbuf, size-size_in1); 222 show_buf(" IN2:", inbuf, size-size_in2); 223 show_buf("OUT1:", buf1, len1); 224 show_buf("OUT2:", buf2, len2); 225 if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) { 226 torture_comment(test, "next codepoint is %u", 227 get_codepoint((char *)(buf2+len1), len2-len1, charset)); 228 } 229 if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) { 230 torture_comment(test, "next codepoint is %u", 231 get_codepoint((char *)(buf1+len2),len1-len2, charset)); 232 } 233 234 torture_fail(test, "failed"); 235 } 236 237 /* convert back to UTF-16, putting result in buf3 */ 238 size = size - size_in1; 239 ptr_in = (const char *)buf1; 240 ptr_out = (char *)buf3; 241 size_in3 = len1; 242 outsize3 = sizeof(buf3); 243 244 memset(ptr_out, 0, outsize3); 245 ret3 = smb_iconv(cd3, &ptr_in, &size_in3, &ptr_out, &outsize3); 246 247 /* we only internally support the first 1M codepoints */ 248 if (outsize3 != sizeof(buf3) - size && 249 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), 250 size - (sizeof(buf3) - outsize3), 251 "UTF-16LE") >= (1<<20)) { 252 return true; 253 } 254 255 torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test, 256 "pull failed - %s", strerror(errno))); 257 258 if (strncmp(charset, "UTF", 3) != 0) { 259 /* don't expect perfect mappings for non UTF charsets */ 260 return true; 261 } 262 263 264 torture_assert_int_equal(test, outsize3, sizeof(buf3) - size, 265 "wrong outsize3"); 266 267 if (memcmp(buf3, inbuf, size) != 0) { 268 torture_comment(test, "pull bytes mismatch:"); 269 show_buf("inbuf", inbuf, size); 270 show_buf(" buf3", buf3, sizeof(buf3) - outsize3); 271 torture_comment(test, "next codepoint is %u\n", 272 get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), 273 size - (sizeof(buf3) - outsize3), 274 "UTF-16LE")); 275 torture_fail(test, ""); 276 } 277 278 return true; 279} 280 281 282/* 283 test the push_codepoint() and next_codepoint() functions for a given 284 codepoint 285*/ 286static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint) 287{ 288 uint8_t buf[10]; 289 size_t size, size2; 290 codepoint_t c; 291 292 size = push_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, codepoint); 293 torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000), 294 "Invalid Codepoint range"); 295 296 if (size == -1) return true; 297 298 buf[size] = random(); 299 buf[size+1] = random(); 300 buf[size+2] = random(); 301 buf[size+3] = random(); 302 303 c = next_codepoint_convenience(lp_iconv_convenience(tctx->lp_ctx), (char *)buf, &size2); 304 305 torture_assert(tctx, c == codepoint, 306 talloc_asprintf(tctx, 307 "next_codepoint(%u) failed - gave %u", codepoint, c)); 308 309 torture_assert(tctx, size2 == size, 310 talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n", 311 codepoint, (int)size2, (int)size)); 312 313 return true; 314} 315 316static bool test_next_codepoint(struct torture_context *tctx) 317{ 318 unsigned int codepoint; 319 if (iconv_untestable(tctx)) 320 return true; 321 322 for (codepoint=0;codepoint<(1<<20);codepoint++) { 323 if (!test_codepoint(tctx, codepoint)) 324 return false; 325 } 326 return true; 327} 328 329static bool test_first_1m(struct torture_context *tctx) 330{ 331 unsigned int codepoint; 332 size_t size; 333 unsigned char inbuf[1000]; 334 335 if (iconv_untestable(tctx)) 336 return true; 337 338 for (codepoint=0;codepoint<(1<<20);codepoint++) { 339 if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) { 340 continue; 341 } 342 343 if (codepoint % 1000 == 0) { 344 if (torture_setting_bool(tctx, "progress", true)) { 345 torture_comment(tctx, "codepoint=%u \r", codepoint); 346 fflush(stdout); 347 } 348 } 349 350 if (!test_buffer(tctx, inbuf, size, "UTF-8")) 351 return false; 352 } 353 return true; 354} 355 356static bool test_random_5m(struct torture_context *tctx) 357{ 358 unsigned char inbuf[1000]; 359 unsigned int i; 360 361 if (iconv_untestable(tctx)) 362 return true; 363 364 for (i=0;i<500000;i++) { 365 size_t size; 366 unsigned int c; 367 368 if (i % 1000 == 0) { 369 if (torture_setting_bool(tctx, "progress", true)) { 370 torture_comment(tctx, "i=%u \r", i); 371 fflush(stdout); 372 } 373 } 374 375 size = random() % 100; 376 for (c=0;c<size;c++) { 377 if (random() % 100 < 80) { 378 inbuf[c] = random() % 128; 379 } else { 380 inbuf[c] = random(); 381 } 382 if (random() % 10 == 0) { 383 inbuf[c] |= 0xd8; 384 } 385 if (random() % 10 == 0) { 386 inbuf[c] |= 0xdc; 387 } 388 } 389 if (!test_buffer(tctx, inbuf, size, "UTF-8")) { 390 printf("i=%d failed UTF-8\n", i); 391 return false; 392 } 393 394 if (!test_buffer(tctx, inbuf, size, "CP850")) { 395 printf("i=%d failed CP850\n", i); 396 return false; 397 } 398 } 399 return true; 400} 401 402 403static bool test_string2key(struct torture_context *tctx) 404{ 405 uint16_t *buf; 406 char *dest = NULL; 407 TALLOC_CTX *mem_ctx = talloc_new(tctx); 408 size_t len = (random()%1000)+1; 409 const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' }; 410 uint8_t le1[20]; 411 uint8_t *munged1; 412 uint8_t *out1; 413 size_t ret; 414 int i; 415 const char *correct = "a\357\277\275b\357\277\275c\001defg"; 416 417 buf = talloc_size(mem_ctx, len*2); 418 generate_random_buffer((uint8_t *)buf, len*2); 419 420 torture_comment(tctx, "converting random buffer\n"); 421 422 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret, false)) { 423 torture_fail(tctx, "Failed to convert random buffer\n"); 424 } 425 426 for (i=0;i<10;i++) { 427 SSVAL(&le1[2*i], 0, in1[i]); 428 } 429 430 torture_comment(tctx, "converting fixed buffer to UTF16\n"); 431 432 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret, false)) { 433 torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n"); 434 } 435 436 torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n"); 437 438 torture_comment(tctx, "converting fixed buffer to UTF8\n"); 439 440 if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret, false)) { 441 torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n"); 442 } 443 444 torture_assert(tctx, strcmp(correct, (const char *) out1) == 0, 445 "conversion gave incorrect result\n"); 446 447 talloc_free(mem_ctx); 448 449 return true; 450} 451 452struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx) 453{ 454 struct torture_suite *suite = torture_suite_create(mem_ctx, "ICONV"); 455 456 torture_suite_add_simple_test(suite, "string2key", 457 test_string2key); 458 459 torture_suite_add_simple_test(suite, "next_codepoint()", 460 test_next_codepoint); 461 462 torture_suite_add_simple_test(suite, "first 1M codepoints", 463 test_first_1m); 464 465 torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences", 466 test_random_5m); 467 468 torture_suite_add_simple_test(suite, "string2key", 469 test_string2key); 470 return suite; 471} 472 473#else 474 475struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx) 476{ 477 printf("No native iconv library - can't run iconv test\n"); 478 return NULL; 479} 480 481#endif 482