1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6/* 7******************************************************************************** 8* File NCCBTST.C 9* 10* Modification History: 11* Name Description 12* Madhu Katragadda 7/21/1999 Testing error callback routines 13******************************************************************************** 14*/ 15#include <stdio.h> 16#include <stdlib.h> 17#include <string.h> 18#include <ctype.h> 19#include "cstring.h" 20#include "unicode/uloc.h" 21#include "unicode/ucnv.h" 22#include "unicode/ucnv_err.h" 23#include "cintltst.h" 24#include "unicode/utypes.h" 25#include "unicode/ustring.h" 26#include "nccbtst.h" 27#include "unicode/ucnv_cb.h" 28#include "unicode/utf16.h" 29 30#define NEW_MAX_BUFFER 999 31 32#define nct_min(x,y) ((x<y) ? x : y) 33#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0])) 34 35static int32_t gInBufferSize = 0; 36static int32_t gOutBufferSize = 0; 37static char gNuConvTestName[1024]; 38 39static void printSeq(const uint8_t* a, int len) 40{ 41 int i=0; 42 log_verbose("\n{"); 43 while (i<len) 44 log_verbose("0x%02X, ", a[i++]); 45 log_verbose("}\n"); 46} 47 48static void printUSeq(const UChar* a, int len) 49{ 50 int i=0; 51 log_verbose("{"); 52 while (i<len) 53 log_verbose(" 0x%04x, ", a[i++]); 54 log_verbose("}\n"); 55} 56 57static void printSeqErr(const uint8_t* a, int len) 58{ 59 int i=0; 60 fprintf(stderr, "{"); 61 while (i<len) 62 fprintf(stderr, " 0x%02x, ", a[i++]); 63 fprintf(stderr, "}\n"); 64} 65 66static void printUSeqErr(const UChar* a, int len) 67{ 68 int i=0; 69 fprintf(stderr, "{"); 70 while (i<len) 71 fprintf(stderr, "0x%04x, ", a[i++]); 72 fprintf(stderr,"}\n"); 73} 74 75static void setNuConvTestName(const char *codepage, const char *direction) 76{ 77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 78 codepage, 79 direction, 80 (int)gInBufferSize, 81 (int)gOutBufferSize); 82} 83 84 85static void TestCallBackFailure(void); 86 87void addTestConvertErrorCallBack(TestNode** root); 88 89void addTestConvertErrorCallBack(TestNode** root) 90{ 91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); 92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); 93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); 94 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack"); 95 96#if !UCONFIG_NO_LEGACY_CONVERSION 97 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack"); 98 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack"); 99#endif 100 101 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); 102} 103 104static void TestSkipCallBack() 105{ 106 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 107 TestSkip(1,NEW_MAX_BUFFER); 108 TestSkip(1,1); 109 TestSkip(NEW_MAX_BUFFER, 1); 110} 111 112static void TestStopCallBack() 113{ 114 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 115 TestStop(1,NEW_MAX_BUFFER); 116 TestStop(1,1); 117 TestStop(NEW_MAX_BUFFER, 1); 118} 119 120static void TestSubCallBack() 121{ 122 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 123 TestSub(1,NEW_MAX_BUFFER); 124 TestSub(1,1); 125 TestSub(NEW_MAX_BUFFER, 1); 126 127#if !UCONFIG_NO_LEGACY_CONVERSION 128 TestEBCDIC_STATEFUL_Sub(1, 1); 129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); 130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); 131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 132#endif 133} 134 135static void TestSubWithValueCallBack() 136{ 137 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 138 TestSubWithValue(1,NEW_MAX_BUFFER); 139 TestSubWithValue(1,1); 140 TestSubWithValue(NEW_MAX_BUFFER, 1); 141} 142 143#if !UCONFIG_NO_LEGACY_CONVERSION 144static void TestLegalAndOtherCallBack() 145{ 146 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 147 TestLegalAndOthers(1,NEW_MAX_BUFFER); 148 TestLegalAndOthers(1,1); 149 TestLegalAndOthers(NEW_MAX_BUFFER, 1); 150} 151 152static void TestSingleByteCallBack() 153{ 154 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); 155 TestSingleByte(1,NEW_MAX_BUFFER); 156 TestSingleByte(1,1); 157 TestSingleByte(NEW_MAX_BUFFER, 1); 158} 159#endif 160 161static void TestSkip(int32_t inputsize, int32_t outputsize) 162{ 163 static const uint8_t expskipIBM_949[]= { 164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 165 166 static const uint8_t expskipIBM_943[] = { 167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; 168 169 static const uint8_t expskipIBM_930[] = { 170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; 171 172 gInBufferSize = inputsize; 173 gOutBufferSize = outputsize; 174 175 /*From Unicode*/ 176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); 177 178#if !UCONFIG_NO_LEGACY_CONVERSION 179 { 180 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 181 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 182 183 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; 184 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; 185 186 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 187 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", 188 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) 189 log_err("u-> ibm-949 with skip did not match.\n"); 190 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 191 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", 192 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) 193 log_err("u-> ibm-943 with skip did not match.\n"); 194 } 195 196 { 197 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 }; 198 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; 199 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 }; 200 201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */ 202 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, 203 fromUBytes, sizeof(fromUBytes), 204 "ibm-930", 205 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, 206 NULL, 0) 207 ) { 208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n"); 209 } 210 } 211#endif 212 213 { 214 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 215 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; 216 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; 217 218 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 }; 219 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; 220 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; 221 222 /* US-ASCII */ 223 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 224 usasciiFromUBytes, sizeof(usasciiFromUBytes), 225 "US-ASCII", 226 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 227 NULL, 0) 228 ) { 229 log_err("u->US-ASCII with skip did not match.\n"); 230 } 231 232#if !UCONFIG_NO_LEGACY_CONVERSION 233 /* SBCS NLTC codepage 367 for US-ASCII */ 234 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR, 235 usasciiFromUBytes, sizeof(usasciiFromUBytes), 236 "ibm-367", 237 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets, 238 NULL, 0) 239 ) { 240 log_err("u->ibm-367 with skip did not match.\n"); 241 } 242#endif 243 244 /* ISO-Latin-1 */ 245 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 246 latin1FromUBytes, sizeof(latin1FromUBytes), 247 "LATIN_1", 248 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 249 NULL, 0) 250 ) { 251 log_err("u->LATIN_1 with skip did not match.\n"); 252 } 253 254#if !UCONFIG_NO_LEGACY_CONVERSION 255 /* windows-1252 */ 256 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR, 257 latin1FromUBytes, sizeof(latin1FromUBytes), 258 "windows-1252", 259 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets, 260 NULL, 0) 261 ) { 262 log_err("u->windows-1252 with skip did not match.\n"); 263 } 264 } 265 266 { 267 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 268 static const uint8_t toIBM943[]= { 0x61, 0x61 }; 269 static const int32_t offset[]= {0, 4}; 270 271 /* EUC_JP*/ 272 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 273 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 274 0x61, 0x8e, 0xe0, 275 }; 276 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; 277 278 /*EUC_TW*/ 279 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 280 static const uint8_t to_euc_tw[]={ 281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 282 0x61, 0xe6, 0xca, 0x8a, 283 }; 284 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,}; 285 286 /*ISO-2022-JP*/ 287 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, }; 288 static const uint8_t to_iso_2022_jp[]={ 289 0x41, 290 0x42, 291 292 }; 293 static const int32_t from_iso_2022_jpOffs [] ={0,2}; 294 295 /*ISO-2022-JP*/ 296 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 297 static const uint8_t to_iso_2022_jp2[]={ 298 0x41, 299 0x43, 300 301 }; 302 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; 303 304 /*ISO-2022-cn*/ 305 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 306 static const uint8_t to_iso_2022_cn[]={ 307 0x41, 0x42 308 }; 309 static const int32_t from_iso_2022_cnOffs [] ={ 310 0, 2 311 }; 312 313 /*ISO-2022-CN*/ 314 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 315 static const uint8_t to_iso_2022_cn1[]={ 316 0x41, 0x43 317 318 }; 319 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; 320 321 /*ISO-2022-kr*/ 322 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 323 static const uint8_t to_iso_2022_kr[]={ 324 0x1b, 0x24, 0x29, 0x43, 325 0x41, 326 0x0e, 0x25, 0x50, 327 0x25, 0x50, 328 0x0f, 0x42, 329 }; 330 static const int32_t from_iso_2022_krOffs [] ={ 331 -1,-1,-1,-1, 332 0, 333 1,1,1, 334 3,3, 335 4,4 336 }; 337 338 /*ISO-2022-kr*/ 339 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 340 static const uint8_t to_iso_2022_kr1[]={ 341 0x1b, 0x24, 0x29, 0x43, 342 0x41, 343 0x0e, 0x25, 0x50, 344 0x25, 0x50, 345 346 }; 347 static const int32_t from_iso_2022_krOffs1 [] ={ 348 -1,-1,-1,-1, 349 0, 350 1,1,1, 351 3,3, 352 353 }; 354 /* HZ encoding */ 355 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 356 357 static const uint8_t to_hz[]={ 358 0x7e, 0x7d, 0x41, 359 0x7e, 0x7b, 0x26, 0x30, 360 0x26, 0x30, 361 0x7e, 0x7d, 0x42, 362 363 }; 364 static const int32_t from_hzOffs [] ={ 365 0,0,0, 366 1,1,1,1, 367 3,3, 368 4,4,4,4 369 }; 370 371 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; 372 373 static const uint8_t to_hz1[]={ 374 0x7e, 0x7d, 0x41, 375 0x7e, 0x7b, 0x26, 0x30, 376 0x26, 0x30, 377 378 379 }; 380 static const int32_t from_hzOffs1 [] ={ 381 0,0,0, 382 1,1,1,1, 383 3,3, 384 385 }; 386 387#endif 388 389 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 390 391 static const uint8_t to_SCSU[]={ 392 0x41, 393 0x42 394 395 396 }; 397 static const int32_t from_SCSUOffs [] ={ 398 0, 399 2, 400 401 }; 402 403#if !UCONFIG_NO_LEGACY_CONVERSION 404 /* ISCII */ 405 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, }; 406 static const uint8_t to_iscii[]={ 407 0x41, 408 0x42, 409 }; 410 static const int32_t from_isciiOffs [] ={ 411 0,2, 412 413 }; 414 /*ISCII*/ 415 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; 416 static const uint8_t to_iscii1[]={ 417 0x44, 418 0x43, 419 420 }; 421 static const int32_t from_isciiOffs1 [] ={0,2}; 422 423 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 424 toIBM943, sizeof(toIBM943), "ibm-943", 425 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) 426 log_err("u-> ibm-943 with skip did not match.\n"); 427 428 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 429 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 430 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) 431 log_err("u-> euc-jp with skip did not match.\n"); 432 433 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 434 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 435 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) 436 log_err("u-> euc-tw with skip did not match.\n"); 437 438 /*iso_2022_jp*/ 439 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 440 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 441 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) 442 log_err("u-> iso-2022-jp with skip did not match.\n"); 443 444 /* with context */ 445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 446 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 449 450 /*iso_2022_cn*/ 451 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 452 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) 454 log_err("u-> iso-2022-cn with skip did not match.\n"); 455 /*with context*/ 456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), 457 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", 458 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 460 461 /*iso_2022_kr*/ 462 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 463 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) 465 log_err("u-> iso-2022-kr with skip did not match.\n"); 466 /*with context*/ 467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), 468 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", 469 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 471 472 /*hz*/ 473 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 474 to_hz, sizeof(to_hz), "HZ", 475 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) 476 log_err("u-> HZ with skip did not match.\n"); 477 /*with context*/ 478 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]), 479 to_hz1, sizeof(to_hz1), "hz", 480 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 482#endif 483 484 /*SCSU*/ 485 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 486 to_SCSU, sizeof(to_SCSU), "SCSU", 487 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) 488 log_err("u-> SCSU with skip did not match.\n"); 489 490#if !UCONFIG_NO_LEGACY_CONVERSION 491 /*ISCII*/ 492 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 493 to_iscii, sizeof(to_iscii), "ISCII,version=0", 494 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) 495 log_err("u-> iscii with skip did not match.\n"); 496 /*with context*/ 497 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]), 498 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", 499 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) 500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n"); 501#endif 502 } 503 504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 505 { 506 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 507 0xFB, 0xEE, 0x28, /* from source offset 0 */ 508 0x24, 0x1E, 0x52, 509 0xB2, 510 0x20, 511 0xB3, 512 0xB1, 513 0x0D, 514 0x0A, 515 516 0x20, /* from 8 */ 517 0x00, 518 0xD0, 0x6C, 519 0xB6, 520 0xD8, 0xA5, 521 0x20, 522 0x68, 523 0x59, 524 525 0xF9, 0x28, /* from 16 */ 526 0x6D, 527 0x20, 528 0x73, 529 0xE0, 0x2D, 530 0xDE, 0x43, 531 0xD0, 0x33, 532 0x20, 533 534 0xFA, 0x83, /* from 24 */ 535 0x25, 0x01, 536 0xFB, 0x16, 0x87, 537 0x4B, 0x16, 538 0x20, 539 0xE6, 0xBD, 540 0xEB, 0x5B, 541 0x4B, 0xCC, 542 543 0xF9, 0xA2, /* from 32 */ 544 0xFC, 0x10, 0x3E, 545 0xFE, 0x16, 0x3A, 0x8C, 546 0x20, 547 0xFC, 0x03, 0xAC, 548 549 0x01, /* from 41 */ 550 0xDE, 0x83, 551 0x20, 552 0x09 553 }; 554 static const UChar expected[]={ 555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ 556 0x0063, 0x0061, 0x000D, 0x000A, 557 558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ 559 0x0930, 0x0020, 0x0918, 0x0909, 560 561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ 562 0x4000, 0x4E00, 0x7777, 0x0020, 563 564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ 565 0x0020, 0xD7A3, 0xDC00, 0xD800, 566 567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ 568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 569 570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ 571 0x0009 572 }; 573 static const int32_t offsets[]={ 574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, 575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, 576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, 578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, 579 41, 42, 42, 43, 44 580 }; 581 582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */ 583 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 584 sampleText, sizeof(sampleText), 585 "BOCU-1", 586 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 587 ) { 588 log_err("u->BOCU-1 with skip did not match.\n"); 589 } 590 } 591 592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 593 { 594 const uint8_t sampleText[]={ 595 0x61, /* 'a' */ 596 0xc4, 0xb5, /* U+0135 */ 597 0xed, 0x80, 0xa0, /* Hangul U+d020 */ 598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ 599 0xee, 0x80, 0x80, /* PUA U+e000 */ 600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */ 601 0x62, /* 'b' */ 602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */ 603 0xd0, 0x80 /* U+0400 */ 604 }; 605 UChar expected[]={ 606 0x0061, 607 0x0135, 608 0xd020, 609 0xd801, 0xdc01, 610 0xe000, 611 0xdc01, 612 0x0062, 613 0xd801, 614 0x0400 615 }; 616 int32_t offsets[]={ 617 0, 618 1, 1, 619 2, 2, 2, 620 3, 3, 3, 4, 4, 4, 621 5, 5, 5, 622 6, 6, 6, 623 7, 624 8, 8, 8, 625 9, 9 626 }; 627 628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */ 629 630 /* without offsets */ 631 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 632 sampleText, sizeof(sampleText), 633 "CESU-8", 634 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) 635 ) { 636 log_err("u->CESU-8 with skip did not match.\n"); 637 } 638 639 /* with offsets */ 640 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), 641 sampleText, sizeof(sampleText), 642 "CESU-8", 643 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) 644 ) { 645 log_err("u->CESU-8 with skip did not match.\n"); 646 } 647 } 648 649 /*to Unicode*/ 650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); 651 652#if !UCONFIG_NO_LEGACY_CONVERSION 653 { 654 655 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 }; 656 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 657 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; 658 659 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; 660 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; 661 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; 662 663 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), 664 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949", 665 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) 666 log_err("ibm-949->u with skip did not match.\n"); 667 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), 668 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943", 669 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) 670 log_err("ibm-943->u with skip did not match.\n"); 671 672 673 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), 674 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 675 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) 676 log_err("ibm-930->u with skip did not match.\n"); 677 678 679 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930), 680 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930", 681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 682 log_err("ibm-930->u with skip did not match.\n"); 683 } 684#endif 685 686 { 687 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; 688 static const UChar usasciiToU[] = { 0x61, 0x31 }; 689 static const int32_t usasciiToUOffsets[] = { 0, 2 }; 690 691 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; 692 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; 693 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; 694 695 /* US-ASCII */ 696 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 697 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 698 "US-ASCII", 699 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 700 NULL, 0) 701 ) { 702 log_err("US-ASCII->u with skip did not match.\n"); 703 } 704 705#if !UCONFIG_NO_LEGACY_CONVERSION 706 /* SBCS NLTC codepage 367 for US-ASCII */ 707 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), 708 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, 709 "ibm-367", 710 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, 711 NULL, 0) 712 ) { 713 log_err("ibm-367->u with skip did not match.\n"); 714 } 715#endif 716 717 /* ISO-Latin-1 */ 718 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 719 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 720 "LATIN_1", 721 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 722 NULL, 0) 723 ) { 724 log_err("LATIN_1->u with skip did not match.\n"); 725 } 726 727#if !UCONFIG_NO_LEGACY_CONVERSION 728 /* windows-1252 */ 729 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), 730 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, 731 "windows-1252", 732 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, 733 NULL, 0) 734 ) { 735 log_err("windows-1252->u with skip did not match.\n"); 736 } 737#endif 738 } 739 740#if !UCONFIG_NO_LEGACY_CONVERSION 741 { 742 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 744 }; 745 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 746 }; 747 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; 748 749 750 /* euc-jp*/ 751 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 752 0x8f, 0xda, 0xa1, /*unassigned*/ 753 0x8e, 0xe0, 754 }; 755 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; 756 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; 757 758 /*EUC_TW*/ 759 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 761 0xe6, 0xca, 0x8a, 762 }; 763 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, }; 764 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; 765 /*iso-2022-jp*/ 766 static const uint8_t sampleTxt_iso_2022_jp[]={ 767 0x41, 768 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 769 0x1b, 0x28, 0x42, 0x42, 770 771 }; 772 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; 773 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; 774 775 /*iso-2022-cn*/ 776 static const uint8_t sampleTxt_iso_2022_cn[]={ 777 0x0f, 0x41, 0x44, 778 0x1B, 0x24, 0x29, 0x47, 779 0x0E, 0x40, 0x6f, /*unassigned*/ 780 0x0f, 0x42, 781 782 }; 783 784 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; 785 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; 786 787 /*iso-2022-kr*/ 788 static const uint8_t sampleTxt_iso_2022_kr[]={ 789 0x1b, 0x24, 0x29, 0x43, 790 0x41, 791 0x0E, 0x7f, 0x1E, 792 0x0e, 0x25, 0x50, 793 0x0f, 0x51, 794 0x42, 0x43, 795 796 }; 797 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43}; 798 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 }; 799 800 /*hz*/ 801 static const uint8_t sampleTxt_hz[]={ 802 0x41, 803 0x7e, 0x7b, 0x26, 0x30, 804 0x7f, 0x1E, /*unassigned*/ 805 0x26, 0x30, 806 0x7e, 0x7d, 0x42, 807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 808 0x7e, 0x7d, 0x42, 809 }; 810 static const UChar hztoUnicode[]={ 811 0x41, 812 0x03a0, 813 0x03A0, 814 0x42, 815 0x42,}; 816 817 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; 818 819 /*ISCII*/ 820 static const uint8_t sampleTxt_iscii[]={ 821 0x41, 822 0xa1, 823 0xEB, /*unassigned*/ 824 0x26, 825 0x30, 826 0xa2, 827 0xEC, /*unassigned*/ 828 0x42, 829 }; 830 static const UChar isciitoUnicode[]={ 831 0x41, 832 0x0901, 833 0x26, 834 0x30, 835 0x0902, 836 0x42, 837 }; 838 839 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; 840 841 /*LMBCS*/ 842 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, 843 0x12, 0x92, 0xa0, /*unassigned*/ 844 0x12, 0x92, 0xA1, 845 }; 846 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; 847 static const int32_t fromLMBCS[] = {0, 6}; 848 849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 850 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 851 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 853 854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 855 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 856 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); 858 859 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 860 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 861 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) 862 log_err("euc-jp->u with skip did not match.\n"); 863 864 865 866 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 867 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 868 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) 869 log_err("euc-tw->u with skip did not match.\n"); 870 871 872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 873 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 874 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) 875 log_err("iso-2022-jp->u with skip did not match.\n"); 876 877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 878 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 879 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) 880 log_err("iso-2022-cn->u with skip did not match.\n"); 881 882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 883 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 884 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) 885 log_err("iso-2022-kr->u with skip did not match.\n"); 886 887 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 888 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 889 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) 890 log_err("HZ->u with skip did not match.\n"); 891 892 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 893 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 894 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) 895 log_err("iscii->u with skip did not match.\n"); 896 897 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), 898 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1", 899 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) 900 log_err("LMBCS->u with skip did not match.\n"); 901 902 } 903#endif 904 905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); 906 { 907 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 908 0xe0, 0x80, 0x61,}; 909 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; 910 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; 911 912 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 913 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 914 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 915 log_err("utf8->u with skip did not match.\n");; 916 } 917 918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); 919 { 920 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 921 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe}; 922 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 923 924 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 925 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 926 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) 927 log_err("scsu->u with skip did not match.\n"); 928 } 929 930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); 931 { 932 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */ 933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ 934 0x24, 0x1E, 0x52, /* 3 */ 935 0xB2, /* 6 */ 936 0x20, /* 7 */ 937 0x40, 0x07, /* 8 - wrong trail byte */ 938 0xB3, /* 10 */ 939 0xB1, /* 11 */ 940 0xD0, 0x20, /* 12 - wrong trail byte */ 941 0x0D, /* 14 */ 942 0x0A, /* 15 */ 943 0x20, /* 16 */ 944 0x00, /* 17 */ 945 0xD0, 0x6C, /* 18 */ 946 0xB6, /* 20 */ 947 0xD8, 0xA5, /* 21 */ 948 0x20, /* 23 */ 949 0x68, /* 24 */ 950 0x59, /* 25 */ 951 0xF9, 0x28, /* 26 */ 952 0x6D, /* 28 */ 953 0x20, /* 29 */ 954 0x73, /* 30 */ 955 0xE0, 0x2D, /* 31 */ 956 0xDE, 0x43, /* 33 */ 957 0xD0, 0x33, /* 35 */ 958 0x20, /* 37 */ 959 0xFA, 0x83, /* 38 */ 960 0x25, 0x01, /* 40 */ 961 0xFB, 0x16, 0x87, /* 42 */ 962 0x4B, 0x16, /* 45 */ 963 0x20, /* 47 */ 964 0xE6, 0xBD, /* 48 */ 965 0xEB, 0x5B, /* 50 */ 966 0x4B, 0xCC, /* 52 */ 967 0xF9, 0xA2, /* 54 */ 968 0xFC, 0x10, 0x3E, /* 56 */ 969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ 970 0x20, /* 63 */ 971 0xFC, 0x03, 0xAC, /* 64 */ 972 0xFF, /* 67 - FF just resets the state without encoding anything */ 973 0x01, /* 68 */ 974 0xDE, 0x83, /* 69 */ 975 0x20, /* 71 */ 976 0x09 /* 72 */ 977 }; 978 UChar expected[]={ 979 0xFEFF, 0x0061, 0x0062, 0x0020, 980 0x0063, 0x0061, 0x000D, 0x000A, 981 0x0020, 0x0000, 0x00DF, 0x00E6, 982 0x0930, 0x0020, 0x0918, 0x0909, 983 0x3086, 0x304D, 0x0020, 0x3053, 984 0x4000, 0x4E00, 0x7777, 0x0020, 985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, 986 0x0020, 0xD7A3, 0xDC00, 0xD800, 987 0xD800, 0xDC00, 0xD845, 0xDDDD, 988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 989 0xDFFF, 0x0001, 0x0E40, 0x0020, 990 0x0009 991 }; 992 int32_t offsets[]={ 993 0, 3, 6, 7, /* skip 8, */ 994 10, 11, /* skip 12, */ 995 14, 15, 16, 17, 18, 996 20, 21, 23, 24, 25, 26, 28, 29, 997 30, 31, 33, 35, 37, 38, 998 40, 42, 45, 47, 48, 999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, 1000 63, 64, /* trail */ 64, /* reset only 67, */ 1001 68, 69, 1002 71, 72 1003 }; 1004 1005 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1006 expected, ARRAY_LENGTH(expected), "BOCU-1", 1007 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1008 ) { 1009 log_err("BOCU-1->u with skip did not match.\n"); 1010 } 1011 } 1012 1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); 1014 { 1015 const uint8_t sampleText[]={ 1016 0x61, /* 0 'a' */ 1017 0xc0, 0x80, /* 1 non-shortest form */ 1018 0xc4, 0xb5, /* 3 U+0135 */ 1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ 1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */ 1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ 1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */ 1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */ 1024 0x62, /* 24 'b' */ 1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */ 1026 0xed, 0xa0, /* 28 incomplete sequence */ 1027 0xd0, 0x80 /* 30 U+0400 */ 1028 }; 1029 UChar expected[]={ 1030 0x0061, 1031 /* skip */ 1032 0x0135, 1033 0xd020, 1034 0xd801, 0xdc01, 1035 0xe000, 1036 0xdc01, 1037 /* skip */ 1038 0x0062, 1039 0xd801, 1040 0x0400 1041 }; 1042 int32_t offsets[]={ 1043 0, 1044 /* skip 1, */ 1045 3, 1046 5, 1047 8, 11, 1048 14, 1049 17, 1050 /* skip 20, 20, */ 1051 24, 1052 25, 1053 /* skip 28 */ 1054 30 1055 }; 1056 1057 /* without offsets */ 1058 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1059 expected, ARRAY_LENGTH(expected), "CESU-8", 1060 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) 1061 ) { 1062 log_err("CESU-8->u with skip did not match.\n"); 1063 } 1064 1065 /* with offsets */ 1066 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 1067 expected, ARRAY_LENGTH(expected), "CESU-8", 1068 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) 1069 ) { 1070 log_err("CESU-8->u with skip did not match.\n"); 1071 } 1072 } 1073} 1074 1075static void TestStop(int32_t inputsize, int32_t outputsize) 1076{ 1077 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1078 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1079 1080 static const uint8_t expstopIBM_949[]= { 1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; 1082 1083 static const uint8_t expstopIBM_943[] = { 1084 0x9f, 0xaf, 0x9f, 0xb1}; 1085 1086 static const uint8_t expstopIBM_930[] = { 1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; 1088 1089 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; 1090 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; 1091 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; 1092 1093 1094 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; 1095 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; 1096 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; 1097 1098 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; 1099 static const int32_t fromIBM943Offs [] = { 0, 2}; 1100 static const int32_t fromIBM930Offs [] = { 1, 3}; 1101 1102 gInBufferSize = inputsize; 1103 gOutBufferSize = outputsize; 1104 1105 /*From Unicode*/ 1106 1107#if !UCONFIG_NO_LEGACY_CONVERSION 1108 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1109 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", 1110 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) 1111 log_err("u-> ibm-949 with stop did not match.\n"); 1112 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1113 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", 1114 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) 1115 log_err("u-> ibm-943 with stop did not match.\n"); 1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1117 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", 1118 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) 1119 log_err("u-> ibm-930 with stop did not match.\n"); 1120 1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); 1122 { 1123 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1124 static const uint8_t toIBM943[]= { 0x61,}; 1125 static const int32_t offset[]= {0,} ; 1126 1127 /*EUC_JP*/ 1128 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1129 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; 1130 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; 1131 1132 /*EUC_TW*/ 1133 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1134 static const uint8_t to_euc_tw[]={ 1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; 1136 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; 1137 1138 /*ISO-2022-JP*/ 1139 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; 1140 static const uint8_t to_iso_2022_jp[]={ 1141 0x41, 1142 1143 }; 1144 static const int32_t from_iso_2022_jpOffs [] ={0,}; 1145 1146 /*ISO-2022-cn*/ 1147 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1148 static const uint8_t to_iso_2022_cn[]={ 1149 0x41, 1150 1151 }; 1152 static const int32_t from_iso_2022_cnOffs [] ={ 1153 0,0, 1154 2,2, 1155 }; 1156 1157 /*ISO-2022-kr*/ 1158 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, }; 1159 static const uint8_t to_iso_2022_kr[]={ 1160 0x1b, 0x24, 0x29, 0x43, 1161 0x41, 1162 0x0e, 0x25, 0x50, 1163 }; 1164 static const int32_t from_iso_2022_krOffs [] ={ 1165 -1,-1,-1,-1, 1166 0, 1167 1,1,1, 1168 }; 1169 1170 /* HZ encoding */ 1171 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1172 1173 static const uint8_t to_hz[]={ 1174 0x7e, 0x7d, 0x41, 1175 0x7e, 0x7b, 0x26, 0x30, 1176 1177 }; 1178 static const int32_t from_hzOffs [] ={ 1179 0, 0,0, 1180 1,1,1,1, 1181 }; 1182 1183 /*ISCII*/ 1184 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1185 static const uint8_t to_iscii[]={ 1186 0x41, 1187 }; 1188 static const int32_t from_isciiOffs [] ={ 1189 0, 1190 }; 1191 1192 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1193 toIBM943, sizeof(toIBM943), "ibm-943", 1194 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) 1195 log_err("u-> ibm-943 with stop did not match.\n"); 1196 1197 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1198 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 1199 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) 1200 log_err("u-> euc-jp with stop did not match.\n"); 1201 1202 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1203 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1204 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1205 log_err("u-> euc-tw with stop did not match.\n"); 1206 1207 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1208 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1209 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1210 log_err("u-> iso-2022-jp with stop did not match.\n"); 1211 1212 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), 1213 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", 1214 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) 1215 log_err("u-> iso-2022-jp with stop did not match.\n"); 1216 1217 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 1218 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 1219 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) 1220 log_err("u-> iso-2022-cn with stop did not match.\n"); 1221 1222 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 1223 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 1224 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) 1225 log_err("u-> iso-2022-kr with stop did not match.\n"); 1226 1227 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 1228 to_hz, sizeof(to_hz), "HZ", 1229 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) 1230 log_err("u-> HZ with stop did not match.\n");\ 1231 1232 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 1233 to_iscii, sizeof(to_iscii), "ISCII,version=0", 1234 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) 1235 log_err("u-> iscii with stop did not match.\n"); 1236 1237 1238 } 1239#endif 1240 1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"); 1242 { 1243 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1244 1245 static const uint8_t to_SCSU[]={ 1246 0x41, 1247 1248 }; 1249 int32_t from_SCSUOffs [] ={ 1250 0, 1251 1252 }; 1253 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1254 to_SCSU, sizeof(to_SCSU), "SCSU", 1255 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) 1256 log_err("u-> SCSU with skip did not match.\n"); 1257 1258 } 1259 1260 /*to Unicode*/ 1261 1262#if !UCONFIG_NO_LEGACY_CONVERSION 1263 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), 1264 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949", 1265 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) 1266 log_err("ibm-949->u with stop did not match.\n"); 1267 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), 1268 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943", 1269 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) 1270 log_err("ibm-943->u with stop did not match.\n"); 1271 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), 1272 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930", 1273 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) 1274 log_err("ibm-930->u with stop did not match.\n"); 1275 1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); 1277 { 1278 1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1281 }; 1282 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; 1283 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; 1284 1285 1286 /*EUC-JP*/ 1287 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1288 0x8f, 0xda, 0xa1, /*unassigned*/ 1289 0x8e, 0xe0, 1290 }; 1291 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; 1292 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; 1293 1294 /*EUC_TW*/ 1295 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1297 0xe6, 0xca, 0x8a, 1298 }; 1299 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; 1300 int32_t from_euc_twOffs [] ={ 0, 1, 3}; 1301 1302 1303 1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1305 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1306 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); 1308 1309 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1310 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1311 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) 1312 log_err("euc-jp->u with stop did not match.\n"); 1313 1314 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1315 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1316 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) 1317 log_err("euc-tw->u with stop did not match.\n"); 1318 } 1319#endif 1320 1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); 1322 { 1323 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1324 0xe0, 0x80, 0x61,}; 1325 static const UChar expected1[] = { 0x0031, 0x4e8c,}; 1326 static const int32_t offsets1[] = { 0x0000, 0x0001}; 1327 1328 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1329 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1330 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1331 log_err("utf8->u with stop did not match.\n");; 1332 } 1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); 1334 { 1335 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04}; 1336 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061}; 1337 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; 1338 1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) 1342 log_err("scsu->u with stop did not match.\n");; 1343 } 1344 1345} 1346 1347static void TestSub(int32_t inputsize, int32_t outputsize) 1348{ 1349 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1350 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1351 1352 static const uint8_t expsubIBM_949[] = 1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; 1354 1355 static const uint8_t expsubIBM_943[] = { 1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; 1357 1358 static const uint8_t expsubIBM_930[] = { 1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; 1360 1361 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 }; 1362 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1363 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; 1364 1365 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1366 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; 1367 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; 1368 1369 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; 1370 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; 1371 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; 1372 1373 gInBufferSize = inputsize; 1374 gOutBufferSize = outputsize; 1375 1376 /*from unicode*/ 1377 1378#if !UCONFIG_NO_LEGACY_CONVERSION 1379 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1380 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", 1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) 1382 log_err("u-> ibm-949 with subst did not match.\n"); 1383 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1384 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", 1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) 1386 log_err("u-> ibm-943 with subst did not match.\n"); 1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1388 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", 1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) 1390 log_err("u-> ibm-930 with subst did not match.\n"); 1391 1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1393 { 1394 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1395 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; 1396 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; 1397 1398 1399 /* EUC_JP*/ 1400 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 }; 1401 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1402 0xf4, 0xfe, 0xf4, 0xfe, 1403 0x61, 0x8e, 0xe0, 1404 }; 1405 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7}; 1406 1407 /*EUC_TW*/ 1408 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1409 static const uint8_t to_euc_tw[]={ 1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1411 0xfd, 0xfe, 0xfd, 0xfe, 1412 0x61, 0xe6, 0xca, 0x8a, 1413 }; 1414 1415 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; 1416 1417 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 1418 toIBM943, sizeof(toIBM943), "ibm-943", 1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) 1420 log_err("u-> ibm-943 with substitute did not match.\n"); 1421 1422 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 1423 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) 1425 log_err("u-> euc-jp with substitute did not match.\n"); 1426 1427 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 1428 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1430 log_err("u-> euc-tw with substitute did not match.\n"); 1431 } 1432#endif 1433 1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); 1435 { 1436 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; 1437 1438 const uint8_t to_SCSU[]={ 1439 0x41, 1440 0x0e, 0xff,0xfd, 1441 0x42 1442 1443 1444 }; 1445 int32_t from_SCSUOffs [] ={ 1446 0, 1447 1,1,1, 1448 2, 1449 1450 }; 1451 const uint8_t to_SCSU_1[]={ 1452 0x41, 1453 1454 }; 1455 int32_t from_SCSUOffs_1 [] ={ 1456 0, 1457 1458 }; 1459 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1460 to_SCSU, sizeof(to_SCSU), "SCSU", 1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) 1462 log_err("u-> SCSU with substitute did not match.\n"); 1463 1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]), 1465 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", 1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) 1467 log_err("u-> SCSU with substitute did not match.\n"); 1468 } 1469 1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1471 { 1472 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,}; 1473 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, 1474 0xf0, 0x90, 0x90, 0x81, 1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 1476 0xef, 0xbf, 0xbf, 0x61, 1477 1478 }; 1479 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 }; 1480 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]), 1481 expectedUTF8, sizeof(expectedUTF8), "utf8", 1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { 1483 log_err("u-> utf8 with stop did not match.\n"); 1484 } 1485 } 1486 1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1488 { 1489 static const UChar in[]={ 0x0041, 0xfeff }; 1490 1491 static const uint8_t out[]={ 1492#if U_IS_BIG_ENDIAN 1493 0xfe, 0xff, 1494 0x00, 0x41, 1495 0xfe, 0xff 1496#else 1497 0xff, 0xfe, 1498 0x41, 0x00, 1499 0xff, 0xfe 1500#endif 1501 }; 1502 static const int32_t offsets[]={ 1503 -1, -1, 0, 0, 1, 1 1504 }; 1505 1506 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1507 out, sizeof(out), "UTF-16", 1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1509 ) { 1510 log_err("u->UTF-16 with substitute did not match.\n"); 1511 } 1512 } 1513 1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n"); 1515 { 1516 static const UChar in[]={ 0x0041, 0xfeff }; 1517 1518 static const uint8_t out[]={ 1519#if U_IS_BIG_ENDIAN 1520 0x00, 0x00, 0xfe, 0xff, 1521 0x00, 0x00, 0x00, 0x41, 1522 0x00, 0x00, 0xfe, 0xff 1523#else 1524 0xff, 0xfe, 0x00, 0x00, 1525 0x41, 0x00, 0x00, 0x00, 1526 0xff, 0xfe, 0x00, 0x00 1527#endif 1528 }; 1529 static const int32_t offsets[]={ 1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 1531 }; 1532 1533 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), 1534 out, sizeof(out), "UTF-32", 1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1536 ) { 1537 log_err("u->UTF-32 with substitute did not match.\n"); 1538 } 1539 } 1540 1541 /*to unicode*/ 1542 1543#if !UCONFIG_NO_LEGACY_CONVERSION 1544 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), 1545 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949", 1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) 1547 log_err("ibm-949->u with substitute did not match.\n"); 1548 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), 1549 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943", 1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) 1551 log_err("ibm-943->u with substitute did not match.\n"); 1552 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), 1553 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930", 1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) 1555 log_err("ibm-930->u with substitute did not match.\n"); 1556 1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1558 { 1559 1560 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ 1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 1562 }; 1563 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 1564 }; 1565 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; 1566 1567 1568 /* EUC_JP*/ 1569 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1570 0x8f, 0xda, 0xa1, /*unassigned*/ 1571 0x8e, 0xe0, 0x8a 1572 }; 1573 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a }; 1574 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; 1575 1576 /*EUC_TW*/ 1577 const uint8_t sampleTxt_euc_tw[]={ 1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 1580 0xe6, 0xca, 0x8a, 1581 }; 1582 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, }; 1583 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; 1584 1585 1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL), 1587 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", 1588 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) 1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); 1590 1591 1592 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1593 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) 1595 log_err("euc-jp->u with substitute did not match.\n"); 1596 1597 1598 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 1599 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) 1601 log_err("euc-tw->u with substitute did not match.\n"); 1602 1603 1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), 1605 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP", 1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND)) 1607 log_err("euc-jp->u with substitute did not match.\n"); 1608 } 1609#endif 1610 1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1612 { 1613 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 1614 0xe0, 0x80, 0x61,}; 1615 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 1616 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 1617 1618 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1619 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", 1620 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1621 log_err("utf8->u with substitute did not match.\n");; 1622 } 1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 1624 { 1625 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; 1626 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd}; 1627 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; 1628 1629 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), 1630 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", 1631 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) 1632 log_err("scsu->u with stop did not match.\n");; 1633 } 1634 1635#if !UCONFIG_NO_LEGACY_CONVERSION 1636 log_verbose("Testing ibm-930 subchar/subchar1\n"); 1637 { 1638 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf }; 1639 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f }; 1640 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 }; 1641 1642 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a }; 1643 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 }; 1644 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 }; 1645 1646 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930", 1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1648 ) { 1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); 1650 } 1651 1652 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930", 1653 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1654 ) { 1655 log_err("ibm-930->u subchar/subchar1 did not match.\n"); 1656 } 1657 } 1658 1659 log_verbose("Testing GB 18030 with substitute callbacks\n"); 1660 { 1661 static const UChar u2[]={ 1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff }; 1663 static const uint8_t gb2[]={ 1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; 1665 static const int32_t offsets2[]={ 1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; 1667 1668 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030", 1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1670 ) { 1671 log_err("gb18030->u with substitute did not match.\n"); 1672 } 1673 } 1674#endif 1675 1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); 1677 { 1678 static const uint8_t utf7[]={ 1679 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */ 1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 1681 }; 1682 static const UChar unicode[]={ 1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 1684 }; 1685 static const int32_t offsets[]={ 1686 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24 1687 }; 1688 1689 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7", 1690 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0) 1691 ) { 1692 log_err("UTF-7->u with substitute did not match.\n"); 1693 } 1694 } 1695 1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); 1697 { 1698 static const uint8_t 1699 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, 1700 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, 1701 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; 1702 1703 static const UChar 1704 out1[]={ 0x4e00, 0xfeff }, 1705 out2[]={ 0x004e, 0xfffe }, 1706 out3[]={ 0xfefd, 0x4e00, 0xfeff }; 1707 1708 static const int32_t 1709 offsets1[]={ 2, 4 }, 1710 offsets2[]={ 2, 4 }, 1711 offsets3[]={ 0, 2, 4 }; 1712 1713 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16", 1714 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1715 ) { 1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); 1717 } 1718 1719 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16", 1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1721 ) { 1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); 1723 } 1724 1725 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16", 1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1727 ) { 1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); 1729 } 1730 } 1731 1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); 1733 { 1734 static const uint8_t 1735 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }, 1736 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }, 1737 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, 1738 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 }; 1739 1740 static const UChar 1741 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, 1742 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, 1743 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd }, 1744 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; 1745 1746 static const int32_t 1747 offsets1[]={ 4, 4, 8 }, 1748 offsets2[]={ 4, 4, 8 }, 1749 offsets3[]={ 0, 4, 4, 8, 12 }, 1750 offsets4[]={ 0, 0, 4, 8 }; 1751 1752 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32", 1753 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0) 1754 ) { 1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); 1756 } 1757 1758 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32", 1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0) 1760 ) { 1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); 1762 } 1763 1764 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32", 1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0) 1766 ) { 1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); 1768 } 1769 1770 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32", 1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0) 1772 ) { 1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n"); 1774 } 1775 } 1776} 1777 1778static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1779{ 1780 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; 1781 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; 1782 1783 const uint8_t expsubwvalIBM_949[]= { 1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; 1786 1787 const uint8_t expsubwvalIBM_943[]= { 1788 0x9f, 0xaf, 0x9f, 0xb1, 1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; 1790 1791 const uint8_t expsubwvalIBM_930[] = { 1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f }; 1793 1794 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; 1795 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; 1796 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ 1797 1798 gInBufferSize = inputsize; 1799 gOutBufferSize = outputsize; 1800 1801 /*from Unicode*/ 1802 1803#if !UCONFIG_NO_LEGACY_CONVERSION 1804 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1805 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", 1806 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) 1807 log_err("u-> ibm-949 with subst with value did not match.\n"); 1808 1809 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1810 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", 1811 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) 1812 log_err("u-> ibm-943 with sub with value did not match.\n"); 1813 1814 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 1815 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", 1816 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) 1817 log_err("u-> ibm-930 with subst with value did not match.\n"); 1818 1819 1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 1821 { 1822 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 1823 static const uint8_t toIBM943[]= { 0x61, 1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1827 0x61 }; 1828 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 1829 1830 1831 /* EUC_JP*/ 1832 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, }; 1833 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1837 0x61, 0x8e, 0xe0, 1838 }; 1839 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 1840 3, 3, 3, 3, 3, 3, 1841 3, 3, 3, 3, 3, 3, 1842 5, 5, 5, 5, 5, 5, 1843 6, 7, 7, 1844 }; 1845 1846 /*EUC_TW*/ 1847 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; 1848 static const uint8_t to_euc_tw[]={ 1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 1853 0x61, 0xe6, 0xca, 0x8a, 1854 }; 1855 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 1857 6, 7, 7, 8, 1858 }; 1859 /*ISO-2022-JP*/ 1860 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; 1861 static const uint8_t to_iso_2022_jp1[]={ 1862 0x1b, 0x24, 0x42, 0x21, 0x21, 1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1864 0x1b, 0x24, 0x42, 0x21, 0x22, 1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, 1866 0x42, 1867 }; 1868 1869 static const int32_t from_iso_2022_jpOffs1 [] ={ 1870 0,0,0,0,0, 1871 1,1,1,1,1,1,1,1,1, 1872 2,2,2,2,2, 1873 3,3,3,3,3,3,3,3,3, 1874 4, 1875 }; 1876 /* surrogate pair*/ 1877 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ; 1878 static const uint8_t to_iso_2022_jp2[]={ 1879 0x1b, 0x24, 0x42, 0x21, 0x21, 1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1882 0x1b, 0x24, 0x42, 0x21, 0x22, 1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1885 0x42, 1886 }; 1887 static const int32_t from_iso_2022_jpOffs2 [] ={ 1888 0,0,0,0,0, 1889 1,1,1,1,1,1,1,1,1, 1890 1,1,1,1,1,1, 1891 3,3,3,3,3, 1892 4,4,4,4,4,4,4,4,4, 1893 4,4,4,4,4,4, 1894 6, 1895 }; 1896 1897 /*ISO-2022-cn*/ 1898 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; 1899 static const uint8_t to_iso_2022_cn[]={ 1900 0x41, 1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, 1902 0x42, 1903 }; 1904 static const int32_t from_iso_2022_cnOffs [] ={ 1905 0, 1906 1,1,1,1,1,1, 1907 2, 1908 }; 1909 1910 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; 1911 1912 static const uint8_t to_iso_2022_cn4[]={ 1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1916 0x0e, 0x21, 0x22, 1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1919 0x42, 1920 }; 1921 static const int32_t from_iso_2022_cnOffs4 [] ={ 1922 0,0,0,0,0,0,0, 1923 1,1,1,1,1,1,1, 1924 1,1,1,1,1,1, 1925 3,3,3, 1926 4,4,4,4,4,4,4, 1927 4,4,4,4,4,4, 1928 6 1929 1930 }; 1931 1932 /*ISO-2022-kr*/ 1933 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 1934 static const uint8_t to_iso_2022_kr2[]={ 1935 0x1b, 0x24, 0x29, 0x43, 1936 0x41, 1937 0x0e, 0x25, 0x50, 1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1940 0x0e, 0x25, 0x50, 1941 0x0f, 0x42, 1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 1944 0x43 1945 }; 1946 static const int32_t from_iso_2022_krOffs2 [] ={ 1947 -1,-1,-1,-1, 1948 0, 1949 1,1,1, 1950 2,2,2,2,2,2,2, 1951 2,2,2,2,2,2, 1952 4,4,4, 1953 5,5, 1954 6,6,6,6,6,6, 1955 6,6,6,6,6,6, 1956 8, 1957 }; 1958 1959 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; 1960 static const uint8_t to_iso_2022_kr[]={ 1961 0x1b, 0x24, 0x29, 0x43, 1962 0x41, 1963 0x0e, 0x25, 0x50, 1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1965 0x0e, 0x25, 0x50, 1966 0x0f, 0x42, 1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 1968 0x43 1969 }; 1970 1971 1972 static const int32_t from_iso_2022_krOffs [] ={ 1973 -1,-1,-1,-1, 1974 0, 1975 1,1,1, 1976 2,2,2,2,2,2,2, 1977 3,3,3, 1978 4,4, 1979 5,5,5,5,5,5, 1980 6, 1981 }; 1982 /* HZ encoding */ 1983 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, }; 1984 1985 static const uint8_t to_hz[]={ 1986 0x7e, 0x7d, 0x41, 1987 0x7e, 0x7b, 0x26, 0x30, 1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/ 1989 0x7e, 0x7b, 0x26, 0x30, 1990 0x7e, 0x7d, 0x42, 1991 1992 }; 1993 static const int32_t from_hzOffs [] ={ 1994 0,0,0, 1995 1,1,1,1, 1996 2,2,2,2,2,2,2,2, 1997 3,3,3,3, 1998 4,4,4 1999 }; 2000 2001 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; 2002 static const uint8_t to_hz2[]={ 2003 0x7e, 0x7d, 0x41, 2004 0x7e, 0x7b, 0x26, 0x30, 2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2007 0x7e, 0x7b, 0x26, 0x30, 2008 0x7e, 0x7d, 0x42, 2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2011 0x43 2012 }; 2013 static const int32_t from_hzOffs2 [] ={ 2014 0,0,0, 2015 1,1,1,1, 2016 2,2,2,2,2,2,2,2, 2017 2,2,2,2,2,2, 2018 4,4,4,4, 2019 5,5,5, 2020 6,6,6,6,6,6, 2021 6,6,6,6,6,6, 2022 8, 2023 }; 2024 2025 /*ISCII*/ 2026 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; 2027 static const uint8_t to_iscii[]={ 2028 0x41, 2029 0xef, 0x42, 0xa1, 2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2031 0xa2, 2032 0x42, 2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ 2034 0x43 2035 }; 2036 2037 2038 static const int32_t from_isciiOffs [] ={ 2039 0, 2040 1,1,1, 2041 2,2,2,2,2,2, 2042 3, 2043 4, 2044 5,5,5,5,5,5, 2045 6, 2046 }; 2047 2048 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 2049 toIBM943, sizeof(toIBM943), "ibm-943", 2050 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) 2051 log_err("u-> ibm-943 with subst with value did not match.\n"); 2052 2053 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]), 2054 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", 2055 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) 2056 log_err("u-> euc-jp with subst with value did not match.\n"); 2057 2058 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]), 2059 to_euc_tw, sizeof(to_euc_tw), "euc-tw", 2060 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) 2061 log_err("u-> euc-tw with subst with value did not match.\n"); 2062 2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2064 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2065 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2066 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2067 2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), 2069 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", 2070 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) 2071 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2072 2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), 2074 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", 2075 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) 2076 log_err("u-> iso_2022_jp with subst with value did not match.\n"); 2077 /*ESCAPE OPTIONS*/ 2078 { 2079 /* surrogate pair*/ 2080 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; 2081 static const uint8_t to_iso_2022_jp3_v2[]={ 2082 0x1b, 0x24, 0x42, 0x21, 0x21, 2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2084 2085 0x1b, 0x24, 0x42, 0x21, 0x22, 2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b, 2087 2088 0x42, 2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b, 2090 }; 2091 2092 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ 2093 0,0,0,0,0, 2094 1,1,1,1,1,1,1,1,1,1,1,1, 2095 2096 3,3,3,3,3, 2097 4,4,4,4,4,4,4,4,4,4,4,4, 2098 2099 6, 2100 7,7,7,7,7,7,7,7,7 2101 }; 2102 2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), 2104 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp", 2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n"); 2107 } 2108 { 2109 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2110 static const uint8_t to_iso_2022_cn5_v2[]={ 2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2114 0x0e, 0x21, 0x22, 2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44, 2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, 2117 0x42, 2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, 2119 }; 2120 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ 2121 0,0,0,0,0,0,0, 2122 1,1,1,1,1,1,1, 2123 1,1,1,1,1,1, 2124 3,3,3, 2125 4,4,4,4,4,4,4, 2126 4,4,4,4,4,4, 2127 6, 2128 7,7,7,7,7,7 2129 }; 2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), 2131 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", 2132 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR )) 2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n"); 2134 2135 } 2136 { 2137 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2138 static const uint8_t to_iso_2022_cn6_v2[]={ 2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2141 0x0e, 0x21, 0x22, 2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d, 2143 0x42, 2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d 2145 }; 2146 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ 2147 0, 0, 0, 0, 0, 0, 0, 2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2149 3, 3, 3, 2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2151 6, 2152 7, 7, 7, 7, 7, 7, 7, 7, 2153 }; 2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), 2155 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", 2156 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR )) 2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n"); 2158 2159 } 2160 { 2161 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; 2162 static const uint8_t to_iso_2022_cn7_v2[]={ 2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2165 0x0e, 0x21, 0x22, 2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, 2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32, 2168 }; 2169 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ 2170 0, 0, 0, 0, 0, 0, 0, 2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2172 3, 3, 3, 2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2174 6, 2175 7, 7, 7, 7, 7, 7, 2176 }; 2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), 2178 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", 2179 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR )) 2180 log_err("u-> iso-2022-cn with sub & K did not match.\n"); 2181 2182 } 2183 { 2184 static const UChar iso_2022_cn_inputText8[]={ 2185 0x3000, 2186 0xD84D, 0xDC56, 2187 0x3001, 2188 0xD84D, 0xDC56, 2189 0xDBFF, 0xDFFF, 2190 0x0042, 2191 0x0902}; 2192 static const uint8_t to_iso_2022_cn8_v2[]={ 2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2195 0x0e, 0x21, 0x22, 2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20, 2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20, 2198 0x42, 2199 0x5c, 0x39, 0x30, 0x32, 0x20 2200 }; 2201 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ 2202 0, 0, 0, 0, 0, 0, 0, 2203 1, 1, 1, 1, 1, 1, 1, 1, 2204 3, 3, 3, 2205 4, 4, 4, 4, 4, 4, 4, 4, 2206 6, 6, 6, 6, 6, 6, 6, 6, 2207 8, 2208 9, 9, 9, 9, 9 2209 }; 2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), 2211 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", 2212 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR )) 2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n"); 2214 2215 } 2216 { 2217 static const uint8_t to_iso_2022_cn4_v3[]={ 2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, 2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2220 0x0e, 0x21, 0x22, 2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36, 2222 0x42 2223 }; 2224 2225 2226 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ 2227 0,0,0,0,0,0,0, 2228 1,1,1,1,1,1,1,1,1,1,1, 2229 2230 3,3,3, 2231 4,4,4,4,4,4,4,4,4,4,4, 2232 2233 6 2234 2235 }; 2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2237 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", 2238 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2239 { 2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n"); 2241 } 2242 } 2243 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]), 2244 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", 2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) 2246 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2247 2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), 2249 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", 2250 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) 2251 log_err("u-> iso_2022_cn with subst with value did not match.\n"); 2252 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]), 2253 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", 2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) 2255 log_err("u-> iso_2022_kr with subst with value did not match.\n"); 2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]), 2257 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", 2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) 2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); 2260 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]), 2261 to_hz, sizeof(to_hz), "HZ", 2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) 2263 log_err("u-> hz with subst with value did not match.\n"); 2264 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]), 2265 to_hz2, sizeof(to_hz2), "HZ", 2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) 2267 log_err("u-> hz with subst with value did not match.\n"); 2268 2269 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]), 2270 to_iscii, sizeof(to_iscii), "ISCII,version=0", 2271 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) 2272 log_err("u-> iscii with subst with value did not match.\n"); 2273 } 2274#endif 2275 2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 2277 /*to Unicode*/ 2278 { 2279#if !UCONFIG_NO_LEGACY_CONVERSION 2280 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 2281 0x81, 0xad, /*unassigned*/ 2282 0x89, 0xd3 }; 2283 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 2285 0x7B87}; 2286 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 2287 2288 /* EUC_JP*/ 2289 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, 2290 0x8f, 0xda, 0xa1, /*unassigned*/ 2291 0x8e, 0xe0, 2292 }; 2293 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31, 2295 0x00a2 }; 2296 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, 2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2298 9, 2299 }; 2300 2301 /*EUC_TW*/ 2302 static const uint8_t sampleTxt_euc_tw[]={ 2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, 2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ 2305 0xe6, 0xca, 0x8a, 2306 }; 2307 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43, 2309 0x8706, 0x8a, }; 2310 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2312 11, 13}; 2313 2314 /*iso-2022-jp*/ 2315 static const uint8_t sampleTxt_iso_2022_jp[]={ 2316 0x1b, 0x28, 0x42, 0x41, 2317 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ 2318 0x1b, 0x28, 0x42, 0x42, 2319 2320 }; 2321 /* A % X 3 A % X 1 A B */ 2322 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 }; 2323 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 }; 2324 2325 /*iso-2022-cn*/ 2326 static const uint8_t sampleTxt_iso_2022_cn[]={ 2327 0x0f, 0x41, 0x44, 2328 0x1B, 0x24, 0x29, 0x47, 2329 0x0E, 0x40, 0x6c, /*unassigned*/ 2330 0x0f, 0x42, 2331 2332 }; 2333 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 }; 2334 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 }; 2335 2336 /*iso-2022-kr*/ 2337 static const uint8_t sampleTxt_iso_2022_kr[]={ 2338 0x1b, 0x24, 0x29, 0x43, 2339 0x41, 2340 0x0E, 0x7f, 0x1E, 2341 0x0e, 0x25, 0x50, 2342 0x0f, 0x51, 2343 0x42, 0x43, 2344 2345 }; 2346 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; 2347 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 }; 2348 2349 /*hz*/ 2350 static const uint8_t sampleTxt_hz[]={ 2351 0x41, 2352 0x7e, 0x7b, 0x26, 0x30, 2353 0x7f, 0x1E, /*unassigned*/ 2354 0x26, 0x30, 2355 0x7e, 0x7d, 0x42, 2356 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ 2357 0x7e, 0x7d, 0x42, 2358 }; 2359 static const UChar hztoUnicode[]={ 2360 0x41, 2361 0x03a0, 2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2363 0x03A0, 2364 0x42, 2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, 2366 0x42,}; 2367 2368 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, }; 2369 2370 2371 /*iscii*/ 2372 static const uint8_t sampleTxt_iscii[]={ 2373 0x41, 2374 0x30, 2375 0xEB, /*unassigned*/ 2376 0xa3, 2377 0x42, 2378 0xEC, /*unassigned*/ 2379 0x42, 2380 }; 2381 static const UChar isciitoUnicode[]={ 2382 0x41, 2383 0x30, 2384 0x25, 0x58, 0x45, 0x42, 2385 0x0903, 2386 0x42, 2387 0x25, 0x58, 0x45, 0x43, 2388 0x42,}; 2389 2390 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; 2391#endif 2392 2393 /*UTF8*/ 2394 static const uint8_t sampleTxtUTF8[]={ 2395 0x20, 0x64, 0x50, 2396 0xC2, 0x7E, /* truncated char */ 2397 0x20, 2398 0xE0, 0xB5, 0x7E, /* truncated char */ 2399 0x40, 2400 }; 2401 static const UChar UTF8ToUnicode[]={ 2402 0x0020, 0x0064, 0x0050, 2403 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ 2404 0x0020, 2405 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E, 2406 0x0040 2407 }; 2408 static const int32_t fromUTF8[] = { 2409 0, 1, 2, 2410 3, 3, 3, 3, 4, 2411 5, 2412 6, 6, 6, 6, 6, 6, 6, 6, 8, 2413 9 2414 }; 2415 static const UChar UTF8ToUnicodeXML_DEC[]={ 2416 0x0020, 0x0064, 0x0050, 2417 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */ 2418 0x0020, 2419 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E, 2420 0x0040 2421 }; 2422 static const int32_t fromUTF8XML_DEC[] = { 2423 0, 1, 2, 2424 3, 3, 3, 3, 3, 3, 4, 2425 5, 2426 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 2427 9 2428 }; 2429 2430 2431#if !UCONFIG_NO_LEGACY_CONVERSION 2432 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), 2433 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 2434 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) 2435 log_err("ibm-943->u with substitute with value did not match.\n"); 2436 2437 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), 2438 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP", 2439 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) 2440 log_err("euc-jp->u with substitute with value did not match.\n"); 2441 2442 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), 2443 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw", 2444 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) 2445 log_err("euc-tw->u with substitute with value did not match.\n"); 2446 2447 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2448 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2449 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) 2450 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2451 2452 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2453 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2454 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR)) 2455 log_err("iso-2022-jp->u with substitute with value did not match.\n"); 2456 2457 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ 2458 { 2459 static const UChar iso_2022_jptoUnicodeDec[]={ 2460 0x0041, 2461 /* & # 5 8 ; */ 2462 0x0026, 0x0023, 0x0035, 0x0038, 0x003b, 2463 0x0026, 0x0023, 0x0032, 0x0036, 0x003b, 2464 0x0042 }; 2465 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, }; 2466 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2467 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2468 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) 2469 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n"); 2470 } 2471 { 2472 static const UChar iso_2022_jptoUnicodeHex[]={ 2473 0x0041, 2474 /* & # x 3 A ; */ 2475 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b, 2476 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b, 2477 0x0042 }; 2478 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 }; 2479 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2480 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2481 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) 2482 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n"); 2483 } 2484 { 2485 static const UChar iso_2022_jptoUnicodeC[]={ 2486 0x0041, 2487 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */ 2488 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */ 2489 0x0042 }; 2490 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; 2491 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp), 2492 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", 2493 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR )) 2494 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n"); 2495 } 2496 } 2497 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn), 2498 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn", 2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) 2500 log_err("iso-2022-cn->u with substitute with value did not match.\n"); 2501 2502 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr), 2503 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr", 2504 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) 2505 log_err("iso-2022-kr->u with substitute with value did not match.\n"); 2506 2507 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), 2508 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", 2509 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) 2510 log_err("hz->u with substitute with value did not match.\n"); 2511 2512 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), 2513 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0", 2514 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) 2515 log_err("ISCII ->u with substitute with value did not match.\n"); 2516#endif 2517 2518 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2519 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8", 2520 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) 2521 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2522 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8), 2523 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8", 2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR)) 2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n"); 2526 } 2527} 2528 2529#if !UCONFIG_NO_LEGACY_CONVERSION 2530static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) 2531{ 2532 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; 2533 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; 2534 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; 2535 2536 2537 static const uint8_t text943[] = { 2538 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; 2539 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2540 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; 2541 static const UChar toUnicode943stop[]= { 0x304b}; 2542 2543 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; 2544 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; 2545 static const int32_t fromIBM943Offsstop[] = { 0}; 2546 2547 gInBufferSize = inputsize; 2548 gOutBufferSize = outputsize; 2549 /*checking with a legal value*/ 2550 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]), 2551 templegal949, sizeof(templegal949), "ibm-949", 2552 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) 2553 log_err("u-> ibm-949 with skip did not match.\n"); 2554 2555 /*checking illegal value for ibm-943 with substitute*/ 2556 if(!testConvertToUnicode(text943, sizeof(text943), 2557 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2559 log_err("ibm-943->u with subst did not match.\n"); 2560 /*checking illegal value for ibm-943 with skip */ 2561 if(!testConvertToUnicode(text943, sizeof(text943), 2562 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943", 2563 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) 2564 log_err("ibm-943->u with skip did not match.\n"); 2565 2566 /*checking illegal value for ibm-943 with stop */ 2567 if(!testConvertToUnicode(text943, sizeof(text943), 2568 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943", 2569 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) 2570 log_err("ibm-943->u with stop did not match.\n"); 2571 2572} 2573 2574static void TestSingleByte(int32_t inputsize, int32_t outputsize) 2575{ 2576 static const uint8_t sampleText[] = { 2577 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, 2578 0xff, 0x32, 0x33}; 2579 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; 2580 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; 2581 /*checking illegal value for ibm-943 with substitute*/ 2582 gInBufferSize = inputsize; 2583 gOutBufferSize = outputsize; 2584 2585 if(!testConvertToUnicode(sampleText, sizeof(sampleText), 2586 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943", 2587 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) 2588 log_err("ibm-943->u with subst did not match.\n"); 2589} 2590 2591static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) 2592{ 2593 /*EBCDIC_STATEFUL*/ 2594 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 }; 2595 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; 2596 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 }; 2597/* s SO doubl SI sng s SO fe fe SI s */ 2598 2599 /*EBCDIC_STATEFUL with subChar=3f*/ 2600 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 }; 2601 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 }; 2602 static const char mySubChar[]={ 0x3f}; 2603 2604 gInBufferSize = inputsize; 2605 gOutBufferSize = outputsize; 2606 2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2608 toIBM930, sizeof(toIBM930), "ibm-930", 2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) 2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); 2611 2612 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]), 2613 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", 2614 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) 2615 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n"); 2616} 2617#endif 2618 2619UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 2620 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 2621 const char *mySubChar, int8_t len) 2622{ 2623 2624 2625 UErrorCode status = U_ZERO_ERROR; 2626 UConverter *conv = 0; 2627 char junkout[NEW_MAX_BUFFER]; /* FIX */ 2628 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2629 const UChar *src; 2630 char *end; 2631 char *targ; 2632 int32_t *offs; 2633 int i; 2634 int32_t realBufferSize; 2635 char *realBufferEnd; 2636 const UChar *realSourceEnd; 2637 const UChar *sourceLimit; 2638 UBool checkOffsets = TRUE; 2639 UBool doFlush; 2640 char junk[9999]; 2641 char offset_str[9999]; 2642 char *p; 2643 UConverterFromUCallback oldAction = NULL; 2644 const void* oldContext = NULL; 2645 2646 2647 for(i=0;i<NEW_MAX_BUFFER;i++) 2648 junkout[i] = (char)0xF0; 2649 for(i=0;i<NEW_MAX_BUFFER;i++) 2650 junokout[i] = 0xFF; 2651 setNuConvTestName(codepage, "FROM"); 2652 2653 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 2654 gOutBufferSize); 2655 2656 conv = ucnv_open(codepage, &status); 2657 if(U_FAILURE(status)) 2658 { 2659 log_data_err("Couldn't open converter %s\n",codepage); 2660 return TRUE; 2661 } 2662 2663 log_verbose("Converter opened..\n"); 2664 2665 /*----setting the callback routine----*/ 2666 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2667 if (U_FAILURE(status)) 2668 { 2669 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2670 } 2671 /*------------------------*/ 2672 /*setting the subChar*/ 2673 if(mySubChar != NULL){ 2674 ucnv_setSubstChars(conv, mySubChar, len, &status); 2675 if (U_FAILURE(status)) { 2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2677 } 2678 } 2679 /*------------*/ 2680 2681 src = source; 2682 targ = junkout; 2683 offs = junokout; 2684 2685 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2686 realBufferEnd = junkout + realBufferSize; 2687 realSourceEnd = source + sourceLen; 2688 2689 if ( gOutBufferSize != realBufferSize ) 2690 checkOffsets = FALSE; 2691 2692 if( gInBufferSize != NEW_MAX_BUFFER ) 2693 checkOffsets = FALSE; 2694 2695 do 2696 { 2697 end = nct_min(targ + gOutBufferSize, realBufferEnd); 2698 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 2699 2700 doFlush = (UBool)(sourceLimit == realSourceEnd); 2701 2702 if(targ == realBufferEnd) 2703 { 2704 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 2705 return FALSE; 2706 } 2707 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 2708 2709 2710 status = U_ZERO_ERROR; 2711 2712 ucnv_fromUnicode (conv, 2713 (char **)&targ, 2714 (const char *)end, 2715 &src, 2716 sourceLimit, 2717 checkOffsets ? offs : NULL, 2718 doFlush, /* flush if we're at the end of the input data */ 2719 &status); 2720 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 2721 2722 2723 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2724 UChar errChars[50]; /* should be sufficient */ 2725 int8_t errLen = 50; 2726 UErrorCode err = U_ZERO_ERROR; 2727 const UChar* start= NULL; 2728 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); 2729 if(U_FAILURE(err)){ 2730 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err)); 2731 } 2732 /* length of in invalid chars should be equal to returned length*/ 2733 start = src - errLen; 2734 if(u_strncmp(errChars,start,errLen)!=0){ 2735 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2736 } 2737 } 2738 /* allow failure codes for the stop callback */ 2739 if(U_FAILURE(status) && 2740 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND))) 2741 { 2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2743 return FALSE; 2744 } 2745 2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 2747 sourceLen, targ-junkout); 2748 if(getTestOption(VERBOSITY_OPTION)) 2749 { 2750 2751 junk[0] = 0; 2752 offset_str[0] = 0; 2753 for(p = junkout;p<targ;p++) 2754 { 2755 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 2756 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 2757 } 2758 2759 log_verbose(junk); 2760 printSeq(expect, expectLen); 2761 if ( checkOffsets ) 2762 { 2763 log_verbose("\nOffsets:"); 2764 log_verbose(offset_str); 2765 } 2766 log_verbose("\n"); 2767 } 2768 ucnv_close(conv); 2769 2770 2771 if(expectLen != targ-junkout) 2772 { 2773 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 2775 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2776 printSeqErr(expect, expectLen); 2777 return FALSE; 2778 } 2779 2780 if (checkOffsets && (expectOffsets != 0) ) 2781 { 2782 log_verbose("comparing %d offsets..\n", targ-junkout); 2783 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 2784 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2785 log_err("Got Output : "); 2786 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 2787 log_err("Got Offsets: "); 2788 for(p=junkout;p<targ;p++) 2789 log_err("%d,", junokout[p-junkout]); 2790 log_err("\n"); 2791 log_err("Expected Offsets: "); 2792 for(i=0; i<(targ-junkout); i++) 2793 log_err("%d,", expectOffsets[i]); 2794 log_err("\n"); 2795 return FALSE; 2796 } 2797 } 2798 2799 if(!memcmp(junkout, expect, expectLen)) 2800 { 2801 log_verbose("String matches! %s\n", gNuConvTestName); 2802 return TRUE; 2803 } 2804 else 2805 { 2806 log_err("String does not match. %s\n", gNuConvTestName); 2807 log_err("source: "); 2808 printUSeqErr(source, sourceLen); 2809 log_err("Got: "); 2810 printSeqErr((const uint8_t *)junkout, expectLen); 2811 log_err("Expected: "); 2812 printSeqErr(expect, expectLen); 2813 return FALSE; 2814 } 2815} 2816 2817UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 2818 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 2819 const char *mySubChar, int8_t len) 2820{ 2821 UErrorCode status = U_ZERO_ERROR; 2822 UConverter *conv = 0; 2823 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 2824 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 2825 const char *src; 2826 const char *realSourceEnd; 2827 const char *srcLimit; 2828 UChar *targ; 2829 UChar *end; 2830 int32_t *offs; 2831 int i; 2832 UBool checkOffsets = TRUE; 2833 char junk[9999]; 2834 char offset_str[9999]; 2835 UChar *p; 2836 UConverterToUCallback oldAction = NULL; 2837 const void* oldContext = NULL; 2838 2839 int32_t realBufferSize; 2840 UChar *realBufferEnd; 2841 2842 2843 for(i=0;i<NEW_MAX_BUFFER;i++) 2844 junkout[i] = 0xFFFE; 2845 2846 for(i=0;i<NEW_MAX_BUFFER;i++) 2847 junokout[i] = -1; 2848 2849 setNuConvTestName(codepage, "TO"); 2850 2851 log_verbose("\n========= %s\n", gNuConvTestName); 2852 2853 conv = ucnv_open(codepage, &status); 2854 if(U_FAILURE(status)) 2855 { 2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 2857 return TRUE; 2858 } 2859 2860 log_verbose("Converter opened..\n"); 2861 2862 src = (const char *)source; 2863 targ = junkout; 2864 offs = junokout; 2865 2866 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 2867 realBufferEnd = junkout + realBufferSize; 2868 realSourceEnd = src + sourcelen; 2869 /*----setting the callback routine----*/ 2870 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 2871 if (U_FAILURE(status)) 2872 { 2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2874 } 2875 /*-------------------------------------*/ 2876 /*setting the subChar*/ 2877 if(mySubChar != NULL){ 2878 ucnv_setSubstChars(conv, mySubChar, len, &status); 2879 if (U_FAILURE(status)) { 2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 2881 } 2882 } 2883 /*------------*/ 2884 2885 2886 if ( gOutBufferSize != realBufferSize ) 2887 checkOffsets = FALSE; 2888 2889 if( gInBufferSize != NEW_MAX_BUFFER ) 2890 checkOffsets = FALSE; 2891 2892 do 2893 { 2894 end = nct_min( targ + gOutBufferSize, realBufferEnd); 2895 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 2896 2897 if(targ == realBufferEnd) 2898 { 2899 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 2900 return FALSE; 2901 } 2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 2903 2904 2905 2906 status = U_ZERO_ERROR; 2907 2908 ucnv_toUnicode (conv, 2909 &targ, 2910 end, 2911 (const char **)&src, 2912 (const char *)srcLimit, 2913 checkOffsets ? offs : NULL, 2914 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 2915 &status); 2916 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 2917 2918 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ 2919 char errChars[50]; /* should be sufficient */ 2920 int8_t errLen = 50; 2921 UErrorCode err = U_ZERO_ERROR; 2922 const char* start= NULL; 2923 ucnv_getInvalidChars(conv,errChars, &errLen, &err); 2924 if(U_FAILURE(err)){ 2925 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err)); 2926 } 2927 /* length of in invalid chars should be equal to returned length*/ 2928 start = src - errLen; 2929 if(uprv_strncmp(errChars,start,errLen)!=0){ 2930 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err)); 2931 } 2932 } 2933 /* allow failure codes for the stop callback */ 2934 if(U_FAILURE(status) && 2935 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) 2936 { 2937 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 2938 return FALSE; 2939 } 2940 2941 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 2942 sourcelen, targ-junkout); 2943 if(getTestOption(VERBOSITY_OPTION)) 2944 { 2945 2946 junk[0] = 0; 2947 offset_str[0] = 0; 2948 2949 for(p = junkout;p<targ;p++) 2950 { 2951 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 2952 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 2953 } 2954 2955 log_verbose(junk); 2956 printUSeq(expect, expectlen); 2957 if ( checkOffsets ) 2958 { 2959 log_verbose("\nOffsets:"); 2960 log_verbose(offset_str); 2961 } 2962 log_verbose("\n"); 2963 } 2964 ucnv_close(conv); 2965 2966 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 2967 2968 if (checkOffsets && (expectOffsets != 0)) 2969 { 2970 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 2971 { 2972 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 2973 log_err("Got offsets: "); 2974 for(p=junkout;p<targ;p++) 2975 log_err(" %2d,", junokout[p-junkout]); 2976 log_err("\n"); 2977 log_err("Expected offsets: "); 2978 for(i=0; i<(targ-junkout); i++) 2979 log_err(" %2d,", expectOffsets[i]); 2980 log_err("\n"); 2981 log_err("Got output: "); 2982 for(i=0; i<(targ-junkout); i++) 2983 log_err("0x%04x,", junkout[i]); 2984 log_err("\n"); 2985 log_err("From source: "); 2986 for(i=0; i<(src-(const char *)source); i++) 2987 log_err(" 0x%02x,", (unsigned char)source[i]); 2988 log_err("\n"); 2989 } 2990 } 2991 2992 if(!memcmp(junkout, expect, expectlen*2)) 2993 { 2994 log_verbose("Matches!\n"); 2995 return TRUE; 2996 } 2997 else 2998 { 2999 log_err("String does not match. %s\n", gNuConvTestName); 3000 log_verbose("String does not match. %s\n", gNuConvTestName); 3001 log_err("Got: "); 3002 printUSeqErr(junkout, expectlen); 3003 log_err("Expected: "); 3004 printUSeqErr(expect, expectlen); 3005 log_err("\n"); 3006 return FALSE; 3007 } 3008} 3009 3010UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 3011 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, 3012 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3013{ 3014 3015 3016 UErrorCode status = U_ZERO_ERROR; 3017 UConverter *conv = 0; 3018 char junkout[NEW_MAX_BUFFER]; /* FIX */ 3019 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3020 const UChar *src; 3021 char *end; 3022 char *targ; 3023 int32_t *offs; 3024 int i; 3025 int32_t realBufferSize; 3026 char *realBufferEnd; 3027 const UChar *realSourceEnd; 3028 const UChar *sourceLimit; 3029 UBool checkOffsets = TRUE; 3030 UBool doFlush; 3031 char junk[9999]; 3032 char offset_str[9999]; 3033 char *p; 3034 UConverterFromUCallback oldAction = NULL; 3035 const void* oldContext = NULL; 3036 3037 3038 for(i=0;i<NEW_MAX_BUFFER;i++) 3039 junkout[i] = (char)0xF0; 3040 for(i=0;i<NEW_MAX_BUFFER;i++) 3041 junokout[i] = 0xFF; 3042 setNuConvTestName(codepage, "FROM"); 3043 3044 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 3045 gOutBufferSize); 3046 3047 conv = ucnv_open(codepage, &status); 3048 if(U_FAILURE(status)) 3049 { 3050 log_data_err("Couldn't open converter %s\n",codepage); 3051 return TRUE; /* Because the err has already been logged. */ 3052 } 3053 3054 log_verbose("Converter opened..\n"); 3055 3056 /*----setting the callback routine----*/ 3057 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3058 if (U_FAILURE(status)) 3059 { 3060 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3061 } 3062 /*------------------------*/ 3063 /*setting the subChar*/ 3064 if(mySubChar != NULL){ 3065 ucnv_setSubstChars(conv, mySubChar, len, &status); 3066 if (U_FAILURE(status)) { 3067 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status)); 3068 } 3069 } 3070 /*------------*/ 3071 3072 src = source; 3073 targ = junkout; 3074 offs = junokout; 3075 3076 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3077 realBufferEnd = junkout + realBufferSize; 3078 realSourceEnd = source + sourceLen; 3079 3080 if ( gOutBufferSize != realBufferSize ) 3081 checkOffsets = FALSE; 3082 3083 if( gInBufferSize != NEW_MAX_BUFFER ) 3084 checkOffsets = FALSE; 3085 3086 do 3087 { 3088 end = nct_min(targ + gOutBufferSize, realBufferEnd); 3089 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 3090 3091 doFlush = (UBool)(sourceLimit == realSourceEnd); 3092 3093 if(targ == realBufferEnd) 3094 { 3095 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 3096 return FALSE; 3097 } 3098 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 3099 3100 3101 status = U_ZERO_ERROR; 3102 3103 ucnv_fromUnicode (conv, 3104 (char **)&targ, 3105 (const char *)end, 3106 &src, 3107 sourceLimit, 3108 checkOffsets ? offs : NULL, 3109 doFlush, /* flush if we're at the end of the input data */ 3110 &status); 3111 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) ); 3112 3113 /* allow failure codes for the stop callback */ 3114 if(U_FAILURE(status) && status != expectedError) 3115 { 3116 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3117 return FALSE; 3118 } 3119 3120 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 3121 sourceLen, targ-junkout); 3122 if(getTestOption(VERBOSITY_OPTION)) 3123 { 3124 3125 junk[0] = 0; 3126 offset_str[0] = 0; 3127 for(p = junkout;p<targ;p++) 3128 { 3129 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 3130 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 3131 } 3132 3133 log_verbose(junk); 3134 printSeq(expect, expectLen); 3135 if ( checkOffsets ) 3136 { 3137 log_verbose("\nOffsets:"); 3138 log_verbose(offset_str); 3139 } 3140 log_verbose("\n"); 3141 } 3142 ucnv_close(conv); 3143 3144 3145 if(expectLen != targ-junkout) 3146 { 3147 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3148 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 3149 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3150 printSeqErr(expect, expectLen); 3151 return FALSE; 3152 } 3153 3154 if (checkOffsets && (expectOffsets != 0) ) 3155 { 3156 log_verbose("comparing %d offsets..\n", targ-junkout); 3157 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 3158 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3159 log_err("Got Output : "); 3160 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); 3161 log_err("Got Offsets: "); 3162 for(p=junkout;p<targ;p++) 3163 log_err("%d,", junokout[p-junkout]); 3164 log_err("\n"); 3165 log_err("Expected Offsets: "); 3166 for(i=0; i<(targ-junkout); i++) 3167 log_err("%d,", expectOffsets[i]); 3168 log_err("\n"); 3169 return FALSE; 3170 } 3171 } 3172 3173 if(!memcmp(junkout, expect, expectLen)) 3174 { 3175 log_verbose("String matches! %s\n", gNuConvTestName); 3176 return TRUE; 3177 } 3178 else 3179 { 3180 log_err("String does not match. %s\n", gNuConvTestName); 3181 log_err("source: "); 3182 printUSeqErr(source, sourceLen); 3183 log_err("Got: "); 3184 printSeqErr((const uint8_t *)junkout, expectLen); 3185 log_err("Expected: "); 3186 printSeqErr(expect, expectLen); 3187 return FALSE; 3188 } 3189} 3190UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 3191 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, 3192 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError) 3193{ 3194 UErrorCode status = U_ZERO_ERROR; 3195 UConverter *conv = 0; 3196 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 3197 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 3198 const char *src; 3199 const char *realSourceEnd; 3200 const char *srcLimit; 3201 UChar *targ; 3202 UChar *end; 3203 int32_t *offs; 3204 int i; 3205 UBool checkOffsets = TRUE; 3206 char junk[9999]; 3207 char offset_str[9999]; 3208 UChar *p; 3209 UConverterToUCallback oldAction = NULL; 3210 const void* oldContext = NULL; 3211 3212 int32_t realBufferSize; 3213 UChar *realBufferEnd; 3214 3215 3216 for(i=0;i<NEW_MAX_BUFFER;i++) 3217 junkout[i] = 0xFFFE; 3218 3219 for(i=0;i<NEW_MAX_BUFFER;i++) 3220 junokout[i] = -1; 3221 3222 setNuConvTestName(codepage, "TO"); 3223 3224 log_verbose("\n========= %s\n", gNuConvTestName); 3225 3226 conv = ucnv_open(codepage, &status); 3227 if(U_FAILURE(status)) 3228 { 3229 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 3230 return TRUE; 3231 } 3232 3233 log_verbose("Converter opened..\n"); 3234 3235 src = (const char *)source; 3236 targ = junkout; 3237 offs = junokout; 3238 3239 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 3240 realBufferEnd = junkout + realBufferSize; 3241 realSourceEnd = src + sourcelen; 3242 /*----setting the callback routine----*/ 3243 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status); 3244 if (U_FAILURE(status)) 3245 { 3246 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3247 } 3248 /*-------------------------------------*/ 3249 /*setting the subChar*/ 3250 if(mySubChar != NULL){ 3251 ucnv_setSubstChars(conv, mySubChar, len, &status); 3252 if (U_FAILURE(status)) { 3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 3254 } 3255 } 3256 /*------------*/ 3257 3258 3259 if ( gOutBufferSize != realBufferSize ) 3260 checkOffsets = FALSE; 3261 3262 if( gInBufferSize != NEW_MAX_BUFFER ) 3263 checkOffsets = FALSE; 3264 3265 do 3266 { 3267 end = nct_min( targ + gOutBufferSize, realBufferEnd); 3268 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 3269 3270 if(targ == realBufferEnd) 3271 { 3272 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 3273 return FALSE; 3274 } 3275 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 3276 3277 3278 3279 status = U_ZERO_ERROR; 3280 3281 ucnv_toUnicode (conv, 3282 &targ, 3283 end, 3284 (const char **)&src, 3285 (const char *)srcLimit, 3286 checkOffsets ? offs : NULL, 3287 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */ 3288 &status); 3289 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 3290 3291 /* allow failure codes for the stop callback */ 3292 if(U_FAILURE(status) && status!=expectedError) 3293 { 3294 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 3295 return FALSE; 3296 } 3297 3298 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 3299 sourcelen, targ-junkout); 3300 if(getTestOption(VERBOSITY_OPTION)) 3301 { 3302 3303 junk[0] = 0; 3304 offset_str[0] = 0; 3305 3306 for(p = junkout;p<targ;p++) 3307 { 3308 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 3309 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 3310 } 3311 3312 log_verbose(junk); 3313 printUSeq(expect, expectlen); 3314 if ( checkOffsets ) 3315 { 3316 log_verbose("\nOffsets:"); 3317 log_verbose(offset_str); 3318 } 3319 log_verbose("\n"); 3320 } 3321 ucnv_close(conv); 3322 3323 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 3324 3325 if (checkOffsets && (expectOffsets != 0)) 3326 { 3327 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 3328 { 3329 log_err("did not get the expected offsets while %s \n", gNuConvTestName); 3330 log_err("Got offsets: "); 3331 for(p=junkout;p<targ;p++) 3332 log_err(" %2d,", junokout[p-junkout]); 3333 log_err("\n"); 3334 log_err("Expected offsets: "); 3335 for(i=0; i<(targ-junkout); i++) 3336 log_err(" %2d,", expectOffsets[i]); 3337 log_err("\n"); 3338 log_err("Got output: "); 3339 for(i=0; i<(targ-junkout); i++) 3340 log_err("0x%04x,", junkout[i]); 3341 log_err("\n"); 3342 log_err("From source: "); 3343 for(i=0; i<(src-(const char *)source); i++) 3344 log_err(" 0x%02x,", (unsigned char)source[i]); 3345 log_err("\n"); 3346 } 3347 } 3348 3349 if(!memcmp(junkout, expect, expectlen*2)) 3350 { 3351 log_verbose("Matches!\n"); 3352 return TRUE; 3353 } 3354 else 3355 { 3356 log_err("String does not match. %s\n", gNuConvTestName); 3357 log_verbose("String does not match. %s\n", gNuConvTestName); 3358 log_err("Got: "); 3359 printUSeqErr(junkout, expectlen); 3360 log_err("Expected: "); 3361 printUSeqErr(expect, expectlen); 3362 log_err("\n"); 3363 return FALSE; 3364 } 3365} 3366 3367static void TestCallBackFailure(void) { 3368 UErrorCode status = U_USELESS_COLLATOR_ERROR; 3369 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); 3370 if (status != U_USELESS_COLLATOR_ERROR) { 3371 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n"); 3372 } 3373 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); 3374 if (status != U_USELESS_COLLATOR_ERROR) { 3375 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n"); 3376 } 3377 ucnv_cbFromUWriteSub(NULL, -1, &status); 3378 if (status != U_USELESS_COLLATOR_ERROR) { 3379 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n"); 3380 } 3381 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); 3382 if (status != U_USELESS_COLLATOR_ERROR) { 3383 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n"); 3384 } 3385} 3386