1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API, added tests for string functions
13********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25#include "unicode/unorm2.h"
26
27#include "cintltst.h"
28#include "putilimp.h"
29#include "uparse.h"
30#include "ucase.h"
31#include "ubidi_props.h"
32#include "uprops.h"
33#include "uset_imp.h"
34#include "usc_impl.h"
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
37
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole replacement list is parenthesized so that the expansion always
 * behaves as a single operand, whatever operator it is placed next to.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
39
40/* prototypes --------------------------------------------------------------- */
41
42static void TestUpperLower(void);
43static void TestLetterNumber(void);
44static void TestMisc(void);
45static void TestPOSIX(void);
46static void TestControlPrint(void);
47static void TestIdentifier(void);
48static void TestUnicodeData(void);
49static void TestCodeUnit(void);
50static void TestCodePoint(void);
51static void TestCharLength(void);
52static void TestCharNames(void);
53static void TestMirroring(void);
54static void TestUScriptRunAPI(void);
55static void TestAdditionalProperties(void);
56static void TestNumericProperties(void);
57static void TestPropertyNames(void);
58static void TestPropertyValues(void);
59static void TestConsistency(void);
60static void TestUCase(void);
61static void TestUBiDiProps(void);
62static void TestCaseFolding(void);
63
64/* internal methods used */
65static int32_t MakeProp(char* str);
66static int32_t MakeDir(char* str);
67
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72             char *fields[][2], int32_t fieldCount,
73             UParseLineFn *lineFn, void *context,
74             UErrorCode *pErrorCode) {
75    char path[256];
76    char backupPath[256];
77
78    if(U_FAILURE(*pErrorCode)) {
79        return;
80    }
81
82    /* Look inside ICU_DATA first */
83    strcpy(path, u_getDataDirectory());
84    strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85    strcat(path, filename);
86
87    /* As a fallback, try to guess where the source data was located
88     *    at the time ICU was built, and look there.
89     */
90    strcpy(backupPath, ctest_dataSrcDir());
91    strcat(backupPath, U_FILE_SEP_STRING);
92    strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93    strcat(backupPath, filename);
94
95    u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97        *pErrorCode=U_ZERO_ERROR;
98        u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99    }
100    if(U_FAILURE(*pErrorCode)) {
101        log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102    }
103}
104
105/* test data ---------------------------------------------------------------- */
106
/*
 * NOTE(review): the name suggests this is the last code point listed in the
 * parsed data file; its use is outside this section - TODO confirm.
 */
static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;
/*
 * Two-letter general category abbreviations, concatenated without separators.
 * The abbreviation at offset 2*i is the one named in the comment on
 * tagValues[i], so the two tables are parallel (30 entries each).
 * NOTE(review): presumably consumed by MakeProp() - confirm.
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";
/* General category constants, in the same order as the pairs in tagStrings. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };
142
/*
 * Bidirectional class abbreviations; each entry holds up to 4 characters
 * plus the terminating NUL.
 * NOTE(review): the order looks like it follows the UCharDirection enum
 * values (L, R, EN, ...), and the table is presumably consumed by MakeDir() -
 * confirm against uchar.h and the caller.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN"
};
164
165void addUnicodeTest(TestNode** root);
166
167void addUnicodeTest(TestNode** root)
168{
169    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
170    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
171    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
172    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
173    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
174    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
175    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
176    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
177    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
178    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
179    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
180    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
181    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
182    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
183    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
184    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
185    addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript");
186    addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions");
187    addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI");
188    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
189    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
190    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
191    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
192    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
193    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
194    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
195}
196
197/*==================================================== */
198/* test u_toupper() and u_tolower()                    */
199/*==================================================== */
200static void TestUpperLower()
201{
202    const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
203    const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
204    U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
205    U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
206    int32_t i;
207
208    U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
209    U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
210
211/*
212Checks LetterLike Symbols which were previously a source of confusion
213[Bertrand A. D. 02/04/98]
214*/
215    for (i=0x2100;i<0x2138;i++)
216    {
217        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
218        if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
219        {
220            if (i != (int)u_tolower(i)) /* itself */
221                log_err("Failed case conversion with itself: U+%04x\n", i);
222            if (i != (int)u_toupper(i))
223                log_err("Failed case conversion with itself: U+%04x\n", i);
224        }
225    }
226
227    for(i=0; i < u_strlen(upper); i++){
228        if(u_tolower(upper[i]) != lower[i]){
229            log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
230        }
231    }
232
233    log_verbose("testing upper lower\n");
234    for (i = 0; i < 21; i++) {
235
236        if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
237        {
238            log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
239        }
240        else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
241         {
242            log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
243        }
244        else if (upperTest[i] != u_tolower(lowerTest[i]))
245        {
246            log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
247        }
248        else if (lowerTest[i] != u_toupper(upperTest[i]))
249         {
250            log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
251        }
252        else if (upperTest[i] != u_tolower(upperTest[i]))
253        {
254            log_err("Failed case conversion with itself: %c\n", upperTest[i]);
255        }
256        else if (lowerTest[i] != u_toupper(lowerTest[i]))
257        {
258            log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
259        }
260    }
261    log_verbose("done testing upper lower\n");
262
263    log_verbose("testing u_istitle\n");
264    {
265        static const UChar expected[] = {
266            0x1F88,
267            0x1F89,
268            0x1F8A,
269            0x1F8B,
270            0x1F8C,
271            0x1F8D,
272            0x1F8E,
273            0x1F8F,
274            0x1F88,
275            0x1F89,
276            0x1F8A,
277            0x1F8B,
278            0x1F8C,
279            0x1F8D,
280            0x1F8E,
281            0x1F8F,
282            0x1F98,
283            0x1F99,
284            0x1F9A,
285            0x1F9B,
286            0x1F9C,
287            0x1F9D,
288            0x1F9E,
289            0x1F9F,
290            0x1F98,
291            0x1F99,
292            0x1F9A,
293            0x1F9B,
294            0x1F9C,
295            0x1F9D,
296            0x1F9E,
297            0x1F9F,
298            0x1FA8,
299            0x1FA9,
300            0x1FAA,
301            0x1FAB,
302            0x1FAC,
303            0x1FAD,
304            0x1FAE,
305            0x1FAF,
306            0x1FA8,
307            0x1FA9,
308            0x1FAA,
309            0x1FAB,
310            0x1FAC,
311            0x1FAD,
312            0x1FAE,
313            0x1FAF,
314            0x1FBC,
315            0x1FBC,
316            0x1FCC,
317            0x1FCC,
318            0x1FFC,
319            0x1FFC,
320        };
321        int32_t num = sizeof(expected)/sizeof(expected[0]);
322        for(i=0; i<num; i++){
323            if(!u_istitle(expected[i])){
324                log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
325            }
326        }
327
328    }
329}
330
331/* compare two sets and verify that their difference or intersection is empty */
332static UBool
333showADiffB(const USet *a, const USet *b,
334           const char *a_name, const char *b_name,
335           UBool expect, UBool diffIsError) {
336    USet *aa;
337    int32_t i, start, end, length;
338    UErrorCode errorCode;
339
340    /*
341     * expect:
342     * TRUE  -> a-b should be empty, that is, b should contain all of a
343     * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
344     */
345    if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
346        return TRUE;
347    }
348
349    /* clone a to aa because a is const */
350    aa=uset_open(1, 0);
351    if(aa==NULL) {
352        /* unusual problem - out of memory? */
353        return FALSE;
354    }
355    uset_addAll(aa, a);
356
357    /* compute the set in question */
358    if(expect) {
359        /* a-b */
360        uset_removeAll(aa, b);
361    } else {
362        /* a&b */
363        uset_retainAll(aa, b);
364    }
365
366    /* aa is not empty because of the initial tests above; show its contents */
367    errorCode=U_ZERO_ERROR;
368    i=0;
369    for(;;) {
370        length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
371        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
372            break; /* done */
373        }
374        if(U_FAILURE(errorCode)) {
375            log_err("error comparing %s with %s at difference item %d: %s\n",
376                a_name, b_name, i, u_errorName(errorCode));
377            break;
378        }
379        if(length!=0) {
380            break; /* done with code points, got a string or -1 */
381        }
382
383        if(diffIsError) {
384            if(expect) {
385                log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
386            } else {
387                log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
388            }
389        } else {
390            if(expect) {
391                log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
392            } else {
393                log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
394            }
395        }
396
397        ++i;
398    }
399
400    uset_close(aa);
401    return FALSE;
402}
403
/*
 * Checks that a-b is empty, that is, that b contains all of a.
 * Forwards to showADiffB() with expect=TRUE; see there for how differences
 * are reported and for the meaning of diffIsError.
 * Returns the result of showADiffB().
 */
static UBool
showAMinusB(const USet *a, const USet *b,
            const char *a_name, const char *b_name,
            UBool diffIsError) {
    return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
}
410
/*
 * Checks that a&b is empty, that is, that a and b do not intersect.
 * Forwards to showADiffB() with expect=FALSE; see there for how common
 * ranges are reported and for the meaning of diffIsError.
 * Returns the result of showADiffB().
 */
static UBool
showAIntersectB(const USet *a, const USet *b,
                const char *a_name, const char *b_name,
                UBool diffIsError) {
    return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
}
417
418static UBool
419compareUSets(const USet *a, const USet *b,
420             const char *a_name, const char *b_name,
421             UBool diffIsError) {
422    /*
423     * Use an arithmetic & not a logical && so that both branches
424     * are always taken and all differences are shown.
425     */
426    return
427        showAMinusB(a, b, a_name, b_name, diffIsError) &
428        showAMinusB(b, a, b_name, a_name, diffIsError);
429}
430
/* test isLetter(u_isalpha()) and isDigit(u_isdigit()) */
432static void TestLetterNumber()
433{
434    UChar i = 0x0000;
435
436    log_verbose("Testing for isalpha\n");
437    for (i = 0x0041; i < 0x005B; i++) {
438        if (!u_isalpha(i))
439        {
440            log_err("Failed isLetter test at  %.4X\n", i);
441        }
442    }
443    for (i = 0x0660; i < 0x066A; i++) {
444        if (u_isalpha(i))
445        {
446            log_err("Failed isLetter test with numbers at %.4X\n", i);
447        }
448    }
449
450    log_verbose("Testing for isdigit\n");
451    for (i = 0x0660; i < 0x066A; i++) {
452        if (!u_isdigit(i))
453        {
454            log_verbose("Failed isNumber test at %.4X\n", i);
455        }
456    }
457
458    log_verbose("Testing for isalnum\n");
459    for (i = 0x0041; i < 0x005B; i++) {
460        if (!u_isalnum(i))
461        {
462            log_err("Failed isAlNum test at  %.4X\n", i);
463        }
464    }
465    for (i = 0x0660; i < 0x066A; i++) {
466        if (!u_isalnum(i))
467        {
468            log_err("Failed isAlNum test at  %.4X\n", i);
469        }
470    }
471
472    {
473        /*
474         * The following checks work only starting from Unicode 4.0.
475         * Check the version number here.
476         */
477        static UVersionInfo u401={ 4, 0, 1, 0 };
478        UVersionInfo version;
479        u_getUnicodeVersion(version);
480        if(version[0]<4 || 0==memcmp(version, u401, 4)) {
481            return;
482        }
483    }
484
485    {
486        /*
487         * Sanity check:
488         * Verify that exactly the digit characters have decimal digit values.
489         * This assumption is used in the implementation of u_digit()
490         * (which checks nt=de)
491         * compared with the parallel java.lang.Character.digit()
492         * (which checks Nd).
493         *
494         * This was not true in Unicode 3.2 and earlier.
495         * Unicode 4.0 fixed discrepancies.
496         * Unicode 4.0.1 re-introduced problems in this area due to an
497         * unintentionally incomplete last-minute change.
498         */
499        U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
500        U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
501
502        USet *digits, *decimalValues;
503        UErrorCode errorCode;
504
505        U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
506        U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
507        errorCode=U_ZERO_ERROR;
508        digits=uset_openPattern(digitsPattern, 6, &errorCode);
509        decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
510
511        if(U_SUCCESS(errorCode)) {
512            compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
513        }
514
515        uset_close(digits);
516        uset_close(decimalValues);
517    }
518}
519
520static void testSampleCharProps(UBool propFn(UChar32), const char *propName,
521                                const UChar32 *sampleChars, int32_t sampleCharsLength,
522                                UBool expected) {
523    int32_t i;
524    for (i = 0; i < sampleCharsLength; ++i) {
525        UBool result = propFn(sampleChars[i]);
526        if (result != expected) {
527            log_err("error: character property function %s(U+%04x)=%d is wrong\n",
528                    propName, sampleChars[i], result);
529        }
530    }
531}
532
/* Tests for isDefined (u_isdefined()), isBaseForm (u_isbase()), isSpaceChar (u_isspace()), isWhiteSpace (u_isWhitespace()), and u_charDigitValue() */
534static void TestMisc()
535{
536    static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
537    static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
538    static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e};
539    static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
540    static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2};
541    static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B};
542/*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
543    static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
544    static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
545    static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
546    static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef};
547
548    static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
549
550    uint32_t mask;
551
552    int32_t i;
553    char icuVersion[U_MAX_VERSION_STRING_LENGTH];
554    UVersionInfo realVersion;
555
556    memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
557
558    testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
559    testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
560
561    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
562                        sampleSpaces, LENGTHOF(sampleSpaces), TRUE);
563    testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar",
564                        sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE);
565
566    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
567                        sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE);
568    testSampleCharProps(u_isWhitespace, "u_isWhitespace",
569                        sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE);
570
571    testSampleCharProps(u_isdefined, "u_isdefined",
572                        sampleDefined, LENGTHOF(sampleDefined), TRUE);
573    testSampleCharProps(u_isdefined, "u_isdefined",
574                        sampleUndefined, LENGTHOF(sampleUndefined), FALSE);
575
576    testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE);
577    testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE);
578
579    testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE);
580    testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE);
581
582    for (i = 0; i < LENGTHOF(sampleDigits); i++) {
583        if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) {
584            log_err("error: u_charDigitValue(U+04x)=%d != %d\n",
585                    sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]);
586        }
587    }
588
589    /* Tests the ICU version #*/
590    u_getVersion(realVersion);
591    u_versionToString(realVersion, icuVersion);
592    if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
593    {
594        log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
595    }
596#if defined(ICU_VERSION)
597    /* test only happens where we have configure.in with VERSION - sanity check. */
598    if(strcmp(U_ICU_VERSION, ICU_VERSION))
599    {
600        log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
601    }
602#endif
603
604    /* test U_GC_... */
605    if(
606        U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
607        U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
608        U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
609        U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
610        U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
611        U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
612    ) {
613        log_err("error: U_GET_GC_MASK does not work properly\n");
614    }
615
616    mask=0;
617    mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
618
619    mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
620    mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
621    mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
622    mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
623    mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
624
625    mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
626    mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
627    mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
628
629    mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
630    mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
631    mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
632
633    mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
634    mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
635    mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
636
637    mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
638    mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
639    mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
640    mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
641
642    mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
643    mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
644    mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
645    mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
646    mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
647
648    mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
649    mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
650    mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
651    mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
652
653    mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
654    mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
655
656    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
657        log_err("error: problems with U_GC_XX_MASK constants\n");
658    }
659
660    mask=0;
661    mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
662    mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
663    mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
664    mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
665    mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
666    mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
667    mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
668
669    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
670        log_err("error: problems with U_GC_Y_MASK constants\n");
671    }
672    {
673        static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
674        for(i=0; i<10; i++){
675            if(digit[i]!=u_forDigit(i,10)){
676                log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
677            }
678        }
679    }
680
681    /* test u_digit() */
682    {
683        static const struct {
684            UChar32 c;
685            int8_t radix, value;
686        } data[]={
687            /* base 16 */
688            { 0x0031, 16, 1 },
689            { 0x0038, 16, 8 },
690            { 0x0043, 16, 12 },
691            { 0x0066, 16, 15 },
692            { 0x00e4, 16, -1 },
693            { 0x0662, 16, 2 },
694            { 0x06f5, 16, 5 },
695            { 0xff13, 16, 3 },
696            { 0xff41, 16, 10 },
697
698            /* base 8 */
699            { 0x0031, 8, 1 },
700            { 0x0038, 8, -1 },
701            { 0x0043, 8, -1 },
702            { 0x0066, 8, -1 },
703            { 0x00e4, 8, -1 },
704            { 0x0662, 8, 2 },
705            { 0x06f5, 8, 5 },
706            { 0xff13, 8, 3 },
707            { 0xff41, 8, -1 },
708
709            /* base 36 */
710            { 0x5a, 36, 35 },
711            { 0x7a, 36, 35 },
712            { 0xff3a, 36, 35 },
713            { 0xff5a, 36, 35 },
714
715            /* wrong radix values */
716            { 0x0031, 1, -1 },
717            { 0xff3a, 37, -1 }
718        };
719
720        for(i=0; i<LENGTHOF(data); ++i) {
721            if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
722                log_err("u_digit(U+%04x, %d)=%d expected %d\n",
723                        data[i].c,
724                        data[i].radix,
725                        u_digit(data[i].c, data[i].radix),
726                        data[i].value);
727            }
728        }
729    }
730}
731
732/* test C/POSIX-style functions --------------------------------------------- */
733
/*
 * Bit flags, one per C/POSIX-style classification function.
 * The bit position of each flag equals the index of the corresponding
 * function in posixClasses[] below.
 */
#define ISAL     1      /* u_isalpha */
#define ISLO     2      /* u_islower */
#define ISUP     4      /* u_isupper */

#define ISDI     8      /* u_isdigit */
#define ISXD  0x10      /* u_isxdigit */

#define ISAN  0x20      /* u_isalnum */

#define ISPU  0x40      /* u_ispunct */
#define ISGR  0x80      /* u_isgraph */
#define ISPR 0x100      /* u_isprint */

#define ISSP 0x200      /* u_isspace */
#define ISBL 0x400      /* u_isblank */
#define ISCN 0x800      /* u_iscntrl */
751
/* C/POSIX-style functions, in the same order as the bit flags */
/* Signature shared by all classification functions listed in posixClasses[]. */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/*
 * Classification functions under test together with the name that is printed
 * (prefixed with "u_") in error messages. Entry i corresponds to flag bit i.
 */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};
772
/*
 * Expected classification results for sample code points.
 * posixResults is the OR of the IS* flags of all functions that TestPOSIX()
 * expects to return TRUE for c; a flag that is not set means that the
 * corresponding function is expected to return FALSE.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
812
813static void
814TestPOSIX() {
815    uint32_t mask;
816    int32_t cl, i;
817    UBool expect;
818
819    mask=1;
820    for(cl=0; cl<12; ++cl) {
821        for(i=0; i<LENGTHOF(posixData); ++i) {
822            expect=(UBool)((posixData[i].posixResults&mask)!=0);
823            if(posixClasses[cl].fn(posixData[i].c)!=expect) {
824                log_err("u_%s(U+%04x)=%s is wrong\n",
825                    posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
826            }
827        }
828        mask<<=1;
829    }
830}
831
832/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
833static void TestControlPrint()
834{
835    const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
836    const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2};
837    const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014};
838    const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
839    UChar32 c;
840
841    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE);
842    testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE);
843
844    testSampleCharProps(u_isprint, "u_isprint",
845                        samplePrintable, LENGTHOF(samplePrintable), TRUE);
846    testSampleCharProps(u_isprint, "u_isprint",
847                        sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE);
848
849    /* test all ISO 8 controls */
850    for(c=0; c<=0x9f; ++c) {
851        if(c==0x20) {
852            /* skip ASCII graphic characters and continue with DEL */
853            c=0x7f;
854        }
855        if(!u_iscntrl(c)) {
856            log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
857        }
858        if(!u_isISOControl(c)) {
859            log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
860        }
861        if(u_isprint(c)) {
862            log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
863        }
864    }
865
866    /* test all Latin-1 graphic characters */
867    for(c=0x20; c<=0xff; ++c) {
868        if(c==0x7f) {
869            c=0xa0;
870        } else if(c==0xad) {
871            /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
872            ++c;
873        }
874        if(!u_isprint(c)) {
875            log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
876        }
877    }
878}
879
880/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
881static void TestIdentifier()
882{
883    const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
884    const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
885    const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
886    const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
887    const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
888    const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
889    const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
890    const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
891    const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
892    const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
893
894    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
895                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
896    testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart",
897                        sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE);
898
899    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
900                        sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE);
901    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
902                        sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE);
903
904    /* IDPart should imply IDStart */
905    testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart",
906                        sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE);
907
908    testSampleCharProps(u_isIDStart, "u_isIDStart",
909                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
910    testSampleCharProps(u_isIDStart, "u_isIDStart",
911                        sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE);
912
913    testSampleCharProps(u_isIDPart, "u_isIDPart",
914                        sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE);
915    testSampleCharProps(u_isIDPart, "u_isIDPart",
916                        sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE);
917
918    /* IDPart should imply IDStart */
919    testSampleCharProps(u_isIDPart, "u_isIDPart",
920                        sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE);
921
922    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
923                        sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE);
924    testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable",
925                        sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE);
926}
927
928/* for each line of UnicodeData.txt, check some of the properties */
929typedef struct UnicodeDataContext {
930#if UCONFIG_NO_NORMALIZATION
931    const void *dummy;
932#else
933    const UNormalizer2 *nfc;
934    const UNormalizer2 *nfkc;
935#endif
936} UnicodeDataContext;
937
938/*
939 * ### TODO
940 * This test fails incorrectly if the First or Last code point of a repetitive area
941 * is overridden, which is allowed and is encouraged for the PUAs.
942 * Currently, this means that both area First/Last and override lines are
943 * tested against the properties from the API,
944 * and the area boundary will not match and cause an error.
945 *
946 * This function should detect area boundaries and skip them for the test of individual
947 * code points' properties.
948 * Then it should check that the areas contain all the same properties except where overridden.
949 * For this, it would have had to set a flag for which code points were listed explicitly.
950 */
951static void U_CALLCONV
952unicodeDataLineFn(void *context,
953                  char *fields[][2], int32_t fieldCount,
954                  UErrorCode *pErrorCode)
955{
956    char buffer[100];
957    const char *d;
958    char *end;
959    uint32_t value;
960    UChar32 c;
961    int32_t i;
962    int8_t type;
963    int32_t dt;
964    UChar dm[32], s[32];
965    int32_t dmLength, length;
966
967#if !UCONFIG_NO_NORMALIZATION
968    const UNormalizer2 *nfc, *nfkc;
969#endif
970
971    /* get the character code, field 0 */
972    c=strtoul(fields[0][0], &end, 16);
973    if(end<=fields[0][0] || end!=fields[0][1]) {
974        log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
975        return;
976    }
977    if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
978        log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
979        return;
980    }
981
982    /* get general category, field 2 */
983    *fields[2][1]=0;
984    type = (int8_t)tagValues[MakeProp(fields[2][0])];
985    if(u_charType(c)!=type) {
986        log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
987    }
988    if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
989        log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
990    }
991
992    /* get canonical combining class, field 3 */
993    value=strtoul(fields[3][0], &end, 10);
994    if(end<=fields[3][0] || end!=fields[3][1]) {
995        log_err("error: syntax error in field 3 at code 0x%lx\n", c);
996        return;
997    }
998    if(value>255) {
999        log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1000        return;
1001    }
1002#if !UCONFIG_NO_NORMALIZATION
1003    if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1004        log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1005    }
1006    nfkc=((UnicodeDataContext *)context)->nfkc;
1007    if(value!=unorm2_getCombiningClass(nfkc, c)) {
1008        log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value);
1009    }
1010#endif
1011
1012    /* get BiDi category, field 4 */
1013    *fields[4][1]=0;
1014    i=MakeDir(fields[4][0]);
1015    if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1016        log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1017    }
1018
1019    /* get Decomposition_Type & Decomposition_Mapping, field 5 */
1020    d=NULL;
1021    if(fields[5][0]==fields[5][1]) {
1022        /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */
1023        if(c==0xac00 || c==0xd7a3) {
1024            dt=U_DT_CANONICAL;
1025        } else {
1026            dt=U_DT_NONE;
1027        }
1028    } else {
1029        d=fields[5][0];
1030        *fields[5][1]=0;
1031        dt=UCHAR_INVALID_CODE;
1032        if(*d=='<') {
1033            end=strchr(++d, '>');
1034            if(end!=NULL) {
1035                *end=0;
1036                dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d);
1037                d=u_skipWhitespace(end+1);
1038            }
1039        } else {
1040            dt=U_DT_CANONICAL;
1041        }
1042    }
1043    if(dt>U_DT_NONE) {
1044        if(c==0xac00) {
1045            dm[0]=0x1100;
1046            dm[1]=0x1161;
1047            dm[2]=0;
1048            dmLength=2;
1049        } else if(c==0xd7a3) {
1050            dm[0]=0xd788;
1051            dm[1]=0x11c2;
1052            dm[2]=0;
1053            dmLength=2;
1054        } else {
1055            dmLength=u_parseString(d, dm, 32, NULL, pErrorCode);
1056        }
1057    } else {
1058        dmLength=-1;
1059    }
1060    if(dt<0 || U_FAILURE(*pErrorCode)) {
1061        log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c);
1062        return;
1063    }
1064#if !UCONFIG_NO_NORMALIZATION
1065    i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE);
1066    if(i!=dt) {
1067        log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt);
1068    }
1069    /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */
1070    length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode);
1071    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1072        log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d "
1073                "or the Decomposition_Mapping is different (%s)\n",
1074                c, length, dmLength, u_errorName(*pErrorCode));
1075        return;
1076    }
1077    /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */
1078    if(dt!=U_DT_CANONICAL) {
1079        dmLength=-1;
1080    }
1081    nfc=((UnicodeDataContext *)context)->nfc;
1082    length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode);
1083    if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) {
1084        log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d "
1085                "or the Decomposition_Mapping is different (%s)\n",
1086                c, length, dmLength, u_errorName(*pErrorCode));
1087        return;
1088    }
1089    /* recompose */
1090    if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) {
1091        UChar32 a, b, composite;
1092        i=0;
1093        U16_NEXT(dm, i, dmLength, a);
1094        U16_NEXT(dm, i, dmLength, b);
1095        /* i==dmLength */
1096        composite=unorm2_composePair(nfc, a, b);
1097        if(composite!=c) {
1098            log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n",
1099                    (long)c, (long)a, (long)b, (long)composite);
1100        }
1101        /*
1102         * Note: NFKC has fewer round-trip mappings than NFC,
1103         * so we can't just test unorm2_composePair(nfkc, a, b) here without further data.
1104         */
1105    }
1106#endif
1107
1108    /* get ISO Comment, field 11 */
1109    *fields[11][1]=0;
1110    i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1111    if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1112        log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1113            c, u_errorName(*pErrorCode),
1114            U_FAILURE(*pErrorCode) ? buffer : "[error]",
1115            fields[11][0]);
1116    }
1117
1118    /* get uppercase mapping, field 12 */
1119    if(fields[12][0]!=fields[12][1]) {
1120        value=strtoul(fields[12][0], &end, 16);
1121        if(end!=fields[12][1]) {
1122            log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1123            return;
1124        }
1125        if((UChar32)value!=u_toupper(c)) {
1126            log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1127        }
1128    } else {
1129        /* no case mapping: the API must map the code point to itself */
1130        if(c!=u_toupper(c)) {
1131            log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1132        }
1133    }
1134
1135    /* get lowercase mapping, field 13 */
1136    if(fields[13][0]!=fields[13][1]) {
1137        value=strtoul(fields[13][0], &end, 16);
1138        if(end!=fields[13][1]) {
1139            log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1140            return;
1141        }
1142        if((UChar32)value!=u_tolower(c)) {
1143            log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1144        }
1145    } else {
1146        /* no case mapping: the API must map the code point to itself */
1147        if(c!=u_tolower(c)) {
1148            log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1149        }
1150    }
1151
1152    /* get titlecase mapping, field 14 */
1153    if(fields[14][0]!=fields[14][1]) {
1154        value=strtoul(fields[14][0], &end, 16);
1155        if(end!=fields[14][1]) {
1156            log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1157            return;
1158        }
1159        if((UChar32)value!=u_totitle(c)) {
1160            log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1161        }
1162    } else {
1163        /* no case mapping: the API must map the code point to itself */
1164        if(c!=u_totitle(c)) {
1165            log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1166        }
1167    }
1168}
1169
1170static UBool U_CALLCONV
1171enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1172    static const UChar32 test[][2]={
1173        {0x41, U_UPPERCASE_LETTER},
1174        {0x308, U_NON_SPACING_MARK},
1175        {0xfffe, U_GENERAL_OTHER_TYPES},
1176        {0xe0041, U_FORMAT_CHAR},
1177        {0xeffff, U_UNASSIGNED}
1178    };
1179
1180    int32_t i, count;
1181
1182    if(0!=strcmp((const char *)context, "a1")) {
1183        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1184        return FALSE;
1185    }
1186
1187    count=LENGTHOF(test);
1188    for(i=0; i<count; ++i) {
1189        if(start<=test[i][0] && test[i][0]<limit) {
1190            if(type!=(UCharCategory)test[i][1]) {
1191                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1192                        start, limit, (long)type, test[i][0], test[i][1]);
1193            }
1194            /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1195            return i==(count-1) ? FALSE : TRUE;
1196        }
1197    }
1198
1199    if(start>test[count-1][0]) {
1200        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1201                start, limit, (long)type);
1202        return FALSE;
1203    }
1204
1205    return TRUE;
1206}
1207
1208static UBool U_CALLCONV
1209enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1210    /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */
1211    static const int32_t defaultBidi[][2]={ /* { limit, class } */
1212        { 0x0590, U_LEFT_TO_RIGHT },
1213        { 0x0600, U_RIGHT_TO_LEFT },
1214        { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1215        { 0x08A0, U_RIGHT_TO_LEFT },
1216        { 0x0900, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */
1217        { 0xFB1D, U_LEFT_TO_RIGHT },
1218        { 0xFB50, U_RIGHT_TO_LEFT },
1219        { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1220        { 0xFE70, U_LEFT_TO_RIGHT },
1221        { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1222        { 0x10800, U_LEFT_TO_RIGHT },
1223        { 0x11000, U_RIGHT_TO_LEFT },
1224        { 0x1E800, U_LEFT_TO_RIGHT },  /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */
1225        { 0x1EE00, U_RIGHT_TO_LEFT },
1226        { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC },  /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */
1227        { 0x1F000, U_RIGHT_TO_LEFT },
1228        { 0x110000, U_LEFT_TO_RIGHT }
1229    };
1230
1231    UChar32 c;
1232    int32_t i;
1233    UCharDirection shouldBeDir;
1234
1235    /*
1236     * LineBreak.txt specifies:
1237     *   #  - Assigned characters that are not listed explicitly are given the value
1238     *   #    "AL".
1239     *   #  - Unassigned characters are given the value "XX".
1240     *
1241     * PUA characters are listed explicitly with "XX".
1242     * Verify that no assigned character has "XX".
1243     */
1244    if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1245        c=start;
1246        while(c<limit) {
1247            if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1248                log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1249            }
1250            ++c;
1251        }
1252    }
1253
1254    /*
1255     * Verify default Bidi classes.
1256     * For recent Unicode versions, see UCD.html.
1257     *
1258     * For older Unicode versions:
1259     * See table 3-7 "Bidirectional Character Types" in UAX #9.
1260     * http://www.unicode.org/reports/tr9/
1261     *
1262     * See also DerivedBidiClass.txt for Cn code points!
1263     *
1264     * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1265     * changed some default values.
1266     * In particular, non-characters and unassigned Default Ignorable Code Points
1267     * change from L to BN.
1268     *
1269     * UCD.html version 4.0.1 does not yet reflect these changes.
1270     */
1271    if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1272        /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1273        c=start;
1274        for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1275            if((int32_t)c<defaultBidi[i][0]) {
1276                while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1277                    if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1278                        shouldBeDir=U_BOUNDARY_NEUTRAL;
1279                    } else {
1280                        shouldBeDir=(UCharDirection)defaultBidi[i][1];
1281                    }
1282
1283                    if( u_charDirection(c)!=shouldBeDir ||
1284                        u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1285                    ) {
1286                        log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1287                            c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1288                    }
1289                    ++c;
1290                }
1291            }
1292        }
1293    }
1294
1295    return TRUE;
1296}
1297
1298/* tests for several properties */
1299static void TestUnicodeData()
1300{
1301    UVersionInfo expectVersionArray;
1302    UVersionInfo versionArray;
1303    char *fields[15][2];
1304    UErrorCode errorCode;
1305    UChar32 c;
1306    int8_t type;
1307
1308    UnicodeDataContext context;
1309
1310    u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1311    u_getUnicodeVersion(versionArray);
1312    if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1313    {
1314        log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1315        versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1316    }
1317
1318#if defined(ICU_UNICODE_VERSION)
1319    /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1320    if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1321    {
1322         log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1323    }
1324#endif
1325
1326    if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1327        log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1328    }
1329
1330    errorCode=U_ZERO_ERROR;
1331#if !UCONFIG_NO_NORMALIZATION
1332    context.nfc=unorm2_getNFCInstance(&errorCode);
1333    context.nfkc=unorm2_getNFKCInstance(&errorCode);
1334    if(U_FAILURE(errorCode)) {
1335        log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode));
1336        return;
1337    }
1338#endif
1339    parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode);
1340    if(U_FAILURE(errorCode)) {
1341        return; /* if we couldn't parse UnicodeData.txt, we should return */
1342    }
1343
1344    /* sanity check on repeated properties */
1345    for(c=0xfffe; c<=0x10ffff;) {
1346        type=u_charType(c);
1347        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1348            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1349        }
1350        if(type!=U_UNASSIGNED) {
1351            log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1352        }
1353        if((c&0xffff)==0xfffe) {
1354            ++c;
1355        } else {
1356            c+=0xffff;
1357        }
1358    }
1359
1360    /* test that PUA is not "unassigned" */
1361    for(c=0xe000; c<=0x10fffd;) {
1362        type=u_charType(c);
1363        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1364            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1365        }
1366        if(type==U_UNASSIGNED) {
1367            log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1368        } else if(type!=U_PRIVATE_USE_CHAR) {
1369            log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1370        }
1371        if(c==0xf8ff) {
1372            c=0xf0000;
1373        } else if(c==0xffffd) {
1374            c=0x100000;
1375        } else {
1376            ++c;
1377        }
1378    }
1379
1380    /* test u_enumCharTypes() */
1381    u_enumCharTypes(enumTypeRange, "a1");
1382
1383    /* check default properties */
1384    u_enumCharTypes(enumDefaultsRange, NULL);
1385}
1386
1387static void TestCodeUnit(){
1388    const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1389
1390    int32_t i;
1391
1392    for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1393        UChar c=codeunit[i];
1394        if(i<4){
1395            if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1396                log_err("ERROR: U+%04x is a single", c);
1397            }
1398
1399        }
1400        if(i >= 4 && i< 8){
1401            if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1402                log_err("ERROR: U+%04x is a first surrogate", c);
1403            }
1404        }
1405        if(i >= 8 && i< 12){
1406            if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1407                log_err("ERROR: U+%04x is a second surrogate", c);
1408            }
1409        }
1410    }
1411
1412}
1413
1414static void TestCodePoint(){
1415    const UChar32 codePoint[]={
1416        /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1417        0xd800,
1418        0xdbff,
1419        0xdc00,
1420        0xdfff,
1421        0xdc04,
1422        0xd821,
1423        /*not a surrogate, valid, isUnicodeChar , not Error*/
1424        0x20ac,
1425        0xd7ff,
1426        0xe000,
1427        0xe123,
1428        0x0061,
1429        0xe065,
1430        0x20402,
1431        0x24506,
1432        0x23456,
1433        0x20402,
1434        0x10402,
1435        0x23456,
1436        /*not a surrogate, not valid, isUnicodeChar, isError */
1437        0x0015,
1438        0x009f,
1439        /*not a surrogate, not valid, not isUnicodeChar, isError */
1440        0xffff,
1441        0xfffe,
1442    };
1443    int32_t i;
1444    for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1445        UChar32 c=codePoint[i];
1446        if(i<6){
1447            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1448                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1449            }
1450            if(UTF_IS_VALID(c)){
1451                log_err("ERROR: isValid() failed for U+%04x\n", c);
1452            }
1453            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1454                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1455            }
1456            if(UTF_IS_ERROR(c)){
1457                log_err("ERROR: isError() failed for U+%04x\n", c);
1458            }
1459        }else if(i >=6 && i<18){
1460            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1461                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1462            }
1463            if(!UTF_IS_VALID(c)){
1464                log_err("ERROR: isValid() failed for U+%04x\n", c);
1465            }
1466            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1467                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1468            }
1469            if(UTF_IS_ERROR(c)){
1470                log_err("ERROR: isError() failed for U+%04x\n", c);
1471            }
1472        }else if(i >=18 && i<20){
1473            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1474                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1475            }
1476            if(UTF_IS_VALID(c)){
1477                log_err("ERROR: isValid() failed for U+%04x\n", c);
1478            }
1479            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1480                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1481            }
1482            if(!UTF_IS_ERROR(c)){
1483                log_err("ERROR: isError() failed for U+%04x\n", c);
1484            }
1485        }
1486        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1487            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1488                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1489            }
1490            if(UTF_IS_VALID(c)){
1491                log_err("ERROR: isValid() failed for U+%04x\n", c);
1492            }
1493            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1494                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1495            }
1496            if(!UTF_IS_ERROR(c)){
1497                log_err("ERROR: isError() failed for U+%04x\n", c);
1498            }
1499        }
1500    }
1501
1502    if(
1503        !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1504        !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1505        U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1506        U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1507    ) {
1508        log_err("error with U_IS_BMP()\n");
1509    }
1510
1511    if(
1512        U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1513        U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1514        U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1515        !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1516    ) {
1517        log_err("error with U_IS_SUPPLEMENTARY()\n");
1518    }
1519}
1520
1521static void TestCharLength()
1522{
1523    const int32_t codepoint[]={
1524        1, 0x0061,
1525        1, 0xe065,
1526        1, 0x20ac,
1527        2, 0x20402,
1528        2, 0x23456,
1529        2, 0x24506,
1530        2, 0x20402,
1531        2, 0x10402,
1532        1, 0xd7ff,
1533        1, 0xe000
1534    };
1535
1536    int32_t i;
1537    UBool multiple;
1538    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1539        UChar32 c=codepoint[i+1];
1540        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1541            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c));
1542        }
1543        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1544        if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1545            log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1546        }
1547    }
1548}
1549
1550/*internal functions ----*/
1551static int32_t MakeProp(char* str)
1552{
1553    int32_t result = 0;
1554    char* matchPosition =0;
1555
1556    matchPosition = strstr(tagStrings, str);
1557    if (matchPosition == 0)
1558    {
1559        log_err("unrecognized type letter ");
1560        log_err(str);
1561    }
1562    else
1563        result = (int32_t)((matchPosition - tagStrings) / 2);
1564    return result;
1565}
1566
1567static int32_t MakeDir(char* str)
1568{
1569    int32_t pos = 0;
1570    for (pos = 0; pos < 19; pos++) {
1571        if (strcmp(str, dirStrings[pos]) == 0) {
1572            return pos;
1573        }
1574    }
1575    return -1;
1576}
1577
1578/* test u_charName() -------------------------------------------------------- */
1579
/*
 * Sample code points with their expected names.
 * enumCharNamesFn() compares each field with the name enumerated for the matching name choice.
 */
static const struct {
    uint32_t code;          /* code point */
    const char *name;       /* expected for U_UNICODE_CHAR_NAME */
    const char *oldName;    /* expected for U_UNICODE_10_CHAR_NAME */
    const char *extName;    /* expected for U_EXTENDED_CHAR_NAME */
    const char *alias;      /* expected for U_CHAR_NAME_ALIAS, NULL if there is none */
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "",
             "LATIN SMALL LETTER A",
             NULL},
    {0x01a2, "LATIN CAPITAL LETTER OI", "",
             "LATIN CAPITAL LETTER OI",
             "LATIN CAPITAL LETTER GHA"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "",
             "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
             NULL},
    {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "",
             "TIBETAN MARK BSKA- SHOG GI MGO RGYAN",
             "TIBETAN MARK BKA- SHOG GI MGO RGYAN"},
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "",
             "CJK UNIFIED IDEOGRAPH-3401",
             NULL},
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "",
             "CJK UNIFIED IDEOGRAPH-7FED",
             NULL},
    {0xac00, "HANGUL SYLLABLE GA", "",
             "HANGUL SYLLABLE GA",
             NULL},
    {0xd7a3, "HANGUL SYLLABLE HIH", "",
             "HANGUL SYLLABLE HIH",
             NULL},
    {0xd800, "", "",
             "<lead surrogate-D800>",
             NULL},
    {0xdc00, "", "",
             "<trail surrogate-DC00>",
             NULL},
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "",
             "FULLWIDTH LEFT PARENTHESIS",
             NULL},
    {0xffe5, "FULLWIDTH YEN SIGN", "",
             "FULLWIDTH YEN SIGN",
             NULL},
    {0xffff, "", "",
             "<noncharacter-FFFF>",
             NULL},
    {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "",
              "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS",
              "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"},
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "",
              "CJK UNIFIED IDEOGRAPH-23456",
              NULL}
};
1607
1608static UBool
1609enumCharNamesFn(void *context,
1610                UChar32 code, UCharNameChoice nameChoice,
1611                const char *name, int32_t length) {
1612    int32_t *pCount=(int32_t *)context;
1613    const char *expected;
1614    int i;
1615
1616    if(length<=0 || length!=(int32_t)strlen(name)) {
1617        /* should not be called with an empty string or invalid length */
1618        log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1619        return TRUE;
1620    }
1621
1622    ++*pCount;
1623    for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1624        if(code==(UChar32)names[i].code) {
1625            switch (nameChoice) {
1626                case U_EXTENDED_CHAR_NAME:
1627                    if(0!=strcmp(name, names[i].extName)) {
1628                        log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1629                    }
1630                    break;
1631                case U_UNICODE_CHAR_NAME:
1632                    if(0!=strcmp(name, names[i].name)) {
1633                        log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1634                    }
1635                    break;
1636                case U_UNICODE_10_CHAR_NAME:
1637                    expected=names[i].oldName;
1638                    if(expected[0]==0 || 0!=strcmp(name, expected)) {
1639                        log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected);
1640                    }
1641                    break;
1642                case U_CHAR_NAME_ALIAS:
1643                    expected=names[i].alias;
1644                    if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) {
1645                        log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected);
1646                    }
1647                    break;
1648                case U_CHAR_NAME_CHOICE_COUNT:
1649                    break;
1650            }
1651            break;
1652        }
1653    }
1654    return TRUE;
1655}
1656
/* Context handed to enumExtCharNamesFn() through u_enumCharNames(). */
struct enumExtCharNamesContext {
    /* number of names counted so far; its address is forwarded to enumCharNamesFn() */
    uint32_t length;
    /* code point of the previous callback; TestCharNames() initializes it to -1 */
    int32_t last;
};
1661
1662static UBool
1663enumExtCharNamesFn(void *context,
1664                UChar32 code, UCharNameChoice nameChoice,
1665                const char *name, int32_t length) {
1666    struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1667
1668    if (ecncp->last != (int32_t) code - 1) {
1669        if (ecncp->last < 0) {
1670            log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1671        } else {
1672            log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1673        }
1674    }
1675    ecncp->last = (int32_t) code;
1676
1677    if (!*name) {
1678        log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1679    }
1680
1681    return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1682}
1683
1684/**
1685 * This can be made more efficient by moving it into putil.c and having
1686 * it directly access the ebcdic translation tables.
1687 * TODO: If we get this method in putil.c, then delete it from here.
1688 */
1689static UChar
1690u_charToUChar(char c) {
1691    UChar uc;
1692    u_charsToUChars(&c, &uc, 1);
1693    return uc;
1694}
1695
1696static void
1697TestCharNames() {
1698    static char name[80];
1699    UErrorCode errorCode=U_ZERO_ERROR;
1700    struct enumExtCharNamesContext extContext;
1701    const char *expected;
1702    int32_t length;
1703    UChar32 c;
1704    int32_t i;
1705
1706    log_verbose("Testing uprv_getMaxCharNameLength()\n");
1707    length=uprv_getMaxCharNameLength();
1708    if(length==0) {
1709        /* no names data available */
1710        return;
1711    }
1712    if(length<83) { /* Unicode 3.2 max char name length */
1713        log_err("uprv_getMaxCharNameLength()=%d is too short");
1714    }
1715    /* ### TODO same tests for max ISO comment length as for max name length */
1716
1717    log_verbose("Testing u_charName()\n");
1718    for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1719        /* modern Unicode character name */
1720        length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1721        if(U_FAILURE(errorCode)) {
1722            log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1723            return;
1724        }
1725        if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1726            log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1727        }
1728
1729        /* find the modern name */
1730        if (*names[i].name) {
1731            c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1732            if(U_FAILURE(errorCode)) {
1733                log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1734                return;
1735            }
1736            if(c!=(UChar32)names[i].code) {
1737                log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1738            }
1739        }
1740
1741        /* Unicode 1.0 character name */
1742        length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1743        if(U_FAILURE(errorCode)) {
1744            log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1745            return;
1746        }
1747        if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1748            log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1749        }
1750
1751        /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1752        if(names[i].oldName[0]!=0 /* && length>0 */) {
1753            c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1754            if(U_FAILURE(errorCode)) {
1755                log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1756                return;
1757            }
1758            if(c!=(UChar32)names[i].code) {
1759                log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1760            }
1761        }
1762
1763        /* Unicode character name alias */
1764        length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode);
1765        if(U_FAILURE(errorCode)) {
1766            log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode));
1767            return;
1768        }
1769        expected=names[i].alias;
1770        if(expected==NULL) {
1771            expected="";
1772        }
1773        if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) {
1774            log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n",
1775                    names[i].code, name, length, expected);
1776        }
1777
1778        /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */
1779        if(expected[0]!=0 /* && length>0 */) {
1780            c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode);
1781            if(U_FAILURE(errorCode)) {
1782                log_err("u_charFromName(%s - alias) error %s\n",
1783                        expected, u_errorName(errorCode));
1784                return;
1785            }
1786            if(c!=(UChar32)names[i].code) {
1787                log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n",
1788                        expected, c, names[i].code);
1789            }
1790        }
1791    }
1792
1793    /* test u_enumCharNames() */
1794    length=0;
1795    errorCode=U_ZERO_ERROR;
1796    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1797    if(U_FAILURE(errorCode) || length<94140) {
1798        log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1799    }
1800
1801    extContext.length = 0;
1802    extContext.last = -1;
1803    errorCode=U_ZERO_ERROR;
1804    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1805    if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1806        log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1807    }
1808
1809    /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1810    if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1811        log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1812    }
1813
1814    /* Test getCharNameCharacters */
1815    if(!getTestOption(QUICK_OPTION)) {
1816        enum { BUFSIZE = 256 };
1817        UErrorCode ec = U_ZERO_ERROR;
1818        char buf[BUFSIZE];
1819        int32_t maxLength;
1820        UChar32 cp;
1821        UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1822        int32_t l1, l2;
1823        UBool map[256];
1824        UBool ok;
1825
1826        USet* set = uset_open(1, 0); /* empty set */
1827        USet* dumb = uset_open(1, 0); /* empty set */
1828
1829        /*
1830         * uprv_getCharNameCharacters() will likely return more lowercase
1831         * letters than actual character names contain because
1832         * it includes all the characters in lowercased names of
1833         * general categories, for the full possible set of extended names.
1834         */
1835        {
1836            USetAdder sa={
1837                NULL,
1838                uset_add,
1839                uset_addRange,
1840                uset_addString,
1841                NULL /* don't need remove() */
1842            };
1843            sa.set=set;
1844            uprv_getCharNameCharacters(&sa);
1845        }
1846
1847        /* build set the dumb (but sure-fire) way */
1848        for (i=0; i<256; ++i) {
1849            map[i] = FALSE;
1850        }
1851
1852        maxLength=0;
1853        for (cp=0; cp<0x110000; ++cp) {
1854            int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1855                                     buf, BUFSIZE, &ec);
1856            if (U_FAILURE(ec)) {
1857                log_err("FAIL: u_charName failed when it shouldn't\n");
1858                uset_close(set);
1859                uset_close(dumb);
1860                return;
1861            }
1862            if(len>maxLength) {
1863                maxLength=len;
1864            }
1865
1866            for (i=0; i<len; ++i) {
1867                if (!map[(uint8_t) buf[i]]) {
1868                    uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1869                    map[(uint8_t) buf[i]] = TRUE;
1870                }
1871            }
1872
1873            /* test for leading/trailing whitespace */
1874            if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1875                log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1876            }
1877        }
1878
1879        if(map[(uint8_t)'\t']) {
1880            log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1881        }
1882
1883        length=uprv_getMaxCharNameLength();
1884        if(length!=maxLength) {
1885            log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1886                    length, maxLength);
1887        }
1888
1889        /* compare the sets.  Where is my uset_equals?!! */
1890        ok=TRUE;
1891        for(i=0; i<256; ++i) {
1892            if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1893                if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1894                    /* ignore lowercase a-z that are in set but not in dumb */
1895                    ok=TRUE;
1896                } else {
1897                    ok=FALSE;
1898                    break;
1899                }
1900            }
1901        }
1902
1903        l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1904        l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1905        if (U_FAILURE(ec)) {
1906            log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1907            uset_close(set);
1908            uset_close(dumb);
1909            return;
1910        }
1911
1912        if (l1 >= BUFSIZE) {
1913            l1 = BUFSIZE-1;
1914            pat[l1] = 0;
1915        }
1916        if (l2 >= BUFSIZE) {
1917            l2 = BUFSIZE-1;
1918            dumbPat[l2] = 0;
1919        }
1920
1921        if (!ok) {
1922            log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1923                    aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1924        } else if(getTestOption(VERBOSITY_OPTION)) {
1925            log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1926        }
1927
1928        uset_close(set);
1929        uset_close(dumb);
1930    }
1931
1932    /* ### TODO: test error cases and other interesting things */
1933}
1934
1935/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1936
1937static void
1938TestMirroring() {
1939    USet *set;
1940    UErrorCode errorCode;
1941
1942    UChar32 start, end, c2, c3;
1943    int32_t i;
1944
1945    U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1946
1947    U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1948
1949    log_verbose("Testing u_isMirrored()\n");
1950    if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1951         !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1952        )
1953    ) {
1954        log_err("u_isMirrored() does not work correctly\n");
1955    }
1956
1957    log_verbose("Testing u_charMirror()\n");
1958    if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1959         u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1960         u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1961         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1962         u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1963         )
1964    ) {
1965        log_err("u_charMirror() does not work correctly\n");
1966    }
1967
1968    /* verify that Bidi_Mirroring_Glyph roundtrips */
1969    errorCode=U_ZERO_ERROR;
1970    set=uset_openPattern(mirroredPattern, 17, &errorCode);
1971
1972    if (U_FAILURE(errorCode)) {
1973        log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1974    } else {
1975        for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1976            do {
1977                c2=u_charMirror(start);
1978                c3=u_charMirror(c2);
1979                if(c3!=start) {
1980                    log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1981                }
1982            } while(++start<=end);
1983        }
1984    }
1985
1986    uset_close(set);
1987}
1988
1989
1990struct RunTestData
1991{
1992    const char *runText;
1993    UScriptCode runCode;
1994};
1995
1996typedef struct RunTestData RunTestData;
1997
1998static void
1999CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
2000                const char *prefix)
2001{
2002    int32_t run, runStart, runLimit;
2003    UScriptCode runCode;
2004
2005    /* iterate over all the runs */
2006    run = 0;
2007    while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
2008        if (runStart != runStarts[run]) {
2009            log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
2010                prefix, run, runStarts[run], runStart);
2011        }
2012
2013        if (runLimit != runStarts[run + 1]) {
2014            log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
2015                prefix, run, runStarts[run + 1], runLimit);
2016        }
2017
2018        if (runCode != testData[run].runCode) {
2019            log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
2020                prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
2021        }
2022
2023        run += 1;
2024
2025        /* stop when we've seen all the runs we expect to see */
2026        if (run >= nRuns) {
2027            break;
2028        }
2029    }
2030
2031    /* Complain if we didn't see then number of runs we expected */
2032    if (run != nRuns) {
2033        log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
2034    }
2035}
2036
2037static void
2038TestUScriptRunAPI()
2039{
2040    static const RunTestData testData1[] = {
2041        {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
2042        {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
2043        {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
2044        {"English (", USCRIPT_LATIN},
2045        {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
2046        {") ", USCRIPT_LATIN},
2047        {"\\u6F22\\u5B75", USCRIPT_HAN},
2048        {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
2049        {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
2050        {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
2051    };
2052
2053    static const RunTestData testData2[] = {
2054       {"((((((((((abc))))))))))", USCRIPT_LATIN}
2055    };
2056
2057    static const struct {
2058      const RunTestData *testData;
2059      int32_t nRuns;
2060    } testDataEntries[] = {
2061        {testData1, LENGTHOF(testData1)},
2062        {testData2, LENGTHOF(testData2)}
2063    };
2064
2065    static const int32_t nTestEntries = LENGTHOF(testDataEntries);
2066    int32_t testEntry;
2067
2068    for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
2069        UChar testString[1024];
2070        int32_t runStarts[256];
2071        int32_t nTestRuns = testDataEntries[testEntry].nRuns;
2072        const RunTestData *testData = testDataEntries[testEntry].testData;
2073
2074        int32_t run, stringLimit;
2075        UScriptRun *scriptRun = NULL;
2076        UErrorCode err;
2077
2078        /*
2079         * Fill in the test string and the runStarts array.
2080         */
2081        stringLimit = 0;
2082        for (run = 0; run < nTestRuns; run += 1) {
2083            runStarts[run] = stringLimit;
2084            stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
2085            /*stringLimit -= 1;*/
2086        }
2087
2088        /* The limit of the last run */
2089        runStarts[nTestRuns] = stringLimit;
2090
2091        /*
2092         * Make sure that calling uscript_OpenRun with a NULL text pointer
2093         * and a non-zero text length returns the correct error.
2094         */
2095        err = U_ZERO_ERROR;
2096        scriptRun = uscript_openRun(NULL, stringLimit, &err);
2097
2098        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2099            log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2100        }
2101
2102        if (scriptRun != NULL) {
2103            log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
2104            uscript_closeRun(scriptRun);
2105        }
2106
2107        /*
2108         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2109         * and a zero text length returns the correct error.
2110         */
2111        err = U_ZERO_ERROR;
2112        scriptRun = uscript_openRun(testString, 0, &err);
2113
2114        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2115            log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2116        }
2117
2118        if (scriptRun != NULL) {
2119            log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
2120            uscript_closeRun(scriptRun);
2121        }
2122
2123        /*
2124         * Make sure that calling uscript_openRun with a NULL text pointer
2125         * and a zero text length doesn't return an error.
2126         */
2127        err = U_ZERO_ERROR;
2128        scriptRun = uscript_openRun(NULL, 0, &err);
2129
2130        if (U_FAILURE(err)) {
2131            log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2132        }
2133
2134        /* Make sure that the empty iterator doesn't find any runs */
2135        if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2136            log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2137        }
2138
2139        /*
2140         * Make sure that calling uscript_setRunText with a NULL text pointer
2141         * and a non-zero text length returns the correct error.
2142         */
2143        err = U_ZERO_ERROR;
2144        uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2145
2146        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2147            log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2148        }
2149
2150        /*
2151         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2152         * and a zero text length returns the correct error.
2153         */
2154        err = U_ZERO_ERROR;
2155        uscript_setRunText(scriptRun, testString, 0, &err);
2156
2157        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2158            log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2159        }
2160
2161        /*
2162         * Now call uscript_setRunText on the empty iterator
2163         * and make sure that it works.
2164         */
2165        err = U_ZERO_ERROR;
2166        uscript_setRunText(scriptRun, testString, stringLimit, &err);
2167
2168        if (U_FAILURE(err)) {
2169            log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2170        } else {
2171            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2172        }
2173
2174        uscript_closeRun(scriptRun);
2175
2176        /*
2177         * Now open an interator over the testString
2178         * using uscript_openRun and make sure that it works
2179         */
2180        scriptRun = uscript_openRun(testString, stringLimit, &err);
2181
2182        if (U_FAILURE(err)) {
2183            log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2184        } else {
2185            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2186        }
2187
2188        /* Now reset the iterator, and make sure
2189         * that it still works.
2190         */
2191        uscript_resetRun(scriptRun);
2192
2193        CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2194
2195        /* Close the iterator */
2196        uscript_closeRun(scriptRun);
2197    }
2198}
2199
2200/* test additional, non-core properties */
2201static void
2202TestAdditionalProperties() {
2203    /* test data for u_charAge() */
2204    static const struct {
2205        UChar32 c;
2206        UVersionInfo version;
2207    } charAges[]={
2208        {0x41,    { 1, 1, 0, 0 }},
2209        {0xffff,  { 1, 1, 0, 0 }},
2210        {0x20ab,  { 2, 0, 0, 0 }},
2211        {0x2fffe, { 2, 0, 0, 0 }},
2212        {0x20ac,  { 2, 1, 0, 0 }},
2213        {0xfb1d,  { 3, 0, 0, 0 }},
2214        {0x3f4,   { 3, 1, 0, 0 }},
2215        {0x10300, { 3, 1, 0, 0 }},
2216        {0x220,   { 3, 2, 0, 0 }},
2217        {0xff60,  { 3, 2, 0, 0 }}
2218    };
2219
2220    /* test data for u_hasBinaryProperty() */
2221    static const int32_t
2222    props[][3]={ /* code point, property, value */
2223        { 0x0627, UCHAR_ALPHABETIC, TRUE },
2224        { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2225        { 0x2028, UCHAR_ALPHABETIC, FALSE },
2226
2227        { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2228        { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2229
2230        { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2231        { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2232
2233        { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2234        { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2235
2236        /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2237        { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2238        { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2239        { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2240        { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2241
2242        { 0x058a, UCHAR_DASH, TRUE },
2243        { 0x007e, UCHAR_DASH, FALSE },
2244
2245        { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2246        { 0x3000, UCHAR_DIACRITIC, FALSE },
2247
2248        { 0x0e46, UCHAR_EXTENDER, TRUE },
2249        { 0x0020, UCHAR_EXTENDER, FALSE },
2250
2251#if !UCONFIG_NO_NORMALIZATION
2252        { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2253        { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2254        { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2255
2256        { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2257        { 0x0308, UCHAR_NFD_INERT, FALSE },
2258
2259        { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2260        { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2261
2262        { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2263        { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2264        { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2265        { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2266        { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2267        { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2268
2269        { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2270        { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2271
2272        { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2273        { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2274        { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2275        { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2276        { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2277        { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2278#endif
2279
2280        { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2281        { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2282        { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2283
2284        { 0x30fb, UCHAR_HYPHEN, TRUE },
2285        { 0xfe58, UCHAR_HYPHEN, FALSE },
2286
2287        { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2288        { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2289        { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2290
2291        { 0x2172, UCHAR_ID_START, TRUE },
2292        { 0x007a, UCHAR_ID_START, TRUE },
2293        { 0x0039, UCHAR_ID_START, FALSE },
2294
2295        { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2296        { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2297        { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2298
2299        { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2300        { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2301
2302        { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2303        { 0x0345, UCHAR_LOWERCASE, TRUE },
2304        { 0x0030, UCHAR_LOWERCASE, FALSE },
2305
2306        { 0x1d7a9, UCHAR_MATH, TRUE },
2307        { 0x2135, UCHAR_MATH, TRUE },
2308        { 0x0062, UCHAR_MATH, FALSE },
2309
2310        { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2311        { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2312        { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2313
2314        { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2315        { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2316        { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2317
2318        { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2319        { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2320
2321        { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2322        { 0x2162, UCHAR_UPPERCASE, TRUE },
2323        { 0x0345, UCHAR_UPPERCASE, FALSE },
2324
2325        { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2326        { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2327        { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2328
2329        { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2330        { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2331        { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2332
2333        { 0x16ee, UCHAR_XID_START, TRUE },
2334        { 0x23456, UCHAR_XID_START, TRUE },
2335        { 0x1d1aa, UCHAR_XID_START, FALSE },
2336
2337        /*
2338         * Version break:
2339         * The following properties are only supported starting with the
2340         * Unicode version indicated in the second field.
2341         */
2342        { -1, 0x320, 0 },
2343
2344        { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2345        { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2346        { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2347
2348        { 0x0149, UCHAR_DEPRECATED, TRUE },         /* changed in Unicode 5.2 */
2349        { 0x0341, UCHAR_DEPRECATED, FALSE },        /* changed in Unicode 5.2 */
2350        { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2351        { 0xe0100, UCHAR_DEPRECATED, FALSE },
2352
2353        { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2354        { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2355        { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2356        { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2357
2358        { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2359        { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2360        { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2361        { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2362
2363        { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2364        { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2365
2366        { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2367        { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2368
2369        { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2370        { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2371
2372        { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2373        { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2374
2375        { 0x2e9b, UCHAR_RADICAL, TRUE },
2376        { 0x4e00, UCHAR_RADICAL, FALSE },
2377
2378        { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2379        { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2380
2381        { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2382        { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2383
2384        { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2385
2386        { 0x002e, UCHAR_S_TERM, TRUE },
2387        { 0x0061, UCHAR_S_TERM, FALSE },
2388
2389        { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2390        { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2391        { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2392        { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2393
2394        /* enum/integer type properties */
2395
2396        /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2397        /* test default Bidi classes for unassigned code points */
2398        { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2399        { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2400        { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2401        { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2402        { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2403        { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2404        { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2405        { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2406        { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2407        { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2408        { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2409
2410        { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2411        { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2412        { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2413        { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2414        { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2415        { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2416        { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2417        { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2418
2419        { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2420        { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2421        { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2422        { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2423        { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2424        { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2425        { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2426        { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2427        { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2428        { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2429        { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2430
2431        /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2432        { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2433
2434        { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2435        { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2436        { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2437        { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2438        { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2439        { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2440        { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2441        { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2442        { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2443
2444        { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2445        { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2446        { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2447        { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2448        { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2449        { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2450        { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2451        { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2452        { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2453        { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2454        { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2455        { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2456        { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2457        { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2458        { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2459        { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2460        { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2461
2462        /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2463        { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 },
2464        { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER },     /* changed in Unicode 5.2 */
2465
2466        { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2467        { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2468        { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2469        { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2470        { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2471
2472        { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2473        { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2474        { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2475        { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2476        { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2477        { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2478        { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2479        { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2480
2481        /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2482        { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2483        { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2484        { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2485        { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2486        { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2487        { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2488        { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2489        { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2490        { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2491        { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2492        { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2493        { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2494        { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2495        { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2496        { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2497
2498        /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2499
2500        /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2501
2502        { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2503        { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2504        { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2505        { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2506        { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2507        { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2508        { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2509
2510        { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2511        { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2512        { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },     /* changed in Unicode 5.2 */
2513        { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2514
2515        { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2516        { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2517        { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2518        { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2519        { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2520        { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2521
2522        { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2523        { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2524        { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },       /* changed in Unicode 5.2 */
2525        { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2526
2527        { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2528        { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2529        { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2530        { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2531        { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2532        { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2533        { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2534
2535        { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2536        { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2537        { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },    /* changed in Unicode 5.2 */
2538        { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2539
2540        { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2541        { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2542        { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2543        { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2544
2545        { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2546        { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2547        { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2548        { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2549        { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2550
2551        { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2552
2553        { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2554
2555        { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2556        { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2557        { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2558
2559        { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2560        { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2561        { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2562        { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2563        { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2564
2565        { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2566        { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2567        { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2568
2569        { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2570        { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2571        { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2572        { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2573
2574        { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2575        { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2576        { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2577        { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2578        { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2579        { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2580
2581        { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2582        { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2583        { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2584        { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2585
2586        { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2587        { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2588        { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2589        { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2590
2591        { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2592        { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2593        { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2594        { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2595
2596        { -1, 0x520, 0 }, /* version break for Unicode 5.2 */
2597
2598        /* unassigned code points in new default Bidi R blocks */
2599        { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2600        { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2601
2602        /* test some script codes >127 */
2603        { 0xa6e6,  UCHAR_SCRIPT, USCRIPT_BAMUM },
2604        { 0xa4d0,  UCHAR_SCRIPT, USCRIPT_LISU },
2605        { 0x10a7f,  UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN },
2606
2607        { -1, 0x600, 0 }, /* version break for Unicode 6.0 */
2608
2609        /* value changed in Unicode 6.0 */
2610        { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL },
2611
2612        { -1, 0x610, 0 }, /* version break for Unicode 6.1 */
2613
2614        /* unassigned code points in new/changed default Bidi AL blocks */
2615        { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2616        { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2617
2618        /* undefined UProperty values */
2619        { 0x61, 0x4a7, 0 },
2620        { 0x234bc, 0x15ed, 0 }
2621    };
2622
2623    UVersionInfo version;
2624    UChar32 c;
2625    int32_t i, result, uVersion;
2626    UProperty which;
2627
2628    /* what is our Unicode version? */
2629    u_getUnicodeVersion(version);
2630    uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2631
2632    u_charAge(0x20, version);
2633    if(version[0]==0) {
2634        /* no additional properties available */
2635        log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2636        return;
2637    }
2638
2639    /* test u_charAge() */
2640    for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2641        u_charAge(charAges[i].c, version);
2642        if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2643            log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2644                charAges[i].c,
2645                version[0], version[1], version[2], version[3],
2646                charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2647        }
2648    }
2649
2650    if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2651        u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2652        u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2653        u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2654        u_getIntPropertyMinValue(0x2345)!=0
2655    ) {
2656        log_err("error: u_getIntPropertyMinValue() wrong\n");
2657    }
2658    if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2659        log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2660    }
2661    if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2662        log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2663    }
2664    if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2665        log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2666    }
2667    if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2668        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2669    }
2670    if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2671        log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2672    }
2673    if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2674        log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2675    }
2676    if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2677        log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2678    }
2679    if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2680        log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2681    }
2682    if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2683        log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2684    }
2685    if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2686        log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2687    }
2688    if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2689        log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2690    }
2691    if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2692        log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2693    }
2694    if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2695        log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2696    }
2697    /*JB#2410*/
2698    if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2699        log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2700    }
2701    if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2702        log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2703    }
2704    if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2705        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2706    }
2707    if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2708        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2709    }
2710    if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2711        log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2712    }
2713
2714    /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2715    for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2716        const char *whichName;
2717
2718        if(props[i][0]<0) {
2719            /* Unicode version break */
2720            if(uVersion<props[i][1]) {
2721                break; /* do not test properties that are not yet supported */
2722            } else {
2723                continue; /* skip this row */
2724            }
2725        }
2726
2727        c=(UChar32)props[i][0];
2728        which=(UProperty)props[i][1];
2729        whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME);
2730
2731        if(which<UCHAR_INT_START) {
2732            result=u_hasBinaryProperty(c, which);
2733            if(result!=props[i][2]) {
2734                log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n",
2735                        c, whichName, result, i);
2736            }
2737        }
2738
2739        result=u_getIntPropertyValue(c, which);
2740        if(result!=props[i][2]) {
2741            log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n",
2742                    c, whichName, result, props[i][2], i);
2743        }
2744
2745        /* test separate functions, too */
2746        switch((UProperty)props[i][1]) {
2747        case UCHAR_ALPHABETIC:
2748            if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2749                log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2750                        props[i][0], result, i);
2751            }
2752            break;
2753        case UCHAR_LOWERCASE:
2754            if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2755                log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2756                        props[i][0], result, i);
2757            }
2758            break;
2759        case UCHAR_UPPERCASE:
2760            if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2761                log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2762                        props[i][0], result, i);
2763            }
2764            break;
2765        case UCHAR_WHITE_SPACE:
2766            if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2767                log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2768                        props[i][0], result, i);
2769            }
2770            break;
2771        default:
2772            break;
2773        }
2774    }
2775}
2776
2777static void
2778TestNumericProperties(void) {
2779    /* see UnicodeData.txt, DerivedNumericValues.txt */
2780    static const struct {
2781        UChar32 c;
2782        int32_t type;
2783        double numValue;
2784    } values[]={
2785        { 0x12456, U_NT_NUMERIC, -1. },
2786        { 0x12457, U_NT_NUMERIC, -1. },
2787        { 0x0F33, U_NT_NUMERIC, -1./2. },
2788        { 0x0C66, U_NT_DECIMAL, 0 },
2789        { 0x96f6, U_NT_NUMERIC, 0 },
2790        { 0xa833, U_NT_NUMERIC, 1./16. },
2791        { 0x2152, U_NT_NUMERIC, 1./10. },
2792        { 0x2151, U_NT_NUMERIC, 1./9. },
2793        { 0x1245f, U_NT_NUMERIC, 1./8. },
2794        { 0x2150, U_NT_NUMERIC, 1./7. },
2795        { 0x2159, U_NT_NUMERIC, 1./6. },
2796        { 0x09f6, U_NT_NUMERIC, 3./16. },
2797        { 0x2155, U_NT_NUMERIC, 1./5. },
2798        { 0x00BD, U_NT_NUMERIC, 1./2. },
2799        { 0x0031, U_NT_DECIMAL, 1. },
2800        { 0x4e00, U_NT_NUMERIC, 1. },
2801        { 0x58f1, U_NT_NUMERIC, 1. },
2802        { 0x10320, U_NT_NUMERIC, 1. },
2803        { 0x0F2B, U_NT_NUMERIC, 3./2. },
2804        { 0x00B2, U_NT_DIGIT, 2. },
2805        { 0x5f10, U_NT_NUMERIC, 2. },
2806        { 0x1813, U_NT_DECIMAL, 3. },
2807        { 0x5f0e, U_NT_NUMERIC, 3. },
2808        { 0x2173, U_NT_NUMERIC, 4. },
2809        { 0x8086, U_NT_NUMERIC, 4. },
2810        { 0x278E, U_NT_DIGIT, 5. },
2811        { 0x1D7F2, U_NT_DECIMAL, 6. },
2812        { 0x247A, U_NT_DIGIT, 7. },
2813        { 0x7396, U_NT_NUMERIC, 9. },
2814        { 0x1372, U_NT_NUMERIC, 10. },
2815        { 0x216B, U_NT_NUMERIC, 12. },
2816        { 0x16EE, U_NT_NUMERIC, 17. },
2817        { 0x249A, U_NT_NUMERIC, 19. },
2818        { 0x303A, U_NT_NUMERIC, 30. },
2819        { 0x5345, U_NT_NUMERIC, 30. },
2820        { 0x32B2, U_NT_NUMERIC, 37. },
2821        { 0x1375, U_NT_NUMERIC, 40. },
2822        { 0x10323, U_NT_NUMERIC, 50. },
2823        { 0x0BF1, U_NT_NUMERIC, 100. },
2824        { 0x964c, U_NT_NUMERIC, 100. },
2825        { 0x217E, U_NT_NUMERIC, 500. },
2826        { 0x2180, U_NT_NUMERIC, 1000. },
2827        { 0x4edf, U_NT_NUMERIC, 1000. },
2828        { 0x2181, U_NT_NUMERIC, 5000. },
2829        { 0x137C, U_NT_NUMERIC, 10000. },
2830        { 0x4e07, U_NT_NUMERIC, 10000. },
2831        { 0x12432, U_NT_NUMERIC, 216000. },
2832        { 0x12433, U_NT_NUMERIC, 432000. },
2833        { 0x4ebf, U_NT_NUMERIC, 100000000. },
2834        { 0x5146, U_NT_NUMERIC, 1000000000000. },
2835        { -1, U_NT_NONE, U_NO_NUMERIC_VALUE },
2836        { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2837        { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2838        { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2839        { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2840        { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2841        { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE },
2842        { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE }
2843    };
2844
2845    double nv;
2846    UChar32 c;
2847    int32_t i, type;
2848
2849    for(i=0; i<LENGTHOF(values); ++i) {
2850        c=values[i].c;
2851        type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2852        nv=u_getNumericValue(c);
2853
2854        if(type!=values[i].type) {
2855            log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2856        }
2857        if(0.000001 <= fabs(nv - values[i].numValue)) {
2858            log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2859        }
2860    }
2861}
2862
2863/**
2864 * Test the property names and property value names API.
2865 */
2866static void
2867TestPropertyNames(void) {
2868    int32_t p, v, choice=0, rev;
2869    UBool atLeastSomething = FALSE;
2870
2871    for (p=0; ; ++p) {
2872        UProperty propEnum = (UProperty)p;
2873        UBool sawProp = FALSE;
2874        if(p > 10 && !atLeastSomething) {
2875          log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2876          return;
2877        }
2878
2879        for (choice=0; ; ++choice) {
2880            const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2881            if (name) {
2882                if (!sawProp)
2883                    log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2884                log_verbose("%d=\"%s\"", choice, name);
2885                sawProp = TRUE;
2886                atLeastSomething = TRUE;
2887
2888                /* test reverse mapping */
2889                rev = u_getPropertyEnum(name);
2890                if (rev != p) {
2891                    log_err("Property round-trip failure: %d -> %s -> %d\n",
2892                            p, name, rev);
2893                }
2894            }
2895            if (!name && choice>0) break;
2896        }
2897        if (sawProp) {
2898            /* looks like a valid property; check the values */
2899            const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2900            int32_t max = 0;
2901            if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2902                max = 255;
2903            } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2904                /* it's far too slow to iterate all the way up to
2905                   the real max, U_GC_P_MASK */
2906                max = U_GC_NL_MASK;
2907            } else if (p == UCHAR_BLOCK) {
2908                /* UBlockCodes, unlike other values, start at 1 */
2909                max = 1;
2910            }
2911            log_verbose("\n");
2912            for (v=-1; ; ++v) {
2913                UBool sawValue = FALSE;
2914                for (choice=0; ; ++choice) {
2915                    const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2916                    if (vname) {
2917                        if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2918                        log_verbose("%d=\"%s\"", choice, vname);
2919                        sawValue = TRUE;
2920
2921                        /* test reverse mapping */
2922                        rev = u_getPropertyValueEnum(propEnum, vname);
2923                        if (rev != v) {
2924                            log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2925                                    pname, v, vname, rev);
2926                        }
2927                    }
2928                    if (!vname && choice>0) break;
2929                }
2930                if (sawValue) {
2931                    log_verbose("\n");
2932                }
2933                if (!sawValue && v>=max) break;
2934            }
2935        }
2936        if (!sawProp) {
2937            if (p>=UCHAR_STRING_LIMIT) {
2938                break;
2939            } else if (p>=UCHAR_DOUBLE_LIMIT) {
2940                p = UCHAR_STRING_START - 1;
2941            } else if (p>=UCHAR_MASK_LIMIT) {
2942                p = UCHAR_DOUBLE_START - 1;
2943            } else if (p>=UCHAR_INT_LIMIT) {
2944                p = UCHAR_MASK_START - 1;
2945            } else if (p>=UCHAR_BINARY_LIMIT) {
2946                p = UCHAR_INT_START - 1;
2947            }
2948        }
2949    }
2950}
2951
2952/**
2953 * Test the property values API.  See JB#2410.
2954 */
2955static void
2956TestPropertyValues(void) {
2957    int32_t i, p, min, max;
2958    UErrorCode ec;
2959
2960    /* Min should be 0 for everything. */
2961    /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2962    for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2963        UProperty propEnum = (UProperty)p;
2964        min = u_getIntPropertyMinValue(propEnum);
2965        if (min != 0) {
2966            if (p == UCHAR_BLOCK) {
2967                /* This is okay...for now.  See JB#2487.
2968                   TODO Update this for JB#2487. */
2969            } else {
2970                const char* name;
2971                name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2972                if (name == NULL)
2973                    name = "<ERROR>";
2974                log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2975                        name, min);
2976            }
2977        }
2978    }
2979
2980    if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2981        u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2982        log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2983    }
2984
2985    /* Max should be -1 for invalid properties. */
2986    max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
2987    if (max != -1) {
2988        log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2989                max);
2990    }
2991
2992    /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
2993    for (i=0; i<2; ++i) {
2994        int32_t script;
2995        const char* desc;
2996        ec = U_ZERO_ERROR;
2997        switch (i) {
2998        case 0:
2999            script = uscript_getScript(-1, &ec);
3000            desc = "uscript_getScript(-1)";
3001            break;
3002        case 1:
3003            script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
3004            desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
3005            break;
3006        default:
3007            log_err("Internal test error. Too many scripts\n");
3008            return;
3009        }
3010        /* We don't explicitly test ec.  It should be U_FAILURE but it
3011           isn't documented as such. */
3012        if (script != (int32_t)USCRIPT_INVALID_CODE) {
3013            log_err("FAIL: %s = %d, exp. 0\n",
3014                    desc, script);
3015        }
3016    }
3017}
3018
3019/* various tests for consistency of UCD data and API behavior */
3020static void
3021TestConsistency() {
3022    char buffer[300];
3023    USet *set1, *set2, *set3, *set4;
3024    UErrorCode errorCode;
3025
3026    UChar32 start, end;
3027    int32_t i, length;
3028
3029    U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
3030    U_STRING_DECL(dashPattern, "[:Dash:]", 8);
3031    U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
3032    U_STRING_DECL(formatPattern, "[:Cf:]", 6);
3033    U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
3034
3035    U_STRING_DECL(mathBlocksPattern,
3036        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3037        1+32+46+46+45+43+1+1); /* +1 for NUL */
3038    U_STRING_DECL(mathPattern, "[:Math:]", 8);
3039    U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
3040    U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
3041    U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3042
3043    U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
3044    U_STRING_INIT(dashPattern, "[:Dash:]", 8);
3045    U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
3046    U_STRING_INIT(formatPattern, "[:Cf:]", 6);
3047    U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
3048
3049    U_STRING_INIT(mathBlocksPattern,
3050        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
3051        1+32+46+46+45+43+1+1); /* +1 for NUL */
3052    U_STRING_INIT(mathPattern, "[:Math:]", 8);
3053    U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
3054    U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
3055    U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
3056
3057    /*
3058     * It used to be that UCD.html and its precursors said
3059     * "Those dashes used to mark connections between pieces of words,
3060     *  plus the Katakana middle dot."
3061     *
3062     * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
3063     * but not from Hyphen.
3064     * UTC 94 (2003mar) decided to leave it that way and to change UCD.html.
3065     * Therefore, do not show errors when testing the Hyphen property.
3066     */
3067    log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
3068                "known to the UTC and not considered errors.\n");
3069
3070    errorCode=U_ZERO_ERROR;
3071    set1=uset_openPattern(hyphenPattern, 10, &errorCode);
3072    set2=uset_openPattern(dashPattern, 8, &errorCode);
3073    if(U_SUCCESS(errorCode)) {
3074        /* remove the Katakana middle dot(s) from set1 */
3075        uset_remove(set1, 0x30fb);
3076        uset_remove(set1, 0xff65); /* halfwidth variant */
3077        showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
3078    } else {
3079        log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3080    }
3081
3082    /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
3083    set3=uset_openPattern(formatPattern, 6, &errorCode);
3084    set4=uset_openPattern(alphaPattern, 14, &errorCode);
3085    if(U_SUCCESS(errorCode)) {
3086        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
3087        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
3088        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
3089    } else {
3090        log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3091    }
3092
3093    uset_close(set1);
3094    uset_close(set2);
3095    uset_close(set3);
3096    uset_close(set4);
3097
3098    /*
3099     * Check that each lowercase character has "small" in its name
3100     * and not "capital".
3101     * There are some such characters, some of which seem odd.
3102     * Use the verbose flag to see these notices.
3103     */
3104    errorCode=U_ZERO_ERROR;
3105    set1=uset_openPattern(lowerPattern, 13, &errorCode);
3106    if(U_SUCCESS(errorCode)) {
3107        for(i=0;; ++i) {
3108            length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
3109            if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
3110                break; /* done */
3111            }
3112            if(U_FAILURE(errorCode)) {
3113                log_err("error iterating over [:Lowercase:] at item %d: %s\n",
3114                        i, u_errorName(errorCode));
3115                break;
3116            }
3117            if(length!=0) {
3118                break; /* done with code points, got a string or -1 */
3119            }
3120
3121            while(start<=end) {
3122                length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
3123                if(U_FAILURE(errorCode)) {
3124                    log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
3125                    errorCode=U_ZERO_ERROR;
3126                }
3127                if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
3128                    strstr(buffer, "SMALL CAPITAL")==NULL
3129                ) {
3130                    log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
3131                }
3132                ++start;
3133            }
3134        }
3135    } else {
3136        log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3137    }
3138    uset_close(set1);
3139
3140    /* verify that all assigned characters in Math blocks are exactly Math characters */
3141    errorCode=U_ZERO_ERROR;
3142    set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3143    set2=uset_openPattern(mathPattern, 8, &errorCode);
3144    set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3145    if(U_SUCCESS(errorCode)) {
3146        uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3147        uset_complement(set3);      /* assigned characters */
3148        uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3149        compareUSets(set1, set2,
3150                     "[assigned Math block chars]", "[math blocks]&[:Math:]",
3151                     TRUE);
3152    } else {
3153        log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3154    }
3155    uset_close(set1);
3156    uset_close(set2);
3157    uset_close(set3);
3158
3159    /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3160    errorCode=U_ZERO_ERROR;
3161    set1=uset_openPattern(unknownPattern, 14, &errorCode);
3162    set2=uset_openPattern(reservedPattern, 20, &errorCode);
3163    if(U_SUCCESS(errorCode)) {
3164        compareUSets(set1, set2,
3165                     "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3166                     TRUE);
3167    } else {
3168        log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3169    }
3170    uset_close(set1);
3171    uset_close(set2);
3172}
3173
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 *
 * While this macro is nonzero, code guarded by "#if !HARDCODED_DATA_4497"
 * is compiled out.
 */
#define HARDCODED_DATA_4497 1
3185
3186/* API coverage for ucase.c */
3187static void TestUCase() {
3188#if !HARDCODED_DATA_4497
3189    UDataMemory *pData;
3190    UCaseProps *csp;
3191    const UCaseProps *ccsp;
3192    UErrorCode errorCode;
3193
3194    /* coverage for ucase_openBinary() */
3195    errorCode=U_ZERO_ERROR;
3196    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3197    if(U_FAILURE(errorCode)) {
3198        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3199                    u_errorName(errorCode));
3200        return;
3201    }
3202
3203    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3204    if(U_FAILURE(errorCode)) {
3205        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3206                u_errorName(errorCode));
3207        udata_close(pData);
3208        return;
3209    }
3210
3211    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3212        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3213    }
3214
3215    ucase_close(csp);
3216    udata_close(pData);
3217
3218    /* coverage for ucase_getDummy() */
3219    errorCode=U_ZERO_ERROR;
3220    ccsp=ucase_getDummy(&errorCode);
3221    if(ucase_tolower(ccsp, 0x41)!=0x41) {
3222        log_err("ucase_tolower(dummy, A)!=A\n");
3223    }
3224#endif
3225}
3226
3227/* API coverage for ubidi_props.c */
3228static void TestUBiDiProps() {
3229#if !HARDCODED_DATA_4497
3230    UDataMemory *pData;
3231    UBiDiProps *bdp;
3232    const UBiDiProps *cbdp;
3233    UErrorCode errorCode;
3234
3235    /* coverage for ubidi_openBinary() */
3236    errorCode=U_ZERO_ERROR;
3237    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3238    if(U_FAILURE(errorCode)) {
3239        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3240                    u_errorName(errorCode));
3241        return;
3242    }
3243
3244    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3245    if(U_FAILURE(errorCode)) {
3246        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3247                u_errorName(errorCode));
3248        udata_close(pData);
3249        return;
3250    }
3251
3252    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3253        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3254    }
3255
3256    ubidi_closeProps(bdp);
3257    udata_close(pData);
3258
3259    /* coverage for ubidi_getDummy() */
3260    errorCode=U_ZERO_ERROR;
3261    cbdp=ubidi_getDummy(&errorCode);
3262    if(ubidi_getClass(cbdp, 0x20)!=0) {
3263        log_err("ubidi_getClass(dummy, space)!=0\n");
3264    }
3265#endif
3266}
3267
3268/* test case folding, compare return values with CaseFolding.txt ------------ */
3269
/*
 * Bit set for which case foldings for a character have been tested already.
 * Each kind of folding gets its own bit; CF_ALL is the union of all of them.
 */
enum {
    CF_SIMPLE=1<<0,
    CF_FULL=1<<1,
    CF_TURKIC=1<<2,
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3277
3278static void
3279testFold(UChar32 c, int which,
3280         UChar32 simple, UChar32 turkic,
3281         const UChar *full, int32_t fullLength,
3282         const UChar *turkicFull, int32_t turkicFullLength) {
3283    UChar s[2], t[32];
3284    UChar32 c2;
3285    int32_t length, length2;
3286
3287    UErrorCode errorCode=U_ZERO_ERROR;
3288
3289    length=0;
3290    U16_APPEND_UNSAFE(s, length, c);
3291
3292    if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3293        log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3294    }
3295    if((which&CF_FULL)!=0) {
3296        length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3297        if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3298            log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3299        }
3300    }
3301    if((which&CF_TURKIC)!=0) {
3302        if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3303            log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3304        }
3305
3306        length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3307        if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3308            log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3309        }
3310    }
3311}
3312
3313/* test that c case-folds to itself */
3314static void
3315testFoldToSelf(UChar32 c, int which) {
3316    UChar s[2];
3317    int32_t length;
3318
3319    length=0;
3320    U16_APPEND_UNSAFE(s, length, c);
3321    testFold(c, which, c, c, s, length, s, length);
3322}
3323
3324struct CaseFoldingData {
3325    USet *notSeen;
3326    UChar32 prev, prevSimple;
3327    UChar prevFull[32];
3328    int32_t prevFullLength;
3329    int which;
3330};
3331typedef struct CaseFoldingData CaseFoldingData;
3332
3333static void U_CALLCONV
3334caseFoldingLineFn(void *context,
3335                  char *fields[][2], int32_t fieldCount,
3336                  UErrorCode *pErrorCode) {
3337    CaseFoldingData *pData=(CaseFoldingData *)context;
3338    char *end;
3339    UChar full[32];
3340    UChar32 c, prev, simple;
3341    int32_t count;
3342    int which;
3343    char status;
3344
3345    /* get code point */
3346    const char *s=u_skipWhitespace(fields[0][0]);
3347    if(0==strncmp(s, "0000..10FFFF", 12)) {
3348        /*
3349         * Ignore the line
3350         * # @missing: 0000..10FFFF; C; <code point>
3351         * because maps-to-self is already our default, and this line breaks this parser.
3352         */
3353        return;
3354    }
3355    c=(UChar32)strtoul(s, &end, 16);
3356    end=(char *)u_skipWhitespace(end);
3357    if(end<=fields[0][0] || end!=fields[0][1]) {
3358        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3359        *pErrorCode=U_PARSE_ERROR;
3360        return;
3361    }
3362
3363    /* get the status of this mapping */
3364    status=*u_skipWhitespace(fields[1][0]);
3365    if(status!='C' && status!='S' && status!='F' && status!='T') {
3366        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3367        *pErrorCode=U_PARSE_ERROR;
3368        return;
3369    }
3370
3371    /* get the mapping */
3372    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3373    if(U_FAILURE(*pErrorCode)) {
3374        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3375        return;
3376    }
3377
3378    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3379    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3380        simple=c;
3381    }
3382
3383    if(c!=(prev=pData->prev)) {
3384        /*
3385         * Test remaining mappings for the previous code point.
3386         * If a turkic folding was not mentioned, then it should fold the same
3387         * as the regular simple case folding.
3388         */
3389        UChar prevString[2];
3390        int32_t length;
3391
3392        length=0;
3393        U16_APPEND_UNSAFE(prevString, length, prev);
3394        testFold(prev, (~pData->which)&CF_ALL,
3395                 prev, pData->prevSimple,
3396                 prevString, length,
3397                 pData->prevFull, pData->prevFullLength);
3398        pData->prev=pData->prevSimple=c;
3399        length=0;
3400        U16_APPEND_UNSAFE(pData->prevFull, length, c);
3401        pData->prevFullLength=length;
3402        pData->which=0;
3403    }
3404
3405    /*
3406     * Turn the status into a bit set of case foldings to test.
3407     * Remember non-Turkic case foldings as defaults for Turkic mode.
3408     */
3409    switch(status) {
3410    case 'C':
3411        which=CF_SIMPLE|CF_FULL;
3412        pData->prevSimple=simple;
3413        u_memcpy(pData->prevFull, full, count);
3414        pData->prevFullLength=count;
3415        break;
3416    case 'S':
3417        which=CF_SIMPLE;
3418        pData->prevSimple=simple;
3419        break;
3420    case 'F':
3421        which=CF_FULL;
3422        u_memcpy(pData->prevFull, full, count);
3423        pData->prevFullLength=count;
3424        break;
3425    case 'T':
3426        which=CF_TURKIC;
3427        break;
3428    default:
3429        which=0;
3430        break; /* won't happen because of test above */
3431    }
3432
3433    testFold(c, which, simple, simple, full, count, full, count);
3434
3435    /* remember which case foldings of c have been tested */
3436    pData->which|=which;
3437
3438    /* remove c from the set of ones not mentioned in CaseFolding.txt */
3439    uset_remove(pData->notSeen, c);
3440}
3441
3442static void
3443TestCaseFolding() {
3444    CaseFoldingData data={ NULL };
3445    char *fields[3][2];
3446    UErrorCode errorCode;
3447
3448    static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3449
3450    errorCode=U_ZERO_ERROR;
3451    /* test BMP & plane 1 - nothing interesting above */
3452    data.notSeen=uset_open(0, 0x1ffff);
3453    data.prevFullLength=1; /* length of full case folding of U+0000 */
3454
3455    parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3456    if(U_SUCCESS(errorCode)) {
3457        int32_t i, start, end;
3458
3459        /* add a pseudo-last line to finish testing of the actual last one */
3460        fields[0][0]=lastLine;
3461        fields[0][1]=lastLine+6;
3462        fields[1][0]=lastLine+7;
3463        fields[1][1]=lastLine+9;
3464        fields[2][0]=lastLine+10;
3465        fields[2][1]=lastLine+17;
3466        caseFoldingLineFn(&data, fields, 3, &errorCode);
3467
3468        /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3469        for(i=0;
3470            0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3471                U_SUCCESS(errorCode);
3472            ++i
3473        ) {
3474            do {
3475                testFoldToSelf(start, CF_ALL);
3476            } while(++start<=end);
3477        }
3478    }
3479
3480    uset_close(data.notSeen);
3481}
3482