1/********************************************************************
2 * Copyright (c) 1997-2013, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5
6#include <string.h>
7#include "unicode/utypes.h"
8#include "unicode/uscript.h"
9#include "unicode/uchar.h"
10#include "cintltst.h"
11#include "cucdapi.h"
12
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves as a single primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
14
15void TestUScriptCodeAPI(){
16    int i =0;
17    int numErrors =0;
18    {
19        const char* testNames[]={
20        /* test locale */
21        "en", "en_US", "sr", "ta" , "te_IN",
22        "hi", "he", "ar",
23        /* test abbr */
24        "Hani", "Hang","Hebr","Hira",
25        "Knda","Kana","Khmr","Lao",
26        "Latn",/*"Latf","Latg",*/
27        "Mlym", "Mong",
28
29        /* test names */
30        "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
31        "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
32        /* test lower case names */
33        "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
34        "oriya",     "runic",     "sinhala", "syriac","tamil",
35        "telugu",    "thaana",    "thai",    "tibetan",
36        /* test the bounds*/
37        "tagb", "arabic",
38        /* test bogus */
39        "asfdasd", "5464", "12235",
40        /* test the last index */
41        "zyyy", "YI",
42        '\0'
43        };
44        UScriptCode expected[] ={
45            /* locales should return */
46            USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
47            USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
48            /* abbr should return */
49            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
50            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
51            USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
52            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
53            /* names should return */
54            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
55            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
56            /* lower case names should return */
57            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
58            USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
59            USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
60            /* bounds */
61            USCRIPT_TAGBANWA, USCRIPT_ARABIC,
62            /* bogus names should return invalid code */
63            USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
64            USCRIPT_COMMON, USCRIPT_YI,
65        };
66
67        UErrorCode err = U_ZERO_ERROR;
68
69        const int32_t capacity = 10;
70
71        for( ; testNames[i]!='\0'; i++){
72            UScriptCode script[10]={USCRIPT_INVALID_CODE};
73            uscript_getCode(testNames[i],script,capacity, &err);
74            if( script[0] != expected[i]){
75                   log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
76                       script[0],expected[i],testNames[i]);
77                   numErrors++;
78            }
79        }
80        if(numErrors >0 ){
81            log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
82        }
83    }
84
85    {
86        UErrorCode err = U_ZERO_ERROR;
87        int32_t capacity=0;
88        int32_t j;
89        UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
90        UScriptCode script[10]={USCRIPT_INVALID_CODE};
91        int32_t num = uscript_getCode("ja",script,capacity, &err);
92        /* preflight */
93        if(err==U_BUFFER_OVERFLOW_ERROR){
94            err = U_ZERO_ERROR;
95            capacity = 10;
96            num = uscript_getCode("ja",script,capacity, &err);
97            if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
98                log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
99                        num, (sizeof(jaCode)/sizeof(UScriptCode)));
100            }
101            for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
102                if(script[j]!=jaCode[j]) {
103                    log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
104                            script[j], uscript_getName(script[j]),
105                            jaCode[j], uscript_getName(jaCode[j]));
106
107                }
108            }
109        }else{
110            log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
111                "U_BUFFER_OVERFLOW_ERROR",
112                 u_errorName(err));
113        }
114
115    }
116
117    {
118        UScriptCode testAbbr[]={
119            /* names should return */
120            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
121            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
122        };
123
124        const char* expectedNames[]={
125
126            /* test names */
127            "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
128            "Gothic",  "Greek",  "Gujarati",
129             '\0'
130        };
131        i=0;
132        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
133            const char* name = uscript_getName(testAbbr[i]);
134             if(name == NULL) {
135               log_data_err("Couldn't get script name\n");
136               return;
137             }
138            numErrors=0;
139            if(strcmp(expectedNames[i],name)!=0){
140                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
141                numErrors++;
142            }
143            if(numErrors > 0){
144                if(numErrors >0 ){
145                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
146                }
147            }
148            i++;
149        }
150
151    }
152
153    {
154        UScriptCode testAbbr[]={
155            /* abbr should return */
156            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
157            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
158            USCRIPT_LATIN,
159            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
160        };
161
162        const char* expectedAbbr[]={
163              /* test abbr */
164            "Hani", "Hang","Hebr","Hira",
165            "Knda","Kana","Khmr","Laoo",
166            "Latn",
167            "Mlym", "Mong",
168             '\0'
169        };
170        i=0;
171        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
172            const char* name = uscript_getShortName(testAbbr[i]);
173            numErrors=0;
174            if(strcmp(expectedAbbr[i],name)!=0){
175                log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
176                numErrors++;
177            }
178            if(numErrors > 0){
179                if(numErrors >0 ){
180                    log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
181                }
182            }
183            i++;
184        }
185
186    }
187    /* now test uscript_getScript() API */
188    {
189        uint32_t codepoints[] = {
190                0x0000FF9D, /* USCRIPT_KATAKANA*/
191                0x0000FFBE, /* USCRIPT_HANGUL*/
192                0x0000FFC7, /* USCRIPT_HANGUL*/
193                0x0000FFCF, /* USCRIPT_HANGUL*/
194                0x0000FFD7, /* USCRIPT_HANGUL*/
195                0x0000FFDC, /* USCRIPT_HANGUL*/
196                0x00010300, /* USCRIPT_OLD_ITALIC*/
197                0x00010330, /* USCRIPT_GOTHIC*/
198                0x0001034A, /* USCRIPT_GOTHIC*/
199                0x00010400, /* USCRIPT_DESERET*/
200                0x00010428, /* USCRIPT_DESERET*/
201                0x0001D167, /* USCRIPT_INHERITED*/
202                0x0001D17B, /* USCRIPT_INHERITED*/
203                0x0001D185, /* USCRIPT_INHERITED*/
204                0x0001D1AA, /* USCRIPT_INHERITED*/
205                0x00020000, /* USCRIPT_HAN*/
206                0x00000D02, /* USCRIPT_MALAYALAM*/
207                0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
208                0x00000000, /* USCRIPT_COMMON*/
209                0x0001D169, /* USCRIPT_INHERITED*/
210                0x0001D182, /* USCRIPT_INHERITED*/
211                0x0001D18B, /* USCRIPT_INHERITED*/
212                0x0001D1AD, /* USCRIPT_INHERITED*/
213        };
214
215        UScriptCode expected[] = {
216                USCRIPT_KATAKANA ,
217                USCRIPT_HANGUL ,
218                USCRIPT_HANGUL ,
219                USCRIPT_HANGUL ,
220                USCRIPT_HANGUL ,
221                USCRIPT_HANGUL ,
222                USCRIPT_OLD_ITALIC,
223                USCRIPT_GOTHIC ,
224                USCRIPT_GOTHIC ,
225                USCRIPT_DESERET ,
226                USCRIPT_DESERET ,
227                USCRIPT_INHERITED,
228                USCRIPT_INHERITED,
229                USCRIPT_INHERITED,
230                USCRIPT_INHERITED,
231                USCRIPT_HAN ,
232                USCRIPT_MALAYALAM,
233                USCRIPT_UNKNOWN,
234                USCRIPT_COMMON,
235                USCRIPT_INHERITED ,
236                USCRIPT_INHERITED ,
237                USCRIPT_INHERITED ,
238                USCRIPT_INHERITED ,
239        };
240        UScriptCode code = USCRIPT_INVALID_CODE;
241        UErrorCode status = U_ZERO_ERROR;
242        UBool passed = TRUE;
243
244        for(i=0; i<LENGTHOF(codepoints); ++i){
245            code = uscript_getScript(codepoints[i],&status);
246            if(U_SUCCESS(status)){
247                if( code != expected[i] ||
248                    code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
249                ) {
250                    log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
251                    passed = FALSE;
252                }
253            }else{
254                log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
255                         codepoints[i],u_errorName(status));
256                break;
257            }
258        }
259
260        if(passed==FALSE){
261           log_err("uscript_getScript failed.\n");
262        }
263    }
264    {
265        UScriptCode code= USCRIPT_INVALID_CODE;
266        UErrorCode  status = U_ZERO_ERROR;
267        code = uscript_getScript(0x001D169,&status);
268        if(code != USCRIPT_INHERITED){
269            log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
270        }
271    }
272    {
273        UScriptCode code= USCRIPT_INVALID_CODE;
274        UErrorCode  status = U_ZERO_ERROR;
275        int32_t err = 0;
276
277        for(i = 0; i<=0x10ffff; i++){
278            code =  uscript_getScript(i,&status);
279            if(code == USCRIPT_INVALID_CODE){
280                err++;
281                log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
282            }
283        }
284        if(err>0){
285            log_err("uscript_getScript failed for %d codepoints\n", err);
286        }
287    }
288    {
289        for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
290            const char* name = uscript_getName((UScriptCode)i);
291            if(name==NULL || strcmp(name,"")==0){
292                log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
293            }
294        }
295    }
296
297    {
298        /*
299         * These script codes were originally added to ICU pre-3.6, so that ICU would
300         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
301         * These script codes were added with only short names because we don't
302         * want to invent long names ourselves.
303         * Unicode 5 and later encode some of these scripts and give them long names.
304         * Whenever this happens, the long script names here need to be updated.
305         */
306        static const char* expectedLong[] = {
307            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
308            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
309            "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
310            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
311            "Zxxx", "Unknown",
312            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
313            "Moon", "Meetei_Mayek",
314            /* new in ICU 4.0 */
315            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
316            "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
317            "Zmth", "Zsym",
318            /* new in ICU 4.4 */
319            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
320            /* new in ICU 4.6 */
321            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
322            "Narb", "Nbat", "Palm", "Sind", "Wara",
323            /* new in ICU 4.8 */
324            "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
325            /* new in ICU 49 */
326            "Hluw", "Khoj", "Tirh",
327        };
328        static const char* expectedShort[] = {
329            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
330            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
331            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
332            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
333            "Zxxx", "Zzzz",
334            "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
335            "Moon", "Mtei",
336            /* new in ICU 4.0 */
337            "Armi", "Avst", "Cakm", "Kore",
338            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
339            "Zmth", "Zsym",
340            /* new in ICU 4.4 */
341            "Bamu", "Lisu", "Nkgb", "Sarb",
342            /* new in ICU 4.6 */
343            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
344            "Narb", "Nbat", "Palm", "Sind", "Wara",
345            /* new in ICU 4.8 */
346            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
347            /* new in ICU 49 */
348            "Hluw", "Khoj", "Tirh",
349        };
350        int32_t j = 0;
351        if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
352            log_err("need to add new script codes in cucdapi.c!\n");
353            return;
354        }
355        for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
356            const char* name = uscript_getName((UScriptCode)i);
357            if(name==NULL || strcmp(name,expectedLong[j])!=0){
358                log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
359            }
360            name = uscript_getShortName((UScriptCode)i);
361            if(name==NULL || strcmp(name,expectedShort[j])!=0){
362                log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
363            }
364        }
365        for(i=0; i<LENGTHOF(expectedLong); i++){
366            UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
367            UErrorCode status = U_ZERO_ERROR;
368            int32_t len = 0;
369            len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
370            if(U_FAILURE(status)){
371                log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
372            }
373            if(len>1){
374                log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
375            }
376            if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
377                log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
378            }
379        }
380    }
381
382    {
383        /* test characters which have Script_Extensions */
384        UErrorCode errorCode=U_ZERO_ERROR;
385        if(!(
386                USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
387                USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
388                USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
389            U_FAILURE(errorCode)
390        ) {
391            log_err("uscript_getScript(character with Script_Extensions) failed\n");
392        }
393    }
394}
395
396void TestHasScript() {
397    if(!(
398        !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
399        uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
400        !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
401        !uscript_hasScript(0x063f, USCRIPT_THAANA))
402    ) {
403        log_err("uscript_hasScript(U+063F, ...) is wrong\n");
404    }
405    if(!(
406        !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
407        uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
408        uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
409        !uscript_hasScript(0x0640, USCRIPT_THAANA))
410    ) {
411        log_err("uscript_hasScript(U+0640, ...) is wrong\n");
412    }
413    if(!(
414        !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
415        uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
416        uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
417        !uscript_hasScript(0x0650, USCRIPT_THAANA))
418    ) {
419        log_err("uscript_hasScript(U+0650, ...) is wrong\n");
420    }
421    if(!(
422        !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
423        uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
424        !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
425        uscript_hasScript(0x0660, USCRIPT_THAANA))
426    ) {
427        log_err("uscript_hasScript(U+0660, ...) is wrong\n");
428    }
429    if(!(
430        !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
431        uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
432        !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
433        uscript_hasScript(0xfdf2, USCRIPT_THAANA))
434    ) {
435        log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
436    }
437    if(uscript_hasScript(0x0640, 0xaffe)) {
438        /* An unguarded implementation might go into an infinite loop. */
439        log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
440    }
441}
442
443void TestGetScriptExtensions() {
444    UScriptCode scripts[20];
445    int32_t length;
446    UErrorCode errorCode;
447
448    /* errors and overflows */
449    errorCode=U_PARSE_ERROR;
450    length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
451    if(errorCode!=U_PARSE_ERROR) {
452        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
453              u_errorName(errorCode));
454    }
455    errorCode=U_ZERO_ERROR;
456    length=uscript_getScriptExtensions(0x0640, NULL, LENGTHOF(scripts), &errorCode);
457    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
458        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
459              u_errorName(errorCode));
460    }
461    errorCode=U_ZERO_ERROR;
462    length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
463    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
464        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
465              u_errorName(errorCode));
466    }
467    errorCode=U_ZERO_ERROR;
468    length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
469    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
470        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d != 3 - %s\n",
471              (int)length, u_errorName(errorCode));
472    }
473    errorCode=U_ZERO_ERROR;
474    length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
475    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
476        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d != 3 - %s\n",
477              (int)length, u_errorName(errorCode));
478    }
479    /* U+063F has only a Script code, no Script_Extensions. */
480    errorCode=U_ZERO_ERROR;
481    length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
482    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
483        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
484              (int)length, u_errorName(errorCode));
485    }
486
487    /* invalid code points */
488    errorCode=U_ZERO_ERROR;
489    length=uscript_getScriptExtensions(-1, scripts, LENGTHOF(scripts), &errorCode);
490    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
491        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
492              (int)length, u_errorName(errorCode));
493    }
494    errorCode=U_ZERO_ERROR;
495    length=uscript_getScriptExtensions(0x110000, scripts, LENGTHOF(scripts), &errorCode);
496    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
497        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
498              (int)length, u_errorName(errorCode));
499    }
500
501    /* normal usage */
502    errorCode=U_ZERO_ERROR;
503    length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
504    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
505        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
506              (int)length, u_errorName(errorCode));
507    }
508    errorCode=U_ZERO_ERROR;
509    length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
510    if(U_FAILURE(errorCode) || length!=3 ||
511       scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_SYRIAC || scripts[2]!=USCRIPT_MANDAIC
512    ) {
513        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
514              (int)length, u_errorName(errorCode));
515    }
516    errorCode=U_ZERO_ERROR;
517    length=uscript_getScriptExtensions(0xfdf2, scripts, LENGTHOF(scripts), &errorCode);
518    if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
519        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
520              (int)length, u_errorName(errorCode));
521    }
522    errorCode=U_ZERO_ERROR;
523    length=uscript_getScriptExtensions(0xff65, scripts, LENGTHOF(scripts), &errorCode);
524    if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
525        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
526              (int)length, u_errorName(errorCode));
527    }
528}
529
530void TestScriptMetadataAPI() {
531    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
532    UErrorCode errorCode=U_ZERO_ERROR;
533    UChar sample[8];
534
535    if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
536            U_FAILURE(errorCode) ||
537            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
538            sample[1]!=0) {
539        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
540    }
541    sample[0]=0xfffe;
542    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
543            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
544            sample[0]!=0xfffe) {
545        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
546    }
547    errorCode=U_ZERO_ERROR;
548    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
549            U_FAILURE(errorCode) ||
550            sample[0]!=0) {
551        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
552    }
553    sample[0]=0xfffe;
554    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
555            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
556            sample[0]!=0xfffe) {
557        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
558    }
559
560    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
561            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
562            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
563            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
564            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
565            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
566            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
567        log_err("uscript_getUsage() failed\n");
568    }
569
570    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
571            uscript_isRightToLeft(USCRIPT_CIRTH) ||
572            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
573            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
574        log_err("uscript_isRightToLeft() failed\n");
575    }
576
577    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
578            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
579            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
580            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
581        log_err("uscript_breaksBetweenLetters() failed\n");
582    }
583
584    if(uscript_isCased(USCRIPT_CIRTH) ||
585            uscript_isCased(USCRIPT_HAN) ||
586            !uscript_isCased(USCRIPT_LATIN) ||
587            !uscript_isCased(USCRIPT_GREEK)) {
588        log_err("uscript_isCased() failed\n");
589    }
590}
591
592void TestBinaryValues() {
593    /*
594     * Unicode 5.1 explicitly defines binary property value aliases.
595     * Verify that they are all recognized.
596     */
597    static const char *const falseValues[]={ "N", "No", "F", "False" };
598    static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
599    int32_t i;
600    for(i=0; i<LENGTHOF(falseValues); ++i) {
601        if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
602            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
603        }
604    }
605    for(i=0; i<LENGTHOF(trueValues); ++i) {
606        if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
607            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
608        }
609    }
610}
611