1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
9* File parse.cpp
10*
11* Modification History:
12*
13*   Date          Name          Description
14*   05/26/99     stephen       Creation.
15*   02/25/00     weiv          Overhaul to write udata
16*   5/10/01      Ram           removed ustdio dependency
17*   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18*******************************************************************************
19*/
20
21// Safer use of UnicodeString.
22#ifndef UNISTR_FROM_CHAR_EXPLICIT
23#   define UNISTR_FROM_CHAR_EXPLICIT explicit
24#endif
25
26// Less important, but still a good idea.
27#ifndef UNISTR_FROM_STRING_EXPLICIT
28#   define UNISTR_FROM_STRING_EXPLICIT explicit
29#endif
30
31#include "parse.h"
32#include "errmsg.h"
33#include "uhash.h"
34#include "cmemory.h"
35#include "cstring.h"
36#include "uinvchar.h"
37#include "read.h"
38#include "ustr.h"
39#include "reslist.h"
40#include "rbt_pars.h"
41#include "genrb.h"
42#include "unicode/ustring.h"
43#include "unicode/uscript.h"
44#include "unicode/utf16.h"
45#include "unicode/putil.h"
46#include "collationbuilder.h"
47#include "collationdata.h"
48#include "collationdatareader.h"
49#include "collationdatawriter.h"
50#include "collationfastlatinbuilder.h"
51#include "collationinfo.h"
52#include "collationroot.h"
53#include "collationruleparser.h"
54#include "collationtailoring.h"
55#include <stdio.h>
56
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that get special treatment while rule text is being read. */
#define CR               0x000D   /* carriage return      */
#define LF               0x000A   /* line feed            */
#define SPACE            0x0020   /* ' '                  */
#define TAB              0x0009   /* horizontal tab       */
#define ESCAPE           0x005C   /* '\\' starts an escape */
#define HASH             0x0023   /* '#' starts a comment */
#define QUOTE            0x0027   /* '\'' toggles quoting */
#define ZERO             0x0030   /* '0'                  */

/* '[' and ']' under two pairs of names: as command delimiters and as plain brackets. */
#define STARTCOMMAND     0x005B
#define ENDCOMMAND       0x005D
#define OPENSQBRACKET    0x005B
#define CLOSESQBRACKET   0x005D
72
73using icu::LocalPointer;
74using icu::UnicodeString;
75
76struct Lookahead
77{
78     enum   ETokenType type;
79     struct UString    value;
80     struct UString    comment;
81     uint32_t          line;
82};
83
84/* keep in sync with token defines in read.h */
85const char *tokenNames[TOK_TOKEN_COUNT] =
86{
87     "string",             /* A string token, such as "MonthNames" */
88     "'{'",                 /* An opening brace character */
89     "'}'",                 /* A closing brace character */
90     "','",                 /* A comma */
91     "':'",                 /* A colon */
92
93     "<end of file>",     /* End of the file has been reached successfully */
94     "<end of line>"
95};
96
97/* Just to store "TRUE" */
98//static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99
100typedef struct {
101    struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
102    uint32_t          lookaheadPosition;
103    UCHARBUF         *buffer;
104    struct SRBRoot *bundle;
105    const char     *inputdir;
106    uint32_t        inputdirLength;
107    const char     *outputdir;
108    uint32_t        outputdirLength;
109    const char     *filename;
110    UBool           makeBinaryCollation;
111    UBool           omitCollationRules;
112} ParseState;
113
114typedef struct SResource *
115ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116
117static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current position moves on to the next slot and
   the slot just behind the token being returned is refilled with the next
   token from the reader by calling getNextToken.
   The token values are stored in the slots themselves, which means that a
   token value must not be used after a further call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
132static void
133initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134{
135    static uint32_t initTypeStrings = 0;
136    uint32_t i;
137
138    if (!initTypeStrings)
139    {
140        initTypeStrings = 1;
141    }
142
143    state->lookaheadPosition   = 0;
144    state->buffer              = buf;
145
146    resetLineNumber();
147
148    for (i = 0; i < MAX_LOOKAHEAD; i++)
149    {
150        state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151        if (U_FAILURE(*status))
152        {
153            return;
154        }
155    }
156
157    *status = U_ZERO_ERROR;
158}
159
160static void
161cleanupLookahead(ParseState* state)
162{
163    uint32_t i;
164    for (i = 0; i <= MAX_LOOKAHEAD; i++)
165    {
166        ustr_deinit(&state->lookahead[i].value);
167        ustr_deinit(&state->lookahead[i].comment);
168    }
169
170}
171
172static enum ETokenType
173getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174{
175    enum ETokenType result;
176    uint32_t          i;
177
178    result = state->lookahead[state->lookaheadPosition].type;
179
180    if (tokenValue != NULL)
181    {
182        *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183    }
184
185    if (linenumber != NULL)
186    {
187        *linenumber = state->lookahead[state->lookaheadPosition].line;
188    }
189
190    if (comment != NULL)
191    {
192        ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193    }
194
195    i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196    state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197    ustr_setlen(&state->lookahead[i].comment, 0, status);
198    ustr_setlen(&state->lookahead[i].value, 0, status);
199    state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200
201    /* printf("getToken, returning %s\n", tokenNames[result]); */
202
203    return result;
204}
205
206static enum ETokenType
207peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208{
209    uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210
211    if (U_FAILURE(*status))
212    {
213        return TOK_ERROR;
214    }
215
216    if (lookaheadCount >= MAX_LOOKAHEAD)
217    {
218        *status = U_INTERNAL_PROGRAM_ERROR;
219        return TOK_ERROR;
220    }
221
222    if (tokenValue != NULL)
223    {
224        *tokenValue = &state->lookahead[i].value;
225    }
226
227    if (linenumber != NULL)
228    {
229        *linenumber = state->lookahead[i].line;
230    }
231
232    if(comment != NULL){
233        ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234    }
235
236    return state->lookahead[i].type;
237}
238
239static void
240expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241{
242    uint32_t        line;
243
244    enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245
246    if (linenumber != NULL)
247    {
248        *linenumber = line;
249    }
250
251    if (U_FAILURE(*status))
252    {
253        return;
254    }
255
256    if (token != expectedToken)
257    {
258        *status = U_INVALID_FORMAT_ERROR;
259        error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260    }
261    else
262    {
263        *status = U_ZERO_ERROR;
264    }
265}
266
267static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268{
269    struct UString *tokenValue;
270    char           *result;
271    uint32_t        count;
272
273    expect(state, TOK_STRING, &tokenValue, comment, line, status);
274
275    if (U_FAILURE(*status))
276    {
277        return NULL;
278    }
279
280    count = u_strlen(tokenValue->fChars);
281    if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282        *status = U_INVALID_FORMAT_ERROR;
283        error(*line, "invariant characters required for table keys, binary data, etc.");
284        return NULL;
285    }
286
287    result = static_cast<char *>(uprv_malloc(count+1));
288
289    if (result == NULL)
290    {
291        *status = U_MEMORY_ALLOCATION_ERROR;
292        return NULL;
293    }
294
295    u_UCharsToChars(tokenValue->fChars, result, count+1);
296    return result;
297}
298
299static struct SResource *
300parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301{
302    struct SResource *result = NULL;
303    struct UString   *tokenValue;
304    FileStream       *file          = NULL;
305    char              filename[256] = { '\0' };
306    char              cs[128]       = { '\0' };
307    uint32_t          line;
308    UBool quoted = FALSE;
309    UCHARBUF *ucbuf=NULL;
310    UChar32   c     = 0;
311    const char* cp  = NULL;
312    UChar *pTarget     = NULL;
313    UChar *target      = NULL;
314    UChar *targetLimit = NULL;
315    int32_t size = 0;
316
317    expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318
319    if(isVerbose()){
320        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
321    }
322
323    if (U_FAILURE(*status))
324    {
325        return NULL;
326    }
327    /* make the filename including the directory */
328    if (state->inputdir != NULL)
329    {
330        uprv_strcat(filename, state->inputdir);
331
332        if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333        {
334            uprv_strcat(filename, U_FILE_SEP_STRING);
335        }
336    }
337
338    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339
340    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341
342    if (U_FAILURE(*status))
343    {
344        return NULL;
345    }
346    uprv_strcat(filename, cs);
347
348    if(state->omitCollationRules) {
349        return res_none();
350    }
351
352    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353
354    if (U_FAILURE(*status)) {
355        error(line, "An error occured while opening the input file %s\n", filename);
356        return NULL;
357    }
358
359    /* We allocate more space than actually required
360    * since the actual size needed for storing UChars
361    * is not known in UTF-8 byte stream
362    */
363    size        = ucbuf_size(ucbuf) + 1;
364    pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365    uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366    target      = pTarget;
367    targetLimit = pTarget+size;
368
369    /* read the rules into the buffer */
370    while (target < targetLimit)
371    {
372        c = ucbuf_getc(ucbuf, status);
373        if(c == QUOTE) {
374            quoted = (UBool)!quoted;
375        }
376        /* weiv (06/26/2002): adding the following:
377         * - preserving spaces in commands [...]
378         * - # comments until the end of line
379         */
380        if (c == STARTCOMMAND && !quoted)
381        {
382            /* preserve commands
383             * closing bracket will be handled by the
384             * append at the end of the loop
385             */
386            while(c != ENDCOMMAND) {
387                U_APPEND_CHAR32_ONLY(c, target);
388                c = ucbuf_getc(ucbuf, status);
389            }
390        }
391        else if (c == HASH && !quoted) {
392            /* skip comments */
393            while(c != CR && c != LF) {
394                c = ucbuf_getc(ucbuf, status);
395            }
396            continue;
397        }
398        else if (c == ESCAPE)
399        {
400            c = unescape(ucbuf, status);
401
402            if (c == (UChar32)U_ERR)
403            {
404                uprv_free(pTarget);
405                T_FileStream_close(file);
406                return NULL;
407            }
408        }
409        else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410        {
411            /* ignore spaces carriage returns
412            * and line feed unless in the form \uXXXX
413            */
414            continue;
415        }
416
417        /* Append UChar * after dissembling if c > 0xffff*/
418        if (c != (UChar32)U_EOF)
419        {
420            U_APPEND_CHAR32_ONLY(c, target);
421        }
422        else
423        {
424            break;
425        }
426    }
427
428    /* terminate the string */
429    if(target < targetLimit){
430        *target = 0x0000;
431    }
432
433    result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434
435
436    ucbuf_close(ucbuf);
437    uprv_free(pTarget);
438    T_FileStream_close(file);
439
440    return result;
441}
442
443static struct SResource *
444parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445{
446    struct SResource *result = NULL;
447    struct UString   *tokenValue;
448    FileStream       *file          = NULL;
449    char              filename[256] = { '\0' };
450    char              cs[128]       = { '\0' };
451    uint32_t          line;
452    UCHARBUF *ucbuf=NULL;
453    const char* cp  = NULL;
454    UChar *pTarget     = NULL;
455    const UChar *pSource     = NULL;
456    int32_t size = 0;
457
458    expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459
460    if(isVerbose()){
461        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
462    }
463
464    if (U_FAILURE(*status))
465    {
466        return NULL;
467    }
468    /* make the filename including the directory */
469    if (state->inputdir != NULL)
470    {
471        uprv_strcat(filename, state->inputdir);
472
473        if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474        {
475            uprv_strcat(filename, U_FILE_SEP_STRING);
476        }
477    }
478
479    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480
481    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482
483    if (U_FAILURE(*status))
484    {
485        return NULL;
486    }
487    uprv_strcat(filename, cs);
488
489
490    ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491
492    if (U_FAILURE(*status)) {
493        error(line, "An error occured while opening the input file %s\n", filename);
494        return NULL;
495    }
496
497    /* We allocate more space than actually required
498    * since the actual size needed for storing UChars
499    * is not known in UTF-8 byte stream
500    */
501    pSource = ucbuf_getBuffer(ucbuf, &size, status);
502    pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503    uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504
505#if !UCONFIG_NO_TRANSLITERATION
506    size = utrans_stripRules(pSource, size, pTarget, status);
507#else
508    size = 0;
509    fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510#endif
511    result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512
513    ucbuf_close(ucbuf);
514    uprv_free(pTarget);
515    T_FileStream_close(file);
516
517    return result;
518}
519static struct SResource* dependencyArray = NULL;
520
521static struct SResource *
522parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523{
524    struct SResource *result = NULL;
525    struct SResource *elem = NULL;
526    struct UString   *tokenValue;
527    uint32_t          line;
528    char              filename[256] = { '\0' };
529    char              cs[128]       = { '\0' };
530
531    expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532
533    if(isVerbose()){
534        printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
535    }
536
537    if (U_FAILURE(*status))
538    {
539        return NULL;
540    }
541    /* make the filename including the directory */
542    if (state->outputdir != NULL)
543    {
544        uprv_strcat(filename, state->outputdir);
545
546        if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547        {
548            uprv_strcat(filename, U_FILE_SEP_STRING);
549        }
550    }
551
552    u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553
554    if (U_FAILURE(*status))
555    {
556        return NULL;
557    }
558    uprv_strcat(filename, cs);
559    if(!T_FileStream_file_exists(filename)){
560        if(isStrict()){
561            error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562        }else{
563            warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564        }
565    }
566    if(dependencyArray==NULL){
567        dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568    }
569    if(tag!=NULL){
570        result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571    }
572    elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573
574    array_add(dependencyArray, elem, status);
575
576    if (U_FAILURE(*status))
577    {
578        return NULL;
579    }
580    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581    return result;
582}
583static struct SResource *
584parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585{
586    struct UString   *tokenValue;
587    struct SResource *result = NULL;
588
589/*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590    {
591        return parseUCARules(tag, startline, status);
592    }*/
593    if(isVerbose()){
594        printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
595    }
596    expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597
598    if (U_SUCCESS(*status))
599    {
600        /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601        doesn't survive expect either) */
602
603        result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604        if(U_SUCCESS(*status) && result) {
605            expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606
607            if (U_FAILURE(*status))
608            {
609                res_close(result);
610                return NULL;
611            }
612        }
613    }
614
615    return result;
616}
617
618static struct SResource *
619parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620{
621    struct UString   *tokenValue;
622    struct SResource *result  = NULL;
623
624    expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625
626    if(isVerbose()){
627        printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
628    }
629
630    if (U_SUCCESS(*status))
631    {
632        /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633        doesn't survive expect either) */
634
635        result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636
637        expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638
639        if (U_FAILURE(*status))
640        {
641            res_close(result);
642            return NULL;
643        }
644    }
645
646    return result;
647}
648
649#if !UCONFIG_NO_COLLATION
650
651namespace {
652
653static struct SResource* resLookup(struct SResource* res, const char* key){
654    struct SResource *current = NULL;
655    struct SResTable *list;
656    if (res == res_none()) {
657        return NULL;
658    }
659
660    list = &(res->u.fTable);
661
662    current = list->fFirst;
663    while (current != NULL) {
664        if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665            return current;
666        }
667        current = current->fNext;
668    }
669    return NULL;
670}
671
672class GenrbImporter : public icu::CollationRuleParser::Importer {
673public:
674    GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675    virtual ~GenrbImporter();
676    virtual const UnicodeString *getRules(
677            const char *localeID, const char *collationType,
678            const char *&errorReason, UErrorCode &errorCode);
679
680private:
681    const char *inputDir;
682    const char *outputDir;
683    UnicodeString rules;
684};
685
686GenrbImporter::~GenrbImporter() {}
687
688const UnicodeString *
689GenrbImporter::getRules(
690        const char *localeID, const char *collationType,
691        const char *& /*errorReason*/, UErrorCode &errorCode) {
692    struct SRBRoot *data         = NULL;
693    UCHARBUF       *ucbuf        = NULL;
694    int localeLength = strlen(localeID);
695    char* filename = (char*)uprv_malloc(localeLength+5);
696    char           *inputDirBuf  = NULL;
697    char           *openFileName = NULL;
698    const char* cp = "";
699    int32_t i = 0;
700    int32_t dirlen  = 0;
701    int32_t filelen = 0;
702    struct SResource* root;
703    struct SResource* collations;
704    struct SResource* collation;
705    struct SResource* sequence;
706
707    memcpy(filename, localeID, localeLength);
708    for(i = 0; i < localeLength; i++){
709        if(filename[i] == '-'){
710            filename[i] = '_';
711        }
712    }
713    filename[localeLength]   = '.';
714    filename[localeLength+1] = 't';
715    filename[localeLength+2] = 'x';
716    filename[localeLength+3] = 't';
717    filename[localeLength+4] = 0;
718
719
720    if (U_FAILURE(errorCode)) {
721        return NULL;
722    }
723    if(filename==NULL){
724        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
725        return NULL;
726    }else{
727        filelen = (int32_t)uprv_strlen(filename);
728    }
729    if(inputDir == NULL) {
730        const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
731        openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
732        openFileName[0] = '\0';
733        if (filenameBegin != NULL) {
734            /*
735             * When a filename ../../../data/root.txt is specified,
736             * we presume that the input directory is ../../../data
737             * This is very important when the resource file includes
738             * another file, like UCARules.txt or thaidict.brk.
739             */
740            int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
741            inputDirBuf = (char *)uprv_malloc(filenameSize);
742
743            /* test for NULL */
744            if(inputDirBuf == NULL) {
745                errorCode = U_MEMORY_ALLOCATION_ERROR;
746                goto finish;
747            }
748
749            uprv_strncpy(inputDirBuf, filename, filenameSize);
750            inputDirBuf[filenameSize - 1] = 0;
751            inputDir = inputDirBuf;
752            dirlen  = (int32_t)uprv_strlen(inputDir);
753        }
754    }else{
755        dirlen  = (int32_t)uprv_strlen(inputDir);
756
757        if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
758            openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
759
760            /* test for NULL */
761            if(openFileName == NULL) {
762                errorCode = U_MEMORY_ALLOCATION_ERROR;
763                goto finish;
764            }
765
766            openFileName[0] = '\0';
767            /*
768             * append the input dir to openFileName if the first char in
769             * filename is not file seperation char and the last char input directory is  not '.'.
770             * This is to support :
771             * genrb -s. /home/icu/data
772             * genrb -s. icu/data
773             * The user cannot mix notations like
774             * genrb -s. /icu/data --- the absolute path specified. -s redundant
775             * user should use
776             * genrb -s. icu/data  --- start from CWD and look in icu/data dir
777             */
778            if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
779                uprv_strcpy(openFileName, inputDir);
780                openFileName[dirlen]     = U_FILE_SEP_CHAR;
781            }
782            openFileName[dirlen + 1] = '\0';
783        } else {
784            openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
785
786            /* test for NULL */
787            if(openFileName == NULL) {
788                errorCode = U_MEMORY_ALLOCATION_ERROR;
789                goto finish;
790            }
791
792            uprv_strcpy(openFileName, inputDir);
793
794        }
795    }
796    uprv_strcat(openFileName, filename);
797    /* printf("%s\n", openFileName);  */
798    errorCode = U_ZERO_ERROR;
799    ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
800
801    if(errorCode == U_FILE_ACCESS_ERROR) {
802
803        fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
804        goto finish;
805    }
806    if (ucbuf == NULL || U_FAILURE(errorCode)) {
807        fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
808        goto finish;
809    }
810
811    /* Parse the data into an SRBRoot */
812    data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
813
814    root = data->fRoot;
815    collations = resLookup(root, "collations");
816    if (collations != NULL) {
817      collation = resLookup(collations, collationType);
818      if (collation != NULL) {
819        sequence = resLookup(collation, "Sequence");
820        if (sequence != NULL) {
821          rules.setTo(FALSE, sequence->u.fString.fChars, sequence->u.fString.fLength);
822        }
823      }
824    }
825
826finish:
827    if (inputDirBuf != NULL) {
828        uprv_free(inputDirBuf);
829    }
830
831    if (openFileName != NULL) {
832        uprv_free(openFileName);
833    }
834
835    if(ucbuf) {
836        ucbuf_close(ucbuf);
837    }
838
839    return &rules;
840}
841
842// Quick-and-dirty escaping function.
843// Assumes that we are on an ASCII-based platform.
844static void
845escape(const UChar *s, char *buffer) {
846    int32_t length = u_strlen(s);
847    int32_t i = 0;
848    for (;;) {
849        UChar32 c;
850        U16_NEXT(s, i, length, c);
851        if (c == 0) {
852            *buffer = 0;
853            return;
854        } else if (0x20 <= c && c <= 0x7e) {
855            // printable ASCII
856            *buffer++ = (char)c;  // assumes ASCII-based platform
857        } else {
858            buffer += sprintf(buffer, "\\u%04X", (int)c);
859        }
860    }
861}
862
863}  // namespace
864
865#endif  // !UCONFIG_NO_COLLATION
866
867static struct SResource *
868addCollation(ParseState* state, struct SResource  *result, const char *collationType,
869             uint32_t startline, UErrorCode *status)
870{
871    // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
872    struct SResource  *member = NULL;
873    struct UString    *tokenValue;
874    struct UString     comment;
875    enum   ETokenType  token;
876    char               subtag[1024];
877    UnicodeString      rules;
878    UBool              haveRules = FALSE;
879    UVersionInfo       version;
880    uint32_t           line;
881
882    /* '{' . (name resource)* '}' */
883    version[0]=0; version[1]=0; version[2]=0; version[3]=0;
884
885    for (;;)
886    {
887        ustr_init(&comment);
888        token = getToken(state, &tokenValue, &comment, &line, status);
889
890        if (token == TOK_CLOSE_BRACE)
891        {
892            break;
893        }
894
895        if (token != TOK_STRING)
896        {
897            res_close(result);
898            *status = U_INVALID_FORMAT_ERROR;
899
900            if (token == TOK_EOF)
901            {
902                error(startline, "unterminated table");
903            }
904            else
905            {
906                error(line, "Unexpected token %s", tokenNames[token]);
907            }
908
909            return NULL;
910        }
911
912        u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
913
914        if (U_FAILURE(*status))
915        {
916            res_close(result);
917            return NULL;
918        }
919
920        member = parseResource(state, subtag, NULL, status);
921
922        if (U_FAILURE(*status))
923        {
924            res_close(result);
925            return NULL;
926        }
927        if (result == NULL)
928        {
929            // Ignore the parsed resources, continue parsing.
930        }
931        else if (uprv_strcmp(subtag, "Version") == 0)
932        {
933            char     ver[40];
934            int32_t length = member->u.fString.fLength;
935
936            if (length >= (int32_t) sizeof(ver))
937            {
938                length = (int32_t) sizeof(ver) - 1;
939            }
940
941            u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
942            u_versionFromString(version, ver);
943
944            table_add(result, member, line, status);
945            member = NULL;
946        }
947        else if(uprv_strcmp(subtag, "%%CollationBin")==0)
948        {
949            /* discard duplicate %%CollationBin if any*/
950        }
951        else if (uprv_strcmp(subtag, "Sequence") == 0)
952        {
953            rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
954            haveRules = TRUE;
955            // Defer building the collator until we have seen
956            // all sub-elements of the collation table, including the Version.
957            /* in order to achieve smaller data files, we can direct genrb */
958            /* to omit collation rules */
959            if(!state->omitCollationRules) {
960                table_add(result, member, line, status);
961                member = NULL;
962            }
963        }
964        else  // Just copy non-special items.
965        {
966            table_add(result, member, line, status);
967            member = NULL;
968        }
969        res_close(member);  // TODO: use LocalPointer
970        if (U_FAILURE(*status))
971        {
972            res_close(result);
973            return NULL;
974        }
975    }
976
977    if (!haveRules) { return result; }
978
979#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
980    warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
981    (void)collationType;
982#else
983    if(!state->makeBinaryCollation) {
984        if(isVerbose()) {
985            printf("Not building %s~%s collation binary\n", state->filename, collationType);
986        }
987        return result;
988    }
989    UErrorCode intStatus = U_ZERO_ERROR;
990    UParseError parseError;
991    uprv_memset(&parseError, 0, sizeof(parseError));
992    GenrbImporter importer(state->inputdir, state->outputdir);
993    const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
994    if(U_FAILURE(intStatus)) {
995        error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
996        res_close(result);
997        return NULL;  // TODO: use LocalUResourceBundlePointer for result
998    }
999    icu::CollationBuilder builder(base, intStatus);
1000    if(uprv_strncmp(collationType, "search", 6) == 0) {
1001        builder.disableFastLatin();  // build fast-Latin table unless search collator
1002    }
1003    LocalPointer<icu::CollationTailoring> t(
1004            builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1005    if(U_FAILURE(intStatus)) {
1006        const char *reason = builder.getErrorReason();
1007        if(reason == NULL) { reason = ""; }
1008        error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
1009                state->filename, collationType,
1010                (long)parseError.offset, u_errorName(intStatus), reason);
1011        if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1012            // Print pre- and post-context.
1013            char preBuffer[100], postBuffer[100];
1014            escape(parseError.preContext, preBuffer);
1015            escape(parseError.postContext, postBuffer);
1016            error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1017        }
1018        if(isStrict()) {
1019            *status = intStatus;
1020            res_close(result);
1021            return NULL;
1022        }
1023    }
1024    icu::LocalMemory<uint8_t> buffer;
1025    int32_t capacity = 100000;
1026    uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1027    if(dest == NULL) {
1028        fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1029                (long)capacity);
1030        *status = U_MEMORY_ALLOCATION_ERROR;
1031        res_close(result);
1032        return NULL;
1033    }
1034    int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1035    int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1036            *t, *t->settings, indexes, dest, capacity, intStatus);
1037    if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1038        intStatus = U_ZERO_ERROR;
1039        capacity = totalSize;
1040        dest = buffer.allocateInsteadAndCopy(capacity);
1041        if(dest == NULL) {
1042            fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1043                    (long)capacity);
1044            *status = U_MEMORY_ALLOCATION_ERROR;
1045            res_close(result);
1046            return NULL;
1047        }
1048        totalSize = icu::CollationDataWriter::writeTailoring(
1049                *t, *t->settings, indexes, dest, capacity, intStatus);
1050    }
1051    if(U_FAILURE(intStatus)) {
1052        fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1053                u_errorName(intStatus));
1054        res_close(result);
1055        return NULL;
1056    }
1057    if(isVerbose()) {
1058        printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1059        icu::CollationInfo::printSizes(totalSize, indexes);
1060    }
1061    struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1062    table_add(result, collationBin, line, status);
1063    if (U_FAILURE(*status)) {
1064        res_close(result);
1065        return NULL;
1066    }
1067#endif
1068    return result;
1069}
1070
1071static UBool
1072keepCollationType(const char *type) {
1073    return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
1074}
1075
1076static struct SResource *
1077parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1078{
1079    struct SResource  *result = NULL;
1080    struct SResource  *member = NULL;
1081    struct SResource  *collationRes = NULL;
1082    struct UString    *tokenValue;
1083    struct UString     comment;
1084    enum   ETokenType  token;
1085    char               subtag[1024], typeKeyword[1024];
1086    uint32_t           line;
1087
1088    result = table_open(state->bundle, tag, NULL, status);
1089
1090    if (result == NULL || U_FAILURE(*status))
1091    {
1092        return NULL;
1093    }
1094    if(isVerbose()){
1095        printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1096    }
1097    if(!newCollation) {
1098        return addCollation(state, result, "(no type)", startline, status);
1099    }
1100    else {
1101        for(;;) {
1102            ustr_init(&comment);
1103            token = getToken(state, &tokenValue, &comment, &line, status);
1104
1105            if (token == TOK_CLOSE_BRACE)
1106            {
1107                return result;
1108            }
1109
1110            if (token != TOK_STRING)
1111            {
1112                res_close(result);
1113                *status = U_INVALID_FORMAT_ERROR;
1114
1115                if (token == TOK_EOF)
1116                {
1117                    error(startline, "unterminated table");
1118                }
1119                else
1120                {
1121                    error(line, "Unexpected token %s", tokenNames[token]);
1122                }
1123
1124                return NULL;
1125            }
1126
1127            u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1128
1129            if (U_FAILURE(*status))
1130            {
1131                res_close(result);
1132                return NULL;
1133            }
1134
1135            if (uprv_strcmp(subtag, "default") == 0)
1136            {
1137                member = parseResource(state, subtag, NULL, status);
1138
1139                if (U_FAILURE(*status))
1140                {
1141                    res_close(result);
1142                    return NULL;
1143                }
1144
1145                table_add(result, member, line, status);
1146            }
1147            else
1148            {
1149                token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1150                /* this probably needs to be refactored or recursively use the parser */
1151                /* first we assume that our collation table won't have the explicit type */
1152                /* then, we cannot handle aliases */
1153                if(token == TOK_OPEN_BRACE) {
1154                    token = getToken(state, &tokenValue, &comment, &line, status);
1155                    if (keepCollationType(subtag)) {
1156                        collationRes = table_open(state->bundle, subtag, NULL, status);
1157                    } else {
1158                        collationRes = NULL;
1159                    }
1160                    // need to parse the collation data regardless
1161                    collationRes = addCollation(state, collationRes, subtag, startline, status);
1162                    if (collationRes != NULL) {
1163                        table_add(result, collationRes, startline, status);
1164                    }
1165                } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1166                    /* we could have a table too */
1167                    token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1168                    u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1169                    if(uprv_strcmp(typeKeyword, "alias") == 0) {
1170                        member = parseResource(state, subtag, NULL, status);
1171                        if (U_FAILURE(*status))
1172                        {
1173                            res_close(result);
1174                            return NULL;
1175                        }
1176
1177                        table_add(result, member, line, status);
1178                    } else {
1179                        res_close(result);
1180                        *status = U_INVALID_FORMAT_ERROR;
1181                        return NULL;
1182                    }
1183                } else {
1184                    res_close(result);
1185                    *status = U_INVALID_FORMAT_ERROR;
1186                    return NULL;
1187                }
1188            }
1189
1190            /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1191
1192            /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1193
1194            if (U_FAILURE(*status))
1195            {
1196                res_close(result);
1197                return NULL;
1198            }
1199        }
1200    }
1201}
1202
1203/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1204   if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1205static struct SResource *
1206realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1207{
1208    struct SResource  *member = NULL;
1209    struct UString    *tokenValue=NULL;
1210    struct UString    comment;
1211    enum   ETokenType token;
1212    char              subtag[1024];
1213    uint32_t          line;
1214    UBool             readToken = FALSE;
1215
1216    /* '{' . (name resource)* '}' */
1217
1218    if(isVerbose()){
1219        printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1220    }
1221    for (;;)
1222    {
1223        ustr_init(&comment);
1224        token = getToken(state, &tokenValue, &comment, &line, status);
1225
1226        if (token == TOK_CLOSE_BRACE)
1227        {
1228            if (!readToken) {
1229                warning(startline, "Encountered empty table");
1230            }
1231            return table;
1232        }
1233
1234        if (token != TOK_STRING)
1235        {
1236            *status = U_INVALID_FORMAT_ERROR;
1237
1238            if (token == TOK_EOF)
1239            {
1240                error(startline, "unterminated table");
1241            }
1242            else
1243            {
1244                error(line, "unexpected token %s", tokenNames[token]);
1245            }
1246
1247            return NULL;
1248        }
1249
1250        if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1251            u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1252        } else {
1253            *status = U_INVALID_FORMAT_ERROR;
1254            error(line, "invariant characters required for table keys");
1255            return NULL;
1256        }
1257
1258        if (U_FAILURE(*status))
1259        {
1260            error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1261            return NULL;
1262        }
1263
1264        member = parseResource(state, subtag, &comment, status);
1265
1266        if (member == NULL || U_FAILURE(*status))
1267        {
1268            error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1269            return NULL;
1270        }
1271
1272        table_add(table, member, line, status);
1273
1274        if (U_FAILURE(*status))
1275        {
1276            error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1277            return NULL;
1278        }
1279        readToken = TRUE;
1280        ustr_deinit(&comment);
1281   }
1282
1283    /* not reached */
1284    /* A compiler warning will appear if all paths don't contain a return statement. */
1285/*     *status = U_INTERNAL_PROGRAM_ERROR;
1286     return NULL;*/
1287}
1288
1289static struct SResource *
1290parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1291{
1292    struct SResource *result;
1293
1294    if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1295    {
1296        return parseCollationElements(state, tag, startline, FALSE, status);
1297    }
1298    if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1299    {
1300        return parseCollationElements(state, tag, startline, TRUE, status);
1301    }
1302    if(isVerbose()){
1303        printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1304    }
1305
1306    result = table_open(state->bundle, tag, comment, status);
1307
1308    if (result == NULL || U_FAILURE(*status))
1309    {
1310        return NULL;
1311    }
1312    return realParseTable(state, result, tag, startline,  status);
1313}
1314
1315static struct SResource *
1316parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1317{
1318    struct SResource  *result = NULL;
1319    struct SResource  *member = NULL;
1320    struct UString    *tokenValue;
1321    struct UString    memberComments;
1322    enum   ETokenType token;
1323    UBool             readToken = FALSE;
1324
1325    result = array_open(state->bundle, tag, comment, status);
1326
1327    if (result == NULL || U_FAILURE(*status))
1328    {
1329        return NULL;
1330    }
1331    if(isVerbose()){
1332        printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1333    }
1334
1335    ustr_init(&memberComments);
1336
1337    /* '{' . resource [','] '}' */
1338    for (;;)
1339    {
1340        /* reset length */
1341        ustr_setlen(&memberComments, 0, status);
1342
1343        /* check for end of array, but don't consume next token unless it really is the end */
1344        token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1345
1346
1347        if (token == TOK_CLOSE_BRACE)
1348        {
1349            getToken(state, NULL, NULL, NULL, status);
1350            if (!readToken) {
1351                warning(startline, "Encountered empty array");
1352            }
1353            break;
1354        }
1355
1356        if (token == TOK_EOF)
1357        {
1358            res_close(result);
1359            *status = U_INVALID_FORMAT_ERROR;
1360            error(startline, "unterminated array");
1361            return NULL;
1362        }
1363
1364        /* string arrays are a special case */
1365        if (token == TOK_STRING)
1366        {
1367            getToken(state, &tokenValue, &memberComments, NULL, status);
1368            member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1369        }
1370        else
1371        {
1372            member = parseResource(state, NULL, &memberComments, status);
1373        }
1374
1375        if (member == NULL || U_FAILURE(*status))
1376        {
1377            res_close(result);
1378            return NULL;
1379        }
1380
1381        array_add(result, member, status);
1382
1383        if (U_FAILURE(*status))
1384        {
1385            res_close(result);
1386            return NULL;
1387        }
1388
1389        /* eat optional comma if present */
1390        token = peekToken(state, 0, NULL, NULL, NULL, status);
1391
1392        if (token == TOK_COMMA)
1393        {
1394            getToken(state, NULL, NULL, NULL, status);
1395        }
1396
1397        if (U_FAILURE(*status))
1398        {
1399            res_close(result);
1400            return NULL;
1401        }
1402        readToken = TRUE;
1403    }
1404
1405    ustr_deinit(&memberComments);
1406    return result;
1407}
1408
1409static struct SResource *
1410parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1411{
1412    struct SResource  *result = NULL;
1413    enum   ETokenType  token;
1414    char              *string;
1415    int32_t            value;
1416    UBool              readToken = FALSE;
1417    char              *stopstring;
1418    uint32_t           len;
1419    struct UString     memberComments;
1420
1421    result = intvector_open(state->bundle, tag, comment, status);
1422
1423    if (result == NULL || U_FAILURE(*status))
1424    {
1425        return NULL;
1426    }
1427
1428    if(isVerbose()){
1429        printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1430    }
1431    ustr_init(&memberComments);
1432    /* '{' . string [','] '}' */
1433    for (;;)
1434    {
1435        ustr_setlen(&memberComments, 0, status);
1436
1437        /* check for end of array, but don't consume next token unless it really is the end */
1438        token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1439
1440        if (token == TOK_CLOSE_BRACE)
1441        {
1442            /* it's the end, consume the close brace */
1443            getToken(state, NULL, NULL, NULL, status);
1444            if (!readToken) {
1445                warning(startline, "Encountered empty int vector");
1446            }
1447            ustr_deinit(&memberComments);
1448            return result;
1449        }
1450
1451        string = getInvariantString(state, NULL, NULL, status);
1452
1453        if (U_FAILURE(*status))
1454        {
1455            res_close(result);
1456            return NULL;
1457        }
1458
1459        /* For handling illegal char in the Intvector */
1460        value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1461        len=(uint32_t)(stopstring-string);
1462
1463        if(len==uprv_strlen(string))
1464        {
1465            intvector_add(result, value, status);
1466            uprv_free(string);
1467            token = peekToken(state, 0, NULL, NULL, NULL, status);
1468        }
1469        else
1470        {
1471            uprv_free(string);
1472            *status=U_INVALID_CHAR_FOUND;
1473        }
1474
1475        if (U_FAILURE(*status))
1476        {
1477            res_close(result);
1478            return NULL;
1479        }
1480
1481        /* the comma is optional (even though it is required to prevent the reader from concatenating
1482        consecutive entries) so that a missing comma on the last entry isn't an error */
1483        if (token == TOK_COMMA)
1484        {
1485            getToken(state, NULL, NULL, NULL, status);
1486        }
1487        readToken = TRUE;
1488    }
1489
1490    /* not reached */
1491    /* A compiler warning will appear if all paths don't contain a return statement. */
1492/*    intvector_close(result, status);
1493    *status = U_INTERNAL_PROGRAM_ERROR;
1494    return NULL;*/
1495}
1496
1497static struct SResource *
1498parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1499{
1500    struct SResource *result = NULL;
1501    uint8_t          *value;
1502    char             *string;
1503    char              toConv[3] = {'\0', '\0', '\0'};
1504    uint32_t          count;
1505    uint32_t          i;
1506    uint32_t          line;
1507    char             *stopstring;
1508    uint32_t          len;
1509
1510    string = getInvariantString(state, &line, NULL, status);
1511
1512    if (string == NULL || U_FAILURE(*status))
1513    {
1514        return NULL;
1515    }
1516
1517    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1518
1519    if (U_FAILURE(*status))
1520    {
1521        uprv_free(string);
1522        return NULL;
1523    }
1524
1525    if(isVerbose()){
1526        printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1527    }
1528
1529    count = (uint32_t)uprv_strlen(string);
1530    if (count > 0){
1531        if((count % 2)==0){
1532            value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1533
1534            if (value == NULL)
1535            {
1536                uprv_free(string);
1537                *status = U_MEMORY_ALLOCATION_ERROR;
1538                return NULL;
1539            }
1540
1541            for (i = 0; i < count; i += 2)
1542            {
1543                toConv[0] = string[i];
1544                toConv[1] = string[i + 1];
1545
1546                value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1547                len=(uint32_t)(stopstring-toConv);
1548
1549                if(len!=uprv_strlen(toConv))
1550                {
1551                    uprv_free(string);
1552                    *status=U_INVALID_CHAR_FOUND;
1553                    return NULL;
1554                }
1555            }
1556
1557            result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1558
1559            uprv_free(value);
1560        }
1561        else
1562        {
1563            *status = U_INVALID_CHAR_FOUND;
1564            uprv_free(string);
1565            error(line, "Encountered invalid binary string");
1566            return NULL;
1567        }
1568    }
1569    else
1570    {
1571        result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1572        warning(startline, "Encountered empty binary tag");
1573    }
1574    uprv_free(string);
1575
1576    return result;
1577}
1578
1579static struct SResource *
1580parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1581{
1582    struct SResource *result = NULL;
1583    int32_t           value;
1584    char             *string;
1585    char             *stopstring;
1586    uint32_t          len;
1587
1588    string = getInvariantString(state, NULL, NULL, status);
1589
1590    if (string == NULL || U_FAILURE(*status))
1591    {
1592        return NULL;
1593    }
1594
1595    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1596
1597    if (U_FAILURE(*status))
1598    {
1599        uprv_free(string);
1600        return NULL;
1601    }
1602
1603    if(isVerbose()){
1604        printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1605    }
1606
1607    if (uprv_strlen(string) <= 0)
1608    {
1609        warning(startline, "Encountered empty integer. Default value is 0.");
1610    }
1611
1612    /* Allow integer support for hexdecimal, octal digit and decimal*/
1613    /* and handle illegal char in the integer*/
1614    value = uprv_strtoul(string, &stopstring, 0);
1615    len=(uint32_t)(stopstring-string);
1616    if(len==uprv_strlen(string))
1617    {
1618        result = int_open(state->bundle, tag, value, comment, status);
1619    }
1620    else
1621    {
1622        *status=U_INVALID_CHAR_FOUND;
1623    }
1624    uprv_free(string);
1625
1626    return result;
1627}
1628
1629static struct SResource *
1630parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1631{
1632    struct SResource *result;
1633    FileStream       *file;
1634    int32_t           len;
1635    uint8_t          *data;
1636    char             *filename;
1637    uint32_t          line;
1638    char     *fullname = NULL;
1639    filename = getInvariantString(state, &line, NULL, status);
1640
1641    if (U_FAILURE(*status))
1642    {
1643        return NULL;
1644    }
1645
1646    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1647
1648    if (U_FAILURE(*status))
1649    {
1650        uprv_free(filename);
1651        return NULL;
1652    }
1653
1654    if(isVerbose()){
1655        printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1656    }
1657
1658    /* Open the input file for reading */
1659    if (state->inputdir == NULL)
1660    {
1661#if 1
1662        /*
1663         * Always save file file name, even if there's
1664         * no input directory specified. MIGHT BREAK SOMETHING
1665         */
1666        int32_t filenameLength = uprv_strlen(filename);
1667
1668        fullname = (char *) uprv_malloc(filenameLength + 1);
1669        uprv_strcpy(fullname, filename);
1670#endif
1671
1672        file = T_FileStream_open(filename, "rb");
1673    }
1674    else
1675    {
1676
1677        int32_t  count     = (int32_t)uprv_strlen(filename);
1678
1679        if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1680        {
1681            fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1682
1683            /* test for NULL */
1684            if(fullname == NULL)
1685            {
1686                *status = U_MEMORY_ALLOCATION_ERROR;
1687                return NULL;
1688            }
1689
1690            uprv_strcpy(fullname, state->inputdir);
1691
1692            fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1693            fullname[state->inputdirLength + 1] = '\0';
1694
1695            uprv_strcat(fullname, filename);
1696        }
1697        else
1698        {
1699            fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1700
1701            /* test for NULL */
1702            if(fullname == NULL)
1703            {
1704                *status = U_MEMORY_ALLOCATION_ERROR;
1705                return NULL;
1706            }
1707
1708            uprv_strcpy(fullname, state->inputdir);
1709            uprv_strcat(fullname, filename);
1710        }
1711
1712        file = T_FileStream_open(fullname, "rb");
1713
1714    }
1715
1716    if (file == NULL)
1717    {
1718        error(line, "couldn't open input file %s", filename);
1719        *status = U_FILE_ACCESS_ERROR;
1720        return NULL;
1721    }
1722
1723    len  = T_FileStream_size(file);
1724    data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1725    /* test for NULL */
1726    if(data == NULL)
1727    {
1728        *status = U_MEMORY_ALLOCATION_ERROR;
1729        T_FileStream_close (file);
1730        return NULL;
1731    }
1732
1733    /* int32_t numRead = */ T_FileStream_read  (file, data, len);
1734    T_FileStream_close (file);
1735
1736    result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1737
1738    uprv_free(data);
1739    uprv_free(filename);
1740    uprv_free(fullname);
1741
1742    return result;
1743}
1744
1745static struct SResource *
1746parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1747{
1748    struct SResource *result;
1749    int32_t           len=0;
1750    char             *filename;
1751    uint32_t          line;
1752    UChar *pTarget     = NULL;
1753
1754    UCHARBUF *ucbuf;
1755    char     *fullname = NULL;
1756    int32_t  count     = 0;
1757    const char* cp = NULL;
1758    const UChar* uBuffer = NULL;
1759
1760    filename = getInvariantString(state, &line, NULL, status);
1761    count     = (int32_t)uprv_strlen(filename);
1762
1763    if (U_FAILURE(*status))
1764    {
1765        return NULL;
1766    }
1767
1768    expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1769
1770    if (U_FAILURE(*status))
1771    {
1772        uprv_free(filename);
1773        return NULL;
1774    }
1775
1776    if(isVerbose()){
1777        printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1778    }
1779
1780    fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1781    /* test for NULL */
1782    if(fullname == NULL)
1783    {
1784        *status = U_MEMORY_ALLOCATION_ERROR;
1785        uprv_free(filename);
1786        return NULL;
1787    }
1788
1789    if(state->inputdir!=NULL){
1790        if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1791        {
1792
1793            uprv_strcpy(fullname, state->inputdir);
1794
1795            fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1796            fullname[state->inputdirLength + 1] = '\0';
1797
1798            uprv_strcat(fullname, filename);
1799        }
1800        else
1801        {
1802            uprv_strcpy(fullname, state->inputdir);
1803            uprv_strcat(fullname, filename);
1804        }
1805    }else{
1806        uprv_strcpy(fullname,filename);
1807    }
1808
1809    ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1810
1811    if (U_FAILURE(*status)) {
1812        error(line, "couldn't open input file %s\n", filename);
1813        return NULL;
1814    }
1815
1816    uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1817    result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1818
1819    ucbuf_close(ucbuf);
1820
1821    uprv_free(pTarget);
1822
1823    uprv_free(filename);
1824    uprv_free(fullname);
1825
1826    return result;
1827}
1828
1829
1830
1831
1832
1833U_STRING_DECL(k_type_string,    "string",    6);
1834U_STRING_DECL(k_type_binary,    "binary",    6);
1835U_STRING_DECL(k_type_bin,       "bin",       3);
1836U_STRING_DECL(k_type_table,     "table",     5);
1837U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1838U_STRING_DECL(k_type_int,       "int",       3);
1839U_STRING_DECL(k_type_integer,   "integer",   7);
1840U_STRING_DECL(k_type_array,     "array",     5);
1841U_STRING_DECL(k_type_alias,     "alias",     5);
1842U_STRING_DECL(k_type_intvector, "intvector", 9);
1843U_STRING_DECL(k_type_import,    "import",    6);
1844U_STRING_DECL(k_type_include,   "include",   7);
1845
1846/* Various non-standard processing plugins that create one or more special resources. */
1847U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1848U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1849U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1850U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1851
/*
 * Resource kinds recognised by the parser. The numeric value of each
 * enumerator is used as an index into gResourceTypes, so the values must stay
 * contiguous and in step with that table. RESTYPE_UNKNOWN and RESTYPE_RESERVED
 * bracket the range that parseResourceType() searches.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN                = 0,
    RESTYPE_STRING                 = 1,
    RESTYPE_BINARY                 = 2,
    RESTYPE_TABLE                  = 3,
    RESTYPE_TABLE_NO_FALLBACK      = 4,
    RESTYPE_INTEGER                = 5,
    RESTYPE_ARRAY                  = 6,
    RESTYPE_ALIAS                  = 7,
    RESTYPE_INTVECTOR              = 8,
    RESTYPE_IMPORT                 = 9,
    RESTYPE_INCLUDE                = 10,
    RESTYPE_PROCESS_UCA_RULES      = 11,
    RESTYPE_PROCESS_COLLATION      = 12,
    RESTYPE_PROCESS_TRANSLITERATOR = 13,
    RESTYPE_PROCESS_DEPENDENCY     = 14,
    RESTYPE_RESERVED               = 15
} EResourceType;
1871
1872static struct {
1873    const char *nameChars;   /* only used for debugging */
1874    const UChar *nameUChars;
1875    ParseResourceFunction *parseFunction;
1876} gResourceTypes[] = {
1877    {"Unknown", NULL, NULL},
1878    {"string", k_type_string, parseString},
1879    {"binary", k_type_binary, parseBinary},
1880    {"table", k_type_table, parseTable},
1881    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1882    {"integer", k_type_integer, parseInteger},
1883    {"array", k_type_array, parseArray},
1884    {"alias", k_type_alias, parseAlias},
1885    {"intvector", k_type_intvector, parseIntVector},
1886    {"import", k_type_import, parseImport},
1887    {"include", k_type_include, parseInclude},
1888    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1889    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1890    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1891    {"process(dependency)", k_type_plugin_dependency, parseDependency},
1892    {"reserved", NULL, NULL}
1893};
1894
1895void initParser()
1896{
1897    U_STRING_INIT(k_type_string,    "string",    6);
1898    U_STRING_INIT(k_type_binary,    "binary",    6);
1899    U_STRING_INIT(k_type_bin,       "bin",       3);
1900    U_STRING_INIT(k_type_table,     "table",     5);
1901    U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1902    U_STRING_INIT(k_type_int,       "int",       3);
1903    U_STRING_INIT(k_type_integer,   "integer",   7);
1904    U_STRING_INIT(k_type_array,     "array",     5);
1905    U_STRING_INIT(k_type_alias,     "alias",     5);
1906    U_STRING_INIT(k_type_intvector, "intvector", 9);
1907    U_STRING_INIT(k_type_import,    "import",    6);
1908    U_STRING_INIT(k_type_include,   "include",   7);
1909
1910    U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1911    U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1912    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1913    U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1914}
1915
1916static inline UBool isTable(enum EResourceType type) {
1917    return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1918}
1919
1920static enum EResourceType
1921parseResourceType(ParseState* state, UErrorCode *status)
1922{
1923    struct UString        *tokenValue;
1924    struct UString        comment;
1925    enum   EResourceType  result = RESTYPE_UNKNOWN;
1926    uint32_t              line=0;
1927    ustr_init(&comment);
1928    expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1929
1930    if (U_FAILURE(*status))
1931    {
1932        return RESTYPE_UNKNOWN;
1933    }
1934
1935    *status = U_ZERO_ERROR;
1936
1937    /* Search for normal types */
1938    result=RESTYPE_UNKNOWN;
1939    while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1940        if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1941            break;
1942        }
1943    }
1944    /* Now search for the aliases */
1945    if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1946        result = RESTYPE_INTEGER;
1947    }
1948    else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1949        result = RESTYPE_BINARY;
1950    }
1951    else if (result == RESTYPE_RESERVED) {
1952        char tokenBuffer[1024];
1953        u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1954        tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1955        *status = U_INVALID_FORMAT_ERROR;
1956        error(line, "unknown resource type '%s'", tokenBuffer);
1957    }
1958
1959    return result;
1960}
1961
1962/* parse a non-top-level resource */
1963static struct SResource *
1964parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1965{
1966    enum   ETokenType      token;
1967    enum   EResourceType  resType = RESTYPE_UNKNOWN;
1968    ParseResourceFunction *parseFunction = NULL;
1969    struct UString        *tokenValue;
1970    uint32_t                 startline;
1971    uint32_t                 line;
1972
1973
1974    token = getToken(state, &tokenValue, NULL, &startline, status);
1975
1976    if(isVerbose()){
1977        printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1978    }
1979
1980    /* name . [ ':' type ] '{' resource '}' */
1981    /* This function parses from the colon onwards.  If the colon is present, parse the
1982    type then try to parse a resource of that type.  If there is no explicit type,
1983    work it out using the lookahead tokens. */
1984    switch (token)
1985    {
1986    case TOK_EOF:
1987        *status = U_INVALID_FORMAT_ERROR;
1988        error(startline, "Unexpected EOF encountered");
1989        return NULL;
1990
1991    case TOK_ERROR:
1992        *status = U_INVALID_FORMAT_ERROR;
1993        return NULL;
1994
1995    case TOK_COLON:
1996        resType = parseResourceType(state, status);
1997        expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1998
1999        if (U_FAILURE(*status))
2000        {
2001            return NULL;
2002        }
2003
2004        break;
2005
2006    case TOK_OPEN_BRACE:
2007        break;
2008
2009    default:
2010        *status = U_INVALID_FORMAT_ERROR;
2011        error(startline, "syntax error while reading a resource, expected '{' or ':'");
2012        return NULL;
2013    }
2014
2015
2016    if (resType == RESTYPE_UNKNOWN)
2017    {
2018        /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
2019        We could have any of the following:
2020        { {         => array (nested)
2021        { :/}       => array
2022        { string ,  => string array
2023
2024        { string {  => table
2025
2026        { string :/{    => table
2027        { string }      => string
2028        */
2029
2030        token = peekToken(state, 0, NULL, &line, NULL,status);
2031
2032        if (U_FAILURE(*status))
2033        {
2034            return NULL;
2035        }
2036
2037        if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2038        {
2039            resType = RESTYPE_ARRAY;
2040        }
2041        else if (token == TOK_STRING)
2042        {
2043            token = peekToken(state, 1, NULL, &line, NULL, status);
2044
2045            if (U_FAILURE(*status))
2046            {
2047                return NULL;
2048            }
2049
2050            switch (token)
2051            {
2052            case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
2053            case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
2054            case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
2055            case TOK_COLON:         resType = RESTYPE_TABLE;  break;
2056            default:
2057                *status = U_INVALID_FORMAT_ERROR;
2058                error(line, "Unexpected token after string, expected ',', '{' or '}'");
2059                return NULL;
2060            }
2061        }
2062        else
2063        {
2064            *status = U_INVALID_FORMAT_ERROR;
2065            error(line, "Unexpected token after '{'");
2066            return NULL;
2067        }
2068
2069        /* printf("Type guessed as %s\n", resourceNames[resType]); */
2070    } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2071        *status = U_INVALID_FORMAT_ERROR;
2072        error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2073        return NULL;
2074    }
2075
2076
2077    /* We should now know what we need to parse next, so call the appropriate parser
2078    function and return. */
2079    parseFunction = gResourceTypes[resType].parseFunction;
2080    if (parseFunction != NULL) {
2081        return parseFunction(state, tag, startline, comment, status);
2082    }
2083    else {
2084        *status = U_INTERNAL_PROGRAM_ERROR;
2085        error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2086    }
2087
2088    return NULL;
2089}
2090
2091/* parse the top-level resource */
2092struct SRBRoot *
2093parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2094      UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2095{
2096    struct UString    *tokenValue;
2097    struct UString    comment;
2098    uint32_t           line;
2099    enum EResourceType bundleType;
2100    enum ETokenType    token;
2101    ParseState state;
2102    uint32_t i;
2103
2104
2105    for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2106    {
2107        ustr_init(&state.lookahead[i].value);
2108        ustr_init(&state.lookahead[i].comment);
2109    }
2110
2111    initLookahead(&state, buf, status);
2112
2113    state.inputdir       = inputDir;
2114    state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2115    state.outputdir       = outputDir;
2116    state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2117    state.filename = filename;
2118    state.makeBinaryCollation = makeBinaryCollation;
2119    state.omitCollationRules = omitCollationRules;
2120
2121    ustr_init(&comment);
2122    expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2123
2124    state.bundle = bundle_open(&comment, FALSE, status);
2125
2126    if (state.bundle == NULL || U_FAILURE(*status))
2127    {
2128        return NULL;
2129    }
2130
2131
2132    bundle_setlocale(state.bundle, tokenValue->fChars, status);
2133
2134    /* The following code is to make Empty bundle work no matter with :table specifer or not */
2135    token = getToken(&state, NULL, NULL, &line, status);
2136    if(token==TOK_COLON) {
2137        *status=U_ZERO_ERROR;
2138        bundleType=parseResourceType(&state, status);
2139
2140        if(isTable(bundleType))
2141        {
2142            expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2143        }
2144        else
2145        {
2146            *status=U_PARSE_ERROR;
2147             error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2148        }
2149    }
2150    else
2151    {
2152        /* not a colon */
2153        if(token==TOK_OPEN_BRACE)
2154        {
2155            *status=U_ZERO_ERROR;
2156            bundleType=RESTYPE_TABLE;
2157        }
2158        else
2159        {
2160            /* neither colon nor open brace */
2161            *status=U_PARSE_ERROR;
2162            bundleType=RESTYPE_UNKNOWN;
2163            error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2164        }
2165    }
2166
2167    if (U_FAILURE(*status))
2168    {
2169        bundle_close(state.bundle, status);
2170        return NULL;
2171    }
2172
2173    if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2174        /*
2175         * Parse a top-level table with the table(nofallback) declaration.
2176         * This is the same as a regular table, but also sets the
2177         * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2178         */
2179        state.bundle->noFallback=TRUE;
2180    }
2181    /* top-level tables need not handle special table names like "collations" */
2182    realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2183    if(dependencyArray!=NULL){
2184        table_add(state.bundle->fRoot, dependencyArray, 0, status);
2185        dependencyArray = NULL;
2186    }
2187   if (U_FAILURE(*status))
2188    {
2189        bundle_close(state.bundle, status);
2190        res_close(dependencyArray);
2191        return NULL;
2192    }
2193
2194    if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2195    {
2196        warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2197        if(isStrict()){
2198            *status = U_INVALID_FORMAT_ERROR;
2199            return NULL;
2200        }
2201    }
2202
2203    cleanupLookahead(&state);
2204    ustr_deinit(&comment);
2205    return state.bundle;
2206}
2207