1/******************************************************************************
2 *   Copyright (C) 2008-2012, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#include <stdio.h>
9#include <stdlib.h>
10#include "unicode/utypes.h"
11#include "unicode/putil.h"
12#include "cmemory.h"
13#include "cstring.h"
14#include "filestrm.h"
15#include "toolutil.h"
16#include "unicode/uclean.h"
17#include "unewdata.h"
18#include "putilimp.h"
19#include "pkg_gencmn.h"
20
21#define STRING_STORE_SIZE 200000
22
23#define COMMON_DATA_NAME U_ICUDATA_NAME
24#define DATA_TYPE "dat"
25
26/* ICU package data file format (.dat files) ------------------------------- ***
27
28Description of the data format after the usual ICU data file header
29(UDataInfo etc.).
30
31Format version 1
32
33A .dat package file contains a simple Table of Contents of item names,
34followed by the items themselves:
35
361. ToC table
37
38uint32_t count; - number of items
39UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
40    uint32_t nameOffset; - offset of the item name
41    uint32_t dataOffset; - offset of the item data
42both are byte offsets from the beginning of the data
43
442. item name strings
45
46All item names are stored as char * strings in one block between the ToC table
47and the data items.
48
493. data items
50
51The data items are stored following the item names block.
52Each data item is 16-aligned.
53The data items are stored in the sorted order of their names.
54
Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of every other item is the difference
between its offset and the offset of the item that follows it.
59
60----------------------------------------------------------------------------- */
61
/*
 * UDataInfo cf. udata.h
 *
 * Data header descriptor for the generated common data file; it is handed to
 * udata_create() when createCommonDataFile() writes a .dat package.
 * The byte order, charset family and UChar size entries are taken from the
 * build platform's configuration macros.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
76
/* size limit for packaged files, copied from createCommonDataFile()'s max_size; 0 disables the check in addFile() */
static uint32_t maxSize;

/* fixed-size character arena served by allocString(); stringTop is the index of its first unused byte */
static char stringStore[STRING_STORE_SIZE];
/* basenameTotal accumulates the basenameLength values (each includes the terminating NUL) recorded by addFile() */
static uint32_t stringTop=0, basenameTotal=0;

/* one entry per listed item */
typedef struct {
    /*
     * pathname: path used to open the item for .dat output, or the C symbol
     *           name derived from the basename for table-of-contents output
     * basename: item name as written into the table of contents
     */
    char *pathname, *basename;
    /* lengths and byte offsets; these are only filled in for .dat output */
    uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
} File;

/* addFile() grows the files array by this many entries at a time */
#define CHUNK_FILE_COUNT 256
static File *files = NULL;       /* growable array of listed items */
static uint32_t fileCount=0;     /* number of entries in use */
static uint32_t fileMax = 0;     /* number of entries allocated */


/* optional prefix written before each symbol name in the generated TOC source;
   NOTE(review): no assignment to it is visible in this file - confirm whether it is still used */
static char *symPrefix = NULL;
94
95#define LINE_BUFFER_SIZE 512
96/* prototypes --------------------------------------------------------------- */
97
/* record one listed item in the files array (for .dat output this also looks up its size) */
static void
addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);

/* reserve length bytes in stringStore; exits the process when the store is full */
static char *
allocString(uint32_t length);

/* qsort() comparator: orders File entries by their basename strings */
static int
compareFiles(const void *file1, const void *file2);

/* build a newly allocated string consisting of source, a file separator and path */
static char *
pathToFullPath(const char *path, const char *source);

/* map non-tree separators (such as '\') to the tree separator ('/') in place. */
static void
fixDirToTreePath(char *s);
113/* -------------------------------------------------------------------------- */
114
115U_CAPI void U_EXPORT2
116createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
117                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
118    static char buffer[4096];
119    char *line;
120    char *linePtr;
121    char *s = NULL;
122    UErrorCode errorCode=U_ZERO_ERROR;
123    uint32_t i, fileOffset, basenameOffset, length, nread;
124    FileStream *in, *file;
125
126    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
127    if (line == NULL) {
128        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
129        exit(U_MEMORY_ALLOCATION_ERROR);
130    }
131
132    linePtr = line;
133
134    maxSize = max_size;
135
136    if (destDir == NULL) {
137        destDir = u_getDataDirectory();
138    }
139    if (name == NULL) {
140        name = COMMON_DATA_NAME;
141    }
142    if (type == NULL) {
143        type = DATA_TYPE;
144    }
145    if (source == NULL) {
146        source = ".";
147    }
148
149    if (dataFile == NULL) {
150        in = T_FileStream_stdin();
151    } else {
152        in = T_FileStream_open(dataFile, "r");
153        if(in == NULL) {
154            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
155            exit(U_FILE_ACCESS_ERROR);
156        }
157    }
158
159    if (verbose) {
160        if(sourceTOC) {
161            printf("generating %s_%s.c (table of contents source file)\n", name, type);
162        } else {
163            printf("generating %s.%s (common data file with table of contents)\n", name, type);
164        }
165    }
166
167    /* read the list of files and get their lengths */
168    while((s != NULL && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
169                                                             LINE_BUFFER_SIZE))!=NULL) {
170        /* remove trailing newline characters and parse space separated items */
171        if (s != NULL && *s != 0) {
172            line=s;
173        } else {
174            s=line;
175        }
176        while(*s!=0) {
177            if(*s==' ') {
178                *s=0;
179                ++s;
180                break;
181            } else if(*s=='\r' || *s=='\n') {
182                *s=0;
183                break;
184            }
185            ++s;
186        }
187
188        /* check for comment */
189
190        if (*line == '#') {
191            continue;
192        }
193
194        /* add the file */
195#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
196        {
197          char *t;
198          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
199            *t = U_FILE_SEP_CHAR;
200          }
201        }
202#endif
203        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
204    }
205
206    uprv_free(linePtr);
207
208    if(in!=T_FileStream_stdin()) {
209        T_FileStream_close(in);
210    }
211
212    if(fileCount==0) {
213        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
214        return;
215    }
216
217    /* sort the files by basename */
218    qsort(files, fileCount, sizeof(File), compareFiles);
219
220    if(!sourceTOC) {
221        UNewDataMemory *out;
222
223        /* determine the offsets of all basenames and files in this common one */
224        basenameOffset=4+8*fileCount;
225        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
226        for(i=0; i<fileCount; ++i) {
227            files[i].fileOffset=fileOffset;
228            fileOffset+=(files[i].fileSize+15)&~0xf;
229            files[i].basenameOffset=basenameOffset;
230            basenameOffset+=files[i].basenameLength;
231        }
232
233        /* create the output file */
234        out=udata_create(destDir, type, name,
235                         &dataInfo,
236                         copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
237                         &errorCode);
238        if(U_FAILURE(errorCode)) {
239            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
240                destDir, name, type,
241                u_errorName(errorCode));
242            exit(errorCode);
243        }
244
245        /* write the table of contents */
246        udata_write32(out, fileCount);
247        for(i=0; i<fileCount; ++i) {
248            udata_write32(out, files[i].basenameOffset);
249            udata_write32(out, files[i].fileOffset);
250        }
251
252        /* write the basenames */
253        for(i=0; i<fileCount; ++i) {
254            udata_writeString(out, files[i].basename, files[i].basenameLength);
255        }
256        length=4+8*fileCount+basenameTotal;
257
258        /* copy the files */
259        for(i=0; i<fileCount; ++i) {
260            /* pad to 16-align the next file */
261            length&=0xf;
262            if(length!=0) {
263                udata_writePadding(out, 16-length);
264            }
265
266            if (verbose) {
267                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
268            }
269
270            /* copy the next file */
271            file=T_FileStream_open(files[i].pathname, "rb");
272            if(file==NULL) {
273                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
274                exit(U_FILE_ACCESS_ERROR);
275            }
276            for(nread = 0;;) {
277                length=T_FileStream_read(file, buffer, sizeof(buffer));
278                if(length <= 0) {
279                    break;
280                }
281                nread += length;
282                udata_writeBlock(out, buffer, length);
283            }
284            T_FileStream_close(file);
285            length=files[i].fileSize;
286
287            if (nread != files[i].fileSize) {
288              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
289                exit(U_FILE_ACCESS_ERROR);
290            }
291        }
292
293        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
294        length&=0xf;
295        if(length!=0) {
296            udata_writePadding(out, 16-length);
297        }
298
299        /* finish */
300        udata_finish(out, &errorCode);
301        if(U_FAILURE(errorCode)) {
302            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
303            exit(errorCode);
304        }
305    } else {
306        /* write a .c source file with the table of contents */
307        char *filename;
308        FileStream *out;
309
310        /* create the output filename */
311        filename=s=buffer;
312        uprv_strcpy(filename, destDir);
313        s=filename+uprv_strlen(filename);
314        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
315            *s++=U_FILE_SEP_CHAR;
316        }
317        uprv_strcpy(s, name);
318        if(*(type)!=0) {
319            s+=uprv_strlen(s);
320            *s++='_';
321            uprv_strcpy(s, type);
322        }
323        s+=uprv_strlen(s);
324        uprv_strcpy(s, ".c");
325
326        /* open the output file */
327        out=T_FileStream_open(filename, "w");
328        if (gencmnFileName != NULL) {
329            uprv_strcpy(gencmnFileName, filename);
330        }
331        if(out==NULL) {
332            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
333            exit(U_FILE_ACCESS_ERROR);
334        }
335
336        /* write the source file */
337        sprintf(buffer,
338            "/*\n"
339            " * ICU common data table of contents for %s.%s\n"
340            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
341            " */\n\n"
342            "#include \"unicode/utypes.h\"\n"
343            "#include \"unicode/udata.h\"\n"
344            "\n"
345            "/* external symbol declarations for data (%d files) */\n",
346                name, type, fileCount);
347        T_FileStream_writeLine(out, buffer);
348
349        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
350        T_FileStream_writeLine(out, buffer);
351        for(i=1; i<fileCount; ++i) {
352            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
353            T_FileStream_writeLine(out, buffer);
354        }
355        T_FileStream_writeLine(out, ";\n\n");
356
357        sprintf(
358            buffer,
359            "U_EXPORT struct {\n"
360            "    uint16_t headerSize;\n"
361            "    uint8_t magic1, magic2;\n"
362            "    UDataInfo info;\n"
363            "    char padding[%lu];\n"
364            "    uint32_t count, reserved;\n"
365            "    struct {\n"
366            "        const char *name;\n"
367            "        const void *data;\n"
368            "    } toc[%lu];\n"
369            "} U_EXPORT2 %s_dat = {\n"
370            "    32, 0xda, 0x27, {\n"
371            "        %lu, 0,\n"
372            "        %u, %u, %u, 0,\n"
373            "        {0x54, 0x6f, 0x43, 0x50},\n"
374            "        {1, 0, 0, 0},\n"
375            "        {0, 0, 0, 0}\n"
376            "    },\n"
377            "    \"\", %lu, 0, {\n",
378            (unsigned long)32-4-sizeof(UDataInfo),
379            (unsigned long)fileCount,
380            entrypointName,
381            (unsigned long)sizeof(UDataInfo),
382            U_IS_BIG_ENDIAN,
383            U_CHARSET_FAMILY,
384            U_SIZEOF_UCHAR,
385            (unsigned long)fileCount
386        );
387        T_FileStream_writeLine(out, buffer);
388
389        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
390        T_FileStream_writeLine(out, buffer);
391        for(i=1; i<fileCount; ++i) {
392            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
393            T_FileStream_writeLine(out, buffer);
394        }
395
396        T_FileStream_writeLine(out, "\n    }\n};\n");
397        T_FileStream_close(out);
398
399        uprv_free(symPrefix);
400    }
401}
402
403static void
404addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
405    char *s;
406    uint32_t length;
407    char *fullPath = NULL;
408
409    if(fileCount==fileMax) {
410      fileMax += CHUNK_FILE_COUNT;
411      files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
412      if(files==NULL) {
413        fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
414        exit(U_MEMORY_ALLOCATION_ERROR);
415      }
416    }
417
418    if(!sourceTOC) {
419        FileStream *file;
420
421        if(uprv_pathIsAbsolute(filename)) {
422            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
423            exit(U_ILLEGAL_ARGUMENT_ERROR);
424        }
425        fullPath = pathToFullPath(filename, source);
426        /* store the pathname */
427        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
428        s=allocString(length);
429        uprv_strcpy(s, name);
430        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
431        uprv_strcat(s, filename);
432
433        /* get the basename */
434        fixDirToTreePath(s);
435        files[fileCount].basename=s;
436        files[fileCount].basenameLength=length;
437
438        files[fileCount].pathname=fullPath;
439
440        basenameTotal+=length;
441
442        /* try to open the file */
443        file=T_FileStream_open(fullPath, "rb");
444        if(file==NULL) {
445            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
446            exit(U_FILE_ACCESS_ERROR);
447        }
448
449        /* get the file length */
450        length=T_FileStream_size(file);
451        if(T_FileStream_error(file) || length<=20) {
452            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
453            exit(U_FILE_ACCESS_ERROR);
454        }
455
456        T_FileStream_close(file);
457
458        /* do not add files that are longer than maxSize */
459        if(maxSize && length>maxSize) {
460            if (verbose) {
461                printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
462            }
463            return;
464        }
465        files[fileCount].fileSize=length;
466    } else {
467        char *t;
468        /* get and store the basename */
469        /* need to include the package name */
470        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
471        s=allocString(length);
472        uprv_strcpy(s, name);
473        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
474        uprv_strcat(s, filename);
475        fixDirToTreePath(s);
476        files[fileCount].basename=s;
477        /* turn the basename into an entry point name and store in the pathname field */
478        t=files[fileCount].pathname=allocString(length);
479        while(--length>0) {
480            if(*s=='.' || *s=='-' || *s=='/') {
481                *t='_';
482            } else {
483                *t=*s;
484            }
485            ++s;
486            ++t;
487        }
488        *t=0;
489    }
490    ++fileCount;
491}
492
493static char *
494allocString(uint32_t length) {
495    uint32_t top=stringTop+length;
496    char *p;
497
498    if(top>STRING_STORE_SIZE) {
499        fprintf(stderr, "gencmn: out of memory\n");
500        exit(U_MEMORY_ALLOCATION_ERROR);
501    }
502    p=stringStore+stringTop;
503    stringTop=top;
504    return p;
505}
506
507static char *
508pathToFullPath(const char *path, const char *source) {
509    int32_t length;
510    int32_t newLength;
511    char *fullPath;
512    int32_t n;
513
514    length = (uint32_t)(uprv_strlen(path) + 1);
515    newLength = (length + 1 + (int32_t)uprv_strlen(source));
516    fullPath = uprv_malloc(newLength);
517    if(source != NULL) {
518        uprv_strcpy(fullPath, source);
519        uprv_strcat(fullPath, U_FILE_SEP_STRING);
520    } else {
521        fullPath[0] = 0;
522    }
523    n = (int32_t)uprv_strlen(fullPath);
524    fullPath[n] = 0;       /* Suppress compiler warning for unused variable n    */
525                           /*  when conditional code below is not compiled.      */
526    uprv_strcat(fullPath, path);
527
528#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
529#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
530    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
531    for(;fullPath[n];n++) {
532        if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
533            fullPath[n] = U_FILE_SEP_CHAR;
534        }
535    }
536#endif
537#endif
538#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
539    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
540    for(;fullPath[n];n++) {
541        if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
542            fullPath[n] = U_FILE_SEP_CHAR;
543        }
544    }
545#endif
546    return fullPath;
547}
548
549static int
550compareFiles(const void *file1, const void *file2) {
551    /* sort by basename */
552    return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
553}
554
/*
 * Rewrite s in place so that it uses U_TREE_ENTRY_SEP_CHAR as its only
 * separator: every U_FILE_SEP_CHAR and every U_FILE_ALT_SEP_CHAR that differs
 * from the tree separator is replaced. On platforms where all three
 * characters are the same, nothing is compiled in and s is left untouched.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *hit;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* file separator -> tree separator */
    hit = uprv_strchr(s, U_FILE_SEP_CHAR);
    while(hit != NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit = uprv_strchr(hit, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* alternate file separator -> tree separator */
    hit = uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(hit != NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit = uprv_strchr(hit, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
572