1/******************************************************************************
2 *   Copyright (C) 2009-2012, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#if U_PLATFORM_HAS_WIN32_API
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#if U_PLATFORM_IS_LINUX_BASED
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
/*
 * Sentinel column value: the write8()/write32()/write8str() helpers treat it
 * as "nothing has been written yet" and start counting columns at 1.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hexadecimal literal styles, stored in AssemblyType.hexType and honored by write32(). */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */
53
54/* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (outFilename) and the entry point name
 * (entryName) from the input file name, the optional destination directory,
 * the new suffix and the optional output base name.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number and returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit word for the selected assembler and returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as a string-literal escape sequence and returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
68/* -------------------------------------------------------------------------- */
69
70/*
71Creating Template Files for New Platforms
72
73Let the cc compiler help you get started.
74Compile this program
75    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76with the -S option to produce assembly output.
77
78For example, this will generate array.s:
79gcc -S array.c
80
81This will produce a .s file that may look like this:
82
83    .file   "array.c"
84    .version        "01.01"
85gcc2_compiled.:
86    .globl x
87    .section        .rodata
88    .align 4
89    .type    x,@object
90    .size    x,20
91x:
92    .long   1
93    .long   2
94    .long   -559038737
95    .long   -1
96    .long   16
97    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99which gives a starting point that will compile, and can be transformed
100to become the template, generally with some consulting of as docs and
101some experimentation.
102
If you want ICU to use this assembly template automatically, specify
"GENCCODE_ASSEMBLY=-a name" in the appropriate config/mh-* file, where
name is the name field of the entry that you added to the
assemblyHeader table below.
107*/
/*
 * Table of assembler dialects that writeAssemblyCode() can emit.
 *
 * header and footer are used as sprintf() format strings; the entry point
 * name is supplied for every %s conversion (the caller passes it eight
 * times), so a literal percent sign must be written as %%.
 * beginLine is written before the first data word and again by write32()
 * at the start of every following line of data.
 */
static const struct AssemblyType {
    const char *name; /* dialect name compared by checkAssemblyHeaderName() */
    const char *header; /* format string for the text emitted before the data */
    const char *beginLine; /* directive that introduces each line of 32-bit words */
    const char *footer; /* format string for the text emitted after the data */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   8\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
    /* The only entry with a non-empty footer and with the 01234h hex style. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
210
/* Index into assemblyHeader[] chosen by checkAssemblyHeaderName(); -1 while no dialect is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex literal style (HEX_0X or HEX_0H) of the selected dialect; read by write32(). */
static int32_t hexType = HEX_0X;
213
214U_CAPI UBool U_EXPORT2
215checkAssemblyHeaderName(const char* optAssembly) {
216    int32_t idx;
217    assemblyHeaderIndex = -1;
218    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
219        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
220            assemblyHeaderIndex = idx;
221            hexType = assemblyHeader[idx].hexType; /* set the hex type */
222            return TRUE;
223        }
224    }
225
226    return FALSE;
227}
228
229
230U_CAPI void U_EXPORT2
231printAssemblyHeadersToStdErr(void) {
232    int32_t idx;
233    fprintf(stderr, "%s", assemblyHeader[0].name);
234    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
235        fprintf(stderr, ", %s", assemblyHeader[idx].name);
236    }
237    fprintf(stderr,
238        ")\n");
239}
240
241U_CAPI void U_EXPORT2
242writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
243    uint32_t column = MAX_COLUMN;
244    char entry[64];
245    uint32_t buffer[1024];
246    char *bufferStr = (char *)buffer;
247    FileStream *in, *out;
248    size_t i, length;
249
250    in=T_FileStream_open(filename, "rb");
251    if(in==NULL) {
252        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
253        exit(U_FILE_ACCESS_ERROR);
254    }
255
256    getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
257    out=T_FileStream_open(bufferStr, "w");
258    if(out==NULL) {
259        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
260        exit(U_FILE_ACCESS_ERROR);
261    }
262
263    if (outFilePath != NULL) {
264        uprv_strcpy(outFilePath, bufferStr);
265    }
266
267#ifdef WINDOWS_WITH_GNUC
268    /* Need to fix the file seperator character when using MinGW. */
269    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
270#endif
271
272    if(optEntryPoint != NULL) {
273        uprv_strcpy(entry, optEntryPoint);
274        uprv_strcat(entry, "_dat");
275    }
276
277    /* turn dashes or dots in the entry name into underscores */
278    length=uprv_strlen(entry);
279    for(i=0; i<length; ++i) {
280        if(entry[i]=='-' || entry[i]=='.') {
281            entry[i]='_';
282        }
283    }
284
285    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
286        entry, entry, entry, entry,
287        entry, entry, entry, entry);
288    T_FileStream_writeLine(out, bufferStr);
289    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
290
291    for(;;) {
292        length=T_FileStream_read(in, buffer, sizeof(buffer));
293        if(length==0) {
294            break;
295        }
296        if (length != sizeof(buffer)) {
297            /* pad with extra 0's when at the end of the file */
298            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
299                buffer[length+i] = 0;
300            }
301        }
302        for(i=0; i<(length/sizeof(buffer[0])); i++) {
303            column = write32(out, buffer[i], column);
304        }
305    }
306
307    T_FileStream_writeLine(out, "\n");
308
309    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
310        entry, entry, entry, entry,
311        entry, entry, entry, entry);
312    T_FileStream_writeLine(out, bufferStr);
313
314    if(T_FileStream_error(in)) {
315        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
316        exit(U_FILE_ACCESS_ERROR);
317    }
318
319    if(T_FileStream_error(out)) {
320        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
321        exit(U_FILE_ACCESS_ERROR);
322    }
323
324    T_FileStream_close(out);
325    T_FileStream_close(in);
326}
327
328U_CAPI void U_EXPORT2
329writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
330    uint32_t column = MAX_COLUMN;
331    char buffer[4096], entry[64];
332    FileStream *in, *out;
333    size_t i, length;
334
335    in=T_FileStream_open(filename, "rb");
336    if(in==NULL) {
337        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
338        exit(U_FILE_ACCESS_ERROR);
339    }
340
341    if(optName != NULL) { /* prepend  'icudt28_' */
342      strcpy(entry, optName);
343      strcat(entry, "_");
344    } else {
345      entry[0] = 0;
346    }
347
348    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
349    if (outFilePath != NULL) {
350        uprv_strcpy(outFilePath, buffer);
351    }
352    out=T_FileStream_open(buffer, "w");
353    if(out==NULL) {
354        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
355        exit(U_FILE_ACCESS_ERROR);
356    }
357
358    /* turn dashes or dots in the entry name into underscores */
359    length=uprv_strlen(entry);
360    for(i=0; i<length; ++i) {
361        if(entry[i]=='-' || entry[i]=='.') {
362            entry[i]='_';
363        }
364    }
365
366#if U_PLATFORM == U_PF_OS400
367    /*
368    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
369
370    This is here because this platform can't currently put
371    const data into the read-only pages of an object or
372    shared library (service program). Only strings are allowed in read-only
373    pages, so we use char * strings to store the data.
374
375    In order to prevent the beginning of the data from ever matching the
376    magic numbers we must still use the initial double.
377    [grhoten 4/24/2003]
378    */
379    sprintf(buffer,
380        "#ifndef IN_GENERATED_CCODE\n"
381        "#define IN_GENERATED_CCODE\n"
382        "#define U_DISABLE_RENAMING 1\n"
383        "#include \"unicode/umachine.h\"\n"
384        "#endif\n"
385        "U_CDECL_BEGIN\n"
386        "const struct {\n"
387        "    double bogus;\n"
388        "    const char *bytes; \n"
389        "} %s={ 0.0, \n",
390        entry);
391    T_FileStream_writeLine(out, buffer);
392
393    for(;;) {
394        length=T_FileStream_read(in, buffer, sizeof(buffer));
395        if(length==0) {
396            break;
397        }
398        for(i=0; i<length; ++i) {
399            column = write8str(out, (uint8_t)buffer[i], column);
400        }
401    }
402
403    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
404#else
405    /* Function renaming shouldn't be done in data */
406    sprintf(buffer,
407        "#ifndef IN_GENERATED_CCODE\n"
408        "#define IN_GENERATED_CCODE\n"
409        "#define U_DISABLE_RENAMING 1\n"
410        "#include \"unicode/umachine.h\"\n"
411        "#endif\n"
412        "U_CDECL_BEGIN\n"
413        "const struct {\n"
414        "    double bogus;\n"
415        "    uint8_t bytes[%ld]; \n"
416        "} %s={ 0.0, {\n",
417        (long)T_FileStream_size(in), entry);
418    T_FileStream_writeLine(out, buffer);
419
420    for(;;) {
421        length=T_FileStream_read(in, buffer, sizeof(buffer));
422        if(length==0) {
423            break;
424        }
425        for(i=0; i<length; ++i) {
426            column = write8(out, (uint8_t)buffer[i], column);
427        }
428    }
429
430    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
431#endif
432
433    if(T_FileStream_error(in)) {
434        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
435        exit(U_FILE_ACCESS_ERROR);
436    }
437
438    if(T_FileStream_error(out)) {
439        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
440        exit(U_FILE_ACCESS_ERROR);
441    }
442
443    T_FileStream_close(out);
444    T_FileStream_close(in);
445}
446
447static uint32_t
448write32(FileStream *out, uint32_t bitField, uint32_t column) {
449    int32_t i;
450    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
451    char *s = bitFieldStr;
452    uint8_t *ptrIdx = (uint8_t *)&bitField;
453    static const char hexToStr[16] = {
454        '0','1','2','3',
455        '4','5','6','7',
456        '8','9','A','B',
457        'C','D','E','F'
458    };
459
460    /* write the value, possibly with comma and newline */
461    if(column==MAX_COLUMN) {
462        /* first byte */
463        column=1;
464    } else if(column<32) {
465        *(s++)=',';
466        ++column;
467    } else {
468        *(s++)='\n';
469        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
470        s+=uprv_strlen(s);
471        column=1;
472    }
473
474    if (bitField < 10) {
475        /* It's a small number. Don't waste the space for 0x */
476        *(s++)=hexToStr[bitField];
477    }
478    else {
479        int seenNonZero = 0; /* This is used to remove leading zeros */
480
481        if(hexType==HEX_0X) {
482         *(s++)='0';
483         *(s++)='x';
484        } else if(hexType==HEX_0H) {
485         *(s++)='0';
486        }
487
488        /* This creates a 32-bit field */
489#if U_IS_BIG_ENDIAN
490        for (i = 0; i < sizeof(uint32_t); i++)
491#else
492        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
493#endif
494        {
495            uint8_t value = ptrIdx[i];
496            if (value || seenNonZero) {
497                *(s++)=hexToStr[value>>4];
498                *(s++)=hexToStr[value&0xF];
499                seenNonZero = 1;
500            }
501        }
502        if(hexType==HEX_0H) {
503         *(s++)='h';
504        }
505    }
506
507    *(s++)=0;
508    T_FileStream_writeLine(out, bitFieldStr);
509    return column;
510}
511
512static uint32_t
513write8(FileStream *out, uint8_t byte, uint32_t column) {
514    char s[4];
515    int i=0;
516
517    /* convert the byte value to a string */
518    if(byte>=100) {
519        s[i++]=(char)('0'+byte/100);
520        byte%=100;
521    }
522    if(i>0 || byte>=10) {
523        s[i++]=(char)('0'+byte/10);
524        byte%=10;
525    }
526    s[i++]=(char)('0'+byte);
527    s[i]=0;
528
529    /* write the value, possibly with comma and newline */
530    if(column==MAX_COLUMN) {
531        /* first byte */
532        column=1;
533    } else if(column<16) {
534        T_FileStream_writeLine(out, ",");
535        ++column;
536    } else {
537        T_FileStream_writeLine(out, ",\n");
538        column=1;
539    }
540    T_FileStream_writeLine(out, s);
541    return column;
542}
543
544#if U_PLATFORM == U_PF_OS400
545static uint32_t
546write8str(FileStream *out, uint8_t byte, uint32_t column) {
547    char s[8];
548
549    if (byte > 7)
550        sprintf(s, "\\x%X", byte);
551    else
552        sprintf(s, "\\%X", byte);
553
554    /* write the value, possibly with comma and newline */
555    if(column==MAX_COLUMN) {
556        /* first byte */
557        column=1;
558        T_FileStream_writeLine(out, "\"");
559    } else if(column<24) {
560        ++column;
561    } else {
562        T_FileStream_writeLine(out, "\"\n\"");
563        column=1;
564    }
565    T_FileStream_writeLine(out, s);
566    return column;
567}
568#endif
569
570static void
571getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
572    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
573
574    /* copy path */
575    if(destdir!=NULL && *destdir!=0) {
576        do {
577            *outFilename++=*destdir++;
578        } while(*destdir!=0);
579        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
580            *outFilename++=U_FILE_SEP_CHAR;
581        }
582        inFilename=basename;
583    } else {
584        while(inFilename<basename) {
585            *outFilename++=*inFilename++;
586        }
587    }
588
589    if(suffix==NULL) {
590        /* the filename does not have a suffix */
591        uprv_strcpy(entryName, inFilename);
592        if(optFilename != NULL) {
593          uprv_strcpy(outFilename, optFilename);
594        } else {
595          uprv_strcpy(outFilename, inFilename);
596        }
597        uprv_strcat(outFilename, newSuffix);
598    } else {
599        char *saveOutFilename = outFilename;
600        /* copy basename */
601        while(inFilename<suffix) {
602            if(*inFilename=='-') {
603                /* iSeries cannot have '-' in the .o objects. */
604                *outFilename++=*entryName++='_';
605                inFilename++;
606            }
607            else {
608                *outFilename++=*entryName++=*inFilename++;
609            }
610        }
611
612        /* replace '.' by '_' */
613        *outFilename++=*entryName++='_';
614        ++inFilename;
615
616        /* copy suffix */
617        while(*inFilename!=0) {
618            *outFilename++=*entryName++=*inFilename++;
619        }
620
621        *entryName=0;
622
623        if(optFilename != NULL) {
624            uprv_strcpy(saveOutFilename, optFilename);
625            uprv_strcat(saveOutFilename, newSuffix);
626        } else {
627            /* add ".c" */
628            uprv_strcpy(outFilename, newSuffix);
629        }
630    }
631}
632
633#ifdef CAN_GENERATE_OBJECTS
634static void
635getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
636    union {
637        char        bytes[2048];
638#ifdef U_ELF
639        Elf32_Ehdr  header32;
640        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
641#elif U_PLATFORM_HAS_WIN32_API
642        IMAGE_FILE_HEADER header;
643#endif
644    } buffer;
645
646    const char *filename;
647    FileStream *in;
648    int32_t length;
649
650#ifdef U_ELF
651
652#elif U_PLATFORM_HAS_WIN32_API
653    const IMAGE_FILE_HEADER *pHeader;
654#else
655#   error "Unknown platform for CAN_GENERATE_OBJECTS."
656#endif
657
658    if(optMatchArch != NULL) {
659        filename=optMatchArch;
660    } else {
661        /* set defaults */
662#ifdef U_ELF
663        /* set EM_386 because elf.h does not provide better defaults */
664        *pCPU=EM_386;
665        *pBits=32;
666        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
667#elif U_PLATFORM_HAS_WIN32_API
668/* _M_IA64 should be defined in windows.h */
669#   if defined(_M_IA64)
670        *pCPU=IMAGE_FILE_MACHINE_IA64;
671#   elif defined(_M_AMD64)
672        *pCPU=IMAGE_FILE_MACHINE_AMD64;
673#   else
674        *pCPU=IMAGE_FILE_MACHINE_I386;
675#   endif
676        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
677        *pIsBigEndian=FALSE;
678#else
679#   error "Unknown platform for CAN_GENERATE_OBJECTS."
680#endif
681        return;
682    }
683
684    in=T_FileStream_open(filename, "rb");
685    if(in==NULL) {
686        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
687        exit(U_FILE_ACCESS_ERROR);
688    }
689    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
690
691#ifdef U_ELF
692    if(length<sizeof(Elf32_Ehdr)) {
693        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
694        exit(U_UNSUPPORTED_ERROR);
695    }
696    if(
697        buffer.header32.e_ident[0]!=ELFMAG0 ||
698        buffer.header32.e_ident[1]!=ELFMAG1 ||
699        buffer.header32.e_ident[2]!=ELFMAG2 ||
700        buffer.header32.e_ident[3]!=ELFMAG3 ||
701        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
702    ) {
703        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
704        exit(U_UNSUPPORTED_ERROR);
705    }
706
707    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
708#ifdef U_ELF64
709    if(*pBits!=32 && *pBits!=64) {
710        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
711        exit(U_UNSUPPORTED_ERROR);
712    }
713#else
714    if(*pBits!=32) {
715        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
716        exit(U_UNSUPPORTED_ERROR);
717    }
718#endif
719
720    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
721    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
722        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
723        exit(U_UNSUPPORTED_ERROR);
724    }
725    /* TODO: Support byte swapping */
726
727    *pCPU=buffer.header32.e_machine;
728#elif U_PLATFORM_HAS_WIN32_API
729    if(length<sizeof(IMAGE_FILE_HEADER)) {
730        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
731        exit(U_UNSUPPORTED_ERROR);
732    }
733    /* TODO: Use buffer.header.  Keep aliasing legal.  */
734    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
735    *pCPU=pHeader->Machine;
736    /*
737     * The number of bits is implicit with the Machine value.
738     * *pBits is ignored in the calling code, so this need not be precise.
739     */
740    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
741    /* Windows always runs on little-endian CPUs. */
742    *pIsBigEndian=FALSE;
743#else
744#   error "Unknown platform for CAN_GENERATE_OBJECTS."
745#endif
746
747    T_FileStream_close(in);
748}
749
750U_CAPI void U_EXPORT2
751writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
752    /* common variables */
753    char buffer[4096], entry[40]={ 0 };
754    FileStream *in, *out;
755    const char *newSuffix;
756    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
757
758    uint16_t cpu, bits;
759    UBool makeBigEndian;
760
761    /* platform-specific variables and initialization code */
762#ifdef U_ELF
763    /* 32-bit Elf file header */
764    static Elf32_Ehdr header32={
765        {
766            /* e_ident[] */
767            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
768            ELFCLASS32,
769            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
770            EV_CURRENT /* EI_VERSION */
771        },
772        ET_REL,
773        EM_386,
774        EV_CURRENT, /* e_version */
775        0, /* e_entry */
776        0, /* e_phoff */
777        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
778        0, /* e_flags */
779        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
780        0, /* e_phentsize */
781        0, /* e_phnum */
782        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
783        5, /* e_shnum */
784        2 /* e_shstrndx */
785    };
786
787    /* 32-bit Elf section header table */
788    static Elf32_Shdr sectionHeaders32[5]={
789        { /* SHN_UNDEF */
790            0
791        },
792        { /* .symtab */
793            1, /* sh_name */
794            SHT_SYMTAB,
795            0, /* sh_flags */
796            0, /* sh_addr */
797            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
798            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
799            3, /* sh_link=sect hdr index of .strtab */
800            1, /* sh_info=One greater than the symbol table index of the last
801                * local symbol (with STB_LOCAL). */
802            4, /* sh_addralign */
803            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
804        },
805        { /* .shstrtab */
806            9, /* sh_name */
807            SHT_STRTAB,
808            0, /* sh_flags */
809            0, /* sh_addr */
810            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
811            40, /* sh_size */
812            0, /* sh_link */
813            0, /* sh_info */
814            1, /* sh_addralign */
815            0 /* sh_entsize */
816        },
817        { /* .strtab */
818            19, /* sh_name */
819            SHT_STRTAB,
820            0, /* sh_flags */
821            0, /* sh_addr */
822            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
823            (Elf32_Word)sizeof(entry), /* sh_size */
824            0, /* sh_link */
825            0, /* sh_info */
826            1, /* sh_addralign */
827            0 /* sh_entsize */
828        },
829        { /* .rodata */
830            27, /* sh_name */
831            SHT_PROGBITS,
832            SHF_ALLOC, /* sh_flags */
833            0, /* sh_addr */
834            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
835            0, /* sh_size */
836            0, /* sh_link */
837            0, /* sh_info */
838            16, /* sh_addralign */
839            0 /* sh_entsize */
840        }
841    };
842
843    /* symbol table */
844    static Elf32_Sym symbols32[2]={
845        { /* STN_UNDEF */
846            0
847        },
848        { /* data entry point */
849            1, /* st_name */
850            0, /* st_value */
851            0, /* st_size */
852            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
853            0, /* st_other */
854            4 /* st_shndx=index of related section table entry */
855        }
856    };
857
858    /* section header string table, with decimal string offsets */
859    static const char sectionStrings[40]=
860        /*  0 */ "\0"
861        /*  1 */ ".symtab\0"
862        /*  9 */ ".shstrtab\0"
863        /* 19 */ ".strtab\0"
864        /* 27 */ ".rodata\0"
865        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
866        /* 40: padded to multiple of 8 bytes */
867
868    /*
869     * Use entry[] for the string table which will contain only the
870     * entry point name.
871     * entry[0] must be 0 (NUL)
872     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
873     */
874
875    /* 16-align .rodata in the .o file, just in case */
876    static const char padding[16]={ 0 };
877    int32_t paddingSize;
878
879#ifdef U_ELF64
880    /* 64-bit Elf file header */
881    static Elf64_Ehdr header64={
882        {
883            /* e_ident[] */
884            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
885            ELFCLASS64,
886            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
887            EV_CURRENT /* EI_VERSION */
888        },
889        ET_REL,
890        EM_X86_64,
891        EV_CURRENT, /* e_version */
892        0, /* e_entry */
893        0, /* e_phoff */
894        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
895        0, /* e_flags */
896        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
897        0, /* e_phentsize */
898        0, /* e_phnum */
899        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
900        5, /* e_shnum */
901        2 /* e_shstrndx */
902    };
903
904    /* 64-bit Elf section header table */
905    static Elf64_Shdr sectionHeaders64[5]={
906        { /* SHN_UNDEF */
907            0
908        },
909        { /* .symtab */
910            1, /* sh_name */
911            SHT_SYMTAB,
912            0, /* sh_flags */
913            0, /* sh_addr */
914            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
915            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
916            3, /* sh_link=sect hdr index of .strtab */
917            1, /* sh_info=One greater than the symbol table index of the last
918                * local symbol (with STB_LOCAL). */
919            4, /* sh_addralign */
920            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
921        },
922        { /* .shstrtab */
923            9, /* sh_name */
924            SHT_STRTAB,
925            0, /* sh_flags */
926            0, /* sh_addr */
927            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
928            40, /* sh_size */
929            0, /* sh_link */
930            0, /* sh_info */
931            1, /* sh_addralign */
932            0 /* sh_entsize */
933        },
934        { /* .strtab */
935            19, /* sh_name */
936            SHT_STRTAB,
937            0, /* sh_flags */
938            0, /* sh_addr */
939            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
940            (Elf64_Xword)sizeof(entry), /* sh_size */
941            0, /* sh_link */
942            0, /* sh_info */
943            1, /* sh_addralign */
944            0 /* sh_entsize */
945        },
946        { /* .rodata */
947            27, /* sh_name */
948            SHT_PROGBITS,
949            SHF_ALLOC, /* sh_flags */
950            0, /* sh_addr */
951            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
952            0, /* sh_size */
953            0, /* sh_link */
954            0, /* sh_info */
955            16, /* sh_addralign */
956            0 /* sh_entsize */
957        }
958    };
959
960    /*
961     * 64-bit symbol table
962     * careful: different order of items compared with Elf32_sym!
963     */
964    static Elf64_Sym symbols64[2]={
965        { /* STN_UNDEF */
966            0
967        },
968        { /* data entry point */
969            1, /* st_name */
970            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
971            0, /* st_other */
972            4, /* st_shndx=index of related section table entry */
973            0, /* st_value */
974            0 /* st_size */
975        }
976    };
977
978#endif /* U_ELF64 */
979
980    /* entry[] have a leading NUL */
981    entryOffset=1;
982
983    /* in the common code, count entryLength from after the NUL */
984    entryLengthOffset=1;
985
986    newSuffix=".o";
987
988#elif U_PLATFORM_HAS_WIN32_API
989    struct {
990        IMAGE_FILE_HEADER fileHeader;
991        IMAGE_SECTION_HEADER sections[2];
992        char linkerOptions[100];
993    } objHeader;
994    IMAGE_SYMBOL symbols[1];
995    struct {
996        DWORD sizeofLongNames;
997        char longNames[100];
998    } symbolNames;
999
1000    /*
1001     * entry sometimes has a leading '_';
1002     * it is overwritten if entryOffset==0, which depends on the target platform
1003     * (see the check for cpu below)
1004     */
1005    entry[0]='_';
1006
1007    newSuffix=".obj";
1008#else
1009#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1010#endif
1011
1012    /* deal with options, files and the entry point name */
1013    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1014    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1015#if U_PLATFORM_HAS_WIN32_API
1016    if(cpu==IMAGE_FILE_MACHINE_I386) {
1017        entryOffset=1;
1018    }
1019#endif
1020
1021    in=T_FileStream_open(filename, "rb");
1022    if(in==NULL) {
1023        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1024        exit(U_FILE_ACCESS_ERROR);
1025    }
1026    size=T_FileStream_size(in);
1027
1028    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1029    if (outFilePath != NULL) {
1030        uprv_strcpy(outFilePath, buffer);
1031    }
1032
1033    if(optEntryPoint != NULL) {
1034        uprv_strcpy(entry+entryOffset, optEntryPoint);
1035        uprv_strcat(entry+entryOffset, "_dat");
1036    }
1037    /* turn dashes in the entry name into underscores */
1038    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1039    for(i=0; i<entryLength; ++i) {
1040        if(entry[entryLengthOffset+i]=='-') {
1041            entry[entryLengthOffset+i]='_';
1042        }
1043    }
1044
1045    /* open the output file */
1046    out=T_FileStream_open(buffer, "wb");
1047    if(out==NULL) {
1048        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1049        exit(U_FILE_ACCESS_ERROR);
1050    }
1051
1052#ifdef U_ELF
1053    if(bits==32) {
1054        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1055        header32.e_machine=cpu;
1056
1057        /* 16-align .rodata in the .o file, just in case */
1058        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1059        if(paddingSize!=0) {
1060                paddingSize=0x10-paddingSize;
1061                sectionHeaders32[4].sh_offset+=paddingSize;
1062        }
1063
1064        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1065
1066        symbols32[1].st_size=(Elf32_Word)size;
1067
1068        /* write .o headers */
1069        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1070        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1071        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1072    } else /* bits==64 */ {
1073#ifdef U_ELF64
1074        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1075        header64.e_machine=cpu;
1076
1077        /* 16-align .rodata in the .o file, just in case */
1078        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1079        if(paddingSize!=0) {
1080                paddingSize=0x10-paddingSize;
1081                sectionHeaders64[4].sh_offset+=paddingSize;
1082        }
1083
1084        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1085
1086        symbols64[1].st_size=(Elf64_Xword)size;
1087
1088        /* write .o headers */
1089        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1090        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1091        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1092#endif
1093    }
1094
1095    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1096    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1097    if(paddingSize!=0) {
1098        T_FileStream_write(out, padding, paddingSize);
1099    }
1100#elif U_PLATFORM_HAS_WIN32_API
1101    /* populate the .obj headers */
1102    uprv_memset(&objHeader, 0, sizeof(objHeader));
1103    uprv_memset(&symbols, 0, sizeof(symbols));
1104    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1105
1106    /* write the linker export directive */
1107    uprv_strcpy(objHeader.linkerOptions, "-export:");
1108    length=8;
1109    uprv_strcpy(objHeader.linkerOptions+length, entry);
1110    length+=entryLength;
1111    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1112    length+=6;
1113
1114    /* set the file header */
1115    objHeader.fileHeader.Machine=cpu;
1116    objHeader.fileHeader.NumberOfSections=2;
1117    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1118    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1119    objHeader.fileHeader.NumberOfSymbols=1;
1120
1121    /* set the section for the linker options */
1122    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1123    objHeader.sections[0].SizeOfRawData=length;
1124    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1125    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1126
1127    /* set the data section */
1128    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1129    objHeader.sections[1].SizeOfRawData=size;
1130    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1131    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1132
1133    /* set the symbol table */
1134    if(entryLength<=8) {
1135        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1136        symbolNames.sizeofLongNames=4;
1137    } else {
1138        symbols[0].N.Name.Short=0;
1139        symbols[0].N.Name.Long=4;
1140        symbolNames.sizeofLongNames=4+entryLength+1;
1141        uprv_strcpy(symbolNames.longNames, entry);
1142    }
1143    symbols[0].SectionNumber=2;
1144    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1145
1146    /* write the file header and the linker options section */
1147    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1148#else
1149#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1150#endif
1151
1152    /* copy the data file into the data section (.rodata for ELF, .rdata i.e. section 2 for Windows) */
1153    for(;;) {
1154        length=T_FileStream_read(in, buffer, sizeof(buffer));
1155        if(length==0) {
1156            break;
1157        }
1158        T_FileStream_write(out, buffer, (int32_t)length);
1159    }
1160
1161#if U_PLATFORM_HAS_WIN32_API
1162    /* write the symbol table */
1163    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1164    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1165#endif
1166
1167    if(T_FileStream_error(in)) {
1168        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1169        exit(U_FILE_ACCESS_ERROR);
1170    }
1171
1172    if(T_FileStream_error(out)) {
1173        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1174        exit(U_FILE_ACCESS_ERROR);
1175    }
1176
1177    T_FileStream_close(out);
1178    T_FileStream_close(in);
1179}
1180#endif
1181