1/* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc. 2 This file is part of the GNU LIBICONV Library. 3 4 The GNU LIBICONV Library is free software; you can redistribute it 5 and/or modify it under the terms of the GNU Library General Public 6 License as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 The GNU LIBICONV Library is distributed in the hope that it will be 10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Library General Public License for more details. 13 14 You should have received a copy of the GNU Library General Public 15 License along with the GNU LIBICONV Library; see the file COPYING.LIB. 16 If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 17 Fifth Floor, Boston, MA 02110-1301, USA. */ 18 19/* Create a table from CHARSET to Unicode. */ 20 21#include "config.h" 22 23#include <stddef.h> 24#include <stdio.h> 25#include <stdlib.h> 26#include <string.h> 27#include <iconv.h> 28#include <errno.h> 29 30#include "binary-io.h" 31 32/* If nonzero, ignore conversions outside Unicode plane 0. */ 33static int bmp_only; 34 35static const char* hexbuf (unsigned char buf[], unsigned int buflen) 36{ 37 static char msg[50]; 38 switch (buflen) { 39 case 1: sprintf(msg,"0x%02X",buf[0]); break; 40 case 2: sprintf(msg,"0x%02X%02X",buf[0],buf[1]); break; 41 case 3: sprintf(msg,"0x%02X%02X%02X",buf[0],buf[1],buf[2]); break; 42 case 4: sprintf(msg,"0x%02X%02X%02X%02X",buf[0],buf[1],buf[2],buf[3]); break; 43 default: abort(); 44 } 45 return msg; 46} 47 48static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out) 49{ 50 const char* inbuf = (const char*) buf; 51 size_t inbytesleft = buflen; 52 char* outbuf = (char*) out; 53 size_t outbytesleft = 3*sizeof(unsigned int); 54 size_t result; 55 iconv(cd,NULL,NULL,NULL,NULL); 56 result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft); 57 if (result != (size_t)(-1)) 58 result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft); 59 if (result == (size_t)(-1)) { 60 if (errno == EILSEQ) { 61 return -1; 62 } else if (errno == EINVAL) { 63 return 0; 64 } else { 65 int saved_errno = errno; 66 fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen)); 67 errno = saved_errno; 68 perror(""); 69 exit(1); 70 } 71 } else if (result > 0) /* ignore conversions with transliteration */ { 72 return -1; 73 } else { 74 if (inbytesleft != 0) { 75 fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft)); 76 exit(1); 77 } 78 return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int); 79 } 80} 81 82/* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */ 83static const char* ucs4_decode (const unsigned int* out, unsigned int outlen) 84{ 85 static char hexbuf[21]; 86 char* p = hexbuf; 87 while (outlen > 0) { 88 if (p > hexbuf) 89 *p++ = ' '; 90 sprintf (p, "0x%04X", out[0]); 91 out += 1; outlen -= 1; 92 if (bmp_only && strlen(p) > 6) 93 return NULL; 94 p += strlen(p); 95 } 96 return hexbuf; 97} 98 99int main (int argc, char* argv[]) 100{ 101 const char* charset; 102 iconv_t cd; 103 int search_depth; 104 105 if (argc != 2) { 106 fprintf(stderr,"Usage: table-from charset\n"); 107 exit(1); 108 } 109 charset = argv[1]; 110 111#if O_BINARY 112 SET_BINARY(fileno(stdout)); 113#endif 114 115 cd = iconv_open("UCS-4-INTERNAL",charset); 116 if (cd == (iconv_t)(-1)) { 117 perror("iconv_open"); 118 exit(1); 119 } 120 121 /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too 122 big. */ 123 bmp_only = (strcmp(charset,"UTF-8") == 0); 124 search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4); 125 126 { 127 unsigned int out[3]; 128 unsigned char buf[4]; 129 unsigned int i0, i1, i2, i3; 130 int result; 131 for (i0 = 0; i0 < 0x100; i0++) { 132 buf[0] = i0; 133 result = try(cd,buf,1,out); 134 if (result < 0) { 135 } else if (result > 0) { 136 const char* unicode = ucs4_decode(out,result); 137 if (unicode != NULL) 138 printf("0x%02X\t%s\n",i0,unicode); 139 } else { 140 for (i1 = 0; i1 < 0x100; i1++) { 141 buf[1] = i1; 142 result = try(cd,buf,2,out); 143 if (result < 0) { 144 } else if (result > 0) { 145 const char* unicode = ucs4_decode(out,result); 146 if (unicode != NULL) 147 printf("0x%02X%02X\t%s\n",i0,i1,unicode); 148 } else { 149 for (i2 = 0; i2 < 0x100; i2++) { 150 buf[2] = i2; 151 result = try(cd,buf,3,out); 152 if (result < 0) { 153 } else if (result > 0) { 154 const char* unicode = ucs4_decode(out,result); 155 if (unicode != NULL) 156 printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode); 157 } else if (search_depth > 3) { 158 for (i3 = 0; i3 < 0x100; i3++) { 159 buf[3] = i3; 160 result = try(cd,buf,4,out); 161 if (result < 0) { 162 } else if (result > 0) { 163 const char* unicode = ucs4_decode(out,result); 164 if (unicode != NULL) 165 printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode); 166 } else { 167 fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4)); 168 exit(1); 169 } 170 } 171 } 172 } 173 } 174 } 175 } 176 } 177 } 178 179 if (iconv_close(cd) < 0) { 180 perror("iconv_close"); 181 exit(1); 182 } 183 184 if (ferror(stdin) || ferror(stdout) || fclose(stdout)) { 185 fprintf(stderr,"I/O error\n"); 186 exit(1); 187 } 188 189 exit(0); 190} 191