1/* Test the Unicode character name functions. 2 Copyright (C) 2000-2003, 2005 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software Foundation, 16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 17 18#ifdef HAVE_CONFIG_H 19# include <config.h> 20#endif 21 22#include <stdio.h> 23#include <stdlib.h> 24#include <string.h> 25 26#include "exit.h" 27#include "xalloc.h" 28#include "uniname.h" 29 30/* The names according to the UnicodeData.txt file, modified to contain the 31 Hangul syllable names, as described in the Unicode 3.0 book. */ 32const char * unicode_names [0x110000]; 33 34/* Maximum length of a field in the UnicodeData.txt file. */ 35#define FIELDLEN 120 36 37/* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN. 38 Reads up to (but excluding) DELIM. 39 Returns 1 when a field was successfully read, otherwise 0. */ 40static int 41getfield (FILE *stream, char *buffer, int delim) 42{ 43 int count = 0; 44 int c; 45 46 for (; (c = getc (stream)), (c != EOF && c != delim); ) 47 { 48 /* Put c into the buffer. */ 49 if (++count >= FIELDLEN - 1) 50 { 51 fprintf (stderr, "field too long\n"); 52 exit (EXIT_FAILURE); 53 } 54 *buffer++ = c; 55 } 56 57 if (c == EOF) 58 return 0; 59 60 *buffer = '\0'; 61 return 1; 62} 63 64/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt 65 file. */ 66static void 67fill_names (const char *unicodedata_filename) 68{ 69 unsigned int i; 70 FILE *stream; 71 char field0[FIELDLEN]; 72 char field1[FIELDLEN]; 73 int lineno = 0; 74 75 for (i = 0; i < 0x110000; i++) 76 unicode_names[i] = NULL; 77 78 stream = fopen (unicodedata_filename, "r"); 79 if (stream == NULL) 80 { 81 fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename); 82 exit (EXIT_FAILURE); 83 } 84 85 for (;;) 86 { 87 int n; 88 int c; 89 90 lineno++; 91 n = getfield (stream, field0, ';'); 92 n += getfield (stream, field1, ';'); 93 if (n == 0) 94 break; 95 if (n != 2) 96 { 97 fprintf (stderr, "short line in '%s':%d\n", 98 unicodedata_filename, lineno); 99 exit (EXIT_FAILURE); 100 } 101 for (; (c = getc (stream)), (c != EOF && c != '\n'); ) 102 ; 103 i = strtoul (field0, NULL, 16); 104 if (i >= 0x110000) 105 { 106 fprintf (stderr, "index too large\n"); 107 exit (EXIT_FAILURE); 108 } 109 unicode_names[i] = xstrdup (field1); 110 } 111 if (ferror (stream) || fclose (stream)) 112 { 113 fprintf (stderr, "error reading from '%s'\n", unicodedata_filename); 114 exit (1); 115 } 116} 117 118/* Perform an exhaustive test of the unicode_character_name function. */ 119static int 120test_name_lookup () 121{ 122 int error = 0; 123 unsigned int i; 124 char buf[UNINAME_MAX]; 125 126 for (i = 0; i < 0x11000; i++) 127 { 128 char *result = unicode_character_name (i, buf); 129 130 if (unicode_names[i] != NULL) 131 { 132 if (result == NULL) 133 { 134 fprintf (stderr, "\\u%04X name lookup failed!\n", i); 135 error = 1; 136 } 137 else if (strcmp (result, unicode_names[i]) != 0) 138 { 139 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 140 i, result); 141 error = 1; 142 } 143 } 144 else 145 { 146 if (result != NULL) 147 { 148 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 149 i, result); 150 error = 1; 151 } 152 } 153 } 154 155 for (i = 0x110000; i < 0x1000000; i++) 156 { 157 char *result = unicode_character_name (i, buf); 158 159 if (result != NULL) 160 { 161 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 162 i, result); 163 error = 1; 164 } 165 } 166 167 return error; 168} 169 170/* Perform a test of the unicode_name_character function. */ 171static int 172test_inverse_lookup () 173{ 174 int error = 0; 175 unsigned int i; 176 177 /* First, verify all valid character names are recognized. */ 178 for (i = 0; i < 0x110000; i++) 179 if (unicode_names[i] != NULL) 180 { 181 unsigned int result = unicode_name_character (unicode_names[i]); 182 if (result != i) 183 { 184 if (result == UNINAME_INVALID) 185 fprintf (stderr, "inverse name lookup of \"%s\" failed\n", 186 unicode_names[i]); 187 else 188 fprintf (stderr, 189 "inverse name lookup of \"%s\" returned 0x%04X\n", 190 unicode_names[i], result); 191 error = 1; 192 } 193 } 194 195 /* Second, generate random but likely names and verify they are not 196 recognized unless really valid. */ 197 for (i = 0; i < 10000; i++) 198 { 199 unsigned int i1, i2; 200 const char *s1; 201 const char *s2; 202 unsigned int l1, l2, j1, j2; 203 char buf[2*UNINAME_MAX]; 204 unsigned int result; 205 206 do i1 = ((rand () % 0x11) << 16) 207 + ((rand () & 0xff) << 8) 208 + (rand () & 0xff); 209 while (unicode_names[i1] == NULL); 210 211 do i2 = ((rand () % 0x11) << 16) 212 + ((rand () & 0xff) << 8) 213 + (rand () & 0xff); 214 while (unicode_names[i2] == NULL); 215 216 s1 = unicode_names[i1]; 217 l1 = strlen (s1); 218 s2 = unicode_names[i2]; 219 l2 = strlen (s2); 220 221 /* Concatenate a starting piece of s1 with an ending piece of s2. */ 222 for (j1 = 1; j1 <= l1; j1++) 223 if (j1 == l1 || s1[j1] == ' ') 224 for (j2 = 0; j2 < l2; j2++) 225 if (j2 == 0 || s2[j2-1] == ' ') 226 { 227 memcpy (buf, s1, j1); 228 buf[j1] = ' '; 229 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1); 230 231 result = unicode_name_character (buf); 232 if (result != UNINAME_INVALID 233 && !(unicode_names[result] != NULL 234 && strcmp (unicode_names[result], buf) == 0)) 235 { 236 fprintf (stderr, 237 "inverse name lookup of \"%s\" returned 0x%04X\n", 238 unicode_names[i], result); 239 error = 1; 240 } 241 } 242 } 243 244 /* Third, some extreme case that used to loop. */ 245 if (unicode_name_character ("A A") != UNINAME_INVALID) 246 error = 1; 247 248 return error; 249} 250 251int 252main (int argc, char *argv[]) 253{ 254 int error = 0; 255 256 fill_names (argv[1]); 257 258 error |= test_name_lookup (); 259 error |= test_inverse_lookup (); 260 261 return error; 262} 263