1/* Test the Unicode character name functions. 2 Copyright (C) 2000-2003, 2005, 2007 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17#include <config.h> 18 19#include <stdio.h> 20#include <stdlib.h> 21#include <string.h> 22 23#include "xalloc.h" 24#include "uniname.h" 25 26/* The names according to the UnicodeData.txt file, modified to contain the 27 Hangul syllable names, as described in the Unicode 3.0 book. */ 28const char * unicode_names [0x110000]; 29 30/* Maximum length of a field in the UnicodeData.txt file. */ 31#define FIELDLEN 120 32 33/* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN. 34 Reads up to (but excluding) DELIM. 35 Returns 1 when a field was successfully read, otherwise 0. */ 36static int 37getfield (FILE *stream, char *buffer, int delim) 38{ 39 int count = 0; 40 int c; 41 42 for (; (c = getc (stream)), (c != EOF && c != delim); ) 43 { 44 /* Put c into the buffer. */ 45 if (++count >= FIELDLEN - 1) 46 { 47 fprintf (stderr, "field too long\n"); 48 exit (EXIT_FAILURE); 49 } 50 *buffer++ = c; 51 } 52 53 if (c == EOF) 54 return 0; 55 56 *buffer = '\0'; 57 return 1; 58} 59 60/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt 61 file. 
*/
/* UNICODEDATA_FILENAME names a text file whose lines consist of
   ';'-separated fields: field 0 is the code point in hexadecimal, field 1
   is the character name, and the rest of each line is ignored.
   Every entry of unicode_names[] is first reset to NULL; then, for each
   line, the name is stored (as a copy made by xstrdup) at the index given
   by the code point.
   Any failure -- the file cannot be opened, a line has fewer than two
   fields, the code point is not a valid hexadecimal number or is not below
   0x110000, or a read error occurs -- prints a message to stderr and
   terminates the program with EXIT_FAILURE.  */
static void
fill_names (const char *unicodedata_filename)
{
  unsigned int i;
  FILE *stream;
  char field0[FIELDLEN];
  char field1[FIELDLEN];
  int lineno = 0;

  for (i = 0; i < 0x110000; i++)
    unicode_names[i] = NULL;

  stream = fopen (unicodedata_filename, "r");
  if (stream == NULL)
    {
      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
      exit (EXIT_FAILURE);
    }

  for (;;)
    {
      int n;
      int c;
      char *end;
      unsigned long code;

      lineno++;
      n = getfield (stream, field0, ';');
      n += getfield (stream, field1, ';');
      /* Neither field could be read: end of input.  */
      if (n == 0)
        break;
      if (n != 2)
        {
          fprintf (stderr, "short line in '%s':%d\n",
                   unicodedata_filename, lineno);
          exit (EXIT_FAILURE);
        }
      /* Skip the remaining fields of this line.  */
      for (; (c = getc (stream)), (c != EOF && c != '\n'); )
        ;
      /* Keep the full unsigned long result for the checks below, so that a
         huge value cannot be truncated into the valid range, and reject
         fields that are empty or contain non-hexadecimal garbage instead
         of silently treating them as code point 0.  */
      code = strtoul (field0, &end, 16);
      if (end == field0 || *end != '\0')
        {
          fprintf (stderr, "invalid code point in '%s':%d\n",
                   unicodedata_filename, lineno);
          exit (EXIT_FAILURE);
        }
      if (code >= 0x110000)
        {
          fprintf (stderr, "index too large\n");
          exit (EXIT_FAILURE);
        }
      unicode_names[code] = xstrdup (field1);
    }
  if (ferror (stream) || fclose (stream))
    {
      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
      exit (EXIT_FAILURE);
    }
}

/* Perform an exhaustive test of the unicode_character_name function.
*/ 115static int 116test_name_lookup () 117{ 118 int error = 0; 119 unsigned int i; 120 char buf[UNINAME_MAX]; 121 122 for (i = 0; i < 0x11000; i++) 123 { 124 char *result = unicode_character_name (i, buf); 125 126 if (unicode_names[i] != NULL) 127 { 128 if (result == NULL) 129 { 130 fprintf (stderr, "\\u%04X name lookup failed!\n", i); 131 error = 1; 132 } 133 else if (strcmp (result, unicode_names[i]) != 0) 134 { 135 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 136 i, result); 137 error = 1; 138 } 139 } 140 else 141 { 142 if (result != NULL) 143 { 144 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 145 i, result); 146 error = 1; 147 } 148 } 149 } 150 151 for (i = 0x110000; i < 0x1000000; i++) 152 { 153 char *result = unicode_character_name (i, buf); 154 155 if (result != NULL) 156 { 157 fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", 158 i, result); 159 error = 1; 160 } 161 } 162 163 return error; 164} 165 166/* Perform a test of the unicode_name_character function. */ 167static int 168test_inverse_lookup () 169{ 170 int error = 0; 171 unsigned int i; 172 173 /* First, verify all valid character names are recognized. */ 174 for (i = 0; i < 0x110000; i++) 175 if (unicode_names[i] != NULL) 176 { 177 unsigned int result = unicode_name_character (unicode_names[i]); 178 if (result != i) 179 { 180 if (result == UNINAME_INVALID) 181 fprintf (stderr, "inverse name lookup of \"%s\" failed\n", 182 unicode_names[i]); 183 else 184 fprintf (stderr, 185 "inverse name lookup of \"%s\" returned 0x%04X\n", 186 unicode_names[i], result); 187 error = 1; 188 } 189 } 190 191 /* Second, generate random but likely names and verify they are not 192 recognized unless really valid. 
*/ 193 for (i = 0; i < 10000; i++) 194 { 195 unsigned int i1, i2; 196 const char *s1; 197 const char *s2; 198 unsigned int l1, l2, j1, j2; 199 char buf[2*UNINAME_MAX]; 200 unsigned int result; 201 202 do i1 = ((rand () % 0x11) << 16) 203 + ((rand () & 0xff) << 8) 204 + (rand () & 0xff); 205 while (unicode_names[i1] == NULL); 206 207 do i2 = ((rand () % 0x11) << 16) 208 + ((rand () & 0xff) << 8) 209 + (rand () & 0xff); 210 while (unicode_names[i2] == NULL); 211 212 s1 = unicode_names[i1]; 213 l1 = strlen (s1); 214 s2 = unicode_names[i2]; 215 l2 = strlen (s2); 216 217 /* Concatenate a starting piece of s1 with an ending piece of s2. */ 218 for (j1 = 1; j1 <= l1; j1++) 219 if (j1 == l1 || s1[j1] == ' ') 220 for (j2 = 0; j2 < l2; j2++) 221 if (j2 == 0 || s2[j2-1] == ' ') 222 { 223 memcpy (buf, s1, j1); 224 buf[j1] = ' '; 225 memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1); 226 227 result = unicode_name_character (buf); 228 if (result != UNINAME_INVALID 229 && !(unicode_names[result] != NULL 230 && strcmp (unicode_names[result], buf) == 0)) 231 { 232 fprintf (stderr, 233 "inverse name lookup of \"%s\" returned 0x%04X\n", 234 unicode_names[i], result); 235 error = 1; 236 } 237 } 238 } 239 240 /* Third, some extreme case that used to loop. */ 241 if (unicode_name_character ("A A") != UNINAME_INVALID) 242 error = 1; 243 244 return error; 245} 246 247int 248main (int argc, char *argv[]) 249{ 250 int error = 0; 251 252 fill_names (argv[1]); 253 254 error |= test_name_lookup (); 255 error |= test_inverse_lookup (); 256 257 return error; 258} 259