/* Origin: netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/gnulib-tests/uniname/  */
/* Test the Unicode character name functions.
   Copyright (C) 2000-2003, 2005, 2007 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17#include <config.h>
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23#include "xalloc.h"
24#include "uniname.h"
25
/* The character names, indexed by code point, according to the
   UnicodeData.txt file, modified to contain the Hangul syllable names, as
   described in the Unicode 3.0 book.  An entry is NULL when no name has been
   stored for that code point.  */
const char *unicode_names[0x110000];
29
/* Size of the buffers that receive a field of the UnicodeData.txt file.
   Because of the bounds check in getfield, a field can hold at most
   FIELDLEN - 2 characters.  */
#define FIELDLEN 120

/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
   Reads up to (but excluding) DELIM; the delimiter itself is consumed.
   A field that does not fit makes the program exit with a diagnostic.
   BUFFER is always left NUL-terminated, even when the end of the stream is
   reached before DELIM was seen.
   Returns 1 when a field was successfully read, otherwise 0.  */
static int
getfield (FILE *stream, char *buffer, int delim)
{
  int count = 0;
  int c;

  while ((c = getc (stream)) != EOF && c != delim)
    {
      /* Put c into the buffer, keeping room for the terminating NUL.  */
      if (++count >= FIELDLEN - 1)
        {
          fprintf (stderr, "field too long\n");
          exit (EXIT_FAILURE);
        }
      *buffer++ = (char) c;
    }

  /* Terminate unconditionally, so that the caller never sees indeterminate
     bytes, not even for a truncated final field.  */
  *buffer = '\0';

  return c != EOF;
}
59
60/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
61   file.  */
62static void
63fill_names (const char *unicodedata_filename)
64{
65  unsigned int i;
66  FILE *stream;
67  char field0[FIELDLEN];
68  char field1[FIELDLEN];
69  int lineno = 0;
70
71  for (i = 0; i < 0x110000; i++)
72    unicode_names[i] = NULL;
73
74  stream = fopen (unicodedata_filename, "r");
75  if (stream == NULL)
76    {
77      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
78      exit (EXIT_FAILURE);
79    }
80
81  for (;;)
82    {
83      int n;
84      int c;
85
86      lineno++;
87      n = getfield (stream, field0, ';');
88      n += getfield (stream, field1, ';');
89      if (n == 0)
90	break;
91      if (n != 2)
92	{
93	  fprintf (stderr, "short line in '%s':%d\n",
94		   unicodedata_filename, lineno);
95	  exit (EXIT_FAILURE);
96	}
97      for (; (c = getc (stream)), (c != EOF && c != '\n'); )
98	;
99      i = strtoul (field0, NULL, 16);
100      if (i >= 0x110000)
101	{
102	  fprintf (stderr, "index too large\n");
103	  exit (EXIT_FAILURE);
104	}
105      unicode_names[i] = xstrdup (field1);
106    }
107  if (ferror (stream) || fclose (stream))
108    {
109      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
110      exit (1);
111    }
112}
113
114/* Perform an exhaustive test of the unicode_character_name function.  */
115static int
116test_name_lookup ()
117{
118  int error = 0;
119  unsigned int i;
120  char buf[UNINAME_MAX];
121
122  for (i = 0; i < 0x11000; i++)
123    {
124      char *result = unicode_character_name (i, buf);
125
126      if (unicode_names[i] != NULL)
127	{
128	  if (result == NULL)
129	    {
130	      fprintf (stderr, "\\u%04X name lookup failed!\n", i);
131	      error = 1;
132	    }
133	  else if (strcmp (result, unicode_names[i]) != 0)
134	    {
135	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
136			       i, result);
137	      error = 1;
138	    }
139	}
140      else
141	{
142	  if (result != NULL)
143	    {
144	      fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
145			       i, result);
146	      error = 1;
147	    }
148	}
149    }
150
151  for (i = 0x110000; i < 0x1000000; i++)
152    {
153      char *result = unicode_character_name (i, buf);
154
155      if (result != NULL)
156	{
157	  fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
158			   i, result);
159	  error = 1;
160	}
161    }
162
163  return error;
164}
165
166/* Perform a test of the unicode_name_character function.  */
167static int
168test_inverse_lookup ()
169{
170  int error = 0;
171  unsigned int i;
172
173  /* First, verify all valid character names are recognized.  */
174  for (i = 0; i < 0x110000; i++)
175    if (unicode_names[i] != NULL)
176      {
177	unsigned int result = unicode_name_character (unicode_names[i]);
178	if (result != i)
179	  {
180	    if (result == UNINAME_INVALID)
181	      fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
182		       unicode_names[i]);
183	    else
184	      fprintf (stderr,
185		       "inverse name lookup of \"%s\" returned 0x%04X\n",
186		       unicode_names[i], result);
187	    error = 1;
188	  }
189      }
190
191  /* Second, generate random but likely names and verify they are not
192     recognized unless really valid.  */
193  for (i = 0; i < 10000; i++)
194    {
195      unsigned int i1, i2;
196      const char *s1;
197      const char *s2;
198      unsigned int l1, l2, j1, j2;
199      char buf[2*UNINAME_MAX];
200      unsigned int result;
201
202      do i1 = ((rand () % 0x11) << 16)
203	      + ((rand () & 0xff) << 8)
204	      + (rand () & 0xff);
205      while (unicode_names[i1] == NULL);
206
207      do i2 = ((rand () % 0x11) << 16)
208	      + ((rand () & 0xff) << 8)
209	      + (rand () & 0xff);
210      while (unicode_names[i2] == NULL);
211
212      s1 = unicode_names[i1];
213      l1 = strlen (s1);
214      s2 = unicode_names[i2];
215      l2 = strlen (s2);
216
217      /* Concatenate a starting piece of s1 with an ending piece of s2.  */
218      for (j1 = 1; j1 <= l1; j1++)
219	if (j1 == l1 || s1[j1] == ' ')
220	  for (j2 = 0; j2 < l2; j2++)
221	    if (j2 == 0 || s2[j2-1] == ' ')
222	      {
223		memcpy (buf, s1, j1);
224		buf[j1] = ' ';
225		memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
226
227		result = unicode_name_character (buf);
228		if (result != UNINAME_INVALID
229		    && !(unicode_names[result] != NULL
230			 && strcmp (unicode_names[result], buf) == 0))
231		  {
232		    fprintf (stderr,
233			     "inverse name lookup of \"%s\" returned 0x%04X\n",
234			     unicode_names[i], result);
235		    error = 1;
236		  }
237	      }
238    }
239
240  /* Third, some extreme case that used to loop.  */
241  if (unicode_name_character ("A A") != UNINAME_INVALID)
242    error = 1;
243
244  return error;
245}
246
/* Entry point.  argv[1] is the name of the UnicodeData.txt-style file whose
   names are loaded into unicode_names[] before the tests are run.
   Returns 0 if all tests passed, nonzero otherwise.  */
int
main (int argc, char *argv[])
{
  int error = 0;

  /* Without this check a missing argument would pass NULL to fopen.  */
  if (argc < 2)
    {
      fprintf (stderr, "usage: %s UNICODEDATA_FILE\n",
               argc > 0 ? argv[0] : "test-names");
      return EXIT_FAILURE;
    }

  fill_names (argv[1]);

  error |= test_name_lookup ();
  error |= test_inverse_lookup ();

  return error;
}
259