1/* Test of canonical decomposition of UTF-8 strings.
2   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
18
19#include <config.h>
20
21#if GNULIB_TEST_UNINORM_U8_NORMALIZE
22
23#include "uninorm.h"
24
25#include <signal.h>
26#include <stdlib.h>
27#include <unistd.h>
28
29#include "unistr.h"
30#include "macros.h"
31
32static int
33check (const uint8_t *input, size_t input_length,
34       const uint8_t *expected, size_t expected_length)
35{
36  size_t length;
37  uint8_t *result;
38
39  /* Test return conventions with resultbuf == NULL.  */
40  result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length);
41  if (!(result != NULL))
42    return 1;
43  if (!(length == expected_length))
44    return 2;
45  if (!(u8_cmp (result, expected, expected_length) == 0))
46    return 3;
47  free (result);
48
49  /* Test return conventions with resultbuf too small.  */
50  if (expected_length > 0)
51    {
52      uint8_t *preallocated;
53
54      length = expected_length - 1;
55      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56      result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
57      if (!(result != NULL))
58        return 4;
59      if (!(result != preallocated))
60        return 5;
61      if (!(length == expected_length))
62        return 6;
63      if (!(u8_cmp (result, expected, expected_length) == 0))
64        return 7;
65      free (result);
66      free (preallocated);
67    }
68
69  /* Test return conventions with resultbuf large enough.  */
70  {
71    uint8_t *preallocated;
72
73    length = expected_length;
74    preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75    result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length);
76    if (!(result != NULL))
77      return 8;
78    if (!(preallocated == NULL || result == preallocated))
79      return 9;
80    if (!(length == expected_length))
81      return 10;
82    if (!(u8_cmp (result, expected, expected_length) == 0))
83      return 11;
84    free (preallocated);
85  }
86
87  return 0;
88}
89
/* Store COUNT copies of the two-byte sequence 0xCC TRAIL starting at P and
   return the position just past the last byte written.  */
static uint8_t *
append_marks (uint8_t *p, size_t count, uint8_t trail)
{
  while (count > 0)
    {
      *p++ = 0xCC;
      *p++ = trail;
      count--;
    }
  return p;
}

/* Run the NFD test suite for UTF-8 strings: a set of fixed vectors checked
   through check(), followed by a large input of combining marks that is
   guarded by a 50 second alarm where alarm() is declared.  */
void
test_u8_nfd (void)
{
  /* Inputs that NFD is expected to return unchanged.  */
  static const uint8_t space[]               = { 0x20 };
  static const uint8_t script_small_l[]      = { 0xE2, 0x84, 0x93 };
  static const uint8_t no_break_space[]      = { 0xC2, 0xA0 };
  static const uint8_t veh_initial[]         = { 0xEF, 0xAD, 0xAC };
  static const uint8_t veh_medial[]          = { 0xEF, 0xAD, 0xAD };
  static const uint8_t veh_final[]           = { 0xEF, 0xAD, 0xAB };
  static const uint8_t veh_isolated[]        = { 0xEF, 0xAD, 0xAA };
  static const uint8_t circled_fifteen[]     = { 0xE2, 0x91, 0xAE };
  static const uint8_t trade_mark[]          = { 0xE2, 0x84, 0xA2 };
  static const uint8_t subscript_i[]         = { 0xE1, 0xB5, 0xA2 };
  static const uint8_t vertical_left_paren[] = { 0xEF, 0xB8, 0xB5 };
  static const uint8_t fullwidth_a[]         = { 0xEF, 0xBC, 0xA1 };
  static const uint8_t halfwidth_comma[]     = { 0xEF, 0xBD, 0xA4 };
  static const uint8_t small_comma[]         = { 0xEF, 0xB9, 0x91 };
  static const uint8_t square_mhz[]          = { 0xE3, 0x8E, 0x92 };
  static const uint8_t three_eighths[]       = { 0xE2, 0x85, 0x9C };
  static const uint8_t micro_sign[]          = { 0xC2, 0xB5 };
  static const uint8_t sallallahou[]         = { 0xEF, 0xB7, 0xBA };

  /* Inputs paired with the decomposed form NFD is expected to produce.  */
  static const uint8_t a_diaeresis[]        = { 0xC3, 0x84 };
  static const uint8_t a_diaeresis_nfd[]    = { 0x41, 0xCC, 0x88 };
  static const uint8_t a_diaeresis_macron[] = { 0xC7, 0x9E };
  static const uint8_t a_diaeresis_macron_nfd[] =
    { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
  static const uint8_t dialytika_perispomeni[]     = { 0xE1, 0xBF, 0x81 };
  static const uint8_t dialytika_perispomeni_nfd[] =
    { 0xC2, 0xA8, 0xCD, 0x82 };
  static const uint8_t hangul_geul[]     = { 0xEA, 0xB8, 0x80 };
  static const uint8_t hangul_geul_nfd[] =
    { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
  static const uint8_t hangul_geu[]      = { 0xEA, 0xB7, 0xB8 };
  static const uint8_t hangul_geu_nfd[]  =
    { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };

  /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
  static const uint8_t mixed_text[] =
    { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
      ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
      0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
      0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
      's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
      '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
      0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
      0xED, 0x95, 0x9C,
      0xEA, 0xB8, 0x80, '\n'
    };
  static const uint8_t mixed_text_nfd[] =
    { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
      ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
      0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
      0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
      's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
      '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
      0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
      0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
      0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
    };

  /* Each vector keeps an ASSERT of its own so that a failure can be
     attributed to one case.  */

  /* Empty string.  */
  ASSERT (check (NULL, 0, NULL, 0) == 0);

  /* SPACE */
  ASSERT (check (space, SIZEOF (space), space, SIZEOF (space)) == 0);

  /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  ASSERT (check (a_diaeresis, SIZEOF (a_diaeresis),
                 a_diaeresis_nfd, SIZEOF (a_diaeresis_nfd)) == 0);

  /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  ASSERT (check (a_diaeresis_macron, SIZEOF (a_diaeresis_macron),
                 a_diaeresis_macron_nfd, SIZEOF (a_diaeresis_macron_nfd))
          == 0);

  /* GREEK DIALYTIKA AND PERISPOMENI */
  ASSERT (check (dialytika_perispomeni, SIZEOF (dialytika_perispomeni),
                 dialytika_perispomeni_nfd,
                 SIZEOF (dialytika_perispomeni_nfd))
          == 0);

  /* SCRIPT SMALL L */
  ASSERT (check (script_small_l, SIZEOF (script_small_l),
                 script_small_l, SIZEOF (script_small_l)) == 0);

  /* NO-BREAK SPACE */
  ASSERT (check (no_break_space, SIZEOF (no_break_space),
                 no_break_space, SIZEOF (no_break_space)) == 0);

  /* ARABIC LETTER VEH INITIAL FORM */
  ASSERT (check (veh_initial, SIZEOF (veh_initial),
                 veh_initial, SIZEOF (veh_initial)) == 0);

  /* ARABIC LETTER VEH MEDIAL FORM */
  ASSERT (check (veh_medial, SIZEOF (veh_medial),
                 veh_medial, SIZEOF (veh_medial)) == 0);

  /* ARABIC LETTER VEH FINAL FORM */
  ASSERT (check (veh_final, SIZEOF (veh_final),
                 veh_final, SIZEOF (veh_final)) == 0);

  /* ARABIC LETTER VEH ISOLATED FORM */
  ASSERT (check (veh_isolated, SIZEOF (veh_isolated),
                 veh_isolated, SIZEOF (veh_isolated)) == 0);

  /* CIRCLED NUMBER FIFTEEN */
  ASSERT (check (circled_fifteen, SIZEOF (circled_fifteen),
                 circled_fifteen, SIZEOF (circled_fifteen)) == 0);

  /* TRADE MARK SIGN */
  ASSERT (check (trade_mark, SIZEOF (trade_mark),
                 trade_mark, SIZEOF (trade_mark)) == 0);

  /* LATIN SUBSCRIPT SMALL LETTER I */
  ASSERT (check (subscript_i, SIZEOF (subscript_i),
                 subscript_i, SIZEOF (subscript_i)) == 0);

  /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
  ASSERT (check (vertical_left_paren, SIZEOF (vertical_left_paren),
                 vertical_left_paren, SIZEOF (vertical_left_paren)) == 0);

  /* FULLWIDTH LATIN CAPITAL LETTER A */
  ASSERT (check (fullwidth_a, SIZEOF (fullwidth_a),
                 fullwidth_a, SIZEOF (fullwidth_a)) == 0);

  /* HALFWIDTH IDEOGRAPHIC COMMA */
  ASSERT (check (halfwidth_comma, SIZEOF (halfwidth_comma),
                 halfwidth_comma, SIZEOF (halfwidth_comma)) == 0);

  /* SMALL IDEOGRAPHIC COMMA */
  ASSERT (check (small_comma, SIZEOF (small_comma),
                 small_comma, SIZEOF (small_comma)) == 0);

  /* SQUARE MHZ */
  ASSERT (check (square_mhz, SIZEOF (square_mhz),
                 square_mhz, SIZEOF (square_mhz)) == 0);

  /* VULGAR FRACTION THREE EIGHTHS */
  ASSERT (check (three_eighths, SIZEOF (three_eighths),
                 three_eighths, SIZEOF (three_eighths)) == 0);

  /* MICRO SIGN */
  ASSERT (check (micro_sign, SIZEOF (micro_sign),
                 micro_sign, SIZEOF (micro_sign)) == 0);

  /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
  ASSERT (check (sallallahou, SIZEOF (sallallahou),
                 sallallahou, SIZEOF (sallallahou)) == 0);

  /* HANGUL SYLLABLE GEUL */
  ASSERT (check (hangul_geul, SIZEOF (hangul_geul),
                 hangul_geul_nfd, SIZEOF (hangul_geul_nfd)) == 0);

  /* HANGUL SYLLABLE GEU */
  ASSERT (check (hangul_geu, SIZEOF (hangul_geu),
                 hangul_geu_nfd, SIZEOF (hangul_geu_nfd)) == 0);

  /* Mixed Latin, Cyrillic, CJK and Hangul text.  */
  ASSERT (check (mixed_text, SIZEOF (mixed_text),
                 mixed_text_nfd, SIZEOF (mixed_text_nfd)) == 0);

#if HAVE_DECL_ALARM
  /* Give the remaining part of the test at most 50 seconds: restore the
     default SIGALRM disposition so that an expiring alarm ends the
     process and thereby fails the test.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n²) but O(n log n).  */
  {
    int pass;

    /* Every pass builds 'A' followed by m1 copies of 0xCC 0x99 (U+0319)
       and m2 copies of 0xCC 0x80 (U+0300), arranged differently:
         pass 0: all U+0319 first, then all U+0300;
         pass 1: all U+0300 first, then all U+0319;
         pass 2: the two marks alternating, surplus U+0319 at the end.
       In each case the expected result is 'A', all U+0319, all U+0300.  */
    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        size_t m = 100000;
        size_t m1 = m / 2;
        size_t m2 = (m - 1) / 2;
        /* NB: m1 + m2 == m - 1.  */
        size_t units = 2 * m - 1;
        /* A single allocation holds the input followed by the expected
           output.  */
        uint8_t *scrambled = (uint8_t *) malloc (2 * units * sizeof (uint8_t));
        uint8_t *sorted;
        uint8_t *p;
        size_t i;

        /* Out of memory: skip this pass instead of failing.  */
        if (scrambled == NULL)
          continue;

        sorted = scrambled + units;

        scrambled[0] = 0x41;
        p = scrambled + 1;
        if (pass == 0)
          {
            p = append_marks (p, m1, 0x99);
            append_marks (p, m2, 0x80);
          }
        else if (pass == 1)
          {
            p = append_marks (p, m2, 0x80);
            append_marks (p, m1, 0x99);
          }
        else if (pass == 2)
          {
            for (i = 0; i < m2; i++)
              {
                p = append_marks (p, 1, 0x99);
                p = append_marks (p, 1, 0x80);
              }
            append_marks (p, m1 - m2, 0x99);
          }
        else
          abort ();

        sorted[0] = 0x41;
        p = append_marks (sorted + 1, m1, 0x99);
        append_marks (p, m2, 0x80);

        while (repeat > 0)
          {
            ASSERT (check (scrambled, units, sorted, units) == 0);
            repeat--;
          }

        free (scrambled);
      }
  }
}
337
338#else
339
/* Fallback used when GNULIB_TEST_UNINORM_U8_NORMALIZE is unset or zero:
   the real test body is compiled out, so this entry point does nothing.  */
void
test_u8_nfd (void)
{
}
344
345#endif
346