1/* Test of compatibility decomposition of UTF-8 strings.
2   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
18
19#include <config.h>
20
21#if GNULIB_TEST_UNINORM_U8_NORMALIZE
22
23#include "uninorm.h"
24
25#include <signal.h>
26#include <stdlib.h>
27#include <unistd.h>
28
29#include "unistr.h"
30#include "macros.h"
31
32static int
33check (const uint8_t *input, size_t input_length,
34       const uint8_t *expected, size_t expected_length)
35{
36  size_t length;
37  uint8_t *result;
38
39  /* Test return conventions with resultbuf == NULL.  */
40  result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length);
41  if (!(result != NULL))
42    return 1;
43  if (!(length == expected_length))
44    return 2;
45  if (!(u8_cmp (result, expected, expected_length) == 0))
46    return 3;
47  free (result);
48
49  /* Test return conventions with resultbuf too small.  */
50  if (expected_length > 0)
51    {
52      uint8_t *preallocated;
53
54      length = expected_length - 1;
55      preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
56      result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
57      if (!(result != NULL))
58        return 4;
59      if (!(result != preallocated))
60        return 5;
61      if (!(length == expected_length))
62        return 6;
63      if (!(u8_cmp (result, expected, expected_length) == 0))
64        return 7;
65      free (result);
66      free (preallocated);
67    }
68
69  /* Test return conventions with resultbuf large enough.  */
70  {
71    uint8_t *preallocated;
72
73    length = expected_length;
74    preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
75    result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length);
76    if (!(result != NULL))
77      return 8;
78    if (!(preallocated == NULL || result == preallocated))
79      return 9;
80    if (!(length == expected_length))
81      return 10;
82    if (!(u8_cmp (result, expected, expected_length) == 0))
83      return 11;
84    free (preallocated);
85  }
86
87  return 0;
88}
89
/* Store COUNT copies of the two-byte sequence 0xCC, TRAIL starting at P.
   Returns the position just past the last byte written.  */
static uint8_t *
append_cc_pairs (uint8_t *p, size_t count, uint8_t trail)
{
  while (count > 0)
    {
      *p++ = 0xCC;
      *p++ = trail;
      count--;
    }
  return p;
}

/* Run all NFKD checks on UTF-8 input: a series of fixed input/expected
   byte tables passed through check(), followed by a large-input run that
   must finish before a 50 second alarm fires.  Any failing check aborts
   through ASSERT.  */
void
test_u8_nfkd (void)
{
  /* The empty string normalizes to the empty string.  */
  {
    ASSERT (check (NULL, 0, NULL, 0) == 0);
  }

  /* U+0020 SPACE is left unchanged.  */
  {
    static const uint8_t input[]    = { 0x20 };
    ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0);
  }

  /* U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS */
  {
    static const uint8_t input[]    = { 0xC3, 0x84 };
    static const uint8_t expected[] = { 0x41, 0xCC, 0x88 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+01DE LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  {
    static const uint8_t input[]    = { 0xC7, 0x9E };
    static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+1FC1 GREEK DIALYTIKA AND PERISPOMENI */
  {
    static const uint8_t input[]    = { 0xE1, 0xBF, 0x81 };
    static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+2113 SCRIPT SMALL L */
  {
    static const uint8_t input[]    = { 0xE2, 0x84, 0x93 };
    static const uint8_t expected[] = { 0x6C };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+00A0 NO-BREAK SPACE */
  {
    static const uint8_t input[]    = { 0xC2, 0xA0 };
    static const uint8_t expected[] = { 0x20 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FB6C ARABIC LETTER VEH INITIAL FORM */
  {
    static const uint8_t input[]    = { 0xEF, 0xAD, 0xAC };
    static const uint8_t expected[] = { 0xDA, 0xA4 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FB6D ARABIC LETTER VEH MEDIAL FORM */
  {
    static const uint8_t input[]    = { 0xEF, 0xAD, 0xAD };
    static const uint8_t expected[] = { 0xDA, 0xA4 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FB6B ARABIC LETTER VEH FINAL FORM */
  {
    static const uint8_t input[]    = { 0xEF, 0xAD, 0xAB };
    static const uint8_t expected[] = { 0xDA, 0xA4 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FB6A ARABIC LETTER VEH ISOLATED FORM */
  {
    static const uint8_t input[]    = { 0xEF, 0xAD, 0xAA };
    static const uint8_t expected[] = { 0xDA, 0xA4 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+246E CIRCLED NUMBER FIFTEEN */
  {
    static const uint8_t input[]    = { 0xE2, 0x91, 0xAE };
    static const uint8_t expected[] = { 0x31, 0x35 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+2122 TRADE MARK SIGN */
  {
    static const uint8_t input[]    = { 0xE2, 0x84, 0xA2 };
    static const uint8_t expected[] = { 0x54, 0x4D };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+1D62 LATIN SUBSCRIPT SMALL LETTER I */
  {
    static const uint8_t input[]    = { 0xE1, 0xB5, 0xA2 };
    static const uint8_t expected[] = { 0x69 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FE35 PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
  {
    static const uint8_t input[]    = { 0xEF, 0xB8, 0xB5 };
    static const uint8_t expected[] = { 0x28 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FF21 FULLWIDTH LATIN CAPITAL LETTER A */
  {
    static const uint8_t input[]    = { 0xEF, 0xBC, 0xA1 };
    static const uint8_t expected[] = { 0x41 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FF64 HALFWIDTH IDEOGRAPHIC COMMA */
  {
    static const uint8_t input[]    = { 0xEF, 0xBD, 0xA4 };
    static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FE51 SMALL IDEOGRAPHIC COMMA */
  {
    static const uint8_t input[]    = { 0xEF, 0xB9, 0x91 };
    static const uint8_t expected[] = { 0xE3, 0x80, 0x81 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+3392 SQUARE MHZ */
  {
    static const uint8_t input[]    = { 0xE3, 0x8E, 0x92 };
    static const uint8_t expected[] = { 0x4D, 0x48, 0x7A };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+215C VULGAR FRACTION THREE EIGHTHS */
  {
    static const uint8_t input[]    = { 0xE2, 0x85, 0x9C };
    static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+00B5 MICRO SIGN */
  {
    static const uint8_t input[]    = { 0xC2, 0xB5 };
    static const uint8_t expected[] = { 0xCE, 0xBC };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+FDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
  {
    static const uint8_t input[]    = { 0xEF, 0xB7, 0xBA };
    static const uint8_t expected[] =
      { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9,
        0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87,
        0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85
      };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+AE00 HANGUL SYLLABLE GEUL */
  {
    static const uint8_t input[]    = { 0xEA, 0xB8, 0x80 };
    static const uint8_t expected[] =
      { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* U+ADF8 HANGUL SYLLABLE GEU */
  {
    static const uint8_t input[]    = { 0xEA, 0xB7, 0xB8 };
    static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

  /* Mixed-script sentence, followed by a newline:
     "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a)  日本語,中文,한글" */
  {
    static const uint8_t input[] =
      { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xED, 0x95, 0x9C,
        0xEA, 0xB8, 0x80, '\n'
      };
    static const uint8_t expected[] =
      { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
      };
    ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0);
  }

#if HAVE_DECL_ALARM
  /* Guard against a runaway test: restore the default SIGALRM disposition
     and arm a 50 second alarm, so that a run which takes too long is
     ended by the signal instead of hanging.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n²) but O(n log n).
     Each pass builds 'A' followed by m - 1 two-byte sequences, m1 of them
     0xCC 0x99 (U+0319) and m2 of them 0xCC 0x80 (U+0300), laid out
     differently per pass.  The expected output is always all U+0319 first,
     then all U+0300.  */
  {
    int pass;

    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        size_t m = 100000;
        /* One allocation holds both buffers: input in the first half,
           expected in the second half.  */
        uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
        uint8_t *expected;
        size_t m1;
        size_t m2;
        uint8_t *p;
        size_t i;

        /* Without memory this pass is silently skipped.  */
        if (input == NULL)
          continue;

        expected = input + (2 * m - 1);
        m1 = m / 2;
        m2 = (m - 1) / 2;
        /* NB: m1 + m2 == m - 1.  */

        input[0] = 0x41;
        p = input + 1;
        switch (pass)
          {
          case 0:
            /* Already in the expected order.  */
            p = append_cc_pairs (p, m1, 0x99);
            p = append_cc_pairs (p, m2, 0x80);
            break;

          case 1:
            /* The two runs swapped.  */
            p = append_cc_pairs (p, m2, 0x80);
            p = append_cc_pairs (p, m1, 0x99);
            break;

          case 2:
            /* Strictly alternating, then the leftover U+0319 marks
               (m1 >= m2, so m1 - m2 cannot wrap).  */
            for (i = 0; i < m2; i++)
              {
                p = append_cc_pairs (p, 1, 0x99);
                p = append_cc_pairs (p, 1, 0x80);
              }
            p = append_cc_pairs (p, m1 - m2, 0x99);
            break;

          default:
            abort ();
          }

        expected[0] = 0x41;
        p = append_cc_pairs (expected + 1, m1, 0x99);
        p = append_cc_pairs (p, m2, 0x80);

        while (repeat > 0)
          {
            ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0);
            repeat--;
          }

        free (input);
      }
  }
}
358
359#else
360
/* Fallback used when GNULIB_TEST_UNINORM_U8_NORMALIZE is not set: the
   function stays defined so callers still link, but it checks nothing.  */
void
test_u8_nfkd (void)
{
  /* Intentionally empty.  */
}
365
366#endif
367