1/* Test of canonical decomposition of UTF-8 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U8_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint8_t *input, size_t input_length, 34 const uint8_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint8_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u8_normalize (UNINORM_NFD, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u8_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint8_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); 56 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u8_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint8_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); 75 result = u8_normalize (UNINORM_NFD, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u8_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u8_nfd (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint8_t input[] = { 0x20 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint8_t input[] = { 0xC3, 0x84 }; 103 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 }; 104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 105 } 106 107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 108 static const uint8_t input[] = { 0xC7, 0x9E }; 109 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 }; 110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 111 } 112 113 { /* GREEK DIALYTIKA AND PERISPOMENI */ 114 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 }; 115 static const uint8_t expected[] = { 0xC2, 0xA8, 0xCD, 0x82 }; 116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 117 } 118 119 { /* SCRIPT SMALL L */ 120 static const uint8_t input[] = { 0xE2, 0x84, 0x93 }; 121 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 122 } 123 124 { /* NO-BREAK SPACE */ 125 static const uint8_t input[] = { 0xC2, 0xA0 }; 126 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 127 } 128 129 { /* ARABIC LETTER VEH INITIAL FORM */ 130 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC }; 131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 132 } 133 134 { /* ARABIC LETTER VEH MEDIAL FORM */ 135 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD }; 136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 137 } 138 139 { /* ARABIC LETTER VEH FINAL FORM */ 140 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB }; 141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 142 } 143 144 { /* ARABIC LETTER VEH ISOLATED FORM */ 145 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA }; 146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 147 } 148 149 { /* CIRCLED NUMBER FIFTEEN */ 150 static const uint8_t input[] = { 0xE2, 0x91, 0xAE }; 151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 152 } 153 154 { /* TRADE MARK SIGN */ 155 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 }; 156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 157 } 158 159 { /* LATIN SUBSCRIPT SMALL LETTER I */ 160 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 }; 161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 162 } 163 164 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 165 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 }; 166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 167 } 168 169 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 170 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 }; 171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 172 } 173 174 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 175 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 }; 176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 177 } 178 179 { /* SMALL IDEOGRAPHIC COMMA */ 180 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 }; 181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 182 } 183 184 { /* SQUARE MHZ */ 185 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 }; 186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 187 } 188 189 { /* VULGAR FRACTION THREE EIGHTHS */ 190 static const uint8_t input[] = { 0xE2, 0x85, 0x9C }; 191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 192 } 193 194 { /* MICRO SIGN */ 195 static const uint8_t input[] = { 0xC2, 0xB5 }; 196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 197 } 198 199 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 200 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA }; 201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 202 } 203 204 { /* HANGUL SYLLABLE GEUL */ 205 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 }; 206 static const uint8_t expected[] = 207 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF }; 208 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 209 } 210 211 { /* HANGUL SYLLABLE GEU */ 212 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 }; 213 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 }; 214 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 215 } 216 217 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 218 static const uint8_t input[] = 219 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', 220 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 221 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 222 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 223 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', 224 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 225 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', 226 0xED, 0x95, 0x9C, 227 0xEA, 0xB8, 0x80, '\n' 228 }; 229 static const uint8_t expected[] = 230 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', 231 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 232 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86, 233 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 234 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', 235 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 236 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', 237 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB, 238 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n' 239 }; 240 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 241 } 242 243#if HAVE_DECL_ALARM 244 /* Declare failure if test takes too long, by using default abort 245 caused by SIGALRM. */ 246 signal (SIGALRM, SIG_DFL); 247 alarm (50); 248#endif 249 250 /* Check that the sorting is not O(n��) but O(n log n). */ 251 { 252 int pass; 253 for (pass = 0; pass < 3; pass++) 254 { 255 size_t repeat = 1; 256 size_t m = 100000; 257 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t)); 258 if (input != NULL) 259 { 260 uint8_t *expected = input + (2 * m - 1); 261 size_t m1 = m / 2; 262 size_t m2 = (m - 1) / 2; 263 /* NB: m1 + m2 == m - 1. */ 264 uint8_t *p; 265 size_t i; 266 267 input[0] = 0x41; 268 p = input + 1; 269 switch (pass) 270 { 271 case 0: 272 for (i = 0; i < m1; i++) 273 { 274 *p++ = 0xCC; 275 *p++ = 0x99; 276 } 277 for (i = 0; i < m2; i++) 278 { 279 *p++ = 0xCC; 280 *p++ = 0x80; 281 } 282 break; 283 284 case 1: 285 for (i = 0; i < m2; i++) 286 { 287 *p++ = 0xCC; 288 *p++ = 0x80; 289 } 290 for (i = 0; i < m1; i++) 291 { 292 *p++ = 0xCC; 293 *p++ = 0x99; 294 } 295 break; 296 297 case 2: 298 for (i = 0; i < m2; i++) 299 { 300 *p++ = 0xCC; 301 *p++ = 0x99; 302 *p++ = 0xCC; 303 *p++ = 0x80; 304 } 305 for (; i < m1; i++) 306 { 307 *p++ = 0xCC; 308 *p++ = 0x99; 309 } 310 break; 311 312 default: 313 abort (); 314 } 315 316 expected[0] = 0x41; 317 p = expected + 1; 318 for (i = 0; i < m1; i++) 319 { 320 *p++ = 0xCC; 321 *p++ = 0x99; 322 } 323 for (i = 0; i < m2; i++) 324 { 325 *p++ = 0xCC; 326 *p++ = 0x80; 327 } 328 329 for (; repeat > 0; repeat--) 330 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0); 331 332 free (input); 333 } 334 } 335 } 336} 337 338#else 339 340void 341test_u8_nfd (void) 342{ 343} 344 345#endif 346