1/* Test of compatibility normalization of UTF-32 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U32_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint32_t *input, size_t input_length, 34 const uint32_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint32_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u32_normalize (UNINORM_NFKC, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u32_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint32_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); 56 result = u32_normalize (UNINORM_NFKC, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u32_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint32_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); 75 result = u32_normalize (UNINORM_NFKC, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u32_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u32_nfkc (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint32_t input[] = { 0x0020 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint32_t input[] = { 0x00C4 }; 103 static const uint32_t decomposed[] = { 0x0041, 0x0308 }; 104 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 105 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 106 } 107 108 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 109 static const uint32_t input[] = { 0x01DE }; 110 static const uint32_t decomposed[] = { 0x0041, 0x0308, 0x0304 }; 111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 112 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 113 } 114 115 { /* ANGSTROM SIGN */ 116 static const uint32_t input[] = { 0x212B }; 117 static const uint32_t decomposed[] = { 0x0041, 0x030A }; 118 static const uint32_t expected[] = { 0x00C5 }; 119 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 120 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0); 121 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0); 122 } 123 124 { /* GREEK DIALYTIKA AND PERISPOMENI */ 125 static const uint32_t input[] = { 0x1FC1 }; 126 static const uint32_t decomposed[] = { 0x0020, 0x0308, 0x0342 }; 127 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 128 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 129 } 130 131 { /* SCRIPT SMALL L */ 132 static const uint32_t input[] = { 0x2113 }; 133 static const uint32_t decomposed[] = { 0x006C }; 134 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 135 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 136 } 137 138 { /* NO-BREAK SPACE */ 139 static const uint32_t input[] = { 0x00A0 }; 140 static const uint32_t decomposed[] = { 0x0020 }; 141 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 142 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 143 } 144 145 { /* ARABIC LETTER VEH INITIAL FORM */ 146 static const uint32_t input[] = { 0xFB6C }; 147 static const uint32_t decomposed[] = { 0x06A4 }; 148 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 149 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 150 } 151 152 { /* ARABIC LETTER VEH MEDIAL FORM */ 153 static const uint32_t input[] = { 0xFB6D }; 154 static const uint32_t decomposed[] = { 0x06A4 }; 155 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 156 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 157 } 158 159 { /* ARABIC LETTER VEH FINAL FORM */ 160 static const uint32_t input[] = { 0xFB6B }; 161 static const uint32_t decomposed[] = { 0x06A4 }; 162 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 163 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 164 } 165 166 { /* ARABIC LETTER VEH ISOLATED FORM */ 167 static const uint32_t input[] = { 0xFB6A }; 168 static const uint32_t decomposed[] = { 0x06A4 }; 169 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 170 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 171 } 172 173 { /* CIRCLED NUMBER FIFTEEN */ 174 static const uint32_t input[] = { 0x246E }; 175 static const uint32_t decomposed[] = { 0x0031, 0x0035 }; 176 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 177 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 178 } 179 180 { /* TRADE MARK SIGN */ 181 static const uint32_t input[] = { 0x2122 }; 182 static const uint32_t decomposed[] = { 0x0054, 0x004D }; 183 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 184 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 185 } 186 187 { /* LATIN SUBSCRIPT SMALL LETTER I */ 188 static const uint32_t input[] = { 0x1D62 }; 189 static const uint32_t decomposed[] = { 0x0069 }; 190 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 191 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 192 } 193 194 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 195 static const uint32_t input[] = { 0xFE35 }; 196 static const uint32_t decomposed[] = { 0x0028 }; 197 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 198 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 199 } 200 201 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 202 static const uint32_t input[] = { 0xFF21 }; 203 static const uint32_t decomposed[] = { 0x0041 }; 204 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 205 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 206 } 207 208 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 209 static const uint32_t input[] = { 0xFF64 }; 210 static const uint32_t decomposed[] = { 0x3001 }; 211 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 212 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 213 } 214 215 { /* SMALL IDEOGRAPHIC COMMA */ 216 static const uint32_t input[] = { 0xFE51 }; 217 static const uint32_t decomposed[] = { 0x3001 }; 218 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 219 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 220 } 221 222 { /* SQUARE MHZ */ 223 static const uint32_t input[] = { 0x3392 }; 224 static const uint32_t decomposed[] = { 0x004D, 0x0048, 0x007A }; 225 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 226 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 227 } 228 229 { /* VULGAR FRACTION THREE EIGHTHS */ 230 static const uint32_t input[] = { 0x215C }; 231 static const uint32_t decomposed[] = { 0x0033, 0x2044, 0x0038 }; 232 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 233 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 234 } 235 236 { /* MICRO SIGN */ 237 static const uint32_t input[] = { 0x00B5 }; 238 static const uint32_t decomposed[] = { 0x03BC }; 239 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 240 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 241 } 242 243 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 244 static const uint32_t input[] = { 0xFDFA }; 245 static const uint32_t decomposed[] = 246 { 0x0635, 0x0644, 0x0649, 0x0020, 0x0627, 0x0644, 0x0644, 0x0647, 0x0020, 247 0x0639, 0x0644, 0x064A, 0x0647, 0x0020, 0x0648, 0x0633, 0x0644, 0x0645 248 }; 249 ASSERT (check (input, SIZEOF (input), decomposed, SIZEOF (decomposed)) == 0); 250 ASSERT (check (decomposed, SIZEOF (decomposed), decomposed, SIZEOF (decomposed)) == 0); 251 } 252 253 { /* HANGUL SYLLABLE GEUL */ 254 static const uint32_t input[] = { 0xAE00 }; 255 static const uint32_t decomposed[] = { 0x1100, 0x1173, 0x11AF }; 256 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 257 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 258 } 259 260 { /* HANGUL SYLLABLE GEU */ 261 static const uint32_t input[] = { 0xADF8 }; 262 static const uint32_t decomposed[] = { 0x1100, 0x1173 }; 263 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 264 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 265 } 266 267 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 268 static const uint32_t input[] = 269 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 270 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 271 0x0439, 0x0442, 0x0435, '!', ' ', 272 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 273 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 274 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' 275 }; 276 static const uint32_t decomposed[] = 277 { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 278 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 279 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', 280 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x0032, 281 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 282 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 283 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' 284 }; 285 static const uint32_t expected[] = 286 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 287 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 288 0x0439, 0x0442, 0x0435, '!', ' ', 289 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x0032, 290 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 291 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' 292 }; 293 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 294 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0); 295 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0); 296 } 297 298#if HAVE_DECL_ALARM 299 /* Declare failure if test takes too long, by using default abort 300 caused by SIGALRM. */ 301 signal (SIGALRM, SIG_DFL); 302 alarm (50); 303#endif 304 305 /* Check that the sorting is not O(n��) but O(n log n). */ 306 { 307 int pass; 308 for (pass = 0; pass < 3; pass++) 309 { 310 size_t repeat = 1; 311 size_t m = 100000; 312 uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t)); 313 if (input != NULL) 314 { 315 uint32_t *expected = input + m; 316 size_t m1 = m / 2; 317 size_t m2 = (m - 1) / 2; 318 /* NB: m1 + m2 == m - 1. */ 319 uint32_t *p; 320 size_t i; 321 322 input[0] = 0x0041; 323 p = input + 1; 324 switch (pass) 325 { 326 case 0: 327 for (i = 0; i < m1; i++) 328 *p++ = 0x0319; 329 for (i = 0; i < m2; i++) 330 *p++ = 0x0300; 331 break; 332 333 case 1: 334 for (i = 0; i < m2; i++) 335 *p++ = 0x0300; 336 for (i = 0; i < m1; i++) 337 *p++ = 0x0319; 338 break; 339 340 case 2: 341 for (i = 0; i < m2; i++) 342 { 343 *p++ = 0x0319; 344 *p++ = 0x0300; 345 } 346 for (; i < m1; i++) 347 *p++ = 0x0319; 348 break; 349 350 default: 351 abort (); 352 } 353 354 expected[0] = 0x00C0; 355 p = expected + 1; 356 for (i = 0; i < m1; i++) 357 *p++ = 0x0319; 358 for (i = 0; i < m2 - 1; i++) 359 *p++ = 0x0300; 360 361 for (; repeat > 0; repeat--) 362 { 363 ASSERT (check (input, m, expected, m - 1) == 0); 364 ASSERT (check (expected, m - 1, expected, m - 1) == 0); 365 } 366 367 free (input); 368 } 369 } 370 } 371} 372 373#else 374 375void 376test_u32_nfkc (void) 377{ 378} 379 380#endif 381