1/* Test of compatibility decomposition of UTF-8 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U8_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint8_t *input, size_t input_length, 34 const uint8_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint8_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u8_normalize (UNINORM_NFKD, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u8_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint8_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); 56 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u8_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint8_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint8_t *) malloc (length * sizeof (uint8_t)); 75 result = u8_normalize (UNINORM_NFKD, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u8_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u8_nfkd (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint8_t input[] = { 0x20 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint8_t input[] = { 0xC3, 0x84 }; 103 static const uint8_t expected[] = { 0x41, 0xCC, 0x88 }; 104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 105 } 106 107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 108 static const uint8_t input[] = { 0xC7, 0x9E }; 109 static const uint8_t expected[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 }; 110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 111 } 112 113 { /* GREEK DIALYTIKA AND PERISPOMENI */ 114 static const uint8_t input[] = { 0xE1, 0xBF, 0x81 }; 115 static const uint8_t expected[] = { 0x20, 0xCC, 0x88, 0xCD, 0x82 }; 116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 117 } 118 119 { /* SCRIPT SMALL L */ 120 static const uint8_t input[] = { 0xE2, 0x84, 0x93 }; 121 static const uint8_t expected[] = { 0x6C }; 122 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 123 } 124 125 { /* NO-BREAK SPACE */ 126 static const uint8_t input[] = { 0xC2, 0xA0 }; 127 static const uint8_t expected[] = { 0x20 }; 128 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 129 } 130 131 { /* ARABIC LETTER VEH INITIAL FORM */ 132 static const uint8_t input[] = { 0xEF, 0xAD, 0xAC }; 133 static const uint8_t expected[] = { 0xDA, 0xA4 }; 134 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 135 } 136 137 { /* ARABIC LETTER VEH MEDIAL FORM */ 138 static const uint8_t input[] = { 0xEF, 0xAD, 0xAD }; 139 static const uint8_t expected[] = { 0xDA, 0xA4 }; 140 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 141 } 142 143 { /* ARABIC LETTER VEH FINAL FORM */ 144 static const uint8_t input[] = { 0xEF, 0xAD, 0xAB }; 145 static const uint8_t expected[] = { 0xDA, 0xA4 }; 146 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 147 } 148 149 { /* ARABIC LETTER VEH ISOLATED FORM */ 150 static const uint8_t input[] = { 0xEF, 0xAD, 0xAA }; 151 static const uint8_t expected[] = { 0xDA, 0xA4 }; 152 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 153 } 154 155 { /* CIRCLED NUMBER FIFTEEN */ 156 static const uint8_t input[] = { 0xE2, 0x91, 0xAE }; 157 static const uint8_t expected[] = { 0x31, 0x35 }; 158 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 159 } 160 161 { /* TRADE MARK SIGN */ 162 static const uint8_t input[] = { 0xE2, 0x84, 0xA2 }; 163 static const uint8_t expected[] = { 0x54, 0x4D }; 164 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 165 } 166 167 { /* LATIN SUBSCRIPT SMALL LETTER I */ 168 static const uint8_t input[] = { 0xE1, 0xB5, 0xA2 }; 169 static const uint8_t expected[] = { 0x69 }; 170 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 171 } 172 173 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 174 static const uint8_t input[] = { 0xEF, 0xB8, 0xB5 }; 175 static const uint8_t expected[] = { 0x28 }; 176 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 177 } 178 179 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 180 static const uint8_t input[] = { 0xEF, 0xBC, 0xA1 }; 181 static const uint8_t expected[] = { 0x41 }; 182 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 183 } 184 185 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 186 static const uint8_t input[] = { 0xEF, 0xBD, 0xA4 }; 187 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 }; 188 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 189 } 190 191 { /* SMALL IDEOGRAPHIC COMMA */ 192 static const uint8_t input[] = { 0xEF, 0xB9, 0x91 }; 193 static const uint8_t expected[] = { 0xE3, 0x80, 0x81 }; 194 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 195 } 196 197 { /* SQUARE MHZ */ 198 static const uint8_t input[] = { 0xE3, 0x8E, 0x92 }; 199 static const uint8_t expected[] = { 0x4D, 0x48, 0x7A }; 200 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 201 } 202 203 { /* VULGAR FRACTION THREE EIGHTHS */ 204 static const uint8_t input[] = { 0xE2, 0x85, 0x9C }; 205 static const uint8_t expected[] = { 0x33, 0xE2, 0x81, 0x84, 0x38 }; 206 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 207 } 208 209 { /* MICRO SIGN */ 210 static const uint8_t input[] = { 0xC2, 0xB5 }; 211 static const uint8_t expected[] = { 0xCE, 0xBC }; 212 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 213 } 214 215 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 216 static const uint8_t input[] = { 0xEF, 0xB7, 0xBA }; 217 static const uint8_t expected[] = 218 { 0xD8, 0xB5, 0xD9, 0x84, 0xD9, 0x89, 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 219 0x84, 0xD9, 0x87, 0x20, 0xD8, 0xB9, 0xD9, 0x84, 0xD9, 0x8A, 0xD9, 0x87, 220 0x20, 0xD9, 0x88, 0xD8, 0xB3, 0xD9, 0x84, 0xD9, 0x85 221 }; 222 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 223 } 224 225 { /* HANGUL SYLLABLE GEUL */ 226 static const uint8_t input[] = { 0xEA, 0xB8, 0x80 }; 227 static const uint8_t expected[] = 228 { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF }; 229 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 230 } 231 232 { /* HANGUL SYLLABLE GEU */ 233 static const uint8_t input[] = { 0xEA, 0xB7, 0xB8 }; 234 static const uint8_t expected[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 }; 235 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 236 } 237 238 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 239 static const uint8_t input[] = 240 { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', 241 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 242 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9, 243 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 244 's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')', 245 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 246 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', 247 0xED, 0x95, 0x9C, 248 0xEA, 0xB8, 0x80, '\n' 249 }; 250 static const uint8_t expected[] = 251 { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.', 252 ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1, 253 0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86, 254 0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1, 255 's', 'q', 'r', 't', '(', 'b', 0x32, '-', '4', 'a', 'c', ')', ')', 256 '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 257 0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',', 258 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB, 259 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n' 260 }; 261 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 262 } 263 264#if HAVE_DECL_ALARM 265 /* Declare failure if test takes too long, by using default abort 266 caused by SIGALRM. */ 267 signal (SIGALRM, SIG_DFL); 268 alarm (50); 269#endif 270 271 /* Check that the sorting is not O(n��) but O(n log n). */ 272 { 273 int pass; 274 for (pass = 0; pass < 3; pass++) 275 { 276 size_t repeat = 1; 277 size_t m = 100000; 278 uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t)); 279 if (input != NULL) 280 { 281 uint8_t *expected = input + (2 * m - 1); 282 size_t m1 = m / 2; 283 size_t m2 = (m - 1) / 2; 284 /* NB: m1 + m2 == m - 1. */ 285 uint8_t *p; 286 size_t i; 287 288 input[0] = 0x41; 289 p = input + 1; 290 switch (pass) 291 { 292 case 0: 293 for (i = 0; i < m1; i++) 294 { 295 *p++ = 0xCC; 296 *p++ = 0x99; 297 } 298 for (i = 0; i < m2; i++) 299 { 300 *p++ = 0xCC; 301 *p++ = 0x80; 302 } 303 break; 304 305 case 1: 306 for (i = 0; i < m2; i++) 307 { 308 *p++ = 0xCC; 309 *p++ = 0x80; 310 } 311 for (i = 0; i < m1; i++) 312 { 313 *p++ = 0xCC; 314 *p++ = 0x99; 315 } 316 break; 317 318 case 2: 319 for (i = 0; i < m2; i++) 320 { 321 *p++ = 0xCC; 322 *p++ = 0x99; 323 *p++ = 0xCC; 324 *p++ = 0x80; 325 } 326 for (; i < m1; i++) 327 { 328 *p++ = 0xCC; 329 *p++ = 0x99; 330 } 331 break; 332 333 default: 334 abort (); 335 } 336 337 expected[0] = 0x41; 338 p = expected + 1; 339 for (i = 0; i < m1; i++) 340 { 341 *p++ = 0xCC; 342 *p++ = 0x99; 343 } 344 for (i = 0; i < m2; i++) 345 { 346 *p++ = 0xCC; 347 *p++ = 0x80; 348 } 349 350 for (; repeat > 0; repeat--) 351 ASSERT (check (input, 2 * m - 1, expected, 2 * m - 1) == 0); 352 353 free (input); 354 } 355 } 356 } 357} 358 359#else 360 361void 362test_u8_nfkd (void) 363{ 364} 365 366#endif 367