1/* Test of compatibility decomposition of UTF-32 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U32_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint32_t *input, size_t input_length, 34 const uint32_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint32_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u32_normalize (UNINORM_NFKD, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u32_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint32_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); 56 result = u32_normalize (UNINORM_NFKD, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u32_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint32_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint32_t *) malloc (length * sizeof (uint32_t)); 75 result = u32_normalize (UNINORM_NFKD, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u32_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u32_nfkd (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint32_t input[] = { 0x0020 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint32_t input[] = { 0x00C4 }; 103 static const uint32_t expected[] = { 0x0041, 0x0308 }; 104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 105 } 106 107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 108 static const uint32_t input[] = { 0x01DE }; 109 static const uint32_t expected[] = { 0x0041, 0x0308, 0x0304 }; 110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 111 } 112 113 { /* GREEK DIALYTIKA AND PERISPOMENI */ 114 static const uint32_t input[] = { 0x1FC1 }; 115 static const uint32_t expected[] = { 0x0020, 0x0308, 0x0342 }; 116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 117 } 118 119 { /* SCRIPT SMALL L */ 120 static const uint32_t input[] = { 0x2113 }; 121 static const uint32_t expected[] = { 0x006C }; 122 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 123 } 124 125 { /* NO-BREAK SPACE */ 126 static const uint32_t input[] = { 0x00A0 }; 127 static const uint32_t expected[] = { 0x0020 }; 128 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 129 } 130 131 { /* ARABIC LETTER VEH INITIAL FORM */ 132 static const uint32_t input[] = { 0xFB6C }; 133 static const uint32_t expected[] = { 0x06A4 }; 134 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 135 } 136 137 { /* ARABIC LETTER VEH MEDIAL FORM */ 138 static const uint32_t input[] = { 0xFB6D }; 139 static const uint32_t expected[] = { 0x06A4 }; 140 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 141 } 142 143 { /* ARABIC LETTER VEH FINAL FORM */ 144 static const uint32_t input[] = { 0xFB6B }; 145 static const uint32_t expected[] = { 0x06A4 }; 146 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 147 } 148 149 { /* ARABIC LETTER VEH ISOLATED FORM */ 150 static const uint32_t input[] = { 0xFB6A }; 151 static const uint32_t expected[] = { 0x06A4 }; 152 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 153 } 154 155 { /* CIRCLED NUMBER FIFTEEN */ 156 static const uint32_t input[] = { 0x246E }; 157 static const uint32_t expected[] = { 0x0031, 0x0035 }; 158 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 159 } 160 161 { /* TRADE MARK SIGN */ 162 static const uint32_t input[] = { 0x2122 }; 163 static const uint32_t expected[] = { 0x0054, 0x004D }; 164 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 165 } 166 167 { /* LATIN SUBSCRIPT SMALL LETTER I */ 168 static const uint32_t input[] = { 0x1D62 }; 169 static const uint32_t expected[] = { 0x0069 }; 170 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 171 } 172 173 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 174 static const uint32_t input[] = { 0xFE35 }; 175 static const uint32_t expected[] = { 0x0028 }; 176 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 177 } 178 179 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 180 static const uint32_t input[] = { 0xFF21 }; 181 static const uint32_t expected[] = { 0x0041 }; 182 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 183 } 184 185 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 186 static const uint32_t input[] = { 0xFF64 }; 187 static const uint32_t expected[] = { 0x3001 }; 188 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 189 } 190 191 { /* SMALL IDEOGRAPHIC COMMA */ 192 static const uint32_t input[] = { 0xFE51 }; 193 static const uint32_t expected[] = { 0x3001 }; 194 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 195 } 196 197 { /* SQUARE MHZ */ 198 static const uint32_t input[] = { 0x3392 }; 199 static const uint32_t expected[] = { 0x004D, 0x0048, 0x007A }; 200 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 201 } 202 203 { /* VULGAR FRACTION THREE EIGHTHS */ 204 static const uint32_t input[] = { 0x215C }; 205 static const uint32_t expected[] = { 0x0033, 0x2044, 0x0038 }; 206 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 207 } 208 209 { /* MICRO SIGN */ 210 static const uint32_t input[] = { 0x00B5 }; 211 static const uint32_t expected[] = { 0x03BC }; 212 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 213 } 214 215 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 216 static const uint32_t input[] = { 0xFDFA }; 217 static const uint32_t expected[] = 218 { 0x0635, 0x0644, 0x0649, 0x0020, 0x0627, 0x0644, 0x0644, 0x0647, 0x0020, 219 0x0639, 0x0644, 0x064A, 0x0647, 0x0020, 0x0648, 0x0633, 0x0644, 0x0645 220 }; 221 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 222 } 223 224 { /* HANGUL SYLLABLE GEUL */ 225 static const uint32_t input[] = { 0xAE00 }; 226 static const uint32_t expected[] = { 0x1100, 0x1173, 0x11AF }; 227 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 228 } 229 230 { /* HANGUL SYLLABLE GEU */ 231 static const uint32_t input[] = { 0xADF8 }; 232 static const uint32_t expected[] = { 0x1100, 0x1173 }; 233 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 234 } 235 236 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 237 static const uint32_t input[] = 238 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 239 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 240 0x0439, 0x0442, 0x0435, '!', ' ', 241 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 242 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 243 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' 244 }; 245 static const uint32_t expected[] = 246 { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 247 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 248 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', 249 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x0032, 250 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 251 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 252 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' 253 }; 254 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 255 } 256 257#if HAVE_DECL_ALARM 258 /* Declare failure if test takes too long, by using default abort 259 caused by SIGALRM. */ 260 signal (SIGALRM, SIG_DFL); 261 alarm (50); 262#endif 263 264 /* Check that the sorting is not O(n��) but O(n log n). */ 265 { 266 int pass; 267 for (pass = 0; pass < 3; pass++) 268 { 269 size_t repeat = 1; 270 size_t m = 100000; 271 uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t)); 272 if (input != NULL) 273 { 274 uint32_t *expected = input + m; 275 size_t m1 = m / 2; 276 size_t m2 = (m - 1) / 2; 277 /* NB: m1 + m2 == m - 1. */ 278 uint32_t *p; 279 size_t i; 280 281 input[0] = 0x0041; 282 p = input + 1; 283 switch (pass) 284 { 285 case 0: 286 for (i = 0; i < m1; i++) 287 *p++ = 0x0319; 288 for (i = 0; i < m2; i++) 289 *p++ = 0x0300; 290 break; 291 292 case 1: 293 for (i = 0; i < m2; i++) 294 *p++ = 0x0300; 295 for (i = 0; i < m1; i++) 296 *p++ = 0x0319; 297 break; 298 299 case 2: 300 for (i = 0; i < m2; i++) 301 { 302 *p++ = 0x0319; 303 *p++ = 0x0300; 304 } 305 for (; i < m1; i++) 306 *p++ = 0x0319; 307 break; 308 309 default: 310 abort (); 311 } 312 313 expected[0] = 0x0041; 314 p = expected + 1; 315 for (i = 0; i < m1; i++) 316 *p++ = 0x0319; 317 for (i = 0; i < m2; i++) 318 *p++ = 0x0300; 319 320 for (; repeat > 0; repeat--) 321 ASSERT (check (input, m, expected, m) == 0); 322 323 free (input); 324 } 325 } 326 } 327} 328 329#else 330 331void 332test_u32_nfkd (void) 333{ 334} 335 336#endif 337