1/* Test of canonical normalization of UTF-16 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U16_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint16_t *input, size_t input_length, 34 const uint16_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint16_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u16_normalize (UNINORM_NFC, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u16_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint16_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); 56 result = u16_normalize (UNINORM_NFC, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u16_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint16_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); 75 result = u16_normalize (UNINORM_NFC, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u16_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u16_nfc (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint16_t input[] = { 0x0020 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint16_t input[] = { 0x00C4 }; 103 static const uint16_t decomposed[] = { 0x0041, 0x0308 }; 104 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 105 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 106 } 107 108 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 109 static const uint16_t input[] = { 0x01DE }; 110 static const uint16_t decomposed[] = { 0x0041, 0x0308, 0x0304 }; 111 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 112 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 113 } 114 115 { /* ANGSTROM SIGN */ 116 static const uint16_t input[] = { 0x212B }; 117 static const uint16_t decomposed[] = { 0x0041, 0x030A }; 118 static const uint16_t expected[] = { 0x00C5 }; 119 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 120 ASSERT (check (decomposed, SIZEOF (decomposed), expected, SIZEOF (expected)) == 0); 121 ASSERT (check (expected, SIZEOF (expected), expected, SIZEOF (expected)) == 0); 122 } 123 124 { /* GREEK DIALYTIKA AND PERISPOMENI */ 125 static const uint16_t input[] = { 0x1FC1 }; 126 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 127 } 128 129 { /* SCRIPT SMALL L */ 130 static const uint16_t input[] = { 0x2113 }; 131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 132 } 133 134 { /* NO-BREAK SPACE */ 135 static const uint16_t input[] = { 0x00A0 }; 136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 137 } 138 139 { /* ARABIC LETTER VEH INITIAL FORM */ 140 static const uint16_t input[] = { 0xFB6C }; 141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 142 } 143 144 { /* ARABIC LETTER VEH MEDIAL FORM */ 145 static const uint16_t input[] = { 0xFB6D }; 146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 147 } 148 149 { /* ARABIC LETTER VEH FINAL FORM */ 150 static const uint16_t input[] = { 0xFB6B }; 151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 152 } 153 154 { /* ARABIC LETTER VEH ISOLATED FORM */ 155 static const uint16_t input[] = { 0xFB6A }; 156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 157 } 158 159 { /* CIRCLED NUMBER FIFTEEN */ 160 static const uint16_t input[] = { 0x246E }; 161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 162 } 163 164 { /* TRADE MARK SIGN */ 165 static const uint16_t input[] = { 0x2122 }; 166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 167 } 168 169 { /* LATIN SUBSCRIPT SMALL LETTER I */ 170 static const uint16_t input[] = { 0x1D62 }; 171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 172 } 173 174 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 175 static const uint16_t input[] = { 0xFE35 }; 176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 177 } 178 179 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 180 static const uint16_t input[] = { 0xFF21 }; 181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 182 } 183 184 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 185 static const uint16_t input[] = { 0xFF64 }; 186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 187 } 188 189 { /* SMALL IDEOGRAPHIC COMMA */ 190 static const uint16_t input[] = { 0xFE51 }; 191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 192 } 193 194 { /* SQUARE MHZ */ 195 static const uint16_t input[] = { 0x3392 }; 196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 197 } 198 199 { /* VULGAR FRACTION THREE EIGHTHS */ 200 static const uint16_t input[] = { 0x215C }; 201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 202 } 203 204 { /* MICRO SIGN */ 205 static const uint16_t input[] = { 0x00B5 }; 206 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 207 } 208 209 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 210 static const uint16_t input[] = { 0xFDFA }; 211 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 212 } 213 214 { /* HANGUL SYLLABLE GEUL */ 215 static const uint16_t input[] = { 0xAE00 }; 216 static const uint16_t decomposed[] = { 0x1100, 0x1173, 0x11AF }; 217 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 218 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 219 } 220 221 { /* HANGUL SYLLABLE GEU */ 222 static const uint16_t input[] = { 0xADF8 }; 223 static const uint16_t decomposed[] = { 0x1100, 0x1173 }; 224 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 225 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 226 } 227 228 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 229 static const uint16_t input[] = 230 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 231 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 232 0x0439, 0x0442, 0x0435, '!', ' ', 233 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 234 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 235 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' 236 }; 237 static const uint16_t decomposed[] = 238 { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 239 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 240 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', 241 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 242 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 243 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 244 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' 245 }; 246 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 247 ASSERT (check (decomposed, SIZEOF (decomposed), input, SIZEOF (input)) == 0); 248 } 249 250#if HAVE_DECL_ALARM 251 /* Declare failure if test takes too long, by using default abort 252 caused by SIGALRM. */ 253 signal (SIGALRM, SIG_DFL); 254 alarm (50); 255#endif 256 257 /* Check that the sorting is not O(n��) but O(n log n). */ 258 { 259 int pass; 260 for (pass = 0; pass < 3; pass++) 261 { 262 size_t repeat = 1; 263 size_t m = 100000; 264 uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t)); 265 if (input != NULL) 266 { 267 uint16_t *expected = input + m; 268 size_t m1 = m / 2; 269 size_t m2 = (m - 1) / 2; 270 /* NB: m1 + m2 == m - 1. */ 271 uint16_t *p; 272 size_t i; 273 274 input[0] = 0x0041; 275 p = input + 1; 276 switch (pass) 277 { 278 case 0: 279 for (i = 0; i < m1; i++) 280 *p++ = 0x0319; 281 for (i = 0; i < m2; i++) 282 *p++ = 0x0300; 283 break; 284 285 case 1: 286 for (i = 0; i < m2; i++) 287 *p++ = 0x0300; 288 for (i = 0; i < m1; i++) 289 *p++ = 0x0319; 290 break; 291 292 case 2: 293 for (i = 0; i < m2; i++) 294 { 295 *p++ = 0x0319; 296 *p++ = 0x0300; 297 } 298 for (; i < m1; i++) 299 *p++ = 0x0319; 300 break; 301 302 default: 303 abort (); 304 } 305 306 expected[0] = 0x00C0; 307 p = expected + 1; 308 for (i = 0; i < m1; i++) 309 *p++ = 0x0319; 310 for (i = 0; i < m2 - 1; i++) 311 *p++ = 0x0300; 312 313 for (; repeat > 0; repeat--) 314 { 315 ASSERT (check (input, m, expected, m - 1) == 0); 316 ASSERT (check (expected, m - 1, expected, m - 1) == 0); 317 } 318 319 free (input); 320 } 321 } 322 } 323} 324 325#else 326 327void 328test_u16_nfc (void) 329{ 330} 331 332#endif 333