1/* Test of canonical decomposition of UTF-16 strings. 2 Copyright (C) 2009, 2010 Free Software Foundation, Inc. 3 4 This program is free software: you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3 of the License, or 7 (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ 18 19#include <config.h> 20 21#if GNULIB_TEST_UNINORM_U16_NORMALIZE 22 23#include "uninorm.h" 24 25#include <signal.h> 26#include <stdlib.h> 27#include <unistd.h> 28 29#include "unistr.h" 30#include "macros.h" 31 32static int 33check (const uint16_t *input, size_t input_length, 34 const uint16_t *expected, size_t expected_length) 35{ 36 size_t length; 37 uint16_t *result; 38 39 /* Test return conventions with resultbuf == NULL. */ 40 result = u16_normalize (UNINORM_NFD, input, input_length, NULL, &length); 41 if (!(result != NULL)) 42 return 1; 43 if (!(length == expected_length)) 44 return 2; 45 if (!(u16_cmp (result, expected, expected_length) == 0)) 46 return 3; 47 free (result); 48 49 /* Test return conventions with resultbuf too small. */ 50 if (expected_length > 0) 51 { 52 uint16_t *preallocated; 53 54 length = expected_length - 1; 55 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); 56 result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length); 57 if (!(result != NULL)) 58 return 4; 59 if (!(result != preallocated)) 60 return 5; 61 if (!(length == expected_length)) 62 return 6; 63 if (!(u16_cmp (result, expected, expected_length) == 0)) 64 return 7; 65 free (result); 66 free (preallocated); 67 } 68 69 /* Test return conventions with resultbuf large enough. */ 70 { 71 uint16_t *preallocated; 72 73 length = expected_length; 74 preallocated = (uint16_t *) malloc (length * sizeof (uint16_t)); 75 result = u16_normalize (UNINORM_NFD, input, input_length, preallocated, &length); 76 if (!(result != NULL)) 77 return 8; 78 if (!(preallocated == NULL || result == preallocated)) 79 return 9; 80 if (!(length == expected_length)) 81 return 10; 82 if (!(u16_cmp (result, expected, expected_length) == 0)) 83 return 11; 84 free (preallocated); 85 } 86 87 return 0; 88} 89 90void 91test_u16_nfd (void) 92{ 93 { /* Empty string. */ 94 ASSERT (check (NULL, 0, NULL, 0) == 0); 95 } 96 { /* SPACE */ 97 static const uint16_t input[] = { 0x0020 }; 98 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 99 } 100 101 { /* LATIN CAPITAL LETTER A WITH DIAERESIS */ 102 static const uint16_t input[] = { 0x00C4 }; 103 static const uint16_t expected[] = { 0x0041, 0x0308 }; 104 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 105 } 106 107 { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */ 108 static const uint16_t input[] = { 0x01DE }; 109 static const uint16_t expected[] = { 0x0041, 0x0308, 0x0304 }; 110 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 111 } 112 113 { /* GREEK DIALYTIKA AND PERISPOMENI */ 114 static const uint16_t input[] = { 0x1FC1 }; 115 static const uint16_t expected[] = { 0x00A8, 0x0342 }; 116 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 117 } 118 119 { /* SCRIPT SMALL L */ 120 static const uint16_t input[] = { 0x2113 }; 121 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 122 } 123 124 { /* NO-BREAK SPACE */ 125 static const uint16_t input[] = { 0x00A0 }; 126 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 127 } 128 129 { /* ARABIC LETTER VEH INITIAL FORM */ 130 static const uint16_t input[] = { 0xFB6C }; 131 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 132 } 133 134 { /* ARABIC LETTER VEH MEDIAL FORM */ 135 static const uint16_t input[] = { 0xFB6D }; 136 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 137 } 138 139 { /* ARABIC LETTER VEH FINAL FORM */ 140 static const uint16_t input[] = { 0xFB6B }; 141 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 142 } 143 144 { /* ARABIC LETTER VEH ISOLATED FORM */ 145 static const uint16_t input[] = { 0xFB6A }; 146 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 147 } 148 149 { /* CIRCLED NUMBER FIFTEEN */ 150 static const uint16_t input[] = { 0x246E }; 151 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 152 } 153 154 { /* TRADE MARK SIGN */ 155 static const uint16_t input[] = { 0x2122 }; 156 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 157 } 158 159 { /* LATIN SUBSCRIPT SMALL LETTER I */ 160 static const uint16_t input[] = { 0x1D62 }; 161 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 162 } 163 164 { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */ 165 static const uint16_t input[] = { 0xFE35 }; 166 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 167 } 168 169 { /* FULLWIDTH LATIN CAPITAL LETTER A */ 170 static const uint16_t input[] = { 0xFF21 }; 171 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 172 } 173 174 { /* HALFWIDTH IDEOGRAPHIC COMMA */ 175 static const uint16_t input[] = { 0xFF64 }; 176 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 177 } 178 179 { /* SMALL IDEOGRAPHIC COMMA */ 180 static const uint16_t input[] = { 0xFE51 }; 181 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 182 } 183 184 { /* SQUARE MHZ */ 185 static const uint16_t input[] = { 0x3392 }; 186 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 187 } 188 189 { /* VULGAR FRACTION THREE EIGHTHS */ 190 static const uint16_t input[] = { 0x215C }; 191 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 192 } 193 194 { /* MICRO SIGN */ 195 static const uint16_t input[] = { 0x00B5 }; 196 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 197 } 198 199 { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */ 200 static const uint16_t input[] = { 0xFDFA }; 201 ASSERT (check (input, SIZEOF (input), input, SIZEOF (input)) == 0); 202 } 203 204 { /* HANGUL SYLLABLE GEUL */ 205 static const uint16_t input[] = { 0xAE00 }; 206 static const uint16_t expected[] = { 0x1100, 0x1173, 0x11AF }; 207 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 208 } 209 210 { /* HANGUL SYLLABLE GEU */ 211 static const uint16_t input[] = { 0xADF8 }; 212 static const uint16_t expected[] = { 0x1100, 0x1173 }; 213 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 214 } 215 216 { /* "Gr���� Gott. ������������������������! x=(-b��sqrt(b��-4ac))/(2a) ���������,������,������" */ 217 static const uint16_t input[] = 218 { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 219 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 220 0x0439, 0x0442, 0x0435, '!', ' ', 221 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 222 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 223 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n' 224 }; 225 static const uint16_t expected[] = 226 { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ', 227 0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443, 228 0x0438, 0x0306, 0x0442, 0x0435, '!', ' ', 229 'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2, 230 '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ', 231 0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 232 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n' 233 }; 234 ASSERT (check (input, SIZEOF (input), expected, SIZEOF (expected)) == 0); 235 } 236 237#if HAVE_DECL_ALARM 238 /* Declare failure if test takes too long, by using default abort 239 caused by SIGALRM. */ 240 signal (SIGALRM, SIG_DFL); 241 alarm (50); 242#endif 243 244 /* Check that the sorting is not O(n��) but O(n log n). */ 245 { 246 int pass; 247 for (pass = 0; pass < 3; pass++) 248 { 249 size_t repeat = 1; 250 size_t m = 100000; 251 uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t)); 252 if (input != NULL) 253 { 254 uint16_t *expected = input + m; 255 size_t m1 = m / 2; 256 size_t m2 = (m - 1) / 2; 257 /* NB: m1 + m2 == m - 1. */ 258 uint16_t *p; 259 size_t i; 260 261 input[0] = 0x0041; 262 p = input + 1; 263 switch (pass) 264 { 265 case 0: 266 for (i = 0; i < m1; i++) 267 *p++ = 0x0319; 268 for (i = 0; i < m2; i++) 269 *p++ = 0x0300; 270 break; 271 272 case 1: 273 for (i = 0; i < m2; i++) 274 *p++ = 0x0300; 275 for (i = 0; i < m1; i++) 276 *p++ = 0x0319; 277 break; 278 279 case 2: 280 for (i = 0; i < m2; i++) 281 { 282 *p++ = 0x0319; 283 *p++ = 0x0300; 284 } 285 for (; i < m1; i++) 286 *p++ = 0x0319; 287 break; 288 289 default: 290 abort (); 291 } 292 293 expected[0] = 0x0041; 294 p = expected + 1; 295 for (i = 0; i < m1; i++) 296 *p++ = 0x0319; 297 for (i = 0; i < m2; i++) 298 *p++ = 0x0300; 299 300 for (; repeat > 0; repeat--) 301 ASSERT (check (input, m, expected, m) == 0); 302 303 free (input); 304 } 305 } 306 } 307} 308 309#else 310 311void 312test_u16_nfd (void) 313{ 314} 315 316#endif 317