1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "apr.h" 18#include "arch/win32/apr_arch_utf8.h" 19#include <wchar.h> 20#include <string.h> 21#include <assert.h> 22 23struct testval { 24 unsigned char n[8]; 25 int nl; 26 wchar_t w[4]; 27 int wl; 28}; 29 30/* For reference; a table of invalid utf-8 encoded ucs-2/ucs-4 sequences. 31 * The table consists of start, end pairs for all invalid ranges. 32 * NO_UCS2_PAIRS will pass the reservered D800-DFFF values, halting at FFFF 33 * FULL_UCS4_MAPPER represents all 31 bit values to 7FFF FFFF 34 * 35 * We already tested these, because we ensure there is a 1:1 mapping across 36 * the entire range of byte values in each position of 1 to 6 byte sequences. 37 */ 38struct testval malformed[] = [ 39 [[0x80,], 1,], /* 10000000 64 invalid leading continuation values */ 40 [[0xBF,], 1,], /* 10111111 64 invalid leading continuation values */ 41 [[0xC0,0x80], 2,], /* overshort mapping of 0000 */ 42 [[0xC1,0xBF], 2,], /* overshort mapping of 007F */ 43 [[0xE0,0x80,0x80,], 3,], /* overshort mapping of 0000 */ 44 [[0xE0,0x9F,0xBF,], 3,], /* overshort mapping of 07FF */ 45#ifndef NO_UCS2_PAIRS 46 [[0xED,0xA0,0x80,], 3,], /* unexpected mapping of UCS-2 literal D800 */ 47 [[0xED,0xBF,0xBF,], 3,], /* unexpected mapping of UCS-2 literal DFFF */ 48#endif 49 [[0xF0,0x80,0x80,0x80,], 4,], /* overshort mapping of 0000 */ 50 [[0xF0,0x8F,0xBF,0xBF,], 4,], /* overshort mapping of FFFF */ 51#ifdef NO_UCS2_PAIRS 52 [[0xF0,0x90,0x80,0x80,], 4,], /* invalid too large value 0001 0000 */ 53 [[0xF4,0x8F,0xBF,0xBF,], 4,], /* invalid too large value 0010 FFFF */ 54#endif 55#ifndef FULL_UCS4_MAPPER 56 [[0xF4,0x90,0x80,0x80,], 4,], /* invalid too large value 0011 0000 */ 57 [[0xF7,0xBF,0xBF,0xBF,], 4,], /* invalid too large value 001F FFFF */ 58#endif 59 [[0xF8,0x80,0x80,0x80,0x80,], 5,], /* overshort mapping of 0000 0000 */ 60 [[0xF8,0x87,0xBF,0xBF,0xBF,], 5,], /* overshort mapping of 001F FFFF */ 61#ifndef FULL_UCS4_MAPPER 62 [[0xF8,0x88,0x80,0x80,0x80,], 5,], /* invalid too large value 0020 0000 */ 63 [[0xFB,0xBF,0xBF,0xBF,0xBF,], 5,], /* invalid too large value 03FF FFFF */ 64#endif 65 [[0xFC,0x80,0x80,0x80,0x80,0x80,], 6,], /* overshort mapping 0000 0000 */ 66 [[0xFC,0x83,0xBF,0xBF,0xBF,0xBF,], 6,], /* overshort mapping 03FF FFFF */ 67#ifndef FULL_UCS4_MAPPER 68 [[0xFC,0x84,0x80,0x80,0x80,0x80,], 6,], /* overshort mapping 0400 0000 */ 69 [[0xFD,0xBF,0xBF,0xBF,0xBF,0xBF,], 6,], /* overshort mapping 7FFF FFFF */ 70#endif 71 [[0xFE,], 1,], /* 11111110 invalid "too large" value, no 7 byte seq */ 72 [[0xFF,], 1,], /* 11111111 invalid "too large" value, no 8 byte seq */ 73]; 74 75void displaynw(struct testval *f, struct testval *l) 76{ 77 char x[80], *t = x; 78 int i; 79 for (i = 0; i < f->nl; ++i) 80 t += sprintf(t, "%02X ", f->n[i]); 81 *(t++) = '-'; 82 for (i = 0; i < l->nl; ++i) 83 t += sprintf(t, " %02X", l->n[i]); 84 *(t++) = ' '; 85 *(t++) = '='; 86 *(t++) = ' '; 87 for (i = 0; i < f->wl; ++i) 88 t += sprintf(t, "%04X ", f->w[i]); 89 *(t++) = '-'; 90 for (i = 0; i < l->wl; ++i) 91 t += sprintf(t, " %04X", l->w[i]); 92 *t = '\0'; 93 puts(x); 94} 95 96/* 97 * Test every possible byte value. 98 * If the test passes or fails at this byte value we are done. 99 * Otherwise iterate test_nrange again, appending another byte. 100 */ 101void test_nrange(struct testval *p) 102{ 103 struct testval f, l, s; 104 apr_status_t rc; 105 int success = 0; 106 107 memcpy (&s, p, sizeof(s)); 108 ++s.nl; 109 110 do { 111 apr_size_t nl = s.nl, wl = sizeof(s.w) / 2; 112 rc = apr_conv_utf8_to_ucs2(s.n, &nl, s.w, &wl); 113 s.wl = (sizeof(s.w) / 2) - wl; 114 if (!nl && rc == APR_SUCCESS) { 115 if (!success) { 116 memcpy(&f, &s, sizeof(s)); 117 success = -1; 118 } 119 else { 120 if (s.wl != l.wl 121 || memcmp(s.w, l.w, (s.wl - 1) * 2) != 0 122 || s.w[s.wl - 1] != l.w[l.wl - 1] + 1) { 123 displaynw(&f, &l); 124 memcpy(&f, &s, sizeof(s)); 125 } 126 } 127 memcpy(&l, &s, sizeof(s)); 128 } 129 else { 130 if (success) { 131 displaynw(&f, &l); 132 success = 0; 133 } 134 if (rc == APR_INCOMPLETE) { 135 test_nrange(&s); 136 } 137 } 138 } while (++s.n[s.nl - 1]); 139 140 if (success) { 141 displaynw(&f, &l); 142 success = 0; 143 } 144} 145 146/* 147 * Test every possible word value. 148 * Once we are finished, retest every possible word value. 149 * if the test fails on the following null word, iterate test_nrange 150 * again, appending another word. 151 * This assures the output order of the two tests are in sync. 152 */ 153void test_wrange(struct testval *p) 154{ 155 struct testval f, l, s; 156 apr_status_t rc; 157 int success = 0; 158 159 memcpy (&s, p, sizeof(s)); 160 ++s.wl; 161 162 do { 163 apr_size_t nl = sizeof(s.n), wl = s.wl; 164 rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl); 165 s.nl = sizeof(s.n) - nl; 166 if (!wl && rc == APR_SUCCESS) { 167 if (!success) { 168 memcpy(&f, &s, sizeof(s)); 169 success = -1; 170 } 171 else { 172 if (s.nl != l.nl 173 || memcmp(s.n, l.n, s.nl - 1) != 0 174 || s.n[s.nl - 1] != l.n[l.nl - 1] + 1) { 175 displaynw(&f, &l); 176 memcpy(&f, &s, sizeof(s)); 177 } 178 } 179 memcpy(&l, &s, sizeof(s)); 180 } 181 else { 182 if (success) { 183 displaynw(&f, &l); 184 success = 0; 185 } 186 } 187 } while (++s.w[s.wl - 1]); 188 189 if (success) { 190 displaynw(&f, &l); 191 success = 0; 192 } 193 194 do { 195 int wl = s.wl, nl = sizeof(s.n); 196 rc = apr_conv_ucs2_to_utf8(s.w, &wl, s.n, &nl); 197 s.nl = sizeof(s.n) - s.nl; 198 if (rc == APR_INCOMPLETE) { 199 test_wrange(&s); 200 } 201 } while (++s.w[s.wl - 1]); 202} 203 204/* 205 * Test every possible byte value. 206 * If the test passes or fails at this byte value we are done. 207 * Otherwise iterate test_nrange again, appending another byte. 208 */ 209void test_ranges() 210{ 211 struct testval ntest, wtest; 212 apr_status_t nrc, wrc; 213 apr_size_t inlen; 214 unsigned long matches = 0; 215 216 memset(&ntest, 0, sizeof(ntest)); 217 ++ntest.nl; 218 219 memset(&wtest, 0, sizeof(wtest)); 220 ++wtest.wl; 221 222 do { 223 do { 224 inlen = ntest.nl; 225 ntest.wl = sizeof(ntest.w) / 2; 226 nrc = apr_conv_utf8_to_ucs2(ntest.n, &inlen, ntest.w, &ntest.wl); 227 if (nrc == APR_SUCCESS) { 228 ntest.wl = (sizeof(ntest.w) / 2) - ntest.wl; 229 break; 230 } 231 if (nrc == APR_INCOMPLETE) { 232 ++ntest.nl; 233 if (ntest.nl > 6) { 234 printf ("\n\nUnexpected utf8 sequence of >6 bytes;\n"); 235 exit(255); 236 } 237 continue; 238 } 239 else { 240 while (!(++ntest.n[ntest.nl - 1])) { 241 if (!(--ntest.nl)) 242 break; 243 } 244 } 245 } while (ntest.nl); 246 247 do { 248 inlen = wtest.wl; 249 wtest.nl = sizeof(wtest.n); 250 wrc = apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl); 251 if (wrc == APR_SUCCESS) { 252 wtest.nl = sizeof(wtest.n) - wtest.nl; 253 break; 254 } 255 else { 256 if (!(++wtest.w[wtest.wl - 1])) { 257 if (wtest.wl == 1) 258 ++wtest.wl; 259 else 260 ++wtest.w[0]; 261 262 /* On the second pass, ensure lead word is incomplete */ 263 do { 264 inlen = 1; 265 wtest.nl = sizeof(wtest.n); 266 if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl) 267 == APR_INCOMPLETE) 268 break; 269 if (!(++wtest.w[0])) { 270 wtest.wl = 0; 271 break; 272 } 273 } while (1); 274 } 275 } 276 } while (wtest.wl); 277 278 if (!ntest.nl && !wtest.wl) 279 break; 280 281 /* Identical? */ 282 if ((wtest.nl != ntest.nl) 283 || (memcmp(wtest.n, ntest.n, ntest.nl) != 0) 284 || (wtest.wl != ntest.wl) 285 || (memcmp(ntest.w, wtest.w, wtest.wl * 2) != 0)) { 286 printf ("\n\nMismatch of w/n conversion at;\n"); 287 displaynw(&ntest, &wtest); 288 exit(255); 289 } 290 ++matches; 291 292 while (!(++ntest.n[ntest.nl - 1])) { 293 if (!(--ntest.nl)) 294 break; 295 } 296 297 if (!(++wtest.w[wtest.wl - 1])) { 298 if (wtest.wl == 1) 299 ++wtest.wl; 300 else 301 ++wtest.w[0]; 302 303 /* On the second pass, ensure lead word is incomplete */ 304 do { 305 inlen = 1; 306 wtest.nl = sizeof(wtest.n); 307 if (apr_conv_ucs2_to_utf8(wtest.w, &inlen, wtest.n, &wtest.nl) 308 == APR_INCOMPLETE) 309 break; 310 if (!(++wtest.w[0])) { 311 wtest.wl = 0; 312 break; 313 } 314 } while (1); 315 } 316 } while (wtest.wl || ntest.nl); 317 318 printf ("\n\nutf8 and ucs2 sequences of %lu transformations matched OK.\n", 319 matches); 320} 321 322/* 323 * Syntax: testucs [w|n] 324 * 325 * If no arg or arg is not recognized, run equality sequence test. 326 */ 327int main(int argc, char **argv) 328{ 329 struct testval s; 330 memset (&s, 0, sizeof(s)); 331 332 if (argc >= 2 && apr_tolower(*argv[1]) != 'w') { 333 printf ("\n\nTesting Narrow Char Ranges\n"); 334 test_nrange(&s); 335 } 336 else if (argc >= 2 && apr_tolower(*argv[1]) != 'n') { 337 printf ("\n\nTesting Wide Char Ranges\n"); 338 test_wrange(&s); 339 } 340 else { 341 test_ranges(); 342 } 343 return 0; 344} 345