1#ifndef lint 2static char *rcsid = "$Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp $"; 3#endif 4 5/* 6 * Copyright (c) 2000,2002 Japan Network Information Center. 7 * All rights reserved. 8 * 9 * By using this file, you agree to the terms and conditions set forth bellow. 10 * 11 * LICENSE TERMS AND CONDITIONS 12 * 13 * The following License Terms and Conditions apply, unless a different 14 * license is obtained from Japan Network Information Center ("JPNIC"), 15 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 16 * Chiyoda-ku, Tokyo 101-0047, Japan. 17 * 18 * 1. Use, Modification and Redistribution (including distribution of any 19 * modified or derived work) in source and/or binary forms is permitted 20 * under this License Terms and Conditions. 21 * 22 * 2. Redistribution of source code must retain the copyright notices as they 23 * appear in each source code file, this License Terms and Conditions. 24 * 25 * 3. Redistribution in binary form must reproduce the Copyright Notice, 26 * this License Terms and Conditions, in the documentation and/or other 27 * materials provided with the distribution. For the purposes of binary 28 * distribution the "Copyright Notice" refers to the following language: 29 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 30 * 31 * 4. The name of JPNIC may not be used to endorse or promote products 32 * derived from this Software without specific prior written approval of 33 * JPNIC. 34 * 35 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 38 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 40 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 41 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 42 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 43 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 44 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 45 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 46 */ 47 48#include <config.h> 49 50#include <stddef.h> 51#include <stdlib.h> 52#include <string.h> 53#include <ctype.h> 54 55#include <idn/assert.h> 56#include <idn/logmacro.h> 57#include <idn/result.h> 58#include <idn/normalizer.h> 59#include <idn/strhash.h> 60#include <idn/unormalize.h> 61#include <idn/unicode.h> 62#include <idn/ucs4.h> 63#include <idn/debug.h> 64#include <idn/util.h> 65 66#define MAX_LOCAL_SCHEME 3 67 68#define INITIALIZED (scheme_hash != NULL) 69 70typedef struct { 71 char *name; 72 idn_normalizer_proc_t proc; 73} normalize_scheme_t; 74 75struct idn_normalizer { 76 int nschemes; 77 int scheme_size; 78 normalize_scheme_t **schemes; 79 normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME]; 80 int reference_count; 81}; 82 83static idn__strhash_t scheme_hash; 84 85static idn__unicode_version_t vcur = NULL; 86static idn__unicode_version_t v320 = NULL; 87#define INIT_VERSION(version, var) \ 88 if (var == NULL) { \ 89 idn_result_t r = idn__unicode_create(version, &var); \ 90 if (r != idn_success) \ 91 return (r); \ 92 } 93 94static idn_result_t expand_schemes(idn_normalizer_t ctx); 95static idn_result_t register_standard_normalizers(void); 96static idn_result_t normalizer_formkc(const unsigned long *from, 97 unsigned long *to, size_t tolen); 98static idn_result_t normalizer_formkc_v320(const unsigned long *from, 99 unsigned long *to, 100 size_t tolen); 101 102static struct standard_normalizer { 103 char *name; 104 idn_normalizer_proc_t proc; 105} standard_normalizer[] = { 106 { "unicode-form-kc", normalizer_formkc }, 107 { "unicode-form-kc/3.2.0", normalizer_formkc_v320 }, 108 { "RFC3491", normalizer_formkc_v320 }, 109 { NULL, NULL }, 110}; 111 112idn_result_t 113idn_normalizer_initialize(void) { 114 idn__strhash_t hash; 115 idn_result_t r; 116 117 TRACE(("idn_normalizer_initialize()\n")); 118 119 if (scheme_hash != NULL) { 120 r = idn_success; /* already initialized */ 121 goto ret; 122 } 123 124 if ((r = idn__strhash_create(&hash)) != idn_success) 125 goto ret; 126 scheme_hash = hash; 127 128 /* Register standard normalizers */ 129 r = register_standard_normalizers(); 130ret: 131 TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r))); 132 return (r); 133} 134 135idn_result_t 136idn_normalizer_create(idn_normalizer_t *ctxp) { 137 idn_normalizer_t ctx; 138 idn_result_t r; 139 140 assert(ctxp != NULL); 141 TRACE(("idn_normalizer_create()\n")); 142 143 if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) { 144 r = idn_nomemory; 145 goto ret; 146 } 147 148 ctx->nschemes = 0; 149 ctx->scheme_size = MAX_LOCAL_SCHEME; 150 ctx->schemes = ctx->local_buf; 151 ctx->reference_count = 1; 152 *ctxp = ctx; 153 154 r = idn_success; 155ret: 156 TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r))); 157 return (r); 158} 159 160void 161idn_normalizer_destroy(idn_normalizer_t ctx) { 162 assert(ctx != NULL); 163 164 TRACE(("idn_normalizer_destroy()\n")); 165 166 ctx->reference_count--; 167 if (ctx->reference_count <= 0) { 168 TRACE(("idn_normalizer_destroy(): the object is destroyed\n")); 169 if (ctx->schemes != ctx->local_buf) 170 free(ctx->schemes); 171 free(ctx); 172 } else { 173 TRACE(("idn_normalizer_destroy(): " 174 "update reference count (%d->%d)\n", 175 ctx->reference_count + 1, ctx->reference_count)); 176 } 177} 178 179void 180idn_normalizer_incrref(idn_normalizer_t ctx) { 181 assert(ctx != NULL); 182 183 TRACE(("idn_normalizer_incrref()\n")); 184 TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n", 185 ctx->reference_count, ctx->reference_count + 1)); 186 187 ctx->reference_count++; 188} 189 190idn_result_t 191idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) { 192 idn_result_t r; 193 void *v; 194 normalize_scheme_t *scheme; 195 196 assert(ctx != NULL && scheme_name != NULL); 197 198 TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name)); 199 200 assert(INITIALIZED); 201 202 if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) { 203 ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n", 204 scheme_name)); 205 r = idn_invalid_name; 206 goto ret; 207 } 208 209 scheme = v; 210 211 assert(ctx->nschemes <= ctx->scheme_size); 212 213 if (ctx->nschemes == ctx->scheme_size && 214 (r = expand_schemes(ctx)) != idn_success) { 215 goto ret; 216 } 217 218 ctx->schemes[ctx->nschemes++] = scheme; 219 r = idn_success; 220ret: 221 TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r))); 222 return (r); 223} 224 225idn_result_t 226idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names, 227 int nschemes) { 228 idn_result_t r; 229 int i; 230 231 assert(ctx != NULL && scheme_names != NULL); 232 233 TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes)); 234 235 for (i = 0; i < nschemes; i++) { 236 r = idn_normalizer_add(ctx, (const char *)*scheme_names); 237 if (r != idn_success) 238 goto ret; 239 scheme_names++; 240 } 241 242 r = idn_success; 243ret: 244 TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r))); 245 return (r); 246} 247 248idn_result_t 249idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from, 250 unsigned long *to, size_t tolen) { 251 idn_result_t r; 252 unsigned long *src, *dst; 253 unsigned long *buffers[2] = {NULL, NULL}; 254 size_t buflen[2] = {0, 0}; 255 size_t dstlen; 256 int idx; 257 int i; 258 259 assert(scheme_hash != NULL); 260 assert(ctx != NULL && from != NULL && to != NULL); 261 262 TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n", 263 idn__debug_ucs4xstring(from, 50), (int)tolen)); 264 265 if (ctx->nschemes <= 0) { 266 if (tolen < idn_ucs4_strlen(from) + 1) { 267 r = idn_buffer_overflow; 268 goto ret; 269 } 270 idn_ucs4_strcpy(to, from); 271 r = idn_success; 272 goto ret; 273 } 274 275 /* 276 * Normalize. 277 */ 278 src = (void *)from; 279 dstlen = idn_ucs4_strlen(from) + 1; 280 281 i = 0; 282 while (i < ctx->nschemes) { 283 TRACE(("idn_normalizer_normalize(): normalize %s\n", 284 ctx->schemes[i]->name)); 285 286 /* 287 * Choose destination area to restore the result of a mapping. 288 */ 289 if (i + 1 == ctx->nschemes) { 290 dst = to; 291 dstlen = tolen; 292 } else { 293 if (src == buffers[0]) 294 idx = 1; 295 else 296 idx = 0; 297 298 if (buflen[idx] < dstlen) { 299 void *newbuf; 300 301 newbuf = realloc(buffers[idx], 302 sizeof(long) * dstlen); 303 if (newbuf == NULL) { 304 r = idn_nomemory; 305 goto ret; 306 } 307 buffers[idx] = (unsigned long *)newbuf; 308 buflen[idx] = dstlen; 309 } 310 311 dst = buffers[idx]; 312 dstlen = buflen[idx]; 313 } 314 315 /* 316 * Perform i-th normalization scheme. 317 * If buffer size is not enough, we double it and try again. 318 */ 319 r = (ctx->schemes[i]->proc)(src, dst, dstlen); 320 if (r == idn_buffer_overflow && dst != to) { 321 dstlen *= 2; 322 continue; 323 } 324 if (r != idn_success) 325 goto ret; 326 327 src = dst; 328 i++; 329 } 330 331 r = idn_success; 332ret: 333 free(buffers[0]); 334 free(buffers[1]); 335 if (r == idn_success) { 336 TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n", 337 idn__debug_ucs4xstring(to, 50))); 338 } else { 339 TRACE(("idn_normalizer_normalize(): %s\n", 340 idn_result_tostring(r))); 341 } 342 return (r); 343} 344 345idn_result_t 346idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) { 347 idn_result_t r; 348 normalize_scheme_t *scheme; 349 350 assert(scheme_name != NULL && proc != NULL); 351 352 TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name)); 353 354 assert(INITIALIZED); 355 356 scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1); 357 if (scheme == NULL) { 358 r = idn_nomemory; 359 goto ret; 360 } 361 scheme->name = (char *)(scheme + 1); 362 (void)strcpy(scheme->name, scheme_name); 363 scheme->proc = proc; 364 365 r = idn__strhash_put(scheme_hash, scheme_name, scheme); 366 if (r != idn_success) 367 goto ret; 368 369 r = idn_success; 370ret: 371 TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r))); 372 return (r); 373} 374 375static idn_result_t 376expand_schemes(idn_normalizer_t ctx) { 377 normalize_scheme_t **new_schemes; 378 int new_size = ctx->scheme_size * 2; 379 380 if (ctx->schemes == ctx->local_buf) { 381 new_schemes = malloc(sizeof(normalize_scheme_t) * new_size); 382 } else { 383 new_schemes = realloc(ctx->schemes, 384 sizeof(normalize_scheme_t) * new_size); 385 } 386 if (new_schemes == NULL) 387 return (idn_nomemory); 388 389 if (ctx->schemes == ctx->local_buf) 390 memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf)); 391 392 ctx->schemes = new_schemes; 393 ctx->scheme_size = new_size; 394 395 return (idn_success); 396} 397 398static idn_result_t 399register_standard_normalizers(void) { 400 int i; 401 int failed = 0; 402 403 for (i = 0; standard_normalizer[i].name != NULL; i++) { 404 idn_result_t r; 405 r = idn_normalizer_register(standard_normalizer[i].name, 406 standard_normalizer[i].proc); 407 if (r != idn_success) { 408 WARNING(("idn_normalizer_initialize(): " 409 "failed to register \"%-.100s\"\n", 410 standard_normalizer[i].name)); 411 failed++; 412 } 413 } 414 if (failed > 0) 415 return (idn_failure); 416 else 417 return (idn_success); 418} 419 420/* 421 * Unicode Normalization Forms -- latest version 422 */ 423 424static idn_result_t 425normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) { 426 INIT_VERSION(NULL, vcur); 427 return (idn__unormalize_formkc(vcur, from, to, tolen)); 428} 429 430/* 431 * Unicode Normalization Forms -- version 3.2.0 432 */ 433 434static idn_result_t 435normalizer_formkc_v320(const unsigned long *from, unsigned long *to, 436 size_t tolen) { 437 INIT_VERSION("3.2.0", v320); 438 return (idn__unormalize_formkc(v320, from, to, tolen)); 439} 440