1#ifndef lint 2static char *rcsid = "$Id: ucsmap.c,v 1.1 2003/06/04 00:26:14 marka Exp $"; 3#endif 4 5/* 6 * Copyright (c) 2001 Japan Network Information Center. All rights reserved. 7 * 8 * By using this file, you agree to the terms and conditions set forth bellow. 9 * 10 * LICENSE TERMS AND CONDITIONS 11 * 12 * The following License Terms and Conditions apply, unless a different 13 * license is obtained from Japan Network Information Center ("JPNIC"), 14 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 15 * Chiyoda-ku, Tokyo 101-0047, Japan. 16 * 17 * 1. Use, Modification and Redistribution (including distribution of any 18 * modified or derived work) in source and/or binary forms is permitted 19 * under this License Terms and Conditions. 20 * 21 * 2. Redistribution of source code must retain the copyright notices as they 22 * appear in each source code file, this License Terms and Conditions. 23 * 24 * 3. Redistribution in binary form must reproduce the Copyright Notice, 25 * this License Terms and Conditions, in the documentation and/or other 26 * materials provided with the distribution. For the purposes of binary 27 * distribution the "Copyright Notice" refers to the following language: 28 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 29 * 30 * 4. The name of JPNIC may not be used to endorse or promote products 31 * derived from this Software without specific prior written approval of 32 * JPNIC. 33 * 34 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 37 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 39 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 40 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 41 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 42 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 43 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 44 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 45 */ 46 47#include <config.h> 48 49#include <stdlib.h> 50#include <string.h> 51 52#include <idn/result.h> 53#include <idn/assert.h> 54#include <idn/log.h> 55#include <idn/logmacro.h> 56#include <idn/ucsmap.h> 57 58#define INIT_SIZE 50 59#define DEFAULT_BUF_SIZE 500 60#define UCSMAP_HASH_SIZE 103 61#define MAX_MAPLEN 0xffff 62 63/* 64 * This module implements UCS 1-to-N mapping. 65 * To speed up mapping table lookup, a combination of hash and 66 * binary search is used. 67 */ 68 69/* 70 * Mapping entry. 71 * Entries are sorted by its hash index and code point. 72 */ 73typedef struct { 74 short hidx; /* hash index */ 75 unsigned short len; /* length of mapped sequence */ 76 unsigned long ucs; /* code point to be mapped */ 77 unsigned long *map; /* mapped sequence of code points */ 78} ucsmap_entry_t; 79 80/* 81 * Hash table entry. 82 * Since the entries pointed by ucsmap_hash_t.entry are sorted, 83 * binary search can be used. 84 */ 85typedef struct { 86 ucsmap_entry_t *entry; /* sorted by code point */ 87 int n; /* length of 'entry' */ 88} ucsmap_hash_t; 89 90/* 91 * UCS character buffer for storing target character sequence. 92 */ 93typedef struct ucsmap_buf { 94 struct ucsmap_buf *next; 95 unsigned long buf[1]; /* actually a variable length array */ 96} ucsmap_buf_t; 97 98/* 99 * Mapping object. 100 */ 101typedef struct idn_ucsmap { 102 ucsmap_hash_t hash[UCSMAP_HASH_SIZE]; 103 ucsmap_entry_t *entries; /* array of entries */ 104 size_t entry_size; /* allocated size */ 105 size_t nentries; /* # of entries in use */ 106 ucsmap_buf_t *mapdata; /* list of character buffers */ 107 size_t mapdata_size; /* allocated size of current buffer */ 108 size_t mapdata_used; /* # of chars in use */ 109 int fixed; /* already fixed? */ 110 int refcnt; /* reference count */ 111} ucsmap_t; 112 113static int ucsmap_hash(unsigned long v); 114static unsigned long *save_mapped_sequence(idn_ucsmap_t ctx, 115 unsigned long *map, 116 size_t maplen); 117static void free_mapbuf(ucsmap_buf_t *buf); 118static int comp_entry(const void *v1, const void *v2); 119 120idn_result_t 121idn_ucsmap_create(idn_ucsmap_t *ctxp) { 122 idn_ucsmap_t ctx; 123 124 assert(ctxp != NULL); 125 126 TRACE(("idn_ucsmap_create()\n")); 127 128 if ((ctx = malloc(sizeof(*ctx))) == NULL) { 129 WARNING(("idn_ucsmap_create: malloc failed\n")); 130 return (idn_nomemory); 131 } 132 133 ctx->entry_size = 0; 134 ctx->nentries = 0; 135 ctx->entries = NULL; 136 ctx->mapdata = NULL; 137 ctx->mapdata_size = 0; 138 ctx->mapdata_used = 0; 139 ctx->fixed = 0; 140 ctx->refcnt = 1; 141 *ctxp = ctx; 142 return (idn_success); 143} 144 145void 146idn_ucsmap_destroy(idn_ucsmap_t ctx) { 147 assert(ctx != NULL && ctx->refcnt > 0); 148 149 TRACE(("idn_ucsmap_destroy()\n")); 150 151 if (--ctx->refcnt == 0) { 152 if (ctx->entries != NULL) 153 free(ctx->entries); 154 if (ctx->mapdata != NULL) 155 free_mapbuf(ctx->mapdata); 156 free(ctx); 157 } 158} 159 160void 161idn_ucsmap_incrref(idn_ucsmap_t ctx) { 162 assert(ctx != NULL && ctx->refcnt > 0); 163 164 ctx->refcnt++; 165} 166 167idn_result_t 168idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs, 169 unsigned long *map, size_t maplen) 170{ 171 ucsmap_entry_t *e; 172 ucsmap_entry_t *newbuf; 173 174 assert(ctx != NULL && ctx->refcnt > 0); 175 176 TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen)); 177 178 /* Make sure it is not fixed yet. */ 179 if (ctx->fixed) { 180 WARNING(("idn_ucsmap_add: attempt to add to fixed map\n")); 181 return (idn_failure); 182 } 183 184 if (maplen > MAX_MAPLEN) { 185 WARNING(("idn_ucsmap_add: maplen too large (> %d)\n", 186 MAX_MAPLEN)); 187 return (idn_failure); 188 } 189 190 /* Append an entry. */ 191 if (ctx->nentries >= ctx->entry_size) { 192 if (ctx->entry_size == 0) 193 ctx->entry_size = INIT_SIZE; 194 else 195 ctx->entry_size *= 2; 196 newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size); 197 if (newbuf == NULL) 198 return (idn_nomemory); 199 ctx->entries = newbuf; 200 } 201 e = &ctx->entries[ctx->nentries]; 202 e->hidx = ucsmap_hash(ucs); 203 e->len = maplen; 204 e->ucs = ucs; 205 if (maplen > 0) { 206 /* Save mapped sequence in the buffer. */ 207 e->map = save_mapped_sequence(ctx, map, maplen); 208 if (e->map == NULL) 209 return (idn_nomemory); 210 } else { 211 /* 212 * Zero 'maplen' is perfectly valid meaning one-to-zero 213 * mapping. 214 */ 215 e->map = NULL; 216 } 217 ctx->nentries++; 218 219 return (idn_success); 220} 221 222void 223idn_ucsmap_fix(idn_ucsmap_t ctx) { 224 ucsmap_entry_t *e; 225 int last_hidx; 226 int i; 227 228 assert(ctx != NULL && ctx->refcnt > 0); 229 230 TRACE(("idn_ucsmap_fix()\n")); 231 232 if (ctx->fixed) 233 return; 234 235 ctx->fixed = 1; 236 237 /* Initialize hash. */ 238 for (i = 0; i < UCSMAP_HASH_SIZE; i++) { 239 ctx->hash[i].entry = NULL; 240 ctx->hash[i].n = 0; 241 } 242 243 if (ctx->nentries == 0) 244 return; 245 246 /* Sort entries by the hash value and code point. */ 247 qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry); 248 249 /* 250 * Now the entries are sorted by their hash value, and 251 * sorted by its code point among the ones with the same hash value. 252 */ 253 254 /* Build hash table. */ 255 last_hidx = -1; 256 for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) { 257 if (e->hidx != last_hidx) { 258 ctx->hash[e->hidx].entry = e; 259 last_hidx = e->hidx; 260 } 261 ctx->hash[last_hidx].n++; 262 } 263} 264 265idn_result_t 266idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to, 267 size_t tolen, size_t *maplenp) { 268 int hash; 269 ucsmap_entry_t *e; 270 int n; 271 int hi, lo, mid; 272 273 assert(ctx != NULL && ctx->refcnt > 0 && to != NULL && 274 maplenp != NULL); 275 276 TRACE(("idn_ucsmap_map(v=U+%lX)\n", v)); 277 278 if (!ctx->fixed) { 279 WARNING(("idn_ucsmap_map: not fixed yet\n")); 280 return (idn_failure); 281 } 282 283 /* First, look up hash table. */ 284 hash = ucsmap_hash(v); 285 if ((n = ctx->hash[hash].n) == 0) 286 goto nomap; 287 288 /* Then do binary search. */ 289 e = ctx->hash[hash].entry; 290 lo = 0; 291 hi = n - 1; 292 while (lo <= hi) { 293 mid = (lo + hi) / 2; 294 if (v < e[mid].ucs) 295 hi = mid - 1; 296 else if (v > e[mid].ucs) 297 lo = mid + 1; 298 else { 299 /* Found. */ 300 if (tolen < e[mid].len) 301 return (idn_buffer_overflow); 302 memcpy(to, e[mid].map, sizeof(*to) * e[mid].len); 303 *maplenp = e[mid].len; 304 return (idn_success); 305 } 306 } 307 308 /* 309 * Not found. Put the original character to 'to' 310 * just for convenience. 311 */ 312 nomap: 313 if (tolen < 1) 314 return (idn_buffer_overflow); 315 *to = v; 316 *maplenp = 1; 317 return (idn_nomapping); 318} 319 320static int 321ucsmap_hash(unsigned long v) { 322 return (v % UCSMAP_HASH_SIZE); 323} 324 325static unsigned long * 326save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) { 327 ucsmap_buf_t *buf; 328 unsigned long *p; 329 size_t allocsize; 330 331 /* 332 * If the current buffer (the first one in the ctx->mapdata list) 333 * has enough space, use it. Otherwise, allocate a new buffer and 334 * insert it at the beginning of the list. 335 */ 336 if (ctx->mapdata_used + maplen > ctx->mapdata_size) { 337 if (maplen > DEFAULT_BUF_SIZE) 338 allocsize = maplen * 2; 339 else 340 allocsize = DEFAULT_BUF_SIZE; 341 buf = malloc(sizeof(ucsmap_hash_t) + 342 sizeof(unsigned long) * allocsize); 343 if (buf == NULL) 344 return (NULL); 345 buf->next = ctx->mapdata; 346 ctx->mapdata = buf; 347 ctx->mapdata_size = allocsize; 348 ctx->mapdata_used = 0; 349 } 350 p = ctx->mapdata->buf + ctx->mapdata_used; 351 memcpy(p, map, sizeof(unsigned long) * maplen); 352 ctx->mapdata_used += maplen; 353 return (p); 354} 355 356static void 357free_mapbuf(ucsmap_buf_t *buf) { 358 while (buf != NULL) { 359 ucsmap_buf_t *next = buf->next; 360 free(buf); 361 buf = next; 362 } 363} 364 365static int 366comp_entry(const void *v1, const void *v2) { 367 const ucsmap_entry_t *e1 = v1; 368 const ucsmap_entry_t *e2 = v2; 369 370 if (e1->hidx < e2->hidx) 371 return (-1); 372 else if (e1->hidx > e2->hidx) 373 return (1); 374 else if (e1->ucs < e2->ucs) 375 return (-1); 376 else if (e1->ucs > e2->ucs) 377 return (1); 378 else 379 return (0); 380} 381