1#ifndef lint
2static char *rcsid = "$Id: ucsmap.c,v 1.1 2003/06/04 00:26:14 marka Exp $";
3#endif
4
5/*
6 * Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
7 *
8 * By using this file, you agree to the terms and conditions set forth bellow.
9 *
10 * 			LICENSE TERMS AND CONDITIONS
11 *
12 * The following License Terms and Conditions apply, unless a different
13 * license is obtained from Japan Network Information Center ("JPNIC"),
14 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
15 * Chiyoda-ku, Tokyo 101-0047, Japan.
16 *
17 * 1. Use, Modification and Redistribution (including distribution of any
18 *    modified or derived work) in source and/or binary forms is permitted
19 *    under this License Terms and Conditions.
20 *
21 * 2. Redistribution of source code must retain the copyright notices as they
22 *    appear in each source code file, this License Terms and Conditions.
23 *
24 * 3. Redistribution in binary form must reproduce the Copyright Notice,
25 *    this License Terms and Conditions, in the documentation and/or other
26 *    materials provided with the distribution.  For the purposes of binary
27 *    distribution the "Copyright Notice" refers to the following language:
28 *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
29 *
30 * 4. The name of JPNIC may not be used to endorse or promote products
31 *    derived from this Software without specific prior written approval of
32 *    JPNIC.
33 *
34 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
35 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37 *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
38 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
39 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
40 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
41 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
42 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
43 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
44 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
45 */
46
47#include <config.h>
48
49#include <stdlib.h>
50#include <string.h>
51
52#include <idn/result.h>
53#include <idn/assert.h>
54#include <idn/log.h>
55#include <idn/logmacro.h>
56#include <idn/ucsmap.h>
57
58#define INIT_SIZE		50
59#define DEFAULT_BUF_SIZE	500
60#define UCSMAP_HASH_SIZE	103
61#define MAX_MAPLEN		0xffff
62
63/*
64 * This module implements UCS 1-to-N mapping.
65 * To speed up mapping table lookup, a combination of hash and
66 * binary search is used.
67 */
68
69/*
70 * Mapping entry.
71 * Entries are sorted by its hash index and code point.
72 */
73typedef struct {
74	short hidx;		/* hash index */
75	unsigned short len;	/* length of mapped sequence */
76	unsigned long ucs;	/* code point to be mapped */
77	unsigned long *map;	/* mapped sequence of code points */
78} ucsmap_entry_t;
79
80/*
81 * Hash table entry.
82 * Since the entries pointed by ucsmap_hash_t.entry are sorted,
83 * binary search can be used.
84 */
85typedef struct {
86	ucsmap_entry_t *entry;	/* sorted by code point */
87	int n;			/* length of 'entry' */
88} ucsmap_hash_t;
89
90/*
91 * UCS character buffer for storing target character sequence.
92 */
93typedef struct ucsmap_buf {
94	struct ucsmap_buf *next;
95	unsigned long buf[1];		/* actually a variable length array */
96} ucsmap_buf_t;
97
98/*
99 * Mapping object.
100 */
101typedef struct idn_ucsmap {
102	ucsmap_hash_t hash[UCSMAP_HASH_SIZE];
103	ucsmap_entry_t *entries;	/* array of entries */
104	size_t entry_size;		/* allocated size */
105	size_t nentries;		/* # of entries in use */
106	ucsmap_buf_t *mapdata;		/* list of character buffers */
107	size_t mapdata_size;		/* allocated size of current buffer */
108	size_t mapdata_used;		/* # of chars in use */
109	int fixed;			/* already fixed? */
110	int refcnt;			/* reference count */
111} ucsmap_t;
112
113static int		ucsmap_hash(unsigned long v);
114static unsigned long	*save_mapped_sequence(idn_ucsmap_t ctx,
115					      unsigned long *map,
116					      size_t maplen);
117static void		free_mapbuf(ucsmap_buf_t *buf);
118static int		comp_entry(const void *v1, const void *v2);
119
120idn_result_t
121idn_ucsmap_create(idn_ucsmap_t *ctxp) {
122	idn_ucsmap_t ctx;
123
124	assert(ctxp != NULL);
125
126	TRACE(("idn_ucsmap_create()\n"));
127
128	if ((ctx = malloc(sizeof(*ctx))) == NULL) {
129		WARNING(("idn_ucsmap_create: malloc failed\n"));
130		return (idn_nomemory);
131	}
132
133	ctx->entry_size = 0;
134	ctx->nentries = 0;
135	ctx->entries = NULL;
136	ctx->mapdata = NULL;
137	ctx->mapdata_size = 0;
138	ctx->mapdata_used = 0;
139	ctx->fixed = 0;
140	ctx->refcnt = 1;
141	*ctxp = ctx;
142	return (idn_success);
143}
144
145void
146idn_ucsmap_destroy(idn_ucsmap_t ctx) {
147	assert(ctx != NULL && ctx->refcnt > 0);
148
149	TRACE(("idn_ucsmap_destroy()\n"));
150
151	if (--ctx->refcnt == 0) {
152		if (ctx->entries != NULL)
153			free(ctx->entries);
154		if (ctx->mapdata != NULL)
155			free_mapbuf(ctx->mapdata);
156		free(ctx);
157	}
158}
159
160void
161idn_ucsmap_incrref(idn_ucsmap_t ctx) {
162	assert(ctx != NULL && ctx->refcnt > 0);
163
164	ctx->refcnt++;
165}
166
167idn_result_t
168idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs,
169	       unsigned long *map, size_t maplen)
170{
171	ucsmap_entry_t *e;
172	ucsmap_entry_t *newbuf;
173
174	assert(ctx != NULL && ctx->refcnt > 0);
175
176	TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen));
177
178	/* Make sure it is not fixed yet. */
179	if (ctx->fixed) {
180		WARNING(("idn_ucsmap_add: attempt to add to fixed map\n"));
181		return (idn_failure);
182	}
183
184	if (maplen > MAX_MAPLEN) {
185		WARNING(("idn_ucsmap_add: maplen too large (> %d)\n",
186			 MAX_MAPLEN));
187		return (idn_failure);
188	}
189
190	/* Append an entry. */
191	if (ctx->nentries >= ctx->entry_size) {
192		if (ctx->entry_size == 0)
193			ctx->entry_size = INIT_SIZE;
194		else
195			ctx->entry_size *= 2;
196		newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size);
197		if (newbuf == NULL)
198			return (idn_nomemory);
199		ctx->entries = newbuf;
200	}
201	e = &ctx->entries[ctx->nentries];
202	e->hidx = ucsmap_hash(ucs);
203	e->len = maplen;
204	e->ucs = ucs;
205	if (maplen > 0) {
206		/* Save mapped sequence in the buffer. */
207		e->map = save_mapped_sequence(ctx, map, maplen);
208		if (e->map == NULL)
209			return (idn_nomemory);
210	} else {
211		/*
212		 * Zero 'maplen' is perfectly valid meaning one-to-zero
213		 * mapping.
214		 */
215		e->map = NULL;
216	}
217	ctx->nentries++;
218
219	return (idn_success);
220}
221
222void
223idn_ucsmap_fix(idn_ucsmap_t ctx) {
224	ucsmap_entry_t *e;
225	int last_hidx;
226	int i;
227
228	assert(ctx != NULL && ctx->refcnt > 0);
229
230	TRACE(("idn_ucsmap_fix()\n"));
231
232	if (ctx->fixed)
233		return;
234
235	ctx->fixed = 1;
236
237	/* Initialize hash. */
238	for (i = 0; i < UCSMAP_HASH_SIZE; i++) {
239		ctx->hash[i].entry = NULL;
240		ctx->hash[i].n = 0;
241	}
242
243	if (ctx->nentries == 0)
244		return;
245
246	/* Sort entries by the hash value and code point. */
247	qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry);
248
249	/*
250	 * Now the entries are sorted by their hash value, and
251	 * sorted by its code point among the ones with the same hash value.
252	 */
253
254	/* Build hash table. */
255	last_hidx = -1;
256	for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) {
257		if (e->hidx != last_hidx) {
258			ctx->hash[e->hidx].entry = e;
259			last_hidx = e->hidx;
260		}
261		ctx->hash[last_hidx].n++;
262	}
263}
264
265idn_result_t
266idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to,
267	       size_t tolen, size_t *maplenp) {
268	int hash;
269	ucsmap_entry_t *e;
270	int n;
271	int hi, lo, mid;
272
273	assert(ctx != NULL && ctx->refcnt > 0 && to != NULL &&
274	       maplenp != NULL);
275
276	TRACE(("idn_ucsmap_map(v=U+%lX)\n", v));
277
278	if (!ctx->fixed) {
279		WARNING(("idn_ucsmap_map: not fixed yet\n"));
280		return (idn_failure);
281	}
282
283	/* First, look up hash table. */
284	hash = ucsmap_hash(v);
285	if ((n = ctx->hash[hash].n) == 0)
286		goto nomap;
287
288	/* Then do binary search. */
289	e = ctx->hash[hash].entry;
290	lo = 0;
291	hi = n - 1;
292	while (lo <= hi) {
293		mid = (lo + hi) / 2;
294		if (v < e[mid].ucs)
295			hi = mid - 1;
296		else if (v > e[mid].ucs)
297			lo = mid + 1;
298		else {
299			/* Found. */
300			if (tolen < e[mid].len)
301				return (idn_buffer_overflow);
302			memcpy(to, e[mid].map, sizeof(*to) * e[mid].len);
303			*maplenp = e[mid].len;
304			return (idn_success);
305		}
306	}
307
308	/*
309	 * Not found. Put the original character to 'to'
310	 * just for convenience.
311	 */
312 nomap:
313	if (tolen < 1)
314		return (idn_buffer_overflow);
315	*to = v;
316	*maplenp = 1;
317	return (idn_nomapping);
318}
319
320static int
321ucsmap_hash(unsigned long v) {
322	return (v % UCSMAP_HASH_SIZE);
323}
324
325static unsigned long *
326save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) {
327	ucsmap_buf_t *buf;
328	unsigned long *p;
329	size_t allocsize;
330
331	/*
332	 * If the current buffer (the first one in the ctx->mapdata list)
333	 * has enough space, use it.  Otherwise, allocate a new buffer and
334	 * insert it at the beginning of the list.
335	 */
336	if (ctx->mapdata_used + maplen > ctx->mapdata_size) {
337		if (maplen > DEFAULT_BUF_SIZE)
338			allocsize = maplen * 2;
339		else
340			allocsize = DEFAULT_BUF_SIZE;
341		buf = malloc(sizeof(ucsmap_hash_t) +
342			     sizeof(unsigned long) * allocsize);
343		if (buf == NULL)
344			return (NULL);
345		buf->next = ctx->mapdata;
346		ctx->mapdata = buf;
347		ctx->mapdata_size = allocsize;
348		ctx->mapdata_used = 0;
349	}
350	p = ctx->mapdata->buf + ctx->mapdata_used;
351	memcpy(p, map, sizeof(unsigned long) * maplen);
352	ctx->mapdata_used += maplen;
353	return (p);
354}
355
356static void
357free_mapbuf(ucsmap_buf_t *buf) {
358	while (buf != NULL) {
359		ucsmap_buf_t *next = buf->next;
360		free(buf);
361		buf = next;
362	}
363}
364
365static int
366comp_entry(const void *v1, const void *v2) {
367	const ucsmap_entry_t *e1 = v1;
368	const ucsmap_entry_t *e2 = v2;
369
370	if (e1->hidx < e2->hidx)
371		return (-1);
372	else if (e1->hidx > e2->hidx)
373		return (1);
374	else if (e1->ucs < e2->ucs)
375		return (-1);
376	else if (e1->ucs > e2->ucs)
377		return (1);
378	else
379		return (0);
380}
381