1#ifndef lint
2static char *rcsid = "$Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp $";
3#endif
4
5/*
6 * Copyright (c) 2000,2002 Japan Network Information Center.
7 * All rights reserved.
8 *
9 * By using this file, you agree to the terms and conditions set forth bellow.
10 *
11 * 			LICENSE TERMS AND CONDITIONS
12 *
13 * The following License Terms and Conditions apply, unless a different
14 * license is obtained from Japan Network Information Center ("JPNIC"),
15 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
16 * Chiyoda-ku, Tokyo 101-0047, Japan.
17 *
18 * 1. Use, Modification and Redistribution (including distribution of any
19 *    modified or derived work) in source and/or binary forms is permitted
20 *    under this License Terms and Conditions.
21 *
22 * 2. Redistribution of source code must retain the copyright notices as they
23 *    appear in each source code file, this License Terms and Conditions.
24 *
25 * 3. Redistribution in binary form must reproduce the Copyright Notice,
26 *    this License Terms and Conditions, in the documentation and/or other
27 *    materials provided with the distribution.  For the purposes of binary
28 *    distribution the "Copyright Notice" refers to the following language:
29 *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
30 *
31 * 4. The name of JPNIC may not be used to endorse or promote products
32 *    derived from this Software without specific prior written approval of
33 *    JPNIC.
34 *
35 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
36 *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
38 *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
39 *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40 *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41 *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
42 *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
43 *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
44 *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
45 *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
46 */
47
48#include <config.h>
49
50#include <stddef.h>
51#include <stdlib.h>
52#include <string.h>
53#include <ctype.h>
54
55#include <idn/assert.h>
56#include <idn/logmacro.h>
57#include <idn/result.h>
58#include <idn/normalizer.h>
59#include <idn/strhash.h>
60#include <idn/unormalize.h>
61#include <idn/unicode.h>
62#include <idn/ucs4.h>
63#include <idn/debug.h>
64#include <idn/util.h>
65
/*
 * Number of scheme slots embedded directly in every normalizer context
 * (the size of its `local_buf' array).
 */
#define MAX_LOCAL_SCHEME	3

/*
 * True once the file-global scheme table `scheme_hash' has been set,
 * which idn_normalizer_initialize() does.
 */
#define INITIALIZED		(scheme_hash != NULL)
69
/*
 * One registered normalization scheme: the name it was registered under
 * and the procedure that performs the normalization.
 */
typedef struct {
	char *name;			/* scheme name (NUL-terminated) */
	idn_normalizer_proc_t proc;	/* normalization procedure */
} normalize_scheme_t;
74
/*
 * Normalizer context: an ordered list of schemes that
 * idn_normalizer_normalize() applies one after another.
 */
struct idn_normalizer {
	int nschemes;		/* number of entries of `schemes' in use */
	int scheme_size;	/* number of entries `schemes' can hold */
	normalize_scheme_t **schemes;	/* `local_buf' or a heap array */
	normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME]; /* initial storage */
	int reference_count;	/* context is freed when this drops to 0 */
};
82
83static idn__strhash_t scheme_hash;
84
85static idn__unicode_version_t vcur = NULL;
86static idn__unicode_version_t v320 = NULL;
87#define INIT_VERSION(version, var) \
88	if (var == NULL) { \
89		idn_result_t r = idn__unicode_create(version, &var); \
90		if (r != idn_success) \
91			return (r); \
92	}
93
/*
 * Forward declarations of the file-local helpers defined further down:
 * the scheme-array growth routine, the registration of the built-in
 * schemes, and the two built-in normalization procedures.
 */
static idn_result_t	expand_schemes(idn_normalizer_t ctx);
static idn_result_t	register_standard_normalizers(void);
static idn_result_t	normalizer_formkc(const unsigned long *from,
					  unsigned long *to, size_t tolen);
static idn_result_t	normalizer_formkc_v320(const unsigned long *from,
					       unsigned long *to,
					       size_t tolen);
101
102static struct standard_normalizer {
103	char *name;
104	idn_normalizer_proc_t proc;
105} standard_normalizer[] = {
106	{ "unicode-form-kc", normalizer_formkc },
107	{ "unicode-form-kc/3.2.0", normalizer_formkc_v320 },
108	{ "RFC3491", normalizer_formkc_v320 },
109	{ NULL, NULL },
110};
111
112idn_result_t
113idn_normalizer_initialize(void) {
114	idn__strhash_t hash;
115	idn_result_t r;
116
117	TRACE(("idn_normalizer_initialize()\n"));
118
119	if (scheme_hash != NULL) {
120		r = idn_success;	/* already initialized */
121		goto ret;
122	}
123
124	if ((r = idn__strhash_create(&hash)) != idn_success)
125		goto ret;
126	scheme_hash = hash;
127
128	/* Register standard normalizers */
129	r = register_standard_normalizers();
130ret:
131	TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r)));
132	return (r);
133}
134
135idn_result_t
136idn_normalizer_create(idn_normalizer_t *ctxp) {
137	idn_normalizer_t ctx;
138	idn_result_t r;
139
140	assert(ctxp != NULL);
141	TRACE(("idn_normalizer_create()\n"));
142
143	if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) {
144		r = idn_nomemory;
145		goto ret;
146	}
147
148	ctx->nschemes = 0;
149	ctx->scheme_size = MAX_LOCAL_SCHEME;
150	ctx->schemes = ctx->local_buf;
151	ctx->reference_count = 1;
152	*ctxp = ctx;
153
154	r = idn_success;
155ret:
156	TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r)));
157	return (r);
158}
159
160void
161idn_normalizer_destroy(idn_normalizer_t ctx) {
162	assert(ctx != NULL);
163
164	TRACE(("idn_normalizer_destroy()\n"));
165
166	ctx->reference_count--;
167	if (ctx->reference_count <= 0) {
168		TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
169		if (ctx->schemes != ctx->local_buf)
170			free(ctx->schemes);
171		free(ctx);
172	} else {
173		TRACE(("idn_normalizer_destroy(): "
174		       "update reference count (%d->%d)\n",
175		       ctx->reference_count + 1, ctx->reference_count));
176	}
177}
178
179void
180idn_normalizer_incrref(idn_normalizer_t ctx) {
181	assert(ctx != NULL);
182
183	TRACE(("idn_normalizer_incrref()\n"));
184	TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
185	    ctx->reference_count, ctx->reference_count + 1));
186
187	ctx->reference_count++;
188}
189
190idn_result_t
191idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) {
192	idn_result_t r;
193	void *v;
194	normalize_scheme_t *scheme;
195
196	assert(ctx != NULL && scheme_name != NULL);
197
198	TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name));
199
200	assert(INITIALIZED);
201
202	if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) {
203		ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
204		       scheme_name));
205		r = idn_invalid_name;
206		goto ret;
207	}
208
209	scheme = v;
210
211	assert(ctx->nschemes <= ctx->scheme_size);
212
213	if (ctx->nschemes == ctx->scheme_size &&
214	    (r = expand_schemes(ctx)) != idn_success) {
215		goto ret;
216	}
217
218	ctx->schemes[ctx->nschemes++] = scheme;
219	r = idn_success;
220ret:
221	TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r)));
222	return (r);
223}
224
225idn_result_t
226idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names,
227		      int nschemes) {
228	idn_result_t r;
229	int i;
230
231	assert(ctx != NULL && scheme_names != NULL);
232
233	TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes));
234
235	for (i = 0; i < nschemes; i++) {
236		r = idn_normalizer_add(ctx, (const char *)*scheme_names);
237		if (r != idn_success)
238			goto ret;
239		scheme_names++;
240	}
241
242	r = idn_success;
243ret:
244	TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r)));
245	return (r);
246}
247
248idn_result_t
249idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
250			 unsigned long *to, size_t tolen) {
251	idn_result_t r;
252	unsigned long *src, *dst;
253	unsigned long *buffers[2] = {NULL, NULL};
254	size_t buflen[2] = {0, 0};
255	size_t dstlen;
256	int idx;
257	int i;
258
259	assert(scheme_hash != NULL);
260	assert(ctx != NULL && from != NULL && to != NULL);
261
262	TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
263	       idn__debug_ucs4xstring(from, 50), (int)tolen));
264
265	if (ctx->nschemes <= 0) {
266		if (tolen < idn_ucs4_strlen(from) + 1) {
267			r = idn_buffer_overflow;
268			goto ret;
269		}
270		idn_ucs4_strcpy(to, from);
271		r = idn_success;
272		goto ret;
273	}
274
275	/*
276	 * Normalize.
277	 */
278	src = (void *)from;
279	dstlen = idn_ucs4_strlen(from) + 1;
280
281	i = 0;
282	while (i < ctx->nschemes) {
283		TRACE(("idn_normalizer_normalize(): normalize %s\n",
284		       ctx->schemes[i]->name));
285
286		/*
287		 * Choose destination area to restore the result of a mapping.
288		 */
289		if (i + 1 == ctx->nschemes) {
290			dst = to;
291			dstlen = tolen;
292		} else {
293			if (src == buffers[0])
294				idx = 1;
295			else
296				idx = 0;
297
298			if (buflen[idx] < dstlen) {
299				void *newbuf;
300
301				newbuf = realloc(buffers[idx],
302						 sizeof(long) * dstlen);
303				if (newbuf == NULL) {
304					r = idn_nomemory;
305					goto ret;
306				}
307				buffers[idx] = (unsigned long *)newbuf;
308				buflen[idx] = dstlen;
309			}
310
311			dst = buffers[idx];
312			dstlen = buflen[idx];
313		}
314
315		/*
316		 * Perform i-th normalization scheme.
317		 * If buffer size is not enough, we double it and try again.
318		 */
319		r = (ctx->schemes[i]->proc)(src, dst, dstlen);
320		if (r == idn_buffer_overflow && dst != to) {
321			dstlen *= 2;
322			continue;
323		}
324		if (r != idn_success)
325			goto ret;
326
327		src = dst;
328		i++;
329	}
330
331	r = idn_success;
332ret:
333	free(buffers[0]);
334	free(buffers[1]);
335	if (r == idn_success) {
336		TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
337		       idn__debug_ucs4xstring(to, 50)));
338	} else {
339		TRACE(("idn_normalizer_normalize(): %s\n",
340		       idn_result_tostring(r)));
341	}
342	return (r);
343}
344
345idn_result_t
346idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) {
347	idn_result_t r;
348	normalize_scheme_t *scheme;
349
350	assert(scheme_name != NULL && proc != NULL);
351
352	TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name));
353
354	assert(INITIALIZED);
355
356	scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1);
357	if (scheme == NULL) {
358		r = idn_nomemory;
359		goto ret;
360	}
361	scheme->name = (char *)(scheme + 1);
362	(void)strcpy(scheme->name, scheme_name);
363	scheme->proc = proc;
364
365	r = idn__strhash_put(scheme_hash, scheme_name, scheme);
366	if (r != idn_success)
367		goto ret;
368
369	r = idn_success;
370ret:
371	TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r)));
372	return (r);
373}
374
375static idn_result_t
376expand_schemes(idn_normalizer_t ctx) {
377	normalize_scheme_t **new_schemes;
378	int new_size = ctx->scheme_size * 2;
379
380	if (ctx->schemes == ctx->local_buf) {
381		new_schemes = malloc(sizeof(normalize_scheme_t) * new_size);
382	} else {
383		new_schemes = realloc(ctx->schemes,
384				      sizeof(normalize_scheme_t) * new_size);
385	}
386	if (new_schemes == NULL)
387		return (idn_nomemory);
388
389	if (ctx->schemes == ctx->local_buf)
390		memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf));
391
392	ctx->schemes = new_schemes;
393	ctx->scheme_size = new_size;
394
395	return (idn_success);
396}
397
398static idn_result_t
399register_standard_normalizers(void) {
400	int i;
401	int failed = 0;
402
403	for (i = 0; standard_normalizer[i].name != NULL; i++) {
404		idn_result_t r;
405		r = idn_normalizer_register(standard_normalizer[i].name,
406					    standard_normalizer[i].proc);
407		if (r != idn_success) {
408			WARNING(("idn_normalizer_initialize(): "
409				"failed to register \"%-.100s\"\n",
410				standard_normalizer[i].name));
411			failed++;
412		}
413	}
414	if (failed > 0)
415		return (idn_failure);
416	else
417		return (idn_success);
418}
419
420/*
421 * Unicode Normalization Forms -- latest version
422 */
423
424static idn_result_t
425normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) {
426	INIT_VERSION(NULL, vcur);
427	return (idn__unormalize_formkc(vcur, from, to, tolen));
428}
429
430/*
431 * Unicode Normalization Forms -- version 3.2.0
432 */
433
434static idn_result_t
435normalizer_formkc_v320(const unsigned long *from, unsigned long *to,
436		       size_t tolen) {
437	INIT_VERSION("3.2.0", v320);
438	return (idn__unormalize_formkc(v320, from, to, tolen));
439}
440