1/*	$NetBSD: citrus_utf7.c,v 1.4 2006/03/19 01:55:48 christos Exp $	*/
2
3/*-
4 * Copyright (c)2004, 2005 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 */
29
30#include <sys/cdefs.h>
31#if defined(LIB_SCCS) && !defined(lint)
32__RCSID("$NetBSD: citrus_utf7.c,v 1.4 2006/03/19 01:55:48 christos Exp $");
33#endif /* LIB_SCCS and not lint */
34
35#include <assert.h>
36#include <errno.h>
37#include <string.h>
38#include <stdio.h>
39#include <stdint.h>
40#include <stdlib.h>
41#include <limits.h>
42#include <wchar.h>
43
44#include "citrus_namespace.h"
45#include "citrus_types.h"
46#include "citrus_module.h"
47#include "citrus_ctype.h"
48#include "citrus_stdenc.h"
49#include "citrus_utf7.h"
50
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
54
/*
 * Layout of one _UTF7EncodingInfo cell: the low byte (EI_MASK) carries a
 * small integer, the bits above it are character-class flags.
 */
#define	EI_MASK		UINT16_C(0xff)
#define	EI_DIRECT	UINT16_C(0x100)
#define	EI_OPTION	UINT16_C(0x200)
#define	EI_SPACE	UINT16_C(0x400)

/* Lookup table with one cell for each 7-bit character code. */
typedef struct {
	uint16_t	cell[0x80];
} _UTF7EncodingInfo;
62
/*
 * Conversion state.  The four bit-fields are declared back to back so they
 * keep sharing one storage unit.
 */
typedef struct {
	unsigned int	mode: 1;	/* whether base64 mode */
	unsigned int	bits: 4;	/* need to hold 0 - 15 */
	unsigned int	cache: 22;	/* 22 = BASE64_BIT + UTF16_BIT */
	unsigned int	surrogate: 1;	/* whether surrogate pair or not */
	int		chlen;		/* number of bytes held in ch[] */
	/* BASE64_IN plus 3 base64 digits; 3 * 6 = 18 is closest to UTF16_BIT */
	char		ch[4];
} _UTF7State;
72
73typedef struct {
74	_UTF7EncodingInfo	ei;
75	struct {
76		/* for future multi-locale facility */
77		_UTF7State	s_mblen;
78		_UTF7State	s_mbrlen;
79		_UTF7State	s_mbrtowc;
80		_UTF7State	s_mbtowc;
81		_UTF7State	s_mbsrtowcs;
82		_UTF7State	s_wcrtomb;
83		_UTF7State	s_wcsrtombs;
84		_UTF7State	s_wctomb;
85	} states;
86} _UTF7CTypeInfo;
87
/* Names the shared templates are parameterised with. */
#define	_FUNCNAME(m)			_citrus_UTF7_##m
#define	_ENCODING_INFO			_UTF7EncodingInfo
#define	_ENCODING_STATE			_UTF7State
#define	_CTYPE_INFO			_UTF7CTypeInfo

/* Accessors into a _UTF7CTypeInfo: its table and one of its states. */
#define	_CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define	_CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Properties of the encoding. */
#define	_ENCODING_MB_CUR_MAX(_ei_)		4
#define	_ENCODING_IS_STATE_DEPENDENT		1
#define	_STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
98
99static __inline void
100/*ARGSUSED*/
101_citrus_UTF7_init_state(_UTF7EncodingInfo * __restrict ei,
102	_UTF7State * __restrict s)
103{
104	/* ei appears to be unused */
105	_DIAGASSERT(s != NULL);
106
107	memset((void *)s, 0, sizeof(*s));
108}
109
110static __inline void
111/*ARGSUSED*/
112_citrus_UTF7_pack_state(_UTF7EncodingInfo * __restrict ei,
113	void *__restrict pspriv, const _UTF7State * __restrict s)
114{
115	/* ei seem to be unused */
116	_DIAGASSERT(pspriv != NULL);
117	_DIAGASSERT(s != NULL);
118
119	memcpy(pspriv, (const void *)s, sizeof(*s));
120}
121
122static __inline void
123/*ARGSUSED*/
124_citrus_UTF7_unpack_state(_UTF7EncodingInfo * __restrict ei,
125	_UTF7State * __restrict s, const void * __restrict pspriv)
126{
127	/* ei seem to be unused */
128	_DIAGASSERT(s != NULL);
129	_DIAGASSERT(pspriv != NULL);
130
131	memcpy((void *)s, pspriv, sizeof(*s));
132}
133
/* The 64 base64 digits, in value order. */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/*
 * RFC 2152 Set D: letters, digits and the nine specials '(),-./:?
 * (the apostrophe belongs here, not in Set O).
 */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789'(),-./:?";

/* RFC 2152 Set O: optional direct characters; '\\' and '~' are excluded. */
static const char option[] = "!\"#$%&*;<=>@[]^_`{|}";

/* Whitespace that may always appear unencoded. */
static const char spaces[] = " \t\r\n";
146
/* Width in bits of one base64 digit and of one UTF-16 code unit. */
#define	BASE64_BIT	6
#define	UTF16_BIT	16

/* Largest base64 digit value, UTF-16 code unit and code point accepted. */
#define	BASE64_MAX	0x3f
#define	UTF16_MAX	UINT16_C(0xffff)
#define	UTF32_MAX	UINT32_C(0x10ffff)

/* Bytes that open and close a base64 run. */
#define	BASE64_IN	'+'
#define	BASE64_OUT	'-'

/* Nonzero when c does not fit in seven bits. */
#define	SHIFT7BIT(c)	((c) >> 7)
#define	ISSPECIAL(c)	((c) == '\0' || (c) == BASE64_IN)

/* Base64 digit value of c, or -1 when c is not a base64 digit. */
#define	FINDLEN(ei, c) \
	(SHIFT7BIT((c)) ? -1 : (((ei)->cell[(c)] & EI_MASK) - 1))

/*
 * Character classification.  Every macro argument is parenthesised so
 * that expressions such as `&table' or `a | b' can be passed safely.
 */
#define	ISDIRECT(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_OPTION | EI_SPACE))))

#define	ISSAFE(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((c) < 0x80 && ((ei)->cell[(c)] & (EI_DIRECT | EI_SPACE)))))

/* surrogate pair */
#define	SRG_BASE	UINT32_C(0x10000)
#define	HISRG_MIN	UINT16_C(0xd800)
#define	HISRG_MAX	UINT16_C(0xdbff)
#define	LOSRG_MIN	UINT16_C(0xdc00)
#define	LOSRG_MAX	UINT16_C(0xdfff)
175
176static int
177_citrus_UTF7_mbtoutf16(_UTF7EncodingInfo * __restrict ei,
178	uint16_t * __restrict u16, const char ** __restrict s, size_t n,
179	_UTF7State * __restrict psenc, size_t * __restrict nresult)
180{
181	_UTF7State sv;
182	const char *s0;
183	int i, done, len;
184
185	_DIAGASSERT(ei != NULL);
186	_DIAGASSERT(s != NULL && *s != NULL);
187	_DIAGASSERT(psenc != NULL);
188
189	s0 = *s;
190	sv = *psenc;
191
192	for (i = 0, done = 0; done == 0; i++) {
193		_DIAGASSERT(i <= psenc->chlen);
194		if (i == psenc->chlen) {
195			if (n-- < 1) {
196				*nresult = (size_t)-2;
197				*s = s0;
198				sv.chlen = psenc->chlen;
199				*psenc = sv;
200				return 0;
201			}
202			psenc->ch[psenc->chlen++] = *s0++;
203		}
204		if (SHIFT7BIT((int)psenc->ch[i]))
205			goto ilseq;
206		if (!psenc->mode) {
207			if (psenc->bits > 0 || psenc->cache > 0)
208				return EINVAL;
209			if (psenc->ch[i] == BASE64_IN) {
210				psenc->mode = 1;
211			} else {
212				if (!ISDIRECT(ei, (int)psenc->ch[i]))
213					goto ilseq;
214				*u16 = (uint16_t)psenc->ch[i];
215				done = 1;
216				continue;
217			}
218		} else {
219			if (psenc->ch[i] == BASE64_OUT && psenc->cache == 0) {
220				psenc->mode = 0;
221				*u16 = (uint16_t)BASE64_IN;
222				done = 1;
223				continue;
224			}
225			len = FINDLEN(ei, (int)psenc->ch[i]);
226			if (len < 0) {
227				if (psenc->bits >= BASE64_BIT)
228					return EINVAL;
229				psenc->mode = 0;
230				psenc->bits = psenc->cache = 0;
231				if (psenc->ch[i] != BASE64_OUT) {
232					if (!ISDIRECT(ei, (int)psenc->ch[i]))
233						goto ilseq;
234					*u16 = (uint16_t)psenc->ch[i];
235					done = 1;
236				}
237			} else {
238				psenc->cache =
239				    (psenc->cache << BASE64_BIT) | len;
240				switch (psenc->bits) {
241				case 0: case 2: case 4: case 6: case 8:
242					psenc->bits += BASE64_BIT;
243					break;
244				case 10: case 12: case 14:
245					psenc->bits -= (UTF16_BIT - BASE64_BIT);
246					*u16 = (psenc->cache >> psenc->bits)
247					    & UTF16_MAX;
248					done = 1;
249					break;
250				default:
251					return EINVAL;
252				}
253			}
254		}
255	}
256
257	if (psenc->chlen > i)
258		return EINVAL;
259	psenc->chlen = 0;
260	*nresult = (size_t)((*u16 == 0) ? 0 : s0 - *s);
261	*s = s0;
262
263	return 0;
264
265ilseq:
266	*nresult = (size_t)-1;
267	return EILSEQ;
268}
269
270static int
271_citrus_UTF7_mbrtowc_priv(_UTF7EncodingInfo * __restrict ei,
272	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
273	_UTF7State * __restrict psenc, size_t * __restrict nresult)
274{
275	const char *s0;
276	uint32_t u32;
277	uint16_t hi, lo;
278	size_t siz, nr;
279	int err;
280
281	_DIAGASSERT(ei != NULL);
282	/* pwc may be null */
283	_DIAGASSERT(s != NULL);
284	_DIAGASSERT(psenc != NULL);
285
286	if (*s == NULL) {
287		_citrus_UTF7_init_state(ei, psenc);
288		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
289		return 0;
290	}
291	s0 = *s;
292	if (psenc->surrogate) {
293		hi = (psenc->cache >> 2) & UTF16_MAX;
294		if (hi < HISRG_MIN || hi > HISRG_MAX)
295			return EINVAL;
296		siz = 0;
297	} else {
298		err = _citrus_UTF7_mbtoutf16(ei, &hi, &s0, n, psenc, &nr);
299		if (nr == (size_t)-1 || nr == (size_t)-2) {
300			*nresult = nr;
301			return err;
302		}
303		if (err != 0)
304			return err;
305		n -= nr;
306		siz = nr;
307		if (hi < HISRG_MIN || hi > HISRG_MAX) {
308			u32 = (uint32_t)hi;
309			goto done;
310		}
311		psenc->surrogate = 1;
312	}
313	err = _citrus_UTF7_mbtoutf16(ei, &lo, &s0, n, psenc, &nr);
314	if (nr == (size_t)-1 || nr == (size_t)-2) {
315		*nresult = nr;
316		return err;
317	}
318	if (err != 0)
319		return err;
320	hi -= HISRG_MIN;
321	lo -= LOSRG_MIN;
322	u32 = (hi << 10 | lo) + SRG_BASE;
323	siz += nr;
324done:
325	*s = s0;
326	if (pwc != NULL)
327		*pwc = (wchar_t)u32;
328	if (u32 == (uint32_t)0) {
329		*nresult = (size_t)0;
330		_citrus_UTF7_init_state(ei, psenc);
331	} else {
332		*nresult = siz;
333		psenc->surrogate = 0;
334	}
335	return err;
336}
337
338static int
339_citrus_UTF7_utf16tomb(_UTF7EncodingInfo * __restrict ei,
340	char * __restrict s, size_t n, uint16_t u16,
341	_UTF7State * __restrict psenc, size_t * __restrict nresult)
342{
343	int bits, i;
344
345	_DIAGASSERT(ei != NULL);
346	_DIAGASSERT(psenc != NULL);
347
348	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT)
349		return EINVAL;
350
351	if (ISSAFE(ei, u16)) {
352		if (psenc->mode) {
353			if (psenc->bits > 0) {
354				bits = BASE64_BIT - psenc->bits;
355				i = (psenc->cache << bits) & BASE64_MAX;
356				psenc->ch[psenc->chlen++] = base64[i];
357				psenc->bits = psenc->cache = 0;
358			}
359			if (u16 == BASE64_OUT || FINDLEN(ei, u16) >= 0)
360				psenc->ch[psenc->chlen++] = BASE64_OUT;
361			psenc->mode = 0;
362		}
363		if (psenc->bits != 0)
364			return EINVAL;
365		psenc->ch[psenc->chlen++] = (char)u16;
366		if (u16 == BASE64_IN)
367			psenc->ch[psenc->chlen++] = BASE64_OUT;
368	} else {
369		if (!psenc->mode) {
370			if (psenc->bits > 0)
371				return EINVAL;
372			psenc->ch[psenc->chlen++] = BASE64_IN;
373			psenc->mode = 1;
374		}
375		psenc->cache = (psenc->cache << UTF16_BIT) | u16;
376		bits = UTF16_BIT + psenc->bits;
377		psenc->bits = bits % BASE64_BIT;
378		while ((bits -= BASE64_BIT) >= 0) {
379			i = (psenc->cache >> bits) & BASE64_MAX;
380			psenc->ch[psenc->chlen++] = base64[i];
381		}
382	}
383	memcpy(s, psenc->ch, psenc->chlen);
384	*nresult = psenc->chlen;
385	psenc->chlen = 0;
386
387	return 0;
388}
389
390static int
391_citrus_UTF7_wcrtomb_priv(_UTF7EncodingInfo * __restrict ei,
392	char * __restrict s, size_t n, wchar_t wchar,
393	_UTF7State * __restrict psenc, size_t * __restrict nresult)
394{
395	uint32_t u32;
396	uint16_t u16[2];
397	int err, len, i;
398	size_t siz, nr;
399
400	_DIAGASSERT(ei != NULL);
401	_DIAGASSERT(s != NULL);
402	_DIAGASSERT(psenc != NULL);
403	_DIAGASSERT(nresult != NULL);
404
405	u32 = (uint32_t)wchar;
406	if (u32 <= UTF16_MAX) {
407		u16[0] = (uint16_t)u32;
408		len = 1;
409	} else if (u32 <= UTF32_MAX) {
410		u32 -= SRG_BASE;
411		u16[0] = (u32 >> 10) + HISRG_MIN;
412		u16[1] = ((uint16_t)(u32 & UINT32_C(0x3ff))) + LOSRG_MIN;
413		len = 2;
414	} else {
415		*nresult = (size_t)-1;
416		return EILSEQ;
417	}
418	siz = 0;
419	for (i = 0; i < len; ++i) {
420		err = _citrus_UTF7_utf16tomb(ei, s, n, u16[i], psenc, &nr);
421		if (err != 0)
422			return err; /* XXX: state has been modified */
423		s += nr;
424		n -= nr;
425		siz += nr;
426	}
427	*nresult = siz;
428
429	return 0;
430}
431
432static int
433/* ARGSUSED */
434_citrus_UTF7_put_state_reset(_UTF7EncodingInfo * __restrict ei,
435	char * __restrict s, size_t n, _UTF7State * __restrict psenc,
436	size_t * __restrict nresult)
437{
438	int bits, pos;
439
440	_DIAGASSERT(ei != NULL);
441	_DIAGASSERT(s != NULL);
442	_DIAGASSERT(psenc != NULL);
443	_DIAGASSERT(nresult != NULL);
444
445	if (psenc->chlen != 0 || psenc->bits > BASE64_BIT || psenc->surrogate)
446		return EINVAL;
447
448	if (psenc->mode) {
449		if (psenc->bits > 0) {
450			if (n-- < 1)
451				return E2BIG;
452			bits = BASE64_BIT - psenc->bits;
453			pos = (psenc->cache << bits) & BASE64_MAX;
454			psenc->ch[psenc->chlen++] = base64[pos];
455			psenc->ch[psenc->chlen++] = BASE64_OUT;
456			psenc->bits = psenc->cache = 0;
457		}
458		psenc->mode = 0;
459	}
460	if (psenc->bits != 0)
461		return EINVAL;
462	if (n-- < 1)
463		return E2BIG;
464
465	_DIAGASSERT(n >= psenc->chlen);
466	*nresult = (size_t)psenc->chlen;
467	if (psenc->chlen > 0) {
468		memcpy(s, psenc->ch, psenc->chlen);
469		psenc->chlen = 0;
470	}
471
472	return 0;
473}
474
475static __inline int
476/*ARGSUSED*/
477_citrus_UTF7_stdenc_wctocs(_UTF7EncodingInfo * __restrict ei,
478			   _csid_t * __restrict csid,
479			   _index_t * __restrict idx, wchar_t wc)
480{
481	/* ei seem to be unused */
482	_DIAGASSERT(csid != NULL);
483	_DIAGASSERT(idx != NULL);
484
485	*csid = 0;
486	*idx = (_index_t)wc;
487
488	return 0;
489}
490
491static __inline int
492/*ARGSUSED*/
493_citrus_UTF7_stdenc_cstowc(_UTF7EncodingInfo * __restrict ei,
494			   wchar_t * __restrict wc,
495			   _csid_t csid, _index_t idx)
496{
497	/* ei seem to be unused */
498	_DIAGASSERT(wc != NULL);
499
500	if (csid != 0)
501		return EILSEQ;
502	*wc = (wchar_t)idx;
503
504	return 0;
505}
506
507static __inline int
508/*ARGSUSED*/
509_citrus_UTF7_stdenc_get_state_desc_generic(_UTF7EncodingInfo * __restrict ei,
510					   _UTF7State * __restrict psenc,
511					   int * __restrict rstate)
512{
513
514	if (psenc->chlen == 0)
515		*rstate = _STDENC_SDGEN_INITIAL;
516	else
517		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
518
519	return 0;
520}
521
522static void
523/*ARGSUSED*/
524_citrus_UTF7_encoding_module_uninit(_UTF7EncodingInfo *ei)
525{
526	/* ei seems to be unused */
527}
528
529static int
530/*ARGSUSED*/
531_citrus_UTF7_encoding_module_init(_UTF7EncodingInfo * __restrict ei,
532				  const void * __restrict var, size_t lenvar)
533{
534	const char *s;
535
536	_DIAGASSERT(ei != NULL);
537	/* var may be null */
538
539	memset(ei, 0, sizeof(*ei));
540
541#define FILL(str, flag)				\
542do {						\
543	for (s = str; *s != '\0'; s++)		\
544		ei->cell[*s & 0x7f] |= flag;	\
545} while (/*CONSTCOND*/0)
546
547	FILL(base64, (s - base64) + 1);
548	FILL(direct, EI_DIRECT);
549	FILL(option, EI_OPTION);
550	FILL(spaces, EI_SPACE);
551
552	return 0;
553}
554
555/* ----------------------------------------------------------------------
556 * public interface for ctype
557 */
558
/*
 * Instantiate the ctype interface for UTF7.  The template included below
 * is presumably expanded against the _FUNCNAME/_ENCODING_* hooks and the
 * *_priv functions defined earlier in this file -- confirm in
 * citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(UTF7);
_CITRUS_CTYPE_DEF_OPS(UTF7);

#include "citrus_ctype_template.h"
563
564/* ----------------------------------------------------------------------
565 * public interface for stdenc
566 */
567
/*
 * Instantiate the stdenc interface for UTF7.  The template included below
 * is presumably expanded against the same hooks plus the stdenc_* helpers
 * defined earlier in this file -- confirm in citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(UTF7);
_CITRUS_STDENC_DEF_OPS(UTF7);

#include "citrus_stdenc_template.h"
572