1/*	$NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $	*/
2
3/*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 *	$Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 */
30
31#include <sys/cdefs.h>
32#if defined(LIBC_SCCS) && !defined(lint)
33__RCSID("$NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $");
34#endif /* LIBC_SCCS and not lint */
35
36#include <assert.h>
37#include <errno.h>
38#include <string.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <stddef.h>
42#include <wchar.h>
43#include <sys/types.h>
44#include <limits.h>
45
46#include "citrus_namespace.h"
47#include "citrus_types.h"
48#include "citrus_module.h"
49#include "citrus_ctype.h"
50#include "citrus_stdenc.h"
51#include "citrus_iso2022.h"
52
53
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
57
58
59/*
60 * wchar_t mappings:
61 * ASCII (ESC ( B)		00000000 00000000 00000000 0xxxxxxx
62 * iso-8859-1 (ESC , A)		00000000 00000000 00000000 1xxxxxxx
63 * 94 charset (ESC ( F)		0fffffff 00000000 00000000 0xxxxxxx
64 * 94 charset (ESC ( M F)	0fffffff 1mmmmmmm 00000000 0xxxxxxx
65 * 96 charset (ESC , F)		0fffffff 00000000 00000000 1xxxxxxx
66 * 96 charset (ESC , M F)	0fffffff 1mmmmmmm 00000000 1xxxxxxx
67 * 94x94 charset (ESC $ ( F)	0fffffff 00000000 0xxxxxxx 0xxxxxxx
68 * 96x96 charset (ESC $ , F)	0fffffff 00000000 0xxxxxxx 1xxxxxxx
69 * 94x94 charset (ESC & V ESC $ ( F)
70 *				0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
71 * 94x94x94 charset (ESC $ ( F)	0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
72 * 96x96x96 charset (ESC $ , F)	0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
74 *				1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
75 */
76
77typedef struct {
78	u_char	type;
79#define	CS94		(0U)
80#define	CS96		(1U)
81#define	CS94MULTI	(2U)
82#define	CS96MULTI	(3U)
83
84	u_char	final;
85	u_char	interm;
86	u_char	vers;
87} _ISO2022Charset;
88
89static const _ISO2022Charset ascii    = { CS94, 'B', '\0', '\0' };
90static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' };
91
92typedef struct {
93	_ISO2022Charset	g[4];
94	/* need 3 bits to hold -1, 0, ..., 3 */
95	int	gl:3,
96		gr:3,
97		singlegl:3,
98		singlegr:3;
99	char ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
100	int chlen;
101	int flags;
102#define _ISO2022STATE_FLAG_INITIALIZED	1
103} _ISO2022State;
104
105typedef struct {
106	_ISO2022Charset	*recommend[4];
107	size_t	recommendsize[4];
108	_ISO2022Charset	initg[4];
109	int	maxcharset;
110	int	flags;
111#define	F_8BIT	0x0001
112#define	F_NOOLD	0x0002
113#define	F_SI	0x0010	/*0F*/
114#define	F_SO	0x0020	/*0E*/
115#define	F_LS0	0x0010	/*0F*/
116#define	F_LS1	0x0020	/*0E*/
117#define	F_LS2	0x0040	/*ESC n*/
118#define	F_LS3	0x0080	/*ESC o*/
119#define	F_LS1R	0x0100	/*ESC ~*/
120#define	F_LS2R	0x0200	/*ESC }*/
121#define	F_LS3R	0x0400	/*ESC |*/
122#define	F_SS2	0x0800	/*ESC N*/
123#define	F_SS3	0x1000	/*ESC O*/
124#define	F_SS2R	0x2000	/*8E*/
125#define	F_SS3R	0x4000	/*8F*/
126} _ISO2022EncodingInfo;
127typedef struct {
128	_ISO2022EncodingInfo ei;
129	struct {
130		/* for future multi-locale facility */
131		_ISO2022State	s_mblen;
132		_ISO2022State	s_mbrlen;
133		_ISO2022State	s_mbrtowc;
134		_ISO2022State	s_mbtowc;
135		_ISO2022State	s_mbsrtowcs;
136		_ISO2022State	s_mbsnrtowcs;
137		_ISO2022State	s_wcrtomb;
138		_ISO2022State	s_wcsrtombs;
139		_ISO2022State	s_wcsnrtombs;
140		_ISO2022State	s_wctomb;
141	} states;
142} _ISO2022CTypeInfo;
143
/* Accessors from the ctype container to its parts. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Names and properties consumed by the template headers included below. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* A state needs initializing until its INITIALIZED flag has been set. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
    (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * Sentinel returned by _ISO2022_sgetwchar() when no character could be
 * produced.  The expansion is fully parenthesized so that it stays one
 * operand in any context (e.g. "sizeof _ISO2022INVALID").
 */
#define _ISO2022INVALID ((wchar_t)-1)
158
/*
 * Byte classification helpers.  Each returns 1 when x falls in the
 * stated range and 0 otherwise.
 */

/* 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* 0x00..0x1f, 0x80..0x9f, or 0x7f */
static __inline int
iscntl(__uint8_t x)
{
	return (isc0(x) || isc1(x) || x == 0x7f);
}

/* 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x <= 0x7f);
}

/* 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x <= 0x7f);
}

/* 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* 0x60..0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
167
168static __inline int
169getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
170{
171
172	_DIAGASSERT(p != NULL);
173	_DIAGASSERT(cs != NULL);
174
175	if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
176		cs->final = (u_char)(p[3] & 0xff);
177		cs->interm = '\0';
178		cs->vers = '\0';
179		cs->type = CS94MULTI;
180	} else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
181		cs->final = (u_char)(p[3] & 0xff);
182		cs->interm = '\0';
183		cs->vers = '\0';
184		cs->type = CS96MULTI;
185	} else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
186		cs->final = (u_char)(p[2] & 0xff);
187		cs->interm = '\0';
188		cs->vers = '\0';
189		cs->type = CS94;
190	} else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
191		cs->final = (u_char )(p[2] & 0xff);
192		cs->interm = '\0';
193		cs->vers = '\0';
194		cs->type = CS96;
195	} else {
196		return 1;
197	}
198
199	return 0;
200}
201
202
203#define _NOTMATCH	0
204#define _MATCH		1
205#define _PARSEFAIL	2
206
207static __inline int
208get_recommend(_ISO2022EncodingInfo * __restrict ei,
209	      const char * __restrict token)
210{
211	int i;
212	_ISO2022Charset cs, *p;
213
214	if (!strchr("0123", token[0]) || token[1] != '=')
215		return (_NOTMATCH);
216
217	if (getcs(&token[2], &cs) == 0)
218		;
219	else if (!strcmp(&token[2], "94")) {
220		cs.final = (u_char)(token[4]);
221		cs.interm = '\0';
222		cs.vers = '\0';
223		cs.type = CS94;
224	} else if (!strcmp(&token[2], "96")) {
225		cs.final = (u_char)(token[4]);
226		cs.interm = '\0';
227		cs.vers = '\0';
228		cs.type = CS96;
229	} else if (!strcmp(&token[2], "94$")) {
230		cs.final = (u_char)(token[5]);
231		cs.interm = '\0';
232		cs.vers = '\0';
233		cs.type = CS94MULTI;
234	} else if (!strcmp(&token[2], "96$")) {
235		cs.final = (u_char)(token[5]);
236		cs.interm = '\0';
237		cs.vers = '\0';
238		cs.type = CS96MULTI;
239	} else {
240		return (_PARSEFAIL);
241	}
242
243	i = token[0] - '0';
244	if (!ei->recommend[i]) {
245		ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
246	} else {
247		p = realloc(ei->recommend[i],
248		    sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
249		if (!p)
250			return (_PARSEFAIL);
251		ei->recommend[i] = p;
252	}
253	if (!ei->recommend[i])
254		return (_PARSEFAIL);
255	ei->recommendsize[i]++;
256
257	(ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
258	(ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
259	(ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
260	(ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
261
262	return (_MATCH);
263}
264
265static __inline int
266get_initg(_ISO2022EncodingInfo * __restrict ei,
267	  const char * __restrict token)
268{
269	_ISO2022Charset cs;
270
271	if (strncmp("INIT", &token[0], 4) ||
272	    !strchr("0123", token[4]) ||
273	    token[5] != '=')
274		return (_NOTMATCH);
275
276	if (getcs(&token[6], &cs) != 0)
277		return (_PARSEFAIL);
278
279	ei->initg[token[4] - '0'].type = cs.type;
280	ei->initg[token[4] - '0'].final = cs.final;
281	ei->initg[token[4] - '0'].interm = cs.interm;
282	ei->initg[token[4] - '0'].vers = cs.vers;
283
284	return (_MATCH);
285}
286
287static __inline int
288get_max(_ISO2022EncodingInfo * __restrict ei,
289	const char * __restrict token)
290{
291	if (!strcmp(token, "MAX1")) {
292		ei->maxcharset = 1;
293	} else if (!strcmp(token, "MAX2")) {
294		ei->maxcharset = 2;
295	} else if (!strcmp(token, "MAX3")) {
296		ei->maxcharset = 3;
297	} else
298		return (_NOTMATCH);
299
300	return (_MATCH);
301}
302
303
304static __inline int
305get_flags(_ISO2022EncodingInfo * __restrict ei,
306	  const char * __restrict token)
307{
308	int i;
309	static struct {
310		const char	*tag;
311		int		flag;
312	} const tags[] = {
313		{ "DUMMY",	0	},
314		{ "8BIT",	F_8BIT	},
315		{ "NOOLD",	F_NOOLD	},
316		{ "SI",		F_SI	},
317		{ "SO",		F_SO	},
318		{ "LS0",	F_LS0	},
319		{ "LS1",	F_LS1	},
320		{ "LS2",	F_LS2	},
321		{ "LS3",	F_LS3	},
322		{ "LS1R",	F_LS1R	},
323		{ "LS2R",	F_LS2R	},
324		{ "LS3R",	F_LS3R	},
325		{ "SS2",	F_SS2	},
326		{ "SS3",	F_SS3	},
327		{ "SS2R",	F_SS2R	},
328		{ "SS3R",	F_SS3R	},
329		{ NULL,		0 }
330	};
331
332	for (i = 0; tags[i].tag; i++) {
333		if (!strcmp(token, tags[i].tag)) {
334			ei->flags |= tags[i].flag;
335			return (_MATCH);
336		}
337	}
338
339	return (_NOTMATCH);
340}
341
342
343static __inline int
344_citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
345			       const void * __restrict var, size_t lenvar)
346{
347	char const *v, *e;
348	char buf[20];
349	int i, len, ret;
350
351	_DIAGASSERT(ei != NULL);
352
353
354	/*
355	 * parse VARIABLE section.
356	 */
357
358	if (!var)
359		return (EFTYPE);
360
361	v = (const char *) var;
362
363	/* initialize structure */
364	ei->maxcharset = 0;
365	for (i = 0; i < 4; i++) {
366		ei->recommend[i] = NULL;
367		ei->recommendsize[i] = 0;
368	}
369	ei->flags = 0;
370
371	while (*v) {
372		while (*v == ' ' || *v == '\t')
373			++v;
374
375		/* find the token */
376		e = v;
377		while (*e && *e != ' ' && *e != '\t')
378			++e;
379
380		len = e-v;
381		if (len == 0)
382			break;
383		if (len>=sizeof(buf))
384			goto parsefail;
385		snprintf(buf, sizeof(buf), "%.*s", len, v);
386
387		if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
388			;
389		else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
390			;
391		else if ((ret = get_max(ei, buf)) != _NOTMATCH)
392			;
393		else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
394			;
395		else
396			ret = _PARSEFAIL;
397		if (ret==_PARSEFAIL)
398			goto parsefail;
399		v = e;
400
401	}
402
403	return (0);
404
405parsefail:
406	free(ei->recommend[0]);
407	free(ei->recommend[1]);
408	free(ei->recommend[2]);
409	free(ei->recommend[3]);
410
411	return (EFTYPE);
412}
413
414static __inline void
415/*ARGSUSED*/
416_citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
417			   _ISO2022State * __restrict s)
418{
419	int i;
420
421	memset(s, 0, sizeof(*s));
422	s->gl = 0;
423	s->gr = (ei->flags & F_8BIT) ? 1 : -1;
424
425	for (i = 0; i < 4; i++) {
426		if (ei->initg[i].final) {
427			s->g[i].type = ei->initg[i].type;
428			s->g[i].final = ei->initg[i].final;
429			s->g[i].interm = ei->initg[i].interm;
430		}
431	}
432	s->singlegl = s->singlegr = -1;
433	s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
434}
435
436static __inline void
437/*ARGSUSED*/
438_citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
439			   void * __restrict pspriv,
440			   const _ISO2022State * __restrict s)
441{
442	memcpy(pspriv, (const void *)s, sizeof(*s));
443}
444
445static __inline void
446/*ARGSUSED*/
447_citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
448			     _ISO2022State * __restrict s,
449			     const void * __restrict pspriv)
450{
451	memcpy((void *)s, pspriv, sizeof(*s));
452}
453
454static int
455/*ARGSUSED*/
456_citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
457				     const void * __restrict var,
458				     size_t lenvar)
459{
460
461	_DIAGASSERT(ei != NULL);
462
463	return _citrus_ISO2022_parse_variable(ei, var, lenvar);
464}
465
466static void
467/*ARGSUSED*/
468_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
469{
470}
471
472#define	ESC	'\033'
473#define	ECMA	-1
474#define	INTERM	-2
475#define	OECMA	-3
476static const struct seqtable {
477	int type;
478	int csoff;
479	int finaloff;
480	int intermoff;
481	int versoff;
482	int len;
483	int chars[10];
484} seqtable[] = {
485	/* G0 94MULTI special */
486	{ CS94MULTI, -1, 2, -1, -1,	3, { ESC, '$', OECMA }, },
487	/* G0 94MULTI special with version identification */
488	{ CS94MULTI, -1, 5, -1, 2,	6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
489	/* G? 94 */
490	{ CS94, 1, 2, -1, -1,		3, { ESC, CS94, ECMA, }, },
491	/* G? 94 with 2nd intermediate char */
492	{ CS94, 1, 3, 2, -1,		4, { ESC, CS94, INTERM, ECMA, }, },
493	/* G? 96 */
494	{ CS96, 1, 2, -1, -1,		3, { ESC, CS96, ECMA, }, },
495	/* G? 96 with 2nd intermediate char */
496	{ CS96, 1, 3, 2, -1,		4, { ESC, CS96, INTERM, ECMA, }, },
497	/* G? 94MULTI */
498	{ CS94MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS94, ECMA, }, },
499	/* G? 96MULTI */
500	{ CS96MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS96, ECMA, }, },
501	/* G? 94MULTI with version specification */
502	{ CS94MULTI, 5, 6, -1, 2,	7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
503	/* LS2/3 */
504	{ -1, -1, -1, -1, -1,		2, { ESC, 'n', }, },
505	{ -1, -1, -1, -1, -1,		2, { ESC, 'o', }, },
506	/* LS1/2/3R */
507	{ -1, -1, -1, -1, -1,		2, { ESC, '~', }, },
508	{ -1, -1, -1, -1, -1,		2, { ESC, /*{*/ '}', }, },
509	{ -1, -1, -1, -1, -1,		2, { ESC, '|', }, },
510	/* SS2/3 */
511	{ -1, -1, -1, -1, -1,		2, { ESC, 'N', }, },
512	{ -1, -1, -1, -1, -1,		2, { ESC, 'O', }, },
513	/* end of records */
514	{ 0, }
515};
516
517static int
518seqmatch(const char * __restrict s, size_t n,
519	 const struct seqtable * __restrict sp)
520{
521	const int *p;
522
523	_DIAGASSERT(s != NULL);
524	_DIAGASSERT(sp != NULL);
525
526	p = sp->chars;
527	while (p - sp->chars < n && p - sp->chars < sp->len) {
528		switch (*p) {
529		case ECMA:
530			if (!isecma(*s))
531				goto terminate;
532			break;
533		case OECMA:
534			if (*s && strchr("@AB", *s))
535				break;
536			else
537				goto terminate;
538		case INTERM:
539			if (!isinterm(*s))
540				goto terminate;
541			break;
542		case CS94:
543			if (*s && strchr("()*+", *s))
544				break;
545			else
546				goto terminate;
547		case CS96:
548			if (*s && strchr(",-./", *s))
549				break;
550			else
551				goto terminate;
552		default:
553			if (*s != *p)
554				goto terminate;
555			break;
556		}
557
558		p++;
559		s++;
560	}
561
562terminate:
563	return p - sp->chars;
564}
565
566static wchar_t
567_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
568		const char * __restrict string, size_t n,
569		const char ** __restrict result,
570		_ISO2022State * __restrict psenc)
571{
572	wchar_t wchar = 0;
573	int cur;
574	const struct seqtable *sp;
575	int nmatch;
576	int i;
577
578	_DIAGASSERT(ei != NULL);
579	_DIAGASSERT(psenc != NULL);
580	_DIAGASSERT(string != NULL);
581	/* result may be NULL */
582
583	while (1) {
584		/* SI/SO */
585		if (1 <= n && string[0] == '\017') {
586			psenc->gl = 0;
587			string++;
588			n--;
589			continue;
590		}
591		if (1 <= n && string[0] == '\016') {
592			psenc->gl = 1;
593			string++;
594			n--;
595			continue;
596		}
597
598		/* SS2/3R */
599		if (1 <= n && string[0] && strchr("\217\216", string[0])) {
600			psenc->singlegl = psenc->singlegr =
601			    (string[0] - '\216') + 2;
602			string++;
603			n--;
604			continue;
605		}
606
607		/* eat the letter if this is not ESC */
608		if (1 <= n && string[0] != '\033')
609			break;
610
611		/* look for a perfect match from escape sequences */
612		for (sp = &seqtable[0]; sp->len; sp++) {
613			nmatch = seqmatch(string, n, sp);
614			if (sp->len == nmatch && n >= sp->len)
615				break;
616		}
617
618		if (!sp->len)
619			goto notseq;
620
621		if (sp->type != -1) {
622			if (sp->csoff == -1)
623				i = 0;
624			else {
625				switch (sp->type) {
626				case CS94:
627				case CS94MULTI:
628					i = string[sp->csoff] - '(';
629					break;
630				case CS96:
631				case CS96MULTI:
632					i = string[sp->csoff] - ',';
633					break;
634				default:
635					return (_ISO2022INVALID);
636				}
637			}
638			psenc->g[i].type = sp->type;
639			psenc->g[i].final = '\0';
640			psenc->g[i].interm = '\0';
641			psenc->g[i].vers = '\0';
642			/* sp->finaloff must not be -1 */
643			if (sp->finaloff != -1)
644				psenc->g[i].final = string[sp->finaloff];
645			if (sp->intermoff != -1)
646				psenc->g[i].interm = string[sp->intermoff];
647			if (sp->versoff != -1)
648				psenc->g[i].vers = string[sp->versoff];
649
650			string += sp->len;
651			n -= sp->len;
652			continue;
653		}
654
655		/* LS2/3 */
656		if (2 <= n && string[0] == '\033'
657		 && string[1] && strchr("no", string[1])) {
658			psenc->gl = string[1] - 'n' + 2;
659			string += 2;
660			n -= 2;
661			continue;
662		}
663
664		/* LS1/2/3R */
665			/* XXX: { for vi showmatch */
666		if (2 <= n && string[0] == '\033'
667		 && string[1] && strchr("~}|", string[1])) {
668			psenc->gr = 3 - (string[1] - '|');
669			string += 2;
670			n -= 2;
671			continue;
672		}
673
674		/* SS2/3 */
675		if (2 <= n && string[0] == '\033'
676		 && string[1] && strchr("NO", string[1])) {
677			psenc->singlegl = (string[1] - 'N') + 2;
678			string += 2;
679			n -= 2;
680			continue;
681		}
682
683	notseq:
684		/*
685		 * if we've got an unknown escape sequence, eat the ESC at the
686		 * head.  otherwise, wait till full escape sequence comes.
687		 */
688		for (sp = &seqtable[0]; sp->len; sp++) {
689			nmatch = seqmatch(string, n, sp);
690			if (!nmatch)
691				continue;
692
693			/*
694			 * if we are in the middle of escape sequence,
695			 * we still need to wait for more characters to come
696			 */
697			if (n < sp->len) {
698				if (nmatch == n) {
699					if (result)
700						*result = string;
701					return (_ISO2022INVALID);
702				}
703			} else {
704				if (nmatch == sp->len) {
705					/* this case should not happen */
706					goto eat;
707				}
708			}
709		}
710
711		break;
712	}
713
714eat:
715	/* no letter to eat */
716	if (n < 1) {
717		if (result)
718			*result = string;
719		return (_ISO2022INVALID);
720	}
721
722	/* normal chars.  always eat C0/C1 as is. */
723	if (iscntl(*string & 0xff))
724		cur = -1;
725	else if (*string & 0x80) {
726		cur = (psenc->singlegr == -1)
727			? psenc->gr : psenc->singlegr;
728	} else {
729		cur = (psenc->singlegl == -1)
730			? psenc->gl : psenc->singlegl;
731	}
732
733	if (cur == -1) {
734asis:
735		wchar = *string++ & 0xff;
736		if (result)
737			*result = string;
738		/* reset single shift state */
739		psenc->singlegr = psenc->singlegl = -1;
740		return wchar;
741	}
742
743	/* length error check */
744	switch (psenc->g[cur].type) {
745	case CS94MULTI:
746	case CS96MULTI:
747		if (!isthree(psenc->g[cur].final)) {
748			if (2 <= n
749			 && (string[0] & 0x80) == (string[1] & 0x80))
750				break;
751		} else {
752			if (3 <= n
753			 && (string[0] & 0x80) == (string[1] & 0x80)
754			 && (string[0] & 0x80) == (string[2] & 0x80))
755				break;
756		}
757
758		/* we still need to wait for more characters to come */
759		if (result)
760			*result = string;
761		return (_ISO2022INVALID);
762
763	case CS94:
764	case CS96:
765		if (1 <= n)
766			break;
767
768		/* we still need to wait for more characters to come */
769		if (result)
770			*result = string;
771		return (_ISO2022INVALID);
772	}
773
774	/* range check */
775	switch (psenc->g[cur].type) {
776	case CS94:
777		if (!(is94(string[0] & 0x7f)))
778			goto asis;
779	case CS96:
780		if (!(is96(string[0] & 0x7f)))
781			goto asis;
782		break;
783	case CS94MULTI:
784		if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
785			goto asis;
786		break;
787	case CS96MULTI:
788		if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
789			goto asis;
790		break;
791	}
792
793	/* extract the character. */
794	switch (psenc->g[cur].type) {
795	case CS94:
796		/* special case for ASCII. */
797		if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
798			wchar = *string++;
799			wchar &= 0x7f;
800			break;
801		}
802		wchar = psenc->g[cur].final;
803		wchar = (wchar << 8);
804		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
805		wchar = (wchar << 8);
806		wchar = (wchar << 8) | (*string++ & 0x7f);
807		break;
808	case CS96:
809		/* special case for ISO-8859-1. */
810		if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
811			wchar = *string++;
812			wchar &= 0x7f;
813			wchar |= 0x80;
814			break;
815		}
816		wchar = psenc->g[cur].final;
817		wchar = (wchar << 8);
818		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
819		wchar = (wchar << 8);
820		wchar = (wchar << 8) | (*string++ & 0x7f);
821		wchar |= 0x80;
822		break;
823	case CS94MULTI:
824	case CS96MULTI:
825		wchar = psenc->g[cur].final;
826		wchar = (wchar << 8);
827		if (isthree(psenc->g[cur].final))
828			wchar |= (*string++ & 0x7f);
829		wchar = (wchar << 8) | (*string++ & 0x7f);
830		wchar = (wchar << 8) | (*string++ & 0x7f);
831		if (psenc->g[cur].type == CS96MULTI)
832			wchar |= 0x80;
833		break;
834	}
835
836	if (result)
837		*result = string;
838	/* reset single shift state */
839	psenc->singlegr = psenc->singlegl = -1;
840	return wchar;
841}
842
843
844
/*
 * _citrus_ISO2022_mbrtowc_priv --
 *	Restartable multibyte-to-wide conversion of up to n bytes at *s.
 *
 *	*s == NULL resets *psenc to the initial state and stores
 *	_ENCODING_IS_STATE_DEPENDENT in *nresult.
 *
 *	Otherwise one of three things happens:
 *	 - a character is decoded: it is stored in *pwc (if pwc is not
 *	   NULL), *s is advanced, and *nresult is 0 for the null wide
 *	   character or else the consumed byte count less the number of
 *	   bytes that were already buffered on entry; returns 0.
 *	 - the input ends before a character is complete: *nresult is
 *	   (size_t)-2 and the function returns 0.  An unfinished sequence
 *	   is kept in psenc->ch for the next call.
 *	 - the buffered state is invalid or the pending sequence does not
 *	   fit psenc->ch: the buffer is discarded, *nresult is
 *	   (size_t)-1 and EILSEQ is returned.
 */
static int
_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
			     wchar_t * __restrict pwc,
			     const char ** __restrict s,
			     size_t n, _ISO2022State * __restrict psenc,
			     size_t * __restrict nresult)
{
	wchar_t wchar;
	const char *s0, *p, *result;
	int c;		/* number of bytes consumed so far */
	int chlenbak;	/* psenc->chlen as it was on entry */

	_DIAGASSERT(nresult != 0);
	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(s != NULL);

	if (*s == NULL) {
		_citrus_ISO2022_init_state(ei, psenc);
		*nresult = _ENCODING_IS_STATE_DEPENDENT;
		return 0;
	}
	s0 = *s;
	c = 0;
	chlenbak = psenc->chlen;

	/*
	 * if we have something in buffer, use that.
	 * otherwise, skip here
	 */
	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
		/* illegal state */
		_citrus_ISO2022_init_state(ei, psenc);
		goto encoding_error;
	}
	if (psenc->chlen == 0)
		goto emptybuf;

	/* buffer is not empty */
	p = psenc->ch;
	while (psenc->chlen < sizeof(psenc->ch)) {
		/* append one more input byte, if there is one, and retry */
		if (n > 0) {
			psenc->ch[psenc->chlen++] = *s0++;
			n--;
		}

		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
					   &result, psenc);
		c += result - p;
		if (wchar != _ISO2022INVALID) {
			/* keep whatever follows the character in the buffer */
			if (psenc->chlen > c)
				memmove(psenc->ch, result, psenc->chlen - c);
			if (psenc->chlen < c)
				psenc->chlen = 0;
			else
				psenc->chlen -= c;
			goto output;
		}

		if (n == 0) {
			if ((result - p) == psenc->chlen)
				/* complete shift sequence. */
				psenc->chlen = 0;
			goto restart;
		}

		p = result;
	}

	/* escape sequence too long? */
	goto encoding_error;

emptybuf:
	/* nothing buffered: decode straight from the caller's input */
	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
	if (wchar != _ISO2022INVALID) {
		c += result - s0;
		psenc->chlen = 0;
		s0 = result;
		goto output;
	}
	if (result > s0) {
		/* only shift/designation sequences were consumed */
		c += (result - s0);
		n -= (result - s0);
		s0 = result;
		if (n>0)
			goto emptybuf;
		/* complete shift sequence. */
		goto restart;
	}
	/*
	 * Nothing more could be consumed.  Save the n + c bytes starting
	 * at s0 - c, i.e. the input of this call from its beginning, for
	 * the next call, provided they fit the buffer.
	 */
	n += c;
	if (n < sizeof(psenc->ch)) {
		memcpy(psenc->ch, s0 - c, n);
		psenc->chlen = n;
		s0 = result;
		goto restart;
	}

	/* escape sequence too long? */

encoding_error:
	psenc->chlen = 0;
	*nresult = (size_t)-1;
	return (EILSEQ);

output:
	*s = s0;
	if (pwc)
		*pwc = wchar;

	if (!wchar)
		*nresult = 0;
	else
		*nresult = c - chlenbak;

	return (0);

restart:
	*s = s0;
	*nresult = (size_t)-2;

	return (0);
}
967
968static int
969recommendation(_ISO2022EncodingInfo * __restrict ei,
970	       _ISO2022Charset * __restrict cs)
971{
972	int i, j;
973	_ISO2022Charset *recommend;
974
975	_DIAGASSERT(ei != NULL);
976	_DIAGASSERT(cs != NULL);
977
978	/* first, try a exact match. */
979	for (i = 0; i < 4; i++) {
980		recommend = ei->recommend[i];
981		for (j = 0; j < ei->recommendsize[i]; j++) {
982			if (cs->type != recommend[j].type)
983				continue;
984			if (cs->final != recommend[j].final)
985				continue;
986			if (cs->interm != recommend[j].interm)
987				continue;
988
989			return i;
990		}
991	}
992
993	/* then, try a wildcard match over final char. */
994	for (i = 0; i < 4; i++) {
995		recommend = ei->recommend[i];
996		for (j = 0; j < ei->recommendsize[i]; j++) {
997			if (cs->type != recommend[j].type)
998				continue;
999			if (cs->final && (cs->final != recommend[j].final))
1000				continue;
1001			if (cs->interm && (cs->interm != recommend[j].interm))
1002				continue;
1003
1004			return i;
1005		}
1006	}
1007
1008	/* there's no recommendation. make a guess. */
1009	if (ei->maxcharset == 0) {
1010		return 0;
1011	} else {
1012		switch (cs->type) {
1013		case CS94:
1014		case CS94MULTI:
1015			return 0;
1016		case CS96:
1017		case CS96MULTI:
1018			return 1;
1019		}
1020	}
1021	return 0;
1022}
1023
1024static int
1025_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1026		   char * __restrict string, size_t n,
1027		   char ** __restrict result,
1028		   _ISO2022State * __restrict psenc,
1029		   size_t * __restrict nresult)
1030{
1031	int i = 0;
1032	size_t len;
1033	_ISO2022Charset cs;
1034	char *p;
1035	char tmp[MB_LEN_MAX];
1036	int target;
1037	u_char mask;
1038	int bit8;
1039
1040	_DIAGASSERT(ei != NULL);
1041	_DIAGASSERT(string != NULL);
1042	/* result may be NULL */
1043	_DIAGASSERT(psenc != NULL);
1044	_DIAGASSERT(nresult != NULL);
1045
1046	if (isc0(wc & 0xff)) {
1047		/* go back to INIT0 or ASCII on control chars */
1048		cs = ei->initg[0].final ? ei->initg[0] : ascii;
1049	} else if (isc1(wc & 0xff)) {
1050		/* go back to INIT1 or ISO-8859-1 on control chars */
1051		cs = ei->initg[1].final ? ei->initg[1] : iso88591;
1052	} else if (!(wc & ~0xff)) {
1053		if (wc & 0x80) {
1054			/* special treatment for ISO-8859-1 */
1055			cs = iso88591;
1056		} else {
1057			/* special treatment for ASCII */
1058			cs = ascii;
1059		}
1060	} else {
1061		cs.final = (wc >> 24) & 0x7f;
1062		if ((wc >> 16) & 0x80)
1063			cs.interm = (wc >> 16) & 0x7f;
1064		else
1065			cs.interm = '\0';
1066		if (wc & 0x80)
1067			cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1068		else
1069			cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1070	}
1071	target = recommendation(ei, &cs);
1072	p = tmp;
1073	bit8 = ei->flags & F_8BIT;
1074
1075	/* designate the charset onto the target plane(G0/1/2/3). */
1076	if (psenc->g[target].type == cs.type
1077	 && psenc->g[target].final == cs.final
1078	 && psenc->g[target].interm == cs.interm)
1079		goto planeok;
1080
1081	*p++ = '\033';
1082	if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1083		*p++ = '$';
1084	if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1085	 && !cs.interm && !(ei->flags & F_NOOLD))
1086		;
1087	else if (cs.type == CS94 || cs.type == CS94MULTI)
1088		*p++ = "()*+"[target];
1089	else
1090		*p++ = ",-./"[target];
1091	if (cs.interm)
1092		*p++ = cs.interm;
1093	*p++ = cs.final;
1094
1095	psenc->g[target].type = cs.type;
1096	psenc->g[target].final = cs.final;
1097	psenc->g[target].interm = cs.interm;
1098
1099planeok:
1100	/* invoke the plane onto GL or GR. */
1101	if (psenc->gl == target)
1102		goto sideok;
1103	if (bit8 && psenc->gr == target)
1104		goto sideok;
1105
1106	if (target == 0 && (ei->flags & F_LS0)) {
1107		*p++ = '\017';
1108		psenc->gl = 0;
1109	} else if (target == 1 && (ei->flags & F_LS1)) {
1110		*p++ = '\016';
1111		psenc->gl = 1;
1112	} else if (target == 2 && (ei->flags & F_LS2)) {
1113		*p++ = '\033';
1114		*p++ = 'n';
1115		psenc->gl = 2;
1116	} else if (target == 3 && (ei->flags & F_LS3)) {
1117		*p++ = '\033';
1118		*p++ = 'o';
1119		psenc->gl = 3;
1120	} else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1121		*p++ = '\033';
1122		*p++ = '~';
1123		psenc->gr = 1;
1124	} else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1125		*p++ = '\033';
1126		/*{*/
1127		*p++ = '}';
1128		psenc->gr = 2;
1129	} else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1130		*p++ = '\033';
1131		*p++ = '|';
1132		psenc->gr = 3;
1133	} else if (target == 2 && (ei->flags & F_SS2)) {
1134		*p++ = '\033';
1135		*p++ = 'N';
1136		psenc->singlegl = 2;
1137	} else if (target == 3 && (ei->flags & F_SS3)) {
1138		*p++ = '\033';
1139		*p++ = 'O';
1140		psenc->singlegl = 3;
1141	} else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1142		*p++ = '\216';
1143		*p++ = 'N';
1144		psenc->singlegl = psenc->singlegr = 2;
1145	} else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1146		*p++ = '\217';
1147		*p++ = 'O';
1148		psenc->singlegl = psenc->singlegr = 3;
1149	} else
1150		goto ilseq;
1151
1152sideok:
1153	if (psenc->singlegl == target)
1154		mask = 0x00;
1155	else if (psenc->singlegr == target)
1156		mask = 0x80;
1157	else if (psenc->gl == target)
1158		mask = 0x00;
1159	else if ((ei->flags & F_8BIT) && psenc->gr == target)
1160		mask = 0x80;
1161	else
1162		goto ilseq;
1163
1164	switch (cs.type) {
1165	case CS94:
1166	case CS96:
1167		i = 1;
1168		break;
1169	case CS94MULTI:
1170	case CS96MULTI:
1171		i = !iscntl(wc & 0xff) ?
1172		    (isthree(cs.final) ? 3 : 2) : 1;
1173		break;
1174	}
1175	while (i-- > 0)
1176		*p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1177
1178	/* reset single shift state */
1179	psenc->singlegl = psenc->singlegr = -1;
1180
1181	len = (size_t)(p - tmp);
1182	if (n < len) {
1183		if (result)
1184			*result = (char *)0;
1185		*nresult = (size_t)-1;
1186		return E2BIG;
1187	}
1188	if (result)
1189		*result = string + len;
1190	memcpy(string, tmp, len);
1191	*nresult = len;
1192
1193	return 0;
1194
1195ilseq:
1196	*nresult = (size_t)-1;
1197	return EILSEQ;
1198}
1199
1200static int
1201_citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1202				char * __restrict s, size_t n,
1203				_ISO2022State * __restrict psenc,
1204				size_t * __restrict nresult)
1205{
1206	char buf[MB_LEN_MAX];
1207	char *result;
1208	int ret;
1209	size_t len;
1210
1211	_DIAGASSERT(ei != NULL);
1212	_DIAGASSERT(nresult != 0);
1213	_DIAGASSERT(s != NULL);
1214
1215	/* XXX state will be modified after this operation... */
1216	ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc,
1217	    &len);
1218	if (ret) {
1219		*nresult = len;
1220		return ret;
1221	}
1222
1223	if (sizeof(buf) < len || n < len-1) {
1224		/* XXX should recover state? */
1225		*nresult = (size_t)-1;
1226		return E2BIG;
1227	}
1228
1229	memcpy(s, buf, len-1);
1230	*nresult = len-1;
1231	return (0);
1232}
1233
1234static int
1235_citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1236			     char * __restrict s, size_t n, wchar_t wc,
1237			     _ISO2022State * __restrict psenc,
1238			     size_t * __restrict nresult)
1239{
1240	char buf[MB_LEN_MAX];
1241	char *result;
1242	int ret;
1243	size_t len;
1244
1245	_DIAGASSERT(ei != NULL);
1246	_DIAGASSERT(s != NULL);
1247	_DIAGASSERT(psenc != NULL);
1248	_DIAGASSERT(nresult != 0);
1249
1250	/* XXX state will be modified after this operation... */
1251	ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc,
1252	    &len);
1253	if (ret) {
1254		*nresult = len;
1255		return ret;
1256	}
1257
1258	if (sizeof(buf) < len || n < len) {
1259		/* XXX should recover state? */
1260		*nresult = (size_t)-1;
1261		return E2BIG;
1262	}
1263
1264	memcpy(s, buf, len);
1265	*nresult = len;
1266	return (0);
1267}
1268
1269static __inline int
1270/*ARGSUSED*/
1271_citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1272			      _csid_t * __restrict csid,
1273			      _index_t * __restrict idx, wchar_t wc)
1274{
1275	wchar_t m, nm;
1276
1277	_DIAGASSERT(csid != NULL && idx != NULL);
1278
1279	m = wc & 0x7FFF8080;
1280	nm = wc & 0x007F7F7F;
1281	if (m & 0x00800000) {
1282		nm &= 0x00007F7F;
1283	} else {
1284		m &= 0x7F008080;
1285	}
1286	if (nm & 0x007F0000) {
1287		/* ^3 mark */
1288		m |= 0x007F0000;
1289	} else if (nm & 0x00007F00) {
1290		/* ^2 mark */
1291		m |= 0x00007F00;
1292	}
1293	*csid = (_csid_t)m;
1294	*idx  = (_index_t)nm;
1295
1296	return (0);
1297}
1298
1299static __inline int
1300/*ARGSUSED*/
1301_citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1302			      wchar_t * __restrict wc,
1303			      _csid_t csid, _index_t idx)
1304{
1305
1306	_DIAGASSERT(ei != NULL && wc != NULL);
1307
1308	*wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1309
1310	return (0);
1311}
1312
1313static __inline int
1314/*ARGSUSED*/
1315_citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei,
1316					      _ISO2022State * __restrict psenc,
1317					      int * __restrict rstate)
1318{
1319
1320	if (psenc->chlen == 0) {
1321		/* XXX: it should distinguish initial and stable. */
1322		*rstate = _STDENC_SDGEN_STABLE;
1323	} else {
1324		if (psenc->ch[0] == '\033')
1325			*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
1326		else
1327			*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
1328	}
1329
1330	return 0;
1331}
1332
1333/* ----------------------------------------------------------------------
1334 * public interface for ctype
1335 */
1336
/*
 * Instantiate the ctype interface for this encoding.  The two macros and
 * the template are defined in headers not shown here; presumably the
 * template builds the public entry points from the _FUNCNAME(),
 * _ENCODING_* and _CEI_* definitions above -- confirm against
 * citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(ISO2022);
_CITRUS_CTYPE_DEF_OPS(ISO2022);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

/*
 * Same arrangement for the stdenc interface; see
 * citrus_stdenc_template.h for what is generated.
 */
_CITRUS_STDENC_DECLS(ISO2022);
_CITRUS_STDENC_DEF_OPS(ISO2022);

#include "citrus_stdenc_template.h"
1350