Deleted Added
full compact
citrus_iso2022.c (252583) citrus_iso2022.c (260003)
1/* $FreeBSD: head/lib/libiconv_modules/ISO2022/citrus_iso2022.c 252583 2013-07-03 18:27:45Z peter $ */
1/* $FreeBSD: head/lib/libiconv_modules/ISO2022/citrus_iso2022.c 260003 2013-12-28 13:49:48Z dim $ */
2/* $NetBSD: citrus_iso2022.c,v 1.19 2008/06/14 16:01:07 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)1999, 2002 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
30 */
31
32#include <sys/cdefs.h>
33#include <sys/types.h>
34
35#include <assert.h>
36#include <errno.h>
37#include <limits.h>
38#include <stdbool.h>
39#include <stddef.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <wchar.h>
44
45#include "citrus_namespace.h"
46#include "citrus_types.h"
47#include "citrus_module.h"
48#include "citrus_stdenc.h"
49#include "citrus_iso2022.h"
50
51
52/* ----------------------------------------------------------------------
53 * private stuffs used by templates
54 */
55
56
57/*
58 * wchar_t mappings:
59 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
60 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
61 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
62 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
63 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
64 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
65 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
66 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
67 * 94x94 charset (ESC & V ESC $ ( F)
68 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
69 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
70 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
71 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
72 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
73 */
74
/* Character-set classes: 94- or 96-character sets, single- or multi-byte. */
#define	CS94		(0U)
#define	CS96		(1U)
#define	CS94MULTI	(2U)
#define	CS96MULTI	(3U)

/*
 * One designated character set: its class (CS94 ... CS96MULTI), the final
 * byte of the designating escape sequence, and the optional intermediate
 * and version bytes ('\0' when absent).
 */
typedef struct {
	unsigned char	 interm;
	unsigned char	 final;
	unsigned char	 type;
	unsigned char	 vers;
} _ISO2022Charset;

/*
 * Use designated initializers.  The positional form
 * { CS9x, 'F', '\0', '\0' } was written for a (type, final, interm, vers)
 * member order; with the order declared above it stored the charset class
 * in .interm and left .type as 0 (CS94), which is wrong for iso88591.
 */
static const _ISO2022Charset ascii = {
	.type = CS94, .final = 'B', .interm = '\0', .vers = '\0'
};
static const _ISO2022Charset iso88591 = {
	.type = CS96, .final = 'A', .interm = '\0', .vers = '\0'
};

/*
 * Shift state of a conversion.  g[] holds the sets designated to G0..G3;
 * gl/gr pick the g[] slot used for bytes with the high bit clear/set, and
 * singlegl/singlegr, when not -1, override them for the next character.
 */
typedef struct {
	_ISO2022Charset	g[4];
	/*
	 * need 3 bits to hold -1, 0, ..., 3.  Declared "signed int" because
	 * whether a plain "int" bit-field is signed is implementation-defined,
	 * and these members must be able to store and compare equal to -1.
	 */
	signed int	gl:3,
			gr:3,
			singlegl:3,
			singlegr:3;
	char		ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
	size_t		chlen;
	int		flags;
#define _ISO2022STATE_FLAG_INITIALIZED	1
} _ISO2022State;
102
103typedef struct {
104 _ISO2022Charset *recommend[4];
105 size_t recommendsize[4];
106 _ISO2022Charset initg[4];
107 int maxcharset;
108 int flags;
109#define F_8BIT 0x0001
110#define F_NOOLD 0x0002
111#define F_SI 0x0010 /*0F*/
112#define F_SO 0x0020 /*0E*/
113#define F_LS0 0x0010 /*0F*/
114#define F_LS1 0x0020 /*0E*/
115#define F_LS2 0x0040 /*ESC n*/
116#define F_LS3 0x0080 /*ESC o*/
117#define F_LS1R 0x0100 /*ESC ~*/
118#define F_LS2R 0x0200 /*ESC }*/
119#define F_LS3R 0x0400 /*ESC |*/
120#define F_SS2 0x0800 /*ESC N*/
121#define F_SS3 0x1000 /*ESC O*/
122#define F_SS2R 0x2000 /*8E*/
123#define F_SS3R 0x4000 /*8F*/
124} _ISO2022EncodingInfo;
125
/*
 * Glue consumed by the shared encoding templates.  Every expansion is fully
 * parenthesized so it behaves as a single operand wherever it is used.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* A state needs explicit initialization until its INITIALIZED flag is set. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * Sentinel returned when no character could be produced.  Parenthesized:
 * the bare form "(wchar_t)-1" turns into a subtraction after "sizeof" or
 * next to another operand.
 */
#define _ISO2022INVALID			((wchar_t)-1)
139
/* True for a C0 control byte (0x00-0x1f). */
static __inline bool isc0(__uint8_t x)
{

	return (x <= 0x1f);
}
145
/* True for a C1 control byte (0x80-0x9f). */
static __inline bool isc1(__uint8_t x)
{

	return (x >= 0x80 && x <= 0x9f);
}
151
/* True for any control byte: DEL (0x7f), a C0 byte or a C1 byte. */
static __inline bool iscntl(__uint8_t x)
{

	return (x == 0x7f || isc0(x) || isc1(x));
}
157
/* True when x lies in the 94-character graphic range 0x21-0x7e. */
static __inline bool is94(__uint8_t x)
{

	return (x > 0x20 && x < 0x7f);
}
163
/* True when x lies in the 96-character graphic range 0x20-0x7f. */
static __inline bool is96(__uint8_t x)
{

	return (!(x < 0x20 || x > 0x7f));
}
169
/* True when x can be the final byte of an escape sequence (0x30-0x7f). */
static __inline bool isecma(__uint8_t x)
{

	return (x >= 0x30 && x <= 0x7f);
}
175
/* True when x is an intermediate byte (0x20-0x2f, i.e. high nibble 2). */
static __inline bool isinterm(__uint8_t x)
{

	return ((x & 0xf0) == 0x20);
}
181
/*
 * True when x is in 0x60-0x6f; the decoder treats multi-byte sets whose
 * final byte is in this range as three bytes per character.
 */
static __inline bool isthree(__uint8_t x)
{

	return ((x & 0xf0) == 0x60);
}
187
188static __inline int
189getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
190{
191
192 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
193 cs->final = (unsigned char)(p[3] & 0xff);
194 cs->interm = '\0';
195 cs->vers = '\0';
196 cs->type = CS94MULTI;
197 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
198 cs->final = (unsigned char)(p[3] & 0xff);
199 cs->interm = '\0';
200 cs->vers = '\0';
201 cs->type = CS96MULTI;
202 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
203 cs->final = (unsigned char)(p[2] & 0xff);
204 cs->interm = '\0';
205 cs->vers = '\0';
206 cs->type = CS94;
207 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
208 cs->final = (unsigned char )(p[2] & 0xff);
209 cs->interm = '\0';
210 cs->vers = '\0';
211 cs->type = CS96;
212 } else
213 return (1);
214
215 return (0);
216}
217
218
219#define _NOTMATCH 0
220#define _MATCH 1
221#define _PARSEFAIL 2
222
223static __inline int
224get_recommend(_ISO2022EncodingInfo * __restrict ei,
225 const char * __restrict token)
226{
227 _ISO2022Charset cs, *p;
228 int i;
229
230 if (!strchr("0123", token[0]) || token[1] != '=')
231 return (_NOTMATCH);
232
233 if (getcs(&token[2], &cs) == 0)
234 ;
235 else if (!strcmp(&token[2], "94")) {
236 cs.final = (unsigned char)(token[4]);
237 cs.interm = '\0';
238 cs.vers = '\0';
239 cs.type = CS94;
240 } else if (!strcmp(&token[2], "96")) {
241 cs.final = (unsigned char)(token[4]);
242 cs.interm = '\0';
243 cs.vers = '\0';
244 cs.type = CS96;
245 } else if (!strcmp(&token[2], "94$")) {
246 cs.final = (unsigned char)(token[5]);
247 cs.interm = '\0';
248 cs.vers = '\0';
249 cs.type = CS94MULTI;
250 } else if (!strcmp(&token[2], "96$")) {
251 cs.final = (unsigned char)(token[5]);
252 cs.interm = '\0';
253 cs.vers = '\0';
254 cs.type = CS96MULTI;
255 } else
256 return (_PARSEFAIL);
257
258 i = token[0] - '0';
259 if (!ei->recommend[i])
260 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
261 else {
262 p = realloc(ei->recommend[i],
263 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
264 if (!p)
265 return (_PARSEFAIL);
266 ei->recommend[i] = p;
267 }
268 if (!ei->recommend[i])
269 return (_PARSEFAIL);
270 ei->recommendsize[i]++;
271
272 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
273 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
274 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
275 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
276
277 return (_MATCH);
278}
279
280static __inline int
281get_initg(_ISO2022EncodingInfo * __restrict ei,
282 const char * __restrict token)
283{
284 _ISO2022Charset cs;
285
286 if (strncmp("INIT", &token[0], 4) ||
287 !strchr("0123", token[4]) ||
288 token[5] != '=')
289 return (_NOTMATCH);
290
291 if (getcs(&token[6], &cs) != 0)
292 return (_PARSEFAIL);
293
294 ei->initg[token[4] - '0'].type = cs.type;
295 ei->initg[token[4] - '0'].final = cs.final;
296 ei->initg[token[4] - '0'].interm = cs.interm;
297 ei->initg[token[4] - '0'].vers = cs.vers;
298
299 return (_MATCH);
300}
301
302static __inline int
303get_max(_ISO2022EncodingInfo * __restrict ei,
304 const char * __restrict token)
305{
306 if (!strcmp(token, "MAX1"))
307 ei->maxcharset = 1;
308 else if (!strcmp(token, "MAX2"))
309 ei->maxcharset = 2;
310 else if (!strcmp(token, "MAX3"))
311 ei->maxcharset = 3;
312 else
313 return (_NOTMATCH);
314
315 return (_MATCH);
316}
317
318
319static __inline int
320get_flags(_ISO2022EncodingInfo * __restrict ei,
321 const char * __restrict token)
322{
323 static struct {
324 const char *tag;
325 int flag;
326 } const tags[] = {
327 { "DUMMY", 0 },
328 { "8BIT", F_8BIT },
329 { "NOOLD", F_NOOLD },
330 { "SI", F_SI },
331 { "SO", F_SO },
332 { "LS0", F_LS0 },
333 { "LS1", F_LS1 },
334 { "LS2", F_LS2 },
335 { "LS3", F_LS3 },
336 { "LS1R", F_LS1R },
337 { "LS2R", F_LS2R },
338 { "LS3R", F_LS3R },
339 { "SS2", F_SS2 },
340 { "SS3", F_SS3 },
341 { "SS2R", F_SS2R },
342 { "SS3R", F_SS3R },
343 { NULL, 0 }
344 };
345 int i;
346
347 for (i = 0; tags[i].tag; i++)
348 if (!strcmp(token, tags[i].tag)) {
349 ei->flags |= tags[i].flag;
350 return (_MATCH);
351 }
352
353 return (_NOTMATCH);
354}
355
356
357static __inline int
358_citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
359 const void * __restrict var, size_t lenvar __unused)
360{
361 char const *e, *v;
362 char buf[20];
363 size_t len;
364 int i, ret;
365
366 /*
367 * parse VARIABLE section.
368 */
369
370 if (!var)
371 return (EFTYPE);
372
373 v = (const char *) var;
374
375 /* initialize structure */
376 ei->maxcharset = 0;
377 for (i = 0; i < 4; i++) {
378 ei->recommend[i] = NULL;
379 ei->recommendsize[i] = 0;
380 }
381 ei->flags = 0;
382
383 while (*v) {
384 while (*v == ' ' || *v == '\t')
385 ++v;
386
387 /* find the token */
388 e = v;
389 while (*e && *e != ' ' && *e != '\t')
390 ++e;
391
392 len = e - v;
393 if (len == 0)
394 break;
395 if (len >= sizeof(buf))
396 goto parsefail;
397 snprintf(buf, sizeof(buf), "%.*s", (int)len, v);
398
399 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
400 ;
401 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
402 ;
403 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
404 ;
405 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
406 ;
407 else
408 ret = _PARSEFAIL;
409 if (ret == _PARSEFAIL)
410 goto parsefail;
411 v = e;
412
413 }
414
415 return (0);
416
417parsefail:
418 free(ei->recommend[0]);
419 free(ei->recommend[1]);
420 free(ei->recommend[2]);
421 free(ei->recommend[3]);
422
423 return (EFTYPE);
424}
425
426static __inline void
427/*ARGSUSED*/
428_citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
429 _ISO2022State * __restrict s)
430{
431 int i;
432
433 memset(s, 0, sizeof(*s));
434 s->gl = 0;
435 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
436
437 for (i = 0; i < 4; i++)
438 if (ei->initg[i].final) {
439 s->g[i].type = ei->initg[i].type;
440 s->g[i].final = ei->initg[i].final;
441 s->g[i].interm = ei->initg[i].interm;
442 }
443 s->singlegl = s->singlegr = -1;
444 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
445}
446
2/* $NetBSD: citrus_iso2022.c,v 1.19 2008/06/14 16:01:07 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)1999, 2002 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
30 */
31
32#include <sys/cdefs.h>
33#include <sys/types.h>
34
35#include <assert.h>
36#include <errno.h>
37#include <limits.h>
38#include <stdbool.h>
39#include <stddef.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <wchar.h>
44
45#include "citrus_namespace.h"
46#include "citrus_types.h"
47#include "citrus_module.h"
48#include "citrus_stdenc.h"
49#include "citrus_iso2022.h"
50
51
52/* ----------------------------------------------------------------------
53 * private stuffs used by templates
54 */
55
56
57/*
58 * wchar_t mappings:
59 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
60 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
61 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
62 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
63 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
64 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
65 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
66 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
67 * 94x94 charset (ESC & V ESC $ ( F)
68 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
69 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
70 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
71 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
72 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
73 */
74
/* Character-set classes: 94- or 96-character sets, single- or multi-byte. */
#define	CS94		(0U)
#define	CS96		(1U)
#define	CS94MULTI	(2U)
#define	CS96MULTI	(3U)

/*
 * One designated character set: its class (CS94 ... CS96MULTI), the final
 * byte of the designating escape sequence, and the optional intermediate
 * and version bytes ('\0' when absent).
 */
typedef struct {
	unsigned char	 interm;
	unsigned char	 final;
	unsigned char	 type;
	unsigned char	 vers;
} _ISO2022Charset;

/*
 * Use designated initializers.  The positional form
 * { CS9x, 'F', '\0', '\0' } was written for a (type, final, interm, vers)
 * member order; with the order declared above it stored the charset class
 * in .interm and left .type as 0 (CS94), which is wrong for iso88591.
 */
static const _ISO2022Charset ascii = {
	.type = CS94, .final = 'B', .interm = '\0', .vers = '\0'
};
static const _ISO2022Charset iso88591 = {
	.type = CS96, .final = 'A', .interm = '\0', .vers = '\0'
};

/*
 * Shift state of a conversion.  g[] holds the sets designated to G0..G3;
 * gl/gr pick the g[] slot used for bytes with the high bit clear/set, and
 * singlegl/singlegr, when not -1, override them for the next character.
 */
typedef struct {
	_ISO2022Charset	g[4];
	/*
	 * need 3 bits to hold -1, 0, ..., 3.  Declared "signed int" because
	 * whether a plain "int" bit-field is signed is implementation-defined,
	 * and these members must be able to store and compare equal to -1.
	 */
	signed int	gl:3,
			gr:3,
			singlegl:3,
			singlegr:3;
	char		ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
	size_t		chlen;
	int		flags;
#define _ISO2022STATE_FLAG_INITIALIZED	1
} _ISO2022State;
102
103typedef struct {
104 _ISO2022Charset *recommend[4];
105 size_t recommendsize[4];
106 _ISO2022Charset initg[4];
107 int maxcharset;
108 int flags;
109#define F_8BIT 0x0001
110#define F_NOOLD 0x0002
111#define F_SI 0x0010 /*0F*/
112#define F_SO 0x0020 /*0E*/
113#define F_LS0 0x0010 /*0F*/
114#define F_LS1 0x0020 /*0E*/
115#define F_LS2 0x0040 /*ESC n*/
116#define F_LS3 0x0080 /*ESC o*/
117#define F_LS1R 0x0100 /*ESC ~*/
118#define F_LS2R 0x0200 /*ESC }*/
119#define F_LS3R 0x0400 /*ESC |*/
120#define F_SS2 0x0800 /*ESC N*/
121#define F_SS3 0x1000 /*ESC O*/
122#define F_SS2R 0x2000 /*8E*/
123#define F_SS3R 0x4000 /*8F*/
124} _ISO2022EncodingInfo;
125
/*
 * Glue consumed by the shared encoding templates.  Every expansion is fully
 * parenthesized so it behaves as a single operand wherever it is used.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* A state needs explicit initialization until its INITIALIZED flag is set. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * Sentinel returned when no character could be produced.  Parenthesized:
 * the bare form "(wchar_t)-1" turns into a subtraction after "sizeof" or
 * next to another operand.
 */
#define _ISO2022INVALID			((wchar_t)-1)
139
/* True for a C0 control byte (0x00-0x1f). */
static __inline bool isc0(__uint8_t x)
{

	return (x <= 0x1f);
}
145
/* True for a C1 control byte (0x80-0x9f). */
static __inline bool isc1(__uint8_t x)
{

	return (x >= 0x80 && x <= 0x9f);
}
151
/* True for any control byte: DEL (0x7f), a C0 byte or a C1 byte. */
static __inline bool iscntl(__uint8_t x)
{

	return (x == 0x7f || isc0(x) || isc1(x));
}
157
/* True when x lies in the 94-character graphic range 0x21-0x7e. */
static __inline bool is94(__uint8_t x)
{

	return (x > 0x20 && x < 0x7f);
}
163
/* True when x lies in the 96-character graphic range 0x20-0x7f. */
static __inline bool is96(__uint8_t x)
{

	return (!(x < 0x20 || x > 0x7f));
}
169
/* True when x can be the final byte of an escape sequence (0x30-0x7f). */
static __inline bool isecma(__uint8_t x)
{

	return (x >= 0x30 && x <= 0x7f);
}
175
/* True when x is an intermediate byte (0x20-0x2f, i.e. high nibble 2). */
static __inline bool isinterm(__uint8_t x)
{

	return ((x & 0xf0) == 0x20);
}
181
/*
 * True when x is in 0x60-0x6f; the decoder treats multi-byte sets whose
 * final byte is in this range as three bytes per character.
 */
static __inline bool isthree(__uint8_t x)
{

	return ((x & 0xf0) == 0x60);
}
187
188static __inline int
189getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
190{
191
192 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
193 cs->final = (unsigned char)(p[3] & 0xff);
194 cs->interm = '\0';
195 cs->vers = '\0';
196 cs->type = CS94MULTI;
197 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
198 cs->final = (unsigned char)(p[3] & 0xff);
199 cs->interm = '\0';
200 cs->vers = '\0';
201 cs->type = CS96MULTI;
202 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
203 cs->final = (unsigned char)(p[2] & 0xff);
204 cs->interm = '\0';
205 cs->vers = '\0';
206 cs->type = CS94;
207 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
208 cs->final = (unsigned char )(p[2] & 0xff);
209 cs->interm = '\0';
210 cs->vers = '\0';
211 cs->type = CS96;
212 } else
213 return (1);
214
215 return (0);
216}
217
218
219#define _NOTMATCH 0
220#define _MATCH 1
221#define _PARSEFAIL 2
222
223static __inline int
224get_recommend(_ISO2022EncodingInfo * __restrict ei,
225 const char * __restrict token)
226{
227 _ISO2022Charset cs, *p;
228 int i;
229
230 if (!strchr("0123", token[0]) || token[1] != '=')
231 return (_NOTMATCH);
232
233 if (getcs(&token[2], &cs) == 0)
234 ;
235 else if (!strcmp(&token[2], "94")) {
236 cs.final = (unsigned char)(token[4]);
237 cs.interm = '\0';
238 cs.vers = '\0';
239 cs.type = CS94;
240 } else if (!strcmp(&token[2], "96")) {
241 cs.final = (unsigned char)(token[4]);
242 cs.interm = '\0';
243 cs.vers = '\0';
244 cs.type = CS96;
245 } else if (!strcmp(&token[2], "94$")) {
246 cs.final = (unsigned char)(token[5]);
247 cs.interm = '\0';
248 cs.vers = '\0';
249 cs.type = CS94MULTI;
250 } else if (!strcmp(&token[2], "96$")) {
251 cs.final = (unsigned char)(token[5]);
252 cs.interm = '\0';
253 cs.vers = '\0';
254 cs.type = CS96MULTI;
255 } else
256 return (_PARSEFAIL);
257
258 i = token[0] - '0';
259 if (!ei->recommend[i])
260 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
261 else {
262 p = realloc(ei->recommend[i],
263 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
264 if (!p)
265 return (_PARSEFAIL);
266 ei->recommend[i] = p;
267 }
268 if (!ei->recommend[i])
269 return (_PARSEFAIL);
270 ei->recommendsize[i]++;
271
272 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
273 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
274 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
275 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
276
277 return (_MATCH);
278}
279
280static __inline int
281get_initg(_ISO2022EncodingInfo * __restrict ei,
282 const char * __restrict token)
283{
284 _ISO2022Charset cs;
285
286 if (strncmp("INIT", &token[0], 4) ||
287 !strchr("0123", token[4]) ||
288 token[5] != '=')
289 return (_NOTMATCH);
290
291 if (getcs(&token[6], &cs) != 0)
292 return (_PARSEFAIL);
293
294 ei->initg[token[4] - '0'].type = cs.type;
295 ei->initg[token[4] - '0'].final = cs.final;
296 ei->initg[token[4] - '0'].interm = cs.interm;
297 ei->initg[token[4] - '0'].vers = cs.vers;
298
299 return (_MATCH);
300}
301
302static __inline int
303get_max(_ISO2022EncodingInfo * __restrict ei,
304 const char * __restrict token)
305{
306 if (!strcmp(token, "MAX1"))
307 ei->maxcharset = 1;
308 else if (!strcmp(token, "MAX2"))
309 ei->maxcharset = 2;
310 else if (!strcmp(token, "MAX3"))
311 ei->maxcharset = 3;
312 else
313 return (_NOTMATCH);
314
315 return (_MATCH);
316}
317
318
319static __inline int
320get_flags(_ISO2022EncodingInfo * __restrict ei,
321 const char * __restrict token)
322{
323 static struct {
324 const char *tag;
325 int flag;
326 } const tags[] = {
327 { "DUMMY", 0 },
328 { "8BIT", F_8BIT },
329 { "NOOLD", F_NOOLD },
330 { "SI", F_SI },
331 { "SO", F_SO },
332 { "LS0", F_LS0 },
333 { "LS1", F_LS1 },
334 { "LS2", F_LS2 },
335 { "LS3", F_LS3 },
336 { "LS1R", F_LS1R },
337 { "LS2R", F_LS2R },
338 { "LS3R", F_LS3R },
339 { "SS2", F_SS2 },
340 { "SS3", F_SS3 },
341 { "SS2R", F_SS2R },
342 { "SS3R", F_SS3R },
343 { NULL, 0 }
344 };
345 int i;
346
347 for (i = 0; tags[i].tag; i++)
348 if (!strcmp(token, tags[i].tag)) {
349 ei->flags |= tags[i].flag;
350 return (_MATCH);
351 }
352
353 return (_NOTMATCH);
354}
355
356
357static __inline int
358_citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
359 const void * __restrict var, size_t lenvar __unused)
360{
361 char const *e, *v;
362 char buf[20];
363 size_t len;
364 int i, ret;
365
366 /*
367 * parse VARIABLE section.
368 */
369
370 if (!var)
371 return (EFTYPE);
372
373 v = (const char *) var;
374
375 /* initialize structure */
376 ei->maxcharset = 0;
377 for (i = 0; i < 4; i++) {
378 ei->recommend[i] = NULL;
379 ei->recommendsize[i] = 0;
380 }
381 ei->flags = 0;
382
383 while (*v) {
384 while (*v == ' ' || *v == '\t')
385 ++v;
386
387 /* find the token */
388 e = v;
389 while (*e && *e != ' ' && *e != '\t')
390 ++e;
391
392 len = e - v;
393 if (len == 0)
394 break;
395 if (len >= sizeof(buf))
396 goto parsefail;
397 snprintf(buf, sizeof(buf), "%.*s", (int)len, v);
398
399 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
400 ;
401 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
402 ;
403 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
404 ;
405 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
406 ;
407 else
408 ret = _PARSEFAIL;
409 if (ret == _PARSEFAIL)
410 goto parsefail;
411 v = e;
412
413 }
414
415 return (0);
416
417parsefail:
418 free(ei->recommend[0]);
419 free(ei->recommend[1]);
420 free(ei->recommend[2]);
421 free(ei->recommend[3]);
422
423 return (EFTYPE);
424}
425
426static __inline void
427/*ARGSUSED*/
428_citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
429 _ISO2022State * __restrict s)
430{
431 int i;
432
433 memset(s, 0, sizeof(*s));
434 s->gl = 0;
435 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
436
437 for (i = 0; i < 4; i++)
438 if (ei->initg[i].final) {
439 s->g[i].type = ei->initg[i].type;
440 s->g[i].final = ei->initg[i].final;
441 s->g[i].interm = ei->initg[i].interm;
442 }
443 s->singlegl = s->singlegr = -1;
444 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
445}
446
447#if 0
447static __inline void
448/*ARGSUSED*/
449_citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei __unused,
450 void * __restrict pspriv, const _ISO2022State * __restrict s)
451{
452
453 memcpy(pspriv, (const void *)s, sizeof(*s));
454}
455
456static __inline void
457/*ARGSUSED*/
458_citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei __unused,
459 _ISO2022State * __restrict s, const void * __restrict pspriv)
460{
461
462 memcpy((void *)s, pspriv, sizeof(*s));
463}
448static __inline void
449/*ARGSUSED*/
450_citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei __unused,
451 void * __restrict pspriv, const _ISO2022State * __restrict s)
452{
453
454 memcpy(pspriv, (const void *)s, sizeof(*s));
455}
456
457static __inline void
458/*ARGSUSED*/
459_citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei __unused,
460 _ISO2022State * __restrict s, const void * __restrict pspriv)
461{
462
463 memcpy((void *)s, pspriv, sizeof(*s));
464}
465#endif
464
465static int
466/*ARGSUSED*/
467_citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
468 const void * __restrict var, size_t lenvar)
469{
470
471 return (_citrus_ISO2022_parse_variable(ei, var, lenvar));
472}
473
474static void
475/*ARGSUSED*/
476_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei __unused)
477{
478
479}
480
481#define ESC '\033'
482#define ECMA -1
483#define INTERM -2
484#define OECMA -3
485static const struct seqtable {
486 int type;
487 int csoff;
488 int finaloff;
489 int intermoff;
490 int versoff;
491 int len;
492 int chars[10];
493} seqtable[] = {
494 /* G0 94MULTI special */
495 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
496 /* G0 94MULTI special with version identification */
497 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
498 /* G? 94 */
499 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
500 /* G? 94 with 2nd intermediate char */
501 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
502 /* G? 96 */
503 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
504 /* G? 96 with 2nd intermediate char */
505 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
506 /* G? 94MULTI */
507 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
508 /* G? 96MULTI */
509 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
510 /* G? 94MULTI with version specification */
511 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
512 /* LS2/3 */
513 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
514 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
515 /* LS1/2/3R */
516 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
517 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
518 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
519 /* SS2/3 */
520 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
521 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
522 /* end of records */
523// { 0, }
524 { 0, 0, 0, 0, 0, 0, { ESC, 0, }, }
525};
526
527static int
528seqmatch(const char * __restrict s, size_t n,
529 const struct seqtable * __restrict sp)
530{
531 const int *p;
532
533 p = sp->chars;
534 while ((size_t)(p - sp->chars) < n && p - sp->chars < sp->len) {
535 switch (*p) {
536 case ECMA:
537 if (!isecma(*s))
538 goto terminate;
539 break;
540 case OECMA:
541 if (*s && strchr("@AB", *s))
542 break;
543 else
544 goto terminate;
545 case INTERM:
546 if (!isinterm(*s))
547 goto terminate;
548 break;
549 case CS94:
550 if (*s && strchr("()*+", *s))
551 break;
552 else
553 goto terminate;
554 case CS96:
555 if (*s && strchr(",-./", *s))
556 break;
557 else
558 goto terminate;
559 default:
560 if (*s != *p)
561 goto terminate;
562 break;
563 }
564
565 p++;
566 s++;
567 }
568
569terminate:
570 return (p - sp->chars);
571}
572
573static wchar_t
574_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei __unused,
575 const char * __restrict string, size_t n, const char ** __restrict result,
576 _ISO2022State * __restrict psenc)
577{
578 const struct seqtable *sp;
579 wchar_t wchar = 0;
580 int i, cur, nmatch;
581
582 while (1) {
583 /* SI/SO */
584 if (1 <= n && string[0] == '\017') {
585 psenc->gl = 0;
586 string++;
587 n--;
588 continue;
589 }
590 if (1 <= n && string[0] == '\016') {
591 psenc->gl = 1;
592 string++;
593 n--;
594 continue;
595 }
596
597 /* SS2/3R */
598 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
599 psenc->singlegl = psenc->singlegr =
600 (string[0] - '\216') + 2;
601 string++;
602 n--;
603 continue;
604 }
605
606 /* eat the letter if this is not ESC */
607 if (1 <= n && string[0] != '\033')
608 break;
609
610 /* look for a perfect match from escape sequences */
611 for (sp = &seqtable[0]; sp->len; sp++) {
612 nmatch = seqmatch(string, n, sp);
613 if (sp->len == nmatch && n >= (size_t)(sp->len))
614 break;
615 }
616
617 if (!sp->len)
618 goto notseq;
619
620 if (sp->type != -1) {
621 if (sp->csoff == -1)
622 i = 0;
623 else {
624 switch (sp->type) {
625 case CS94:
626 case CS94MULTI:
627 i = string[sp->csoff] - '(';
628 break;
629 case CS96:
630 case CS96MULTI:
631 i = string[sp->csoff] - ',';
632 break;
633 default:
634 return (_ISO2022INVALID);
635 }
636 }
637 psenc->g[i].type = sp->type;
638 psenc->g[i].final = '\0';
639 psenc->g[i].interm = '\0';
640 psenc->g[i].vers = '\0';
641 /* sp->finaloff must not be -1 */
642 if (sp->finaloff != -1)
643 psenc->g[i].final = string[sp->finaloff];
644 if (sp->intermoff != -1)
645 psenc->g[i].interm = string[sp->intermoff];
646 if (sp->versoff != -1)
647 psenc->g[i].vers = string[sp->versoff];
648
649 string += sp->len;
650 n -= sp->len;
651 continue;
652 }
653
654 /* LS2/3 */
655 if (2 <= n && string[0] == '\033' &&
656 string[1] && strchr("no", string[1])) {
657 psenc->gl = string[1] - 'n' + 2;
658 string += 2;
659 n -= 2;
660 continue;
661 }
662
663 /* LS1/2/3R */
664 /* XXX: { for vi showmatch */
665 if (2 <= n && string[0] == '\033' &&
666 string[1] && strchr("~}|", string[1])) {
667 psenc->gr = 3 - (string[1] - '|');
668 string += 2;
669 n -= 2;
670 continue;
671 }
672
673 /* SS2/3 */
674 if (2 <= n && string[0] == '\033' && string[1] &&
675 strchr("NO", string[1])) {
676 psenc->singlegl = (string[1] - 'N') + 2;
677 string += 2;
678 n -= 2;
679 continue;
680 }
681
682 notseq:
683 /*
684 * if we've got an unknown escape sequence, eat the ESC at the
685 * head. otherwise, wait till full escape sequence comes.
686 */
687 for (sp = &seqtable[0]; sp->len; sp++) {
688 nmatch = seqmatch(string, n, sp);
689 if (!nmatch)
690 continue;
691
692 /*
693 * if we are in the middle of escape sequence,
694 * we still need to wait for more characters to come
695 */
696 if (n < (size_t)(sp->len)) {
697 if ((size_t)(nmatch) == n) {
698 if (result)
699 *result = string;
700 return (_ISO2022INVALID);
701 }
702 } else {
703 if (nmatch == sp->len) {
704 /* this case should not happen */
705 goto eat;
706 }
707 }
708 }
709
710 break;
711 }
712
713eat:
714 /* no letter to eat */
715 if (n < 1) {
716 if (result)
717 *result = string;
718 return (_ISO2022INVALID);
719 }
720
721 /* normal chars. always eat C0/C1 as is. */
722 if (iscntl(*string & 0xff))
723 cur = -1;
724 else if (*string & 0x80)
725 cur = (psenc->singlegr == -1) ? psenc->gr : psenc->singlegr;
726 else
727 cur = (psenc->singlegl == -1) ? psenc->gl : psenc->singlegl;
728
729 if (cur == -1) {
730asis:
731 wchar = *string++ & 0xff;
732 if (result)
733 *result = string;
734 /* reset single shift state */
735 psenc->singlegr = psenc->singlegl = -1;
736 return (wchar);
737 }
738
739 /* length error check */
740 switch (psenc->g[cur].type) {
741 case CS94MULTI:
742 case CS96MULTI:
743 if (!isthree(psenc->g[cur].final)) {
744 if (2 <= n &&
745 (string[0] & 0x80) == (string[1] & 0x80))
746 break;
747 } else {
748 if (3 <= n &&
749 (string[0] & 0x80) == (string[1] & 0x80) &&
750 (string[0] & 0x80) == (string[2] & 0x80))
751 break;
752 }
753
754 /* we still need to wait for more characters to come */
755 if (result)
756 *result = string;
757 return (_ISO2022INVALID);
758
759 case CS94:
760 case CS96:
761 if (1 <= n)
762 break;
763
764 /* we still need to wait for more characters to come */
765 if (result)
766 *result = string;
767 return (_ISO2022INVALID);
768 }
769
770 /* range check */
771 switch (psenc->g[cur].type) {
772 case CS94:
773 if (!(is94(string[0] & 0x7f)))
774 goto asis;
775 case CS96:
776 if (!(is96(string[0] & 0x7f)))
777 goto asis;
778 break;
779 case CS94MULTI:
780 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
781 goto asis;
782 break;
783 case CS96MULTI:
784 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
785 goto asis;
786 break;
787 }
788
789 /* extract the character. */
790 switch (psenc->g[cur].type) {
791 case CS94:
792 /* special case for ASCII. */
793 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
794 wchar = *string++;
795 wchar &= 0x7f;
796 break;
797 }
798 wchar = psenc->g[cur].final;
799 wchar = (wchar << 8);
800 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
801 wchar = (wchar << 8);
802 wchar = (wchar << 8) | (*string++ & 0x7f);
803 break;
804 case CS96:
805 /* special case for ISO-8859-1. */
806 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
807 wchar = *string++;
808 wchar &= 0x7f;
809 wchar |= 0x80;
810 break;
811 }
812 wchar = psenc->g[cur].final;
813 wchar = (wchar << 8);
814 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
815 wchar = (wchar << 8);
816 wchar = (wchar << 8) | (*string++ & 0x7f);
817 wchar |= 0x80;
818 break;
819 case CS94MULTI:
820 case CS96MULTI:
821 wchar = psenc->g[cur].final;
822 wchar = (wchar << 8);
823 if (isthree(psenc->g[cur].final))
824 wchar |= (*string++ & 0x7f);
825 wchar = (wchar << 8) | (*string++ & 0x7f);
826 wchar = (wchar << 8) | (*string++ & 0x7f);
827 if (psenc->g[cur].type == CS96MULTI)
828 wchar |= 0x80;
829 break;
830 }
831
832 if (result)
833 *result = string;
834 /* reset single shift state */
835 psenc->singlegr = psenc->singlegl = -1;
836 return (wchar);
837}
838
839
840
841static int
842_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
843 wchar_t * __restrict pwc, const char ** __restrict s,
844 size_t n, _ISO2022State * __restrict psenc, size_t * __restrict nresult)
845{
846 const char *p, *result, *s0;
847 wchar_t wchar;
848 int c, chlenbak;
849
850 if (*s == NULL) {
851 _citrus_ISO2022_init_state(ei, psenc);
852 *nresult = _ENCODING_IS_STATE_DEPENDENT;
853 return (0);
854 }
855 s0 = *s;
856 c = 0;
857 chlenbak = psenc->chlen;
858
859 /*
860 * if we have something in buffer, use that.
861 * otherwise, skip here
862 */
863 if (psenc->chlen > sizeof(psenc->ch)) {
864 /* illgeal state */
865 _citrus_ISO2022_init_state(ei, psenc);
866 goto encoding_error;
867 }
868 if (psenc->chlen == 0)
869 goto emptybuf;
870
871 /* buffer is not empty */
872 p = psenc->ch;
873 while (psenc->chlen < sizeof(psenc->ch)) {
874 if (n > 0) {
875 psenc->ch[psenc->chlen++] = *s0++;
876 n--;
877 }
878
879 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
880 &result, psenc);
881 c += result - p;
882 if (wchar != _ISO2022INVALID) {
883 if (psenc->chlen > (size_t)c)
884 memmove(psenc->ch, result, psenc->chlen - c);
885 if (psenc->chlen < (size_t)c)
886 psenc->chlen = 0;
887 else
888 psenc->chlen -= c;
889 goto output;
890 }
891
892 if (n == 0) {
893 if ((size_t)(result - p) == psenc->chlen)
894 /* complete shift sequence. */
895 psenc->chlen = 0;
896 goto restart;
897 }
898
899 p = result;
900 }
901
902 /* escape sequence too long? */
903 goto encoding_error;
904
905emptybuf:
906 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
907 if (wchar != _ISO2022INVALID) {
908 c += result - s0;
909 psenc->chlen = 0;
910 s0 = result;
911 goto output;
912 }
913 if (result > s0) {
914 c += (result - s0);
915 n -= (result - s0);
916 s0 = result;
917 if (n > 0)
918 goto emptybuf;
919 /* complete shift sequence. */
920 goto restart;
921 }
922 n += c;
923 if (n < sizeof(psenc->ch)) {
924 memcpy(psenc->ch, s0 - c, n);
925 psenc->chlen = n;
926 s0 = result;
927 goto restart;
928 }
929
930 /* escape sequence too long? */
931
932encoding_error:
933 psenc->chlen = 0;
934 *nresult = (size_t)-1;
935 return (EILSEQ);
936
937output:
938 *s = s0;
939 if (pwc)
940 *pwc = wchar;
941 *nresult = wchar ? c - chlenbak : 0;
942 return (0);
943
944restart:
945 *s = s0;
946 *nresult = (size_t)-2;
947
948 return (0);
949}
950
951static int
952recommendation(_ISO2022EncodingInfo * __restrict ei,
953 _ISO2022Charset * __restrict cs)
954{
955 _ISO2022Charset *recommend;
956 size_t j;
957 int i;
958
959 /* first, try a exact match. */
960 for (i = 0; i < 4; i++) {
961 recommend = ei->recommend[i];
962 for (j = 0; j < ei->recommendsize[i]; j++) {
963 if (cs->type != recommend[j].type)
964 continue;
965 if (cs->final != recommend[j].final)
966 continue;
967 if (cs->interm != recommend[j].interm)
968 continue;
969
970 return (i);
971 }
972 }
973
974 /* then, try a wildcard match over final char. */
975 for (i = 0; i < 4; i++) {
976 recommend = ei->recommend[i];
977 for (j = 0; j < ei->recommendsize[i]; j++) {
978 if (cs->type != recommend[j].type)
979 continue;
980 if (cs->final && (cs->final != recommend[j].final))
981 continue;
982 if (cs->interm && (cs->interm != recommend[j].interm))
983 continue;
984
985 return (i);
986 }
987 }
988
989 /* there's no recommendation. make a guess. */
990 if (ei->maxcharset == 0) {
991 return (0);
992 } else {
993 switch (cs->type) {
994 case CS94:
995 case CS94MULTI:
996 return (0);
997 case CS96:
998 case CS96MULTI:
999 return (1);
1000 }
1001 }
1002 return (0);
1003}
1004
1005static int
1006_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1007 char * __restrict string, size_t n, char ** __restrict result,
1008 _ISO2022State * __restrict psenc, size_t * __restrict nresult)
1009{
1010 _ISO2022Charset cs;
1011 char *p;
1012 char tmp[MB_LEN_MAX];
1013 size_t len;
1014 int bit8, i = 0, target;
1015 unsigned char mask;
1016
1017 if (isc0(wc & 0xff)) {
1018 /* go back to INIT0 or ASCII on control chars */
1019 cs = ei->initg[0].final ? ei->initg[0] : ascii;
1020 } else if (isc1(wc & 0xff)) {
1021 /* go back to INIT1 or ISO-8859-1 on control chars */
1022 cs = ei->initg[1].final ? ei->initg[1] : iso88591;
1023 } else if (!(wc & ~0xff)) {
1024 if (wc & 0x80) {
1025 /* special treatment for ISO-8859-1 */
1026 cs = iso88591;
1027 } else {
1028 /* special treatment for ASCII */
1029 cs = ascii;
1030 }
1031 } else {
1032 cs.final = (wc >> 24) & 0x7f;
1033 if ((wc >> 16) & 0x80)
1034 cs.interm = (wc >> 16) & 0x7f;
1035 else
1036 cs.interm = '\0';
1037 if (wc & 0x80)
1038 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1039 else
1040 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1041 }
1042 target = recommendation(ei, &cs);
1043 p = tmp;
1044 bit8 = ei->flags & F_8BIT;
1045
1046 /* designate the charset onto the target plane(G0/1/2/3). */
1047 if (psenc->g[target].type == cs.type &&
1048 psenc->g[target].final == cs.final &&
1049 psenc->g[target].interm == cs.interm)
1050 goto planeok;
1051
1052 *p++ = '\033';
1053 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1054 *p++ = '$';
1055 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) &&
1056 !cs.interm && !(ei->flags & F_NOOLD))
1057 ;
1058 else if (cs.type == CS94 || cs.type == CS94MULTI)
1059 *p++ = "()*+"[target];
1060 else
1061 *p++ = ",-./"[target];
1062 if (cs.interm)
1063 *p++ = cs.interm;
1064 *p++ = cs.final;
1065
1066 psenc->g[target].type = cs.type;
1067 psenc->g[target].final = cs.final;
1068 psenc->g[target].interm = cs.interm;
1069
1070planeok:
1071 /* invoke the plane onto GL or GR. */
1072 if (psenc->gl == target)
1073 goto sideok;
1074 if (bit8 && psenc->gr == target)
1075 goto sideok;
1076
1077 if (target == 0 && (ei->flags & F_LS0)) {
1078 *p++ = '\017';
1079 psenc->gl = 0;
1080 } else if (target == 1 && (ei->flags & F_LS1)) {
1081 *p++ = '\016';
1082 psenc->gl = 1;
1083 } else if (target == 2 && (ei->flags & F_LS2)) {
1084 *p++ = '\033';
1085 *p++ = 'n';
1086 psenc->gl = 2;
1087 } else if (target == 3 && (ei->flags & F_LS3)) {
1088 *p++ = '\033';
1089 *p++ = 'o';
1090 psenc->gl = 3;
1091 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1092 *p++ = '\033';
1093 *p++ = '~';
1094 psenc->gr = 1;
1095 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1096 *p++ = '\033';
1097 /*{*/
1098 *p++ = '}';
1099 psenc->gr = 2;
1100 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1101 *p++ = '\033';
1102 *p++ = '|';
1103 psenc->gr = 3;
1104 } else if (target == 2 && (ei->flags & F_SS2)) {
1105 *p++ = '\033';
1106 *p++ = 'N';
1107 psenc->singlegl = 2;
1108 } else if (target == 3 && (ei->flags & F_SS3)) {
1109 *p++ = '\033';
1110 *p++ = 'O';
1111 psenc->singlegl = 3;
1112 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1113 *p++ = '\216';
1114 *p++ = 'N';
1115 psenc->singlegl = psenc->singlegr = 2;
1116 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1117 *p++ = '\217';
1118 *p++ = 'O';
1119 psenc->singlegl = psenc->singlegr = 3;
1120 } else
1121 goto ilseq;
1122
1123sideok:
1124 if (psenc->singlegl == target)
1125 mask = 0x00;
1126 else if (psenc->singlegr == target)
1127 mask = 0x80;
1128 else if (psenc->gl == target)
1129 mask = 0x00;
1130 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1131 mask = 0x80;
1132 else
1133 goto ilseq;
1134
1135 switch (cs.type) {
1136 case CS94:
1137 case CS96:
1138 i = 1;
1139 break;
1140 case CS94MULTI:
1141 case CS96MULTI:
1142 i = !iscntl(wc & 0xff) ?
1143 (isthree(cs.final) ? 3 : 2) : 1;
1144 break;
1145 }
1146 while (i-- > 0)
1147 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1148
1149 /* reset single shift state */
1150 psenc->singlegl = psenc->singlegr = -1;
1151
1152 len = (size_t)(p - tmp);
1153 if (n < len) {
1154 if (result)
1155 *result = (char *)0;
1156 *nresult = (size_t)-1;
1157 return (E2BIG);
1158 }
1159 if (result)
1160 *result = string + len;
1161 memcpy(string, tmp, len);
1162 *nresult = len;
1163
1164 return (0);
1165
1166ilseq:
1167 *nresult = (size_t)-1;
1168 return (EILSEQ);
1169}
1170
1171static int
1172_citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1173 char * __restrict s, size_t n, _ISO2022State * __restrict psenc,
1174 size_t * __restrict nresult)
1175{
1176 char *result;
1177 char buf[MB_LEN_MAX];
1178 size_t len;
1179 int ret;
1180
1181 /* XXX state will be modified after this operation... */
1182 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc,
1183 &len);
1184 if (ret) {
1185 *nresult = len;
1186 return (ret);
1187 }
1188
1189 if (sizeof(buf) < len || n < len-1) {
1190 /* XXX should recover state? */
1191 *nresult = (size_t)-1;
1192 return (E2BIG);
1193 }
1194
1195 memcpy(s, buf, len - 1);
1196 *nresult = len - 1;
1197 return (0);
1198}
1199
1200static int
1201_citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1202 char * __restrict s, size_t n, wchar_t wc,
1203 _ISO2022State * __restrict psenc, size_t * __restrict nresult)
1204{
1205 char *result;
1206 char buf[MB_LEN_MAX];
1207 size_t len;
1208 int ret;
1209
1210 /* XXX state will be modified after this operation... */
1211 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc,
1212 &len);
1213 if (ret) {
1214 *nresult = len;
1215 return (ret);
1216 }
1217
1218 if (sizeof(buf) < len || n < len) {
1219 /* XXX should recover state? */
1220 *nresult = (size_t)-1;
1221 return (E2BIG);
1222 }
1223
1224 memcpy(s, buf, len);
1225 *nresult = len;
1226 return (0);
1227}
1228
1229static __inline int
1230/*ARGSUSED*/
1231_citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei __unused,
1232 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
1233{
1234 wchar_t m, nm;
1235
1236 m = wc & 0x7FFF8080;
1237 nm = wc & 0x007F7F7F;
1238 if (m & 0x00800000)
1239 nm &= 0x00007F7F;
1240 else
1241 m &= 0x7F008080;
1242 if (nm & 0x007F0000) {
1243 /* ^3 mark */
1244 m |= 0x007F0000;
1245 } else if (nm & 0x00007F00) {
1246 /* ^2 mark */
1247 m |= 0x00007F00;
1248 }
1249 *csid = (_csid_t)m;
1250 *idx = (_index_t)nm;
1251
1252 return (0);
1253}
1254
1255static __inline int
1256/*ARGSUSED*/
1257_citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei __unused,
1258 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
1259{
1260
1261 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1262
1263 return (0);
1264}
1265
1266static __inline int
1267/*ARGSUSED*/
1268_citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei __unused,
1269 _ISO2022State * __restrict psenc, int * __restrict rstate)
1270{
1271
1272 if (psenc->chlen == 0) {
1273 /* XXX: it should distinguish initial and stable. */
1274 *rstate = _STDENC_SDGEN_STABLE;
1275 } else
1276 *rstate = (psenc->ch[0] == '\033') ?
1277 _STDENC_SDGEN_INCOMPLETE_SHIFT :
1278 _STDENC_SDGEN_INCOMPLETE_CHAR;
1279 return (0);
1280}
1281
1282/* ----------------------------------------------------------------------
1283 * public interface for stdenc
1284 */
1285
1286_CITRUS_STDENC_DECLS(ISO2022);
1287_CITRUS_STDENC_DEF_OPS(ISO2022);
1288
1289#include "citrus_stdenc_template.h"
466
467static int
468/*ARGSUSED*/
469_citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
470 const void * __restrict var, size_t lenvar)
471{
472
473 return (_citrus_ISO2022_parse_variable(ei, var, lenvar));
474}
475
476static void
477/*ARGSUSED*/
478_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei __unused)
479{
480
481}
482
483#define ESC '\033'
484#define ECMA -1
485#define INTERM -2
486#define OECMA -3
487static const struct seqtable {
488 int type;
489 int csoff;
490 int finaloff;
491 int intermoff;
492 int versoff;
493 int len;
494 int chars[10];
495} seqtable[] = {
496 /* G0 94MULTI special */
497 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
498 /* G0 94MULTI special with version identification */
499 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
500 /* G? 94 */
501 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
502 /* G? 94 with 2nd intermediate char */
503 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
504 /* G? 96 */
505 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
506 /* G? 96 with 2nd intermediate char */
507 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
508 /* G? 94MULTI */
509 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
510 /* G? 96MULTI */
511 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
512 /* G? 94MULTI with version specification */
513 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
514 /* LS2/3 */
515 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
516 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
517 /* LS1/2/3R */
518 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
519 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
520 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
521 /* SS2/3 */
522 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
523 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
524 /* end of records */
525// { 0, }
526 { 0, 0, 0, 0, 0, 0, { ESC, 0, }, }
527};
528
529static int
530seqmatch(const char * __restrict s, size_t n,
531 const struct seqtable * __restrict sp)
532{
533 const int *p;
534
535 p = sp->chars;
536 while ((size_t)(p - sp->chars) < n && p - sp->chars < sp->len) {
537 switch (*p) {
538 case ECMA:
539 if (!isecma(*s))
540 goto terminate;
541 break;
542 case OECMA:
543 if (*s && strchr("@AB", *s))
544 break;
545 else
546 goto terminate;
547 case INTERM:
548 if (!isinterm(*s))
549 goto terminate;
550 break;
551 case CS94:
552 if (*s && strchr("()*+", *s))
553 break;
554 else
555 goto terminate;
556 case CS96:
557 if (*s && strchr(",-./", *s))
558 break;
559 else
560 goto terminate;
561 default:
562 if (*s != *p)
563 goto terminate;
564 break;
565 }
566
567 p++;
568 s++;
569 }
570
571terminate:
572 return (p - sp->chars);
573}
574
575static wchar_t
576_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei __unused,
577 const char * __restrict string, size_t n, const char ** __restrict result,
578 _ISO2022State * __restrict psenc)
579{
580 const struct seqtable *sp;
581 wchar_t wchar = 0;
582 int i, cur, nmatch;
583
584 while (1) {
585 /* SI/SO */
586 if (1 <= n && string[0] == '\017') {
587 psenc->gl = 0;
588 string++;
589 n--;
590 continue;
591 }
592 if (1 <= n && string[0] == '\016') {
593 psenc->gl = 1;
594 string++;
595 n--;
596 continue;
597 }
598
599 /* SS2/3R */
600 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
601 psenc->singlegl = psenc->singlegr =
602 (string[0] - '\216') + 2;
603 string++;
604 n--;
605 continue;
606 }
607
608 /* eat the letter if this is not ESC */
609 if (1 <= n && string[0] != '\033')
610 break;
611
612 /* look for a perfect match from escape sequences */
613 for (sp = &seqtable[0]; sp->len; sp++) {
614 nmatch = seqmatch(string, n, sp);
615 if (sp->len == nmatch && n >= (size_t)(sp->len))
616 break;
617 }
618
619 if (!sp->len)
620 goto notseq;
621
622 if (sp->type != -1) {
623 if (sp->csoff == -1)
624 i = 0;
625 else {
626 switch (sp->type) {
627 case CS94:
628 case CS94MULTI:
629 i = string[sp->csoff] - '(';
630 break;
631 case CS96:
632 case CS96MULTI:
633 i = string[sp->csoff] - ',';
634 break;
635 default:
636 return (_ISO2022INVALID);
637 }
638 }
639 psenc->g[i].type = sp->type;
640 psenc->g[i].final = '\0';
641 psenc->g[i].interm = '\0';
642 psenc->g[i].vers = '\0';
643 /* sp->finaloff must not be -1 */
644 if (sp->finaloff != -1)
645 psenc->g[i].final = string[sp->finaloff];
646 if (sp->intermoff != -1)
647 psenc->g[i].interm = string[sp->intermoff];
648 if (sp->versoff != -1)
649 psenc->g[i].vers = string[sp->versoff];
650
651 string += sp->len;
652 n -= sp->len;
653 continue;
654 }
655
656 /* LS2/3 */
657 if (2 <= n && string[0] == '\033' &&
658 string[1] && strchr("no", string[1])) {
659 psenc->gl = string[1] - 'n' + 2;
660 string += 2;
661 n -= 2;
662 continue;
663 }
664
665 /* LS1/2/3R */
666 /* XXX: { for vi showmatch */
667 if (2 <= n && string[0] == '\033' &&
668 string[1] && strchr("~}|", string[1])) {
669 psenc->gr = 3 - (string[1] - '|');
670 string += 2;
671 n -= 2;
672 continue;
673 }
674
675 /* SS2/3 */
676 if (2 <= n && string[0] == '\033' && string[1] &&
677 strchr("NO", string[1])) {
678 psenc->singlegl = (string[1] - 'N') + 2;
679 string += 2;
680 n -= 2;
681 continue;
682 }
683
684 notseq:
685 /*
686 * if we've got an unknown escape sequence, eat the ESC at the
687 * head. otherwise, wait till full escape sequence comes.
688 */
689 for (sp = &seqtable[0]; sp->len; sp++) {
690 nmatch = seqmatch(string, n, sp);
691 if (!nmatch)
692 continue;
693
694 /*
695 * if we are in the middle of escape sequence,
696 * we still need to wait for more characters to come
697 */
698 if (n < (size_t)(sp->len)) {
699 if ((size_t)(nmatch) == n) {
700 if (result)
701 *result = string;
702 return (_ISO2022INVALID);
703 }
704 } else {
705 if (nmatch == sp->len) {
706 /* this case should not happen */
707 goto eat;
708 }
709 }
710 }
711
712 break;
713 }
714
715eat:
716 /* no letter to eat */
717 if (n < 1) {
718 if (result)
719 *result = string;
720 return (_ISO2022INVALID);
721 }
722
723 /* normal chars. always eat C0/C1 as is. */
724 if (iscntl(*string & 0xff))
725 cur = -1;
726 else if (*string & 0x80)
727 cur = (psenc->singlegr == -1) ? psenc->gr : psenc->singlegr;
728 else
729 cur = (psenc->singlegl == -1) ? psenc->gl : psenc->singlegl;
730
731 if (cur == -1) {
732asis:
733 wchar = *string++ & 0xff;
734 if (result)
735 *result = string;
736 /* reset single shift state */
737 psenc->singlegr = psenc->singlegl = -1;
738 return (wchar);
739 }
740
741 /* length error check */
742 switch (psenc->g[cur].type) {
743 case CS94MULTI:
744 case CS96MULTI:
745 if (!isthree(psenc->g[cur].final)) {
746 if (2 <= n &&
747 (string[0] & 0x80) == (string[1] & 0x80))
748 break;
749 } else {
750 if (3 <= n &&
751 (string[0] & 0x80) == (string[1] & 0x80) &&
752 (string[0] & 0x80) == (string[2] & 0x80))
753 break;
754 }
755
756 /* we still need to wait for more characters to come */
757 if (result)
758 *result = string;
759 return (_ISO2022INVALID);
760
761 case CS94:
762 case CS96:
763 if (1 <= n)
764 break;
765
766 /* we still need to wait for more characters to come */
767 if (result)
768 *result = string;
769 return (_ISO2022INVALID);
770 }
771
772 /* range check */
773 switch (psenc->g[cur].type) {
774 case CS94:
775 if (!(is94(string[0] & 0x7f)))
776 goto asis;
777 case CS96:
778 if (!(is96(string[0] & 0x7f)))
779 goto asis;
780 break;
781 case CS94MULTI:
782 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
783 goto asis;
784 break;
785 case CS96MULTI:
786 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
787 goto asis;
788 break;
789 }
790
791 /* extract the character. */
792 switch (psenc->g[cur].type) {
793 case CS94:
794 /* special case for ASCII. */
795 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
796 wchar = *string++;
797 wchar &= 0x7f;
798 break;
799 }
800 wchar = psenc->g[cur].final;
801 wchar = (wchar << 8);
802 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
803 wchar = (wchar << 8);
804 wchar = (wchar << 8) | (*string++ & 0x7f);
805 break;
806 case CS96:
807 /* special case for ISO-8859-1. */
808 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
809 wchar = *string++;
810 wchar &= 0x7f;
811 wchar |= 0x80;
812 break;
813 }
814 wchar = psenc->g[cur].final;
815 wchar = (wchar << 8);
816 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
817 wchar = (wchar << 8);
818 wchar = (wchar << 8) | (*string++ & 0x7f);
819 wchar |= 0x80;
820 break;
821 case CS94MULTI:
822 case CS96MULTI:
823 wchar = psenc->g[cur].final;
824 wchar = (wchar << 8);
825 if (isthree(psenc->g[cur].final))
826 wchar |= (*string++ & 0x7f);
827 wchar = (wchar << 8) | (*string++ & 0x7f);
828 wchar = (wchar << 8) | (*string++ & 0x7f);
829 if (psenc->g[cur].type == CS96MULTI)
830 wchar |= 0x80;
831 break;
832 }
833
834 if (result)
835 *result = string;
836 /* reset single shift state */
837 psenc->singlegr = psenc->singlegl = -1;
838 return (wchar);
839}
840
841
842
843static int
844_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
845 wchar_t * __restrict pwc, const char ** __restrict s,
846 size_t n, _ISO2022State * __restrict psenc, size_t * __restrict nresult)
847{
848 const char *p, *result, *s0;
849 wchar_t wchar;
850 int c, chlenbak;
851
852 if (*s == NULL) {
853 _citrus_ISO2022_init_state(ei, psenc);
854 *nresult = _ENCODING_IS_STATE_DEPENDENT;
855 return (0);
856 }
857 s0 = *s;
858 c = 0;
859 chlenbak = psenc->chlen;
860
861 /*
862 * if we have something in buffer, use that.
863 * otherwise, skip here
864 */
865 if (psenc->chlen > sizeof(psenc->ch)) {
866 /* illgeal state */
867 _citrus_ISO2022_init_state(ei, psenc);
868 goto encoding_error;
869 }
870 if (psenc->chlen == 0)
871 goto emptybuf;
872
873 /* buffer is not empty */
874 p = psenc->ch;
875 while (psenc->chlen < sizeof(psenc->ch)) {
876 if (n > 0) {
877 psenc->ch[psenc->chlen++] = *s0++;
878 n--;
879 }
880
881 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
882 &result, psenc);
883 c += result - p;
884 if (wchar != _ISO2022INVALID) {
885 if (psenc->chlen > (size_t)c)
886 memmove(psenc->ch, result, psenc->chlen - c);
887 if (psenc->chlen < (size_t)c)
888 psenc->chlen = 0;
889 else
890 psenc->chlen -= c;
891 goto output;
892 }
893
894 if (n == 0) {
895 if ((size_t)(result - p) == psenc->chlen)
896 /* complete shift sequence. */
897 psenc->chlen = 0;
898 goto restart;
899 }
900
901 p = result;
902 }
903
904 /* escape sequence too long? */
905 goto encoding_error;
906
907emptybuf:
908 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
909 if (wchar != _ISO2022INVALID) {
910 c += result - s0;
911 psenc->chlen = 0;
912 s0 = result;
913 goto output;
914 }
915 if (result > s0) {
916 c += (result - s0);
917 n -= (result - s0);
918 s0 = result;
919 if (n > 0)
920 goto emptybuf;
921 /* complete shift sequence. */
922 goto restart;
923 }
924 n += c;
925 if (n < sizeof(psenc->ch)) {
926 memcpy(psenc->ch, s0 - c, n);
927 psenc->chlen = n;
928 s0 = result;
929 goto restart;
930 }
931
932 /* escape sequence too long? */
933
934encoding_error:
935 psenc->chlen = 0;
936 *nresult = (size_t)-1;
937 return (EILSEQ);
938
939output:
940 *s = s0;
941 if (pwc)
942 *pwc = wchar;
943 *nresult = wchar ? c - chlenbak : 0;
944 return (0);
945
946restart:
947 *s = s0;
948 *nresult = (size_t)-2;
949
950 return (0);
951}
952
953static int
954recommendation(_ISO2022EncodingInfo * __restrict ei,
955 _ISO2022Charset * __restrict cs)
956{
957 _ISO2022Charset *recommend;
958 size_t j;
959 int i;
960
961 /* first, try a exact match. */
962 for (i = 0; i < 4; i++) {
963 recommend = ei->recommend[i];
964 for (j = 0; j < ei->recommendsize[i]; j++) {
965 if (cs->type != recommend[j].type)
966 continue;
967 if (cs->final != recommend[j].final)
968 continue;
969 if (cs->interm != recommend[j].interm)
970 continue;
971
972 return (i);
973 }
974 }
975
976 /* then, try a wildcard match over final char. */
977 for (i = 0; i < 4; i++) {
978 recommend = ei->recommend[i];
979 for (j = 0; j < ei->recommendsize[i]; j++) {
980 if (cs->type != recommend[j].type)
981 continue;
982 if (cs->final && (cs->final != recommend[j].final))
983 continue;
984 if (cs->interm && (cs->interm != recommend[j].interm))
985 continue;
986
987 return (i);
988 }
989 }
990
991 /* there's no recommendation. make a guess. */
992 if (ei->maxcharset == 0) {
993 return (0);
994 } else {
995 switch (cs->type) {
996 case CS94:
997 case CS94MULTI:
998 return (0);
999 case CS96:
1000 case CS96MULTI:
1001 return (1);
1002 }
1003 }
1004 return (0);
1005}
1006
1007static int
1008_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1009 char * __restrict string, size_t n, char ** __restrict result,
1010 _ISO2022State * __restrict psenc, size_t * __restrict nresult)
1011{
1012 _ISO2022Charset cs;
1013 char *p;
1014 char tmp[MB_LEN_MAX];
1015 size_t len;
1016 int bit8, i = 0, target;
1017 unsigned char mask;
1018
1019 if (isc0(wc & 0xff)) {
1020 /* go back to INIT0 or ASCII on control chars */
1021 cs = ei->initg[0].final ? ei->initg[0] : ascii;
1022 } else if (isc1(wc & 0xff)) {
1023 /* go back to INIT1 or ISO-8859-1 on control chars */
1024 cs = ei->initg[1].final ? ei->initg[1] : iso88591;
1025 } else if (!(wc & ~0xff)) {
1026 if (wc & 0x80) {
1027 /* special treatment for ISO-8859-1 */
1028 cs = iso88591;
1029 } else {
1030 /* special treatment for ASCII */
1031 cs = ascii;
1032 }
1033 } else {
1034 cs.final = (wc >> 24) & 0x7f;
1035 if ((wc >> 16) & 0x80)
1036 cs.interm = (wc >> 16) & 0x7f;
1037 else
1038 cs.interm = '\0';
1039 if (wc & 0x80)
1040 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1041 else
1042 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1043 }
1044 target = recommendation(ei, &cs);
1045 p = tmp;
1046 bit8 = ei->flags & F_8BIT;
1047
1048 /* designate the charset onto the target plane(G0/1/2/3). */
1049 if (psenc->g[target].type == cs.type &&
1050 psenc->g[target].final == cs.final &&
1051 psenc->g[target].interm == cs.interm)
1052 goto planeok;
1053
1054 *p++ = '\033';
1055 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1056 *p++ = '$';
1057 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) &&
1058 !cs.interm && !(ei->flags & F_NOOLD))
1059 ;
1060 else if (cs.type == CS94 || cs.type == CS94MULTI)
1061 *p++ = "()*+"[target];
1062 else
1063 *p++ = ",-./"[target];
1064 if (cs.interm)
1065 *p++ = cs.interm;
1066 *p++ = cs.final;
1067
1068 psenc->g[target].type = cs.type;
1069 psenc->g[target].final = cs.final;
1070 psenc->g[target].interm = cs.interm;
1071
1072planeok:
1073 /* invoke the plane onto GL or GR. */
1074 if (psenc->gl == target)
1075 goto sideok;
1076 if (bit8 && psenc->gr == target)
1077 goto sideok;
1078
1079 if (target == 0 && (ei->flags & F_LS0)) {
1080 *p++ = '\017';
1081 psenc->gl = 0;
1082 } else if (target == 1 && (ei->flags & F_LS1)) {
1083 *p++ = '\016';
1084 psenc->gl = 1;
1085 } else if (target == 2 && (ei->flags & F_LS2)) {
1086 *p++ = '\033';
1087 *p++ = 'n';
1088 psenc->gl = 2;
1089 } else if (target == 3 && (ei->flags & F_LS3)) {
1090 *p++ = '\033';
1091 *p++ = 'o';
1092 psenc->gl = 3;
1093 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1094 *p++ = '\033';
1095 *p++ = '~';
1096 psenc->gr = 1;
1097 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1098 *p++ = '\033';
1099 /*{*/
1100 *p++ = '}';
1101 psenc->gr = 2;
1102 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1103 *p++ = '\033';
1104 *p++ = '|';
1105 psenc->gr = 3;
1106 } else if (target == 2 && (ei->flags & F_SS2)) {
1107 *p++ = '\033';
1108 *p++ = 'N';
1109 psenc->singlegl = 2;
1110 } else if (target == 3 && (ei->flags & F_SS3)) {
1111 *p++ = '\033';
1112 *p++ = 'O';
1113 psenc->singlegl = 3;
1114 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1115 *p++ = '\216';
1116 *p++ = 'N';
1117 psenc->singlegl = psenc->singlegr = 2;
1118 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1119 *p++ = '\217';
1120 *p++ = 'O';
1121 psenc->singlegl = psenc->singlegr = 3;
1122 } else
1123 goto ilseq;
1124
1125sideok:
1126 if (psenc->singlegl == target)
1127 mask = 0x00;
1128 else if (psenc->singlegr == target)
1129 mask = 0x80;
1130 else if (psenc->gl == target)
1131 mask = 0x00;
1132 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1133 mask = 0x80;
1134 else
1135 goto ilseq;
1136
1137 switch (cs.type) {
1138 case CS94:
1139 case CS96:
1140 i = 1;
1141 break;
1142 case CS94MULTI:
1143 case CS96MULTI:
1144 i = !iscntl(wc & 0xff) ?
1145 (isthree(cs.final) ? 3 : 2) : 1;
1146 break;
1147 }
1148 while (i-- > 0)
1149 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1150
1151 /* reset single shift state */
1152 psenc->singlegl = psenc->singlegr = -1;
1153
1154 len = (size_t)(p - tmp);
1155 if (n < len) {
1156 if (result)
1157 *result = (char *)0;
1158 *nresult = (size_t)-1;
1159 return (E2BIG);
1160 }
1161 if (result)
1162 *result = string + len;
1163 memcpy(string, tmp, len);
1164 *nresult = len;
1165
1166 return (0);
1167
1168ilseq:
1169 *nresult = (size_t)-1;
1170 return (EILSEQ);
1171}
1172
1173static int
1174_citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1175 char * __restrict s, size_t n, _ISO2022State * __restrict psenc,
1176 size_t * __restrict nresult)
1177{
1178 char *result;
1179 char buf[MB_LEN_MAX];
1180 size_t len;
1181 int ret;
1182
1183 /* XXX state will be modified after this operation... */
1184 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc,
1185 &len);
1186 if (ret) {
1187 *nresult = len;
1188 return (ret);
1189 }
1190
1191 if (sizeof(buf) < len || n < len-1) {
1192 /* XXX should recover state? */
1193 *nresult = (size_t)-1;
1194 return (E2BIG);
1195 }
1196
1197 memcpy(s, buf, len - 1);
1198 *nresult = len - 1;
1199 return (0);
1200}
1201
1202static int
1203_citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1204 char * __restrict s, size_t n, wchar_t wc,
1205 _ISO2022State * __restrict psenc, size_t * __restrict nresult)
1206{
1207 char *result;
1208 char buf[MB_LEN_MAX];
1209 size_t len;
1210 int ret;
1211
1212 /* XXX state will be modified after this operation... */
1213 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc,
1214 &len);
1215 if (ret) {
1216 *nresult = len;
1217 return (ret);
1218 }
1219
1220 if (sizeof(buf) < len || n < len) {
1221 /* XXX should recover state? */
1222 *nresult = (size_t)-1;
1223 return (E2BIG);
1224 }
1225
1226 memcpy(s, buf, len);
1227 *nresult = len;
1228 return (0);
1229}
1230
1231static __inline int
1232/*ARGSUSED*/
1233_citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei __unused,
1234 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
1235{
1236 wchar_t m, nm;
1237
1238 m = wc & 0x7FFF8080;
1239 nm = wc & 0x007F7F7F;
1240 if (m & 0x00800000)
1241 nm &= 0x00007F7F;
1242 else
1243 m &= 0x7F008080;
1244 if (nm & 0x007F0000) {
1245 /* ^3 mark */
1246 m |= 0x007F0000;
1247 } else if (nm & 0x00007F00) {
1248 /* ^2 mark */
1249 m |= 0x00007F00;
1250 }
1251 *csid = (_csid_t)m;
1252 *idx = (_index_t)nm;
1253
1254 return (0);
1255}
1256
1257static __inline int
1258/*ARGSUSED*/
1259_citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei __unused,
1260 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
1261{
1262
1263 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1264
1265 return (0);
1266}
1267
1268static __inline int
1269/*ARGSUSED*/
1270_citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei __unused,
1271 _ISO2022State * __restrict psenc, int * __restrict rstate)
1272{
1273
1274 if (psenc->chlen == 0) {
1275 /* XXX: it should distinguish initial and stable. */
1276 *rstate = _STDENC_SDGEN_STABLE;
1277 } else
1278 *rstate = (psenc->ch[0] == '\033') ?
1279 _STDENC_SDGEN_INCOMPLETE_SHIFT :
1280 _STDENC_SDGEN_INCOMPLETE_CHAR;
1281 return (0);
1282}
1283
1284/* ----------------------------------------------------------------------
1285 * public interface for stdenc
1286 */
1287
1288_CITRUS_STDENC_DECLS(ISO2022);
1289_CITRUS_STDENC_DEF_OPS(ISO2022);
1290
1291#include "citrus_stdenc_template.h"