1/* $NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $ */ 2 3/*- 4 * Copyright (c)1999, 2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 29 */ 30 31#include <sys/cdefs.h> 32#if defined(LIBC_SCCS) && !defined(lint) 33__RCSID("$NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $"); 34#endif /* LIBC_SCCS and not lint */ 35 36#include <assert.h> 37#include <errno.h> 38#include <string.h> 39#include <stdio.h> 40#include <stdlib.h> 41#include <stddef.h> 42#include <wchar.h> 43#include <sys/types.h> 44#include <limits.h> 45 46#include "citrus_namespace.h" 47#include "citrus_types.h" 48#include "citrus_module.h" 49#include "citrus_ctype.h" 50#include "citrus_stdenc.h" 51#include "citrus_iso2022.h" 52 53 54/* ---------------------------------------------------------------------- 55 * private stuffs used by templates 56 */ 57 58 59/* 60 * wchar_t mappings: 61 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 62 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 63 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 64 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 65 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 66 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 67 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 68 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 69 * 94x94 charset (ESC & V ESC $ ( F) 70 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 71 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 72 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 74 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 75 */ 76 77typedef struct { 78 u_char type; 79#define CS94 (0U) 80#define CS96 (1U) 81#define CS94MULTI (2U) 82#define CS96MULTI (3U) 83 84 u_char final; 85 u_char interm; 86 u_char vers; 87} _ISO2022Charset; 88 89static const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' }; 90static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 91 92typedef struct { 93 _ISO2022Charset g[4]; 94 /* need 3 bits to hold -1, 0, ..., 3 */ 95 int gl:3, 96 gr:3, 97 singlegl:3, 98 singlegr:3; 99 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 100 int chlen; 101 int flags; 102#define _ISO2022STATE_FLAG_INITIALIZED 1 103} _ISO2022State; 104 105typedef struct { 106 _ISO2022Charset *recommend[4]; 107 size_t recommendsize[4]; 108 _ISO2022Charset initg[4]; 109 int maxcharset; 110 int flags; 111#define F_8BIT 0x0001 112#define F_NOOLD 0x0002 113#define F_SI 0x0010 /*0F*/ 114#define F_SO 0x0020 /*0E*/ 115#define F_LS0 0x0010 /*0F*/ 116#define F_LS1 0x0020 /*0E*/ 117#define F_LS2 0x0040 /*ESC n*/ 118#define F_LS3 0x0080 /*ESC o*/ 119#define F_LS1R 0x0100 /*ESC ~*/ 120#define F_LS2R 0x0200 /*ESC }*/ 121#define F_LS3R 0x0400 /*ESC |*/ 122#define F_SS2 0x0800 /*ESC N*/ 123#define F_SS3 0x1000 /*ESC O*/ 124#define F_SS2R 0x2000 /*8E*/ 125#define F_SS3R 0x4000 /*8F*/ 126} _ISO2022EncodingInfo; 127typedef struct { 128 _ISO2022EncodingInfo ei; 129 struct { 130 /* for future multi-locale facility */ 131 _ISO2022State s_mblen; 132 _ISO2022State s_mbrlen; 133 _ISO2022State s_mbrtowc; 134 _ISO2022State s_mbtowc; 135 _ISO2022State s_mbsrtowcs; 136 _ISO2022State s_mbsnrtowcs; 137 _ISO2022State s_wcrtomb; 138 _ISO2022State s_wcsrtombs; 139 _ISO2022State s_wcsnrtombs; 140 _ISO2022State s_wctomb; 141 } states; 142} _ISO2022CTypeInfo; 143 144#define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 145#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 146 147#define _FUNCNAME(m) _citrus_ISO2022_##m 148#define _ENCODING_INFO _ISO2022EncodingInfo 149#define _CTYPE_INFO _ISO2022CTypeInfo 150#define _ENCODING_STATE _ISO2022State 151#define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 152#define _ENCODING_IS_STATE_DEPENDENT 1 153#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 154 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 155 156 157#define _ISO2022INVALID (wchar_t)-1 158 159static __inline int isc0(__uint8_t x) { return ((x & 0x1f) == x); } 160static __inline int isc1(__uint8_t x) { return (0x80 <= x && x <= 0x9f); } 161static __inline int iscntl(__uint8_t x) { return (isc0(x) || isc1(x) || x == 0x7f); } 162static __inline int is94(__uint8_t x) { return (0x21 <= x && x <= 0x7e); } 163static __inline int is96(__uint8_t x) { return (0x20 <= x && x <= 0x7f); } 164static __inline int isecma(__uint8_t x) { return (0x30 <= x && x <= 0x7f); } 165static __inline int isinterm(__uint8_t x) { return (0x20 <= x && x <= 0x2f); } 166static __inline int isthree(__uint8_t x) { return (0x60 <= x && x <= 0x6f); } 167 168static __inline int 169getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 170{ 171 172 _DIAGASSERT(p != NULL); 173 _DIAGASSERT(cs != NULL); 174 175 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 176 cs->final = (u_char)(p[3] & 0xff); 177 cs->interm = '\0'; 178 cs->vers = '\0'; 179 cs->type = CS94MULTI; 180 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 181 cs->final = (u_char)(p[3] & 0xff); 182 cs->interm = '\0'; 183 cs->vers = '\0'; 184 cs->type = CS96MULTI; 185 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 186 cs->final = (u_char)(p[2] & 0xff); 187 cs->interm = '\0'; 188 cs->vers = '\0'; 189 cs->type = CS94; 190 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 191 cs->final = (u_char )(p[2] & 0xff); 192 cs->interm = '\0'; 193 cs->vers = '\0'; 194 cs->type = CS96; 195 } else { 196 return 1; 197 } 198 199 return 0; 200} 201 202 203#define _NOTMATCH 0 204#define _MATCH 1 205#define _PARSEFAIL 2 206 207static __inline int 208get_recommend(_ISO2022EncodingInfo * __restrict ei, 209 const char * __restrict token) 210{ 211 int i; 212 _ISO2022Charset cs, *p; 213 214 if (!strchr("0123", token[0]) || token[1] != '=') 215 return (_NOTMATCH); 216 217 if (getcs(&token[2], &cs) == 0) 218 ; 219 else if (!strcmp(&token[2], "94")) { 220 cs.final = (u_char)(token[4]); 221 cs.interm = '\0'; 222 cs.vers = '\0'; 223 cs.type = CS94; 224 } else if (!strcmp(&token[2], "96")) { 225 cs.final = (u_char)(token[4]); 226 cs.interm = '\0'; 227 cs.vers = '\0'; 228 cs.type = CS96; 229 } else if (!strcmp(&token[2], "94$")) { 230 cs.final = (u_char)(token[5]); 231 cs.interm = '\0'; 232 cs.vers = '\0'; 233 cs.type = CS94MULTI; 234 } else if (!strcmp(&token[2], "96$")) { 235 cs.final = (u_char)(token[5]); 236 cs.interm = '\0'; 237 cs.vers = '\0'; 238 cs.type = CS96MULTI; 239 } else { 240 return (_PARSEFAIL); 241 } 242 243 i = token[0] - '0'; 244 if (!ei->recommend[i]) { 245 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 246 } else { 247 p = realloc(ei->recommend[i], 248 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1)); 249 if (!p) 250 return (_PARSEFAIL); 251 ei->recommend[i] = p; 252 } 253 if (!ei->recommend[i]) 254 return (_PARSEFAIL); 255 ei->recommendsize[i]++; 256 257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 259 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 260 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 261 262 return (_MATCH); 263} 264 265static __inline int 266get_initg(_ISO2022EncodingInfo * __restrict ei, 267 const char * __restrict token) 268{ 269 _ISO2022Charset cs; 270 271 if (strncmp("INIT", &token[0], 4) || 272 !strchr("0123", token[4]) || 273 token[5] != '=') 274 return (_NOTMATCH); 275 276 if (getcs(&token[6], &cs) != 0) 277 return (_PARSEFAIL); 278 279 ei->initg[token[4] - '0'].type = cs.type; 280 ei->initg[token[4] - '0'].final = cs.final; 281 ei->initg[token[4] - '0'].interm = cs.interm; 282 ei->initg[token[4] - '0'].vers = cs.vers; 283 284 return (_MATCH); 285} 286 287static __inline int 288get_max(_ISO2022EncodingInfo * __restrict ei, 289 const char * __restrict token) 290{ 291 if (!strcmp(token, "MAX1")) { 292 ei->maxcharset = 1; 293 } else if (!strcmp(token, "MAX2")) { 294 ei->maxcharset = 2; 295 } else if (!strcmp(token, "MAX3")) { 296 ei->maxcharset = 3; 297 } else 298 return (_NOTMATCH); 299 300 return (_MATCH); 301} 302 303 304static __inline int 305get_flags(_ISO2022EncodingInfo * __restrict ei, 306 const char * __restrict token) 307{ 308 int i; 309 static struct { 310 const char *tag; 311 int flag; 312 } const tags[] = { 313 { "DUMMY", 0 }, 314 { "8BIT", F_8BIT }, 315 { "NOOLD", F_NOOLD }, 316 { "SI", F_SI }, 317 { "SO", F_SO }, 318 { "LS0", F_LS0 }, 319 { "LS1", F_LS1 }, 320 { "LS2", F_LS2 }, 321 { "LS3", F_LS3 }, 322 { "LS1R", F_LS1R }, 323 { "LS2R", F_LS2R }, 324 { "LS3R", F_LS3R }, 325 { "SS2", F_SS2 }, 326 { "SS3", F_SS3 }, 327 { "SS2R", F_SS2R }, 328 { "SS3R", F_SS3R }, 329 { NULL, 0 } 330 }; 331 332 for (i = 0; tags[i].tag; i++) { 333 if (!strcmp(token, tags[i].tag)) { 334 ei->flags |= tags[i].flag; 335 return (_MATCH); 336 } 337 } 338 339 return (_NOTMATCH); 340} 341 342 343static __inline int 344_citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 345 const void * __restrict var, size_t lenvar) 346{ 347 char const *v, *e; 348 char buf[20]; 349 int i, len, ret; 350 351 _DIAGASSERT(ei != NULL); 352 353 354 /* 355 * parse VARIABLE section. 356 */ 357 358 if (!var) 359 return (EFTYPE); 360 361 v = (const char *) var; 362 363 /* initialize structure */ 364 ei->maxcharset = 0; 365 for (i = 0; i < 4; i++) { 366 ei->recommend[i] = NULL; 367 ei->recommendsize[i] = 0; 368 } 369 ei->flags = 0; 370 371 while (*v) { 372 while (*v == ' ' || *v == '\t') 373 ++v; 374 375 /* find the token */ 376 e = v; 377 while (*e && *e != ' ' && *e != '\t') 378 ++e; 379 380 len = e-v; 381 if (len == 0) 382 break; 383 if (len>=sizeof(buf)) 384 goto parsefail; 385 snprintf(buf, sizeof(buf), "%.*s", len, v); 386 387 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 388 ; 389 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 390 ; 391 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 392 ; 393 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 394 ; 395 else 396 ret = _PARSEFAIL; 397 if (ret==_PARSEFAIL) 398 goto parsefail; 399 v = e; 400 401 } 402 403 return (0); 404 405parsefail: 406 free(ei->recommend[0]); 407 free(ei->recommend[1]); 408 free(ei->recommend[2]); 409 free(ei->recommend[3]); 410 411 return (EFTYPE); 412} 413 414static __inline void 415/*ARGSUSED*/ 416_citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 417 _ISO2022State * __restrict s) 418{ 419 int i; 420 421 memset(s, 0, sizeof(*s)); 422 s->gl = 0; 423 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 424 425 for (i = 0; i < 4; i++) { 426 if (ei->initg[i].final) { 427 s->g[i].type = ei->initg[i].type; 428 s->g[i].final = ei->initg[i].final; 429 s->g[i].interm = ei->initg[i].interm; 430 } 431 } 432 s->singlegl = s->singlegr = -1; 433 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 434} 435 436static __inline void 437/*ARGSUSED*/ 438_citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei, 439 void * __restrict pspriv, 440 const _ISO2022State * __restrict s) 441{ 442 memcpy(pspriv, (const void *)s, sizeof(*s)); 443} 444 445static __inline void 446/*ARGSUSED*/ 447_citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei, 448 _ISO2022State * __restrict s, 449 const void * __restrict pspriv) 450{ 451 memcpy((void *)s, pspriv, sizeof(*s)); 452} 453 454static int 455/*ARGSUSED*/ 456_citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 457 const void * __restrict var, 458 size_t lenvar) 459{ 460 461 _DIAGASSERT(ei != NULL); 462 463 return _citrus_ISO2022_parse_variable(ei, var, lenvar); 464} 465 466static void 467/*ARGSUSED*/ 468_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei) 469{ 470} 471 472#define ESC '\033' 473#define ECMA -1 474#define INTERM -2 475#define OECMA -3 476static const struct seqtable { 477 int type; 478 int csoff; 479 int finaloff; 480 int intermoff; 481 int versoff; 482 int len; 483 int chars[10]; 484} seqtable[] = { 485 /* G0 94MULTI special */ 486 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 487 /* G0 94MULTI special with version identification */ 488 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 489 /* G? 94 */ 490 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 491 /* G? 94 with 2nd intermediate char */ 492 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 493 /* G? 96 */ 494 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 495 /* G? 96 with 2nd intermediate char */ 496 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 497 /* G? 94MULTI */ 498 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 499 /* G? 96MULTI */ 500 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 501 /* G? 94MULTI with version specification */ 502 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 503 /* LS2/3 */ 504 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 505 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 506 /* LS1/2/3R */ 507 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 508 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 509 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 510 /* SS2/3 */ 511 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 512 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 513 /* end of records */ 514 { 0, } 515}; 516 517static int 518seqmatch(const char * __restrict s, size_t n, 519 const struct seqtable * __restrict sp) 520{ 521 const int *p; 522 523 _DIAGASSERT(s != NULL); 524 _DIAGASSERT(sp != NULL); 525 526 p = sp->chars; 527 while (p - sp->chars < n && p - sp->chars < sp->len) { 528 switch (*p) { 529 case ECMA: 530 if (!isecma(*s)) 531 goto terminate; 532 break; 533 case OECMA: 534 if (*s && strchr("@AB", *s)) 535 break; 536 else 537 goto terminate; 538 case INTERM: 539 if (!isinterm(*s)) 540 goto terminate; 541 break; 542 case CS94: 543 if (*s && strchr("()*+", *s)) 544 break; 545 else 546 goto terminate; 547 case CS96: 548 if (*s && strchr(",-./", *s)) 549 break; 550 else 551 goto terminate; 552 default: 553 if (*s != *p) 554 goto terminate; 555 break; 556 } 557 558 p++; 559 s++; 560 } 561 562terminate: 563 return p - sp->chars; 564} 565 566static wchar_t 567_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei, 568 const char * __restrict string, size_t n, 569 const char ** __restrict result, 570 _ISO2022State * __restrict psenc) 571{ 572 wchar_t wchar = 0; 573 int cur; 574 const struct seqtable *sp; 575 int nmatch; 576 int i; 577 578 _DIAGASSERT(ei != NULL); 579 _DIAGASSERT(psenc != NULL); 580 _DIAGASSERT(string != NULL); 581 /* result may be NULL */ 582 583 while (1) { 584 /* SI/SO */ 585 if (1 <= n && string[0] == '\017') { 586 psenc->gl = 0; 587 string++; 588 n--; 589 continue; 590 } 591 if (1 <= n && string[0] == '\016') { 592 psenc->gl = 1; 593 string++; 594 n--; 595 continue; 596 } 597 598 /* SS2/3R */ 599 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 600 psenc->singlegl = psenc->singlegr = 601 (string[0] - '\216') + 2; 602 string++; 603 n--; 604 continue; 605 } 606 607 /* eat the letter if this is not ESC */ 608 if (1 <= n && string[0] != '\033') 609 break; 610 611 /* look for a perfect match from escape sequences */ 612 for (sp = &seqtable[0]; sp->len; sp++) { 613 nmatch = seqmatch(string, n, sp); 614 if (sp->len == nmatch && n >= sp->len) 615 break; 616 } 617 618 if (!sp->len) 619 goto notseq; 620 621 if (sp->type != -1) { 622 if (sp->csoff == -1) 623 i = 0; 624 else { 625 switch (sp->type) { 626 case CS94: 627 case CS94MULTI: 628 i = string[sp->csoff] - '('; 629 break; 630 case CS96: 631 case CS96MULTI: 632 i = string[sp->csoff] - ','; 633 break; 634 default: 635 return (_ISO2022INVALID); 636 } 637 } 638 psenc->g[i].type = sp->type; 639 psenc->g[i].final = '\0'; 640 psenc->g[i].interm = '\0'; 641 psenc->g[i].vers = '\0'; 642 /* sp->finaloff must not be -1 */ 643 if (sp->finaloff != -1) 644 psenc->g[i].final = string[sp->finaloff]; 645 if (sp->intermoff != -1) 646 psenc->g[i].interm = string[sp->intermoff]; 647 if (sp->versoff != -1) 648 psenc->g[i].vers = string[sp->versoff]; 649 650 string += sp->len; 651 n -= sp->len; 652 continue; 653 } 654 655 /* LS2/3 */ 656 if (2 <= n && string[0] == '\033' 657 && string[1] && strchr("no", string[1])) { 658 psenc->gl = string[1] - 'n' + 2; 659 string += 2; 660 n -= 2; 661 continue; 662 } 663 664 /* LS1/2/3R */ 665 /* XXX: { for vi showmatch */ 666 if (2 <= n && string[0] == '\033' 667 && string[1] && strchr("~}|", string[1])) { 668 psenc->gr = 3 - (string[1] - '|'); 669 string += 2; 670 n -= 2; 671 continue; 672 } 673 674 /* SS2/3 */ 675 if (2 <= n && string[0] == '\033' 676 && string[1] && strchr("NO", string[1])) { 677 psenc->singlegl = (string[1] - 'N') + 2; 678 string += 2; 679 n -= 2; 680 continue; 681 } 682 683 notseq: 684 /* 685 * if we've got an unknown escape sequence, eat the ESC at the 686 * head. otherwise, wait till full escape sequence comes. 687 */ 688 for (sp = &seqtable[0]; sp->len; sp++) { 689 nmatch = seqmatch(string, n, sp); 690 if (!nmatch) 691 continue; 692 693 /* 694 * if we are in the middle of escape sequence, 695 * we still need to wait for more characters to come 696 */ 697 if (n < sp->len) { 698 if (nmatch == n) { 699 if (result) 700 *result = string; 701 return (_ISO2022INVALID); 702 } 703 } else { 704 if (nmatch == sp->len) { 705 /* this case should not happen */ 706 goto eat; 707 } 708 } 709 } 710 711 break; 712 } 713 714eat: 715 /* no letter to eat */ 716 if (n < 1) { 717 if (result) 718 *result = string; 719 return (_ISO2022INVALID); 720 } 721 722 /* normal chars. always eat C0/C1 as is. */ 723 if (iscntl(*string & 0xff)) 724 cur = -1; 725 else if (*string & 0x80) { 726 cur = (psenc->singlegr == -1) 727 ? psenc->gr : psenc->singlegr; 728 } else { 729 cur = (psenc->singlegl == -1) 730 ? psenc->gl : psenc->singlegl; 731 } 732 733 if (cur == -1) { 734asis: 735 wchar = *string++ & 0xff; 736 if (result) 737 *result = string; 738 /* reset single shift state */ 739 psenc->singlegr = psenc->singlegl = -1; 740 return wchar; 741 } 742 743 /* length error check */ 744 switch (psenc->g[cur].type) { 745 case CS94MULTI: 746 case CS96MULTI: 747 if (!isthree(psenc->g[cur].final)) { 748 if (2 <= n 749 && (string[0] & 0x80) == (string[1] & 0x80)) 750 break; 751 } else { 752 if (3 <= n 753 && (string[0] & 0x80) == (string[1] & 0x80) 754 && (string[0] & 0x80) == (string[2] & 0x80)) 755 break; 756 } 757 758 /* we still need to wait for more characters to come */ 759 if (result) 760 *result = string; 761 return (_ISO2022INVALID); 762 763 case CS94: 764 case CS96: 765 if (1 <= n) 766 break; 767 768 /* we still need to wait for more characters to come */ 769 if (result) 770 *result = string; 771 return (_ISO2022INVALID); 772 } 773 774 /* range check */ 775 switch (psenc->g[cur].type) { 776 case CS94: 777 if (!(is94(string[0] & 0x7f))) 778 goto asis; 779 case CS96: 780 if (!(is96(string[0] & 0x7f))) 781 goto asis; 782 break; 783 case CS94MULTI: 784 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 785 goto asis; 786 break; 787 case CS96MULTI: 788 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 789 goto asis; 790 break; 791 } 792 793 /* extract the character. */ 794 switch (psenc->g[cur].type) { 795 case CS94: 796 /* special case for ASCII. */ 797 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 798 wchar = *string++; 799 wchar &= 0x7f; 800 break; 801 } 802 wchar = psenc->g[cur].final; 803 wchar = (wchar << 8); 804 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 805 wchar = (wchar << 8); 806 wchar = (wchar << 8) | (*string++ & 0x7f); 807 break; 808 case CS96: 809 /* special case for ISO-8859-1. */ 810 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 811 wchar = *string++; 812 wchar &= 0x7f; 813 wchar |= 0x80; 814 break; 815 } 816 wchar = psenc->g[cur].final; 817 wchar = (wchar << 8); 818 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 819 wchar = (wchar << 8); 820 wchar = (wchar << 8) | (*string++ & 0x7f); 821 wchar |= 0x80; 822 break; 823 case CS94MULTI: 824 case CS96MULTI: 825 wchar = psenc->g[cur].final; 826 wchar = (wchar << 8); 827 if (isthree(psenc->g[cur].final)) 828 wchar |= (*string++ & 0x7f); 829 wchar = (wchar << 8) | (*string++ & 0x7f); 830 wchar = (wchar << 8) | (*string++ & 0x7f); 831 if (psenc->g[cur].type == CS96MULTI) 832 wchar |= 0x80; 833 break; 834 } 835 836 if (result) 837 *result = string; 838 /* reset single shift state */ 839 psenc->singlegr = psenc->singlegl = -1; 840 return wchar; 841} 842 843 844 845static int 846_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 847 wchar_t * __restrict pwc, 848 const char ** __restrict s, 849 size_t n, _ISO2022State * __restrict psenc, 850 size_t * __restrict nresult) 851{ 852 wchar_t wchar; 853 const char *s0, *p, *result; 854 int c; 855 int chlenbak; 856 857 _DIAGASSERT(nresult != 0); 858 _DIAGASSERT(ei != NULL); 859 _DIAGASSERT(psenc != NULL); 860 _DIAGASSERT(s != NULL); 861 862 if (*s == NULL) { 863 _citrus_ISO2022_init_state(ei, psenc); 864 *nresult = _ENCODING_IS_STATE_DEPENDENT; 865 return 0; 866 } 867 s0 = *s; 868 c = 0; 869 chlenbak = psenc->chlen; 870 871 /* 872 * if we have something in buffer, use that. 873 * otherwise, skip here 874 */ 875 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) { 876 /* illgeal state */ 877 _citrus_ISO2022_init_state(ei, psenc); 878 goto encoding_error; 879 } 880 if (psenc->chlen == 0) 881 goto emptybuf; 882 883 /* buffer is not empty */ 884 p = psenc->ch; 885 while (psenc->chlen < sizeof(psenc->ch)) { 886 if (n > 0) { 887 psenc->ch[psenc->chlen++] = *s0++; 888 n--; 889 } 890 891 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 892 &result, psenc); 893 c += result - p; 894 if (wchar != _ISO2022INVALID) { 895 if (psenc->chlen > c) 896 memmove(psenc->ch, result, psenc->chlen - c); 897 if (psenc->chlen < c) 898 psenc->chlen = 0; 899 else 900 psenc->chlen -= c; 901 goto output; 902 } 903 904 if (n == 0) { 905 if ((result - p) == psenc->chlen) 906 /* complete shift sequence. */ 907 psenc->chlen = 0; 908 goto restart; 909 } 910 911 p = result; 912 } 913 914 /* escape sequence too long? */ 915 goto encoding_error; 916 917emptybuf: 918 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 919 if (wchar != _ISO2022INVALID) { 920 c += result - s0; 921 psenc->chlen = 0; 922 s0 = result; 923 goto output; 924 } 925 if (result > s0) { 926 c += (result - s0); 927 n -= (result - s0); 928 s0 = result; 929 if (n>0) 930 goto emptybuf; 931 /* complete shift sequence. */ 932 goto restart; 933 } 934 n += c; 935 if (n < sizeof(psenc->ch)) { 936 memcpy(psenc->ch, s0 - c, n); 937 psenc->chlen = n; 938 s0 = result; 939 goto restart; 940 } 941 942 /* escape sequence too long? */ 943 944encoding_error: 945 psenc->chlen = 0; 946 *nresult = (size_t)-1; 947 return (EILSEQ); 948 949output: 950 *s = s0; 951 if (pwc) 952 *pwc = wchar; 953 954 if (!wchar) 955 *nresult = 0; 956 else 957 *nresult = c - chlenbak; 958 959 return (0); 960 961restart: 962 *s = s0; 963 *nresult = (size_t)-2; 964 965 return (0); 966} 967 968static int 969recommendation(_ISO2022EncodingInfo * __restrict ei, 970 _ISO2022Charset * __restrict cs) 971{ 972 int i, j; 973 _ISO2022Charset *recommend; 974 975 _DIAGASSERT(ei != NULL); 976 _DIAGASSERT(cs != NULL); 977 978 /* first, try a exact match. */ 979 for (i = 0; i < 4; i++) { 980 recommend = ei->recommend[i]; 981 for (j = 0; j < ei->recommendsize[i]; j++) { 982 if (cs->type != recommend[j].type) 983 continue; 984 if (cs->final != recommend[j].final) 985 continue; 986 if (cs->interm != recommend[j].interm) 987 continue; 988 989 return i; 990 } 991 } 992 993 /* then, try a wildcard match over final char. */ 994 for (i = 0; i < 4; i++) { 995 recommend = ei->recommend[i]; 996 for (j = 0; j < ei->recommendsize[i]; j++) { 997 if (cs->type != recommend[j].type) 998 continue; 999 if (cs->final && (cs->final != recommend[j].final)) 1000 continue; 1001 if (cs->interm && (cs->interm != recommend[j].interm)) 1002 continue; 1003 1004 return i; 1005 } 1006 } 1007 1008 /* there's no recommendation. make a guess. */ 1009 if (ei->maxcharset == 0) { 1010 return 0; 1011 } else { 1012 switch (cs->type) { 1013 case CS94: 1014 case CS94MULTI: 1015 return 0; 1016 case CS96: 1017 case CS96MULTI: 1018 return 1; 1019 } 1020 } 1021 return 0; 1022} 1023 1024static int 1025_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1026 char * __restrict string, size_t n, 1027 char ** __restrict result, 1028 _ISO2022State * __restrict psenc, 1029 size_t * __restrict nresult) 1030{ 1031 int i = 0; 1032 size_t len; 1033 _ISO2022Charset cs; 1034 char *p; 1035 char tmp[MB_LEN_MAX]; 1036 int target; 1037 u_char mask; 1038 int bit8; 1039 1040 _DIAGASSERT(ei != NULL); 1041 _DIAGASSERT(string != NULL); 1042 /* result may be NULL */ 1043 _DIAGASSERT(psenc != NULL); 1044 _DIAGASSERT(nresult != NULL); 1045 1046 if (isc0(wc & 0xff)) { 1047 /* go back to INIT0 or ASCII on control chars */ 1048 cs = ei->initg[0].final ? ei->initg[0] : ascii; 1049 } else if (isc1(wc & 0xff)) { 1050 /* go back to INIT1 or ISO-8859-1 on control chars */ 1051 cs = ei->initg[1].final ? ei->initg[1] : iso88591; 1052 } else if (!(wc & ~0xff)) { 1053 if (wc & 0x80) { 1054 /* special treatment for ISO-8859-1 */ 1055 cs = iso88591; 1056 } else { 1057 /* special treatment for ASCII */ 1058 cs = ascii; 1059 } 1060 } else { 1061 cs.final = (wc >> 24) & 0x7f; 1062 if ((wc >> 16) & 0x80) 1063 cs.interm = (wc >> 16) & 0x7f; 1064 else 1065 cs.interm = '\0'; 1066 if (wc & 0x80) 1067 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1068 else 1069 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1070 } 1071 target = recommendation(ei, &cs); 1072 p = tmp; 1073 bit8 = ei->flags & F_8BIT; 1074 1075 /* designate the charset onto the target plane(G0/1/2/3). */ 1076 if (psenc->g[target].type == cs.type 1077 && psenc->g[target].final == cs.final 1078 && psenc->g[target].interm == cs.interm) 1079 goto planeok; 1080 1081 *p++ = '\033'; 1082 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1083 *p++ = '$'; 1084 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) 1085 && !cs.interm && !(ei->flags & F_NOOLD)) 1086 ; 1087 else if (cs.type == CS94 || cs.type == CS94MULTI) 1088 *p++ = "()*+"[target]; 1089 else 1090 *p++ = ",-./"[target]; 1091 if (cs.interm) 1092 *p++ = cs.interm; 1093 *p++ = cs.final; 1094 1095 psenc->g[target].type = cs.type; 1096 psenc->g[target].final = cs.final; 1097 psenc->g[target].interm = cs.interm; 1098 1099planeok: 1100 /* invoke the plane onto GL or GR. */ 1101 if (psenc->gl == target) 1102 goto sideok; 1103 if (bit8 && psenc->gr == target) 1104 goto sideok; 1105 1106 if (target == 0 && (ei->flags & F_LS0)) { 1107 *p++ = '\017'; 1108 psenc->gl = 0; 1109 } else if (target == 1 && (ei->flags & F_LS1)) { 1110 *p++ = '\016'; 1111 psenc->gl = 1; 1112 } else if (target == 2 && (ei->flags & F_LS2)) { 1113 *p++ = '\033'; 1114 *p++ = 'n'; 1115 psenc->gl = 2; 1116 } else if (target == 3 && (ei->flags & F_LS3)) { 1117 *p++ = '\033'; 1118 *p++ = 'o'; 1119 psenc->gl = 3; 1120 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1121 *p++ = '\033'; 1122 *p++ = '~'; 1123 psenc->gr = 1; 1124 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1125 *p++ = '\033'; 1126 /*{*/ 1127 *p++ = '}'; 1128 psenc->gr = 2; 1129 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1130 *p++ = '\033'; 1131 *p++ = '|'; 1132 psenc->gr = 3; 1133 } else if (target == 2 && (ei->flags & F_SS2)) { 1134 *p++ = '\033'; 1135 *p++ = 'N'; 1136 psenc->singlegl = 2; 1137 } else if (target == 3 && (ei->flags & F_SS3)) { 1138 *p++ = '\033'; 1139 *p++ = 'O'; 1140 psenc->singlegl = 3; 1141 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1142 *p++ = '\216'; 1143 *p++ = 'N'; 1144 psenc->singlegl = psenc->singlegr = 2; 1145 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1146 *p++ = '\217'; 1147 *p++ = 'O'; 1148 psenc->singlegl = psenc->singlegr = 3; 1149 } else 1150 goto ilseq; 1151 1152sideok: 1153 if (psenc->singlegl == target) 1154 mask = 0x00; 1155 else if (psenc->singlegr == target) 1156 mask = 0x80; 1157 else if (psenc->gl == target) 1158 mask = 0x00; 1159 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1160 mask = 0x80; 1161 else 1162 goto ilseq; 1163 1164 switch (cs.type) { 1165 case CS94: 1166 case CS96: 1167 i = 1; 1168 break; 1169 case CS94MULTI: 1170 case CS96MULTI: 1171 i = !iscntl(wc & 0xff) ? 1172 (isthree(cs.final) ? 3 : 2) : 1; 1173 break; 1174 } 1175 while (i-- > 0) 1176 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1177 1178 /* reset single shift state */ 1179 psenc->singlegl = psenc->singlegr = -1; 1180 1181 len = (size_t)(p - tmp); 1182 if (n < len) { 1183 if (result) 1184 *result = (char *)0; 1185 *nresult = (size_t)-1; 1186 return E2BIG; 1187 } 1188 if (result) 1189 *result = string + len; 1190 memcpy(string, tmp, len); 1191 *nresult = len; 1192 1193 return 0; 1194 1195ilseq: 1196 *nresult = (size_t)-1; 1197 return EILSEQ; 1198} 1199 1200static int 1201_citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1202 char * __restrict s, size_t n, 1203 _ISO2022State * __restrict psenc, 1204 size_t * __restrict nresult) 1205{ 1206 char buf[MB_LEN_MAX]; 1207 char *result; 1208 int ret; 1209 size_t len; 1210 1211 _DIAGASSERT(ei != NULL); 1212 _DIAGASSERT(nresult != 0); 1213 _DIAGASSERT(s != NULL); 1214 1215 /* XXX state will be modified after this operation... */ 1216 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc, 1217 &len); 1218 if (ret) { 1219 *nresult = len; 1220 return ret; 1221 } 1222 1223 if (sizeof(buf) < len || n < len-1) { 1224 /* XXX should recover state? */ 1225 *nresult = (size_t)-1; 1226 return E2BIG; 1227 } 1228 1229 memcpy(s, buf, len-1); 1230 *nresult = len-1; 1231 return (0); 1232} 1233 1234static int 1235_citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1236 char * __restrict s, size_t n, wchar_t wc, 1237 _ISO2022State * __restrict psenc, 1238 size_t * __restrict nresult) 1239{ 1240 char buf[MB_LEN_MAX]; 1241 char *result; 1242 int ret; 1243 size_t len; 1244 1245 _DIAGASSERT(ei != NULL); 1246 _DIAGASSERT(s != NULL); 1247 _DIAGASSERT(psenc != NULL); 1248 _DIAGASSERT(nresult != 0); 1249 1250 /* XXX state will be modified after this operation... */ 1251 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc, 1252 &len); 1253 if (ret) { 1254 *nresult = len; 1255 return ret; 1256 } 1257 1258 if (sizeof(buf) < len || n < len) { 1259 /* XXX should recover state? */ 1260 *nresult = (size_t)-1; 1261 return E2BIG; 1262 } 1263 1264 memcpy(s, buf, len); 1265 *nresult = len; 1266 return (0); 1267} 1268 1269static __inline int 1270/*ARGSUSED*/ 1271_citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei, 1272 _csid_t * __restrict csid, 1273 _index_t * __restrict idx, wchar_t wc) 1274{ 1275 wchar_t m, nm; 1276 1277 _DIAGASSERT(csid != NULL && idx != NULL); 1278 1279 m = wc & 0x7FFF8080; 1280 nm = wc & 0x007F7F7F; 1281 if (m & 0x00800000) { 1282 nm &= 0x00007F7F; 1283 } else { 1284 m &= 0x7F008080; 1285 } 1286 if (nm & 0x007F0000) { 1287 /* ^3 mark */ 1288 m |= 0x007F0000; 1289 } else if (nm & 0x00007F00) { 1290 /* ^2 mark */ 1291 m |= 0x00007F00; 1292 } 1293 *csid = (_csid_t)m; 1294 *idx = (_index_t)nm; 1295 1296 return (0); 1297} 1298 1299static __inline int 1300/*ARGSUSED*/ 1301_citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei, 1302 wchar_t * __restrict wc, 1303 _csid_t csid, _index_t idx) 1304{ 1305 1306 _DIAGASSERT(ei != NULL && wc != NULL); 1307 1308 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1309 1310 return (0); 1311} 1312 1313static __inline int 1314/*ARGSUSED*/ 1315_citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei, 1316 _ISO2022State * __restrict psenc, 1317 int * __restrict rstate) 1318{ 1319 1320 if (psenc->chlen == 0) { 1321 /* XXX: it should distinguish initial and stable. */ 1322 *rstate = _STDENC_SDGEN_STABLE; 1323 } else { 1324 if (psenc->ch[0] == '\033') 1325 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 1326 else 1327 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 1328 } 1329 1330 return 0; 1331} 1332 1333/* ---------------------------------------------------------------------- 1334 * public interface for ctype 1335 */ 1336 1337_CITRUS_CTYPE_DECLS(ISO2022); 1338_CITRUS_CTYPE_DEF_OPS(ISO2022); 1339 1340#include "citrus_ctype_template.h" 1341 1342/* ---------------------------------------------------------------------- 1343 * public interface for stdenc 1344 */ 1345 1346_CITRUS_STDENC_DECLS(ISO2022); 1347_CITRUS_STDENC_DEF_OPS(ISO2022); 1348 1349#include "citrus_stdenc_template.h" 1350