1/* 2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA). 3 * Copyright (c) 1996-2010, The nkf Project. 4 * 5 * This software is provided 'as-is', without any express or implied 6 * warranty. In no event will the authors be held liable for any damages 7 * arising from the use of this software. 8 * 9 * Permission is granted to anyone to use this software for any purpose, 10 * including commercial applications, and to alter it and redistribute it 11 * freely, subject to the following restrictions: 12 * 13 * 1. The origin of this software must not be misrepresented; you must not 14 * claim that you wrote the original software. If you use this software 15 * in a product, an acknowledgment in the product documentation would be 16 * appreciated but is not required. 17 * 18 * 2. Altered source versions must be plainly marked as such, and must not be 19 * misrepresented as being the original software. 20 * 21 * 3. This notice may not be removed or altered from any source distribution. 22 */ 23#define NKF_VERSION "2.1.3" 24#define NKF_RELEASE_DATE "2012-11-22" 25#define COPY_RIGHT \ 26 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \ 27 "Copyright (C) 1996-2012, The nkf Project." 
28 29#include "config.h" 30#include "nkf.h" 31#include "utf8tbl.h" 32#ifdef __WIN32__ 33#include <windows.h> 34#include <locale.h> 35#endif 36#if defined(__OS2__) 37# define INCL_DOS 38# define INCL_DOSERRORS 39# include <os2.h> 40#endif 41#include <assert.h> 42 43 44/* state of output_mode and input_mode 45 46 c2 0 means ASCII 47 JIS_X_0201_1976_K 48 ISO_8859_1 49 JIS_X_0208 50 EOF all termination 51 c1 32bit data 52 53 */ 54 55/* MIME ENCODE */ 56 57#define FIXED_MIME 7 58#define STRICT_MIME 8 59 60/* byte order */ 61enum byte_order { 62 ENDIAN_BIG = 1, 63 ENDIAN_LITTLE = 2, 64 ENDIAN_2143 = 3, 65 ENDIAN_3412 = 4 66}; 67 68/* ASCII CODE */ 69 70#define BS 0x08 71#define TAB 0x09 72#define LF 0x0a 73#define CR 0x0d 74#define ESC 0x1b 75#define SP 0x20 76#define DEL 0x7f 77#define SI 0x0f 78#define SO 0x0e 79#define SS2 0x8e 80#define SS3 0x8f 81#define CRLF 0x0D0A 82 83 84/* encodings */ 85 86enum nkf_encodings { 87 ASCII, 88 ISO_8859_1, 89 ISO_2022_JP, 90 CP50220, 91 CP50221, 92 CP50222, 93 ISO_2022_JP_1, 94 ISO_2022_JP_3, 95 ISO_2022_JP_2004, 96 SHIFT_JIS, 97 WINDOWS_31J, 98 CP10001, 99 EUC_JP, 100 EUCJP_NKF, 101 CP51932, 102 EUCJP_MS, 103 EUCJP_ASCII, 104 SHIFT_JISX0213, 105 SHIFT_JIS_2004, 106 EUC_JISX0213, 107 EUC_JIS_2004, 108 UTF_8, 109 UTF_8N, 110 UTF_8_BOM, 111 UTF8_MAC, 112 UTF_16, 113 UTF_16BE, 114 UTF_16BE_BOM, 115 UTF_16LE, 116 UTF_16LE_BOM, 117 UTF_32, 118 UTF_32BE, 119 UTF_32BE_BOM, 120 UTF_32LE, 121 UTF_32LE_BOM, 122 BINARY, 123 NKF_ENCODING_TABLE_SIZE, 124 JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */ 125 /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */ 126 /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */ 127 /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */ 128 JIS_X_0208 = 0x1168, /* @B */ 129 JIS_X_0212 = 0x1159, /* D */ 130 /* JIS_X_0213_2000_1 = 0x1228, */ /* O */ 131 JIS_X_0213_2 = 0x1229, /* P */ 132 JIS_X_0213_1 = 0x1233 /* Q */ 133}; 134 135static nkf_char s_iconv(nkf_char c2, nkf_char 
c1, nkf_char c0); 136static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0); 137static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0); 138static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0); 139static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0); 140static void j_oconv(nkf_char c2, nkf_char c1); 141static void s_oconv(nkf_char c2, nkf_char c1); 142static void e_oconv(nkf_char c2, nkf_char c1); 143static void w_oconv(nkf_char c2, nkf_char c1); 144static void w_oconv16(nkf_char c2, nkf_char c1); 145static void w_oconv32(nkf_char c2, nkf_char c1); 146 147typedef struct { 148 const char *name; 149 nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0); 150 void (*oconv)(nkf_char c2, nkf_char c1); 151} nkf_native_encoding; 152 153nkf_native_encoding NkfEncodingASCII = { "ASCII", e_iconv, e_oconv }; 154nkf_native_encoding NkfEncodingISO_2022_JP = { "ISO-2022-JP", e_iconv, j_oconv }; 155nkf_native_encoding NkfEncodingShift_JIS = { "Shift_JIS", s_iconv, s_oconv }; 156nkf_native_encoding NkfEncodingEUC_JP = { "EUC-JP", e_iconv, e_oconv }; 157nkf_native_encoding NkfEncodingUTF_8 = { "UTF-8", w_iconv, w_oconv }; 158nkf_native_encoding NkfEncodingUTF_16 = { "UTF-16", w_iconv16, w_oconv16 }; 159nkf_native_encoding NkfEncodingUTF_32 = { "UTF-32", w_iconv32, w_oconv32 }; 160 161typedef struct { 162 const int id; 163 const char *name; 164 const nkf_native_encoding *base_encoding; 165} nkf_encoding; 166 167nkf_encoding nkf_encoding_table[] = { 168 {ASCII, "US-ASCII", &NkfEncodingASCII}, 169 {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII}, 170 {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP}, 171 {CP50220, "CP50220", &NkfEncodingISO_2022_JP}, 172 {CP50221, "CP50221", &NkfEncodingISO_2022_JP}, 173 {CP50222, "CP50222", &NkfEncodingISO_2022_JP}, 174 {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP}, 175 {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP}, 176 {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP}, 
177 {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS}, 178 {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS}, 179 {CP10001, "CP10001", &NkfEncodingShift_JIS}, 180 {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP}, 181 {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP}, 182 {CP51932, "CP51932", &NkfEncodingEUC_JP}, 183 {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP}, 184 {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP}, 185 {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS}, 186 {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS}, 187 {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP}, 188 {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP}, 189 {UTF_8, "UTF-8", &NkfEncodingUTF_8}, 190 {UTF_8N, "UTF-8N", &NkfEncodingUTF_8}, 191 {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8}, 192 {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8}, 193 {UTF_16, "UTF-16", &NkfEncodingUTF_16}, 194 {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16}, 195 {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16}, 196 {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16}, 197 {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16}, 198 {UTF_32, "UTF-32", &NkfEncodingUTF_32}, 199 {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32}, 200 {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32}, 201 {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32}, 202 {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32}, 203 {BINARY, "BINARY", &NkfEncodingASCII}, 204 {-1, NULL, NULL} 205}; 206 207struct { 208 const char *name; 209 const int id; 210} encoding_name_to_id_table[] = { 211 {"US-ASCII", ASCII}, 212 {"ASCII", ASCII}, 213 {"646", ASCII}, 214 {"ROMAN8", ASCII}, 215 {"ISO-2022-JP", ISO_2022_JP}, 216 {"ISO2022JP-CP932", CP50220}, 217 {"CP50220", CP50220}, 218 {"CP50221", CP50221}, 219 {"CSISO2022JP", CP50221}, 220 {"CP50222", CP50222}, 221 {"ISO-2022-JP-1", ISO_2022_JP_1}, 222 {"ISO-2022-JP-3", ISO_2022_JP_3}, 223 {"ISO-2022-JP-2004", ISO_2022_JP_2004}, 224 {"SHIFT_JIS", SHIFT_JIS}, 225 {"SJIS", SHIFT_JIS}, 226 {"MS_Kanji", SHIFT_JIS}, 227 {"PCK", SHIFT_JIS}, 228 {"WINDOWS-31J", 
WINDOWS_31J}, 229 {"CSWINDOWS31J", WINDOWS_31J}, 230 {"CP932", WINDOWS_31J}, 231 {"MS932", WINDOWS_31J}, 232 {"CP10001", CP10001}, 233 {"EUCJP", EUC_JP}, 234 {"EUC-JP", EUC_JP}, 235 {"EUCJP-NKF", EUCJP_NKF}, 236 {"CP51932", CP51932}, 237 {"EUC-JP-MS", EUCJP_MS}, 238 {"EUCJP-MS", EUCJP_MS}, 239 {"EUCJPMS", EUCJP_MS}, 240 {"EUC-JP-ASCII", EUCJP_ASCII}, 241 {"EUCJP-ASCII", EUCJP_ASCII}, 242 {"SHIFT_JISX0213", SHIFT_JISX0213}, 243 {"SHIFT_JIS-2004", SHIFT_JIS_2004}, 244 {"EUC-JISX0213", EUC_JISX0213}, 245 {"EUC-JIS-2004", EUC_JIS_2004}, 246 {"UTF-8", UTF_8}, 247 {"UTF-8N", UTF_8N}, 248 {"UTF-8-BOM", UTF_8_BOM}, 249 {"UTF8-MAC", UTF8_MAC}, 250 {"UTF-8-MAC", UTF8_MAC}, 251 {"UTF-16", UTF_16}, 252 {"UTF-16BE", UTF_16BE}, 253 {"UTF-16BE-BOM", UTF_16BE_BOM}, 254 {"UTF-16LE", UTF_16LE}, 255 {"UTF-16LE-BOM", UTF_16LE_BOM}, 256 {"UTF-32", UTF_32}, 257 {"UTF-32BE", UTF_32BE}, 258 {"UTF-32BE-BOM", UTF_32BE_BOM}, 259 {"UTF-32LE", UTF_32LE}, 260 {"UTF-32LE-BOM", UTF_32LE_BOM}, 261 {"BINARY", BINARY}, 262 {NULL, -1} 263}; 264 265#if defined(DEFAULT_CODE_JIS) 266#define DEFAULT_ENCIDX ISO_2022_JP 267#elif defined(DEFAULT_CODE_SJIS) 268#define DEFAULT_ENCIDX SHIFT_JIS 269#elif defined(DEFAULT_CODE_WINDOWS_31J) 270#define DEFAULT_ENCIDX WINDOWS_31J 271#elif defined(DEFAULT_CODE_EUC) 272#define DEFAULT_ENCIDX EUC_JP 273#elif defined(DEFAULT_CODE_UTF8) 274#define DEFAULT_ENCIDX UTF_8 275#endif 276 277 278#define is_alnum(c) \ 279 (('a'<=c && c<='z')||('A'<= c && c<='Z')||('0'<=c && c<='9')) 280 281/* I don't trust portablity of toupper */ 282#define nkf_toupper(c) (('a'<=c && c<='z')?(c-('a'-'A')):c) 283#define nkf_isoctal(c) ('0'<=c && c<='7') 284#define nkf_isdigit(c) ('0'<=c && c<='9') 285#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=c && c<='f') || ('A'<=c && c <= 'F')) 286#define nkf_isblank(c) (c == SP || c == TAB) 287#define nkf_isspace(c) (nkf_isblank(c) || c == CR || c == LF) 288#define nkf_isalpha(c) (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) 289#define 
nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c)) 290#define nkf_isprint(c) (SP<=c && c<='~') 291#define nkf_isgraph(c) ('!'<=c && c<='~') 292#define hex2bin(c) (('0'<=c&&c<='9') ? (c-'0') : \ 293 ('A'<=c&&c<='F') ? (c-'A'+10) : \ 294 ('a'<=c&&c<='f') ? (c-'a'+10) : 0) 295#define bin2hex(c) ("0123456789ABCDEF"[c&15]) 296#define is_eucg3(c2) (((unsigned short)c2 >> 8) == SS3) 297#define nkf_noescape_mime(c) ((c == CR) || (c == LF) || \ 298 ((c > SP) && (c < DEL) && (c != '?') && (c != '=') && (c != '_') \ 299 && (c != '(') && (c != ')') && (c != '.') && (c != 0x22))) 300 301#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END) 302#define nkf_byte_jisx0201_katakana_p(c) (SP <= c && c <= 0x5F) 303 304#define HOLD_SIZE 1024 305#if defined(INT_IS_SHORT) 306#define IOBUF_SIZE 2048 307#else 308#define IOBUF_SIZE 16384 309#endif 310 311#define DEFAULT_J 'B' 312#define DEFAULT_R 'B' 313 314 315#define GETA1 0x22 316#define GETA2 0x2e 317 318 319/* MIME preprocessor */ 320 321#ifdef EASYWIN /*Easy Win */ 322extern POINT _BufferSize; 323#endif 324 325struct input_code{ 326 const char *name; 327 nkf_char stat; 328 nkf_char score; 329 nkf_char index; 330 nkf_char buf[3]; 331 void (*status_func)(struct input_code *, nkf_char); 332 nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0); 333 int _file_stat; 334}; 335 336static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */ 337static nkf_encoding *input_encoding = NULL; 338static nkf_encoding *output_encoding = NULL; 339 340#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 341/* UCS Mapping 342 * 0: Shift_JIS, eucJP-ascii 343 * 1: eucJP-ms 344 * 2: CP932, CP51932 345 * 3: CP10001 346 */ 347#define UCS_MAP_ASCII 0 348#define UCS_MAP_MS 1 349#define UCS_MAP_CP932 2 350#define UCS_MAP_CP10001 3 351static int ms_ucs_map_f = UCS_MAP_ASCII; 352#endif 353#ifdef UTF8_INPUT_ENABLE 354/* no NEC special, NEC-selected IBM extended and IBM extended characters */ 355static 
int no_cp932ext_f = FALSE; 356/* ignore ZERO WIDTH NO-BREAK SPACE */ 357static int no_best_fit_chars_f = FALSE; 358static int input_endian = ENDIAN_BIG; 359static int input_bom_f = FALSE; 360static nkf_char unicode_subchar = '?'; /* the regular substitution character */ 361static void (*encode_fallback)(nkf_char c) = NULL; 362static void w_status(struct input_code *, nkf_char); 363#endif 364#ifdef UTF8_OUTPUT_ENABLE 365static int output_bom_f = FALSE; 366static int output_endian = ENDIAN_BIG; 367#endif 368 369static void std_putc(nkf_char c); 370static nkf_char std_getc(FILE *f); 371static nkf_char std_ungetc(nkf_char c,FILE *f); 372 373static nkf_char broken_getc(FILE *f); 374static nkf_char broken_ungetc(nkf_char c,FILE *f); 375 376static nkf_char mime_getc(FILE *f); 377 378static void mime_putc(nkf_char c); 379 380/* buffers */ 381 382#if !defined(PERL_XS) && !defined(WIN32DLL) 383static unsigned char stdibuf[IOBUF_SIZE]; 384static unsigned char stdobuf[IOBUF_SIZE]; 385#endif 386 387#define NKF_UNSPECIFIED (-TRUE) 388 389/* flags */ 390static int unbuf_f = FALSE; 391static int estab_f = FALSE; 392static int nop_f = FALSE; 393static int binmode_f = TRUE; /* binary mode */ 394static int rot_f = FALSE; /* rot14/43 mode */ 395static int hira_f = FALSE; /* hira/kata henkan */ 396static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */ 397static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */ 398static int mime_decode_f = FALSE; /* mime decode is explicitly on */ 399static int mimebuf_f = FALSE; /* MIME buffered input */ 400static int broken_f = FALSE; /* convert ESC-less broken JIS */ 401static int iso8859_f = FALSE; /* ISO8859 through */ 402static int mimeout_f = FALSE; /* base64 mode */ 403static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */ 404static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */ 405 406#ifdef UNICODE_NORMALIZATION 407static int nfc_f = FALSE; 408static nkf_char (*i_nfc_getc)(FILE *) = 
std_getc; /* input of ugetc */ 409static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc; 410#endif 411 412#ifdef INPUT_OPTION 413static int cap_f = FALSE; 414static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */ 415static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc; 416 417static int url_f = FALSE; 418static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */ 419static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc; 420#endif 421 422#define PREFIX_EUCG3 NKF_INT32_C(0x8F00) 423#define CLASS_MASK NKF_INT32_C(0xFF000000) 424#define CLASS_UNICODE NKF_INT32_C(0x01000000) 425#define VALUE_MASK NKF_INT32_C(0x00FFFFFF) 426#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF) 427#define UNICODE_MAX NKF_INT32_C(0x0010FFFF) 428#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3) 429#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE) 430#define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE) 431#define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX) 432#define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX) 433 434#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00)) 435 436#ifdef NUMCHAR_OPTION 437static int numchar_f = FALSE; 438static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */ 439static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc; 440#endif 441 442#ifdef CHECK_OPTION 443static int noout_f = FALSE; 444static void no_putc(nkf_char c); 445static int debug_f = FALSE; 446static void debug(const char *str); 447static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0; 448#endif 449 450static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */ 451static void set_input_codename(const char *codename); 452 453#ifdef EXEC_IO 454static int exec_f = 0; 455#endif 456 457#ifdef SHIFTJIS_CP932 458/* invert IBM extended characters to others */ 459static int cp51932_f = FALSE; 460 461/* invert NEC-selected IBM extended 
characters to IBM extended characters */ 462static int cp932inv_f = TRUE; 463 464/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */ 465#endif /* SHIFTJIS_CP932 */ 466 467static int x0212_f = FALSE; 468static int x0213_f = FALSE; 469 470static unsigned char prefix_table[256]; 471 472static void e_status(struct input_code *, nkf_char); 473static void s_status(struct input_code *, nkf_char); 474 475struct input_code input_code_list[] = { 476 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0}, 477 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0}, 478#ifdef UTF8_INPUT_ENABLE 479 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0}, 480 {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0}, 481 {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0}, 482#endif 483 {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0} 484}; 485 486static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */ 487static int base64_count = 0; 488 489/* X0208 -> ASCII converter */ 490 491/* fold parameter */ 492static int f_line = 0; /* chars in line */ 493static int f_prev = 0; 494static int fold_preserve_f = FALSE; /* preserve new lines */ 495static int fold_f = FALSE; 496static int fold_len = 0; 497 498/* options */ 499static unsigned char kanji_intro = DEFAULT_J; 500static unsigned char ascii_intro = DEFAULT_R; 501 502/* Folding */ 503 504#define FOLD_MARGIN 10 505#define DEFAULT_FOLD 60 506 507static int fold_margin = FOLD_MARGIN; 508 509/* process default */ 510 511static nkf_char 512no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0) 513{ 514 fprintf(stderr,"nkf internal module connection failure.\n"); 515 exit(EXIT_FAILURE); 516 return 0; /* LINT */ 517} 518 519static void 520no_connection(nkf_char c2, nkf_char c1) 521{ 522 no_connection2(c2,c1,0); 523} 524 525static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2; 526static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection; 527 528static void (*o_zconv)(nkf_char c2,nkf_char c1) 
= no_connection; 529static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection; 530static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection; 531static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection; 532static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection; 533static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection; 534static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection; 535 536/* static redirections */ 537 538static void (*o_putc)(nkf_char c) = std_putc; 539 540static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */ 541static nkf_char (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc; 542 543static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */ 544static nkf_char (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc; 545 546static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */ 547 548static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */ 549static nkf_char (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc; 550 551/* for strict mime */ 552static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */ 553static nkf_char (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc; 554 555/* Global states */ 556static int output_mode = ASCII; /* output kanji mode */ 557static int input_mode = ASCII; /* input kanji mode */ 558static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */ 559 560/* X0201 / X0208 conversion tables */ 561 562/* X0201 kana conversion table */ 563/* 90-9F A0-DF */ 564static const unsigned char cv[]= { 565 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57, 566 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21, 567 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29, 568 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43, 569 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26, 570 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d, 571 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35, 572 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d, 573 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46, 574 
0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c, 575 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52, 576 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e, 577 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62, 578 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69, 579 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d, 580 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c, 581 0x00,0x00}; 582 583 584/* X0201 kana conversion table for daguten */ 585/* 90-9F A0-DF */ 586static const unsigned char dv[]= { 587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 590 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 591 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74, 592 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e, 593 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36, 594 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e, 595 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47, 596 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00, 597 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53, 598 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00, 599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 602 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 603 0x00,0x00}; 604 605/* X0201 kana conversion table for han-daguten */ 606/* 90-9F A0-DF */ 607static const unsigned char ev[]= { 608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54, 619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00, 620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 623 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 624 0x00,0x00}; 625 626/* X0201 kana to X0213 conversion table for han-daguten */ 627/* 90-9F A0-DF */ 628static const unsigned char ev_x0213[]= { 629 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 630 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 631 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 632 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 633 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 634 0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78, 635 0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00, 636 0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00, 637 0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00, 638 0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00, 639 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 640 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 642 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 644 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 645 0x00,0x00}; 646 647 648/* X0208 kigou conversion table */ 649/* 0x8140 - 0x819e */ 650static const unsigned char fv[] = { 651 652 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a, 653 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00, 654 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00, 655 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f, 656 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27, 657 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d, 658 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00, 659 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00, 660 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00, 661 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 662 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40, 663 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 664} ; 665 666 667 668static int option_mode = 0; 669static int file_out_f = FALSE; 670#ifdef OVERWRITE 671static int overwrite_f = FALSE; 672static int preserve_time_f = FALSE; 673static int backup_f = FALSE; 674static char *backup_suffix = ""; 675#endif 676 677static int eolmode_f = 0; /* CR, LF, CRLF */ 678static int input_eol = 0; /* 0: unestablished, EOF: MIXED */ 679static nkf_char prev_cr = 0; /* CR or 0 */ 
680#ifdef EASYWIN /*Easy Win */ 681static int end_check; 682#endif /*Easy Win */ 683 684static void * 685nkf_xmalloc(size_t size) 686{ 687 void *ptr; 688 689 if (size == 0) size = 1; 690 691 ptr = malloc(size); 692 if (ptr == NULL) { 693 perror("can't malloc"); 694 exit(EXIT_FAILURE); 695 } 696 697 return ptr; 698} 699 700static void * 701nkf_xrealloc(void *ptr, size_t size) 702{ 703 if (size == 0) size = 1; 704 705 ptr = realloc(ptr, size); 706 if (ptr == NULL) { 707 perror("can't realloc"); 708 exit(EXIT_FAILURE); 709 } 710 711 return ptr; 712} 713 714#define nkf_xfree(ptr) free(ptr) 715 716static int 717nkf_str_caseeql(const char *src, const char *target) 718{ 719 int i; 720 for (i = 0; src[i] && target[i]; i++) { 721 if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE; 722 } 723 if (src[i] || target[i]) return FALSE; 724 else return TRUE; 725} 726 727static nkf_encoding* 728nkf_enc_from_index(int idx) 729{ 730 if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) { 731 return 0; 732 } 733 return &nkf_encoding_table[idx]; 734} 735 736static int 737nkf_enc_find_index(const char *name) 738{ 739 int i; 740 if (name[0] == 'X' && *(name+1) == '-') name += 2; 741 for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) { 742 if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) { 743 return encoding_name_to_id_table[i].id; 744 } 745 } 746 return -1; 747} 748 749static nkf_encoding* 750nkf_enc_find(const char *name) 751{ 752 int idx = -1; 753 idx = nkf_enc_find_index(name); 754 if (idx < 0) return 0; 755 return nkf_enc_from_index(idx); 756} 757 758#define nkf_enc_name(enc) (enc)->name 759#define nkf_enc_to_index(enc) (enc)->id 760#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding 761#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv 762#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv 763#define nkf_enc_asciicompat(enc) (\ 764 nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\ 765 nkf_enc_to_base_encoding(enc) == 
&NkfEncodingISO_2022_JP) 766#define nkf_enc_unicode_p(enc) (\ 767 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\ 768 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\ 769 nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32) 770#define nkf_enc_cp5022x_p(enc) (\ 771 nkf_enc_to_index(enc) == CP50220 ||\ 772 nkf_enc_to_index(enc) == CP50221 ||\ 773 nkf_enc_to_index(enc) == CP50222) 774 775#ifdef DEFAULT_CODE_LOCALE 776static const char* 777nkf_locale_charmap() 778{ 779#ifdef HAVE_LANGINFO_H 780 return nl_langinfo(CODESET); 781#elif defined(__WIN32__) 782 static char buf[16]; 783 sprintf(buf, "CP%d", GetACP()); 784 return buf; 785#elif defined(__OS2__) 786# if defined(INT_IS_SHORT) 787 /* OS/2 1.x */ 788 return NULL; 789# else 790 /* OS/2 32bit */ 791 static char buf[16]; 792 ULONG ulCP[1], ulncp; 793 DosQueryCp(sizeof(ulCP), ulCP, &ulncp); 794 if (ulCP[0] == 932 || ulCP[0] == 943) 795 strcpy(buf, "Shift_JIS"); 796 else 797 sprintf(buf, "CP%lu", ulCP[0]); 798 return buf; 799# endif 800#endif 801 return NULL; 802} 803 804static nkf_encoding* 805nkf_locale_encoding() 806{ 807 nkf_encoding *enc = 0; 808 const char *encname = nkf_locale_charmap(); 809 if (encname) 810 enc = nkf_enc_find(encname); 811 return enc; 812} 813#endif /* DEFAULT_CODE_LOCALE */ 814 815static nkf_encoding* 816nkf_utf8_encoding() 817{ 818 return &nkf_encoding_table[UTF_8]; 819} 820 821static nkf_encoding* 822nkf_default_encoding() 823{ 824 nkf_encoding *enc = 0; 825#ifdef DEFAULT_CODE_LOCALE 826 enc = nkf_locale_encoding(); 827#elif defined(DEFAULT_ENCIDX) 828 enc = nkf_enc_from_index(DEFAULT_ENCIDX); 829#endif 830 if (!enc) enc = nkf_utf8_encoding(); 831 return enc; 832} 833 834typedef struct { 835 long capa; 836 long len; 837 nkf_char *ptr; 838} nkf_buf_t; 839 840static nkf_buf_t * 841nkf_buf_new(int length) 842{ 843 nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t)); 844 buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length); 845 buf->capa = length; 846 buf->len = 0; 847 return buf; 848} 849 
850#if 0 851static void 852nkf_buf_dispose(nkf_buf_t *buf) 853{ 854 nkf_xfree(buf->ptr); 855 nkf_xfree(buf); 856} 857#endif 858 859#define nkf_buf_length(buf) ((buf)->len) 860#define nkf_buf_empty_p(buf) ((buf)->len == 0) 861 862static nkf_char 863nkf_buf_at(nkf_buf_t *buf, int index) 864{ 865 assert(index <= buf->len); 866 return buf->ptr[index]; 867} 868 869static void 870nkf_buf_clear(nkf_buf_t *buf) 871{ 872 buf->len = 0; 873} 874 875static void 876nkf_buf_push(nkf_buf_t *buf, nkf_char c) 877{ 878 if (buf->capa <= buf->len) { 879 exit(EXIT_FAILURE); 880 } 881 buf->ptr[buf->len++] = c; 882} 883 884static nkf_char 885nkf_buf_pop(nkf_buf_t *buf) 886{ 887 assert(!nkf_buf_empty_p(buf)); 888 return buf->ptr[--buf->len]; 889} 890 891/* Normalization Form C */ 892#ifndef PERL_XS 893#ifdef WIN32DLL 894#define fprintf dllprintf 895#endif 896 897static void 898version(void) 899{ 900 fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n"); 901} 902 903static void 904usage(void) 905{ 906 fprintf(HELP_OUTPUT, 907 "Usage: nkf -[flags] [--] [in file] .. 
[out file for -O flag]\n" 908#ifdef UTF8_OUTPUT_ENABLE 909 " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 910 " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n" 911#else 912#endif 913#ifdef UTF8_INPUT_ENABLE 914 " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 915 " UTF option is -W[8,[16,32][B,L]]\n" 916#else 917 " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n" 918#endif 919 ); 920 fprintf(HELP_OUTPUT, 921 " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n" 922 " M[BQ] MIME encode [B:base64 Q:quoted]\n" 923 " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n" 924 ); 925 fprintf(HELP_OUTPUT, 926 " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n" 927 " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n" 928 " 4: JISX0208 Katakana to JISX0201 Katakana\n" 929 " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n" 930 ); 931 fprintf(HELP_OUTPUT, 932 " O Output to File (DEFAULT 'nkf.out')\n" 933 " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n" 934 ); 935 fprintf(HELP_OUTPUT, 936 " --ic=<encoding> Specify the input encoding\n" 937 " --oc=<encoding> Specify the output encoding\n" 938 " --hiragana --katakana Hiragana/Katakana Conversion\n" 939 " --katakana-hiragana Converts each other\n" 940 ); 941 fprintf(HELP_OUTPUT, 942#ifdef INPUT_OPTION 943 " --{cap, url}-input Convert hex after ':' or '%%'\n" 944#endif 945#ifdef NUMCHAR_OPTION 946 " --numchar-input Convert Unicode Character Reference\n" 947#endif 948#ifdef UTF8_INPUT_ENABLE 949 " --fb-{skip, html, xml, perl, java, subchar}\n" 950 " Specify unassigned character's replacement\n" 951#endif 952 ); 953 fprintf(HELP_OUTPUT, 954#ifdef OVERWRITE 955 " --in-place[=SUF] Overwrite original files\n" 956 " --overwrite[=SUF] Preserve timestamp of original files\n" 957#endif 958 " -g --guess Guess the input code\n" 959 " -v --version Print the version\n" 960 " --help/-V Print this help / 
configuration\n" 961 ); 962 version(); 963} 964 965static void 966show_configuration(void) 967{ 968 fprintf(HELP_OUTPUT, 969 "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n" 970 " Compile-time options:\n" 971 " Compiled at: " __DATE__ " " __TIME__ "\n" 972 ); 973 fprintf(HELP_OUTPUT, 974 " Default output encoding: " 975#ifdef DEFAULT_CODE_LOCALE 976 "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding()) 977#elif defined(DEFAULT_ENCIDX) 978 "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding()) 979#else 980 "NONE\n" 981#endif 982 ); 983 fprintf(HELP_OUTPUT, 984 " Default output end of line: " 985#if DEFAULT_NEWLINE == CR 986 "CR" 987#elif DEFAULT_NEWLINE == CRLF 988 "CRLF" 989#else 990 "LF" 991#endif 992 "\n" 993 " Decode MIME encoded string: " 994#if MIME_DECODE_DEFAULT 995 "ON" 996#else 997 "OFF" 998#endif 999 "\n" 1000 " Convert JIS X 0201 Katakana: " 1001#if X0201_DEFAULT 1002 "ON" 1003#else 1004 "OFF" 1005#endif 1006 "\n" 1007 " --help, --version output: " 1008#if HELP_OUTPUT_HELP_OUTPUT 1009 "HELP_OUTPUT" 1010#else 1011 "STDOUT" 1012#endif 1013 "\n"); 1014} 1015#endif /*PERL_XS*/ 1016 1017#ifdef OVERWRITE 1018static char* 1019get_backup_filename(const char *suffix, const char *filename) 1020{ 1021 char *backup_filename; 1022 int asterisk_count = 0; 1023 int i, j; 1024 int filename_length = strlen(filename); 1025 1026 for(i = 0; suffix[i]; i++){ 1027 if(suffix[i] == '*') asterisk_count++; 1028 } 1029 1030 if(asterisk_count){ 1031 backup_filename = nkf_xmalloc(strlen(suffix) + (asterisk_count * (filename_length - 1)) + 1); 1032 for(i = 0, j = 0; suffix[i];){ 1033 if(suffix[i] == '*'){ 1034 backup_filename[j] = '\0'; 1035 strncat(backup_filename, filename, filename_length); 1036 i++; 1037 j += filename_length; 1038 }else{ 1039 backup_filename[j++] = suffix[i++]; 1040 } 1041 } 1042 backup_filename[j] = '\0'; 1043 }else{ 1044 j = filename_length + strlen(suffix); 1045 backup_filename = nkf_xmalloc(j + 1); 1046 strcpy(backup_filename, 
filename); 1047 strcat(backup_filename, suffix); 1048 backup_filename[j] = '\0'; 1049 } 1050 return backup_filename; 1051} 1052#endif 1053 1054#ifdef UTF8_INPUT_ENABLE 1055static void 1056nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c) 1057{ 1058 int shift = 20; 1059 c &= VALUE_MASK; 1060 while(shift >= 0){ 1061 if(c >= NKF_INT32_C(1)<<shift){ 1062 while(shift >= 0){ 1063 (*f)(0, bin2hex(c>>shift)); 1064 shift -= 4; 1065 } 1066 }else{ 1067 shift -= 4; 1068 } 1069 } 1070 return; 1071} 1072 1073static void 1074encode_fallback_html(nkf_char c) 1075{ 1076 (*oconv)(0, '&'); 1077 (*oconv)(0, '#'); 1078 c &= VALUE_MASK; 1079 if(c >= NKF_INT32_C(1000000)) 1080 (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10); 1081 if(c >= NKF_INT32_C(100000)) 1082 (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10); 1083 if(c >= 10000) 1084 (*oconv)(0, 0x30+(c/10000 )%10); 1085 if(c >= 1000) 1086 (*oconv)(0, 0x30+(c/1000 )%10); 1087 if(c >= 100) 1088 (*oconv)(0, 0x30+(c/100 )%10); 1089 if(c >= 10) 1090 (*oconv)(0, 0x30+(c/10 )%10); 1091 if(c >= 0) 1092 (*oconv)(0, 0x30+ c %10); 1093 (*oconv)(0, ';'); 1094 return; 1095} 1096 1097static void 1098encode_fallback_xml(nkf_char c) 1099{ 1100 (*oconv)(0, '&'); 1101 (*oconv)(0, '#'); 1102 (*oconv)(0, 'x'); 1103 nkf_each_char_to_hex(oconv, c); 1104 (*oconv)(0, ';'); 1105 return; 1106} 1107 1108static void 1109encode_fallback_java(nkf_char c) 1110{ 1111 (*oconv)(0, '\\'); 1112 c &= VALUE_MASK; 1113 if(!nkf_char_unicode_bmp_p(c)){ 1114 (*oconv)(0, 'U'); 1115 (*oconv)(0, '0'); 1116 (*oconv)(0, '0'); 1117 (*oconv)(0, bin2hex(c>>20)); 1118 (*oconv)(0, bin2hex(c>>16)); 1119 }else{ 1120 (*oconv)(0, 'u'); 1121 } 1122 (*oconv)(0, bin2hex(c>>12)); 1123 (*oconv)(0, bin2hex(c>> 8)); 1124 (*oconv)(0, bin2hex(c>> 4)); 1125 (*oconv)(0, bin2hex(c )); 1126 return; 1127} 1128 1129static void 1130encode_fallback_perl(nkf_char c) 1131{ 1132 (*oconv)(0, '\\'); 1133 (*oconv)(0, 'x'); 1134 (*oconv)(0, '{'); 1135 nkf_each_char_to_hex(oconv, c); 1136 
(*oconv)(0, '}'); 1137 return; 1138} 1139 1140static void 1141encode_fallback_subchar(nkf_char c) 1142{ 1143 c = unicode_subchar; 1144 (*oconv)((c>>8)&0xFF, c&0xFF); 1145 return; 1146} 1147#endif 1148 1149static const struct { 1150 const char *name; 1151 const char *alias; 1152} long_option[] = { 1153 {"ic=", ""}, 1154 {"oc=", ""}, 1155 {"base64","jMB"}, 1156 {"euc","e"}, 1157 {"euc-input","E"}, 1158 {"fj","jm"}, 1159 {"help",""}, 1160 {"jis","j"}, 1161 {"jis-input","J"}, 1162 {"mac","sLm"}, 1163 {"mime","jM"}, 1164 {"mime-input","m"}, 1165 {"msdos","sLw"}, 1166 {"sjis","s"}, 1167 {"sjis-input","S"}, 1168 {"unix","eLu"}, 1169 {"version","v"}, 1170 {"windows","sLw"}, 1171 {"hiragana","h1"}, 1172 {"katakana","h2"}, 1173 {"katakana-hiragana","h3"}, 1174 {"guess=", ""}, 1175 {"guess", "g2"}, 1176 {"cp932", ""}, 1177 {"no-cp932", ""}, 1178#ifdef X0212_ENABLE 1179 {"x0212", ""}, 1180#endif 1181#ifdef UTF8_OUTPUT_ENABLE 1182 {"utf8", "w"}, 1183 {"utf16", "w16"}, 1184 {"ms-ucs-map", ""}, 1185 {"fb-skip", ""}, 1186 {"fb-html", ""}, 1187 {"fb-xml", ""}, 1188 {"fb-perl", ""}, 1189 {"fb-java", ""}, 1190 {"fb-subchar", ""}, 1191 {"fb-subchar=", ""}, 1192#endif 1193#ifdef UTF8_INPUT_ENABLE 1194 {"utf8-input", "W"}, 1195 {"utf16-input", "W16"}, 1196 {"no-cp932ext", ""}, 1197 {"no-best-fit-chars",""}, 1198#endif 1199#ifdef UNICODE_NORMALIZATION 1200 {"utf8mac-input", ""}, 1201#endif 1202#ifdef OVERWRITE 1203 {"overwrite", ""}, 1204 {"overwrite=", ""}, 1205 {"in-place", ""}, 1206 {"in-place=", ""}, 1207#endif 1208#ifdef INPUT_OPTION 1209 {"cap-input", ""}, 1210 {"url-input", ""}, 1211#endif 1212#ifdef NUMCHAR_OPTION 1213 {"numchar-input", ""}, 1214#endif 1215#ifdef CHECK_OPTION 1216 {"no-output", ""}, 1217 {"debug", ""}, 1218#endif 1219#ifdef SHIFTJIS_CP932 1220 {"cp932inv", ""}, 1221#endif 1222#ifdef EXEC_IO 1223 {"exec-in", ""}, 1224 {"exec-out", ""}, 1225#endif 1226 {"prefix=", ""}, 1227}; 1228 1229static void 1230set_input_encoding(nkf_encoding *enc) 1231{ 1232 switch 
(nkf_enc_to_index(enc)) { 1233 case ISO_8859_1: 1234 iso8859_f = TRUE; 1235 break; 1236 case CP50221: 1237 case CP50222: 1238 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1239 case CP50220: 1240#ifdef SHIFTJIS_CP932 1241 cp51932_f = TRUE; 1242#endif 1243#ifdef UTF8_OUTPUT_ENABLE 1244 ms_ucs_map_f = UCS_MAP_CP932; 1245#endif 1246 break; 1247 case ISO_2022_JP_1: 1248 x0212_f = TRUE; 1249 break; 1250 case ISO_2022_JP_3: 1251 x0212_f = TRUE; 1252 x0213_f = TRUE; 1253 break; 1254 case ISO_2022_JP_2004: 1255 x0212_f = TRUE; 1256 x0213_f = TRUE; 1257 break; 1258 case SHIFT_JIS: 1259 break; 1260 case WINDOWS_31J: 1261 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1262#ifdef SHIFTJIS_CP932 1263 cp51932_f = TRUE; 1264#endif 1265#ifdef UTF8_OUTPUT_ENABLE 1266 ms_ucs_map_f = UCS_MAP_CP932; 1267#endif 1268 break; 1269 break; 1270 case CP10001: 1271#ifdef SHIFTJIS_CP932 1272 cp51932_f = TRUE; 1273#endif 1274#ifdef UTF8_OUTPUT_ENABLE 1275 ms_ucs_map_f = UCS_MAP_CP10001; 1276#endif 1277 break; 1278 case EUC_JP: 1279 break; 1280 case EUCJP_NKF: 1281 break; 1282 case CP51932: 1283 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1284#ifdef SHIFTJIS_CP932 1285 cp51932_f = TRUE; 1286#endif 1287#ifdef UTF8_OUTPUT_ENABLE 1288 ms_ucs_map_f = UCS_MAP_CP932; 1289#endif 1290 break; 1291 case EUCJP_MS: 1292 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1293#ifdef SHIFTJIS_CP932 1294 cp51932_f = FALSE; 1295#endif 1296#ifdef UTF8_OUTPUT_ENABLE 1297 ms_ucs_map_f = UCS_MAP_MS; 1298#endif 1299 break; 1300 case EUCJP_ASCII: 1301 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1302#ifdef SHIFTJIS_CP932 1303 cp51932_f = FALSE; 1304#endif 1305#ifdef UTF8_OUTPUT_ENABLE 1306 ms_ucs_map_f = UCS_MAP_ASCII; 1307#endif 1308 break; 1309 case SHIFT_JISX0213: 1310 case SHIFT_JIS_2004: 1311 x0213_f = TRUE; 1312#ifdef SHIFTJIS_CP932 1313 cp51932_f = 
FALSE; 1314 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1315#endif 1316 break; 1317 case EUC_JISX0213: 1318 case EUC_JIS_2004: 1319 x0213_f = TRUE; 1320#ifdef SHIFTJIS_CP932 1321 cp51932_f = FALSE; 1322#endif 1323 break; 1324#ifdef UTF8_INPUT_ENABLE 1325#ifdef UNICODE_NORMALIZATION 1326 case UTF8_MAC: 1327 nfc_f = TRUE; 1328 break; 1329#endif 1330 case UTF_16: 1331 case UTF_16BE: 1332 case UTF_16BE_BOM: 1333 input_endian = ENDIAN_BIG; 1334 break; 1335 case UTF_16LE: 1336 case UTF_16LE_BOM: 1337 input_endian = ENDIAN_LITTLE; 1338 break; 1339 case UTF_32: 1340 case UTF_32BE: 1341 case UTF_32BE_BOM: 1342 input_endian = ENDIAN_BIG; 1343 break; 1344 case UTF_32LE: 1345 case UTF_32LE_BOM: 1346 input_endian = ENDIAN_LITTLE; 1347 break; 1348#endif 1349 } 1350} 1351 1352static void 1353set_output_encoding(nkf_encoding *enc) 1354{ 1355 switch (nkf_enc_to_index(enc)) { 1356 case CP50220: 1357#ifdef SHIFTJIS_CP932 1358 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1359#endif 1360#ifdef UTF8_OUTPUT_ENABLE 1361 ms_ucs_map_f = UCS_MAP_CP932; 1362#endif 1363 break; 1364 case CP50221: 1365 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1366#ifdef SHIFTJIS_CP932 1367 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1368#endif 1369#ifdef UTF8_OUTPUT_ENABLE 1370 ms_ucs_map_f = UCS_MAP_CP932; 1371#endif 1372 break; 1373 case ISO_2022_JP: 1374#ifdef SHIFTJIS_CP932 1375 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1376#endif 1377 break; 1378 case ISO_2022_JP_1: 1379 x0212_f = TRUE; 1380#ifdef SHIFTJIS_CP932 1381 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1382#endif 1383 break; 1384 case ISO_2022_JP_3: 1385 case ISO_2022_JP_2004: 1386 x0212_f = TRUE; 1387 x0213_f = TRUE; 1388#ifdef SHIFTJIS_CP932 1389 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1390#endif 1391 break; 1392 case SHIFT_JIS: 1393 break; 1394 case WINDOWS_31J: 1395 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1396#ifdef UTF8_OUTPUT_ENABLE 1397 ms_ucs_map_f = 
UCS_MAP_CP932; 1398#endif 1399 break; 1400 case CP10001: 1401#ifdef UTF8_OUTPUT_ENABLE 1402 ms_ucs_map_f = UCS_MAP_CP10001; 1403#endif 1404 break; 1405 case EUC_JP: 1406 x0212_f = TRUE; 1407#ifdef SHIFTJIS_CP932 1408 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1409#endif 1410#ifdef UTF8_OUTPUT_ENABLE 1411 ms_ucs_map_f = UCS_MAP_ASCII; 1412#endif 1413 break; 1414 case EUCJP_NKF: 1415 x0212_f = FALSE; 1416#ifdef SHIFTJIS_CP932 1417 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1418#endif 1419#ifdef UTF8_OUTPUT_ENABLE 1420 ms_ucs_map_f = UCS_MAP_ASCII; 1421#endif 1422 break; 1423 case CP51932: 1424 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1425#ifdef SHIFTJIS_CP932 1426 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1427#endif 1428#ifdef UTF8_OUTPUT_ENABLE 1429 ms_ucs_map_f = UCS_MAP_CP932; 1430#endif 1431 break; 1432 case EUCJP_MS: 1433 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1434 x0212_f = TRUE; 1435#ifdef UTF8_OUTPUT_ENABLE 1436 ms_ucs_map_f = UCS_MAP_MS; 1437#endif 1438 break; 1439 case EUCJP_ASCII: 1440 if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */ 1441 x0212_f = TRUE; 1442#ifdef UTF8_OUTPUT_ENABLE 1443 ms_ucs_map_f = UCS_MAP_ASCII; 1444#endif 1445 break; 1446 case SHIFT_JISX0213: 1447 case SHIFT_JIS_2004: 1448 x0213_f = TRUE; 1449#ifdef SHIFTJIS_CP932 1450 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1451#endif 1452 break; 1453 case EUC_JISX0213: 1454 case EUC_JIS_2004: 1455 x0212_f = TRUE; 1456 x0213_f = TRUE; 1457#ifdef SHIFTJIS_CP932 1458 if (cp932inv_f == TRUE) cp932inv_f = FALSE; 1459#endif 1460 break; 1461#ifdef UTF8_OUTPUT_ENABLE 1462 case UTF_8_BOM: 1463 output_bom_f = TRUE; 1464 break; 1465 case UTF_16: 1466 case UTF_16BE_BOM: 1467 output_bom_f = TRUE; 1468 break; 1469 case UTF_16LE: 1470 output_endian = ENDIAN_LITTLE; 1471 output_bom_f = FALSE; 1472 break; 1473 case UTF_16LE_BOM: 1474 output_endian = ENDIAN_LITTLE; 1475 output_bom_f = TRUE; 1476 
break; 1477 case UTF_32: 1478 case UTF_32BE_BOM: 1479 output_bom_f = TRUE; 1480 break; 1481 case UTF_32LE: 1482 output_endian = ENDIAN_LITTLE; 1483 output_bom_f = FALSE; 1484 break; 1485 case UTF_32LE_BOM: 1486 output_endian = ENDIAN_LITTLE; 1487 output_bom_f = TRUE; 1488 break; 1489#endif 1490 } 1491} 1492 1493static struct input_code* 1494find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) 1495{ 1496 if (iconv_func){ 1497 struct input_code *p = input_code_list; 1498 while (p->name){ 1499 if (iconv_func == p->iconv_func){ 1500 return p; 1501 } 1502 p++; 1503 } 1504 } 1505 return 0; 1506} 1507 1508static void 1509set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0)) 1510{ 1511#ifdef INPUT_CODE_FIX 1512 if (f || !input_encoding) 1513#endif 1514 if (estab_f != f){ 1515 estab_f = f; 1516 } 1517 1518 if (iconv_func 1519#ifdef INPUT_CODE_FIX 1520 && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */ 1521#endif 1522 ){ 1523 iconv = iconv_func; 1524 } 1525#ifdef CHECK_OPTION 1526 if (estab_f && iconv_for_check != iconv){ 1527 struct input_code *p = find_inputcode_byfunc(iconv); 1528 if (p){ 1529 set_input_codename(p->name); 1530 debug(p->name); 1531 } 1532 iconv_for_check = iconv; 1533 } 1534#endif 1535} 1536 1537#ifdef X0212_ENABLE 1538static nkf_char 1539x0212_shift(nkf_char c) 1540{ 1541 nkf_char ret = c; 1542 c &= 0x7f; 1543 if (is_eucg3(ret)){ 1544 if (0x75 <= c && c <= 0x7f){ 1545 ret = c + (0x109 - 0x75); 1546 } 1547 }else{ 1548 if (0x75 <= c && c <= 0x7f){ 1549 ret = c + (0x113 - 0x75); 1550 } 1551 } 1552 return ret; 1553} 1554 1555 1556static nkf_char 1557x0212_unshift(nkf_char c) 1558{ 1559 nkf_char ret = c; 1560 if (0x7f <= c && c <= 0x88){ 1561 ret = c + (0x75 - 0x7f); 1562 }else if (0x89 <= c && c <= 0x92){ 1563 ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89)); 1564 } 1565 return ret; 1566} 1567#endif /* X0212_ENABLE */ 1568 1569static int 1570is_x0213_2_in_x0212(nkf_char c1) 1571{ 1572 static 
const char x0213_2_table[] = 1573 {0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1}; 1574 int ku = c1 - 0x20; 1575 if (ku <= 15) 1576 return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */ 1577 if (78 <= ku && ku <= 94) 1578 return 1; 1579 return 0; 1580} 1581 1582static nkf_char 1583e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) 1584{ 1585 nkf_char ndx; 1586 if (is_eucg3(c2)){ 1587 ndx = c2 & 0x7f; 1588 if (x0213_f && is_x0213_2_in_x0212(ndx)){ 1589 if((0x21 <= ndx && ndx <= 0x2F)){ 1590 if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3; 1591 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e); 1592 return 0; 1593 }else if(0x6E <= ndx && ndx <= 0x7E){ 1594 if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe; 1595 if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e); 1596 return 0; 1597 } 1598 return 1; 1599 } 1600#ifdef X0212_ENABLE 1601 else if(nkf_isgraph(ndx)){ 1602 nkf_char val = 0; 1603 const unsigned short *ptr; 1604 ptr = x0212_shiftjis[ndx - 0x21]; 1605 if (ptr){ 1606 val = ptr[(c1 & 0x7f) - 0x21]; 1607 } 1608 if (val){ 1609 c2 = val >> 8; 1610 c1 = val & 0xff; 1611 if (p2) *p2 = c2; 1612 if (p1) *p1 = c1; 1613 return 0; 1614 } 1615 c2 = x0212_shift(c2); 1616 } 1617#endif /* X0212_ENABLE */ 1618 } 1619 if(0x7F < c2) return 1; 1620 if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1); 1621 if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 
0x1f : 0x20) : 0x7e); 1622 return 0; 1623} 1624 1625static nkf_char 1626s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1) 1627{ 1628#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE) 1629 nkf_char val; 1630#endif 1631 static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} }; 1632 if (0xFC < c1) return 1; 1633#ifdef SHIFTJIS_CP932 1634 if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){ 1635 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; 1636 if (val){ 1637 c2 = val >> 8; 1638 c1 = val & 0xff; 1639 } 1640 } 1641 if (cp932inv_f 1642 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ 1643 val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; 1644 if (val){ 1645 c2 = val >> 8; 1646 c1 = val & 0xff; 1647 } 1648 } 1649#endif /* SHIFTJIS_CP932 */ 1650#ifdef X0212_ENABLE 1651 if (!x0213_f && is_ibmext_in_sjis(c2)){ 1652 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; 1653 if (val){ 1654 if (val > 0x7FFF){ 1655 c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f); 1656 c1 = val & 0xff; 1657 }else{ 1658 c2 = val >> 8; 1659 c1 = val & 0xff; 1660 } 1661 if (p2) *p2 = c2; 1662 if (p1) *p1 = c1; 1663 return 0; 1664 } 1665 } 1666#endif 1667 if(c2 >= 0x80){ 1668 if(x0213_f && c2 >= 0xF0){ 1669 if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */ 1670 c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1]; 1671 }else{ /* 78<=k<=94 */ 1672 c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B); 1673 if (0x9E < c1) c2++; 1674 } 1675 }else{ 1676#define SJ0162 0x00e1 /* 01 - 62 ku offset */ 1677#define SJ6394 0x0161 /* 63 - 94 ku offset */ 1678 c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394); 1679 if (0x9E < c1) c2++; 1680 } 1681 if (c1 < 0x9F) 1682 c1 = c1 - ((c1 > DEL) ? 
SP : 0x1F); 1683 else { 1684 c1 = c1 - 0x7E; 1685 } 1686 } 1687 1688#ifdef X0212_ENABLE 1689 c2 = x0212_unshift(c2); 1690#endif 1691 if (p2) *p2 = c2; 1692 if (p1) *p1 = c1; 1693 return 0; 1694} 1695 1696#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 1697static void 1698nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4) 1699{ 1700 val &= VALUE_MASK; 1701 if (val < 0x80){ 1702 *p1 = val; 1703 *p2 = 0; 1704 *p3 = 0; 1705 *p4 = 0; 1706 }else if (val < 0x800){ 1707 *p1 = 0xc0 | (val >> 6); 1708 *p2 = 0x80 | (val & 0x3f); 1709 *p3 = 0; 1710 *p4 = 0; 1711 } else if (nkf_char_unicode_bmp_p(val)) { 1712 *p1 = 0xe0 | (val >> 12); 1713 *p2 = 0x80 | ((val >> 6) & 0x3f); 1714 *p3 = 0x80 | ( val & 0x3f); 1715 *p4 = 0; 1716 } else if (nkf_char_unicode_value_p(val)) { 1717 *p1 = 0xf0 | (val >> 18); 1718 *p2 = 0x80 | ((val >> 12) & 0x3f); 1719 *p3 = 0x80 | ((val >> 6) & 0x3f); 1720 *p4 = 0x80 | ( val & 0x3f); 1721 } else { 1722 *p1 = 0; 1723 *p2 = 0; 1724 *p3 = 0; 1725 *p4 = 0; 1726 } 1727} 1728 1729static nkf_char 1730nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 1731{ 1732 nkf_char wc; 1733 if (c1 <= 0x7F) { 1734 /* single byte */ 1735 wc = c1; 1736 } 1737 else if (c1 <= 0xC1) { 1738 /* trail byte or invalid */ 1739 return -1; 1740 } 1741 else if (c1 <= 0xDF) { 1742 /* 2 bytes */ 1743 wc = (c1 & 0x1F) << 6; 1744 wc |= (c2 & 0x3F); 1745 } 1746 else if (c1 <= 0xEF) { 1747 /* 3 bytes */ 1748 wc = (c1 & 0x0F) << 12; 1749 wc |= (c2 & 0x3F) << 6; 1750 wc |= (c3 & 0x3F); 1751 } 1752 else if (c2 <= 0xF4) { 1753 /* 4 bytes */ 1754 wc = (c1 & 0x0F) << 18; 1755 wc |= (c2 & 0x3F) << 12; 1756 wc |= (c3 & 0x3F) << 6; 1757 wc |= (c4 & 0x3F); 1758 } 1759 else { 1760 return -1; 1761 } 1762 return wc; 1763} 1764#endif 1765 1766#ifdef UTF8_INPUT_ENABLE 1767static int 1768unicode_to_jis_common2(nkf_char c1, nkf_char c0, 1769 const unsigned short *const *pp, nkf_char psize, 1770 nkf_char *p2, nkf_char *p1) 1771{ 1772 
nkf_char c2; 1773 const unsigned short *p; 1774 unsigned short val; 1775 1776 if (pp == 0) return 1; 1777 1778 c1 -= 0x80; 1779 if (c1 < 0 || psize <= c1) return 1; 1780 p = pp[c1]; 1781 if (p == 0) return 1; 1782 1783 c0 -= 0x80; 1784 if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1; 1785 val = p[c0]; 1786 if (val == 0) return 1; 1787 if (no_cp932ext_f && ( 1788 (val>>8) == 0x2D || /* NEC special characters */ 1789 val > NKF_INT32_C(0xF300) /* IBM extended characters */ 1790 )) return 1; 1791 1792 c2 = val >> 8; 1793 if (val > 0x7FFF){ 1794 c2 &= 0x7f; 1795 c2 |= PREFIX_EUCG3; 1796 } 1797 if (c2 == SO) c2 = JIS_X_0201_1976_K; 1798 c1 = val & 0xFF; 1799 if (p2) *p2 = c2; 1800 if (p1) *p1 = c1; 1801 return 0; 1802} 1803 1804static int 1805unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) 1806{ 1807 const unsigned short *const *pp; 1808 const unsigned short *const *const *ppp; 1809 static const char no_best_fit_chars_table_C2[] = 1810 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1811 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1812 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2, 1813 0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1}; 1814 static const char no_best_fit_chars_table_C2_ms[] = 1815 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1816 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1817 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1818 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0}; 1819 static const char no_best_fit_chars_table_932_C2[] = 1820 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1822 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1823 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0}; 1824 static const char no_best_fit_chars_table_932_C3[] = 1825 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1826 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1827 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1828 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 
1, 1, 1, 1}; 1829 nkf_char ret = 0; 1830 1831 if(c2 < 0x80){ 1832 *p2 = 0; 1833 *p1 = c2; 1834 }else if(c2 < 0xe0){ 1835 if(no_best_fit_chars_f){ 1836 if(ms_ucs_map_f == UCS_MAP_CP932){ 1837 switch(c2){ 1838 case 0xC2: 1839 if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1; 1840 break; 1841 case 0xC3: 1842 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1; 1843 break; 1844 } 1845 }else if(!cp932inv_f){ 1846 switch(c2){ 1847 case 0xC2: 1848 if(no_best_fit_chars_table_C2[c1&0x3F]) return 1; 1849 break; 1850 case 0xC3: 1851 if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1; 1852 break; 1853 } 1854 }else if(ms_ucs_map_f == UCS_MAP_MS){ 1855 if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1; 1856 }else if(ms_ucs_map_f == UCS_MAP_CP10001){ 1857 switch(c2){ 1858 case 0xC2: 1859 switch(c1){ 1860 case 0xA2: 1861 case 0xA3: 1862 case 0xA5: 1863 case 0xA6: 1864 case 0xAC: 1865 case 0xAF: 1866 case 0xB8: 1867 return 1; 1868 } 1869 break; 1870 } 1871 } 1872 } 1873 pp = 1874 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 : 1875 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms : 1876 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac : 1877 x0213_f ? 
utf8_to_euc_2bytes_x0213 : 1878 utf8_to_euc_2bytes; 1879 ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1); 1880 }else if(c0 < 0xF0){ 1881 if(no_best_fit_chars_f){ 1882 if(ms_ucs_map_f == UCS_MAP_CP932){ 1883 if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1; 1884 }else if(ms_ucs_map_f == UCS_MAP_MS){ 1885 switch(c2){ 1886 case 0xE2: 1887 switch(c1){ 1888 case 0x80: 1889 if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1; 1890 break; 1891 case 0x88: 1892 if(c0 == 0x92) return 1; 1893 break; 1894 } 1895 break; 1896 case 0xE3: 1897 if(c1 == 0x80 || c0 == 0x9C) return 1; 1898 break; 1899 } 1900 }else if(ms_ucs_map_f == UCS_MAP_CP10001){ 1901 switch(c2){ 1902 case 0xE3: 1903 switch(c1){ 1904 case 0x82: 1905 if(c0 == 0x94) return 1; 1906 break; 1907 case 0x83: 1908 if(c0 == 0xBB) return 1; 1909 break; 1910 } 1911 break; 1912 } 1913 }else{ 1914 switch(c2){ 1915 case 0xE2: 1916 switch(c1){ 1917 case 0x80: 1918 if(c0 == 0x95) return 1; 1919 break; 1920 case 0x88: 1921 if(c0 == 0xA5) return 1; 1922 break; 1923 } 1924 break; 1925 case 0xEF: 1926 switch(c1){ 1927 case 0xBC: 1928 if(c0 == 0x8D) return 1; 1929 break; 1930 case 0xBD: 1931 if(c0 == 0x9E && !cp932inv_f) return 1; 1932 break; 1933 case 0xBF: 1934 if(0xA0 <= c0 && c0 <= 0xA5) return 1; 1935 break; 1936 } 1937 break; 1938 } 1939 } 1940 } 1941 ppp = 1942 ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 : 1943 ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms : 1944 ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac : 1945 x0213_f ? 
utf8_to_euc_3bytes_x0213 : 1946 utf8_to_euc_3bytes; 1947 ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1); 1948 }else return -1; 1949#ifdef SHIFTJIS_CP932 1950 if (!ret && !cp932inv_f && is_eucg3(*p2)) { 1951 nkf_char s2, s1; 1952 if (e2s_conv(*p2, *p1, &s2, &s1) == 0) { 1953 s2e_conv(s2, s1, p2, p1); 1954 }else{ 1955 ret = 1; 1956 } 1957 } 1958#endif 1959 return ret; 1960} 1961 1962#ifdef UTF8_OUTPUT_ENABLE 1963#define X0213_SURROGATE_FIND(tbl, size, euc) do { \ 1964 int i; \ 1965 for (i = 0; i < size; i++) \ 1966 if (tbl[i][0] == euc) { \ 1967 low = tbl[i][2]; \ 1968 break; \ 1969 } \ 1970 } while (0) 1971 1972static nkf_char 1973e2w_conv(nkf_char c2, nkf_char c1) 1974{ 1975 const unsigned short *p; 1976 1977 if (c2 == JIS_X_0201_1976_K) { 1978 if (ms_ucs_map_f == UCS_MAP_CP10001) { 1979 switch (c1) { 1980 case 0x20: 1981 return 0xA0; 1982 case 0x7D: 1983 return 0xA9; 1984 } 1985 } 1986 p = euc_to_utf8_1byte; 1987#ifdef X0212_ENABLE 1988 } else if (is_eucg3(c2)){ 1989 if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){ 1990 return 0xA6; 1991 } 1992 c2 = (c2&0x7f) - 0x21; 1993 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) 1994 p = 1995 x0213_f ? x0212_to_utf8_2bytes_x0213[c2] : 1996 x0212_to_utf8_2bytes[c2]; 1997 else 1998 return 0; 1999#endif 2000 } else { 2001 c2 &= 0x7f; 2002 c2 = (c2&0x7f) - 0x21; 2003 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) 2004 p = 2005 x0213_f ? euc_to_utf8_2bytes_x0213[c2] : 2006 ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] : 2007 ms_ucs_map_f == UCS_MAP_CP10001 ? 
euc_to_utf8_2bytes_mac[c2] : 2008 euc_to_utf8_2bytes_ms[c2]; 2009 else 2010 return 0; 2011 } 2012 if (!p) return 0; 2013 c1 = (c1 & 0x7f) - 0x21; 2014 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) { 2015 nkf_char val = p[c1]; 2016 if (x0213_f && 0xD800<=val && val<=0xDBFF) { 2017 nkf_char euc = (c2+0x21)<<8 | (c1+0x21); 2018 nkf_char low = 0; 2019 if (p==x0212_to_utf8_2bytes_x0213[c2]) { 2020 X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc); 2021 } else { 2022 X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc); 2023 } 2024 if (!low) return 0; 2025 return UTF16_TO_UTF32(val, low); 2026 } else { 2027 return val; 2028 } 2029 } 2030 return 0; 2031} 2032 2033static nkf_char 2034e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1) 2035{ 2036 nkf_char euc; 2037 int i; 2038 for (i = 0; i < sizeof_x0213_combining_chars; i++) 2039 if (x0213_combining_chars[i] == comb) 2040 break; 2041 if (i >= sizeof_x0213_combining_chars) 2042 return 0; 2043 euc = (c2&0x7f)<<8 | (c1&0x7f); 2044 for (i = 0; i < sizeof_x0213_combining_table; i++) 2045 if (x0213_combining_table[i][0] == euc) 2046 return x0213_combining_table[i][1]; 2047 return 0; 2048} 2049#endif 2050 2051static nkf_char 2052w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1) 2053{ 2054 nkf_char ret = 0; 2055 2056 if (!c1){ 2057 *p2 = 0; 2058 *p1 = c2; 2059 }else if (0xc0 <= c2 && c2 <= 0xef) { 2060 ret = unicode_to_jis_common(c2, c1, c0, p2, p1); 2061#ifdef NUMCHAR_OPTION 2062 if (ret > 0){ 2063 if (p2) *p2 = 0; 2064 if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0)); 2065 ret = 0; 2066 } 2067#endif 2068 } 2069 return ret; 2070} 2071 2072#ifdef UTF8_INPUT_ENABLE 2073static nkf_char 2074w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1) 2075{ 2076 nkf_char c1, c2, c3, c4; 2077 nkf_char ret = 0; 2078 val &= VALUE_MASK; 2079 if (val < 0x80) { 2080 *p2 = 0; 2081 *p1 = val; 2082 } 2083 else if (nkf_char_unicode_bmp_p(val)){ 
2084 nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); 2085 ret = unicode_to_jis_common(c1, c2, c3, p2, p1); 2086 if (ret > 0){ 2087 *p2 = 0; 2088 *p1 = nkf_char_unicode_new(val); 2089 ret = 0; 2090 } 2091 } 2092 else { 2093 int i; 2094 if (x0213_f) { 2095 c1 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ 2096 c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ 2097 for (i = 0; i < sizeof_x0213_1_surrogate_table; i++) 2098 if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) { 2099 val = x0213_1_surrogate_table[i][0]; 2100 *p2 = val >> 8; 2101 *p1 = val & 0xFF; 2102 return 0; 2103 } 2104 for (i = 0; i < sizeof_x0213_2_surrogate_table; i++) 2105 if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) { 2106 val = x0213_2_surrogate_table[i][0]; 2107 *p2 = PREFIX_EUCG3 | (val >> 8); 2108 *p1 = val & 0xFF; 2109 return 0; 2110 } 2111 } 2112 *p2 = 0; 2113 *p1 = nkf_char_unicode_new(val); 2114 } 2115 return ret; 2116} 2117#endif 2118 2119static nkf_char 2120e_iconv(nkf_char c2, nkf_char c1, nkf_char c0) 2121{ 2122 if (c2 == JIS_X_0201_1976_K || c2 == SS2){ 2123 if (iso2022jp_f && !x0201_f) { 2124 c2 = GETA1; c1 = GETA2; 2125 } else { 2126 c2 = JIS_X_0201_1976_K; 2127 c1 &= 0x7f; 2128 } 2129#ifdef X0212_ENABLE 2130 }else if (c2 == 0x8f){ 2131 if (c0 == 0){ 2132 return -1; 2133 } 2134 if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) { 2135 /* encoding is eucJP-ms, so invert to Unicode Private User Area */ 2136 c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC); 2137 c2 = 0; 2138 } else { 2139 c2 = (c2 << 8) | (c1 & 0x7f); 2140 c1 = c0 & 0x7f; 2141#ifdef SHIFTJIS_CP932 2142 if (cp51932_f){ 2143 nkf_char s2, s1; 2144 if (e2s_conv(c2, c1, &s2, &s1) == 0){ 2145 s2e_conv(s2, s1, &c2, &c1); 2146 if (c2 < 0x100){ 2147 c1 &= 0x7f; 2148 c2 &= 0x7f; 2149 } 2150 } 2151 } 2152#endif /* SHIFTJIS_CP932 */ 2153 } 2154#endif /* X0212_ENABLE */ 2155 } else if ((c2 == EOF) || 
(c2 == 0) || c2 < SP || c2 == ISO_8859_1) { 2156 /* NOP */ 2157 } else { 2158 if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) { 2159 /* encoding is eucJP-ms, so invert to Unicode Private User Area */ 2160 c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000); 2161 c2 = 0; 2162 } else { 2163 c1 &= 0x7f; 2164 c2 &= 0x7f; 2165#ifdef SHIFTJIS_CP932 2166 if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){ 2167 nkf_char s2, s1; 2168 if (e2s_conv(c2, c1, &s2, &s1) == 0){ 2169 s2e_conv(s2, s1, &c2, &c1); 2170 if (c2 < 0x100){ 2171 c1 &= 0x7f; 2172 c2 &= 0x7f; 2173 } 2174 } 2175 } 2176#endif /* SHIFTJIS_CP932 */ 2177 } 2178 } 2179 (*oconv)(c2, c1); 2180 return 0; 2181} 2182 2183static nkf_char 2184s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) 2185{ 2186 if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) { 2187 if (iso2022jp_f && !x0201_f) { 2188 c2 = GETA1; c1 = GETA2; 2189 } else { 2190 c1 &= 0x7f; 2191 } 2192 } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) { 2193 /* NOP */ 2194 } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) { 2195 /* CP932 UDC */ 2196 if(c1 == 0x7F) return 0; 2197 c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000); 2198 c2 = 0; 2199 } else { 2200 nkf_char ret = s2e_conv(c2, c1, &c2, &c1); 2201 if (ret) return ret; 2202 } 2203 (*oconv)(c2, c1); 2204 return 0; 2205} 2206 2207static int 2208x0213_wait_combining_p(nkf_char wc) 2209{ 2210 int i; 2211 for (i = 0; i < sizeof_x0213_combining_table; i++) { 2212 if (x0213_combining_table[i][1] == wc) { 2213 return TRUE; 2214 } 2215 } 2216 return FALSE; 2217} 2218 2219static int 2220x0213_combining_p(nkf_char wc) 2221{ 2222 int i; 2223 for (i = 0; i < sizeof_x0213_combining_chars; i++) { 2224 if (x0213_combining_chars[i] == wc) { 2225 return TRUE; 2226 } 2227 } 2228 return FALSE; 2229} 2230 2231static nkf_char 2232w_iconv(nkf_char c1, nkf_char c2, nkf_char c3) 2233{ 2234 nkf_char ret 
= 0, c4 = 0; 2235 static const char w_iconv_utf8_1st_byte[] = 2236 { /* 0xC0 - 0xFF */ 2237 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2238 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2239 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33, 2240 40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70}; 2241 2242 if (c3 > 0xFF) { 2243 c4 = c3 & 0xFF; 2244 c3 >>= 8; 2245 } 2246 2247 if (c1 < 0 || 0xff < c1) { 2248 }else if (c1 == 0) { /* 0 : 1 byte*/ 2249 c3 = 0; 2250 } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */ 2251 return 0; 2252 } else{ 2253 switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) { 2254 case 21: 2255 if (c2 < 0x80 || 0xBF < c2) return 0; 2256 break; 2257 case 30: 2258 if (c3 == 0) return -1; 2259 if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80) 2260 return 0; 2261 break; 2262 case 31: 2263 case 33: 2264 if (c3 == 0) return -1; 2265 if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80) 2266 return 0; 2267 break; 2268 case 32: 2269 if (c3 == 0) return -1; 2270 if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80) 2271 return 0; 2272 break; 2273 case 40: 2274 if (c3 == 0) return -2; 2275 if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) 2276 return 0; 2277 break; 2278 case 41: 2279 if (c3 == 0) return -2; 2280 if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) 2281 return 0; 2282 break; 2283 case 42: 2284 if (c3 == 0) return -2; 2285 if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) 2286 return 0; 2287 break; 2288 default: 2289 return 0; 2290 break; 2291 } 2292 } 2293 if (c1 == 0 || c1 == EOF){ 2294 } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */ 2295 c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4)); 2296 c1 = 0; 2297 } else { 2298 if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4))) 2299 return -3; 2300 ret = w2e_conv(c1, c2, c3, &c1, &c2); 2301 } 2302 if (ret == 0){ 2303 (*oconv)(c1, 
c2); 2304 } 2305 return ret; 2306} 2307 2308static nkf_char 2309w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3) 2310{ 2311 /* continue from the line below 'return -3;' in w_iconv() */ 2312 nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2); 2313 if (ret == 0){ 2314 (*oconv)(c1, c2); 2315 } 2316 return ret; 2317} 2318 2319#define NKF_ICONV_INVALID_CODE_RANGE -13 2320#define NKF_ICONV_WAIT_COMBINING_CHAR -14 2321#define NKF_ICONV_NOT_COMBINED -15 2322static size_t 2323unicode_iconv(nkf_char wc, int nocombine) 2324{ 2325 nkf_char c1, c2; 2326 int ret = 0; 2327 2328 if (wc < 0x80) { 2329 c2 = 0; 2330 c1 = wc; 2331 }else if ((wc>>11) == 27) { 2332 /* unpaired surrogate */ 2333 return NKF_ICONV_INVALID_CODE_RANGE; 2334 }else if (wc < 0xFFFF) { 2335 if (!nocombine && x0213_f && x0213_wait_combining_p(wc)) 2336 return NKF_ICONV_WAIT_COMBINING_CHAR; 2337 ret = w16e_conv(wc, &c2, &c1); 2338 if (ret) return ret; 2339 }else if (wc < 0x10FFFF) { 2340 c2 = 0; 2341 c1 = nkf_char_unicode_new(wc); 2342 } else { 2343 return NKF_ICONV_INVALID_CODE_RANGE; 2344 } 2345 (*oconv)(c2, c1); 2346 return 0; 2347} 2348 2349static nkf_char 2350unicode_iconv_combine(nkf_char wc, nkf_char wc2) 2351{ 2352 nkf_char c1, c2; 2353 int i; 2354 2355 if (wc2 < 0x80) { 2356 return NKF_ICONV_NOT_COMBINED; 2357 }else if ((wc2>>11) == 27) { 2358 /* unpaired surrogate */ 2359 return NKF_ICONV_INVALID_CODE_RANGE; 2360 }else if (wc2 < 0xFFFF) { 2361 if (!x0213_combining_p(wc2)) 2362 return NKF_ICONV_NOT_COMBINED; 2363 for (i = 0; i < sizeof_x0213_combining_table; i++) { 2364 if (x0213_combining_table[i][1] == wc && 2365 x0213_combining_table[i][2] == wc2) { 2366 c2 = x0213_combining_table[i][0] >> 8; 2367 c1 = x0213_combining_table[i][0] & 0x7f; 2368 (*oconv)(c2, c1); 2369 return 0; 2370 } 2371 } 2372 }else if (wc2 < 0x10FFFF) { 2373 return NKF_ICONV_NOT_COMBINED; 2374 } else { 2375 return NKF_ICONV_INVALID_CODE_RANGE; 2376 } 2377 return NKF_ICONV_NOT_COMBINED; 2378} 2379 2380static nkf_char 
2381w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6) 2382{ 2383 nkf_char wc, wc2; 2384 wc = nkf_utf8_to_unicode(c1, c2, c3, 0); 2385 wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0); 2386 if (wc2 < 0) 2387 return wc2; 2388 return unicode_iconv_combine(wc, wc2); 2389} 2390 2391#define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1 2392#define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2 2393static size_t 2394nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 2395{ 2396 nkf_char wc; 2397 2398 if (c1 == EOF) { 2399 (*oconv)(EOF, 0); 2400 return 0; 2401 } 2402 2403 if (input_endian == ENDIAN_BIG) { 2404 if (0xD8 <= c1 && c1 <= 0xDB) { 2405 if (0xDC <= c3 && c3 <= 0xDF) { 2406 wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4); 2407 } else return NKF_ICONV_NEED_TWO_MORE_BYTES; 2408 } else { 2409 wc = c1 << 8 | c2; 2410 } 2411 } else { 2412 if (0xD8 <= c2 && c2 <= 0xDB) { 2413 if (0xDC <= c4 && c4 <= 0xDF) { 2414 wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3); 2415 } else return NKF_ICONV_NEED_TWO_MORE_BYTES; 2416 } else { 2417 wc = c2 << 8 | c1; 2418 } 2419 } 2420 2421 return (*unicode_iconv)(wc, FALSE); 2422} 2423 2424static size_t 2425nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 2426{ 2427 nkf_char wc, wc2; 2428 2429 if (input_endian == ENDIAN_BIG) { 2430 if (0xD8 <= c3 && c3 <= 0xDB) { 2431 return NKF_ICONV_NOT_COMBINED; 2432 } else { 2433 wc = c1 << 8 | c2; 2434 wc2 = c3 << 8 | c4; 2435 } 2436 } else { 2437 if (0xD8 <= c2 && c2 <= 0xDB) { 2438 return NKF_ICONV_NOT_COMBINED; 2439 } else { 2440 wc = c2 << 8 | c1; 2441 wc2 = c4 << 8 | c3; 2442 } 2443 } 2444 2445 return unicode_iconv_combine(wc, wc2); 2446} 2447 2448static size_t 2449nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2) 2450{ 2451 nkf_char wc; 2452 if (input_endian == ENDIAN_BIG) 2453 wc = c1 << 8 | c2; 2454 else 2455 wc = c2 << 8 | c1; 2456 return (*unicode_iconv)(wc, TRUE); 2457} 2458 2459static nkf_char 2460w_iconv16(nkf_char 
c2, nkf_char c1, ARG_UNUSED nkf_char c0) 2461{ 2462 (*oconv)(c2, c1); 2463 return 16; /* different from w_iconv32 */ 2464} 2465 2466static nkf_char 2467w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0) 2468{ 2469 (*oconv)(c2, c1); 2470 return 32; /* different from w_iconv16 */ 2471} 2472 2473static nkf_char 2474utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 2475{ 2476 nkf_char wc; 2477 2478 switch(input_endian){ 2479 case ENDIAN_BIG: 2480 wc = c2 << 16 | c3 << 8 | c4; 2481 break; 2482 case ENDIAN_LITTLE: 2483 wc = c3 << 16 | c2 << 8 | c1; 2484 break; 2485 case ENDIAN_2143: 2486 wc = c1 << 16 | c4 << 8 | c3; 2487 break; 2488 case ENDIAN_3412: 2489 wc = c4 << 16 | c1 << 8 | c2; 2490 break; 2491 default: 2492 return NKF_ICONV_INVALID_CODE_RANGE; 2493 } 2494 return wc; 2495} 2496 2497static size_t 2498nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 2499{ 2500 nkf_char wc; 2501 2502 if (c1 == EOF) { 2503 (*oconv)(EOF, 0); 2504 return 0; 2505 } 2506 2507 wc = utf32_to_nkf_char(c1, c2, c3, c4); 2508 if (wc < 0) 2509 return wc; 2510 2511 return (*unicode_iconv)(wc, FALSE); 2512} 2513 2514static nkf_char 2515nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8) 2516{ 2517 nkf_char wc, wc2; 2518 2519 wc = utf32_to_nkf_char(c1, c2, c3, c4); 2520 if (wc < 0) 2521 return wc; 2522 wc2 = utf32_to_nkf_char(c5, c6, c7, c8); 2523 if (wc2 < 0) 2524 return wc2; 2525 2526 return unicode_iconv_combine(wc, wc2); 2527} 2528 2529static size_t 2530nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4) 2531{ 2532 nkf_char wc; 2533 2534 wc = utf32_to_nkf_char(c1, c2, c3, c4); 2535 return (*unicode_iconv)(wc, TRUE); 2536} 2537#endif 2538 2539#define output_ascii_escape_sequence(mode) do { \ 2540 if (output_mode != ASCII && output_mode != ISO_8859_1) { \ 2541 (*o_putc)(ESC); \ 2542 (*o_putc)('('); \ 2543 (*o_putc)(ascii_intro); \ 2544 
output_mode = mode; \ 2545 } \ 2546 } while (0) 2547 2548static void 2549output_escape_sequence(int mode) 2550{ 2551 if (output_mode == mode) 2552 return; 2553 switch(mode) { 2554 case ISO_8859_1: 2555 (*o_putc)(ESC); 2556 (*o_putc)('.'); 2557 (*o_putc)('A'); 2558 break; 2559 case JIS_X_0201_1976_K: 2560 (*o_putc)(ESC); 2561 (*o_putc)('('); 2562 (*o_putc)('I'); 2563 break; 2564 case JIS_X_0208: 2565 (*o_putc)(ESC); 2566 (*o_putc)('$'); 2567 (*o_putc)(kanji_intro); 2568 break; 2569 case JIS_X_0212: 2570 (*o_putc)(ESC); 2571 (*o_putc)('$'); 2572 (*o_putc)('('); 2573 (*o_putc)('D'); 2574 break; 2575 case JIS_X_0213_1: 2576 (*o_putc)(ESC); 2577 (*o_putc)('$'); 2578 (*o_putc)('('); 2579 (*o_putc)('Q'); 2580 break; 2581 case JIS_X_0213_2: 2582 (*o_putc)(ESC); 2583 (*o_putc)('$'); 2584 (*o_putc)('('); 2585 (*o_putc)('P'); 2586 break; 2587 } 2588 output_mode = mode; 2589} 2590 2591static void 2592j_oconv(nkf_char c2, nkf_char c1) 2593{ 2594#ifdef NUMCHAR_OPTION 2595 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2596 w16e_conv(c1, &c2, &c1); 2597 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2598 c2 = c1 & VALUE_MASK; 2599 if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) { 2600 /* CP5022x UDC */ 2601 c1 &= 0xFFF; 2602 c2 = 0x7F + c1 / 94; 2603 c1 = 0x21 + c1 % 94; 2604 } else { 2605 if (encode_fallback) (*encode_fallback)(c1); 2606 return; 2607 } 2608 } 2609 } 2610#endif 2611 if (c2 == 0) { 2612 output_ascii_escape_sequence(ASCII); 2613 (*o_putc)(c1); 2614 } 2615 else if (c2 == EOF) { 2616 output_ascii_escape_sequence(ASCII); 2617 (*o_putc)(EOF); 2618 } 2619 else if (c2 == ISO_8859_1) { 2620 output_ascii_escape_sequence(ISO_8859_1); 2621 (*o_putc)(c1|0x80); 2622 } 2623 else if (c2 == JIS_X_0201_1976_K) { 2624 output_escape_sequence(JIS_X_0201_1976_K); 2625 (*o_putc)(c1); 2626#ifdef X0212_ENABLE 2627 } else if (is_eucg3(c2)){ 2628 output_escape_sequence(x0213_f ? 
JIS_X_0213_2 : JIS_X_0212); 2629 (*o_putc)(c2 & 0x7f); 2630 (*o_putc)(c1); 2631#endif 2632 } else { 2633 if(ms_ucs_map_f 2634 ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1 2635 : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return; 2636 output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208); 2637 (*o_putc)(c2); 2638 (*o_putc)(c1); 2639 } 2640} 2641 2642static void 2643e_oconv(nkf_char c2, nkf_char c1) 2644{ 2645 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2646 w16e_conv(c1, &c2, &c1); 2647 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2648 c2 = c1 & VALUE_MASK; 2649 if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) { 2650 /* eucJP-ms UDC */ 2651 c1 &= 0xFFF; 2652 c2 = c1 / 94; 2653 c2 += c2 < 10 ? 0x75 : 0x8FEB; 2654 c1 = 0x21 + c1 % 94; 2655 if (is_eucg3(c2)){ 2656 (*o_putc)(0x8f); 2657 (*o_putc)((c2 & 0x7f) | 0x080); 2658 (*o_putc)(c1 | 0x080); 2659 }else{ 2660 (*o_putc)((c2 & 0x7f) | 0x080); 2661 (*o_putc)(c1 | 0x080); 2662 } 2663 return; 2664 } else { 2665 if (encode_fallback) (*encode_fallback)(c1); 2666 return; 2667 } 2668 } 2669 } 2670 2671 if (c2 == EOF) { 2672 (*o_putc)(EOF); 2673 } else if (c2 == 0) { 2674 output_mode = ASCII; 2675 (*o_putc)(c1); 2676 } else if (c2 == JIS_X_0201_1976_K) { 2677 output_mode = EUC_JP; 2678 (*o_putc)(SS2); (*o_putc)(c1|0x80); 2679 } else if (c2 == ISO_8859_1) { 2680 output_mode = ISO_8859_1; 2681 (*o_putc)(c1 | 0x080); 2682#ifdef X0212_ENABLE 2683 } else if (is_eucg3(c2)){ 2684 output_mode = EUC_JP; 2685#ifdef SHIFTJIS_CP932 2686 if (!cp932inv_f){ 2687 nkf_char s2, s1; 2688 if (e2s_conv(c2, c1, &s2, &s1) == 0){ 2689 s2e_conv(s2, s1, &c2, &c1); 2690 } 2691 } 2692#endif 2693 if (c2 == 0) { 2694 output_mode = ASCII; 2695 (*o_putc)(c1); 2696 }else if (is_eucg3(c2)){ 2697 if (x0212_f){ 2698 (*o_putc)(0x8f); 2699 (*o_putc)((c2 & 0x7f) | 0x080); 2700 (*o_putc)(c1 | 0x080); 2701 } 2702 }else{ 2703 (*o_putc)((c2 & 0x7f) | 0x080); 2704 (*o_putc)(c1 | 0x080); 2705 } 2706#endif 2707 } else { 2708 if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) { 2709 
set_iconv(FALSE, 0); 2710 return; /* too late to rescue this char */ 2711 } 2712 output_mode = EUC_JP; 2713 (*o_putc)(c2 | 0x080); 2714 (*o_putc)(c1 | 0x080); 2715 } 2716} 2717 2718static void 2719s_oconv(nkf_char c2, nkf_char c1) 2720{ 2721#ifdef NUMCHAR_OPTION 2722 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2723 w16e_conv(c1, &c2, &c1); 2724 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2725 c2 = c1 & VALUE_MASK; 2726 if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) { 2727 /* CP932 UDC */ 2728 c1 &= 0xFFF; 2729 c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB); 2730 c1 = c1 % 188; 2731 c1 += 0x40 + (c1 > 0x3e); 2732 (*o_putc)(c2); 2733 (*o_putc)(c1); 2734 return; 2735 } else { 2736 if(encode_fallback)(*encode_fallback)(c1); 2737 return; 2738 } 2739 } 2740 } 2741#endif 2742 if (c2 == EOF) { 2743 (*o_putc)(EOF); 2744 return; 2745 } else if (c2 == 0) { 2746 output_mode = ASCII; 2747 (*o_putc)(c1); 2748 } else if (c2 == JIS_X_0201_1976_K) { 2749 output_mode = SHIFT_JIS; 2750 (*o_putc)(c1|0x80); 2751 } else if (c2 == ISO_8859_1) { 2752 output_mode = ISO_8859_1; 2753 (*o_putc)(c1 | 0x080); 2754#ifdef X0212_ENABLE 2755 } else if (is_eucg3(c2)){ 2756 output_mode = SHIFT_JIS; 2757 if (e2s_conv(c2, c1, &c2, &c1) == 0){ 2758 (*o_putc)(c2); 2759 (*o_putc)(c1); 2760 } 2761#endif 2762 } else { 2763 if (!nkf_isprint(c1) || !nkf_isprint(c2)) { 2764 set_iconv(FALSE, 0); 2765 return; /* too late to rescue this char */ 2766 } 2767 output_mode = SHIFT_JIS; 2768 e2s_conv(c2, c1, &c2, &c1); 2769 2770#ifdef SHIFTJIS_CP932 2771 if (cp932inv_f 2772 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ 2773 nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; 2774 if (c){ 2775 c2 = c >> 8; 2776 c1 = c & 0xff; 2777 } 2778 } 2779#endif /* SHIFTJIS_CP932 */ 2780 2781 (*o_putc)(c2); 2782 if (prefix_table[(unsigned char)c1]){ 2783 (*o_putc)(prefix_table[(unsigned char)c1]); 2784 } 2785 (*o_putc)(c1); 2786 } 2787} 2788 2789#ifdef UTF8_OUTPUT_ENABLE 2790#define OUTPUT_UTF8(val) do { \ 2791 
nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \ 2792 (*o_putc)(c1); \ 2793 if (c2) (*o_putc)(c2); \ 2794 if (c3) (*o_putc)(c3); \ 2795 if (c4) (*o_putc)(c4); \ 2796 } while (0) 2797 2798static void 2799w_oconv(nkf_char c2, nkf_char c1) 2800{ 2801 nkf_char c3, c4; 2802 nkf_char val, val2; 2803 2804 if (output_bom_f) { 2805 output_bom_f = FALSE; 2806 (*o_putc)('\357'); 2807 (*o_putc)('\273'); 2808 (*o_putc)('\277'); 2809 } 2810 2811 if (c2 == EOF) { 2812 (*o_putc)(EOF); 2813 return; 2814 } 2815 2816 if (c2 == 0 && nkf_char_unicode_p(c1)){ 2817 val = c1 & VALUE_MASK; 2818 OUTPUT_UTF8(val); 2819 return; 2820 } 2821 2822 if (c2 == 0) { 2823 (*o_putc)(c1); 2824 } else { 2825 val = e2w_conv(c2, c1); 2826 if (val){ 2827 val2 = e2w_combining(val, c2, c1); 2828 if (val2) 2829 OUTPUT_UTF8(val2); 2830 OUTPUT_UTF8(val); 2831 } 2832 } 2833} 2834 2835#define OUTPUT_UTF16_BYTES(c1, c2) do { \ 2836 if (output_endian == ENDIAN_LITTLE){ \ 2837 (*o_putc)(c1); \ 2838 (*o_putc)(c2); \ 2839 }else{ \ 2840 (*o_putc)(c2); \ 2841 (*o_putc)(c1); \ 2842 } \ 2843 } while (0) 2844 2845#define OUTPUT_UTF16(val) do { \ 2846 if (nkf_char_unicode_bmp_p(val)) { \ 2847 c2 = (val >> 8) & 0xff; \ 2848 c1 = val & 0xff; \ 2849 OUTPUT_UTF16_BYTES(c1, c2); \ 2850 } else { \ 2851 val &= VALUE_MASK; \ 2852 if (val <= UNICODE_MAX) { \ 2853 c2 = (val >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */ \ 2854 c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \ 2855 OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \ 2856 OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \ 2857 } \ 2858 } \ 2859 } while (0) 2860 2861static void 2862w_oconv16(nkf_char c2, nkf_char c1) 2863{ 2864 if (output_bom_f) { 2865 output_bom_f = FALSE; 2866 OUTPUT_UTF16_BYTES(0xFF, 0xFE); 2867 } 2868 2869 if (c2 == EOF) { 2870 (*o_putc)(EOF); 2871 return; 2872 } 2873 2874 if (c2 == 0 && nkf_char_unicode_p(c1)) { 2875 OUTPUT_UTF16(c1); 2876 } else if (c2) { 2877 nkf_char val, val2; 2878 val = e2w_conv(c2, c1); 2879 if (!val) return; 
2880 val2 = e2w_combining(val, c2, c1); 2881 if (val2) 2882 OUTPUT_UTF16(val2); 2883 OUTPUT_UTF16(val); 2884 } else { 2885 OUTPUT_UTF16_BYTES(c1, c2); 2886 } 2887} 2888 2889#define OUTPUT_UTF32(c) do { \ 2890 if (output_endian == ENDIAN_LITTLE){ \ 2891 (*o_putc)( (c) & 0xFF); \ 2892 (*o_putc)(((c) >> 8) & 0xFF); \ 2893 (*o_putc)(((c) >> 16) & 0xFF); \ 2894 (*o_putc)(0); \ 2895 }else{ \ 2896 (*o_putc)(0); \ 2897 (*o_putc)(((c) >> 16) & 0xFF); \ 2898 (*o_putc)(((c) >> 8) & 0xFF); \ 2899 (*o_putc)( (c) & 0xFF); \ 2900 } \ 2901 } while (0) 2902 2903static void 2904w_oconv32(nkf_char c2, nkf_char c1) 2905{ 2906 if (output_bom_f) { 2907 output_bom_f = FALSE; 2908 if (output_endian == ENDIAN_LITTLE){ 2909 (*o_putc)(0xFF); 2910 (*o_putc)(0xFE); 2911 (*o_putc)(0); 2912 (*o_putc)(0); 2913 }else{ 2914 (*o_putc)(0); 2915 (*o_putc)(0); 2916 (*o_putc)(0xFE); 2917 (*o_putc)(0xFF); 2918 } 2919 } 2920 2921 if (c2 == EOF) { 2922 (*o_putc)(EOF); 2923 return; 2924 } 2925 2926 if (c2 == ISO_8859_1) { 2927 c1 |= 0x80; 2928 } else if (c2 == 0 && nkf_char_unicode_p(c1)) { 2929 c1 &= VALUE_MASK; 2930 } else if (c2) { 2931 nkf_char val, val2; 2932 val = e2w_conv(c2, c1); 2933 if (!val) return; 2934 val2 = e2w_combining(val, c2, c1); 2935 if (val2) 2936 OUTPUT_UTF32(val2); 2937 c1 = val; 2938 } 2939 OUTPUT_UTF32(c1); 2940} 2941#endif 2942 2943#define SCORE_L2 (1) /* Kanji Level 2 */ 2944#define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */ 2945#define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */ 2946#define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */ 2947#define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */ 2948#define SCORE_X0213 (SCORE_X0212 << 1) /* JIS X 0213 */ 2949#define SCORE_NO_EXIST (SCORE_X0213 << 1) /* Undefined Characters */ 2950#define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */ 2951#define SCORE_ERROR (SCORE_iMIME << 1) /* Error */ 2952 2953#define SCORE_INIT (SCORE_iMIME) 2954 2955static const nkf_char score_table_A0[] = { 2956 0, 0, 
0, 0, 2957 0, 0, 0, 0, 2958 0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, 2959 SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213, 2960}; 2961 2962static const nkf_char score_table_F0[] = { 2963 SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2, 2964 SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213, 2965 SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932, 2966 SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR, 2967}; 2968 2969static const nkf_char score_table_8FA0[] = { 2970 0, SCORE_X0213, SCORE_X0212, SCORE_X0213, 2971 SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212, 2972 SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212, 2973 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213, 2974}; 2975 2976static const nkf_char score_table_8FE0[] = { 2977 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212, 2978 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212, 2979 SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212, 2980 SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213, 2981}; 2982 2983static const nkf_char score_table_8FF0[] = { 2984 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212, 2985 SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213, 2986 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213, 2987 SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213, 2988}; 2989 2990static void 2991set_code_score(struct input_code *ptr, nkf_char score) 2992{ 2993 if (ptr){ 2994 ptr->score |= score; 2995 } 2996} 2997 2998static void 2999clr_code_score(struct input_code *ptr, nkf_char score) 3000{ 3001 if (ptr){ 3002 ptr->score &= ~score; 3003 } 3004} 3005 3006static void 3007code_score(struct input_code *ptr) 3008{ 3009 nkf_char c2 = ptr->buf[0]; 3010 nkf_char c1 = ptr->buf[1]; 3011 if (c2 < 0){ 3012 set_code_score(ptr, SCORE_ERROR); 3013 }else if (c2 == SS2){ 3014 set_code_score(ptr, SCORE_KANA); 3015 }else if (c2 == 0x8f){ 3016 if ((c1 & 0x70) == 0x20){ 3017 set_code_score(ptr, score_table_8FA0[c1 & 0x0f]); 3018 }else if ((c1 & 0x70) == 0x60){ 3019 set_code_score(ptr, 
score_table_8FE0[c1 & 0x0f]); 3020 }else if ((c1 & 0x70) == 0x70){ 3021 set_code_score(ptr, score_table_8FF0[c1 & 0x0f]); 3022 }else{ 3023 set_code_score(ptr, SCORE_X0212); 3024 } 3025#ifdef UTF8_OUTPUT_ENABLE 3026 }else if (!e2w_conv(c2, c1)){ 3027 set_code_score(ptr, SCORE_NO_EXIST); 3028#endif 3029 }else if ((c2 & 0x70) == 0x20){ 3030 set_code_score(ptr, score_table_A0[c2 & 0x0f]); 3031 }else if ((c2 & 0x70) == 0x70){ 3032 set_code_score(ptr, score_table_F0[c2 & 0x0f]); 3033 }else if ((c2 & 0x70) >= 0x50){ 3034 set_code_score(ptr, SCORE_L2); 3035 } 3036} 3037 3038static void 3039status_disable(struct input_code *ptr) 3040{ 3041 ptr->stat = -1; 3042 ptr->buf[0] = -1; 3043 code_score(ptr); 3044 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0); 3045} 3046 3047static void 3048status_push_ch(struct input_code *ptr, nkf_char c) 3049{ 3050 ptr->buf[ptr->index++] = c; 3051} 3052 3053static void 3054status_clear(struct input_code *ptr) 3055{ 3056 ptr->stat = 0; 3057 ptr->index = 0; 3058} 3059 3060static void 3061status_reset(struct input_code *ptr) 3062{ 3063 status_clear(ptr); 3064 ptr->score = SCORE_INIT; 3065} 3066 3067static void 3068status_reinit(struct input_code *ptr) 3069{ 3070 status_reset(ptr); 3071 ptr->_file_stat = 0; 3072} 3073 3074static void 3075status_check(struct input_code *ptr, nkf_char c) 3076{ 3077 if (c <= DEL && estab_f){ 3078 status_reset(ptr); 3079 } 3080} 3081 3082static void 3083s_status(struct input_code *ptr, nkf_char c) 3084{ 3085 switch(ptr->stat){ 3086 case -1: 3087 status_check(ptr, c); 3088 break; 3089 case 0: 3090 if (c <= DEL){ 3091 break; 3092 }else if (nkf_char_unicode_p(c)){ 3093 break; 3094 }else if (0xa1 <= c && c <= 0xdf){ 3095 status_push_ch(ptr, SS2); 3096 status_push_ch(ptr, c); 3097 code_score(ptr); 3098 status_clear(ptr); 3099 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){ 3100 ptr->stat = 1; 3101 status_push_ch(ptr, c); 3102 }else if (0xed <= c && c <= 0xee){ 3103 ptr->stat = 3; 3104 status_push_ch(ptr, 
c); 3105#ifdef SHIFTJIS_CP932 3106 }else if (is_ibmext_in_sjis(c)){ 3107 ptr->stat = 2; 3108 status_push_ch(ptr, c); 3109#endif /* SHIFTJIS_CP932 */ 3110#ifdef X0212_ENABLE 3111 }else if (0xf0 <= c && c <= 0xfc){ 3112 ptr->stat = 1; 3113 status_push_ch(ptr, c); 3114#endif /* X0212_ENABLE */ 3115 }else{ 3116 status_disable(ptr); 3117 } 3118 break; 3119 case 1: 3120 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ 3121 status_push_ch(ptr, c); 3122 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); 3123 code_score(ptr); 3124 status_clear(ptr); 3125 }else{ 3126 status_disable(ptr); 3127 } 3128 break; 3129 case 2: 3130#ifdef SHIFTJIS_CP932 3131 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) { 3132 status_push_ch(ptr, c); 3133 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) { 3134 set_code_score(ptr, SCORE_CP932); 3135 status_clear(ptr); 3136 break; 3137 } 3138 } 3139#endif /* SHIFTJIS_CP932 */ 3140 status_disable(ptr); 3141 break; 3142 case 3: 3143 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ 3144 status_push_ch(ptr, c); 3145 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); 3146 set_code_score(ptr, SCORE_CP932); 3147 status_clear(ptr); 3148 }else{ 3149 status_disable(ptr); 3150 } 3151 break; 3152 } 3153} 3154 3155static void 3156e_status(struct input_code *ptr, nkf_char c) 3157{ 3158 switch (ptr->stat){ 3159 case -1: 3160 status_check(ptr, c); 3161 break; 3162 case 0: 3163 if (c <= DEL){ 3164 break; 3165 }else if (nkf_char_unicode_p(c)){ 3166 break; 3167 }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){ 3168 ptr->stat = 1; 3169 status_push_ch(ptr, c); 3170#ifdef X0212_ENABLE 3171 }else if (0x8f == c){ 3172 ptr->stat = 2; 3173 status_push_ch(ptr, c); 3174#endif /* X0212_ENABLE */ 3175 }else{ 3176 status_disable(ptr); 3177 } 3178 break; 3179 case 1: 3180 if (0xa1 <= c && c <= 0xfe){ 3181 status_push_ch(ptr, c); 3182 code_score(ptr); 3183 status_clear(ptr); 3184 }else{ 3185 
status_disable(ptr); 3186 } 3187 break; 3188#ifdef X0212_ENABLE 3189 case 2: 3190 if (0xa1 <= c && c <= 0xfe){ 3191 ptr->stat = 1; 3192 status_push_ch(ptr, c); 3193 }else{ 3194 status_disable(ptr); 3195 } 3196#endif /* X0212_ENABLE */ 3197 } 3198} 3199 3200#ifdef UTF8_INPUT_ENABLE 3201static void 3202w_status(struct input_code *ptr, nkf_char c) 3203{ 3204 switch (ptr->stat){ 3205 case -1: 3206 status_check(ptr, c); 3207 break; 3208 case 0: 3209 if (c <= DEL){ 3210 break; 3211 }else if (nkf_char_unicode_p(c)){ 3212 break; 3213 }else if (0xc0 <= c && c <= 0xdf){ 3214 ptr->stat = 1; 3215 status_push_ch(ptr, c); 3216 }else if (0xe0 <= c && c <= 0xef){ 3217 ptr->stat = 2; 3218 status_push_ch(ptr, c); 3219 }else if (0xf0 <= c && c <= 0xf4){ 3220 ptr->stat = 3; 3221 status_push_ch(ptr, c); 3222 }else{ 3223 status_disable(ptr); 3224 } 3225 break; 3226 case 1: 3227 case 2: 3228 if (0x80 <= c && c <= 0xbf){ 3229 status_push_ch(ptr, c); 3230 if (ptr->index > ptr->stat){ 3231 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb 3232 && ptr->buf[2] == 0xbf); 3233 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2], 3234 &ptr->buf[0], &ptr->buf[1]); 3235 if (!bom){ 3236 code_score(ptr); 3237 } 3238 status_clear(ptr); 3239 } 3240 }else{ 3241 status_disable(ptr); 3242 } 3243 break; 3244 case 3: 3245 if (0x80 <= c && c <= 0xbf){ 3246 if (ptr->index < ptr->stat){ 3247 status_push_ch(ptr, c); 3248 } else { 3249 status_clear(ptr); 3250 } 3251 }else{ 3252 status_disable(ptr); 3253 } 3254 break; 3255 } 3256} 3257#endif 3258 3259static void 3260code_status(nkf_char c) 3261{ 3262 int action_flag = 1; 3263 struct input_code *result = 0; 3264 struct input_code *p = input_code_list; 3265 while (p->name){ 3266 if (!p->status_func) { 3267 ++p; 3268 continue; 3269 } 3270 if (!p->status_func) 3271 continue; 3272 (p->status_func)(p, c); 3273 if (p->stat > 0){ 3274 action_flag = 0; 3275 }else if(p->stat == 0){ 3276 if (result){ 3277 action_flag = 0; 3278 }else{ 3279 result = p; 3280 } 3281 } 3282 ++p; 
3283 } 3284 3285 if (action_flag){ 3286 if (result && !estab_f){ 3287 set_iconv(TRUE, result->iconv_func); 3288 }else if (c <= DEL){ 3289 struct input_code *ptr = input_code_list; 3290 while (ptr->name){ 3291 status_reset(ptr); 3292 ++ptr; 3293 } 3294 } 3295 } 3296} 3297 3298typedef struct { 3299 nkf_buf_t *std_gc_buf; 3300 nkf_char broken_state; 3301 nkf_buf_t *broken_buf; 3302 nkf_char mimeout_state; 3303 nkf_buf_t *nfc_buf; 3304} nkf_state_t; 3305 3306static nkf_state_t *nkf_state = NULL; 3307 3308#define STD_GC_BUFSIZE (256) 3309 3310static void 3311nkf_state_init(void) 3312{ 3313 if (nkf_state) { 3314 nkf_buf_clear(nkf_state->std_gc_buf); 3315 nkf_buf_clear(nkf_state->broken_buf); 3316 nkf_buf_clear(nkf_state->nfc_buf); 3317 } 3318 else { 3319 nkf_state = nkf_xmalloc(sizeof(nkf_state_t)); 3320 nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE); 3321 nkf_state->broken_buf = nkf_buf_new(3); 3322 nkf_state->nfc_buf = nkf_buf_new(9); 3323 } 3324 nkf_state->broken_state = 0; 3325 nkf_state->mimeout_state = 0; 3326} 3327 3328#ifndef WIN32DLL 3329static nkf_char 3330std_getc(FILE *f) 3331{ 3332 if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){ 3333 return nkf_buf_pop(nkf_state->std_gc_buf); 3334 } 3335 return getc(f); 3336} 3337#endif /*WIN32DLL*/ 3338 3339static nkf_char 3340std_ungetc(nkf_char c, ARG_UNUSED FILE *f) 3341{ 3342 nkf_buf_push(nkf_state->std_gc_buf, c); 3343 return c; 3344} 3345 3346#ifndef WIN32DLL 3347static void 3348std_putc(nkf_char c) 3349{ 3350 if(c!=EOF) 3351 putchar(c); 3352} 3353#endif /*WIN32DLL*/ 3354 3355static nkf_char hold_buf[HOLD_SIZE*2]; 3356static int hold_count = 0; 3357static nkf_char 3358push_hold_buf(nkf_char c2) 3359{ 3360 if (hold_count >= HOLD_SIZE*2) 3361 return (EOF); 3362 hold_buf[hold_count++] = c2; 3363 return ((hold_count >= HOLD_SIZE*2) ? 
EOF : hold_count); 3364} 3365 3366static int 3367h_conv(FILE *f, nkf_char c1, nkf_char c2) 3368{ 3369 int ret; 3370 int hold_index; 3371 int fromhold_count; 3372 nkf_char c3, c4; 3373 3374 /** it must NOT be in the kanji shifte sequence */ 3375 /** it must NOT be written in JIS7 */ 3376 /** and it must be after 2 byte 8bit code */ 3377 3378 hold_count = 0; 3379 push_hold_buf(c1); 3380 push_hold_buf(c2); 3381 3382 while ((c2 = (*i_getc)(f)) != EOF) { 3383 if (c2 == ESC){ 3384 (*i_ungetc)(c2,f); 3385 break; 3386 } 3387 code_status(c2); 3388 if (push_hold_buf(c2) == EOF || estab_f) { 3389 break; 3390 } 3391 } 3392 3393 if (!estab_f) { 3394 struct input_code *p = input_code_list; 3395 struct input_code *result = p; 3396 if (c2 == EOF) { 3397 code_status(c2); 3398 } 3399 while (p->name) { 3400 if (p->status_func && p->score < result->score) { 3401 result = p; 3402 } 3403 p++; 3404 } 3405 set_iconv(TRUE, result->iconv_func); 3406 } 3407 3408 3409 /** now, 3410 ** 1) EOF is detected, or 3411 ** 2) Code is established, or 3412 ** 3) Buffer is FULL (but last word is pushed) 3413 ** 3414 ** in 1) and 3) cases, we continue to use 3415 ** Kanji codes by oconv and leave estab_f unchanged. 
3416 **/ 3417 3418 ret = c2; 3419 hold_index = 0; 3420 while (hold_index < hold_count){ 3421 c1 = hold_buf[hold_index++]; 3422 if (nkf_char_unicode_p(c1)) { 3423 (*oconv)(0, c1); 3424 continue; 3425 } 3426 else if (c1 <= DEL){ 3427 (*iconv)(0, c1, 0); 3428 continue; 3429 }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){ 3430 (*iconv)(JIS_X_0201_1976_K, c1, 0); 3431 continue; 3432 } 3433 fromhold_count = 1; 3434 if (hold_index < hold_count){ 3435 c2 = hold_buf[hold_index++]; 3436 fromhold_count++; 3437 }else{ 3438 c2 = (*i_getc)(f); 3439 if (c2 == EOF){ 3440 c4 = EOF; 3441 break; 3442 } 3443 code_status(c2); 3444 } 3445 c3 = 0; 3446 switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */ 3447 case -2: 3448 /* 4 bytes UTF-8 */ 3449 if (hold_index < hold_count){ 3450 c3 = hold_buf[hold_index++]; 3451 } else if ((c3 = (*i_getc)(f)) == EOF) { 3452 ret = EOF; 3453 break; 3454 } 3455 code_status(c3); 3456 if (hold_index < hold_count){ 3457 c4 = hold_buf[hold_index++]; 3458 } else if ((c4 = (*i_getc)(f)) == EOF) { 3459 c3 = ret = EOF; 3460 break; 3461 } 3462 code_status(c4); 3463 (*iconv)(c1, c2, (c3<<8)|c4); 3464 break; 3465 case -3: 3466 /* 4 bytes UTF-8 (check combining character) */ 3467 if (hold_index < hold_count){ 3468 c3 = hold_buf[hold_index++]; 3469 fromhold_count++; 3470 } else if ((c3 = (*i_getc)(f)) == EOF) { 3471 w_iconv_nocombine(c1, c2, 0); 3472 break; 3473 } 3474 if (hold_index < hold_count){ 3475 c4 = hold_buf[hold_index++]; 3476 fromhold_count++; 3477 } else if ((c4 = (*i_getc)(f)) == EOF) { 3478 w_iconv_nocombine(c1, c2, 0); 3479 if (fromhold_count <= 2) 3480 (*i_ungetc)(c3,f); 3481 else 3482 hold_index--; 3483 continue; 3484 } 3485 if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) { 3486 w_iconv_nocombine(c1, c2, 0); 3487 if (fromhold_count <= 2) { 3488 (*i_ungetc)(c4,f); 3489 (*i_ungetc)(c3,f); 3490 } else if (fromhold_count == 3) { 3491 (*i_ungetc)(c4,f); 3492 hold_index--; 3493 } else { 3494 hold_index -= 2; 3495 } 3496 } 3497 break; 3498 case 
-1: 3499 /* 3 bytes EUC or UTF-8 */ 3500 if (hold_index < hold_count){ 3501 c3 = hold_buf[hold_index++]; 3502 fromhold_count++; 3503 } else if ((c3 = (*i_getc)(f)) == EOF) { 3504 ret = EOF; 3505 break; 3506 } else { 3507 code_status(c3); 3508 } 3509 if ((*iconv)(c1, c2, c3) == -3) { 3510 /* 6 bytes UTF-8 (check combining character) */ 3511 nkf_char c5, c6; 3512 if (hold_index < hold_count){ 3513 c4 = hold_buf[hold_index++]; 3514 fromhold_count++; 3515 } else if ((c4 = (*i_getc)(f)) == EOF) { 3516 w_iconv_nocombine(c1, c2, c3); 3517 continue; 3518 } 3519 if (hold_index < hold_count){ 3520 c5 = hold_buf[hold_index++]; 3521 fromhold_count++; 3522 } else if ((c5 = (*i_getc)(f)) == EOF) { 3523 w_iconv_nocombine(c1, c2, c3); 3524 if (fromhold_count == 4) 3525 hold_index--; 3526 else 3527 (*i_ungetc)(c4,f); 3528 continue; 3529 } 3530 if (hold_index < hold_count){ 3531 c6 = hold_buf[hold_index++]; 3532 fromhold_count++; 3533 } else if ((c6 = (*i_getc)(f)) == EOF) { 3534 w_iconv_nocombine(c1, c2, c3); 3535 if (fromhold_count == 5) { 3536 hold_index -= 2; 3537 } else if (fromhold_count == 4) { 3538 hold_index--; 3539 (*i_ungetc)(c5,f); 3540 } else { 3541 (*i_ungetc)(c5,f); 3542 (*i_ungetc)(c4,f); 3543 } 3544 continue; 3545 } 3546 if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) { 3547 w_iconv_nocombine(c1, c2, c3); 3548 if (fromhold_count == 6) { 3549 hold_index -= 3; 3550 } else if (fromhold_count == 5) { 3551 hold_index -= 2; 3552 (*i_ungetc)(c6,f); 3553 } else if (fromhold_count == 4) { 3554 hold_index--; 3555 (*i_ungetc)(c6,f); 3556 (*i_ungetc)(c5,f); 3557 } else { 3558 (*i_ungetc)(c6,f); 3559 (*i_ungetc)(c5,f); 3560 (*i_ungetc)(c4,f); 3561 } 3562 } 3563 } 3564 break; 3565 } 3566 if (c3 == EOF) break; 3567 } 3568 return ret; 3569} 3570 3571/* 3572 * Check and Ignore BOM 3573 */ 3574static void 3575check_bom(FILE *f) 3576{ 3577 int c2; 3578 switch(c2 = (*i_getc)(f)){ 3579 case 0x00: 3580 if((c2 = (*i_getc)(f)) == 0x00){ 3581 if((c2 = (*i_getc)(f)) == 0xFE){ 3582 if((c2 = 
(*i_getc)(f)) == 0xFF){ 3583 if(!input_encoding){ 3584 set_iconv(TRUE, w_iconv32); 3585 } 3586 if (iconv == w_iconv32) { 3587 input_bom_f = TRUE; 3588 input_endian = ENDIAN_BIG; 3589 return; 3590 } 3591 (*i_ungetc)(0xFF,f); 3592 }else (*i_ungetc)(c2,f); 3593 (*i_ungetc)(0xFE,f); 3594 }else if(c2 == 0xFF){ 3595 if((c2 = (*i_getc)(f)) == 0xFE){ 3596 if(!input_encoding){ 3597 set_iconv(TRUE, w_iconv32); 3598 } 3599 if (iconv == w_iconv32) { 3600 input_endian = ENDIAN_2143; 3601 return; 3602 } 3603 (*i_ungetc)(0xFF,f); 3604 }else (*i_ungetc)(c2,f); 3605 (*i_ungetc)(0xFF,f); 3606 }else (*i_ungetc)(c2,f); 3607 (*i_ungetc)(0x00,f); 3608 }else (*i_ungetc)(c2,f); 3609 (*i_ungetc)(0x00,f); 3610 break; 3611 case 0xEF: 3612 if((c2 = (*i_getc)(f)) == 0xBB){ 3613 if((c2 = (*i_getc)(f)) == 0xBF){ 3614 if(!input_encoding){ 3615 set_iconv(TRUE, w_iconv); 3616 } 3617 if (iconv == w_iconv) { 3618 input_bom_f = TRUE; 3619 return; 3620 } 3621 (*i_ungetc)(0xBF,f); 3622 }else (*i_ungetc)(c2,f); 3623 (*i_ungetc)(0xBB,f); 3624 }else (*i_ungetc)(c2,f); 3625 (*i_ungetc)(0xEF,f); 3626 break; 3627 case 0xFE: 3628 if((c2 = (*i_getc)(f)) == 0xFF){ 3629 if((c2 = (*i_getc)(f)) == 0x00){ 3630 if((c2 = (*i_getc)(f)) == 0x00){ 3631 if(!input_encoding){ 3632 set_iconv(TRUE, w_iconv32); 3633 } 3634 if (iconv == w_iconv32) { 3635 input_endian = ENDIAN_3412; 3636 return; 3637 } 3638 (*i_ungetc)(0x00,f); 3639 }else (*i_ungetc)(c2,f); 3640 (*i_ungetc)(0x00,f); 3641 }else (*i_ungetc)(c2,f); 3642 if(!input_encoding){ 3643 set_iconv(TRUE, w_iconv16); 3644 } 3645 if (iconv == w_iconv16) { 3646 input_endian = ENDIAN_BIG; 3647 input_bom_f = TRUE; 3648 return; 3649 } 3650 (*i_ungetc)(0xFF,f); 3651 }else (*i_ungetc)(c2,f); 3652 (*i_ungetc)(0xFE,f); 3653 break; 3654 case 0xFF: 3655 if((c2 = (*i_getc)(f)) == 0xFE){ 3656 if((c2 = (*i_getc)(f)) == 0x00){ 3657 if((c2 = (*i_getc)(f)) == 0x00){ 3658 if(!input_encoding){ 3659 set_iconv(TRUE, w_iconv32); 3660 } 3661 if (iconv == w_iconv32) { 3662 input_endian = 
ENDIAN_LITTLE; 3663 input_bom_f = TRUE; 3664 return; 3665 } 3666 (*i_ungetc)(0x00,f); 3667 }else (*i_ungetc)(c2,f); 3668 (*i_ungetc)(0x00,f); 3669 }else (*i_ungetc)(c2,f); 3670 if(!input_encoding){ 3671 set_iconv(TRUE, w_iconv16); 3672 } 3673 if (iconv == w_iconv16) { 3674 input_endian = ENDIAN_LITTLE; 3675 input_bom_f = TRUE; 3676 return; 3677 } 3678 (*i_ungetc)(0xFE,f); 3679 }else (*i_ungetc)(c2,f); 3680 (*i_ungetc)(0xFF,f); 3681 break; 3682 default: 3683 (*i_ungetc)(c2,f); 3684 break; 3685 } 3686} 3687 3688static nkf_char 3689broken_getc(FILE *f) 3690{ 3691 nkf_char c, c1; 3692 3693 if (!nkf_buf_empty_p(nkf_state->broken_buf)) { 3694 return nkf_buf_pop(nkf_state->broken_buf); 3695 } 3696 c = (*i_bgetc)(f); 3697 if (c=='$' && nkf_state->broken_state != ESC 3698 && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) { 3699 c1= (*i_bgetc)(f); 3700 nkf_state->broken_state = 0; 3701 if (c1=='@'|| c1=='B') { 3702 nkf_buf_push(nkf_state->broken_buf, c1); 3703 nkf_buf_push(nkf_state->broken_buf, c); 3704 return ESC; 3705 } else { 3706 (*i_bungetc)(c1,f); 3707 return c; 3708 } 3709 } else if (c=='(' && nkf_state->broken_state != ESC 3710 && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) { 3711 c1= (*i_bgetc)(f); 3712 nkf_state->broken_state = 0; 3713 if (c1=='J'|| c1=='B') { 3714 nkf_buf_push(nkf_state->broken_buf, c1); 3715 nkf_buf_push(nkf_state->broken_buf, c); 3716 return ESC; 3717 } else { 3718 (*i_bungetc)(c1,f); 3719 return c; 3720 } 3721 } else { 3722 nkf_state->broken_state = c; 3723 return c; 3724 } 3725} 3726 3727static nkf_char 3728broken_ungetc(nkf_char c, ARG_UNUSED FILE *f) 3729{ 3730 if (nkf_buf_length(nkf_state->broken_buf) < 2) 3731 nkf_buf_push(nkf_state->broken_buf, c); 3732 return c; 3733} 3734 3735static void 3736eol_conv(nkf_char c2, nkf_char c1) 3737{ 3738 if (guess_f && input_eol != EOF) { 3739 if (c2 == 0 && c1 == LF) { 3740 if (!input_eol) input_eol = prev_cr ? CRLF : LF; 3741 else if (input_eol != (prev_cr ? 
CRLF : LF)) input_eol = EOF; 3742 } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF; 3743 else if (!prev_cr); 3744 else if (!input_eol) input_eol = CR; 3745 else if (input_eol != CR) input_eol = EOF; 3746 } 3747 if (prev_cr || (c2 == 0 && c1 == LF)) { 3748 prev_cr = 0; 3749 if (eolmode_f != LF) (*o_eol_conv)(0, CR); 3750 if (eolmode_f != CR) (*o_eol_conv)(0, LF); 3751 } 3752 if (c2 == 0 && c1 == CR) prev_cr = CR; 3753 else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1); 3754} 3755 3756static void 3757put_newline(void (*func)(nkf_char)) 3758{ 3759 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { 3760 case CRLF: 3761 (*func)(0x0D); 3762 (*func)(0x0A); 3763 break; 3764 case CR: 3765 (*func)(0x0D); 3766 break; 3767 case LF: 3768 (*func)(0x0A); 3769 break; 3770 } 3771} 3772 3773static void 3774oconv_newline(void (*func)(nkf_char, nkf_char)) 3775{ 3776 switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) { 3777 case CRLF: 3778 (*func)(0, 0x0D); 3779 (*func)(0, 0x0A); 3780 break; 3781 case CR: 3782 (*func)(0, 0x0D); 3783 break; 3784 case LF: 3785 (*func)(0, 0x0A); 3786 break; 3787 } 3788} 3789 3790/* 3791 Return value of fold_conv() 3792 3793 LF add newline and output char 3794 CR add newline and output nothing 3795 SP space 3796 0 skip 3797 1 (or else) normal output 3798 3799 fold state in prev (previous character) 3800 3801 >0x80 Japanese (X0208/X0201) 3802 <0x80 ASCII 3803 LF new line 3804 SP space 3805 3806 This fold algorthm does not preserve heading space in a line. 3807 This is the main difference from fmt. 
3808 */ 3809 3810#define char_size(c2,c1) (c2?2:1) 3811 3812static void 3813fold_conv(nkf_char c2, nkf_char c1) 3814{ 3815 nkf_char prev0; 3816 nkf_char fold_state; 3817 3818 if (c1== CR && !fold_preserve_f) { 3819 fold_state=0; /* ignore cr */ 3820 }else if (c1== LF&&f_prev==CR && fold_preserve_f) { 3821 f_prev = LF; 3822 fold_state=0; /* ignore cr */ 3823 } else if (c1== BS) { 3824 if (f_line>0) f_line--; 3825 fold_state = 1; 3826 } else if (c2==EOF && f_line != 0) { /* close open last line */ 3827 fold_state = LF; 3828 } else if ((c1==LF && !fold_preserve_f) 3829 || ((c1==CR||(c1==LF&&f_prev!=CR)) 3830 && fold_preserve_f)) { 3831 /* new line */ 3832 if (fold_preserve_f) { 3833 f_prev = c1; 3834 f_line = 0; 3835 fold_state = CR; 3836 } else if ((f_prev == c1 && !fold_preserve_f) 3837 || (f_prev == LF && fold_preserve_f) 3838 ) { /* duplicate newline */ 3839 if (f_line) { 3840 f_line = 0; 3841 fold_state = LF; /* output two newline */ 3842 } else { 3843 f_line = 0; 3844 fold_state = 1; 3845 } 3846 } else { 3847 if (f_prev&0x80) { /* Japanese? */ 3848 f_prev = c1; 3849 fold_state = 0; /* ignore given single newline */ 3850 } else if (f_prev==SP) { 3851 fold_state = 0; 3852 } else { 3853 f_prev = c1; 3854 if (++f_line<=fold_len) 3855 fold_state = SP; 3856 else { 3857 f_line = 0; 3858 fold_state = CR; /* fold and output nothing */ 3859 } 3860 } 3861 } 3862 } else if (c1=='\f') { 3863 f_prev = LF; 3864 f_line = 0; 3865 fold_state = LF; /* output newline and clear */ 3866 } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) { 3867 /* X0208 kankaku or ascii space */ 3868 if (f_prev == SP) { 3869 fold_state = 0; /* remove duplicate spaces */ 3870 } else { 3871 f_prev = SP; 3872 if (++f_line<=fold_len) 3873 fold_state = SP; /* output ASCII space only */ 3874 else { 3875 f_prev = SP; f_line = 0; 3876 fold_state = CR; /* fold and output nothing */ 3877 } 3878 } 3879 } else { 3880 prev0 = f_prev; /* we still need this one... 
, but almost done */ 3881 f_prev = c1; 3882 if (c2 || c2 == JIS_X_0201_1976_K) 3883 f_prev |= 0x80; /* this is Japanese */ 3884 f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1); 3885 if (f_line<=fold_len) { /* normal case */ 3886 fold_state = 1; 3887 } else { 3888 if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */ 3889 f_line = char_size(c2,c1); 3890 fold_state = LF; /* We can't wait, do fold now */ 3891 } else if (c2 == JIS_X_0201_1976_K) { 3892 /* simple kinsoku rules return 1 means no folding */ 3893 if (c1==(0xde&0x7f)) fold_state = 1; /* $B!+(B*/ 3894 else if (c1==(0xdf&0x7f)) fold_state = 1; /* $B!,(B*/ 3895 else if (c1==(0xa4&0x7f)) fold_state = 1; /* $B!#(B*/ 3896 else if (c1==(0xa3&0x7f)) fold_state = 1; /* $B!$(B*/ 3897 else if (c1==(0xa1&0x7f)) fold_state = 1; /* $B!W(B*/ 3898 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */ 3899 else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */ 3900 f_line = 1; 3901 fold_state = LF;/* add one new f_line before this character */ 3902 } else { 3903 f_line = 1; 3904 fold_state = LF;/* add one new f_line before this character */ 3905 } 3906 } else if (c2==0) { 3907 /* kinsoku point in ASCII */ 3908 if ( c1==')'|| /* { [ ( */ 3909 c1==']'|| 3910 c1=='}'|| 3911 c1=='.'|| 3912 c1==','|| 3913 c1=='!'|| 3914 c1=='?'|| 3915 c1=='/'|| 3916 c1==':'|| 3917 c1==';') { 3918 fold_state = 1; 3919 /* just after special */ 3920 } else if (!is_alnum(prev0)) { 3921 f_line = char_size(c2,c1); 3922 fold_state = LF; 3923 } else if ((prev0==SP) || /* ignored new f_line */ 3924 (prev0==LF)|| /* ignored new f_line */ 3925 (prev0&0x80)) { /* X0208 - ASCII */ 3926 f_line = char_size(c2,c1); 3927 fold_state = LF;/* add one new f_line before this character */ 3928 } else { 3929 fold_state = 1; /* default no fold in ASCII */ 3930 } 3931 } else { 3932 if (c2=='!') { 3933 if (c1=='"') fold_state = 1; /* $B!"(B */ 3934 else if (c1=='#') fold_state = 1; /* $B!#(B */ 3935 else if (c1=='W') fold_state = 1; /* $B!W(B */ 3936 else if 
(c1=='K') fold_state = 1; /* $B!K(B */ 3937 else if (c1=='$') fold_state = 1; /* $B!$(B */ 3938 else if (c1=='%') fold_state = 1; /* $B!%(B */ 3939 else if (c1=='\'') fold_state = 1; /* $B!\(B */ 3940 else if (c1=='(') fold_state = 1; /* $B!((B */ 3941 else if (c1==')') fold_state = 1; /* $B!)(B */ 3942 else if (c1=='*') fold_state = 1; /* $B!*(B */ 3943 else if (c1=='+') fold_state = 1; /* $B!+(B */ 3944 else if (c1==',') fold_state = 1; /* $B!,(B */ 3945 /* default no fold in kinsoku */ 3946 else { 3947 fold_state = LF; 3948 f_line = char_size(c2,c1); 3949 /* add one new f_line before this character */ 3950 } 3951 } else { 3952 f_line = char_size(c2,c1); 3953 fold_state = LF; 3954 /* add one new f_line before this character */ 3955 } 3956 } 3957 } 3958 } 3959 /* terminator process */ 3960 switch(fold_state) { 3961 case LF: 3962 oconv_newline(o_fconv); 3963 (*o_fconv)(c2,c1); 3964 break; 3965 case 0: 3966 return; 3967 case CR: 3968 oconv_newline(o_fconv); 3969 break; 3970 case TAB: 3971 case SP: 3972 (*o_fconv)(0,SP); 3973 break; 3974 default: 3975 (*o_fconv)(c2,c1); 3976 } 3977} 3978 3979static nkf_char z_prev2=0,z_prev1=0; 3980 3981static void 3982z_conv(nkf_char c2, nkf_char c1) 3983{ 3984 3985 /* if (c2) c1 &= 0x7f; assertion */ 3986 3987 if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) { 3988 (*o_zconv)(c2,c1); 3989 return; 3990 } 3991 3992 if (x0201_f) { 3993 if (z_prev2 == JIS_X_0201_1976_K) { 3994 if (c2 == JIS_X_0201_1976_K) { 3995 if (c1 == (0xde&0x7f)) { /* $BByE@(B */ 3996 z_prev2 = 0; 3997 (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]); 3998 return; 3999 } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* $BH>ByE@(B */ 4000 z_prev2 = 0; 4001 (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]); 4002 return; 4003 } else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) { /* $BH>ByE@(B */ 4004 z_prev2 = 0; 4005 (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]); 4006 return; 4007 } 4008 } 4009 
z_prev2 = 0; 4010 (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]); 4011 } 4012 if (c2 == JIS_X_0201_1976_K) { 4013 if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) { 4014 /* wait for $BByE@(B or $BH>ByE@(B */ 4015 z_prev1 = c1; 4016 z_prev2 = c2; 4017 return; 4018 } else { 4019 (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]); 4020 return; 4021 } 4022 } 4023 } 4024 4025 if (c2 == EOF) { 4026 (*o_zconv)(c2, c1); 4027 return; 4028 } 4029 4030 if (alpha_f&1 && c2 == 0x23) { 4031 /* JISX0208 Alphabet */ 4032 c2 = 0; 4033 } else if (c2 == 0x21) { 4034 /* JISX0208 Kigou */ 4035 if (0x21==c1) { 4036 if (alpha_f&2) { 4037 c2 = 0; 4038 c1 = SP; 4039 } else if (alpha_f&4) { 4040 (*o_zconv)(0, SP); 4041 (*o_zconv)(0, SP); 4042 return; 4043 } 4044 } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) { 4045 c2 = 0; 4046 c1 = fv[c1-0x20]; 4047 } 4048 } 4049 4050 if (alpha_f&8 && c2 == 0) { 4051 /* HTML Entity */ 4052 const char *entity = 0; 4053 switch (c1){ 4054 case '>': entity = ">"; break; 4055 case '<': entity = "<"; break; 4056 case '\"': entity = """; break; 4057 case '&': entity = "&"; break; 4058 } 4059 if (entity){ 4060 while (*entity) (*o_zconv)(0, *entity++); 4061 return; 4062 } 4063 } 4064 4065 if (alpha_f & 16) { 4066 /* JIS X 0208 Katakana to JIS X 0201 Katakana */ 4067 if (c2 == 0x21) { 4068 nkf_char c = 0; 4069 switch (c1) { 4070 case 0x23: 4071 /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */ 4072 c = 0xA1; 4073 break; 4074 case 0x56: 4075 /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */ 4076 c = 0xA2; 4077 break; 4078 case 0x57: 4079 /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */ 4080 c = 0xA3; 4081 break; 4082 case 0x22: 4083 /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */ 4084 c = 0xA4; 4085 break; 4086 case 0x26: 4087 /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 
(0xA5) Halfwidth Katakana Middle Dot */ 4088 c = 0xA5; 4089 break; 4090 case 0x3C: 4091 /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */ 4092 c = 0xB0; 4093 break; 4094 case 0x2B: 4095 /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */ 4096 c = 0xDE; 4097 break; 4098 case 0x2C: 4099 /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */ 4100 c = 0xDF; 4101 break; 4102 } 4103 if (c) { 4104 (*o_zconv)(JIS_X_0201_1976_K, c); 4105 return; 4106 } 4107 } else if (c2 == 0x25) { 4108 /* JISX0208 Katakana */ 4109 static const int fullwidth_to_halfwidth[] = 4110 { 4111 0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00, 4112 0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800, 4113 0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00, 4114 0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000, 4115 0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E, 4116 0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00, 4117 0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F, 4118 0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000, 4119 0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00, 4120 0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00, 4121 0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F, 4122 0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000 4123 }; 4124 if (fullwidth_to_halfwidth[c1-0x20]){ 4125 c2 = fullwidth_to_halfwidth[c1-0x20]; 4126 (*o_zconv)(JIS_X_0201_1976_K, c2>>8); 4127 if (c2 & 0xFF) { 4128 (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF); 4129 } 4130 return; 4131 } 4132 } else if (c2 == 0 && nkf_char_unicode_p(c1) && 4133 ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */ 4134 
(*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099); 4135 return; 4136 } 4137 } 4138 (*o_zconv)(c2,c1); 4139} 4140 4141 4142#define rot13(c) ( \ 4143 ( c < 'A') ? c: \ 4144 (c <= 'M') ? (c + 13): \ 4145 (c <= 'Z') ? (c - 13): \ 4146 (c < 'a') ? (c): \ 4147 (c <= 'm') ? (c + 13): \ 4148 (c <= 'z') ? (c - 13): \ 4149 (c) \ 4150 ) 4151 4152#define rot47(c) ( \ 4153 ( c < '!') ? c: \ 4154 ( c <= 'O') ? (c + 47) : \ 4155 ( c <= '~') ? (c - 47) : \ 4156 c \ 4157 ) 4158 4159static void 4160rot_conv(nkf_char c2, nkf_char c1) 4161{ 4162 if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) { 4163 c1 = rot13(c1); 4164 } else if (c2) { 4165 c1 = rot47(c1); 4166 c2 = rot47(c2); 4167 } 4168 (*o_rot_conv)(c2,c1); 4169} 4170 4171static void 4172hira_conv(nkf_char c2, nkf_char c1) 4173{ 4174 if (hira_f & 1) { 4175 if (c2 == 0x25) { 4176 if (0x20 < c1 && c1 < 0x74) { 4177 c2 = 0x24; 4178 (*o_hira_conv)(c2,c1); 4179 return; 4180 } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) { 4181 c2 = 0; 4182 c1 = nkf_char_unicode_new(0x3094); 4183 (*o_hira_conv)(c2,c1); 4184 return; 4185 } 4186 } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) { 4187 c1 += 2; 4188 (*o_hira_conv)(c2,c1); 4189 return; 4190 } 4191 } 4192 if (hira_f & 2) { 4193 if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) { 4194 c2 = 0x25; 4195 c1 = 0x74; 4196 } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) { 4197 c2 = 0x25; 4198 } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) { 4199 c1 -= 2; 4200 } 4201 } 4202 (*o_hira_conv)(c2,c1); 4203} 4204 4205 4206static void 4207iso2022jp_check_conv(nkf_char c2, nkf_char c1) 4208{ 4209#define RANGE_NUM_MAX 18 4210 static const nkf_char range[RANGE_NUM_MAX][2] = { 4211 {0x222f, 0x2239,}, 4212 {0x2242, 0x2249,}, 4213 {0x2251, 0x225b,}, 4214 {0x226b, 0x2271,}, 4215 {0x227a, 0x227d,}, 4216 {0x2321, 0x232f,}, 4217 {0x233a, 0x2340,}, 4218 {0x235b, 0x2360,}, 4219 {0x237b, 0x237e,}, 4220 {0x2474, 0x247e,}, 4221 {0x2577, 0x257e,}, 4222 {0x2639, 0x2640,}, 
4223 {0x2659, 0x267e,}, 4224 {0x2742, 0x2750,}, 4225 {0x2772, 0x277e,}, 4226 {0x2841, 0x287e,}, 4227 {0x4f54, 0x4f7e,}, 4228 {0x7425, 0x747e}, 4229 }; 4230 nkf_char i; 4231 nkf_char start, end, c; 4232 4233 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) { 4234 c2 = GETA1; 4235 c1 = GETA2; 4236 } 4237 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) { 4238 c2 = GETA1; 4239 c1 = GETA2; 4240 } 4241 4242 for (i = 0; i < RANGE_NUM_MAX; i++) { 4243 start = range[i][0]; 4244 end = range[i][1]; 4245 c = (c2 << 8) + c1; 4246 if (c >= start && c <= end) { 4247 c2 = GETA1; 4248 c1 = GETA2; 4249 } 4250 } 4251 (*o_iso2022jp_check_conv)(c2,c1); 4252} 4253 4254 4255/* This converts =?ISO-2022-JP?B?HOGE HOGE?= */ 4256 4257static const unsigned char *mime_pattern[] = { 4258 (const unsigned char *)"\075?EUC-JP?B?", 4259 (const unsigned char *)"\075?SHIFT_JIS?B?", 4260 (const unsigned char *)"\075?ISO-8859-1?Q?", 4261 (const unsigned char *)"\075?ISO-8859-1?B?", 4262 (const unsigned char *)"\075?ISO-2022-JP?B?", 4263 (const unsigned char *)"\075?ISO-2022-JP?B?", 4264 (const unsigned char *)"\075?ISO-2022-JP?Q?", 4265#if defined(UTF8_INPUT_ENABLE) 4266 (const unsigned char *)"\075?UTF-8?B?", 4267 (const unsigned char *)"\075?UTF-8?Q?", 4268#endif 4269 (const unsigned char *)"\075?US-ASCII?Q?", 4270 NULL 4271}; 4272 4273 4274/* $B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u(B */ 4275nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = { 4276 e_iconv, s_iconv, 0, 0, 0, 0, 0, 4277#if defined(UTF8_INPUT_ENABLE) 4278 w_iconv, w_iconv, 4279#endif 4280 0, 4281}; 4282 4283static const nkf_char mime_encode[] = { 4284 EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K, 4285#if defined(UTF8_INPUT_ENABLE) 4286 UTF_8, UTF_8, 4287#endif 4288 ASCII, 4289 0 4290}; 4291 4292static const nkf_char mime_encode_method[] = { 4293 'B', 'B','Q', 'B', 'B', 'B', 'Q', 4294#if defined(UTF8_INPUT_ENABLE) 4295 'B', 'Q', 4296#endif 4297 
'Q', 4298 0 4299}; 4300 4301 4302/* MIME preprocessor fifo */ 4303 4304#define MIME_BUF_SIZE (1024) /* 2^n ring buffer */ 4305#define MIME_BUF_MASK (MIME_BUF_SIZE-1) 4306#define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK] 4307static struct { 4308 unsigned char buf[MIME_BUF_SIZE]; 4309 unsigned int top; 4310 unsigned int last; /* decoded */ 4311 unsigned int input; /* undecoded */ 4312} mime_input_state; 4313static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL; 4314 4315#define MAXRECOVER 20 4316 4317static void 4318mime_input_buf_unshift(nkf_char c) 4319{ 4320 mime_input_buf(--mime_input_state.top) = (unsigned char)c; 4321} 4322 4323static nkf_char 4324mime_ungetc(nkf_char c, ARG_UNUSED FILE *f) 4325{ 4326 mime_input_buf_unshift(c); 4327 return c; 4328} 4329 4330static nkf_char 4331mime_ungetc_buf(nkf_char c, FILE *f) 4332{ 4333 if (mimebuf_f) 4334 (*i_mungetc_buf)(c,f); 4335 else 4336 mime_input_buf(--mime_input_state.input) = (unsigned char)c; 4337 return c; 4338} 4339 4340static nkf_char 4341mime_getc_buf(FILE *f) 4342{ 4343 /* we don't keep eof of mime_input_buf, becase it contains ?= as 4344 a terminator. It was checked in mime_integrity. */ 4345 return ((mimebuf_f)? 
4346 (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++)); 4347} 4348 4349static void 4350switch_mime_getc(void) 4351{ 4352 if (i_getc!=mime_getc) { 4353 i_mgetc = i_getc; i_getc = mime_getc; 4354 i_mungetc = i_ungetc; i_ungetc = mime_ungetc; 4355 if(mime_f==STRICT_MIME) { 4356 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf; 4357 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf; 4358 } 4359 } 4360} 4361 4362static void 4363unswitch_mime_getc(void) 4364{ 4365 if(mime_f==STRICT_MIME) { 4366 i_mgetc = i_mgetc_buf; 4367 i_mungetc = i_mungetc_buf; 4368 } 4369 i_getc = i_mgetc; 4370 i_ungetc = i_mungetc; 4371 if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back); 4372 mime_iconv_back = NULL; 4373} 4374 4375static nkf_char 4376mime_integrity(FILE *f, const unsigned char *p) 4377{ 4378 nkf_char c,d; 4379 unsigned int q; 4380 /* In buffered mode, read until =? or NL or buffer full 4381 */ 4382 mime_input_state.input = mime_input_state.top; 4383 mime_input_state.last = mime_input_state.top; 4384 4385 while(*p) mime_input_buf(mime_input_state.input++) = *p++; 4386 d = 0; 4387 q = mime_input_state.input; 4388 while((c=(*i_getc)(f))!=EOF) { 4389 if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) { 4390 break; /* buffer full */ 4391 } 4392 if (c=='=' && d=='?') { 4393 /* checked. skip header, start decode */ 4394 mime_input_buf(mime_input_state.input++) = (unsigned char)c; 4395 /* mime_last_input = mime_input_state.input; */ 4396 mime_input_state.input = q; 4397 switch_mime_getc(); 4398 return 1; 4399 } 4400 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c)))) 4401 break; 4402 /* Should we check length mod 4? 
*/ 4403 mime_input_buf(mime_input_state.input++) = (unsigned char)c; 4404 d=c; 4405 } 4406 /* In case of Incomplete MIME, no MIME decode */ 4407 mime_input_buf(mime_input_state.input++) = (unsigned char)c; 4408 mime_input_state.last = mime_input_state.input; /* point undecoded buffer */ 4409 mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */ 4410 switch_mime_getc(); /* anyway we need buffered getc */ 4411 return 1; 4412} 4413 4414static nkf_char 4415mime_begin_strict(FILE *f) 4416{ 4417 nkf_char c1 = 0; 4418 int i,j,k; 4419 const unsigned char *p,*q; 4420 nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */ 4421 4422 mime_decode_mode = FALSE; 4423 /* =? has been checked */ 4424 j = 0; 4425 p = mime_pattern[j]; 4426 r[0]='='; r[1]='?'; 4427 4428 for(i=2;p[i]>SP;i++) { /* start at =? */ 4429 if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) { 4430 /* pattern fails, try next one */ 4431 q = p; 4432 while (mime_pattern[++j]) { 4433 p = mime_pattern[j]; 4434 for(k=2;k<i;k++) /* assume length(p) > i */ 4435 if (p[k]!=q[k]) break; 4436 if (k==i && nkf_toupper(c1)==p[k]) break; 4437 } 4438 p = mime_pattern[j]; 4439 if (p) continue; /* found next one, continue */ 4440 /* all fails, output from recovery buffer */ 4441 (*i_ungetc)(c1,f); 4442 for(j=0;j<i;j++) { 4443 (*oconv)(0,r[j]); 4444 } 4445 return c1; 4446 } 4447 } 4448 mime_decode_mode = p[i-2]; 4449 4450 mime_iconv_back = iconv; 4451 set_iconv(FALSE, mime_priority_func[j]); 4452 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME); 4453 4454 if (mime_decode_mode=='B') { 4455 mimebuf_f = unbuf_f; 4456 if (!unbuf_f) { 4457 /* do MIME integrity check */ 4458 return mime_integrity(f,mime_pattern[j]); 4459 } 4460 } 4461 switch_mime_getc(); 4462 mimebuf_f = TRUE; 4463 return c1; 4464} 4465 4466static nkf_char 4467mime_begin(FILE *f) 4468{ 4469 nkf_char c1 = 0; 4470 int i,k; 4471 4472 /* In NONSTRICT mode, only =? is checked. 
In case of failure, we */ 4473 /* re-read and convert again from mime_buffer. */ 4474 4475 /* =? has been checked */ 4476 k = mime_input_state.last; 4477 mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?'; 4478 for(i=2;i<MAXRECOVER;i++) { /* start at =? */ 4479 /* We accept any character type even if it is breaked by new lines */ 4480 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1; 4481 if (c1==LF||c1==SP||c1==CR|| 4482 c1=='-'||c1=='_'||is_alnum(c1)) continue; 4483 if (c1=='=') { 4484 /* Failed. But this could be another MIME preemble */ 4485 (*i_ungetc)(c1,f); 4486 mime_input_state.last--; 4487 break; 4488 } 4489 if (c1!='?') break; 4490 else { 4491 /* c1=='?' */ 4492 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1; 4493 if (!(++i<MAXRECOVER) || c1==EOF) break; 4494 if (c1=='b'||c1=='B') { 4495 mime_decode_mode = 'B'; 4496 } else if (c1=='q'||c1=='Q') { 4497 mime_decode_mode = 'Q'; 4498 } else { 4499 break; 4500 } 4501 c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1; 4502 if (!(++i<MAXRECOVER) || c1==EOF) break; 4503 if (c1!='?') { 4504 mime_decode_mode = FALSE; 4505 } 4506 break; 4507 } 4508 } 4509 switch_mime_getc(); 4510 if (!mime_decode_mode) { 4511 /* false MIME premble, restart from mime_buffer */ 4512 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */ 4513 /* Since we are in MIME mode until buffer becomes empty, */ 4514 /* we never go into mime_begin again for a while. */ 4515 return c1; 4516 } 4517 /* discard mime preemble, and goto MIME mode */ 4518 mime_input_state.last = k; 4519 /* do no MIME integrity check */ 4520 return c1; /* used only for checking EOF */ 4521} 4522 4523#ifdef CHECK_OPTION 4524static void 4525no_putc(ARG_UNUSED nkf_char c) 4526{ 4527 ; 4528} 4529 4530static void 4531debug(const char *str) 4532{ 4533 if (debug_f){ 4534 fprintf(stderr, "%s\n", str ? 
str : "NULL"); 4535 } 4536} 4537#endif 4538 4539static void 4540set_input_codename(const char *codename) 4541{ 4542 if (!input_codename) { 4543 input_codename = codename; 4544 } else if (strcmp(codename, input_codename) != 0) { 4545 input_codename = ""; 4546 } 4547} 4548 4549static const char* 4550get_guessed_code(void) 4551{ 4552 if (input_codename && !*input_codename) { 4553 input_codename = "BINARY"; 4554 } else { 4555 struct input_code *p = find_inputcode_byfunc(iconv); 4556 if (!input_codename) { 4557 input_codename = "ASCII"; 4558 } else if (strcmp(input_codename, "Shift_JIS") == 0) { 4559 if (p->score & (SCORE_DEPEND|SCORE_CP932)) 4560 input_codename = "CP932"; 4561 } else if (strcmp(input_codename, "EUC-JP") == 0) { 4562 if (p->score & SCORE_X0213) 4563 input_codename = "EUC-JIS-2004"; 4564 else if (p->score & (SCORE_X0212)) 4565 input_codename = "EUCJP-MS"; 4566 else if (p->score & (SCORE_DEPEND|SCORE_CP932)) 4567 input_codename = "CP51932"; 4568 } else if (strcmp(input_codename, "ISO-2022-JP") == 0) { 4569 if (p->score & (SCORE_KANA)) 4570 input_codename = "CP50221"; 4571 else if (p->score & (SCORE_DEPEND|SCORE_CP932)) 4572 input_codename = "CP50220"; 4573 } 4574 } 4575 return input_codename; 4576} 4577 4578#if !defined(PERL_XS) && !defined(WIN32DLL) 4579static void 4580print_guessed_code(char *filename) 4581{ 4582 if (filename != NULL) printf("%s: ", filename); 4583 if (input_codename && !*input_codename) { 4584 printf("BINARY\n"); 4585 } else { 4586 input_codename = get_guessed_code(); 4587 if (guess_f == 1) { 4588 printf("%s\n", input_codename); 4589 } else { 4590 printf("%s%s%s%s\n", 4591 input_codename, 4592 iconv != w_iconv16 && iconv != w_iconv32 ? "" : 4593 input_endian == ENDIAN_LITTLE ? " LE" : 4594 input_endian == ENDIAN_BIG ? " BE" : 4595 "[BUG]", 4596 input_bom_f ? " (BOM)" : "", 4597 input_eol == CR ? " (CR)" : 4598 input_eol == LF ? " (LF)" : 4599 input_eol == CRLF ? " (CRLF)" : 4600 input_eol == EOF ? 
" (MIXED NL)" : 4601 ""); 4602 } 4603 } 4604} 4605#endif /*WIN32DLL*/ 4606 4607#ifdef INPUT_OPTION 4608 4609static nkf_char 4610hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f)) 4611{ 4612 nkf_char c1, c2, c3; 4613 c1 = (*g)(f); 4614 if (c1 != ch){ 4615 return c1; 4616 } 4617 c2 = (*g)(f); 4618 if (!nkf_isxdigit(c2)){ 4619 (*u)(c2, f); 4620 return c1; 4621 } 4622 c3 = (*g)(f); 4623 if (!nkf_isxdigit(c3)){ 4624 (*u)(c2, f); 4625 (*u)(c3, f); 4626 return c1; 4627 } 4628 return (hex2bin(c2) << 4) | hex2bin(c3); 4629} 4630 4631static nkf_char 4632cap_getc(FILE *f) 4633{ 4634 return hex_getc(':', f, i_cgetc, i_cungetc); 4635} 4636 4637static nkf_char 4638cap_ungetc(nkf_char c, FILE *f) 4639{ 4640 return (*i_cungetc)(c, f); 4641} 4642 4643static nkf_char 4644url_getc(FILE *f) 4645{ 4646 return hex_getc('%', f, i_ugetc, i_uungetc); 4647} 4648 4649static nkf_char 4650url_ungetc(nkf_char c, FILE *f) 4651{ 4652 return (*i_uungetc)(c, f); 4653} 4654#endif 4655 4656#ifdef NUMCHAR_OPTION 4657static nkf_char 4658numchar_getc(FILE *f) 4659{ 4660 nkf_char (*g)(FILE *) = i_ngetc; 4661 nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc; 4662 int i = 0, j; 4663 nkf_char buf[12]; 4664 nkf_char c = -1; 4665 4666 buf[i] = (*g)(f); 4667 if (buf[i] == '&'){ 4668 buf[++i] = (*g)(f); 4669 if (buf[i] == '#'){ 4670 c = 0; 4671 buf[++i] = (*g)(f); 4672 if (buf[i] == 'x' || buf[i] == 'X'){ 4673 for (j = 0; j < 7; j++){ 4674 buf[++i] = (*g)(f); 4675 if (!nkf_isxdigit(buf[i])){ 4676 if (buf[i] != ';'){ 4677 c = -1; 4678 } 4679 break; 4680 } 4681 c <<= 4; 4682 c |= hex2bin(buf[i]); 4683 } 4684 }else{ 4685 for (j = 0; j < 8; j++){ 4686 if (j){ 4687 buf[++i] = (*g)(f); 4688 } 4689 if (!nkf_isdigit(buf[i])){ 4690 if (buf[i] != ';'){ 4691 c = -1; 4692 } 4693 break; 4694 } 4695 c *= 10; 4696 c += hex2bin(buf[i]); 4697 } 4698 } 4699 } 4700 } 4701 if (c != -1){ 4702 return nkf_char_unicode_new(c); 4703 } 4704 while (i > 0){ 4705 (*u)(buf[i], f); 4706 --i; 4707 } 4708 
return buf[0]; 4709} 4710 4711static nkf_char 4712numchar_ungetc(nkf_char c, FILE *f) 4713{ 4714 return (*i_nungetc)(c, f); 4715} 4716#endif 4717 4718#ifdef UNICODE_NORMALIZATION 4719 4720static nkf_char 4721nfc_getc(FILE *f) 4722{ 4723 nkf_char (*g)(FILE *f) = i_nfc_getc; 4724 nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc; 4725 nkf_buf_t *buf = nkf_state->nfc_buf; 4726 const unsigned char *array; 4727 int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1; 4728 nkf_char c = (*g)(f); 4729 4730 if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c; 4731 4732 nkf_buf_push(buf, c); 4733 do { 4734 while (lower <= upper) { 4735 int mid = (lower+upper) / 2; 4736 int len; 4737 array = normalization_table[mid].nfd; 4738 for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) { 4739 if (len >= nkf_buf_length(buf)) { 4740 c = (*g)(f); 4741 if (c == EOF) { 4742 len = 0; 4743 lower = 1, upper = 0; 4744 break; 4745 } 4746 nkf_buf_push(buf, c); 4747 } 4748 if (array[len] != nkf_buf_at(buf, len)) { 4749 if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1; 4750 else upper = mid - 1; 4751 len = 0; 4752 break; 4753 } 4754 } 4755 if (len > 0) { 4756 int i; 4757 array = normalization_table[mid].nfc; 4758 nkf_buf_clear(buf); 4759 for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++) 4760 nkf_buf_push(buf, array[i]); 4761 break; 4762 } 4763 } 4764 } while (lower <= upper); 4765 4766 while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f); 4767 c = nkf_buf_pop(buf); 4768 4769 return c; 4770} 4771 4772static nkf_char 4773nfc_ungetc(nkf_char c, FILE *f) 4774{ 4775 return (*i_nfc_ungetc)(c, f); 4776} 4777#endif /* UNICODE_NORMALIZATION */ 4778 4779 4780static nkf_char 4781base64decode(nkf_char c) 4782{ 4783 int i; 4784 if (c > '@') { 4785 if (c < '[') { 4786 i = c - 'A'; /* A..Z 0-25 */ 4787 } else if (c == '_') { 4788 i = '?' 
/* 63 */ ; /* _ 63 */ 4789 } else { 4790 i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */ 4791 } 4792 } else if (c > '/') { 4793 i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */ 4794 } else if (c == '+' || c == '-') { 4795 i = '>' /* 62 */ ; /* + and - 62 */ 4796 } else { 4797 i = '?' /* 63 */ ; /* / 63 */ 4798 } 4799 return (i); 4800} 4801 4802static nkf_char 4803mime_getc(FILE *f) 4804{ 4805 nkf_char c1, c2, c3, c4, cc; 4806 nkf_char t1, t2, t3, t4, mode, exit_mode; 4807 nkf_char lwsp_count; 4808 char *lwsp_buf; 4809 char *lwsp_buf_new; 4810 nkf_char lwsp_size = 128; 4811 4812 if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */ 4813 return mime_input_buf(mime_input_state.top++); 4814 } 4815 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) { 4816 mime_decode_mode=FALSE; 4817 unswitch_mime_getc(); 4818 return (*i_getc)(f); 4819 } 4820 4821 if (mimebuf_f == FIXED_MIME) 4822 exit_mode = mime_decode_mode; 4823 else 4824 exit_mode = FALSE; 4825 if (mime_decode_mode == 'Q') { 4826 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF); 4827 restart_mime_q: 4828 if (c1=='_' && mimebuf_f != FIXED_MIME) return SP; 4829 if (c1<=SP || DEL<=c1) { 4830 mime_decode_mode = exit_mode; /* prepare for quit */ 4831 return c1; 4832 } 4833 if (c1!='=' && (c1!='?' 
|| mimebuf_f == FIXED_MIME)) { 4834 return c1; 4835 } 4836 4837 mime_decode_mode = exit_mode; /* prepare for quit */ 4838 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF); 4839 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) { 4840 /* end Q encoding */ 4841 input_mode = exit_mode; 4842 lwsp_count = 0; 4843 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char)); 4844 while ((c1=(*i_getc)(f))!=EOF) { 4845 switch (c1) { 4846 case LF: 4847 case CR: 4848 if (c1==LF) { 4849 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { 4850 i_ungetc(SP,f); 4851 continue; 4852 } else { 4853 i_ungetc(c1,f); 4854 } 4855 c1 = LF; 4856 } else { 4857 if ((c1=(*i_getc)(f))!=EOF && c1 == LF) { 4858 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { 4859 i_ungetc(SP,f); 4860 continue; 4861 } else { 4862 i_ungetc(c1,f); 4863 } 4864 i_ungetc(LF,f); 4865 } else { 4866 i_ungetc(c1,f); 4867 } 4868 c1 = CR; 4869 } 4870 break; 4871 case SP: 4872 case TAB: 4873 lwsp_buf[lwsp_count] = (unsigned char)c1; 4874 if (lwsp_count++>lwsp_size){ 4875 lwsp_size <<= 1; 4876 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char)); 4877 lwsp_buf = lwsp_buf_new; 4878 } 4879 continue; 4880 } 4881 break; 4882 } 4883 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) { 4884 i_ungetc(c1,f); 4885 for(lwsp_count--;lwsp_count>0;lwsp_count--) 4886 i_ungetc(lwsp_buf[lwsp_count],f); 4887 c1 = lwsp_buf[0]; 4888 } 4889 nkf_xfree(lwsp_buf); 4890 return c1; 4891 } 4892 if (c1=='='&&c2<SP) { /* this is soft wrap */ 4893 while((c1 = (*i_mgetc)(f)) <=SP) { 4894 if (c1 == EOF) return (EOF); 4895 } 4896 mime_decode_mode = 'Q'; /* still in MIME */ 4897 goto restart_mime_q; 4898 } 4899 if (c1=='?') { 4900 mime_decode_mode = 'Q'; /* still in MIME */ 4901 (*i_mungetc)(c2,f); 4902 return c1; 4903 } 4904 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF); 4905 if (c2<=SP) return c2; 4906 mime_decode_mode = 'Q'; /* still in MIME */ 4907 return ((hex2bin(c2)<<4) + hex2bin(c3)); 4908 } 4909 4910 
if (mime_decode_mode != 'B') { 4911 mime_decode_mode = FALSE; 4912 return (*i_mgetc)(f); 4913 } 4914 4915 4916 /* Base64 encoding */ 4917 /* 4918 MIME allows line break in the middle of 4919 Base64, but we are very pessimistic in decoding 4920 in unbuf mode because MIME encoded code may broken by 4921 less or editor's control sequence (such as ESC-[-K in unbuffered 4922 mode. ignore incomplete MIME. 4923 */ 4924 mode = mime_decode_mode; 4925 mime_decode_mode = exit_mode; /* prepare for quit */ 4926 4927 while ((c1 = (*i_mgetc)(f))<=SP) { 4928 if (c1==EOF) 4929 return (EOF); 4930 } 4931 mime_c2_retry: 4932 if ((c2 = (*i_mgetc)(f))<=SP) { 4933 if (c2==EOF) 4934 return (EOF); 4935 if (mime_f != STRICT_MIME) goto mime_c2_retry; 4936 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; 4937 return c2; 4938 } 4939 if ((c1 == '?') && (c2 == '=')) { 4940 input_mode = ASCII; 4941 lwsp_count = 0; 4942 lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char)); 4943 while ((c1=(*i_getc)(f))!=EOF) { 4944 switch (c1) { 4945 case LF: 4946 case CR: 4947 if (c1==LF) { 4948 if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { 4949 i_ungetc(SP,f); 4950 continue; 4951 } else { 4952 i_ungetc(c1,f); 4953 } 4954 c1 = LF; 4955 } else { 4956 if ((c1=(*i_getc)(f))!=EOF) { 4957 if (c1==SP) { 4958 i_ungetc(SP,f); 4959 continue; 4960 } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) { 4961 i_ungetc(SP,f); 4962 continue; 4963 } else { 4964 i_ungetc(c1,f); 4965 } 4966 i_ungetc(LF,f); 4967 } else { 4968 i_ungetc(c1,f); 4969 } 4970 c1 = CR; 4971 } 4972 break; 4973 case SP: 4974 case TAB: 4975 lwsp_buf[lwsp_count] = (unsigned char)c1; 4976 if (lwsp_count++>lwsp_size){ 4977 lwsp_size <<= 1; 4978 lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char)); 4979 lwsp_buf = lwsp_buf_new; 4980 } 4981 continue; 4982 } 4983 break; 4984 } 4985 if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) { 4986 i_ungetc(c1,f); 4987 
for(lwsp_count--;lwsp_count>0;lwsp_count--) 4988 i_ungetc(lwsp_buf[lwsp_count],f); 4989 c1 = lwsp_buf[0]; 4990 } 4991 nkf_xfree(lwsp_buf); 4992 return c1; 4993 } 4994 mime_c3_retry: 4995 if ((c3 = (*i_mgetc)(f))<=SP) { 4996 if (c3==EOF) 4997 return (EOF); 4998 if (mime_f != STRICT_MIME) goto mime_c3_retry; 4999 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; 5000 return c3; 5001 } 5002 mime_c4_retry: 5003 if ((c4 = (*i_mgetc)(f))<=SP) { 5004 if (c4==EOF) 5005 return (EOF); 5006 if (mime_f != STRICT_MIME) goto mime_c4_retry; 5007 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; 5008 return c4; 5009 } 5010 5011 mime_decode_mode = mode; /* still in MIME sigh... */ 5012 5013 /* BASE 64 decoding */ 5014 5015 t1 = 0x3f & base64decode(c1); 5016 t2 = 0x3f & base64decode(c2); 5017 t3 = 0x3f & base64decode(c3); 5018 t4 = 0x3f & base64decode(c4); 5019 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03); 5020 if (c2 != '=') { 5021 mime_input_buf(mime_input_state.last++) = (unsigned char)cc; 5022 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f); 5023 if (c3 != '=') { 5024 mime_input_buf(mime_input_state.last++) = (unsigned char)cc; 5025 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f); 5026 if (c4 != '=') 5027 mime_input_buf(mime_input_state.last++) = (unsigned char)cc; 5028 } 5029 } else { 5030 return c1; 5031 } 5032 return mime_input_buf(mime_input_state.top++); 5033} 5034 5035static const char basis_64[] = 5036 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 5037 5038#define MIMEOUT_BUF_LENGTH 74 5039static struct { 5040 unsigned char buf[MIMEOUT_BUF_LENGTH+1]; 5041 int count; 5042} mimeout_state; 5043 5044/*nkf_char mime_lastchar2, mime_lastchar1;*/ 5045 5046static void 5047open_mime(nkf_char mode) 5048{ 5049 const unsigned char *p; 5050 int i; 5051 int j; 5052 p = mime_pattern[0]; 5053 for(i=0;mime_pattern[i];i++) { 5054 if (mode == mime_encode[i]) { 5055 p = mime_pattern[i]; 5056 break; 5057 } 5058 } 5059 mimeout_mode = mime_encode_method[i]; 5060 i = 0; 5061 if 
(base64_count>45) { 5062 if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){ 5063 (*o_mputc)(mimeout_state.buf[i]); 5064 i++; 5065 } 5066 put_newline(o_mputc); 5067 (*o_mputc)(SP); 5068 base64_count = 1; 5069 if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) { 5070 i++; 5071 } 5072 } 5073 for (;i<mimeout_state.count;i++) { 5074 if (nkf_isspace(mimeout_state.buf[i])) { 5075 (*o_mputc)(mimeout_state.buf[i]); 5076 base64_count ++; 5077 } else { 5078 break; 5079 } 5080 } 5081 while(*p) { 5082 (*o_mputc)(*p++); 5083 base64_count ++; 5084 } 5085 j = mimeout_state.count; 5086 mimeout_state.count = 0; 5087 for (;i<j;i++) { 5088 mime_putc(mimeout_state.buf[i]); 5089 } 5090} 5091 5092static void 5093mime_prechar(nkf_char c2, nkf_char c1) 5094{ 5095 if (mimeout_mode > 0){ 5096 if (c2 == EOF){ 5097 if (base64_count + mimeout_state.count/3*4> 73){ 5098 (*o_base64conv)(EOF,0); 5099 oconv_newline(o_base64conv); 5100 (*o_base64conv)(0,SP); 5101 base64_count = 1; 5102 } 5103 } else { 5104 if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) { 5105 (*o_base64conv)(EOF,0); 5106 oconv_newline(o_base64conv); 5107 (*o_base64conv)(0,SP); 5108 base64_count = 1; 5109 mimeout_mode = -1; 5110 } 5111 } 5112 } else if (c2) { 5113 if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) { 5114 mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 
'Q' : 'B'; 5115 open_mime(output_mode); 5116 (*o_base64conv)(EOF,0); 5117 oconv_newline(o_base64conv); 5118 (*o_base64conv)(0,SP); 5119 base64_count = 1; 5120 mimeout_mode = -1; 5121 } 5122 } 5123} 5124 5125static void 5126close_mime(void) 5127{ 5128 (*o_mputc)('?'); 5129 (*o_mputc)('='); 5130 base64_count += 2; 5131 mimeout_mode = 0; 5132} 5133 5134static void 5135eof_mime(void) 5136{ 5137 switch(mimeout_mode) { 5138 case 'Q': 5139 case 'B': 5140 break; 5141 case 2: 5142 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]); 5143 (*o_mputc)('='); 5144 (*o_mputc)('='); 5145 base64_count += 3; 5146 break; 5147 case 1: 5148 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]); 5149 (*o_mputc)('='); 5150 base64_count += 2; 5151 break; 5152 } 5153 if (mimeout_mode > 0) { 5154 if (mimeout_f!=FIXED_MIME) { 5155 close_mime(); 5156 } else if (mimeout_mode != 'Q') 5157 mimeout_mode = 'B'; 5158 } 5159} 5160 5161static void 5162mimeout_addchar(nkf_char c) 5163{ 5164 switch(mimeout_mode) { 5165 case 'Q': 5166 if (c==CR||c==LF) { 5167 (*o_mputc)(c); 5168 base64_count = 0; 5169 } else if(!nkf_isalnum(c)) { 5170 (*o_mputc)('='); 5171 (*o_mputc)(bin2hex(((c>>4)&0xf))); 5172 (*o_mputc)(bin2hex((c&0xf))); 5173 base64_count += 3; 5174 } else { 5175 (*o_mputc)(c); 5176 base64_count++; 5177 } 5178 break; 5179 case 'B': 5180 nkf_state->mimeout_state=c; 5181 (*o_mputc)(basis_64[c>>2]); 5182 mimeout_mode=2; 5183 base64_count ++; 5184 break; 5185 case 2: 5186 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]); 5187 nkf_state->mimeout_state=c; 5188 mimeout_mode=1; 5189 base64_count ++; 5190 break; 5191 case 1: 5192 (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]); 5193 (*o_mputc)(basis_64[c & 0x3F]); 5194 mimeout_mode='B'; 5195 base64_count += 2; 5196 break; 5197 default: 5198 (*o_mputc)(c); 5199 base64_count++; 5200 break; 5201 } 5202} 5203 5204static void 5205mime_putc(nkf_char c) 5206{ 5207 int i, j; 5208 
nkf_char lastchar; 5209 5210 if (mimeout_f == FIXED_MIME){ 5211 if (mimeout_mode == 'Q'){ 5212 if (base64_count > 71){ 5213 if (c!=CR && c!=LF) { 5214 (*o_mputc)('='); 5215 put_newline(o_mputc); 5216 } 5217 base64_count = 0; 5218 } 5219 }else{ 5220 if (base64_count > 71){ 5221 eof_mime(); 5222 put_newline(o_mputc); 5223 base64_count = 0; 5224 } 5225 if (c == EOF) { /* c==EOF */ 5226 eof_mime(); 5227 } 5228 } 5229 if (c != EOF) { /* c==EOF */ 5230 mimeout_addchar(c); 5231 } 5232 return; 5233 } 5234 5235 /* mimeout_f != FIXED_MIME */ 5236 5237 if (c == EOF) { /* c==EOF */ 5238 if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode); 5239 j = mimeout_state.count; 5240 mimeout_state.count = 0; 5241 i = 0; 5242 if (mimeout_mode > 0) { 5243 if (!nkf_isblank(mimeout_state.buf[j-1])) { 5244 for (;i<j;i++) { 5245 if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){ 5246 break; 5247 } 5248 mimeout_addchar(mimeout_state.buf[i]); 5249 } 5250 eof_mime(); 5251 for (;i<j;i++) { 5252 mimeout_addchar(mimeout_state.buf[i]); 5253 } 5254 } else { 5255 for (;i<j;i++) { 5256 mimeout_addchar(mimeout_state.buf[i]); 5257 } 5258 eof_mime(); 5259 } 5260 } else { 5261 for (;i<j;i++) { 5262 mimeout_addchar(mimeout_state.buf[i]); 5263 } 5264 } 5265 return; 5266 } 5267 5268 if (mimeout_state.count > 0){ 5269 lastchar = mimeout_state.buf[mimeout_state.count - 1]; 5270 }else{ 5271 lastchar = -1; 5272 } 5273 5274 if (mimeout_mode=='Q') { 5275 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) { 5276 if (c == CR || c == LF) { 5277 close_mime(); 5278 (*o_mputc)(c); 5279 base64_count = 0; 5280 return; 5281 } else if (c <= SP) { 5282 close_mime(); 5283 if (base64_count > 70) { 5284 put_newline(o_mputc); 5285 base64_count = 0; 5286 } 5287 if (!nkf_isblank(c)) { 5288 (*o_mputc)(SP); 5289 base64_count++; 5290 } 5291 } else { 5292 if (base64_count > 70) { 5293 close_mime(); 5294 put_newline(o_mputc); 5295 (*o_mputc)(SP); 5296 base64_count = 1; 5297 
open_mime(output_mode); 5298 } 5299 if (!nkf_noescape_mime(c)) { 5300 mimeout_addchar(c); 5301 return; 5302 } 5303 } 5304 if (c != 0x1B) { 5305 (*o_mputc)(c); 5306 base64_count++; 5307 return; 5308 } 5309 } 5310 } 5311 5312 if (mimeout_mode <= 0) { 5313 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 || 5314 output_mode == UTF_8)) { 5315 if (nkf_isspace(c)) { 5316 int flag = 0; 5317 if (mimeout_mode == -1) { 5318 flag = 1; 5319 } 5320 if (c==CR || c==LF) { 5321 if (flag) { 5322 open_mime(output_mode); 5323 output_mode = 0; 5324 } else { 5325 base64_count = 0; 5326 } 5327 } 5328 for (i=0;i<mimeout_state.count;i++) { 5329 (*o_mputc)(mimeout_state.buf[i]); 5330 if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){ 5331 base64_count = 0; 5332 }else{ 5333 base64_count++; 5334 } 5335 } 5336 if (flag) { 5337 eof_mime(); 5338 base64_count = 0; 5339 mimeout_mode = 0; 5340 } 5341 mimeout_state.buf[0] = (char)c; 5342 mimeout_state.count = 1; 5343 }else{ 5344 if (base64_count > 1 5345 && base64_count + mimeout_state.count > 76 5346 && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){ 5347 static const char *str = "boundary=\""; 5348 static int len = 10; 5349 i = 0; 5350 5351 for (; i < mimeout_state.count - len; ++i) { 5352 if (!strncmp((char *)(mimeout_state.buf+i), str, len)) { 5353 i += len - 2; 5354 break; 5355 } 5356 } 5357 5358 if (i == 0 || i == mimeout_state.count - len) { 5359 put_newline(o_mputc); 5360 base64_count = 0; 5361 if (!nkf_isspace(mimeout_state.buf[0])){ 5362 (*o_mputc)(SP); 5363 base64_count++; 5364 } 5365 } 5366 else { 5367 int j; 5368 for (j = 0; j <= i; ++j) { 5369 (*o_mputc)(mimeout_state.buf[j]); 5370 } 5371 put_newline(o_mputc); 5372 base64_count = 1; 5373 for (; j <= mimeout_state.count; ++j) { 5374 mimeout_state.buf[j - i] = mimeout_state.buf[j]; 5375 } 5376 mimeout_state.count -= i; 5377 } 5378 } 5379 mimeout_state.buf[mimeout_state.count++] = (char)c; 5380 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) { 5381 
open_mime(output_mode); 5382 } 5383 } 5384 return; 5385 }else{ 5386 if (lastchar==CR || lastchar == LF){ 5387 for (i=0;i<mimeout_state.count;i++) { 5388 (*o_mputc)(mimeout_state.buf[i]); 5389 } 5390 base64_count = 0; 5391 mimeout_state.count = 0; 5392 } 5393 if (lastchar==SP) { 5394 for (i=0;i<mimeout_state.count-1;i++) { 5395 (*o_mputc)(mimeout_state.buf[i]); 5396 base64_count++; 5397 } 5398 mimeout_state.buf[0] = SP; 5399 mimeout_state.count = 1; 5400 } 5401 open_mime(output_mode); 5402 } 5403 }else{ 5404 /* mimeout_mode == 'B', 1, 2 */ 5405 if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 || 5406 output_mode == UTF_8)) { 5407 if (lastchar == CR || lastchar == LF){ 5408 if (nkf_isblank(c)) { 5409 for (i=0;i<mimeout_state.count;i++) { 5410 mimeout_addchar(mimeout_state.buf[i]); 5411 } 5412 mimeout_state.count = 0; 5413 } else { 5414 eof_mime(); 5415 for (i=0;i<mimeout_state.count;i++) { 5416 (*o_mputc)(mimeout_state.buf[i]); 5417 } 5418 base64_count = 0; 5419 mimeout_state.count = 0; 5420 } 5421 mimeout_state.buf[mimeout_state.count++] = (char)c; 5422 return; 5423 } 5424 if (nkf_isspace(c)) { 5425 for (i=0;i<mimeout_state.count;i++) { 5426 if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) { 5427 eof_mime(); 5428 for (i=0;i<mimeout_state.count;i++) { 5429 (*o_mputc)(mimeout_state.buf[i]); 5430 base64_count++; 5431 } 5432 mimeout_state.count = 0; 5433 } 5434 } 5435 mimeout_state.buf[mimeout_state.count++] = (char)c; 5436 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) { 5437 eof_mime(); 5438 for (i=0;i<mimeout_state.count;i++) { 5439 (*o_mputc)(mimeout_state.buf[i]); 5440 base64_count++; 5441 } 5442 mimeout_state.count = 0; 5443 } 5444 return; 5445 } 5446 if (mimeout_state.count>0 && SP<c && c!='=') { 5447 mimeout_state.buf[mimeout_state.count++] = (char)c; 5448 if (mimeout_state.count>MIMEOUT_BUF_LENGTH) { 5449 j = mimeout_state.count; 5450 mimeout_state.count = 0; 5451 for (i=0;i<j;i++) { 5452 mimeout_addchar(mimeout_state.buf[i]); 5453 } 5454 } 
5455 return; 5456 } 5457 } 5458 } 5459 if (mimeout_state.count>0) { 5460 j = mimeout_state.count; 5461 mimeout_state.count = 0; 5462 for (i=0;i<j;i++) { 5463 if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF) 5464 break; 5465 mimeout_addchar(mimeout_state.buf[i]); 5466 } 5467 if (i<j) { 5468 eof_mime(); 5469 base64_count=0; 5470 for (;i<j;i++) { 5471 (*o_mputc)(mimeout_state.buf[i]); 5472 } 5473 open_mime(output_mode); 5474 } 5475 } 5476 mimeout_addchar(c); 5477} 5478 5479static void 5480base64_conv(nkf_char c2, nkf_char c1) 5481{ 5482 mime_prechar(c2, c1); 5483 (*o_base64conv)(c2,c1); 5484} 5485 5486#ifdef HAVE_ICONV_H 5487typedef struct nkf_iconv_t { 5488 iconv_t cd; 5489 char *input_buffer; 5490 size_t input_buffer_size; 5491 char *output_buffer; 5492 size_t output_buffer_size; 5493} 5494 5495static nkf_iconv_t 5496nkf_iconv_new(char *tocode, char *fromcode) 5497{ 5498 nkf_iconv_t converter; 5499 5500 converter->input_buffer_size = IOBUF_SIZE; 5501 converter->input_buffer = nkf_xmalloc(converter->input_buffer_size); 5502 converter->output_buffer_size = IOBUF_SIZE * 2; 5503 converter->output_buffer = nkf_xmalloc(converter->output_buffer_size); 5504 converter->cd = iconv_open(tocode, fromcode); 5505 if (converter->cd == (iconv_t)-1) 5506 { 5507 switch (errno) { 5508 case EINVAL: 5509 perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode)); 5510 return -1; 5511 default: 5512 perror("can't iconv_open"); 5513 } 5514 } 5515} 5516 5517static size_t 5518nkf_iconv_convert(nkf_iconv_t *converter, FILE *input) 5519{ 5520 size_t invalid = (size_t)0; 5521 char *input_buffer = converter->input_buffer; 5522 size_t input_length = (size_t)0; 5523 char *output_buffer = converter->output_buffer; 5524 size_t output_length = converter->output_buffer_size; 5525 int c; 5526 5527 do { 5528 if (c != EOF) { 5529 while ((c = (*i_getc)(f)) != EOF) { 5530 input_buffer[input_length++] = c; 5531 if (input_length < converter->input_buffer_size) break; 5532 } 5533 
} 5534 5535 size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length); 5536 while (output_length-- > 0) { 5537 (*o_putc)(output_buffer[converter->output_buffer_size-output_length]); 5538 } 5539 if (ret == (size_t) - 1) { 5540 switch (errno) { 5541 case EINVAL: 5542 if (input_buffer != converter->input_buffer) 5543 memmove(converter->input_buffer, input_buffer, input_length); 5544 break; 5545 case E2BIG: 5546 converter->output_buffer_size *= 2; 5547 output_buffer = realloc(converter->outbuf, converter->output_buffer_size); 5548 if (output_buffer == NULL) { 5549 perror("can't realloc"); 5550 return -1; 5551 } 5552 converter->output_buffer = output_buffer; 5553 break; 5554 default: 5555 perror("can't iconv"); 5556 return -1; 5557 } 5558 } else { 5559 invalid += ret; 5560 } 5561 } while (1); 5562 5563 return invalid; 5564} 5565 5566 5567static void 5568nkf_iconv_close(nkf_iconv_t *convert) 5569{ 5570 nkf_xfree(converter->inbuf); 5571 nkf_xfree(converter->outbuf); 5572 iconv_close(converter->cd); 5573} 5574#endif 5575 5576 5577static void 5578reinit(void) 5579{ 5580 { 5581 struct input_code *p = input_code_list; 5582 while (p->name){ 5583 status_reinit(p++); 5584 } 5585 } 5586 unbuf_f = FALSE; 5587 estab_f = FALSE; 5588 nop_f = FALSE; 5589 binmode_f = TRUE; 5590 rot_f = FALSE; 5591 hira_f = FALSE; 5592 alpha_f = FALSE; 5593 mime_f = MIME_DECODE_DEFAULT; 5594 mime_decode_f = FALSE; 5595 mimebuf_f = FALSE; 5596 broken_f = FALSE; 5597 iso8859_f = FALSE; 5598 mimeout_f = FALSE; 5599 x0201_f = NKF_UNSPECIFIED; 5600 iso2022jp_f = FALSE; 5601#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) 5602 ms_ucs_map_f = UCS_MAP_ASCII; 5603#endif 5604#ifdef UTF8_INPUT_ENABLE 5605 no_cp932ext_f = FALSE; 5606 no_best_fit_chars_f = FALSE; 5607 encode_fallback = NULL; 5608 unicode_subchar = '?'; 5609 input_endian = ENDIAN_BIG; 5610#endif 5611#ifdef UTF8_OUTPUT_ENABLE 5612 output_bom_f = FALSE; 5613 output_endian = ENDIAN_BIG; 5614#endif 
/*
 * module_connection - wire up the input and output function-pointer chains
 * according to the option flags currently set.
 *
 * Output side: picks the output encoding (explicit, then
 * nkf_default_encoding(), then ISO-2022-JP when only checking/guessing),
 * installs its converter as oconv, and then lets every enabled filter save
 * the current oconv in its own o_* slot and install itself instead.
 * Presumably each filter forwards to the pointer saved in its slot, which
 * would make the filter installed last the first one to run; the filters
 * themselves are outside this function.
 *
 * Input side: starts from std_getc/std_ungetc and stacks the enabled input
 * filters the same way, saving the previous pair in the filter's i_* slots.
 *
 * Finally selects the input converter with set_iconv() and resets the
 * state of every entry in input_code_list.
 *
 * Returns 0 on success, -1 when no output encoding could be determined
 * and neither noout_f nor guess_f is set.
 */
static int
module_connection(void)
{
    if (input_encoding) set_input_encoding(input_encoding);
    if (!output_encoding) {
        output_encoding = nkf_default_encoding();
    }
    if (!output_encoding) {
        /* No output is produced in these modes, so any encoding will do. */
        if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
        else return -1;
    }
    set_output_encoding(output_encoding);
    oconv = nkf_enc_to_oconv(output_encoding);
    o_putc = std_putc;
    if (nkf_enc_unicode_p(output_encoding))
        output_mode = UTF_8;

    if (x0201_f == NKF_UNSPECIFIED) {
        x0201_f = X0201_DEFAULT;
    }

    /* replace continuation module, from output side */

    /* output redirection */
#ifdef CHECK_OPTION
    if (noout_f || guess_f){
        o_putc = no_putc;
    }
#endif
    if (mimeout_f) {
        /* mime_putc writes through o_mputc, so keep the old sink there. */
        o_mputc = o_putc;
        o_putc = mime_putc;
        if (mimeout_f == TRUE) {
            o_base64conv = oconv; oconv = base64_conv;
        }
        /* base64_count = 0; */
    }

    if (eolmode_f || guess_f) {
        o_eol_conv = oconv; oconv = eol_conv;
    }
    if (rot_f) {
        o_rot_conv = oconv; oconv = rot_conv;
    }
    if (iso2022jp_f) {
        o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
    }
    if (hira_f) {
        o_hira_conv = oconv; oconv = hira_conv;
    }
    if (fold_f) {
        o_fconv = oconv; oconv = fold_conv;
        f_line = 0;
    }
    if (alpha_f || x0201_f) {
        o_zconv = oconv; oconv = z_conv;
    }

    i_getc = std_getc;
    i_ungetc = std_ungetc;
    /* input redirection */
#ifdef INPUT_OPTION
    if (cap_f){
        i_cgetc = i_getc; i_getc = cap_getc;
        i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
    }
    if (url_f){
        i_ugetc = i_getc; i_getc = url_getc;
        i_uungetc = i_ungetc; i_ungetc= url_ungetc;
    }
#endif
#ifdef NUMCHAR_OPTION
    if (numchar_f){
        i_ngetc = i_getc; i_getc = numchar_getc;
        i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
    }
#endif
#ifdef UNICODE_NORMALIZATION
    if (nfc_f){
        i_nfc_getc = i_getc; i_getc = nfc_getc;
        i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
    }
#endif
    /* The MIME reader is only chained in permanently for FIXED_MIME. */
    if (mime_f && mimebuf_f==FIXED_MIME) {
        i_mgetc = i_getc; i_getc = mime_getc;
        i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
    }
    if (broken_f & 1) {
        i_bgetc = i_getc; i_getc = broken_getc;
        i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
    }
    /* An explicit input encoding selects its converter; otherwise start from e_iconv. */
    if (input_encoding) {
        set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
    } else {
        set_iconv(FALSE, e_iconv);
    }

    /* Reset the state of every entry in input_code_list. */
    {
        struct input_code *p = input_code_list;
        while (p->name){
            status_reinit(p++);
        }
    }
    return 0;
}
(*i_getc)(f)) != EOF && 5859 (c3 = (*i_getc)(f)) != EOF && 5860 (c4 = (*i_getc)(f)) != EOF) { 5861 nkf_char c5, c6, c7, c8; 5862 if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) { 5863 if ((c5 = (*i_getc)(f)) != EOF && 5864 (c6 = (*i_getc)(f)) != EOF && 5865 (c7 = (*i_getc)(f)) != EOF && 5866 (c8 = (*i_getc)(f)) != EOF) { 5867 if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) { 5868 (*i_ungetc)(c8, f); 5869 (*i_ungetc)(c7, f); 5870 (*i_ungetc)(c6, f); 5871 (*i_ungetc)(c5, f); 5872 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4); 5873 } 5874 } else { 5875 nkf_iconv_utf_32_nocombine(c1, c2, c3, c4); 5876 } 5877 } 5878 } 5879 goto finished; 5880 } 5881 else if (iconv == w_iconv16) { 5882 while ((c1 = (*i_getc)(f)) != EOF && 5883 (c2 = (*i_getc)(f)) != EOF) { 5884 size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0); 5885 if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES && 5886 (c3 = (*i_getc)(f)) != EOF && 5887 (c4 = (*i_getc)(f)) != EOF) { 5888 nkf_iconv_utf_16(c1, c2, c3, c4); 5889 } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) { 5890 if ((c3 = (*i_getc)(f)) != EOF && 5891 (c4 = (*i_getc)(f)) != EOF) { 5892 if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) { 5893 (*i_ungetc)(c4, f); 5894 (*i_ungetc)(c3, f); 5895 nkf_iconv_utf_16_nocombine(c1, c2); 5896 } 5897 } else { 5898 nkf_iconv_utf_16_nocombine(c1, c2); 5899 } 5900 } 5901 } 5902 goto finished; 5903 } 5904#endif 5905 5906 while ((c1 = (*i_getc)(f)) != EOF) { 5907#ifdef INPUT_CODE_FIX 5908 if (!input_encoding) 5909#endif 5910 code_status(c1); 5911 if (c2) { 5912 /* second byte */ 5913 if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 
0x92 : DEL)) { 5914 /* in case of 8th bit is on */ 5915 if (!estab_f&&!mime_decode_mode) { 5916 /* in case of not established yet */ 5917 /* It is still ambiguious */ 5918 if (h_conv(f, c2, c1)==EOF) { 5919 LAST; 5920 } 5921 else { 5922 SKIP; 5923 } 5924 } 5925 else { 5926 /* in case of already established */ 5927 if (c1 < 0x40) { 5928 /* ignore bogus code */ 5929 SKIP; 5930 } else { 5931 SEND; 5932 } 5933 } 5934 } 5935 else { 5936 /* 2nd byte of 7 bit code or SJIS */ 5937 SEND; 5938 } 5939 } 5940 else if (nkf_char_unicode_p(c1)) { 5941 (*oconv)(0, c1); 5942 NEXT; 5943 } 5944 else { 5945 /* first byte */ 5946 if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) { 5947 /* CP5022x */ 5948 MORE; 5949 }else if (input_codename && input_codename[0] == 'I' && 5950 0xA1 <= c1 && c1 <= 0xDF) { 5951 /* JIS X 0201 Katakana in 8bit JIS */ 5952 c2 = JIS_X_0201_1976_K; 5953 c1 &= 0x7f; 5954 SEND; 5955 } else if (c1 > DEL) { 5956 /* 8 bit code */ 5957 if (!estab_f && !iso8859_f) { 5958 /* not established yet */ 5959 MORE; 5960 } else { /* estab_f==TRUE */ 5961 if (iso8859_f) { 5962 c2 = ISO_8859_1; 5963 c1 &= 0x7f; 5964 SEND; 5965 } 5966 else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) || 5967 (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) { 5968 /* JIS X 0201 */ 5969 c2 = JIS_X_0201_1976_K; 5970 c1 &= 0x7f; 5971 SEND; 5972 } 5973 else { 5974 /* already established */ 5975 MORE; 5976 } 5977 } 5978 } else if (SP < c1 && c1 < DEL) { 5979 /* in case of Roman characters */ 5980 if (shift_mode) { 5981 /* output 1 shifted byte */ 5982 if (iso8859_f) { 5983 c2 = ISO_8859_1; 5984 SEND; 5985 } else if (nkf_byte_jisx0201_katakana_p(c1)){ 5986 /* output 1 shifted byte */ 5987 c2 = JIS_X_0201_1976_K; 5988 SEND; 5989 } else { 5990 /* look like bogus code */ 5991 SKIP; 5992 } 5993 } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 || 5994 input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) { 5995 /* in case of Kanji shifted */ 5996 MORE; 5997 } 
else if (c1 == '=' && mime_f && !mime_decode_mode) { 5998 /* Check MIME code */ 5999 if ((c1 = (*i_getc)(f)) == EOF) { 6000 (*oconv)(0, '='); 6001 LAST; 6002 } else if (c1 == '?') { 6003 /* =? is mime conversion start sequence */ 6004 if(mime_f == STRICT_MIME) { 6005 /* check in real detail */ 6006 if (mime_begin_strict(f) == EOF) 6007 LAST; 6008 SKIP; 6009 } else if (mime_begin(f) == EOF) 6010 LAST; 6011 SKIP; 6012 } else { 6013 (*oconv)(0, '='); 6014 (*i_ungetc)(c1,f); 6015 SKIP; 6016 } 6017 } else { 6018 /* normal ASCII code */ 6019 SEND; 6020 } 6021 } else if (c1 == SI && (!is_8bit || mime_decode_mode)) { 6022 shift_mode = 0; 6023 SKIP; 6024 } else if (c1 == SO && (!is_8bit || mime_decode_mode)) { 6025 shift_mode = 1; 6026 SKIP; 6027 } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) { 6028 if ((c1 = (*i_getc)(f)) == EOF) { 6029 (*oconv)(0, ESC); 6030 LAST; 6031 } 6032 else if (c1 == '&') { 6033 /* IRR */ 6034 if ((c1 = (*i_getc)(f)) == EOF) { 6035 LAST; 6036 } else { 6037 SKIP; 6038 } 6039 } 6040 else if (c1 == '$') { 6041 /* GZDMx */ 6042 if ((c1 = (*i_getc)(f)) == EOF) { 6043 /* don't send bogus code 6044 (*oconv)(0, ESC); 6045 (*oconv)(0, '$'); */ 6046 LAST; 6047 } else if (c1 == '@' || c1 == 'B') { 6048 /* JIS X 0208 */ 6049 set_input_mode(JIS_X_0208); 6050 SKIP; 6051 } else if (c1 == '(') { 6052 /* GZDM4 */ 6053 if ((c1 = (*i_getc)(f)) == EOF) { 6054 /* don't send bogus code 6055 (*oconv)(0, ESC); 6056 (*oconv)(0, '$'); 6057 (*oconv)(0, '('); 6058 */ 6059 LAST; 6060 } else if (c1 == '@'|| c1 == 'B') { 6061 /* JIS X 0208 */ 6062 set_input_mode(JIS_X_0208); 6063 SKIP; 6064#ifdef X0212_ENABLE 6065 } else if (c1 == 'D'){ 6066 set_input_mode(JIS_X_0212); 6067 SKIP; 6068#endif /* X0212_ENABLE */ 6069 } else if (c1 == 'O' || c1 == 'Q'){ 6070 set_input_mode(JIS_X_0213_1); 6071 SKIP; 6072 } else if (c1 == 'P'){ 6073 set_input_mode(JIS_X_0213_2); 6074 SKIP; 6075 } else { 6076 /* could be some special code */ 6077 (*oconv)(0, ESC); 6078 (*oconv)(0, '$'); 6079 
(*oconv)(0, '('); 6080 (*oconv)(0, c1); 6081 SKIP; 6082 } 6083 } else if (broken_f&0x2) { 6084 /* accept any ESC-(-x as broken code ... */ 6085 input_mode = JIS_X_0208; 6086 shift_mode = 0; 6087 SKIP; 6088 } else { 6089 (*oconv)(0, ESC); 6090 (*oconv)(0, '$'); 6091 (*oconv)(0, c1); 6092 SKIP; 6093 } 6094 } else if (c1 == '(') { 6095 /* GZD4 */ 6096 if ((c1 = (*i_getc)(f)) == EOF) { 6097 /* don't send bogus code 6098 (*oconv)(0, ESC); 6099 (*oconv)(0, '('); */ 6100 LAST; 6101 } 6102 else if (c1 == 'I') { 6103 /* JIS X 0201 Katakana */ 6104 set_input_mode(JIS_X_0201_1976_K); 6105 shift_mode = 1; 6106 SKIP; 6107 } 6108 else if (c1 == 'B' || c1 == 'J' || c1 == 'H') { 6109 /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */ 6110 set_input_mode(ASCII); 6111 SKIP; 6112 } 6113 else if (broken_f&0x2) { 6114 set_input_mode(ASCII); 6115 SKIP; 6116 } 6117 else { 6118 (*oconv)(0, ESC); 6119 (*oconv)(0, '('); 6120 SEND; 6121 } 6122 } 6123 else if (c1 == '.') { 6124 /* G2D6 */ 6125 if ((c1 = (*i_getc)(f)) == EOF) { 6126 LAST; 6127 } 6128 else if (c1 == 'A') { 6129 /* ISO-8859-1 */ 6130 g2 = ISO_8859_1; 6131 SKIP; 6132 } 6133 else { 6134 (*oconv)(0, ESC); 6135 (*oconv)(0, '.'); 6136 SEND; 6137 } 6138 } 6139 else if (c1 == 'N') { 6140 /* SS2 */ 6141 c1 = (*i_getc)(f); 6142 if (g2 == ISO_8859_1) { 6143 c2 = ISO_8859_1; 6144 SEND; 6145 }else{ 6146 (*i_ungetc)(c1, f); 6147 /* lonely ESC */ 6148 (*oconv)(0, ESC); 6149 SEND; 6150 } 6151 } 6152 else { 6153 /* lonely ESC */ 6154 (*oconv)(0, ESC); 6155 SEND; 6156 } 6157 } else if (c1 == ESC && iconv == s_iconv) { 6158 /* ESC in Shift_JIS */ 6159 if ((c1 = (*i_getc)(f)) == EOF) { 6160 (*oconv)(0, ESC); 6161 LAST; 6162 } else if (c1 == '$') { 6163 /* J-PHONE emoji */ 6164 if ((c1 = (*i_getc)(f)) == EOF) { 6165 LAST; 6166 } else if (('E' <= c1 && c1 <= 'G') || 6167 ('O' <= c1 && c1 <= 'Q')) { 6168 /* 6169 NUM : 0 1 2 3 4 5 6170 BYTE: G E F O P Q 6171 C%7 : 1 6 0 2 3 4 6172 C%7 : 0 1 2 3 4 5 6 6173 NUM : 2 0 3 4 5 X 1 6174 */ 6175 static const 
nkf_char jphone_emoji_first_table[7] = 6176 {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0}; 6177 c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]); 6178 if ((c1 = (*i_getc)(f)) == EOF) LAST; 6179 while (SP <= c1 && c1 <= 'z') { 6180 (*oconv)(0, c1 + c3); 6181 if ((c1 = (*i_getc)(f)) == EOF) LAST; 6182 } 6183 SKIP; 6184 } 6185 else { 6186 (*oconv)(0, ESC); 6187 (*oconv)(0, '$'); 6188 SEND; 6189 } 6190 } 6191 else { 6192 /* lonely ESC */ 6193 (*oconv)(0, ESC); 6194 SEND; 6195 } 6196 } else if (c1 == LF || c1 == CR) { 6197 if (broken_f&4) { 6198 input_mode = ASCII; set_iconv(FALSE, 0); 6199 SEND; 6200 } else if (mime_decode_f && !mime_decode_mode){ 6201 if (c1 == LF) { 6202 if ((c1=(*i_getc)(f))!=EOF && c1 == SP) { 6203 i_ungetc(SP,f); 6204 continue; 6205 } else { 6206 i_ungetc(c1,f); 6207 } 6208 c1 = LF; 6209 SEND; 6210 } else { /* if (c1 == CR)*/ 6211 if ((c1=(*i_getc)(f))!=EOF) { 6212 if (c1==SP) { 6213 i_ungetc(SP,f); 6214 continue; 6215 } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) { 6216 i_ungetc(SP,f); 6217 continue; 6218 } else { 6219 i_ungetc(c1,f); 6220 } 6221 i_ungetc(LF,f); 6222 } else { 6223 i_ungetc(c1,f); 6224 } 6225 c1 = CR; 6226 SEND; 6227 } 6228 } 6229 } else 6230 SEND; 6231 } 6232 /* send: */ 6233 switch(input_mode){ 6234 case ASCII: 6235 switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */ 6236 case -2: 6237 /* 4 bytes UTF-8 */ 6238 if ((c3 = (*i_getc)(f)) != EOF) { 6239 code_status(c3); 6240 c3 <<= 8; 6241 if ((c4 = (*i_getc)(f)) != EOF) { 6242 code_status(c4); 6243 (*iconv)(c2, c1, c3|c4); 6244 } 6245 } 6246 break; 6247 case -3: 6248 /* 4 bytes UTF-8 (check combining character) */ 6249 if ((c3 = (*i_getc)(f)) != EOF) { 6250 if ((c4 = (*i_getc)(f)) != EOF) { 6251 if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) { 6252 (*i_ungetc)(c4, f); 6253 (*i_ungetc)(c3, f); 6254 w_iconv_nocombine(c2, c1, 0); 6255 } 6256 } else { 6257 (*i_ungetc)(c3, f); 6258 w_iconv_nocombine(c2, c1, 0); 6259 } 6260 } else { 6261 
w_iconv_nocombine(c2, c1, 0); 6262 } 6263 break; 6264 case -1: 6265 /* 3 bytes EUC or UTF-8 */ 6266 if ((c3 = (*i_getc)(f)) != EOF) { 6267 code_status(c3); 6268 if ((*iconv)(c2, c1, c3) == -3) { 6269 /* 6 bytes UTF-8 (check combining character) */ 6270 nkf_char c5, c6; 6271 if ((c4 = (*i_getc)(f)) != EOF) { 6272 if ((c5 = (*i_getc)(f)) != EOF) { 6273 if ((c6 = (*i_getc)(f)) != EOF) { 6274 if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) { 6275 (*i_ungetc)(c6, f); 6276 (*i_ungetc)(c5, f); 6277 (*i_ungetc)(c4, f); 6278 w_iconv_nocombine(c2, c1, c3); 6279 } 6280 } else { 6281 (*i_ungetc)(c5, f); 6282 (*i_ungetc)(c4, f); 6283 w_iconv_nocombine(c2, c1, c3); 6284 } 6285 } else { 6286 (*i_ungetc)(c4, f); 6287 w_iconv_nocombine(c2, c1, c3); 6288 } 6289 } else { 6290 w_iconv_nocombine(c2, c1, c3); 6291 } 6292 } 6293 } 6294 break; 6295 } 6296 break; 6297 case JIS_X_0208: 6298 case JIS_X_0213_1: 6299 if (ms_ucs_map_f && 6300 0x7F <= c2 && c2 <= 0x92 && 6301 0x21 <= c1 && c1 <= 0x7E) { 6302 /* CP932 UDC */ 6303 c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000); 6304 c2 = 0; 6305 } 6306 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ 6307 break; 6308#ifdef X0212_ENABLE 6309 case JIS_X_0212: 6310 (*oconv)(PREFIX_EUCG3 | c2, c1); 6311 break; 6312#endif /* X0212_ENABLE */ 6313 case JIS_X_0213_2: 6314 (*oconv)(PREFIX_EUCG3 | c2, c1); 6315 break; 6316 default: 6317 (*oconv)(input_mode, c1); /* other special case */ 6318 } 6319 6320 c2 = 0; 6321 c3 = 0; 6322 continue; 6323 /* goto next_word */ 6324 } 6325 6326finished: 6327 /* epilogue */ 6328 (*iconv)(EOF, 0, 0); 6329 if (!input_codename) 6330 { 6331 if (is_8bit) { 6332 struct input_code *p = input_code_list; 6333 struct input_code *result = p; 6334 while (p->name){ 6335 if (p->score < result->score) result = p; 6336 ++p; 6337 } 6338 set_input_codename(result->name); 6339#ifdef CHECK_OPTION 6340 debug(result->name); 6341#endif 6342 } 6343 } 6344 return 0; 6345} 6346 6347/* 6348 * int options(unsigned char *cp) 6349 
* 6350 * return values: 6351 * 0: success 6352 * -1: ArgumentError 6353 */ 6354static int 6355options(unsigned char *cp) 6356{ 6357 nkf_char i, j; 6358 unsigned char *p; 6359 unsigned char *cp_back = NULL; 6360 nkf_encoding *enc; 6361 6362 if (option_mode==1) 6363 return 0; 6364 while(*cp && *cp++!='-'); 6365 while (*cp || cp_back) { 6366 if(!*cp){ 6367 cp = cp_back; 6368 cp_back = NULL; 6369 continue; 6370 } 6371 p = 0; 6372 switch (*cp++) { 6373 case '-': /* literal options */ 6374 if (!*cp || *cp == SP) { /* ignore the rest of arguments */ 6375 option_mode = 1; 6376 return 0; 6377 } 6378 for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) { 6379 p = (unsigned char *)long_option[i].name; 6380 for (j=0;*p && *p != '=' && *p == cp[j];p++, j++); 6381 if (*p == cp[j] || cp[j] == SP){ 6382 p = &cp[j] + 1; 6383 break; 6384 } 6385 p = 0; 6386 } 6387 if (p == 0) { 6388#if !defined(PERL_XS) && !defined(WIN32DLL) 6389 fprintf(stderr, "unknown long option: --%s\n", cp); 6390#endif 6391 return -1; 6392 } 6393 while(*cp && *cp != SP && cp++); 6394 if (long_option[i].alias[0]){ 6395 cp_back = cp; 6396 cp = (unsigned char *)long_option[i].alias; 6397 }else{ 6398#ifndef PERL_XS 6399 if (strcmp(long_option[i].name, "help") == 0){ 6400 usage(); 6401 exit(EXIT_SUCCESS); 6402 } 6403#endif 6404 if (strcmp(long_option[i].name, "ic=") == 0){ 6405 enc = nkf_enc_find((char *)p); 6406 if (!enc) continue; 6407 input_encoding = enc; 6408 continue; 6409 } 6410 if (strcmp(long_option[i].name, "oc=") == 0){ 6411 enc = nkf_enc_find((char *)p); 6412 /* if (enc <= 0) continue; */ 6413 if (!enc) continue; 6414 output_encoding = enc; 6415 continue; 6416 } 6417 if (strcmp(long_option[i].name, "guess=") == 0){ 6418 if (p[0] == '0' || p[0] == '1') { 6419 guess_f = 1; 6420 } else { 6421 guess_f = 2; 6422 } 6423 continue; 6424 } 6425#ifdef OVERWRITE 6426 if (strcmp(long_option[i].name, "overwrite") == 0){ 6427 file_out_f = TRUE; 6428 overwrite_f = TRUE; 6429 preserve_time_f = TRUE; 6430 
continue; 6431 } 6432 if (strcmp(long_option[i].name, "overwrite=") == 0){ 6433 file_out_f = TRUE; 6434 overwrite_f = TRUE; 6435 preserve_time_f = TRUE; 6436 backup_f = TRUE; 6437 backup_suffix = (char *)p; 6438 continue; 6439 } 6440 if (strcmp(long_option[i].name, "in-place") == 0){ 6441 file_out_f = TRUE; 6442 overwrite_f = TRUE; 6443 preserve_time_f = FALSE; 6444 continue; 6445 } 6446 if (strcmp(long_option[i].name, "in-place=") == 0){ 6447 file_out_f = TRUE; 6448 overwrite_f = TRUE; 6449 preserve_time_f = FALSE; 6450 backup_f = TRUE; 6451 backup_suffix = (char *)p; 6452 continue; 6453 } 6454#endif 6455#ifdef INPUT_OPTION 6456 if (strcmp(long_option[i].name, "cap-input") == 0){ 6457 cap_f = TRUE; 6458 continue; 6459 } 6460 if (strcmp(long_option[i].name, "url-input") == 0){ 6461 url_f = TRUE; 6462 continue; 6463 } 6464#endif 6465#ifdef NUMCHAR_OPTION 6466 if (strcmp(long_option[i].name, "numchar-input") == 0){ 6467 numchar_f = TRUE; 6468 continue; 6469 } 6470#endif 6471#ifdef CHECK_OPTION 6472 if (strcmp(long_option[i].name, "no-output") == 0){ 6473 noout_f = TRUE; 6474 continue; 6475 } 6476 if (strcmp(long_option[i].name, "debug") == 0){ 6477 debug_f = TRUE; 6478 continue; 6479 } 6480#endif 6481 if (strcmp(long_option[i].name, "cp932") == 0){ 6482#ifdef SHIFTJIS_CP932 6483 cp51932_f = TRUE; 6484 cp932inv_f = -TRUE; 6485#endif 6486#ifdef UTF8_OUTPUT_ENABLE 6487 ms_ucs_map_f = UCS_MAP_CP932; 6488#endif 6489 continue; 6490 } 6491 if (strcmp(long_option[i].name, "no-cp932") == 0){ 6492#ifdef SHIFTJIS_CP932 6493 cp51932_f = FALSE; 6494 cp932inv_f = FALSE; 6495#endif 6496#ifdef UTF8_OUTPUT_ENABLE 6497 ms_ucs_map_f = UCS_MAP_ASCII; 6498#endif 6499 continue; 6500 } 6501#ifdef SHIFTJIS_CP932 6502 if (strcmp(long_option[i].name, "cp932inv") == 0){ 6503 cp932inv_f = -TRUE; 6504 continue; 6505 } 6506#endif 6507 6508#ifdef X0212_ENABLE 6509 if (strcmp(long_option[i].name, "x0212") == 0){ 6510 x0212_f = TRUE; 6511 continue; 6512 } 6513#endif 6514 6515#ifdef EXEC_IO 6516 if 
(strcmp(long_option[i].name, "exec-in") == 0){ 6517 exec_f = 1; 6518 return 0; 6519 } 6520 if (strcmp(long_option[i].name, "exec-out") == 0){ 6521 exec_f = -1; 6522 return 0; 6523 } 6524#endif 6525#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE) 6526 if (strcmp(long_option[i].name, "no-cp932ext") == 0){ 6527 no_cp932ext_f = TRUE; 6528 continue; 6529 } 6530 if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){ 6531 no_best_fit_chars_f = TRUE; 6532 continue; 6533 } 6534 if (strcmp(long_option[i].name, "fb-skip") == 0){ 6535 encode_fallback = NULL; 6536 continue; 6537 } 6538 if (strcmp(long_option[i].name, "fb-html") == 0){ 6539 encode_fallback = encode_fallback_html; 6540 continue; 6541 } 6542 if (strcmp(long_option[i].name, "fb-xml") == 0){ 6543 encode_fallback = encode_fallback_xml; 6544 continue; 6545 } 6546 if (strcmp(long_option[i].name, "fb-java") == 0){ 6547 encode_fallback = encode_fallback_java; 6548 continue; 6549 } 6550 if (strcmp(long_option[i].name, "fb-perl") == 0){ 6551 encode_fallback = encode_fallback_perl; 6552 continue; 6553 } 6554 if (strcmp(long_option[i].name, "fb-subchar") == 0){ 6555 encode_fallback = encode_fallback_subchar; 6556 continue; 6557 } 6558 if (strcmp(long_option[i].name, "fb-subchar=") == 0){ 6559 encode_fallback = encode_fallback_subchar; 6560 unicode_subchar = 0; 6561 if (p[0] != '0'){ 6562 /* decimal number */ 6563 for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){ 6564 unicode_subchar *= 10; 6565 unicode_subchar += hex2bin(p[i]); 6566 } 6567 }else if(p[1] == 'x' || p[1] == 'X'){ 6568 /* hexadecimal number */ 6569 for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){ 6570 unicode_subchar <<= 4; 6571 unicode_subchar |= hex2bin(p[i]); 6572 } 6573 }else{ 6574 /* octal number */ 6575 for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){ 6576 unicode_subchar *= 8; 6577 unicode_subchar += hex2bin(p[i]); 6578 } 6579 } 6580 w16e_conv(unicode_subchar, &i, &j); 6581 unicode_subchar = i<<8 | j; 6582 continue; 6583 } 6584#endif 6585#ifdef 
UTF8_OUTPUT_ENABLE 6586 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){ 6587 ms_ucs_map_f = UCS_MAP_MS; 6588 continue; 6589 } 6590#endif 6591#ifdef UNICODE_NORMALIZATION 6592 if (strcmp(long_option[i].name, "utf8mac-input") == 0){ 6593 nfc_f = TRUE; 6594 continue; 6595 } 6596#endif 6597 if (strcmp(long_option[i].name, "prefix=") == 0){ 6598 if (nkf_isgraph(p[0])){ 6599 for (i = 1; nkf_isgraph(p[i]); i++){ 6600 prefix_table[p[i]] = p[0]; 6601 } 6602 } 6603 continue; 6604 } 6605#if !defined(PERL_XS) && !defined(WIN32DLL) 6606 fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name); 6607#endif 6608 return -1; 6609 } 6610 continue; 6611 case 'b': /* buffered mode */ 6612 unbuf_f = FALSE; 6613 continue; 6614 case 'u': /* non bufferd mode */ 6615 unbuf_f = TRUE; 6616 continue; 6617 case 't': /* transparent mode */ 6618 if (*cp=='1') { 6619 /* alias of -t */ 6620 cp++; 6621 nop_f = TRUE; 6622 } else if (*cp=='2') { 6623 /* 6624 * -t with put/get 6625 * 6626 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin 6627 * 6628 */ 6629 cp++; 6630 nop_f = 2; 6631 } else 6632 nop_f = TRUE; 6633 continue; 6634 case 'j': /* JIS output */ 6635 case 'n': 6636 output_encoding = nkf_enc_from_index(ISO_2022_JP); 6637 continue; 6638 case 'e': /* AT&T EUC output */ 6639 output_encoding = nkf_enc_from_index(EUCJP_NKF); 6640 continue; 6641 case 's': /* SJIS output */ 6642 output_encoding = nkf_enc_from_index(SHIFT_JIS); 6643 continue; 6644 case 'l': /* ISO8859 Latin-1 support, no conversion */ 6645 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ 6646 input_encoding = nkf_enc_from_index(ISO_8859_1); 6647 continue; 6648 case 'i': /* Kanji IN ESC-$-@/B */ 6649 if (*cp=='@'||*cp=='B') 6650 kanji_intro = *cp++; 6651 continue; 6652 case 'o': /* ASCII IN ESC-(-J/B/H */ 6653 /* ESC ( H was used in initial JUNET messages */ 6654 if (*cp=='J'||*cp=='B'||*cp=='H') 6655 ascii_intro = *cp++; 6656 continue; 6657 case 'h': 6658 /* 6659 bit:1 katakana->hiragana 6660 bit:2 
hiragana->katakana 6661 */ 6662 if ('9'>= *cp && *cp>='0') 6663 hira_f |= (*cp++ -'0'); 6664 else 6665 hira_f |= 1; 6666 continue; 6667 case 'r': 6668 rot_f = TRUE; 6669 continue; 6670#if defined(MSDOS) || defined(__OS2__) 6671 case 'T': 6672 binmode_f = FALSE; 6673 continue; 6674#endif 6675#ifndef PERL_XS 6676 case 'V': 6677 show_configuration(); 6678 exit(EXIT_SUCCESS); 6679 break; 6680 case 'v': 6681 version(); 6682 exit(EXIT_SUCCESS); 6683 break; 6684#endif 6685#ifdef UTF8_OUTPUT_ENABLE 6686 case 'w': /* UTF-{8,16,32} output */ 6687 if (cp[0] == '8') { 6688 cp++; 6689 if (cp[0] == '0'){ 6690 cp++; 6691 output_encoding = nkf_enc_from_index(UTF_8N); 6692 } else { 6693 output_bom_f = TRUE; 6694 output_encoding = nkf_enc_from_index(UTF_8_BOM); 6695 } 6696 } else { 6697 int enc_idx; 6698 if ('1'== cp[0] && '6'==cp[1]) { 6699 cp += 2; 6700 enc_idx = UTF_16; 6701 } else if ('3'== cp[0] && '2'==cp[1]) { 6702 cp += 2; 6703 enc_idx = UTF_32; 6704 } else { 6705 output_encoding = nkf_enc_from_index(UTF_8); 6706 continue; 6707 } 6708 if (cp[0]=='L') { 6709 cp++; 6710 output_endian = ENDIAN_LITTLE; 6711 output_bom_f = TRUE; 6712 } else if (cp[0] == 'B') { 6713 cp++; 6714 output_bom_f = TRUE; 6715 } 6716 if (cp[0] == '0'){ 6717 output_bom_f = FALSE; 6718 cp++; 6719 enc_idx = enc_idx == UTF_16 6720 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE) 6721 : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE); 6722 } else { 6723 enc_idx = enc_idx == UTF_16 6724 ? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM) 6725 : (output_endian == ENDIAN_LITTLE ? 
UTF_32LE_BOM : UTF_32BE_BOM); 6726 } 6727 output_encoding = nkf_enc_from_index(enc_idx); 6728 } 6729 continue; 6730#endif 6731#ifdef UTF8_INPUT_ENABLE 6732 case 'W': /* UTF input */ 6733 if (cp[0] == '8') { 6734 cp++; 6735 input_encoding = nkf_enc_from_index(UTF_8); 6736 }else{ 6737 int enc_idx; 6738 if ('1'== cp[0] && '6'==cp[1]) { 6739 cp += 2; 6740 input_endian = ENDIAN_BIG; 6741 enc_idx = UTF_16; 6742 } else if ('3'== cp[0] && '2'==cp[1]) { 6743 cp += 2; 6744 input_endian = ENDIAN_BIG; 6745 enc_idx = UTF_32; 6746 } else { 6747 input_encoding = nkf_enc_from_index(UTF_8); 6748 continue; 6749 } 6750 if (cp[0]=='L') { 6751 cp++; 6752 input_endian = ENDIAN_LITTLE; 6753 } else if (cp[0] == 'B') { 6754 cp++; 6755 input_endian = ENDIAN_BIG; 6756 } 6757 enc_idx = (enc_idx == UTF_16 6758 ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE) 6759 : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE)); 6760 input_encoding = nkf_enc_from_index(enc_idx); 6761 } 6762 continue; 6763#endif 6764 /* Input code assumption */ 6765 case 'J': /* ISO-2022-JP input */ 6766 input_encoding = nkf_enc_from_index(ISO_2022_JP); 6767 continue; 6768 case 'E': /* EUC-JP input */ 6769 input_encoding = nkf_enc_from_index(EUCJP_NKF); 6770 continue; 6771 case 'S': /* Shift_JIS input */ 6772 input_encoding = nkf_enc_from_index(SHIFT_JIS); 6773 continue; 6774 case 'Z': /* Convert X0208 alphabet to asii */ 6775 /* alpha_f 6776 bit:0 Convert JIS X 0208 Alphabet to ASCII 6777 bit:1 Convert Kankaku to one space 6778 bit:2 Convert Kankaku to two spaces 6779 bit:3 Convert HTML Entity 6780 bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana 6781 */ 6782 while ('0'<= *cp && *cp <='4') { 6783 alpha_f |= 1 << (*cp++ - '0'); 6784 } 6785 alpha_f |= 1; 6786 continue; 6787 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */ 6788 x0201_f = FALSE; /* No X0201->X0208 conversion */ 6789 /* accept X0201 6790 ESC-(-I in JIS, EUC, MS Kanji 6791 SI/SO in JIS, EUC, MS Kanji 6792 SS2 in EUC, JIS, not in 
MS Kanji 6793 MS Kanji (0xa0-0xdf) 6794 output X0201 6795 ESC-(-I in JIS (0x20-0x5f) 6796 SS2 in EUC (0xa0-0xdf) 6797 0xa0-0xd in MS Kanji (0xa0-0xdf) 6798 */ 6799 continue; 6800 case 'X': /* Convert X0201 kana to X0208 */ 6801 x0201_f = TRUE; 6802 continue; 6803 case 'F': /* prserve new lines */ 6804 fold_preserve_f = TRUE; 6805 case 'f': /* folding -f60 or -f */ 6806 fold_f = TRUE; 6807 fold_len = 0; 6808 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */ 6809 fold_len *= 10; 6810 fold_len += *cp++ - '0'; 6811 } 6812 if (!(0<fold_len && fold_len<BUFSIZ)) 6813 fold_len = DEFAULT_FOLD; 6814 if (*cp=='-') { 6815 fold_margin = 0; 6816 cp++; 6817 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */ 6818 fold_margin *= 10; 6819 fold_margin += *cp++ - '0'; 6820 } 6821 } 6822 continue; 6823 case 'm': /* MIME support */ 6824 /* mime_decode_f = TRUE; */ /* this has too large side effects... */ 6825 if (*cp=='B'||*cp=='Q') { 6826 mime_decode_mode = *cp++; 6827 mimebuf_f = FIXED_MIME; 6828 } else if (*cp=='N') { 6829 mime_f = TRUE; cp++; 6830 } else if (*cp=='S') { 6831 mime_f = STRICT_MIME; cp++; 6832 } else if (*cp=='0') { 6833 mime_decode_f = FALSE; 6834 mime_f = FALSE; cp++; 6835 } else { 6836 mime_f = STRICT_MIME; 6837 } 6838 continue; 6839 case 'M': /* MIME output */ 6840 if (*cp=='B') { 6841 mimeout_mode = 'B'; 6842 mimeout_f = FIXED_MIME; cp++; 6843 } else if (*cp=='Q') { 6844 mimeout_mode = 'Q'; 6845 mimeout_f = FIXED_MIME; cp++; 6846 } else { 6847 mimeout_f = TRUE; 6848 } 6849 continue; 6850 case 'B': /* Broken JIS support */ 6851 /* bit:0 no ESC JIS 6852 bit:1 allow any x on ESC-(-x or ESC-$-x 6853 bit:2 reset to ascii on NL 6854 */ 6855 if ('9'>= *cp && *cp>='0') 6856 broken_f |= 1<<(*cp++ -'0'); 6857 else 6858 broken_f |= TRUE; 6859 continue; 6860#ifndef PERL_XS 6861 case 'O':/* for Output file */ 6862 file_out_f = TRUE; 6863 continue; 6864#endif 6865 case 'c':/* add cr code */ 6866 eolmode_f = CRLF; 6867 continue; 6868 case 'd':/* delete cr 
code */ 6869 eolmode_f = LF; 6870 continue; 6871 case 'I': /* ISO-2022-JP output */ 6872 iso2022jp_f = TRUE; 6873 continue; 6874 case 'L': /* line mode */ 6875 if (*cp=='u') { /* unix */ 6876 eolmode_f = LF; cp++; 6877 } else if (*cp=='m') { /* mac */ 6878 eolmode_f = CR; cp++; 6879 } else if (*cp=='w') { /* windows */ 6880 eolmode_f = CRLF; cp++; 6881 } else if (*cp=='0') { /* no conversion */ 6882 eolmode_f = 0; cp++; 6883 } 6884 continue; 6885#ifndef PERL_XS 6886 case 'g': 6887 if ('2' <= *cp && *cp <= '9') { 6888 guess_f = 2; 6889 cp++; 6890 } else if (*cp == '0' || *cp == '1') { 6891 guess_f = 1; 6892 cp++; 6893 } else { 6894 guess_f = 1; 6895 } 6896 continue; 6897#endif 6898 case SP: 6899 /* module muliple options in a string are allowed for Perl moudle */ 6900 while(*cp && *cp++!='-'); 6901 continue; 6902 default: 6903#if !defined(PERL_XS) && !defined(WIN32DLL) 6904 fprintf(stderr, "unknown option: -%c\n", *(cp-1)); 6905#endif 6906 /* bogus option but ignored */ 6907 return -1; 6908 } 6909 } 6910 return 0; 6911} 6912 6913#ifdef WIN32DLL 6914#include "nkf32dll.c" 6915#elif defined(PERL_XS) 6916#else /* WIN32DLL */ 6917int 6918main(int argc, char **argv) 6919{ 6920 FILE *fin; 6921 unsigned char *cp; 6922 6923 char *outfname = NULL; 6924 char *origfname; 6925 6926#ifdef EASYWIN /*Easy Win */ 6927 _BufferSize.y = 400;/*Set Scroll Buffer Size*/ 6928#endif 6929#ifdef DEFAULT_CODE_LOCALE 6930 setlocale(LC_CTYPE, ""); 6931#endif 6932 nkf_state_init(); 6933 6934 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) { 6935 cp = (unsigned char *)*argv; 6936 options(cp); 6937#ifdef EXEC_IO 6938 if (exec_f){ 6939 int fds[2], pid; 6940 if (pipe(fds) < 0 || (pid = fork()) < 0){ 6941 abort(); 6942 } 6943 if (pid == 0){ 6944 if (exec_f > 0){ 6945 close(fds[0]); 6946 dup2(fds[1], 1); 6947 }else{ 6948 close(fds[1]); 6949 dup2(fds[0], 0); 6950 } 6951 execvp(argv[1], &argv[1]); 6952 } 6953 if (exec_f > 0){ 6954 close(fds[1]); 6955 dup2(fds[0], 0); 6956 }else{ 6957 
close(fds[0]); 6958 dup2(fds[1], 1); 6959 } 6960 argc = 0; 6961 break; 6962 } 6963#endif 6964 } 6965 6966 if (guess_f) { 6967#ifdef CHECK_OPTION 6968 int debug_f_back = debug_f; 6969#endif 6970#ifdef EXEC_IO 6971 int exec_f_back = exec_f; 6972#endif 6973#ifdef X0212_ENABLE 6974 int x0212_f_back = x0212_f; 6975#endif 6976 int x0213_f_back = x0213_f; 6977 int guess_f_back = guess_f; 6978 reinit(); 6979 guess_f = guess_f_back; 6980 mime_f = FALSE; 6981#ifdef CHECK_OPTION 6982 debug_f = debug_f_back; 6983#endif 6984#ifdef EXEC_IO 6985 exec_f = exec_f_back; 6986#endif 6987 x0212_f = x0212_f_back; 6988 x0213_f = x0213_f_back; 6989 } 6990 6991 if (binmode_f == TRUE) 6992#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 6993 if (freopen("","wb",stdout) == NULL) 6994 return (-1); 6995#else 6996 setbinmode(stdout); 6997#endif 6998 6999 if (unbuf_f) 7000 setbuf(stdout, (char *) NULL); 7001 else 7002 setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE); 7003 7004 if (argc == 0) { 7005 if (binmode_f == TRUE) 7006#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7007 if (freopen("","rb",stdin) == NULL) return (-1); 7008#else 7009 setbinmode(stdin); 7010#endif 7011 setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE); 7012 if (nop_f) 7013 noconvert(stdin); 7014 else { 7015 kanji_convert(stdin); 7016 if (guess_f) print_guessed_code(NULL); 7017 } 7018 } else { 7019 int nfiles = argc; 7020 int is_argument_error = FALSE; 7021 while (argc--) { 7022 input_codename = NULL; 7023 input_eol = 0; 7024#ifdef CHECK_OPTION 7025 iconv_for_check = 0; 7026#endif 7027 if ((fin = fopen((origfname = *argv++), "r")) == NULL) { 7028 perror(*(argv-1)); 7029 is_argument_error = TRUE; 7030 continue; 7031 } else { 7032#ifdef OVERWRITE 7033 int fd = 0; 7034 int fd_backup = 0; 7035#endif 7036 7037 /* reopen file for stdout */ 7038 if (file_out_f == TRUE) { 7039#ifdef OVERWRITE 7040 if (overwrite_f){ 7041 outfname = nkf_xmalloc(strlen(origfname) 7042 + strlen(".nkftmpXXXXXX") 7043 + 
1); 7044 strcpy(outfname, origfname); 7045#ifdef MSDOS 7046 { 7047 int i; 7048 for (i = strlen(outfname); i; --i){ 7049 if (outfname[i - 1] == '/' 7050 || outfname[i - 1] == '\\'){ 7051 break; 7052 } 7053 } 7054 outfname[i] = '\0'; 7055 } 7056 strcat(outfname, "ntXXXXXX"); 7057 mktemp(outfname); 7058 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 7059 S_IREAD | S_IWRITE); 7060#else 7061 strcat(outfname, ".nkftmpXXXXXX"); 7062 fd = mkstemp(outfname); 7063#endif 7064 if (fd < 0 7065 || (fd_backup = dup(fileno(stdout))) < 0 7066 || dup2(fd, fileno(stdout)) < 0 7067 ){ 7068 perror(origfname); 7069 return -1; 7070 } 7071 }else 7072#endif 7073 if(argc == 1) { 7074 outfname = *argv++; 7075 argc--; 7076 } else { 7077 outfname = "nkf.out"; 7078 } 7079 7080 if(freopen(outfname, "w", stdout) == NULL) { 7081 perror (outfname); 7082 return (-1); 7083 } 7084 if (binmode_f == TRUE) { 7085#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7086 if (freopen("","wb",stdout) == NULL) 7087 return (-1); 7088#else 7089 setbinmode(stdout); 7090#endif 7091 } 7092 } 7093 if (binmode_f == TRUE) 7094#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__)) 7095 if (freopen("","rb",fin) == NULL) 7096 return (-1); 7097#else 7098 setbinmode(fin); 7099#endif 7100 setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE); 7101 if (nop_f) 7102 noconvert(fin); 7103 else { 7104 char *filename = NULL; 7105 kanji_convert(fin); 7106 if (nfiles > 1) filename = origfname; 7107 if (guess_f) print_guessed_code(filename); 7108 } 7109 fclose(fin); 7110#ifdef OVERWRITE 7111 if (overwrite_f) { 7112 struct stat sb; 7113#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7114 time_t tb[2]; 7115#else 7116 struct utimbuf tb; 7117#endif 7118 7119 fflush(stdout); 7120 close(fd); 7121 if (dup2(fd_backup, fileno(stdout)) < 0){ 7122 perror("dup2"); 7123 } 7124 if (stat(origfname, &sb)) { 7125 
fprintf(stderr, "Can't stat %s\n", origfname); 7126 } 7127 /* $B%Q!<%_%C%7%g%s$rI|85(B */ 7128 if (chmod(outfname, sb.st_mode)) { 7129 fprintf(stderr, "Can't set permission %s\n", outfname); 7130 } 7131 7132 /* $B%?%$%`%9%?%s%W$rI|85(B */ 7133 if(preserve_time_f){ 7134#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__) 7135 tb[0] = tb[1] = sb.st_mtime; 7136 if (utime(outfname, tb)) { 7137 fprintf(stderr, "Can't set timestamp %s\n", outfname); 7138 } 7139#else 7140 tb.actime = sb.st_atime; 7141 tb.modtime = sb.st_mtime; 7142 if (utime(outfname, &tb)) { 7143 fprintf(stderr, "Can't set timestamp %s\n", outfname); 7144 } 7145#endif 7146 } 7147 if(backup_f){ 7148 char *backup_filename = get_backup_filename(backup_suffix, origfname); 7149#ifdef MSDOS 7150 unlink(backup_filename); 7151#endif 7152 if (rename(origfname, backup_filename)) { 7153 perror(backup_filename); 7154 fprintf(stderr, "Can't rename %s to %s\n", 7155 origfname, backup_filename); 7156 } 7157 nkf_xfree(backup_filename); 7158 }else{ 7159#ifdef MSDOS 7160 if (unlink(origfname)){ 7161 perror(origfname); 7162 } 7163#endif 7164 } 7165 if (rename(outfname, origfname)) { 7166 perror(origfname); 7167 fprintf(stderr, "Can't rename %s to %s\n", 7168 outfname, origfname); 7169 } 7170 nkf_xfree(outfname); 7171 } 7172#endif 7173 } 7174 } 7175 if (is_argument_error) 7176 return(-1); 7177 } 7178#ifdef EASYWIN /*Easy Win */ 7179 if (file_out_f == FALSE) 7180 scanf("%d",&end_check); 7181 else 7182 fclose(stdout); 7183#else /* for Other OS */ 7184 if (file_out_f == TRUE) 7185 fclose(stdout); 7186#endif /*Easy Win */ 7187 return (0); 7188} 7189#endif /* WIN32DLL */ 7190