1/*
2 * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3 * Copyright (c) 1996-2010, The nkf Project.
4 *
5 * This software is provided 'as-is', without any express or implied
6 * warranty. In no event will the authors be held liable for any damages
7 * arising from the use of this software.
8 *
9 * Permission is granted to anyone to use this software for any purpose,
10 * including commercial applications, and to alter it and redistribute it
11 * freely, subject to the following restrictions:
12 *
13 * 1. The origin of this software must not be misrepresented; you must not
14 * claim that you wrote the original software. If you use this software
15 * in a product, an acknowledgment in the product documentation would be
16 * appreciated but is not required.
17 *
18 * 2. Altered source versions must be plainly marked as such, and must not be
19 * misrepresented as being the original software.
20 *
21 * 3. This notice may not be removed or altered from any source distribution.
22 */
23#define NKF_VERSION "2.1.3"
24#define NKF_RELEASE_DATE "2012-11-22"
25#define COPY_RIGHT \
26    "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27    "Copyright (C) 1996-2012, The nkf Project."
28
29#include "config.h"
30#include "nkf.h"
31#include "utf8tbl.h"
32#ifdef __WIN32__
33#include <windows.h>
34#include <locale.h>
35#endif
36#if defined(__OS2__)
37# define INCL_DOS
38# define INCL_DOSERRORS
39# include <os2.h>
40#endif
41#include <assert.h>
42
43
44/* state of output_mode and input_mode
45
46   c2           0 means ASCII
47   JIS_X_0201_1976_K
48   ISO_8859_1
49   JIS_X_0208
50   EOF      all termination
51   c1           32bit data
52
53 */
54
55/* MIME ENCODE */
56
57#define         FIXED_MIME      7
58#define         STRICT_MIME     8
59
60/* byte order */
/*
 * Byte-order selectors.  In this file input_endian and output_endian are
 * both initialized to ENDIAN_BIG.
 * NOTE(review): ENDIAN_2143 and ENDIAN_3412 presumably name mixed byte
 * orders by the position of each byte -- confirm against the UTF-16/32 code.
 */
enum byte_order {
    ENDIAN_BIG    = 1,
    ENDIAN_LITTLE = 2,
    ENDIAN_2143   = 3,
    ENDIAN_3412   = 4
};
67
68/* ASCII CODE */
69
70#define         BS      0x08
71#define         TAB     0x09
72#define         LF      0x0a
73#define         CR      0x0d
74#define         ESC     0x1b
75#define         SP      0x20
76#define         DEL     0x7f
77#define         SI      0x0f
78#define         SO      0x0e
79#define         SS2     0x8e
80#define         SS3     0x8f
81#define         CRLF    0x0D0A
82
83
84/* encodings */
85
86enum nkf_encodings {
87    ASCII,
88    ISO_8859_1,
89    ISO_2022_JP,
90    CP50220,
91    CP50221,
92    CP50222,
93    ISO_2022_JP_1,
94    ISO_2022_JP_3,
95    ISO_2022_JP_2004,
96    SHIFT_JIS,
97    WINDOWS_31J,
98    CP10001,
99    EUC_JP,
100    EUCJP_NKF,
101    CP51932,
102    EUCJP_MS,
103    EUCJP_ASCII,
104    SHIFT_JISX0213,
105    SHIFT_JIS_2004,
106    EUC_JISX0213,
107    EUC_JIS_2004,
108    UTF_8,
109    UTF_8N,
110    UTF_8_BOM,
111    UTF8_MAC,
112    UTF_16,
113    UTF_16BE,
114    UTF_16BE_BOM,
115    UTF_16LE,
116    UTF_16LE_BOM,
117    UTF_32,
118    UTF_32BE,
119    UTF_32BE_BOM,
120    UTF_32LE,
121    UTF_32LE_BOM,
122    BINARY,
123    NKF_ENCODING_TABLE_SIZE,
124    JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125    /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126    /* JIS_X_0208_1978   = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127    /* JIS_X_0208_1983   = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128    JIS_X_0208        = 0x1168, /* @B */
129    JIS_X_0212        = 0x1159, /* D */
130    /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131    JIS_X_0213_2 = 0x1229, /* P */
132    JIS_X_0213_1 = 0x1233 /* Q */
133};
134
135static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140static void j_oconv(nkf_char c2, nkf_char c1);
141static void s_oconv(nkf_char c2, nkf_char c1);
142static void e_oconv(nkf_char c2, nkf_char c1);
143static void w_oconv(nkf_char c2, nkf_char c1);
144static void w_oconv16(nkf_char c2, nkf_char c1);
145static void w_oconv32(nkf_char c2, nkf_char c1);
146
/*
 * A base ("native") encoding: a display name plus the pair of converter
 * functions shared by every encoding built on top of it.
 */
typedef struct {
    const char *name;	/* name of the base encoding, e.g. "EUC-JP" */
    nkf_char (*iconv)(nkf_char c2, nkf_char c1, nkf_char c0);	/* input-side converter */
    void (*oconv)(nkf_char c2, nkf_char c1);	/* output-side converter */
} nkf_native_encoding;
152
153nkf_native_encoding NkfEncodingASCII =		{ "ASCII", e_iconv, e_oconv };
154nkf_native_encoding NkfEncodingISO_2022_JP =	{ "ISO-2022-JP", e_iconv, j_oconv };
155nkf_native_encoding NkfEncodingShift_JIS =	{ "Shift_JIS", s_iconv, s_oconv };
156nkf_native_encoding NkfEncodingEUC_JP =		{ "EUC-JP", e_iconv, e_oconv };
157nkf_native_encoding NkfEncodingUTF_8 =		{ "UTF-8", w_iconv, w_oconv };
158nkf_native_encoding NkfEncodingUTF_16 =		{ "UTF-16", w_iconv16, w_oconv16 };
159nkf_native_encoding NkfEncodingUTF_32 =		{ "UTF-32", w_iconv32, w_oconv32 };
160
/*
 * One entry of nkf_encoding_table: a concrete encoding identified by an
 * enum nkf_encodings value and bound to the base encoding that supplies its
 * converters.  The table is terminated by an entry with id -1 and NULL
 * pointers.
 */
typedef struct {
    const int id;	/* enum nkf_encodings value; -1 in the terminator */
    const char *name;	/* canonical name of the encoding */
    const nkf_native_encoding *base_encoding;	/* provides iconv/oconv */
} nkf_encoding;
166
167nkf_encoding nkf_encoding_table[] = {
168    {ASCII,		"US-ASCII",		&NkfEncodingASCII},
169    {ISO_8859_1,	"ISO-8859-1",		&NkfEncodingASCII},
170    {ISO_2022_JP,	"ISO-2022-JP",		&NkfEncodingISO_2022_JP},
171    {CP50220,		"CP50220",		&NkfEncodingISO_2022_JP},
172    {CP50221,		"CP50221",		&NkfEncodingISO_2022_JP},
173    {CP50222,		"CP50222",		&NkfEncodingISO_2022_JP},
174    {ISO_2022_JP_1,	"ISO-2022-JP-1",	&NkfEncodingISO_2022_JP},
175    {ISO_2022_JP_3,	"ISO-2022-JP-3",	&NkfEncodingISO_2022_JP},
176    {ISO_2022_JP_2004,	"ISO-2022-JP-2004",	&NkfEncodingISO_2022_JP},
177    {SHIFT_JIS,		"Shift_JIS",		&NkfEncodingShift_JIS},
178    {WINDOWS_31J,	"Windows-31J",		&NkfEncodingShift_JIS},
179    {CP10001,		"CP10001",		&NkfEncodingShift_JIS},
180    {EUC_JP,		"EUC-JP",		&NkfEncodingEUC_JP},
181    {EUCJP_NKF,		"eucJP-nkf",		&NkfEncodingEUC_JP},
182    {CP51932,		"CP51932",		&NkfEncodingEUC_JP},
183    {EUCJP_MS,		"eucJP-MS",		&NkfEncodingEUC_JP},
184    {EUCJP_ASCII,	"eucJP-ASCII",		&NkfEncodingEUC_JP},
185    {SHIFT_JISX0213,	"Shift_JISX0213",	&NkfEncodingShift_JIS},
186    {SHIFT_JIS_2004,	"Shift_JIS-2004",	&NkfEncodingShift_JIS},
187    {EUC_JISX0213,	"EUC-JISX0213",		&NkfEncodingEUC_JP},
188    {EUC_JIS_2004,	"EUC-JIS-2004",		&NkfEncodingEUC_JP},
189    {UTF_8,		"UTF-8",		&NkfEncodingUTF_8},
190    {UTF_8N,		"UTF-8N",		&NkfEncodingUTF_8},
191    {UTF_8_BOM,		"UTF-8-BOM",		&NkfEncodingUTF_8},
192    {UTF8_MAC,		"UTF8-MAC",		&NkfEncodingUTF_8},
193    {UTF_16,		"UTF-16",		&NkfEncodingUTF_16},
194    {UTF_16BE,		"UTF-16BE",		&NkfEncodingUTF_16},
195    {UTF_16BE_BOM,	"UTF-16BE-BOM",		&NkfEncodingUTF_16},
196    {UTF_16LE,		"UTF-16LE",		&NkfEncodingUTF_16},
197    {UTF_16LE_BOM,	"UTF-16LE-BOM",		&NkfEncodingUTF_16},
198    {UTF_32,		"UTF-32",		&NkfEncodingUTF_32},
199    {UTF_32BE,		"UTF-32BE",		&NkfEncodingUTF_32},
200    {UTF_32BE_BOM,	"UTF-32BE-BOM",		&NkfEncodingUTF_32},
201    {UTF_32LE,		"UTF-32LE",		&NkfEncodingUTF_32},
202    {UTF_32LE_BOM,	"UTF-32LE-BOM",		&NkfEncodingUTF_32},
203    {BINARY,		"BINARY",		&NkfEncodingASCII},
204    {-1,		NULL,			NULL}
205};
206
207struct {
208    const char *name;
209    const int id;
210} encoding_name_to_id_table[] = {
211    {"US-ASCII",		ASCII},
212    {"ASCII",			ASCII},
213    {"646",			ASCII},
214    {"ROMAN8",			ASCII},
215    {"ISO-2022-JP",		ISO_2022_JP},
216    {"ISO2022JP-CP932",		CP50220},
217    {"CP50220",			CP50220},
218    {"CP50221",			CP50221},
219    {"CSISO2022JP",		CP50221},
220    {"CP50222",			CP50222},
221    {"ISO-2022-JP-1",		ISO_2022_JP_1},
222    {"ISO-2022-JP-3",		ISO_2022_JP_3},
223    {"ISO-2022-JP-2004",	ISO_2022_JP_2004},
224    {"SHIFT_JIS",		SHIFT_JIS},
225    {"SJIS",			SHIFT_JIS},
226    {"MS_Kanji",		SHIFT_JIS},
227    {"PCK",			SHIFT_JIS},
228    {"WINDOWS-31J",		WINDOWS_31J},
229    {"CSWINDOWS31J",		WINDOWS_31J},
230    {"CP932",			WINDOWS_31J},
231    {"MS932",			WINDOWS_31J},
232    {"CP10001",			CP10001},
233    {"EUCJP",			EUC_JP},
234    {"EUC-JP",			EUC_JP},
235    {"EUCJP-NKF",		EUCJP_NKF},
236    {"CP51932",			CP51932},
237    {"EUC-JP-MS",		EUCJP_MS},
238    {"EUCJP-MS",		EUCJP_MS},
239    {"EUCJPMS",			EUCJP_MS},
240    {"EUC-JP-ASCII",		EUCJP_ASCII},
241    {"EUCJP-ASCII",		EUCJP_ASCII},
242    {"SHIFT_JISX0213",		SHIFT_JISX0213},
243    {"SHIFT_JIS-2004",		SHIFT_JIS_2004},
244    {"EUC-JISX0213",		EUC_JISX0213},
245    {"EUC-JIS-2004",		EUC_JIS_2004},
246    {"UTF-8",			UTF_8},
247    {"UTF-8N",			UTF_8N},
248    {"UTF-8-BOM",		UTF_8_BOM},
249    {"UTF8-MAC",		UTF8_MAC},
250    {"UTF-8-MAC",		UTF8_MAC},
251    {"UTF-16",			UTF_16},
252    {"UTF-16BE",		UTF_16BE},
253    {"UTF-16BE-BOM",		UTF_16BE_BOM},
254    {"UTF-16LE",		UTF_16LE},
255    {"UTF-16LE-BOM",		UTF_16LE_BOM},
256    {"UTF-32",			UTF_32},
257    {"UTF-32BE",		UTF_32BE},
258    {"UTF-32BE-BOM",		UTF_32BE_BOM},
259    {"UTF-32LE",		UTF_32LE},
260    {"UTF-32LE-BOM",		UTF_32LE_BOM},
261    {"BINARY",			BINARY},
262    {NULL,			-1}
263};
264
265#if defined(DEFAULT_CODE_JIS)
266#define	    DEFAULT_ENCIDX ISO_2022_JP
267#elif defined(DEFAULT_CODE_SJIS)
268#define	    DEFAULT_ENCIDX SHIFT_JIS
269#elif defined(DEFAULT_CODE_WINDOWS_31J)
270#define	    DEFAULT_ENCIDX WINDOWS_31J
271#elif defined(DEFAULT_CODE_EUC)
272#define	    DEFAULT_ENCIDX EUC_JP
273#elif defined(DEFAULT_CODE_UTF8)
274#define	    DEFAULT_ENCIDX UTF_8
275#endif
276
277
/*
 * ASCII-only character classification and conversion helpers.
 *
 * All of these are function-like macros and may evaluate their argument more
 * than once, so never pass an expression with side effects (such as *p++).
 * Every use of the argument is parenthesized so that compound arguments,
 * e.g. bin2hex(a | b) or nkf_isblank(x ? y : z), expand with the intended
 * precedence.
 */
#define		is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for any other character. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Uppercase hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8-15 of c2 (after truncation to unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and the characters between SP and DEL (exclusive) other
 * than '?', '=', '_', '(', ')', '.' and '"'. */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
			      (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
			       && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303
304#define         HOLD_SIZE       1024
305#if defined(INT_IS_SHORT)
306#define         IOBUF_SIZE      2048
307#else
308#define         IOBUF_SIZE      16384
309#endif
310
311#define         DEFAULT_J       'B'
312#define         DEFAULT_R       'B'
313
314
315#define         GETA1   0x22
316#define         GETA2   0x2e
317
318
319/* MIME preprocessor */
320
321#ifdef EASYWIN /*Easy Win */
322extern POINT _BufferSize;
323#endif
324
/*
 * Per-encoding state record; input_code_list holds one entry per candidate
 * input encoding and is terminated by an entry whose name is NULL.
 * NOTE(review): the meaning of stat/score/index/buf/_file_stat is defined by
 * the status functions, which are outside this excerpt -- confirm there.
 */
struct input_code{
    const char *name;	/* encoding name, e.g. "EUC-JP" */
    nkf_char stat;
    nkf_char score;
    nkf_char index;
    nkf_char buf[3];
    void (*status_func)(struct input_code *, nkf_char);	/* may be NULL (UTF-16/32 entries) */
    nkf_char (*iconv_func)(nkf_char c2, nkf_char c1, nkf_char c0);	/* input converter for this entry */
    int _file_stat;
};
335
336static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
337static nkf_encoding *input_encoding = NULL;
338static nkf_encoding *output_encoding = NULL;
339
340#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341/* UCS Mapping
342 * 0: Shift_JIS, eucJP-ascii
343 * 1: eucJP-ms
344 * 2: CP932, CP51932
345 * 3: CP10001
346 */
347#define UCS_MAP_ASCII   0
348#define UCS_MAP_MS      1
349#define UCS_MAP_CP932   2
350#define UCS_MAP_CP10001 3
351static int ms_ucs_map_f = UCS_MAP_ASCII;
352#endif
353#ifdef UTF8_INPUT_ENABLE
354/* no NEC special, NEC-selected IBM extended and IBM extended characters */
355static  int     no_cp932ext_f = FALSE;
356/* ignore ZERO WIDTH NO-BREAK SPACE */
357static  int     no_best_fit_chars_f = FALSE;
358static  int     input_endian = ENDIAN_BIG;
359static  int     input_bom_f = FALSE;
360static  nkf_char     unicode_subchar = '?'; /* the regular substitution character */
361static  void    (*encode_fallback)(nkf_char c) = NULL;
362static  void    w_status(struct input_code *, nkf_char);
363#endif
364#ifdef UTF8_OUTPUT_ENABLE
365static  int     output_bom_f = FALSE;
366static  int     output_endian = ENDIAN_BIG;
367#endif
368
369static  void    std_putc(nkf_char c);
370static  nkf_char     std_getc(FILE *f);
371static  nkf_char     std_ungetc(nkf_char c,FILE *f);
372
373static  nkf_char     broken_getc(FILE *f);
374static  nkf_char     broken_ungetc(nkf_char c,FILE *f);
375
376static  nkf_char     mime_getc(FILE *f);
377
378static void mime_putc(nkf_char c);
379
380/* buffers */
381
382#if !defined(PERL_XS) && !defined(WIN32DLL)
383static unsigned char   stdibuf[IOBUF_SIZE];
384static unsigned char   stdobuf[IOBUF_SIZE];
385#endif
386
387#define NKF_UNSPECIFIED (-TRUE)
388
389/* flags */
390static int             unbuf_f = FALSE;
391static int             estab_f = FALSE;
392static int             nop_f = FALSE;
393static int             binmode_f = TRUE;       /* binary mode */
394static int             rot_f = FALSE;          /* rot14/43 mode */
395static int             hira_f = FALSE;          /* hira/kata henkan */
396static int             alpha_f = FALSE;        /* convert JIx0208 alphbet to ASCII */
397static int             mime_f = MIME_DECODE_DEFAULT;   /* convert MIME B base64 or Q */
398static int             mime_decode_f = FALSE;  /* mime decode is explicitly on */
399static int             mimebuf_f = FALSE;      /* MIME buffered input */
400static int             broken_f = FALSE;       /* convert ESC-less broken JIS */
401static int             iso8859_f = FALSE;      /* ISO8859 through */
402static int             mimeout_f = FALSE;       /* base64 mode */
403static int             x0201_f = NKF_UNSPECIFIED;   /* convert JIS X 0201 */
404static int             iso2022jp_f = FALSE;    /* replace non ISO-2022-JP with GETA */
405
406#ifdef UNICODE_NORMALIZATION
407static int nfc_f = FALSE;
408static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409static nkf_char (*i_nfc_ungetc)(nkf_char c ,FILE *f) = std_ungetc;
410#endif
411
412#ifdef INPUT_OPTION
413static int cap_f = FALSE;
414static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415static nkf_char (*i_cungetc)(nkf_char c ,FILE *f) = std_ungetc;
416
417static int url_f = FALSE;
418static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419static nkf_char (*i_uungetc)(nkf_char c ,FILE *f) = std_ungetc;
420#endif
421
422#define PREFIX_EUCG3    NKF_INT32_C(0x8F00)
423#define CLASS_MASK      NKF_INT32_C(0xFF000000)
424#define CLASS_UNICODE   NKF_INT32_C(0x01000000)
425#define VALUE_MASK      NKF_INT32_C(0x00FFFFFF)
426#define UNICODE_BMP_MAX NKF_INT32_C(0x0000FFFF)
427#define UNICODE_MAX     NKF_INT32_C(0x0010FFFF)
428#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
429#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
430#define nkf_char_unicode_p(c) ((c & CLASS_MASK) == CLASS_UNICODE)
431#define nkf_char_unicode_bmp_p(c) ((c & VALUE_MASK) <= UNICODE_BMP_MAX)
432#define nkf_char_unicode_value_p(c) ((c & VALUE_MASK) <= UNICODE_MAX)
433
434#define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
435
436#ifdef NUMCHAR_OPTION
437static int numchar_f = FALSE;
438static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
439static nkf_char (*i_nungetc)(nkf_char c ,FILE *f) = std_ungetc;
440#endif
441
442#ifdef CHECK_OPTION
443static int noout_f = FALSE;
444static void no_putc(nkf_char c);
445static int debug_f = FALSE;
446static void debug(const char *str);
447static nkf_char (*iconv_for_check)(nkf_char c2,nkf_char c1,nkf_char c0) = 0;
448#endif
449
450static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
451static  void    set_input_codename(const char *codename);
452
453#ifdef EXEC_IO
454static int exec_f = 0;
455#endif
456
457#ifdef SHIFTJIS_CP932
458/* invert IBM extended characters to others */
459static int cp51932_f = FALSE;
460
461/* invert NEC-selected IBM extended characters to IBM extended characters */
462static int cp932inv_f = TRUE;
463
464/* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
465#endif /* SHIFTJIS_CP932 */
466
467static int x0212_f = FALSE;
468static int x0213_f = FALSE;
469
470static unsigned char prefix_table[256];
471
472static void e_status(struct input_code *, nkf_char);
473static void s_status(struct input_code *, nkf_char);
474
475struct input_code input_code_list[] = {
476    {"EUC-JP",    0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
477    {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
478#ifdef UTF8_INPUT_ENABLE
479    {"UTF-8",     0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
480    {"UTF-16",     0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
481    {"UTF-32",     0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
482#endif
483    {NULL,        0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
484};
485
486static int              mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
487static int              base64_count = 0;
488
489/* X0208 -> ASCII converter */
490
491/* fold parameter */
492static int             f_line = 0;    /* chars in line */
493static int             f_prev = 0;
494static int             fold_preserve_f = FALSE; /* preserve new lines */
495static int             fold_f  = FALSE;
496static int             fold_len  = 0;
497
498/* options */
499static unsigned char   kanji_intro = DEFAULT_J;
500static unsigned char   ascii_intro = DEFAULT_R;
501
502/* Folding */
503
504#define FOLD_MARGIN  10
505#define DEFAULT_FOLD 60
506
507static int             fold_margin  = FOLD_MARGIN;
508
509/* process default */
510
511static nkf_char
512no_connection2(ARG_UNUSED nkf_char c2, ARG_UNUSED nkf_char c1, ARG_UNUSED nkf_char c0)
513{
514    fprintf(stderr,"nkf internal module connection failure.\n");
515    exit(EXIT_FAILURE);
516    return 0; /* LINT */
517}
518
519static void
520no_connection(nkf_char c2, nkf_char c1)
521{
522    no_connection2(c2,c1,0);
523}
524
525static nkf_char (*iconv)(nkf_char c2,nkf_char c1,nkf_char c0) = no_connection2;
526static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
527
528static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
529static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
530static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
531static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
532static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
533static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
534static void (*o_iso2022jp_check_conv)(nkf_char c2,nkf_char c1) = no_connection;
535
536/* static redirections */
537
538static  void   (*o_putc)(nkf_char c) = std_putc;
539
540static  nkf_char    (*i_getc)(FILE *f) = std_getc; /* general input */
541static  nkf_char    (*i_ungetc)(nkf_char c,FILE *f) =std_ungetc;
542
543static  nkf_char    (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
544static  nkf_char    (*i_bungetc)(nkf_char c ,FILE *f) = std_ungetc;
545
546static  void   (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
547
548static  nkf_char    (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
549static  nkf_char    (*i_mungetc)(nkf_char c ,FILE *f) = std_ungetc;
550
551/* for strict mime */
552static  nkf_char    (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
553static  nkf_char    (*i_mungetc_buf)(nkf_char c,FILE *f) = std_ungetc;
554
555/* Global states */
556static int output_mode = ASCII;    /* output kanji mode */
557static int input_mode =  ASCII;    /* input kanji mode */
558static int mime_decode_mode =   FALSE;    /* MIME mode B base64, Q hex */
559
560/* X0201 / X0208 conversion tables */
561
562/* X0201 kana conversion table */
563/* 90-9F A0-DF */
564static const unsigned char cv[]= {
565    0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
566    0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
567    0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
568    0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
569    0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
570    0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
571    0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
572    0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
573    0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
574    0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
575    0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
576    0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
577    0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
578    0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
579    0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
580    0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
581    0x00,0x00};
582
583
/* X0201 kana conversion table for dakuten */
585/* 90-9F A0-DF */
586static const unsigned char dv[]= {
587    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
589    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
590    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591    0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
592    0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
593    0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
594    0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
595    0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
596    0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
597    0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
598    0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
599    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603    0x00,0x00};
604
/* X0201 kana conversion table for han-dakuten */
606/* 90-9F A0-DF */
607static const unsigned char ev[]= {
608    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
616    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
617    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618    0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
619    0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
620    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
622    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
623    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
624    0x00,0x00};
625
/* X0201 kana to X0213 conversion table for han-dakuten */
627/* 90-9F A0-DF */
628static const unsigned char ev_x0213[]= {
629    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
630    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
631    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
633    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
634    0x00,0x00,0x00,0x00,0x25,0x77,0x25,0x78,
635    0x25,0x79,0x25,0x7a,0x25,0x7b,0x00,0x00,
636    0x00,0x00,0x00,0x00,0x25,0x7c,0x00,0x00,
637    0x00,0x00,0x00,0x00,0x25,0x7d,0x00,0x00,
638    0x25,0x7e,0x00,0x00,0x00,0x00,0x00,0x00,
639    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
640    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
641    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
642    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
643    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
644    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
645    0x00,0x00};
646
647
648/* X0208 kigou conversion table */
649/* 0x8140 - 0x819e */
650static const unsigned char fv[] = {
651
652    0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
653    0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
654    0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
655    0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
656    0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
657    0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
658    0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
659    0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
660    0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
661    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
662    0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
663    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
664} ;
665
666
667
668static int option_mode = 0;
669static int             file_out_f = FALSE;
670#ifdef OVERWRITE
671static int             overwrite_f = FALSE;
672static int             preserve_time_f = FALSE;
673static int             backup_f = FALSE;
674static char            *backup_suffix = "";
675#endif
676
677static int eolmode_f = 0;   /* CR, LF, CRLF */
678static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
679static nkf_char prev_cr = 0; /* CR or 0 */
680#ifdef EASYWIN /*Easy Win */
681static int             end_check;
682#endif /*Easy Win */
683
/*
 * malloc() wrapper that never returns NULL.
 *
 * A zero-byte request is turned into a one-byte request.  If the allocation
 * fails, the error is reported with perror() and the process exits with
 * EXIT_FAILURE.
 */
static void *
nkf_xmalloc(size_t size)
{
    void *mem = malloc(size ? size : 1);

    if (!mem) {
	perror("can't malloc");
	exit(EXIT_FAILURE);
    }
    return mem;
}
699
/*
 * realloc() wrapper that never returns NULL.
 *
 * A zero-byte request is turned into a one-byte request.  If the
 * reallocation fails, the error is reported with perror() and the process
 * exits with EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *grown = realloc(ptr, size ? size : 1);

    if (!grown) {
	perror("can't realloc");
	exit(EXIT_FAILURE);
    }
    return grown;
}
713
714#define nkf_xfree(ptr) free(ptr)
715
716static int
717nkf_str_caseeql(const char *src, const char *target)
718{
719    int i;
720    for (i = 0; src[i] && target[i]; i++) {
721	if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
722    }
723    if (src[i] || target[i]) return FALSE;
724    else return TRUE;
725}
726
727static nkf_encoding*
728nkf_enc_from_index(int idx)
729{
730    if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
731	return 0;
732    }
733    return &nkf_encoding_table[idx];
734}
735
736static int
737nkf_enc_find_index(const char *name)
738{
739    int i;
740    if (name[0] == 'X' && *(name+1) == '-') name += 2;
741    for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
742	if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
743	    return encoding_name_to_id_table[i].id;
744	}
745    }
746    return -1;
747}
748
749static nkf_encoding*
750nkf_enc_find(const char *name)
751{
752    int idx = -1;
753    idx = nkf_enc_find_index(name);
754    if (idx < 0) return 0;
755    return nkf_enc_from_index(idx);
756}
757
758#define nkf_enc_name(enc) (enc)->name
759#define nkf_enc_to_index(enc) (enc)->id
760#define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
761#define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
762#define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
763#define nkf_enc_asciicompat(enc) (\
764				  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
765				  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
766#define nkf_enc_unicode_p(enc) (\
767				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
768				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
769				nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
770#define nkf_enc_cp5022x_p(enc) (\
771				nkf_enc_to_index(enc) == CP50220 ||\
772				nkf_enc_to_index(enc) == CP50221 ||\
773				nkf_enc_to_index(enc) == CP50222)
774
775#ifdef DEFAULT_CODE_LOCALE
/*
 * Return the name of the character encoding of the current locale or code
 * page, or NULL when it cannot be determined on this platform.
 *
 *  - HAVE_LANGINFO_H:  the value of nl_langinfo(CODESET).
 *  - __WIN32__:        "CP<n>", where <n> is the value of GetACP().
 *  - __OS2__ (32bit):  "Shift_JIS" for code pages 932 and 943, otherwise
 *                      "CP<n>".
 *  - anything else (including OS/2 1.x): NULL.
 *
 * On Windows and OS/2 the returned string lives in a static buffer that is
 * overwritten by the next call.
 */
static const char*
nkf_locale_charmap(void)
{
#ifdef HAVE_LANGINFO_H
    return nl_langinfo(CODESET);
#elif defined(__WIN32__)
    static char buf[16];
    /* GetACP() returns an unsigned UINT, so it must be formatted with %u. */
    sprintf(buf, "CP%u", GetACP());
    return buf;
#elif defined(__OS2__)
# if defined(INT_IS_SHORT)
    /* OS/2 1.x */
    return NULL;
# else
    /* OS/2 32bit */
    static char buf[16];
    ULONG ulCP[1], ulncp;
    DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
    if (ulCP[0] == 932 || ulCP[0] == 943)
        strcpy(buf, "Shift_JIS");
    else
        sprintf(buf, "CP%lu", ulCP[0]);
    return buf;
# endif
#else
    return NULL;
#endif
}
803
804static nkf_encoding*
805nkf_locale_encoding()
806{
807    nkf_encoding *enc = 0;
808    const char *encname = nkf_locale_charmap();
809    if (encname)
810	enc = nkf_enc_find(encname);
811    return enc;
812}
813#endif /* DEFAULT_CODE_LOCALE */
814
815static nkf_encoding*
816nkf_utf8_encoding()
817{
818    return &nkf_encoding_table[UTF_8];
819}
820
821static nkf_encoding*
822nkf_default_encoding()
823{
824    nkf_encoding *enc = 0;
825#ifdef DEFAULT_CODE_LOCALE
826    enc = nkf_locale_encoding();
827#elif defined(DEFAULT_ENCIDX)
828    enc = nkf_enc_from_index(DEFAULT_ENCIDX);
829#endif
830    if (!enc) enc = nkf_utf8_encoding();
831    return enc;
832}
833
/*
 * Fixed-capacity stack of nkf_char values.  It is created by nkf_buf_new()
 * and never grows: nkf_buf_push() exits the process when it is full.
 */
typedef struct {
    long capa;	/* number of elements ptr can hold */
    long len;	/* number of elements currently stored */
    nkf_char *ptr;	/* element storage, allocated by nkf_buf_new() */
} nkf_buf_t;
839
840static nkf_buf_t *
841nkf_buf_new(int length)
842{
843    nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
844    buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
845    buf->capa = length;
846    buf->len = 0;
847    return buf;
848}
849
850#if 0
851static void
852nkf_buf_dispose(nkf_buf_t *buf)
853{
854    nkf_xfree(buf->ptr);
855    nkf_xfree(buf);
856}
857#endif
858
859#define nkf_buf_length(buf) ((buf)->len)
860#define nkf_buf_empty_p(buf) ((buf)->len == 0)
861
862static nkf_char
863nkf_buf_at(nkf_buf_t *buf, int index)
864{
865    assert(index <= buf->len);
866    return buf->ptr[index];
867}
868
869static void
870nkf_buf_clear(nkf_buf_t *buf)
871{
872    buf->len = 0;
873}
874
875static void
876nkf_buf_push(nkf_buf_t *buf, nkf_char c)
877{
878    if (buf->capa <= buf->len) {
879	exit(EXIT_FAILURE);
880    }
881    buf->ptr[buf->len++] = c;
882}
883
884static nkf_char
885nkf_buf_pop(nkf_buf_t *buf)
886{
887    assert(!nkf_buf_empty_p(buf));
888    return buf->ptr[--buf->len];
889}
890
891/* Normalization Form C */
892#ifndef PERL_XS
893#ifdef WIN32DLL
894#define fprintf dllprintf
895#endif
896
897static void
898version(void)
899{
900    fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
901}
902
903static void
904usage(void)
905{
906    fprintf(HELP_OUTPUT,
907	    "Usage:  nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
908#ifdef UTF8_OUTPUT_ENABLE
909	    " j/s/e/w  Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
910	    "          UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
911#else
912#endif
913#ifdef UTF8_INPUT_ENABLE
914	    " J/S/E/W  Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
915	    "          UTF option is -W[8,[16,32][B,L]]\n"
916#else
917	    " J/S/E    Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
918#endif
919	    );
920    fprintf(HELP_OUTPUT,
921	    " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
922	    " M[BQ]    MIME encode [B:base64 Q:quoted]\n"
923	    " f/F      Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
924	    );
925    fprintf(HELP_OUTPUT,
926	    " Z[0-4]   Default/0: Convert JISX0208 Alphabet to ASCII\n"
927	    "          1: Kankaku to one space  2: to two spaces  3: HTML Entity\n"
928	    "          4: JISX0208 Katakana to JISX0201 Katakana\n"
929	    " X,x      Convert Halfwidth Katakana to Fullwidth or preserve it\n"
930	    );
931    fprintf(HELP_OUTPUT,
932	    " O        Output to File (DEFAULT 'nkf.out')\n"
933	    " L[uwm]   Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
934	    );
935    fprintf(HELP_OUTPUT,
936	    " --ic=<encoding>        Specify the input encoding\n"
937	    " --oc=<encoding>        Specify the output encoding\n"
938	    " --hiragana --katakana  Hiragana/Katakana Conversion\n"
939	    " --katakana-hiragana    Converts each other\n"
940	    );
941    fprintf(HELP_OUTPUT,
942#ifdef INPUT_OPTION
943	    " --{cap, url}-input     Convert hex after ':' or '%%'\n"
944#endif
945#ifdef NUMCHAR_OPTION
946	    " --numchar-input        Convert Unicode Character Reference\n"
947#endif
948#ifdef UTF8_INPUT_ENABLE
949	    " --fb-{skip, html, xml, perl, java, subchar}\n"
950	    "                        Specify unassigned character's replacement\n"
951#endif
952	    );
953    fprintf(HELP_OUTPUT,
954#ifdef OVERWRITE
955	    " --in-place[=SUF]       Overwrite original files\n"
956	    " --overwrite[=SUF]      Preserve timestamp of original files\n"
957#endif
958	    " -g --guess             Guess the input code\n"
959	    " -v --version           Print the version\n"
960	    " --help/-V              Print this help / configuration\n"
961	    );
962    version();
963}
964
/*
 * Print a summary of the compile-time configuration to HELP_OUTPUT:
 * version, build date/time, default output encoding, default end of
 * line, and the MIME-decode / JIS X 0201 Katakana / help-output defaults.
 *
 * Most of the text is chosen by the preprocessor; only the default
 * output encoding name is looked up at run time.
 */
static void
show_configuration(void)
{
    /* Header and build timestamp. */
    fprintf(HELP_OUTPUT,
	    "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
	    "  Compile-time options:\n"
	    "    Compiled at:                 " __DATE__ " " __TIME__ "\n"
	   );
    /* Default output encoding: the format string AND its argument are both
       selected by the same preprocessor branch, so they always match. */
    fprintf(HELP_OUTPUT,
	    "    Default output encoding:     "
#ifdef DEFAULT_CODE_LOCALE
	    "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
#elif defined(DEFAULT_ENCIDX)
	    "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
#else
	    "NONE\n"
#endif
	   );
    /* Remaining defaults are pure compile-time string concatenation. */
    fprintf(HELP_OUTPUT,
	    "    Default output end of line:  "
#if DEFAULT_NEWLINE == CR
	    "CR"
#elif DEFAULT_NEWLINE == CRLF
	    "CRLF"
#else
	    "LF"
#endif
	    "\n"
	    "    Decode MIME encoded string:  "
#if MIME_DECODE_DEFAULT
	    "ON"
#else
	    "OFF"
#endif
	    "\n"
	    "    Convert JIS X 0201 Katakana: "
#if X0201_DEFAULT
	    "ON"
#else
	    "OFF"
#endif
	    "\n"
	    "    --help, --version output:    "
	    /* NOTE(review): the macro name HELP_OUTPUT_HELP_OUTPUT and the
	       "HELP_OUTPUT" text look like the result of a mechanical
	       rename; confirm which macro/stream name was intended. */
#if HELP_OUTPUT_HELP_OUTPUT
	    "HELP_OUTPUT"
#else
	    "STDOUT"
#endif
	    "\n");
}
1015#endif /*PERL_XS*/
1016
1017#ifdef OVERWRITE
/*
 * Build a backup file name from the template `suffix` and `filename`.
 *
 * If `suffix` contains one or more '*', every '*' is replaced by
 * `filename` and all other characters of `suffix` are copied verbatim.
 * Otherwise the result is `filename` followed by `suffix`.
 *
 * Returns a NUL-terminated string in a buffer obtained from
 * nkf_xmalloc(); releasing it is left to the caller.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    size_t asterisk_count = 0;
    size_t i, j;
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);

    for (i = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
	/* every '*' (one char) expands to filename_length chars */
	backup_filename = nkf_xmalloc(suffix_length - asterisk_count
				      + asterisk_count * filename_length + 1);
	for (i = 0, j = 0; i < suffix_length; i++) {
	    if (suffix[i] == '*') {
		/* append at the known write position instead of letting
		   strncat rescan the buffer from its start each time */
		memcpy(backup_filename + j, filename, filename_length);
		j += filename_length;
	    } else {
		backup_filename[j++] = suffix[i];
	    }
	}
	backup_filename[j] = '\0';
    } else {
	backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
	memcpy(backup_filename, filename, filename_length);
	/* +1 copies the terminating NUL of suffix as well */
	memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1052#endif
1053
1054#ifdef UTF8_INPUT_ENABLE
1055static void
1056nkf_each_char_to_hex(void (*f)(nkf_char c2,nkf_char c1), nkf_char c)
1057{
1058    int shift = 20;
1059    c &= VALUE_MASK;
1060    while(shift >= 0){
1061	if(c >= NKF_INT32_C(1)<<shift){
1062	    while(shift >= 0){
1063		(*f)(0, bin2hex(c>>shift));
1064		shift -= 4;
1065	    }
1066	}else{
1067	    shift -= 4;
1068	}
1069    }
1070    return;
1071}
1072
1073static void
1074encode_fallback_html(nkf_char c)
1075{
1076    (*oconv)(0, '&');
1077    (*oconv)(0, '#');
1078    c &= VALUE_MASK;
1079    if(c >= NKF_INT32_C(1000000))
1080	(*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1081    if(c >= NKF_INT32_C(100000))
1082	(*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1083    if(c >= 10000)
1084	(*oconv)(0, 0x30+(c/10000  )%10);
1085    if(c >= 1000)
1086	(*oconv)(0, 0x30+(c/1000   )%10);
1087    if(c >= 100)
1088	(*oconv)(0, 0x30+(c/100    )%10);
1089    if(c >= 10)
1090	(*oconv)(0, 0x30+(c/10     )%10);
1091    if(c >= 0)
1092	(*oconv)(0, 0x30+ c         %10);
1093    (*oconv)(0, ';');
1094    return;
1095}
1096
1097static void
1098encode_fallback_xml(nkf_char c)
1099{
1100    (*oconv)(0, '&');
1101    (*oconv)(0, '#');
1102    (*oconv)(0, 'x');
1103    nkf_each_char_to_hex(oconv, c);
1104    (*oconv)(0, ';');
1105    return;
1106}
1107
1108static void
1109encode_fallback_java(nkf_char c)
1110{
1111    (*oconv)(0, '\\');
1112    c &= VALUE_MASK;
1113    if(!nkf_char_unicode_bmp_p(c)){
1114	(*oconv)(0, 'U');
1115	(*oconv)(0, '0');
1116	(*oconv)(0, '0');
1117	(*oconv)(0, bin2hex(c>>20));
1118	(*oconv)(0, bin2hex(c>>16));
1119    }else{
1120	(*oconv)(0, 'u');
1121    }
1122    (*oconv)(0, bin2hex(c>>12));
1123    (*oconv)(0, bin2hex(c>> 8));
1124    (*oconv)(0, bin2hex(c>> 4));
1125    (*oconv)(0, bin2hex(c    ));
1126    return;
1127}
1128
1129static void
1130encode_fallback_perl(nkf_char c)
1131{
1132    (*oconv)(0, '\\');
1133    (*oconv)(0, 'x');
1134    (*oconv)(0, '{');
1135    nkf_each_char_to_hex(oconv, c);
1136    (*oconv)(0, '}');
1137    return;
1138}
1139
1140static void
1141encode_fallback_subchar(nkf_char c)
1142{
1143    c = unicode_subchar;
1144    (*oconv)((c>>8)&0xFF, c&0xFF);
1145    return;
1146}
1147#endif
1148
/*
 * Table of long ("--name") command-line options.
 *
 *   name   the option name without the leading "--".  Several names end
 *          in '=' (presumably options that take a value -- confirm
 *          against the option parser).
 *   alias  a string of short option letters, or "" when there is none
 *          (presumably such options are handled by name in the parser --
 *          confirm).
 *
 * Entries for optional features are compiled in only when the matching
 * feature macro is defined.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64","jMB"},
    {"euc","e"},
    {"euc-input","E"},
    {"fj","jm"},
    {"help",""},
    {"jis","j"},
    {"jis-input","J"},
    {"mac","sLm"},
    {"mime","jM"},
    {"mime-input","m"},
    {"msdos","sLw"},
    {"sjis","s"},
    {"sjis-input","S"},
    {"unix","eLu"},
    {"version","v"},
    {"windows","sLw"},
    {"hiragana","h1"},
    {"katakana","h2"},
    {"katakana-hiragana","h3"},
    /* "guess=" is listed before the plain "guess" entry */
    {"guess=", ""},
    {"guess", "g2"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    /* fallback selection for characters without a mapping */
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};
1228
1229static void
1230set_input_encoding(nkf_encoding *enc)
1231{
1232    switch (nkf_enc_to_index(enc)) {
1233    case ISO_8859_1:
1234	iso8859_f = TRUE;
1235	break;
1236    case CP50221:
1237    case CP50222:
1238	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1239    case CP50220:
1240#ifdef SHIFTJIS_CP932
1241	cp51932_f = TRUE;
1242#endif
1243#ifdef UTF8_OUTPUT_ENABLE
1244	ms_ucs_map_f = UCS_MAP_CP932;
1245#endif
1246	break;
1247    case ISO_2022_JP_1:
1248	x0212_f = TRUE;
1249	break;
1250    case ISO_2022_JP_3:
1251	x0212_f = TRUE;
1252	x0213_f = TRUE;
1253	break;
1254    case ISO_2022_JP_2004:
1255	x0212_f = TRUE;
1256	x0213_f = TRUE;
1257	break;
1258    case SHIFT_JIS:
1259	break;
1260    case WINDOWS_31J:
1261	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1262#ifdef SHIFTJIS_CP932
1263	cp51932_f = TRUE;
1264#endif
1265#ifdef UTF8_OUTPUT_ENABLE
1266	ms_ucs_map_f = UCS_MAP_CP932;
1267#endif
1268	break;
1269	break;
1270    case CP10001:
1271#ifdef SHIFTJIS_CP932
1272	cp51932_f = TRUE;
1273#endif
1274#ifdef UTF8_OUTPUT_ENABLE
1275	ms_ucs_map_f = UCS_MAP_CP10001;
1276#endif
1277	break;
1278    case EUC_JP:
1279	break;
1280    case EUCJP_NKF:
1281	break;
1282    case CP51932:
1283	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1284#ifdef SHIFTJIS_CP932
1285	cp51932_f = TRUE;
1286#endif
1287#ifdef UTF8_OUTPUT_ENABLE
1288	ms_ucs_map_f = UCS_MAP_CP932;
1289#endif
1290	break;
1291    case EUCJP_MS:
1292	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1293#ifdef SHIFTJIS_CP932
1294	cp51932_f = FALSE;
1295#endif
1296#ifdef UTF8_OUTPUT_ENABLE
1297	ms_ucs_map_f = UCS_MAP_MS;
1298#endif
1299	break;
1300    case EUCJP_ASCII:
1301	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1302#ifdef SHIFTJIS_CP932
1303	cp51932_f = FALSE;
1304#endif
1305#ifdef UTF8_OUTPUT_ENABLE
1306	ms_ucs_map_f = UCS_MAP_ASCII;
1307#endif
1308	break;
1309    case SHIFT_JISX0213:
1310    case SHIFT_JIS_2004:
1311	x0213_f = TRUE;
1312#ifdef SHIFTJIS_CP932
1313	cp51932_f = FALSE;
1314	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1315#endif
1316	break;
1317    case EUC_JISX0213:
1318    case EUC_JIS_2004:
1319	x0213_f = TRUE;
1320#ifdef SHIFTJIS_CP932
1321	cp51932_f = FALSE;
1322#endif
1323	break;
1324#ifdef UTF8_INPUT_ENABLE
1325#ifdef UNICODE_NORMALIZATION
1326    case UTF8_MAC:
1327	nfc_f = TRUE;
1328	break;
1329#endif
1330    case UTF_16:
1331    case UTF_16BE:
1332    case UTF_16BE_BOM:
1333	input_endian = ENDIAN_BIG;
1334	break;
1335    case UTF_16LE:
1336    case UTF_16LE_BOM:
1337	input_endian = ENDIAN_LITTLE;
1338	break;
1339    case UTF_32:
1340    case UTF_32BE:
1341    case UTF_32BE_BOM:
1342	input_endian = ENDIAN_BIG;
1343	break;
1344    case UTF_32LE:
1345    case UTF_32LE_BOM:
1346	input_endian = ENDIAN_LITTLE;
1347	break;
1348#endif
1349    }
1350}
1351
1352static void
1353set_output_encoding(nkf_encoding *enc)
1354{
1355    switch (nkf_enc_to_index(enc)) {
1356    case CP50220:
1357#ifdef SHIFTJIS_CP932
1358	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1359#endif
1360#ifdef UTF8_OUTPUT_ENABLE
1361	ms_ucs_map_f = UCS_MAP_CP932;
1362#endif
1363	break;
1364    case CP50221:
1365	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1366#ifdef SHIFTJIS_CP932
1367	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1368#endif
1369#ifdef UTF8_OUTPUT_ENABLE
1370	ms_ucs_map_f = UCS_MAP_CP932;
1371#endif
1372	break;
1373    case ISO_2022_JP:
1374#ifdef SHIFTJIS_CP932
1375	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376#endif
1377	break;
1378    case ISO_2022_JP_1:
1379	x0212_f = TRUE;
1380#ifdef SHIFTJIS_CP932
1381	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1382#endif
1383	break;
1384    case ISO_2022_JP_3:
1385    case ISO_2022_JP_2004:
1386	x0212_f = TRUE;
1387	x0213_f = TRUE;
1388#ifdef SHIFTJIS_CP932
1389	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1390#endif
1391	break;
1392    case SHIFT_JIS:
1393	break;
1394    case WINDOWS_31J:
1395	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1396#ifdef UTF8_OUTPUT_ENABLE
1397	ms_ucs_map_f = UCS_MAP_CP932;
1398#endif
1399	break;
1400    case CP10001:
1401#ifdef UTF8_OUTPUT_ENABLE
1402	ms_ucs_map_f = UCS_MAP_CP10001;
1403#endif
1404	break;
1405    case EUC_JP:
1406	x0212_f = TRUE;
1407#ifdef SHIFTJIS_CP932
1408	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1409#endif
1410#ifdef UTF8_OUTPUT_ENABLE
1411	ms_ucs_map_f = UCS_MAP_ASCII;
1412#endif
1413	break;
1414    case EUCJP_NKF:
1415	x0212_f = FALSE;
1416#ifdef SHIFTJIS_CP932
1417	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1418#endif
1419#ifdef UTF8_OUTPUT_ENABLE
1420	ms_ucs_map_f = UCS_MAP_ASCII;
1421#endif
1422	break;
1423    case CP51932:
1424	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1425#ifdef SHIFTJIS_CP932
1426	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1427#endif
1428#ifdef UTF8_OUTPUT_ENABLE
1429	ms_ucs_map_f = UCS_MAP_CP932;
1430#endif
1431	break;
1432    case EUCJP_MS:
1433	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1434	x0212_f = TRUE;
1435#ifdef UTF8_OUTPUT_ENABLE
1436	ms_ucs_map_f = UCS_MAP_MS;
1437#endif
1438	break;
1439    case EUCJP_ASCII:
1440	if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE;	/* -x specified implicitly */
1441	x0212_f = TRUE;
1442#ifdef UTF8_OUTPUT_ENABLE
1443	ms_ucs_map_f = UCS_MAP_ASCII;
1444#endif
1445	break;
1446    case SHIFT_JISX0213:
1447    case SHIFT_JIS_2004:
1448	x0213_f = TRUE;
1449#ifdef SHIFTJIS_CP932
1450	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1451#endif
1452	break;
1453    case EUC_JISX0213:
1454    case EUC_JIS_2004:
1455	x0212_f = TRUE;
1456	x0213_f = TRUE;
1457#ifdef SHIFTJIS_CP932
1458	if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1459#endif
1460	break;
1461#ifdef UTF8_OUTPUT_ENABLE
1462    case UTF_8_BOM:
1463	output_bom_f = TRUE;
1464	break;
1465    case UTF_16:
1466    case UTF_16BE_BOM:
1467	output_bom_f = TRUE;
1468	break;
1469    case UTF_16LE:
1470	output_endian = ENDIAN_LITTLE;
1471	output_bom_f = FALSE;
1472	break;
1473    case UTF_16LE_BOM:
1474	output_endian = ENDIAN_LITTLE;
1475	output_bom_f = TRUE;
1476	break;
1477    case UTF_32:
1478    case UTF_32BE_BOM:
1479	output_bom_f = TRUE;
1480	break;
1481    case UTF_32LE:
1482	output_endian = ENDIAN_LITTLE;
1483	output_bom_f = FALSE;
1484	break;
1485    case UTF_32LE_BOM:
1486	output_endian = ENDIAN_LITTLE;
1487	output_bom_f = TRUE;
1488	break;
1489#endif
1490    }
1491}
1492
1493static struct input_code*
1494find_inputcode_byfunc(nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1495{
1496    if (iconv_func){
1497	struct input_code *p = input_code_list;
1498	while (p->name){
1499	    if (iconv_func == p->iconv_func){
1500		return p;
1501	    }
1502	    p++;
1503	}
1504    }
1505    return 0;
1506}
1507
1508static void
1509set_iconv(nkf_char f, nkf_char (*iconv_func)(nkf_char c2,nkf_char c1,nkf_char c0))
1510{
1511#ifdef INPUT_CODE_FIX
1512    if (f || !input_encoding)
1513#endif
1514	if (estab_f != f){
1515	    estab_f = f;
1516	}
1517
1518    if (iconv_func
1519#ifdef INPUT_CODE_FIX
1520	&& (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1521#endif
1522       ){
1523	iconv = iconv_func;
1524    }
1525#ifdef CHECK_OPTION
1526    if (estab_f && iconv_for_check != iconv){
1527	struct input_code *p = find_inputcode_byfunc(iconv);
1528	if (p){
1529	    set_input_codename(p->name);
1530	    debug(p->name);
1531	}
1532	iconv_for_check = iconv;
1533    }
1534#endif
1535}
1536
1537#ifdef X0212_ENABLE
1538static nkf_char
1539x0212_shift(nkf_char c)
1540{
1541    nkf_char ret = c;
1542    c &= 0x7f;
1543    if (is_eucg3(ret)){
1544	if (0x75 <= c && c <= 0x7f){
1545	    ret = c + (0x109 - 0x75);
1546	}
1547    }else{
1548	if (0x75 <= c && c <= 0x7f){
1549	    ret = c + (0x113 - 0x75);
1550	}
1551    }
1552    return ret;
1553}
1554
1555
1556static nkf_char
1557x0212_unshift(nkf_char c)
1558{
1559    nkf_char ret = c;
1560    if (0x7f <= c && c <= 0x88){
1561	ret = c + (0x75 - 0x7f);
1562    }else if (0x89 <= c && c <= 0x92){
1563	ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1564    }
1565    return ret;
1566}
1567#endif /* X0212_ENABLE */
1568
1569static int
1570is_x0213_2_in_x0212(nkf_char c1)
1571{
1572    static const char x0213_2_table[] =
1573	{0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1};
1574    int ku = c1 - 0x20;
1575    if (ku <= 15)
1576	return x0213_2_table[ku]; /* 1, 3-5, 8, 12-15 */
1577    if (78 <= ku && ku <= 94)
1578	return 1;
1579    return 0;
1580}
1581
/*
 * Convert the pair (c2, c1) into two bytes stored through p2 and p1
 * (judging by the name and the arithmetic, presumably EUC-JP/JIS row and
 * cell to Shift_JIS lead and trail byte -- confirm).
 *
 * c2 may carry the EUCG3 prefix (tested with is_eucg3); such values are
 * handled by the JIS X 0213 plane-2 formulas when x0213_f is set, or by
 * the x0212_shiftjis table when X0212_ENABLE is defined.
 *
 * p2 and p1 may each be a null pointer; a null pointer is simply not
 * written.
 *
 * Returns 0 when a result was produced and 1 when the input could not
 * be converted.
 */
static nkf_char
e2s_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
{
    nkf_char ndx;
    if (is_eucg3(c2)){
	/* row number without the EUCG3 prefix */
	ndx = c2 & 0x7f;
	if (x0213_f && is_x0213_2_in_x0212(ndx)){
	    if((0x21 <= ndx && ndx <= 0x2F)){
		if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
		/* odd rows use the first half of the trail-byte range, even rows the second */
		if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
		return 0;
	    }else if(0x6E <= ndx && ndx <= 0x7E){
		if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
		if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
		return 0;
	    }
	    return 1;
	}
#ifdef X0212_ENABLE
	else if(nkf_isgraph(ndx)){
	    nkf_char val = 0;
	    const unsigned short *ptr;
	    /* table lookup: one sub-table per row, indexed by cell */
	    ptr = x0212_shiftjis[ndx - 0x21];
	    if (ptr){
		val = ptr[(c1 & 0x7f) - 0x21];
	    }
	    if (val){
		/* table entry holds both result bytes: high byte, low byte */
		c2 = val >> 8;
		c1 = val & 0xff;
		if (p2) *p2 = c2;
		if (p1) *p1 = c1;
		return 0;
	    }
	    /* no table entry: relocate the row and continue below */
	    c2 = x0212_shift(c2);
	}
#endif /* X0212_ENABLE */
    }
    /* anything still above 0x7F at this point is not convertible */
    if(0x7F < c2) return 1;
    if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
    if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
    return 0;
}
1624
/*
 * Convert the pair (c2, c1) into two values stored through p2 and p1
 * (judging by the name and the arithmetic, presumably Shift_JIS lead and
 * trail byte to EUC-JP/JIS row and cell -- confirm).
 *
 * Depending on compile-time options and flags the input is first
 * remapped through the shiftjis_cp932 / cp932inv tables, or translated
 * directly through shiftjis_x0212.  Results for the supplementary plane
 * carry PREFIX_EUCG3 in the stored *p2.
 *
 * p2 and p1 may each be a null pointer; a null pointer is simply not
 * written.
 *
 * Returns 1 when c1 is above 0xFC, otherwise 0.
 */
static nkf_char
s2e_conv(nkf_char c2, nkf_char c1, nkf_char *p2, nkf_char *p1)
{
#if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
    nkf_char val;
#endif
    /* rows for lead bytes 0xF0..0xF4, indexed by [lead - 0xF0][trail > 0x9E] */
    static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
    if (0xFC < c1) return 1;
#ifdef SHIFTJIS_CP932
    if (!cp932inv_f && !x0213_f && is_ibmext_in_sjis(c2)){
	/* replace the code by its table entry, if there is one */
	val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
	if (val){
	    c2 = val >> 8;
	    c1 = val & 0xff;
	}
    }
    if (cp932inv_f
	&& CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
	val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
	if (val){
	    c2 = val >> 8;
	    c1 = val & 0xff;
	}
    }
#endif /* SHIFTJIS_CP932 */
#ifdef X0212_ENABLE
    if (!x0213_f && is_ibmext_in_sjis(c2)){
	val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
	if (val){
	    /* entries above 0x7FFF are stored with the EUCG3 prefix */
	    if (val > 0x7FFF){
		c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
		c1 = val & 0xff;
	    }else{
		c2 = val >> 8;
		c1 = val & 0xff;
	    }
	    if (p2) *p2 = c2;
	    if (p1) *p1 = c1;
	    return 0;
	}
    }
#endif
    if(c2 >= 0x80){
	if(x0213_f && c2 >= 0xF0){
	    if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
		c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
	    }else{ /* 78<=k<=94 */
		c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
		if (0x9E < c1) c2++;
	    }
	}else{
#define         SJ0162  0x00e1          /* 01 - 62 ku offset */
#define         SJ6394  0x0161          /* 63 - 94 ku offset */
	    /* each lead byte covers two rows; the trail byte picks the second one */
	    c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
	    if (0x9E < c1) c2++;
	}
	/* trail byte -> cell: the offset depends on which half it lies in */
	if (c1 < 0x9F)
	    c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
	else {
	    c1 = c1 - 0x7E;
	}
    }

#ifdef X0212_ENABLE
    c2 = x0212_unshift(c2);
#endif
    if (p2) *p2 = c2;
    if (p1) *p1 = c1;
    return 0;
}
1695
1696#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1697static void
1698nkf_unicode_to_utf8(nkf_char val, nkf_char *p1, nkf_char *p2, nkf_char *p3, nkf_char *p4)
1699{
1700    val &= VALUE_MASK;
1701    if (val < 0x80){
1702	*p1 = val;
1703	*p2 = 0;
1704	*p3 = 0;
1705	*p4 = 0;
1706    }else if (val < 0x800){
1707	*p1 = 0xc0 | (val >> 6);
1708	*p2 = 0x80 | (val & 0x3f);
1709	*p3 = 0;
1710	*p4 = 0;
1711    } else if (nkf_char_unicode_bmp_p(val)) {
1712	*p1 = 0xe0 |  (val >> 12);
1713	*p2 = 0x80 | ((val >>  6) & 0x3f);
1714	*p3 = 0x80 | ( val        & 0x3f);
1715	*p4 = 0;
1716    } else if (nkf_char_unicode_value_p(val)) {
1717	*p1 = 0xf0 |  (val >> 18);
1718	*p2 = 0x80 | ((val >> 12) & 0x3f);
1719	*p3 = 0x80 | ((val >>  6) & 0x3f);
1720	*p4 = 0x80 | ( val        & 0x3f);
1721    } else {
1722	*p1 = 0;
1723	*p2 = 0;
1724	*p3 = 0;
1725	*p4 = 0;
1726    }
1727}
1728
1729static nkf_char
1730nkf_utf8_to_unicode(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
1731{
1732    nkf_char wc;
1733    if (c1 <= 0x7F) {
1734	/* single byte */
1735	wc = c1;
1736    }
1737    else if (c1 <= 0xC1) {
1738	/* trail byte or invalid */
1739	return -1;
1740    }
1741    else if (c1 <= 0xDF) {
1742	/* 2 bytes */
1743	wc  = (c1 & 0x1F) << 6;
1744	wc |= (c2 & 0x3F);
1745    }
1746    else if (c1 <= 0xEF) {
1747	/* 3 bytes */
1748	wc  = (c1 & 0x0F) << 12;
1749	wc |= (c2 & 0x3F) << 6;
1750	wc |= (c3 & 0x3F);
1751    }
1752    else if (c2 <= 0xF4) {
1753	/* 4 bytes */
1754	wc  = (c1 & 0x0F) << 18;
1755	wc |= (c2 & 0x3F) << 12;
1756	wc |= (c3 & 0x3F) << 6;
1757	wc |= (c4 & 0x3F);
1758    }
1759    else {
1760	return -1;
1761    }
1762    return wc;
1763}
1764#endif
1765
1766#ifdef UTF8_INPUT_ENABLE
1767static int
1768unicode_to_jis_common2(nkf_char c1, nkf_char c0,
1769		       const unsigned short *const *pp, nkf_char psize,
1770		       nkf_char *p2, nkf_char *p1)
1771{
1772    nkf_char c2;
1773    const unsigned short *p;
1774    unsigned short val;
1775
1776    if (pp == 0) return 1;
1777
1778    c1 -= 0x80;
1779    if (c1 < 0 || psize <= c1) return 1;
1780    p = pp[c1];
1781    if (p == 0)  return 1;
1782
1783    c0 -= 0x80;
1784    if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1785    val = p[c0];
1786    if (val == 0) return 1;
1787    if (no_cp932ext_f && (
1788			  (val>>8) == 0x2D || /* NEC special characters */
1789			  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1790			 )) return 1;
1791
1792    c2 = val >> 8;
1793    if (val > 0x7FFF){
1794	c2 &= 0x7f;
1795	c2 |= PREFIX_EUCG3;
1796    }
1797    if (c2 == SO) c2 = JIS_X_0201_1976_K;
1798    c1 = val & 0xFF;
1799    if (p2) *p2 = c2;
1800    if (p1) *p1 = c1;
1801    return 0;
1802}
1803
1804static int
1805unicode_to_jis_common(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
1806{
1807    const unsigned short *const *pp;
1808    const unsigned short *const *const *ppp;
1809    static const char no_best_fit_chars_table_C2[] =
1810    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1811	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1812	1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1813	0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1814    static const char no_best_fit_chars_table_C2_ms[] =
1815    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1816	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1817	1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1818	0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1819    static const char no_best_fit_chars_table_932_C2[] =
1820    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1821	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1822	1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1823	0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1824    static const char no_best_fit_chars_table_932_C3[] =
1825    {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1826	1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1827	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1828	1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1829    nkf_char ret = 0;
1830
1831    if(c2 < 0x80){
1832	*p2 = 0;
1833	*p1 = c2;
1834    }else if(c2 < 0xe0){
1835	if(no_best_fit_chars_f){
1836	    if(ms_ucs_map_f == UCS_MAP_CP932){
1837		switch(c2){
1838		case 0xC2:
1839		    if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1840		    break;
1841		case 0xC3:
1842		    if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1843		    break;
1844		}
1845	    }else if(!cp932inv_f){
1846		switch(c2){
1847		case 0xC2:
1848		    if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1849		    break;
1850		case 0xC3:
1851		    if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1852		    break;
1853		}
1854	    }else if(ms_ucs_map_f == UCS_MAP_MS){
1855		if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1856	    }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1857		switch(c2){
1858		case 0xC2:
1859		    switch(c1){
1860		    case 0xA2:
1861		    case 0xA3:
1862		    case 0xA5:
1863		    case 0xA6:
1864		    case 0xAC:
1865		    case 0xAF:
1866		    case 0xB8:
1867			return 1;
1868		    }
1869		    break;
1870		}
1871	    }
1872	}
1873	pp =
1874	    ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_2bytes_932 :
1875	    ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_2bytes_ms :
1876	    ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_2bytes_mac :
1877	    x0213_f ? utf8_to_euc_2bytes_x0213 :
1878	    utf8_to_euc_2bytes;
1879	ret =  unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1880    }else if(c0 < 0xF0){
1881	if(no_best_fit_chars_f){
1882	    if(ms_ucs_map_f == UCS_MAP_CP932){
1883		if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1884	    }else if(ms_ucs_map_f == UCS_MAP_MS){
1885		switch(c2){
1886		case 0xE2:
1887		    switch(c1){
1888		    case 0x80:
1889			if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1890			break;
1891		    case 0x88:
1892			if(c0 == 0x92) return 1;
1893			break;
1894		    }
1895		    break;
1896		case 0xE3:
1897		    if(c1 == 0x80 || c0 == 0x9C) return 1;
1898		    break;
1899		}
1900	    }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1901		switch(c2){
1902		case 0xE3:
1903		    switch(c1){
1904		    case 0x82:
1905			if(c0 == 0x94) return 1;
1906			break;
1907		    case 0x83:
1908			if(c0 == 0xBB) return 1;
1909			break;
1910		    }
1911		    break;
1912		}
1913	    }else{
1914		switch(c2){
1915		case 0xE2:
1916		    switch(c1){
1917		    case 0x80:
1918			if(c0 == 0x95) return 1;
1919			break;
1920		    case 0x88:
1921			if(c0 == 0xA5) return 1;
1922			break;
1923		    }
1924		    break;
1925		case 0xEF:
1926		    switch(c1){
1927		    case 0xBC:
1928			if(c0 == 0x8D) return 1;
1929			break;
1930		    case 0xBD:
1931			if(c0 == 0x9E && !cp932inv_f) return 1;
1932			break;
1933		    case 0xBF:
1934			if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1935			break;
1936		    }
1937		    break;
1938		}
1939	    }
1940	}
1941	ppp =
1942	    ms_ucs_map_f == UCS_MAP_CP932 ? utf8_to_euc_3bytes_932 :
1943	    ms_ucs_map_f == UCS_MAP_MS ? utf8_to_euc_3bytes_ms :
1944	    ms_ucs_map_f == UCS_MAP_CP10001 ? utf8_to_euc_3bytes_mac :
1945	    x0213_f ? utf8_to_euc_3bytes_x0213 :
1946	    utf8_to_euc_3bytes;
1947	ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1948    }else return -1;
1949#ifdef SHIFTJIS_CP932
1950    if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1951	nkf_char s2, s1;
1952	if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1953	    s2e_conv(s2, s1, p2, p1);
1954	}else{
1955	    ret = 1;
1956	}
1957    }
1958#endif
1959    return ret;
1960}
1961
1962#ifdef UTF8_OUTPUT_ENABLE
/*
 * Search the first `size` rows of `tbl` for a row whose element [0]
 * equals `euc`; on a hit, store that row's element [2] in `low` and stop.
 *
 * `low` is NOT a macro parameter: the expansion assigns to a variable
 * named `low` that must exist in the enclosing scope.  The macro declares
 * its own loop counter `i`, and its arguments are not parenthesized, so
 * pass simple expressions only.
 */
#define X0213_SURROGATE_FIND(tbl, size, euc) do { \
	int i; \
	for (i = 0; i < size; i++) \
	    if (tbl[i][0] == euc) { \
		low = tbl[i][2]; \
		break; \
	    } \
    } while (0)
1971
/*
 * Look up the code point for the pair (c2, c1) in the conversion tables
 * (judging by the name and table names, presumably EUC-JP/JIS to
 * Unicode -- confirm).
 *
 * c2 selects the table: JIS_X_0201_1976_K uses the one-byte table, a
 * value satisfying is_eucg3() uses the x0212 tables (X0212_ENABLE only),
 * anything else the two-byte tables chosen by x0213_f / ms_ucs_map_f.
 * A few code points are returned directly for particular mappings.
 *
 * When x0213_f is set and the table entry is a high surrogate
 * (0xD800..0xDBFF), the matching low surrogate is searched in the
 * surrogate tables and the combined value is returned.
 *
 * Returns the table value, or 0 when there is no entry.
 */
static nkf_char
e2w_conv(nkf_char c2, nkf_char c1)
{
    const unsigned short *p;

    if (c2 == JIS_X_0201_1976_K) {
	if (ms_ucs_map_f == UCS_MAP_CP10001) {
	    switch (c1) {
	    case 0x20:
		return 0xA0;
	    case 0x7D:
		return 0xA9;
	    }
	}
	p = euc_to_utf8_1byte;
#ifdef X0212_ENABLE
    } else if (is_eucg3(c2)){
	if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
	    return 0xA6;
	}
	/* row index into the table: strip the prefix, rows start at 0x21 */
	c2 = (c2&0x7f) - 0x21;
	if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
	    p =
		x0213_f ? x0212_to_utf8_2bytes_x0213[c2] :
		x0212_to_utf8_2bytes[c2];
	else
	    return 0;
#endif
    } else {
	c2 &= 0x7f;
	c2 = (c2&0x7f) - 0x21;
	if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
	    p =
		x0213_f ? euc_to_utf8_2bytes_x0213[c2] :
		ms_ucs_map_f == UCS_MAP_ASCII ? euc_to_utf8_2bytes[c2] :
		ms_ucs_map_f == UCS_MAP_CP10001 ? euc_to_utf8_2bytes_mac[c2] :
		euc_to_utf8_2bytes_ms[c2];
	else
	    return 0;
    }
    /* the selected row may be absent */
    if (!p) return 0;
    /* column index: cells start at 0x21 */
    c1 = (c1 & 0x7f) - 0x21;
    if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) {
	nkf_char val = p[c1];
	if (x0213_f && 0xD800<=val && val<=0xDBFF) {
	    /* rebuild the two-byte code used as the key of the surrogate tables */
	    nkf_char euc = (c2+0x21)<<8 | (c1+0x21);
	    nkf_char low = 0;
	    /* the row pointer tells which of the two surrogate tables applies */
	    if (p==x0212_to_utf8_2bytes_x0213[c2]) {
		X0213_SURROGATE_FIND(x0213_2_surrogate_table, sizeof_x0213_2_surrogate_table, euc);
	    } else {
		X0213_SURROGATE_FIND(x0213_1_surrogate_table, sizeof_x0213_1_surrogate_table, euc);
	    }
	    if (!low) return 0;
	    return UTF16_TO_UTF32(val, low);
	} else {
	    return val;
	}
    }
    return 0;
}
2032
2033static nkf_char
2034e2w_combining(nkf_char comb, nkf_char c2, nkf_char c1)
2035{
2036    nkf_char euc;
2037    int i;
2038    for (i = 0; i < sizeof_x0213_combining_chars; i++)
2039	if (x0213_combining_chars[i] == comb)
2040	    break;
2041    if (i >= sizeof_x0213_combining_chars)
2042	return 0;
2043    euc = (c2&0x7f)<<8 | (c1&0x7f);
2044    for (i = 0; i < sizeof_x0213_combining_table; i++)
2045	if (x0213_combining_table[i][0] == euc)
2046	    return x0213_combining_table[i][1];
2047    return 0;
2048}
2049#endif
2050
2051static nkf_char
2052w2e_conv(nkf_char c2, nkf_char c1, nkf_char c0, nkf_char *p2, nkf_char *p1)
2053{
2054    nkf_char ret = 0;
2055
2056    if (!c1){
2057	*p2 = 0;
2058	*p1 = c2;
2059    }else if (0xc0 <= c2 && c2 <= 0xef) {
2060	ret =  unicode_to_jis_common(c2, c1, c0, p2, p1);
2061#ifdef NUMCHAR_OPTION
2062	if (ret > 0){
2063	    if (p2) *p2 = 0;
2064	    if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
2065	    ret = 0;
2066	}
2067#endif
2068    }
2069    return ret;
2070}
2071
2072#ifdef UTF8_INPUT_ENABLE
2073static nkf_char
2074w16e_conv(nkf_char val, nkf_char *p2, nkf_char *p1)
2075{
2076    nkf_char c1, c2, c3, c4;
2077    nkf_char ret = 0;
2078    val &= VALUE_MASK;
2079    if (val < 0x80) {
2080	*p2 = 0;
2081	*p1 = val;
2082    }
2083    else if (nkf_char_unicode_bmp_p(val)){
2084	nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2085	ret =  unicode_to_jis_common(c1, c2, c3, p2, p1);
2086	if (ret > 0){
2087	    *p2 = 0;
2088	    *p1 = nkf_char_unicode_new(val);
2089	    ret = 0;
2090	}
2091    }
2092    else {
2093	int i;
2094	if (x0213_f) {
2095	    c1 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */
2096	    c2 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2097	    for (i = 0; i < sizeof_x0213_1_surrogate_table; i++)
2098		if (x0213_1_surrogate_table[i][1] == c1 && x0213_1_surrogate_table[i][2] == c2) {
2099		    val = x0213_1_surrogate_table[i][0];
2100		    *p2 = val >> 8;
2101		    *p1 = val & 0xFF;
2102		    return 0;
2103		}
2104	    for (i = 0; i < sizeof_x0213_2_surrogate_table; i++)
2105		if (x0213_2_surrogate_table[i][1] == c1 && x0213_2_surrogate_table[i][2] == c2) {
2106		    val = x0213_2_surrogate_table[i][0];
2107		    *p2 = PREFIX_EUCG3 | (val >> 8);
2108		    *p1 = val & 0xFF;
2109		    return 0;
2110		}
2111	}
2112	*p2 = 0;
2113	*p1 = nkf_char_unicode_new(val);
2114    }
2115    return ret;
2116}
2117#endif
2118
/*
 * Input converter for EUC-style input (judging by the name; confirm):
 * normalizes the incoming bytes (c2, c1, c0) and passes the resulting
 * pair to (*oconv)(c2, c1).
 *
 * Branches, in order:
 *   - c2 is JIS_X_0201_1976_K or SS2: either replaced by GETA1/GETA2
 *     (iso2022jp_f set and x0201_f clear) or tagged JIS_X_0201_1976_K
 *     with c1 reduced to seven bits;
 *   - c2 == 0x8f (X0212_ENABLE only): three-byte form; needs c0;
 *   - EOF, 0, values below SP and ISO_8859_1: passed through unchanged;
 *   - anything else: two-byte form, reduced to seven bits per byte.
 * In both multi-byte branches certain ranges are turned into Unicode
 * private-use values instead, and with SHIFTJIS_CP932 and cp51932_f the
 * pair is round-tripped through e2s_conv/s2e_conv.
 *
 * Returns -1 when c2 == 0x8f and c0 is 0 (nothing is emitted),
 * otherwise 0.
 */
static nkf_char
e_iconv(nkf_char c2, nkf_char c1, nkf_char c0)
{
    if (c2 == JIS_X_0201_1976_K || c2 == SS2){
	if (iso2022jp_f && !x0201_f) {
	    c2 = GETA1; c1 = GETA2;
	} else {
	    c2 = JIS_X_0201_1976_K;
	    c1 &= 0x7f;
	}
#ifdef X0212_ENABLE
    }else if (c2 == 0x8f){
	if (c0 == 0){
	    /* third byte not available */
	    return -1;
	}
	if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
	    /* encoding is eucJP-ms, so invert to Unicode Private User Area */
	    c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
	    c2 = 0;
	} else {
	    /* keep the 0x8f marker in the high byte of c2 */
	    c2 = (c2 << 8) | (c1 & 0x7f);
	    c1 = c0 & 0x7f;
#ifdef SHIFTJIS_CP932
	    if (cp51932_f){
		nkf_char s2, s1;
		if (e2s_conv(c2, c1, &s2, &s1) == 0){
		    s2e_conv(s2, s1, &c2, &c1);
		    /* a result without prefix is reduced to seven bits */
		    if (c2 < 0x100){
			c1 &= 0x7f;
			c2 &= 0x7f;
		    }
		}
	    }
#endif /* SHIFTJIS_CP932 */
	}
#endif /* X0212_ENABLE */
    } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
	/* NOP */
    } else {
	if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
	    /* encoding is eucJP-ms, so invert to Unicode Private User Area */
	    c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
	    c2 = 0;
	} else {
	    c1 &= 0x7f;
	    c2 &= 0x7f;
#ifdef SHIFTJIS_CP932
	    /* only rows 0x79..0x7c are round-tripped */
	    if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
		nkf_char s2, s1;
		if (e2s_conv(c2, c1, &s2, &s1) == 0){
		    s2e_conv(s2, s1, &c2, &c1);
		    if (c2 < 0x100){
			c1 &= 0x7f;
			c2 &= 0x7f;
		    }
		}
	    }
#endif /* SHIFTJIS_CP932 */
	}
    }
    (*oconv)(c2, c1);
    return 0;
}
2182
2183static nkf_char
2184s_iconv(ARG_UNUSED nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2185{
2186    if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2187	if (iso2022jp_f && !x0201_f) {
2188	    c2 = GETA1; c1 = GETA2;
2189	} else {
2190	    c1 &= 0x7f;
2191	}
2192    } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2193	/* NOP */
2194    } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2195	/* CP932 UDC */
2196	if(c1 == 0x7F) return 0;
2197	c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2198	c2 = 0;
2199    } else {
2200	nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2201	if (ret) return ret;
2202    }
2203    (*oconv)(c2, c1);
2204    return 0;
2205}
2206
2207static int
2208x0213_wait_combining_p(nkf_char wc)
2209{
2210    int i;
2211    for (i = 0; i < sizeof_x0213_combining_table; i++) {
2212	if (x0213_combining_table[i][1] == wc) {
2213	    return TRUE;
2214	}
2215    }
2216    return FALSE;
2217}
2218
2219static int
2220x0213_combining_p(nkf_char wc)
2221{
2222    int i;
2223    for (i = 0; i < sizeof_x0213_combining_chars; i++) {
2224	if (x0213_combining_chars[i] == wc) {
2225	    return TRUE;
2226	}
2227    }
2228    return FALSE;
2229}
2230
/*
 * Input converter for UTF-8: validates the sequence whose lead byte is c1
 * and forwards the converted character to (*oconv).
 *
 *   c1 - first byte (0 when a single byte is carried in c2, see the
 *        "0 : 1 byte" branch)
 *   c2 - second byte
 *   c3 - third byte; a value above 0xFF packs the third byte in the upper
 *        bits and the fourth byte in the low 8 bits
 *
 * Returns:
 *    0  the input was consumed (forwarded to (*oconv), or rejected as an
 *       invalid sequence without output)
 *   -1  lead byte of a 3-byte sequence but c3 is 0
 *   -2  lead byte of a 4-byte sequence but c3 is 0
 *   -3  x0213_f is set and the character is a possible base of a
 *       combining sequence (x0213_wait_combining_p)
 *   any other non-zero value comes from w2e_conv(); nothing is output then
 */
2231static nkf_char
2232w_iconv(nkf_char c1, nkf_char c2, nkf_char c3)
2233{
2234    nkf_char ret = 0, c4 = 0;
    /* Class of each lead byte 0xC0-0xFF; the tens digit is the sequence
     * length implied by the byte, the units digit selects which range
     * check the switch below applies to the second byte. Classes without
     * a case label (20, 43, 50, 60, 70) are rejected by `default`. */
2235    static const char w_iconv_utf8_1st_byte[] =
2236    { /* 0xC0 - 0xFF */
2237	20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2238	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2239	30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2240	40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2241
    /* Unpack the fourth byte when the caller packed bytes 3 and 4 into c3. */
2242    if (c3 > 0xFF) {
2243	c4 = c3 & 0xFF;
2244	c3 >>= 8;
2245    }
2246
    /* Values outside 0..0xFF (e.g. EOF) skip validation entirely. */
2247    if (c1 < 0 || 0xff < c1) {
2248    }else if (c1 == 0) { /* 0 : 1 byte*/
2249	c3 = 0;
2250    } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2251	return 0;
2252    } else{
	/* NOTE(review): a c1 in 0x01-0x7F would also reach this lookup and
	 * index the table with a negative offset; callers presumably never
	 * pass such a value as c1 - confirm. */
2253	switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2254	case 21:
2255	    if (c2 < 0x80 || 0xBF < c2) return 0;
2256	    break;
2257	case 30:
2258	    if (c3 == 0) return -1;
2259	    if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2260		return 0;
2261	    break;
2262	case 31:
2263	case 33:
2264	    if (c3 == 0) return -1;
2265	    if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2266		return 0;
2267	    break;
2268	case 32:
2269	    if (c3 == 0) return -1;
2270	    if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2271		return 0;
2272	    break;
2273	case 40:
2274	    if (c3 == 0) return -2;
2275	    if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2276		return 0;
2277	    break;
2278	case 41:
2279	    if (c3 == 0) return -2;
2280	    if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2281		return 0;
2282	    break;
2283	case 42:
2284	    if (c3 == 0) return -2;
2285	    if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2286		return 0;
2287	    break;
2288	default:
2289	    return 0;
2290	    break;
2291	}
2292    }
    /* c1 == 0 and EOF are forwarded untouched. */
2293    if (c1 == 0 || c1 == EOF){
2294    } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
	/* 4-byte sequences are passed on as a tagged Unicode value. */
2295	c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2296	c1 = 0;
2297    } else {
	/* Ask the caller to look at the next character before converting a
	 * possible base of a JIS X 0213 combining sequence. */
2298	if (x0213_f && x0213_wait_combining_p(nkf_utf8_to_unicode(c1, c2, c3, c4)))
2299	    return -3;
2300	ret = w2e_conv(c1, c2, c3, &c1, &c2);
2301    }
2302    if (ret == 0){
2303	(*oconv)(c1, c2);
2304    }
2305    return ret;
2306}
2307
2308static nkf_char
2309w_iconv_nocombine(nkf_char c1, nkf_char c2, nkf_char c3)
2310{
2311    /* continue from the line below 'return -3;' in w_iconv() */
2312    nkf_char ret = w2e_conv(c1, c2, c3, &c1, &c2);
2313    if (ret == 0){
2314	(*oconv)(c1, c2);
2315    }
2316    return ret;
2317}
2318
2319#define NKF_ICONV_INVALID_CODE_RANGE -13
2320#define NKF_ICONV_WAIT_COMBINING_CHAR -14
2321#define NKF_ICONV_NOT_COMBINED -15
2322static size_t
2323unicode_iconv(nkf_char wc, int nocombine)
2324{
2325    nkf_char c1, c2;
2326    int ret = 0;
2327
2328    if (wc < 0x80) {
2329	c2 = 0;
2330	c1 = wc;
2331    }else if ((wc>>11) == 27) {
2332	/* unpaired surrogate */
2333	return NKF_ICONV_INVALID_CODE_RANGE;
2334    }else if (wc < 0xFFFF) {
2335	if (!nocombine && x0213_f && x0213_wait_combining_p(wc))
2336	    return NKF_ICONV_WAIT_COMBINING_CHAR;
2337	ret = w16e_conv(wc, &c2, &c1);
2338	if (ret) return ret;
2339    }else if (wc < 0x10FFFF) {
2340	c2 = 0;
2341	c1 = nkf_char_unicode_new(wc);
2342    } else {
2343	return NKF_ICONV_INVALID_CODE_RANGE;
2344    }
2345    (*oconv)(c2, c1);
2346    return 0;
2347}
2348
2349static nkf_char
2350unicode_iconv_combine(nkf_char wc, nkf_char wc2)
2351{
2352    nkf_char c1, c2;
2353    int i;
2354
2355    if (wc2 < 0x80) {
2356	return NKF_ICONV_NOT_COMBINED;
2357    }else if ((wc2>>11) == 27) {
2358	/* unpaired surrogate */
2359	return NKF_ICONV_INVALID_CODE_RANGE;
2360    }else if (wc2 < 0xFFFF) {
2361	if (!x0213_combining_p(wc2))
2362	    return NKF_ICONV_NOT_COMBINED;
2363	for (i = 0; i < sizeof_x0213_combining_table; i++) {
2364	    if (x0213_combining_table[i][1] == wc &&
2365		x0213_combining_table[i][2] == wc2) {
2366		c2 = x0213_combining_table[i][0] >> 8;
2367		c1 = x0213_combining_table[i][0] & 0x7f;
2368		(*oconv)(c2, c1);
2369		return 0;
2370	    }
2371	}
2372    }else if (wc2 < 0x10FFFF) {
2373	return NKF_ICONV_NOT_COMBINED;
2374    } else {
2375	return NKF_ICONV_INVALID_CODE_RANGE;
2376    }
2377    return NKF_ICONV_NOT_COMBINED;
2378}
2379
2380static nkf_char
2381w_iconv_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6)
2382{
2383    nkf_char wc, wc2;
2384    wc = nkf_utf8_to_unicode(c1, c2, c3, 0);
2385    wc2 = nkf_utf8_to_unicode(c4, c5, c6, 0);
2386    if (wc2 < 0)
2387	return wc2;
2388    return unicode_iconv_combine(wc, wc2);
2389}
2390
2391#define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2392#define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
2393static size_t
2394nkf_iconv_utf_16(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2395{
2396    nkf_char wc;
2397
2398    if (c1 == EOF) {
2399	(*oconv)(EOF, 0);
2400	return 0;
2401    }
2402
2403    if (input_endian == ENDIAN_BIG) {
2404	if (0xD8 <= c1 && c1 <= 0xDB) {
2405	    if (0xDC <= c3 && c3 <= 0xDF) {
2406		wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2407	    } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2408	} else {
2409	    wc = c1 << 8 | c2;
2410	}
2411    } else {
2412	if (0xD8 <= c2 && c2 <= 0xDB) {
2413	    if (0xDC <= c4 && c4 <= 0xDF) {
2414		wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2415	    } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2416	} else {
2417	    wc = c2 << 8 | c1;
2418	}
2419    }
2420
2421    return (*unicode_iconv)(wc, FALSE);
2422}
2423
2424static size_t
2425nkf_iconv_utf_16_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2426{
2427    nkf_char wc, wc2;
2428
2429    if (input_endian == ENDIAN_BIG) {
2430	if (0xD8 <= c3 && c3 <= 0xDB) {
2431	    return NKF_ICONV_NOT_COMBINED;
2432	} else {
2433	    wc = c1 << 8 | c2;
2434	    wc2 = c3 << 8 | c4;
2435	}
2436    } else {
2437	if (0xD8 <= c2 && c2 <= 0xDB) {
2438	    return NKF_ICONV_NOT_COMBINED;
2439	} else {
2440	    wc = c2 << 8 | c1;
2441	    wc2 = c4 << 8 | c3;
2442	}
2443    }
2444
2445    return unicode_iconv_combine(wc, wc2);
2446}
2447
2448static size_t
2449nkf_iconv_utf_16_nocombine(nkf_char c1, nkf_char c2)
2450{
2451    nkf_char wc;
2452    if (input_endian == ENDIAN_BIG)
2453	wc = c1 << 8 | c2;
2454    else
2455	wc = c2 << 8 | c1;
2456    return (*unicode_iconv)(wc, TRUE);
2457}
2458
2459static nkf_char
2460w_iconv16(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2461{
2462    (*oconv)(c2, c1);
2463    return 16; /* different from w_iconv32 */
2464}
2465
2466static nkf_char
2467w_iconv32(nkf_char c2, nkf_char c1, ARG_UNUSED nkf_char c0)
2468{
2469    (*oconv)(c2, c1);
2470    return 32; /* different from w_iconv16 */
2471}
2472
2473static nkf_char
2474utf32_to_nkf_char(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2475{
2476    nkf_char wc;
2477
2478    switch(input_endian){
2479    case ENDIAN_BIG:
2480	wc = c2 << 16 | c3 << 8 | c4;
2481	break;
2482    case ENDIAN_LITTLE:
2483	wc = c3 << 16 | c2 << 8 | c1;
2484	break;
2485    case ENDIAN_2143:
2486	wc = c1 << 16 | c4 << 8 | c3;
2487	break;
2488    case ENDIAN_3412:
2489	wc = c4 << 16 | c1 << 8 | c2;
2490	break;
2491    default:
2492	return NKF_ICONV_INVALID_CODE_RANGE;
2493    }
2494    return wc;
2495}
2496
2497static size_t
2498nkf_iconv_utf_32(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2499{
2500    nkf_char wc;
2501
2502    if (c1 == EOF) {
2503	(*oconv)(EOF, 0);
2504	return 0;
2505    }
2506
2507    wc = utf32_to_nkf_char(c1, c2, c3, c4);
2508    if (wc < 0)
2509	return wc;
2510
2511    return (*unicode_iconv)(wc, FALSE);
2512}
2513
2514static nkf_char
2515nkf_iconv_utf_32_combine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4, nkf_char c5, nkf_char c6, nkf_char c7, nkf_char c8)
2516{
2517    nkf_char wc, wc2;
2518
2519    wc = utf32_to_nkf_char(c1, c2, c3, c4);
2520    if (wc < 0)
2521	return wc;
2522    wc2 = utf32_to_nkf_char(c5, c6, c7, c8);
2523    if (wc2 < 0)
2524	return wc2;
2525
2526    return unicode_iconv_combine(wc, wc2);
2527}
2528
2529static size_t
2530nkf_iconv_utf_32_nocombine(nkf_char c1, nkf_char c2, nkf_char c3, nkf_char c4)
2531{
2532    nkf_char wc;
2533
2534    wc = utf32_to_nkf_char(c1, c2, c3, c4);
2535    return (*unicode_iconv)(wc, TRUE);
2536}
2537#endif
2538
/*
 * Unless output_mode is already ASCII or ISO_8859_1, writes the three
 * bytes ESC '(' ascii_intro through (*o_putc) and stores `mode` in
 * output_mode. When output_mode is ASCII or ISO_8859_1 nothing is written
 * and output_mode is left as it is, whatever `mode` is.
 */
2539#define output_ascii_escape_sequence(mode) do { \
2540	    if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2541		    (*o_putc)(ESC); \
2542		    (*o_putc)('('); \
2543		    (*o_putc)(ascii_intro); \
2544		    output_mode = mode; \
2545	    } \
2546    } while (0)
2547
2548static void
2549output_escape_sequence(int mode)
2550{
2551    if (output_mode == mode)
2552	return;
2553    switch(mode) {
2554    case ISO_8859_1:
2555	(*o_putc)(ESC);
2556	(*o_putc)('.');
2557	(*o_putc)('A');
2558	break;
2559    case JIS_X_0201_1976_K:
2560	(*o_putc)(ESC);
2561	(*o_putc)('(');
2562	(*o_putc)('I');
2563	break;
2564    case JIS_X_0208:
2565	(*o_putc)(ESC);
2566	(*o_putc)('$');
2567	(*o_putc)(kanji_intro);
2568	break;
2569    case JIS_X_0212:
2570	(*o_putc)(ESC);
2571	(*o_putc)('$');
2572	(*o_putc)('(');
2573	(*o_putc)('D');
2574	break;
2575    case JIS_X_0213_1:
2576	(*o_putc)(ESC);
2577	(*o_putc)('$');
2578	(*o_putc)('(');
2579	(*o_putc)('Q');
2580	break;
2581    case JIS_X_0213_2:
2582	(*o_putc)(ESC);
2583	(*o_putc)('$');
2584	(*o_putc)('(');
2585	(*o_putc)('P');
2586	break;
2587    }
2588    output_mode = mode;
2589}
2590
/*
 * Output converter that writes escape-sequence-switched 7-bit JIS through
 * (*o_putc).
 *
 *   c2 - 0 for a single byte, EOF, ISO_8859_1, JIS_X_0201_1976_K, a value
 *        accepted by is_eucg3(), or the first byte of a two-byte code
 *   c1 - the byte itself / second byte, or a tagged Unicode value when
 *        c2 is 0
 *
 * Characters that cannot be represented are handed to (*encode_fallback)
 * when one is installed, or silently dropped.
 */
2591static void
2592j_oconv(nkf_char c2, nkf_char c1)
2593{
2594#ifdef NUMCHAR_OPTION
2595    if (c2 == 0 && nkf_char_unicode_p(c1)){
	/* Try the table conversion first; it leaves c2 == 0 with a tagged
	 * c1 when no mapping was found. */
2596	w16e_conv(c1, &c2, &c1);
2597	if (c2 == 0 && nkf_char_unicode_p(c1)){
2598	    c2 = c1 & VALUE_MASK;
2599	    if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2600		/* CP5022x UDC */
		/* offset from U+E000 -> rows of 94 cells starting at
		 * row 0x7F, cell 0x21 */
2601		c1 &= 0xFFF;
2602		c2 = 0x7F + c1 / 94;
2603		c1 = 0x21 + c1 % 94;
2604	    } else {
2605		if (encode_fallback) (*encode_fallback)(c1);
2606		return;
2607	    }
2608	}
2609    }
2610#endif
2611    if (c2 == 0) {
2612	output_ascii_escape_sequence(ASCII);
2613	(*o_putc)(c1);
2614    }
2615    else if (c2 == EOF) {
	/* Return to ASCII before EOF is passed down the chain. */
2616	output_ascii_escape_sequence(ASCII);
2617	(*o_putc)(EOF);
2618    }
2619    else if (c2 == ISO_8859_1) {
2620	output_ascii_escape_sequence(ISO_8859_1);
2621	(*o_putc)(c1|0x80);
2622    }
2623    else if (c2 == JIS_X_0201_1976_K) {
2624	output_escape_sequence(JIS_X_0201_1976_K);
2625	(*o_putc)(c1);
2626#ifdef X0212_ENABLE
2627    } else if (is_eucg3(c2)){
2628	output_escape_sequence(x0213_f ? JIS_X_0213_2 : JIS_X_0212);
2629	(*o_putc)(c2 & 0x7f);
2630	(*o_putc)(c1);
2631#endif
2632    } else {
	/* Drop byte pairs outside the printable range; with ms_ucs_map_f
	 * the first byte may go up to 0x92 (the UDC rows produced above). */
2633	if(ms_ucs_map_f
2634	   ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2635	   : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2636	output_escape_sequence(x0213_f ? JIS_X_0213_1 : JIS_X_0208);
2637	(*o_putc)(c2);
2638	(*o_putc)(c1);
2639    }
2640}
2641
/*
 * Output converter that writes EUC-JP through (*o_putc).
 *
 *   c2 - 0 for a single byte, EOF, JIS_X_0201_1976_K, ISO_8859_1, a value
 *        accepted by is_eucg3(), or the first byte of a two-byte code
 *   c1 - the byte itself / second byte, or a tagged Unicode value when
 *        c2 is 0
 *
 * Unmappable Unicode values go to (*encode_fallback) when one is
 * installed. A two-byte code whose bytes fail nkf_isgraph() is dropped
 * after calling set_iconv(FALSE, 0).
 */
2642static void
2643e_oconv(nkf_char c2, nkf_char c1)
2644{
2645    if (c2 == 0 && nkf_char_unicode_p(c1)){
2646	w16e_conv(c1, &c2, &c1);
2647	if (c2 == 0 && nkf_char_unicode_p(c1)){
	    /* No table mapping: only the private-use range is still
	     * representable, and only when x0212_f is set. */
2648	    c2 = c1 & VALUE_MASK;
2649	    if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2650		/* eucJP-ms UDC */
		/* Rows of 94 cells: the first 10 rows become two-byte
		 * codes from row 0x75, later rows become 0x8f-prefixed
		 * codes (row 10 + 0x8FEB == 0x8FF5). */
2651		c1 &= 0xFFF;
2652		c2 = c1 / 94;
2653		c2 += c2 < 10 ? 0x75 : 0x8FEB;
2654		c1 = 0x21 + c1 % 94;
2655		if (is_eucg3(c2)){
2656		    (*o_putc)(0x8f);
2657		    (*o_putc)((c2 & 0x7f) | 0x080);
2658		    (*o_putc)(c1 | 0x080);
2659		}else{
2660		    (*o_putc)((c2 & 0x7f) | 0x080);
2661		    (*o_putc)(c1 | 0x080);
2662		}
2663		return;
2664	    } else {
2665		if (encode_fallback) (*encode_fallback)(c1);
2666		return;
2667	    }
2668	}
2669    }
2670
2671    if (c2 == EOF) {
2672	(*o_putc)(EOF);
2673    } else if (c2 == 0) {
2674	output_mode = ASCII;
2675	(*o_putc)(c1);
2676    } else if (c2 == JIS_X_0201_1976_K) {
	/* half-width kana: SS2 followed by the byte with the high bit set */
2677	output_mode = EUC_JP;
2678	(*o_putc)(SS2); (*o_putc)(c1|0x80);
2679    } else if (c2 == ISO_8859_1) {
2680	output_mode = ISO_8859_1;
2681	(*o_putc)(c1 | 0x080);
2682#ifdef X0212_ENABLE
2683    } else if (is_eucg3(c2)){
2684	output_mode = EUC_JP;
2685#ifdef SHIFTJIS_CP932
2686	if (!cp932inv_f){
	    /* Round-trip through e2s_conv()/s2e_conv(); the result may no
	     * longer satisfy is_eucg3(), so c2 is re-examined below. */
2687	    nkf_char s2, s1;
2688	    if (e2s_conv(c2, c1, &s2, &s1) == 0){
2689		s2e_conv(s2, s1, &c2, &c1);
2690	    }
2691	}
2692#endif
2693	if (c2 == 0) {
2694	    output_mode = ASCII;
2695	    (*o_putc)(c1);
2696	}else if (is_eucg3(c2)){
	    /* Written as a three-byte code only when x0212_f is set;
	     * otherwise the character produces no output. */
2697	    if (x0212_f){
2698		(*o_putc)(0x8f);
2699		(*o_putc)((c2 & 0x7f) | 0x080);
2700		(*o_putc)(c1 | 0x080);
2701	    }
2702	}else{
2703	    (*o_putc)((c2 & 0x7f) | 0x080);
2704	    (*o_putc)(c1 | 0x080);
2705	}
2706#endif
2707    } else {
2708	if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2709	    set_iconv(FALSE, 0);
2710	    return; /* too late to rescue this char */
2711	}
2712	output_mode = EUC_JP;
2713	(*o_putc)(c2 | 0x080);
2714	(*o_putc)(c1 | 0x080);
2715    }
2716}
2717
/*
 * Output converter that writes Shift_JIS through (*o_putc).
 *
 *   c2 - 0 for a single byte, EOF, JIS_X_0201_1976_K, ISO_8859_1, a value
 *        accepted by is_eucg3(), or the first byte of a two-byte code
 *   c1 - the byte itself / second byte, or a tagged Unicode value when
 *        c2 is 0
 *
 * Unmappable Unicode values go to (*encode_fallback) when one is
 * installed. A two-byte code whose bytes fail nkf_isprint() is dropped
 * after calling set_iconv(FALSE, 0).
 */
2718static void
2719s_oconv(nkf_char c2, nkf_char c1)
2720{
2721#ifdef NUMCHAR_OPTION
2722    if (c2 == 0 && nkf_char_unicode_p(c1)){
2723	w16e_conv(c1, &c2, &c1);
2724	if (c2 == 0 && nkf_char_unicode_p(c1)){
2725	    c2 = c1 & VALUE_MASK;
2726	    if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2727		/* CP932 UDC */
		/* 188 cells per lead byte; the lead byte starts at 0xF0
		 * with cp932inv_f and at 0xEB without it. The trail byte
		 * starts at 0x40 and steps over 0x7F. */
2728		c1 &= 0xFFF;
2729		c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2730		c1 = c1 % 188;
2731		c1 += 0x40 + (c1 > 0x3e);
2732		(*o_putc)(c2);
2733		(*o_putc)(c1);
2734		return;
2735	    } else {
2736		if(encode_fallback)(*encode_fallback)(c1);
2737		return;
2738	    }
2739	}
2740    }
2741#endif
2742    if (c2 == EOF) {
2743	(*o_putc)(EOF);
2744	return;
2745    } else if (c2 == 0) {
2746	output_mode = ASCII;
2747	(*o_putc)(c1);
2748    } else if (c2 == JIS_X_0201_1976_K) {
	/* half-width kana is a single byte with the high bit set */
2749	output_mode = SHIFT_JIS;
2750	(*o_putc)(c1|0x80);
2751    } else if (c2 == ISO_8859_1) {
2752	output_mode = ISO_8859_1;
2753	(*o_putc)(c1 | 0x080);
2754#ifdef X0212_ENABLE
2755    } else if (is_eucg3(c2)){
	/* Written only when e2s_conv() reports success (0). */
2756	output_mode = SHIFT_JIS;
2757	if (e2s_conv(c2, c1, &c2, &c1) == 0){
2758	    (*o_putc)(c2);
2759	    (*o_putc)(c1);
2760	}
2761#endif
2762    } else {
2763	if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2764	    set_iconv(FALSE, 0);
2765	    return; /* too late to rescue this char */
2766	}
2767	output_mode = SHIFT_JIS;
2768	e2s_conv(c2, c1, &c2, &c1);
2769
2770#ifdef SHIFTJIS_CP932
	/* With cp932inv_f, a non-zero cp932inv entry replaces the code. */
2771	if (cp932inv_f
2772	    && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2773	    nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2774	    if (c){
2775		c2 = c >> 8;
2776		c1 = c & 0xff;
2777	    }
2778	}
2779#endif /* SHIFTJIS_CP932 */
2780
2781	(*o_putc)(c2);
	/* A non-zero prefix_table entry for the trail byte is written in
	 * front of it. */
2782	if (prefix_table[(unsigned char)c1]){
2783	    (*o_putc)(prefix_table[(unsigned char)c1]);
2784	}
2785	(*o_putc)(c1);
2786    }
2787}
2788
2789#ifdef UTF8_OUTPUT_ENABLE
/*
 * Encodes `val` with nkf_unicode_to_utf8() and writes the resulting bytes
 * through (*o_putc); the 2nd to 4th bytes are written only when non-zero.
 * Requires nkf_char variables c1, c2, c3 and c4 in the calling scope and
 * overwrites all four of them.
 */
2790#define OUTPUT_UTF8(val) do { \
2791	nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4); \
2792	(*o_putc)(c1); \
2793	if (c2) (*o_putc)(c2); \
2794	if (c3) (*o_putc)(c3); \
2795	if (c4) (*o_putc)(c4); \
2796    } while (0)
2797
2798static void
2799w_oconv(nkf_char c2, nkf_char c1)
2800{
2801    nkf_char c3, c4;
2802    nkf_char val, val2;
2803
2804    if (output_bom_f) {
2805	output_bom_f = FALSE;
2806	(*o_putc)('\357');
2807	(*o_putc)('\273');
2808	(*o_putc)('\277');
2809    }
2810
2811    if (c2 == EOF) {
2812	(*o_putc)(EOF);
2813	return;
2814    }
2815
2816    if (c2 == 0 && nkf_char_unicode_p(c1)){
2817	val = c1 & VALUE_MASK;
2818	OUTPUT_UTF8(val);
2819	return;
2820    }
2821
2822    if (c2 == 0) {
2823	(*o_putc)(c1);
2824    } else {
2825	val = e2w_conv(c2, c1);
2826	if (val){
2827	    val2 = e2w_combining(val, c2, c1);
2828	    if (val2)
2829		OUTPUT_UTF8(val2);
2830	    OUTPUT_UTF8(val);
2831	}
2832    }
2833}
2834
/*
 * Writes two bytes through (*o_putc): c1 then c2 when output_endian is
 * ENDIAN_LITTLE, c2 then c1 otherwise. Callers therefore pass the low byte
 * as c1 and the high byte as c2.
 */
2835#define OUTPUT_UTF16_BYTES(c1, c2) do { \
2836	if (output_endian == ENDIAN_LITTLE){ \
2837	    (*o_putc)(c1); \
2838	    (*o_putc)(c2); \
2839	}else{ \
2840	    (*o_putc)(c2); \
2841	    (*o_putc)(c1); \
2842	} \
2843    } while (0)
2844
/*
 * Writes `val` as UTF-16: one unit when nkf_char_unicode_bmp_p(val) holds,
 * otherwise a surrogate pair when the masked value is <= UNICODE_MAX
 * (larger values produce no output).
 * `val` must be a modifiable lvalue (it is masked in place), and the macro
 * overwrites the variables c1 and c2 of the calling scope.
 */
2845#define OUTPUT_UTF16(val) do { \
2846	if (nkf_char_unicode_bmp_p(val)) { \
2847	    c2 = (val >> 8) & 0xff; \
2848	    c1 = val & 0xff; \
2849	    OUTPUT_UTF16_BYTES(c1, c2); \
2850	} else { \
2851	    val &= VALUE_MASK; \
2852	    if (val <= UNICODE_MAX) { \
2853		c2 = (val >> 10) + NKF_INT32_C(0xD7C0);   /* high surrogate */ \
2854		c1 = (val & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */ \
2855		OUTPUT_UTF16_BYTES(c2 & 0xff, (c2 >> 8) & 0xff); \
2856		OUTPUT_UTF16_BYTES(c1 & 0xff, (c1 >> 8) & 0xff); \
2857	    } \
2858	} \
2859    } while (0)
2860
2861static void
2862w_oconv16(nkf_char c2, nkf_char c1)
2863{
2864    if (output_bom_f) {
2865	output_bom_f = FALSE;
2866	OUTPUT_UTF16_BYTES(0xFF, 0xFE);
2867    }
2868
2869    if (c2 == EOF) {
2870	(*o_putc)(EOF);
2871	return;
2872    }
2873
2874    if (c2 == 0 && nkf_char_unicode_p(c1)) {
2875	OUTPUT_UTF16(c1);
2876    } else if (c2) {
2877	nkf_char val, val2;
2878	val = e2w_conv(c2, c1);
2879	if (!val) return;
2880	val2 = e2w_combining(val, c2, c1);
2881	if (val2)
2882	    OUTPUT_UTF16(val2);
2883	OUTPUT_UTF16(val);
2884    } else {
2885	OUTPUT_UTF16_BYTES(c1, c2);
2886    }
2887}
2888
/*
 * Writes the low 24 bits of `c` as one four-byte UTF-32 unit through
 * (*o_putc); the most significant byte is always 0. Byte order is least
 * significant first when output_endian is ENDIAN_LITTLE, most significant
 * first otherwise. `c` is evaluated three times.
 */
2889#define OUTPUT_UTF32(c) do { \
2890	if (output_endian == ENDIAN_LITTLE){ \
2891	    (*o_putc)( (c)        & 0xFF); \
2892	    (*o_putc)(((c) >>  8) & 0xFF); \
2893	    (*o_putc)(((c) >> 16) & 0xFF); \
2894	    (*o_putc)(0); \
2895	}else{ \
2896	    (*o_putc)(0); \
2897	    (*o_putc)(((c) >> 16) & 0xFF); \
2898	    (*o_putc)(((c) >>  8) & 0xFF); \
2899	    (*o_putc)( (c)        & 0xFF); \
2900	} \
2901    } while (0)
2902
2903static void
2904w_oconv32(nkf_char c2, nkf_char c1)
2905{
2906    if (output_bom_f) {
2907	output_bom_f = FALSE;
2908	if (output_endian == ENDIAN_LITTLE){
2909	    (*o_putc)(0xFF);
2910	    (*o_putc)(0xFE);
2911	    (*o_putc)(0);
2912	    (*o_putc)(0);
2913	}else{
2914	    (*o_putc)(0);
2915	    (*o_putc)(0);
2916	    (*o_putc)(0xFE);
2917	    (*o_putc)(0xFF);
2918	}
2919    }
2920
2921    if (c2 == EOF) {
2922	(*o_putc)(EOF);
2923	return;
2924    }
2925
2926    if (c2 == ISO_8859_1) {
2927	c1 |= 0x80;
2928    } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2929	c1 &= VALUE_MASK;
2930    } else if (c2) {
2931	nkf_char val, val2;
2932	val = e2w_conv(c2, c1);
2933	if (!val) return;
2934	val2 = e2w_combining(val, c2, c1);
2935	if (val2)
2936	    OUTPUT_UTF32(val2);
2937	c1 = val;
2938    }
2939    OUTPUT_UTF32(c1);
2940}
2941#endif
2942
/*
 * Bit flags accumulated in input_code.score by set_code_score(). Each
 * flag is the previous one shifted left by one bit, so flags defined
 * later have larger numeric values.
 */
2943#define SCORE_L2       (1)                   /* Kanji Level 2 */
2944#define SCORE_KANA     (SCORE_L2 << 1)       /* Halfwidth Katakana */
2945#define SCORE_DEPEND   (SCORE_KANA << 1)     /* MD Characters */
2946#define SCORE_CP932    (SCORE_DEPEND << 1)   /* IBM extended characters */
2947#define SCORE_X0212    (SCORE_CP932 << 1)    /* JIS X 0212 */
2948#define SCORE_X0213    (SCORE_X0212 << 1)    /* JIS X 0213 */
2949#define SCORE_NO_EXIST (SCORE_X0213 << 1)    /* Undefined Characters */
2950#define SCORE_iMIME    (SCORE_NO_EXIST << 1) /* MIME selected */
2951#define SCORE_ERROR    (SCORE_iMIME << 1) /* Error */
2952
/* Score assigned by status_reset(). */
2953#define SCORE_INIT (SCORE_iMIME)
2954
/*
 * Score lookup tables used by code_score(). Each has 16 entries and is
 * indexed by the low nibble of a byte.
 */

/* Indexed by (c2 & 0x0f) when (c2 & 0x70) == 0x20. */
2955static const nkf_char score_table_A0[] = {
2956    0, 0, 0, 0,
2957    0, 0, 0, 0,
2958    0, SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND,
2959    SCORE_DEPEND, SCORE_DEPEND, SCORE_DEPEND, SCORE_X0213,
2960};
2961
/* Indexed by (c2 & 0x0f) when (c2 & 0x70) == 0x70. */
2962static const nkf_char score_table_F0[] = {
2963    SCORE_L2, SCORE_L2, SCORE_L2, SCORE_L2,
2964    SCORE_L2, SCORE_DEPEND, SCORE_X0213, SCORE_X0213,
2965    SCORE_DEPEND, SCORE_DEPEND, SCORE_CP932, SCORE_CP932,
2966    SCORE_CP932, SCORE_X0213, SCORE_X0213, SCORE_ERROR,
2967};
2968
/* c2 == 0x8f: indexed by (c1 & 0x0f) when (c1 & 0x70) == 0x20. */
2969static const nkf_char score_table_8FA0[] = {
2970    0, SCORE_X0213, SCORE_X0212, SCORE_X0213,
2971    SCORE_X0213, SCORE_X0213, SCORE_X0212, SCORE_X0212,
2972    SCORE_X0213, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2973    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2974};
2975
/* c2 == 0x8f: indexed by (c1 & 0x0f) when (c1 & 0x70) == 0x60. */
2976static const nkf_char score_table_8FE0[] = {
2977    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2978    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2979    SCORE_X0212, SCORE_X0212, SCORE_X0212, SCORE_X0212,
2980    SCORE_X0212, SCORE_X0212, SCORE_X0213, SCORE_X0213,
2981};
2982
/* c2 == 0x8f: indexed by (c1 & 0x0f) when (c1 & 0x70) == 0x70. */
2983static const nkf_char score_table_8FF0[] = {
2984    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0212,
2985    SCORE_X0212, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2986    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2987    SCORE_X0213, SCORE_X0213, SCORE_X0213, SCORE_X0213,
2988};
2989
2990static void
2991set_code_score(struct input_code *ptr, nkf_char score)
2992{
2993    if (ptr){
2994	ptr->score |= score;
2995    }
2996}
2997
2998static void
2999clr_code_score(struct input_code *ptr, nkf_char score)
3000{
3001    if (ptr){
3002	ptr->score &= ~score;
3003    }
3004}
3005
/*
 * Classifies the character held in ptr->buf[0] (c2) and ptr->buf[1] (c1)
 * and ORs the matching SCORE_* flag(s) into ptr->score. The first matching
 * branch wins; a character matching none of them adds nothing.
 */
3006static void
3007code_score(struct input_code *ptr)
3008{
3009    nkf_char c2 = ptr->buf[0];
3010    nkf_char c1 = ptr->buf[1];
    /* status_disable() stores -1 in buf[0] before calling this function. */
3011    if (c2 < 0){
3012	set_code_score(ptr, SCORE_ERROR);
3013    }else if (c2 == SS2){
3014	set_code_score(ptr, SCORE_KANA);
3015    }else if (c2 == 0x8f){
	/* 0x8f-prefixed code: pick the table by bits 4-6 of c1. */
3016	if ((c1 & 0x70) == 0x20){
3017	    set_code_score(ptr, score_table_8FA0[c1 & 0x0f]);
3018	}else if ((c1 & 0x70) == 0x60){
3019	    set_code_score(ptr, score_table_8FE0[c1 & 0x0f]);
3020	}else if ((c1 & 0x70) == 0x70){
3021	    set_code_score(ptr, score_table_8FF0[c1 & 0x0f]);
3022	}else{
3023	    set_code_score(ptr, SCORE_X0212);
3024	}
3025#ifdef UTF8_OUTPUT_ENABLE
    /* A zero result from e2w_conv() is scored as an undefined character. */
3026    }else if (!e2w_conv(c2, c1)){
3027	set_code_score(ptr, SCORE_NO_EXIST);
3028#endif
3029    }else if ((c2 & 0x70) == 0x20){
3030	set_code_score(ptr, score_table_A0[c2 & 0x0f]);
3031    }else if ((c2 & 0x70) == 0x70){
3032	set_code_score(ptr, score_table_F0[c2 & 0x0f]);
3033    }else if ((c2 & 0x70) >= 0x50){
3034	set_code_score(ptr, SCORE_L2);
3035    }
3036}
3037
3038static void
3039status_disable(struct input_code *ptr)
3040{
3041    ptr->stat = -1;
3042    ptr->buf[0] = -1;
3043    code_score(ptr);
3044    if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
3045}
3046
3047static void
3048status_push_ch(struct input_code *ptr, nkf_char c)
3049{
3050    ptr->buf[ptr->index++] = c;
3051}
3052
3053static void
3054status_clear(struct input_code *ptr)
3055{
3056    ptr->stat = 0;
3057    ptr->index = 0;
3058}
3059
3060static void
3061status_reset(struct input_code *ptr)
3062{
3063    status_clear(ptr);
3064    ptr->score = SCORE_INIT;
3065}
3066
3067static void
3068status_reinit(struct input_code *ptr)
3069{
3070    status_reset(ptr);
3071    ptr->_file_stat = 0;
3072}
3073
3074static void
3075status_check(struct input_code *ptr, nkf_char c)
3076{
3077    if (c <= DEL && estab_f){
3078	status_reset(ptr);
3079    }
3080}
3081
/*
 * Feeds one input byte c to the Shift_JIS candidate `ptr` and updates its
 * state and score.
 *
 * ptr->stat values:
 *   -1  candidate disabled (see status_disable)
 *    0  idle, c is examined as a first byte
 *    1  lead byte stored, waiting for the trail byte (scored by code_score)
 *    2  lead byte accepted by is_ibmext_in_sjis(), waiting for the trail
 *       byte
 *    3  lead byte 0xed/0xee stored, waiting for the trail byte
 */
3082static void
3083s_status(struct input_code *ptr, nkf_char c)
3084{
3085    switch(ptr->stat){
3086    case -1:
3087	status_check(ptr, c);
3088	break;
3089    case 0:
3090	if (c <= DEL){
3091	    break;
3092	}else if (nkf_char_unicode_p(c)){
3093	    break;
3094	}else if (0xa1 <= c && c <= 0xdf){
	    /* Single-byte kana: scored as an SS2-prefixed pair. */
3095	    status_push_ch(ptr, SS2);
3096	    status_push_ch(ptr, c);
3097	    code_score(ptr);
3098	    status_clear(ptr);
3099	}else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
3100	    ptr->stat = 1;
3101	    status_push_ch(ptr, c);
3102	}else if (0xed <= c && c <= 0xee){
3103	    ptr->stat = 3;
3104	    status_push_ch(ptr, c);
3105#ifdef SHIFTJIS_CP932
3106	}else if (is_ibmext_in_sjis(c)){
3107	    ptr->stat = 2;
3108	    status_push_ch(ptr, c);
3109#endif /* SHIFTJIS_CP932 */
3110#ifdef X0212_ENABLE
3111	}else if (0xf0 <= c && c <= 0xfc){
3112	    ptr->stat = 1;
3113	    status_push_ch(ptr, c);
3114#endif /* X0212_ENABLE */
3115	}else{
3116	    status_disable(ptr);
3117	}
3118	break;
3119    case 1:
3120	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
	    /* Convert the pair in place so that code_score() sees the
	     * output of s2e_conv(). */
3121	    status_push_ch(ptr, c);
3122	    s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3123	    code_score(ptr);
3124	    status_clear(ptr);
3125	}else{
3126	    status_disable(ptr);
3127	}
3128	break;
3129    case 2:
3130#ifdef SHIFTJIS_CP932
3131	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
3132	    status_push_ch(ptr, c);
3133	    if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
3134		set_code_score(ptr, SCORE_CP932);
3135		status_clear(ptr);
3136		break;
3137	    }
3138	}
3139#endif /* SHIFTJIS_CP932 */
	/* Reached on an invalid trail byte, a failed conversion, or when
	 * SHIFTJIS_CP932 is not compiled in. */
3140	status_disable(ptr);
3141	break;
3142    case 3:
3143	if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
	    /* Scored as SCORE_CP932 regardless of s2e_conv()'s result. */
3144	    status_push_ch(ptr, c);
3145	    s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
3146	    set_code_score(ptr, SCORE_CP932);
3147	    status_clear(ptr);
3148	}else{
3149	    status_disable(ptr);
3150	}
3151	break;
3152    }
3153}
3154
/*
 * Feeds one input byte c to the EUC-JP candidate `ptr` and updates its
 * state and score.
 *
 * ptr->stat values:
 *   -1  candidate disabled (see status_disable)
 *    0  idle, c is examined as a first byte
 *    1  waiting for the final byte of the character
 *    2  0x8f stored, waiting for the middle byte of a three-byte character
 */
3155static void
3156e_status(struct input_code *ptr, nkf_char c)
3157{
3158    switch (ptr->stat){
3159    case -1:
3160	status_check(ptr, c);
3161	break;
3162    case 0:
3163	if (c <= DEL){
3164	    break;
3165	}else if (nkf_char_unicode_p(c)){
3166	    break;
3167	}else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
3168	    ptr->stat = 1;
3169	    status_push_ch(ptr, c);
3170#ifdef X0212_ENABLE
3171	}else if (0x8f == c){
3172	    ptr->stat = 2;
3173	    status_push_ch(ptr, c);
3174#endif /* X0212_ENABLE */
3175	}else{
3176	    status_disable(ptr);
3177	}
3178	break;
3179    case 1:
3180	if (0xa1 <= c && c <= 0xfe){
3181	    status_push_ch(ptr, c);
3182	    code_score(ptr);
3183	    status_clear(ptr);
3184	}else{
3185	    status_disable(ptr);
3186	}
3187	break;
3188#ifdef X0212_ENABLE
3189    case 2:
3190	if (0xa1 <= c && c <= 0xfe){
	    /* Middle byte accepted; the final byte is handled by case 1. */
3191	    ptr->stat = 1;
3192	    status_push_ch(ptr, c);
3193	}else{
3194	    status_disable(ptr);
3195	}
3196#endif /* X0212_ENABLE */
3197    }
3198}
3199
3200#ifdef UTF8_INPUT_ENABLE
3201static void
/*
 * Feeds one input byte c to the UTF-8 candidate `ptr` and updates its
 * state and score.
 *
 * ptr->stat values:
 *   -1       candidate disabled (see status_disable)
 *    0       idle, c is examined as a lead byte
 *    1, 2, 3 lead byte stored; the value is the number of trail bytes
 *            (0x80-0xbf) the sequence still needs in total
 */
3202w_status(struct input_code *ptr, nkf_char c)
3203{
3204    switch (ptr->stat){
3205    case -1:
3206	status_check(ptr, c);
3207	break;
3208    case 0:
3209	if (c <= DEL){
3210	    break;
3211	}else if (nkf_char_unicode_p(c)){
3212	    break;
3213	}else if (0xc0 <= c && c <= 0xdf){
3214	    ptr->stat = 1;
3215	    status_push_ch(ptr, c);
3216	}else if (0xe0 <= c && c <= 0xef){
3217	    ptr->stat = 2;
3218	    status_push_ch(ptr, c);
3219	}else if (0xf0 <= c && c <= 0xf4){
3220	    ptr->stat = 3;
3221	    status_push_ch(ptr, c);
3222	}else{
3223	    status_disable(ptr);
3224	}
3225	break;
3226    case 1:
3227    case 2:
3228	if (0x80 <= c && c <= 0xbf){
3229	    status_push_ch(ptr, c);
	    /* index counts the lead byte too, so index > stat means every
	     * trail byte has arrived. */
3230	    if (ptr->index > ptr->stat){
		/* The byte order mark EF BB BF is converted but not scored. */
3231		int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
3232			   && ptr->buf[2] == 0xbf);
3233		w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
3234			 &ptr->buf[0], &ptr->buf[1]);
3235		if (!bom){
3236		    code_score(ptr);
3237		}
3238		status_clear(ptr);
3239	    }
3240	}else{
3241	    status_disable(ptr);
3242	}
3243	break;
3244    case 3:
3245	if (0x80 <= c && c <= 0xbf){
	    /* Four-byte sequences are only validated: the first two trail
	     * bytes are stored, the third one ends the sequence without
	     * conversion or scoring. */
3246	    if (ptr->index < ptr->stat){
3247		status_push_ch(ptr, c);
3248	    } else {
3249		status_clear(ptr);
3250	    }
3251	}else{
3252	    status_disable(ptr);
3253	}
3254	break;
3255    }
3256}
3257#endif
3258
3259static void
3260code_status(nkf_char c)
3261{
3262    int action_flag = 1;
3263    struct input_code *result = 0;
3264    struct input_code *p = input_code_list;
3265    while (p->name){
3266	if (!p->status_func) {
3267	    ++p;
3268	    continue;
3269	}
3270	if (!p->status_func)
3271	    continue;
3272	(p->status_func)(p, c);
3273	if (p->stat > 0){
3274	    action_flag = 0;
3275	}else if(p->stat == 0){
3276	    if (result){
3277		action_flag = 0;
3278	    }else{
3279		result = p;
3280	    }
3281	}
3282	++p;
3283    }
3284
3285    if (action_flag){
3286	if (result && !estab_f){
3287	    set_iconv(TRUE, result->iconv_func);
3288	}else if (c <= DEL){
3289	    struct input_code *ptr = input_code_list;
3290	    while (ptr->name){
3291		status_reset(ptr);
3292		++ptr;
3293	    }
3294	}
3295    }
3296}
3297
/*
 * Converter state allocated once by nkf_state_init() and reachable through
 * the file-scope pointer nkf_state.
 */
3298typedef struct {
    /* push-back storage: filled by std_ungetc(), drained by std_getc() */
3299    nkf_buf_t *std_gc_buf;
    /* zeroed by nkf_state_init(); used outside this section */
3300    nkf_char broken_state;
    /* created with capacity 3 by nkf_state_init(); used outside this section */
3301    nkf_buf_t *broken_buf;
    /* zeroed by nkf_state_init(); used outside this section */
3302    nkf_char mimeout_state;
    /* created with capacity 9 by nkf_state_init(); used outside this section */
3303    nkf_buf_t *nfc_buf;
3304} nkf_state_t;
3305
/* NULL until the first nkf_state_init() call. */
3306static nkf_state_t *nkf_state = NULL;
3307
3308#define STD_GC_BUFSIZE (256)
3309
3310static void
3311nkf_state_init(void)
3312{
3313    if (nkf_state) {
3314	nkf_buf_clear(nkf_state->std_gc_buf);
3315	nkf_buf_clear(nkf_state->broken_buf);
3316	nkf_buf_clear(nkf_state->nfc_buf);
3317    }
3318    else {
3319	nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3320	nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3321	nkf_state->broken_buf = nkf_buf_new(3);
3322	nkf_state->nfc_buf = nkf_buf_new(9);
3323    }
3324    nkf_state->broken_state = 0;
3325    nkf_state->mimeout_state = 0;
3326}
3327
3328#ifndef WIN32DLL
3329static nkf_char
3330std_getc(FILE *f)
3331{
3332    if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3333	return nkf_buf_pop(nkf_state->std_gc_buf);
3334    }
3335    return getc(f);
3336}
3337#endif /*WIN32DLL*/
3338
3339static nkf_char
3340std_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3341{
3342    nkf_buf_push(nkf_state->std_gc_buf, c);
3343    return c;
3344}
3345
3346#ifndef WIN32DLL
3347static void
3348std_putc(nkf_char c)
3349{
3350    if(c!=EOF)
3351	putchar(c);
3352}
3353#endif /*WIN32DLL*/
3354
3355static nkf_char   hold_buf[HOLD_SIZE*2];
3356static int             hold_count = 0;
3357static nkf_char
3358push_hold_buf(nkf_char c2)
3359{
3360    if (hold_count >= HOLD_SIZE*2)
3361	return (EOF);
3362    hold_buf[hold_count++] = c2;
3363    return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3364}
3365
/*
 * Hold input while the input encoding is still undecided, then replay it.
 *
 * c1 and c2 are stored in hold_buf and further characters are read with
 * i_getc (each one is passed to code_status()) until an ESC is seen, EOF
 * is reached, the hold buffer becomes full, or estab_f is set.  If the
 * encoding is still not established afterwards, the input_code_list entry
 * that has a status_func and the lowest score is installed with
 * set_iconv().  The held characters are then fed through iconv/oconv;
 * when a multi-byte sequence runs past the end of the held data the
 * missing bytes are read from the stream.
 *
 * Returns the last value obtained by the scanning loop (this may be ESC
 * or EOF); it is replaced by EOF when the stream ends while the third or
 * fourth byte of a sequence is being fetched in the -2 and -1 cases.
 */
static int
h_conv(FILE *f, nkf_char c1, nkf_char c2)
{
    int ret;
    int hold_index;
    int fromhold_count;	/* bytes of the current sequence taken from hold_buf */
    nkf_char c3, c4;

    /** it must NOT be in the kanji shift sequence       */
    /** it must NOT be written in JIS7                   */
    /** and it must be after 2 byte 8bit code            */

    hold_count = 0;
    push_hold_buf(c1);
    push_hold_buf(c2);

    /* scan ahead, letting code_status() update the detection state */
    while ((c2 = (*i_getc)(f)) != EOF) {
	if (c2 == ESC){
	    /* leave the escape sequence for the caller */
	    (*i_ungetc)(c2,f);
	    break;
	}
	code_status(c2);
	if (push_hold_buf(c2) == EOF || estab_f) {
	    break;
	}
    }

    if (!estab_f) {
	/* still undecided: pick the candidate with the lowest score */
	struct input_code *p = input_code_list;
	struct input_code *result = p;
	if (c2 == EOF) {
	    code_status(c2);
	}
	while (p->name) {
	    if (p->status_func && p->score < result->score) {
		result = p;
	    }
	    p++;
	}
	set_iconv(TRUE, result->iconv_func);
    }


    /** now,
     ** 1) EOF is detected, or
     ** 2) Code is established, or
     ** 3) Buffer is FULL (but last word is pushed)
     **
     ** in 1) and 3) cases, we continue to use
     ** Kanji codes by oconv and leave estab_f unchanged.
     **/

    ret = c2;
    hold_index = 0;
    /* replay the held characters through the selected converter */
    while (hold_index < hold_count){
	c1 = hold_buf[hold_index++];
	if (nkf_char_unicode_p(c1)) {
	    (*oconv)(0, c1);
	    continue;
	}
	else if (c1 <= DEL){
	    /* single byte not above DEL */
	    (*iconv)(0, c1, 0);
	    continue;
	}else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
	    /* with s_iconv, a byte in 0xa1..0xdf is passed as JIS_X_0201_1976_K */
	    (*iconv)(JIS_X_0201_1976_K, c1, 0);
	    continue;
	}
	/* multi-byte lead: take the second byte from hold_buf or the stream */
	fromhold_count = 1;
	if (hold_index < hold_count){
	    c2 = hold_buf[hold_index++];
	    fromhold_count++;
	}else{
	    c2 = (*i_getc)(f);
	    if (c2 == EOF){
		c4 = EOF;
		break;
	    }
	    code_status(c2);
	}
	c3 = 0;
	/* a negative result from iconv asks for more bytes (cases below) */
	switch ((*iconv)(c1, c2, 0)) {  /* can be EUC/SJIS/UTF-8 */
	case -2:
	    /* 4 bytes UTF-8 */
	    if (hold_index < hold_count){
		c3 = hold_buf[hold_index++];
	    } else if ((c3 = (*i_getc)(f)) == EOF) {
		ret = EOF;
		break;
	    }
	    code_status(c3);
	    if (hold_index < hold_count){
		c4 = hold_buf[hold_index++];
	    } else if ((c4 = (*i_getc)(f)) == EOF) {
		c3 = ret = EOF;
		break;
	    }
	    code_status(c4);
	    (*iconv)(c1, c2, (c3<<8)|c4);
	    break;
	case -3:
	    /* 4 bytes UTF-8 (check combining character) */
	    if (hold_index < hold_count){
		c3 = hold_buf[hold_index++];
		fromhold_count++;
	    } else if ((c3 = (*i_getc)(f)) == EOF) {
		w_iconv_nocombine(c1, c2, 0);
		break;
	    }
	    if (hold_index < hold_count){
		c4 = hold_buf[hold_index++];
		fromhold_count++;
	    } else if ((c4 = (*i_getc)(f)) == EOF) {
		w_iconv_nocombine(c1, c2, 0);
		/* give c3 back to wherever it came from */
		if (fromhold_count <= 2)
		    (*i_ungetc)(c3,f);
		else
		    hold_index--;
		continue;
	    }
	    if (w_iconv_combine(c1, c2, 0, c3, c4, 0)) {
		/* non-zero result: emit c1,c2 alone and return c3,c4 */
		w_iconv_nocombine(c1, c2, 0);
		if (fromhold_count <= 2) {
		    (*i_ungetc)(c4,f);
		    (*i_ungetc)(c3,f);
		} else if (fromhold_count == 3) {
		    (*i_ungetc)(c4,f);
		    hold_index--;
		} else {
		    hold_index -= 2;
		}
	    }
	    break;
	case -1:
	    /* 3 bytes EUC or UTF-8 */
	    if (hold_index < hold_count){
		c3 = hold_buf[hold_index++];
		fromhold_count++;
	    } else if ((c3 = (*i_getc)(f)) == EOF) {
		ret = EOF;
		break;
	    } else {
		code_status(c3);
	    }
	    if ((*iconv)(c1, c2, c3) == -3) {
		/* 6 bytes UTF-8 (check combining character) */
		nkf_char c5, c6;
		if (hold_index < hold_count){
		    c4 = hold_buf[hold_index++];
		    fromhold_count++;
		} else if ((c4 = (*i_getc)(f)) == EOF) {
		    w_iconv_nocombine(c1, c2, c3);
		    continue;
		}
		if (hold_index < hold_count){
		    c5 = hold_buf[hold_index++];
		    fromhold_count++;
		} else if ((c5 = (*i_getc)(f)) == EOF) {
		    w_iconv_nocombine(c1, c2, c3);
		    /* return c4 to hold_buf or to the stream */
		    if (fromhold_count == 4)
			hold_index--;
		    else
			(*i_ungetc)(c4,f);
		    continue;
		}
		if (hold_index < hold_count){
		    c6 = hold_buf[hold_index++];
		    fromhold_count++;
		} else if ((c6 = (*i_getc)(f)) == EOF) {
		    w_iconv_nocombine(c1, c2, c3);
		    /* return c4 and c5, each to its own source */
		    if (fromhold_count == 5) {
			hold_index -= 2;
		    } else if (fromhold_count == 4) {
			hold_index--;
			(*i_ungetc)(c5,f);
		    } else {
			(*i_ungetc)(c5,f);
			(*i_ungetc)(c4,f);
		    }
		    continue;
		}
		if (w_iconv_combine(c1, c2, c3, c4, c5, c6)) {
		    /* non-zero result: emit c1..c3 alone and return c4..c6 */
		    w_iconv_nocombine(c1, c2, c3);
		    if (fromhold_count == 6) {
			hold_index -= 3;
		    } else if (fromhold_count == 5) {
			hold_index -= 2;
			(*i_ungetc)(c6,f);
		    } else if (fromhold_count == 4) {
			hold_index--;
			(*i_ungetc)(c6,f);
			(*i_ungetc)(c5,f);
		    } else {
			(*i_ungetc)(c6,f);
			(*i_ungetc)(c5,f);
			(*i_ungetc)(c4,f);
		    }
		}
	    }
	    break;
	}
	/* c3 is EOF only when the stream ended inside a sequence */
	if (c3 == EOF) break;
    }
    return ret;
}
3570
3571/*
3572 * Check and Ignore BOM
3573 */
3574static void
3575check_bom(FILE *f)
3576{
3577    int c2;
3578    switch(c2 = (*i_getc)(f)){
3579    case 0x00:
3580	if((c2 = (*i_getc)(f)) == 0x00){
3581	    if((c2 = (*i_getc)(f)) == 0xFE){
3582		if((c2 = (*i_getc)(f)) == 0xFF){
3583		    if(!input_encoding){
3584			set_iconv(TRUE, w_iconv32);
3585		    }
3586		    if (iconv == w_iconv32) {
3587			input_bom_f = TRUE;
3588			input_endian = ENDIAN_BIG;
3589			return;
3590		    }
3591		    (*i_ungetc)(0xFF,f);
3592		}else (*i_ungetc)(c2,f);
3593		(*i_ungetc)(0xFE,f);
3594	    }else if(c2 == 0xFF){
3595		if((c2 = (*i_getc)(f)) == 0xFE){
3596		    if(!input_encoding){
3597			set_iconv(TRUE, w_iconv32);
3598		    }
3599		    if (iconv == w_iconv32) {
3600			input_endian = ENDIAN_2143;
3601			return;
3602		    }
3603		    (*i_ungetc)(0xFF,f);
3604		}else (*i_ungetc)(c2,f);
3605		(*i_ungetc)(0xFF,f);
3606	    }else (*i_ungetc)(c2,f);
3607	    (*i_ungetc)(0x00,f);
3608	}else (*i_ungetc)(c2,f);
3609	(*i_ungetc)(0x00,f);
3610	break;
3611    case 0xEF:
3612	if((c2 = (*i_getc)(f)) == 0xBB){
3613	    if((c2 = (*i_getc)(f)) == 0xBF){
3614		if(!input_encoding){
3615		    set_iconv(TRUE, w_iconv);
3616		}
3617		if (iconv == w_iconv) {
3618		    input_bom_f = TRUE;
3619		    return;
3620		}
3621		(*i_ungetc)(0xBF,f);
3622	    }else (*i_ungetc)(c2,f);
3623	    (*i_ungetc)(0xBB,f);
3624	}else (*i_ungetc)(c2,f);
3625	(*i_ungetc)(0xEF,f);
3626	break;
3627    case 0xFE:
3628	if((c2 = (*i_getc)(f)) == 0xFF){
3629	    if((c2 = (*i_getc)(f)) == 0x00){
3630		if((c2 = (*i_getc)(f)) == 0x00){
3631		    if(!input_encoding){
3632			set_iconv(TRUE, w_iconv32);
3633		    }
3634		    if (iconv == w_iconv32) {
3635			input_endian = ENDIAN_3412;
3636			return;
3637		    }
3638		    (*i_ungetc)(0x00,f);
3639		}else (*i_ungetc)(c2,f);
3640		(*i_ungetc)(0x00,f);
3641	    }else (*i_ungetc)(c2,f);
3642	    if(!input_encoding){
3643		set_iconv(TRUE, w_iconv16);
3644	    }
3645	    if (iconv == w_iconv16) {
3646		input_endian = ENDIAN_BIG;
3647		input_bom_f = TRUE;
3648		return;
3649	    }
3650	    (*i_ungetc)(0xFF,f);
3651	}else (*i_ungetc)(c2,f);
3652	(*i_ungetc)(0xFE,f);
3653	break;
3654    case 0xFF:
3655	if((c2 = (*i_getc)(f)) == 0xFE){
3656	    if((c2 = (*i_getc)(f)) == 0x00){
3657		if((c2 = (*i_getc)(f)) == 0x00){
3658		    if(!input_encoding){
3659			set_iconv(TRUE, w_iconv32);
3660		    }
3661		    if (iconv == w_iconv32) {
3662			input_endian = ENDIAN_LITTLE;
3663			input_bom_f = TRUE;
3664			return;
3665		    }
3666		    (*i_ungetc)(0x00,f);
3667		}else (*i_ungetc)(c2,f);
3668		(*i_ungetc)(0x00,f);
3669	    }else (*i_ungetc)(c2,f);
3670	    if(!input_encoding){
3671		set_iconv(TRUE, w_iconv16);
3672	    }
3673	    if (iconv == w_iconv16) {
3674		input_endian = ENDIAN_LITTLE;
3675		input_bom_f = TRUE;
3676		return;
3677	    }
3678	    (*i_ungetc)(0xFE,f);
3679	}else (*i_ungetc)(c2,f);
3680	(*i_ungetc)(0xFF,f);
3681	break;
3682    default:
3683	(*i_ungetc)(c2,f);
3684	break;
3685    }
3686}
3687
3688static nkf_char
3689broken_getc(FILE *f)
3690{
3691    nkf_char c, c1;
3692
3693    if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3694	return nkf_buf_pop(nkf_state->broken_buf);
3695    }
3696    c = (*i_bgetc)(f);
3697    if (c=='$' && nkf_state->broken_state != ESC
3698	&& (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3699	c1= (*i_bgetc)(f);
3700	nkf_state->broken_state = 0;
3701	if (c1=='@'|| c1=='B') {
3702	    nkf_buf_push(nkf_state->broken_buf, c1);
3703	    nkf_buf_push(nkf_state->broken_buf, c);
3704	    return ESC;
3705	} else {
3706	    (*i_bungetc)(c1,f);
3707	    return c;
3708	}
3709    } else if (c=='(' && nkf_state->broken_state != ESC
3710	       && (input_mode == JIS_X_0208 || input_mode == JIS_X_0201_1976_K)) {
3711	c1= (*i_bgetc)(f);
3712	nkf_state->broken_state = 0;
3713	if (c1=='J'|| c1=='B') {
3714	    nkf_buf_push(nkf_state->broken_buf, c1);
3715	    nkf_buf_push(nkf_state->broken_buf, c);
3716	    return ESC;
3717	} else {
3718	    (*i_bungetc)(c1,f);
3719	    return c;
3720	}
3721    } else {
3722	nkf_state->broken_state = c;
3723	return c;
3724    }
3725}
3726
3727static nkf_char
3728broken_ungetc(nkf_char c, ARG_UNUSED FILE *f)
3729{
3730    if (nkf_buf_length(nkf_state->broken_buf) < 2)
3731	nkf_buf_push(nkf_state->broken_buf, c);
3732    return c;
3733}
3734
3735static void
3736eol_conv(nkf_char c2, nkf_char c1)
3737{
3738    if (guess_f && input_eol != EOF) {
3739	if (c2 == 0 && c1 == LF) {
3740	    if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3741	    else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3742	} else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
3743	else if (!prev_cr);
3744	else if (!input_eol) input_eol = CR;
3745	else if (input_eol != CR) input_eol = EOF;
3746    }
3747    if (prev_cr || (c2 == 0 && c1 == LF)) {
3748	prev_cr = 0;
3749	if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3750	if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3751    }
3752    if (c2 == 0 && c1 == CR) prev_cr = CR;
3753    else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3754}
3755
3756static void
3757put_newline(void (*func)(nkf_char))
3758{
3759    switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3760      case CRLF:
3761	(*func)(0x0D);
3762	(*func)(0x0A);
3763	break;
3764      case CR:
3765	(*func)(0x0D);
3766	break;
3767      case LF:
3768	(*func)(0x0A);
3769	break;
3770    }
3771}
3772
3773static void
3774oconv_newline(void (*func)(nkf_char, nkf_char))
3775{
3776    switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3777      case CRLF:
3778	(*func)(0, 0x0D);
3779	(*func)(0, 0x0A);
3780	break;
3781      case CR:
3782	(*func)(0, 0x0D);
3783	break;
3784      case LF:
3785	(*func)(0, 0x0A);
3786	break;
3787    }
3788}
3789
3790/*
3791   Return value of fold_conv()
3792
3793   LF  add newline  and output char
3794   CR  add newline  and output nothing
3795   SP  space
3796   0   skip
3797   1   (or else) normal output
3798
3799   fold state in prev (previous character)
3800
3801   >0x80 Japanese (X0208/X0201)
3802   <0x80 ASCII
3803   LF    new line
3804   SP    space
3805
   This fold algorithm does not preserve leading spaces in a line.
3807   This is the main difference from fmt.
3808 */
3809
/* Column width used by fold_conv(): 2 when c2 is non-zero, otherwise 1. */
#define char_size(c2,c1) (c2?2:1)

/*
 * Line folding filter.
 *
 * Looks at the incoming character together with f_prev (class of the
 * previous character) and f_line (current column) and chooses a
 * fold_state:
 *
 *   LF       emit a newline, then the character
 *   CR       emit a newline only
 *   SP       emit a single ASCII space
 *   0        emit nothing
 *   other    emit the character unchanged
 *
 * Output goes through o_fconv.  Lines longer than fold_len are broken,
 * except that the characters listed in the kinsoku checks below may stay
 * on the line as long as fold_len + fold_margin is not exceeded.
 */
static void
fold_conv(nkf_char c2, nkf_char c1)
{
    nkf_char prev0;
    nkf_char fold_state;

    if (c1== CR && !fold_preserve_f) {
	fold_state=0;  /* ignore cr */
    }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
	f_prev = LF;
	fold_state=0;  /* ignore cr */
    } else if (c1== BS) {
	/* backspace moves the column back by one */
	if (f_line>0) f_line--;
	fold_state =  1;
    } else if (c2==EOF && f_line != 0) {    /* close open last line */
	fold_state = LF;
    } else if ((c1==LF && !fold_preserve_f)
	       || ((c1==CR||(c1==LF&&f_prev!=CR))
		   && fold_preserve_f)) {
	/* new line */
	if (fold_preserve_f) {
	    /* preserve mode: every input newline is kept */
	    f_prev = c1;
	    f_line = 0;
	    fold_state =  CR;
	} else if ((f_prev == c1 && !fold_preserve_f)
		   || (f_prev == LF && fold_preserve_f)
		  ) {        /* duplicate newline */
	    if (f_line) {
		f_line = 0;
		fold_state =  LF;    /* output two newline */
	    } else {
		f_line = 0;
		fold_state =  1;
	    }
	} else  {
	    if (f_prev&0x80) {     /* Japanese? */
		f_prev = c1;
		fold_state =  0;       /* ignore given single newline */
	    } else if (f_prev==SP) {
		fold_state =  0;
	    } else {
		/* a single newline after ASCII text becomes a space */
		f_prev = c1;
		if (++f_line<=fold_len)
		    fold_state =  SP;
		else {
		    f_line = 0;
		    fold_state =  CR;        /* fold and output nothing */
		}
	    }
	}
    } else if (c1=='\f') {
	f_prev = LF;
	f_line = 0;
	fold_state =  LF;            /* output newline and clear */
    } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
	/* X0208 kankaku or ascii space */
	if (f_prev == SP) {
	    fold_state = 0;         /* remove duplicate spaces */
	} else {
	    f_prev = SP;
	    if (++f_line<=fold_len)
		fold_state = SP;         /* output ASCII space only */
	    else {
		f_prev = SP; f_line = 0;
		fold_state = CR;        /* fold and output nothing */
	    }
	}
    } else {
	prev0 = f_prev; /* we still need this one... , but almost done */
	f_prev = c1;
	if (c2 || c2 == JIS_X_0201_1976_K)
	    f_prev |= 0x80;  /* this is Japanese */
	/* X0201 kana occupies one column, other characters char_size() */
	f_line += c2 == JIS_X_0201_1976_K ? 1: char_size(c2,c1);
	if (f_line<=fold_len) {   /* normal case */
	    fold_state = 1;
	} else {
	    if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
		f_line = char_size(c2,c1);
		fold_state =  LF;       /* We can't wait, do fold now */
	    } else if (c2 == JIS_X_0201_1976_K) {
		/* simple kinsoku rules  return 1 means no folding  */
		if (c1==(0xde&0x7f)) fold_state = 1; /* 0xDE: halfwidth voiced sound mark */
		else if (c1==(0xdf&0x7f)) fold_state = 1; /* 0xDF: halfwidth semi-voiced sound mark */
		else if (c1==(0xa4&0x7f)) fold_state = 1; /* 0xA4: halfwidth ideographic comma */
		else if (c1==(0xa3&0x7f)) fold_state = 1; /* 0xA3: halfwidth right corner bracket */
		else if (c1==(0xa1&0x7f)) fold_state = 1; /* 0xA1: halfwidth ideographic full stop */
		else if (c1==(0xb0&0x7f)) fold_state = 1; /* 0xB0: halfwidth prolonged sound mark */
		else if (SP<=c1 && c1<=(0xdf&0x7f)) {      /* X0201 */
		    f_line = 1;
		    fold_state = LF;/* add one new f_line before this character */
		} else {
		    f_line = 1;
		    fold_state = LF;/* add one new f_line before this character */
		}
	    } else if (c2==0) {
		/* kinsoku point in ASCII */
		if (  c1==')'||    /* { [ ( */
		    c1==']'||
		    c1=='}'||
		    c1=='.'||
		    c1==','||
		    c1=='!'||
		    c1=='?'||
		    c1=='/'||
		    c1==':'||
		    c1==';') {
		    fold_state = 1;
		    /* just after special */
		} else if (!is_alnum(prev0)) {
		    f_line = char_size(c2,c1);
		    fold_state = LF;
		} else if ((prev0==SP) ||   /* ignored new f_line */
			   (prev0==LF)||        /* ignored new f_line */
			   (prev0&0x80)) {        /* X0208 - ASCII */
		    f_line = char_size(c2,c1);
		    fold_state = LF;/* add one new f_line before this character */
		} else {
		    fold_state = 1;  /* default no fold in ASCII */
		}
	    } else {
		/* two-byte character: only some symbols with c2 == 0x21 may stay */
		if (c2=='!') {
		    if (c1=='"')  fold_state = 1; /* 0x2122: ideographic comma */
		    else if (c1=='#')  fold_state = 1; /* 0x2123: ideographic full stop */
		    else if (c1=='W')  fold_state = 1; /* 0x2157: right corner bracket */
		    else if (c1=='K')  fold_state = 1; /* 0x214B */
		    else if (c1=='$')  fold_state = 1; /* 0x2124 */
		    else if (c1=='%')  fold_state = 1; /* 0x2125 */
		    else if (c1=='\'') fold_state = 1; /* 0x2127 */
		    else if (c1=='(')  fold_state = 1; /* 0x2128 */
		    else if (c1==')')  fold_state = 1; /* 0x2129 */
		    else if (c1=='*')  fold_state = 1; /* 0x212A */
		    else if (c1=='+')  fold_state = 1; /* 0x212B: voiced sound mark */
		    else if (c1==',')  fold_state = 1; /* 0x212C: semi-voiced sound mark */
		    /* default no fold in kinsoku */
		    else {
			fold_state = LF;
			f_line = char_size(c2,c1);
			/* add one new f_line before this character */
		    }
		} else {
		    f_line = char_size(c2,c1);
		    fold_state = LF;
		    /* add one new f_line before this character */
		}
	    }
	}
    }
    /* terminator process */
    switch(fold_state) {
    case LF:
	oconv_newline(o_fconv);
	(*o_fconv)(c2,c1);
	break;
    case 0:
	return;
    case CR:
	oconv_newline(o_fconv);
	break;
    case TAB:
    case SP:
	(*o_fconv)(0,SP);
	break;
    default:
	(*o_fconv)(c2,c1);
    }
}
3978
3979static nkf_char z_prev2=0,z_prev1=0;
3980
3981static void
3982z_conv(nkf_char c2, nkf_char c1)
3983{
3984
3985    /* if (c2) c1 &= 0x7f; assertion */
3986
3987    if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3988	(*o_zconv)(c2,c1);
3989	return;
3990    }
3991
3992    if (x0201_f) {
3993	if (z_prev2 == JIS_X_0201_1976_K) {
3994	    if (c2 == JIS_X_0201_1976_K) {
3995		if (c1 == (0xde&0x7f)) { /* $BByE@(B */
3996		    z_prev2 = 0;
3997		    (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3998		    return;
3999		} else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
4000		    z_prev2 = 0;
4001		    (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
4002		    return;
4003		} else if (x0213_f && c1 == (0xdf&0x7f) && ev_x0213[(z_prev1-SP)*2]) {  /* $BH>ByE@(B */
4004		    z_prev2 = 0;
4005		    (*o_zconv)(ev_x0213[(z_prev1-SP)*2], ev_x0213[(z_prev1-SP)*2+1]);
4006		    return;
4007		}
4008	    }
4009	    z_prev2 = 0;
4010	    (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
4011	}
4012	if (c2 == JIS_X_0201_1976_K) {
4013	    if (dv[(c1-SP)*2] || ev[(c1-SP)*2] || (x0213_f && ev_x0213[(c1-SP)*2])) {
4014		/* wait for $BByE@(B or $BH>ByE@(B */
4015		z_prev1 = c1;
4016		z_prev2 = c2;
4017		return;
4018	    } else {
4019		(*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
4020		return;
4021	    }
4022	}
4023    }
4024
4025    if (c2 == EOF) {
4026	(*o_zconv)(c2, c1);
4027	return;
4028    }
4029
4030    if (alpha_f&1 && c2 == 0x23) {
4031	/* JISX0208 Alphabet */
4032	c2 = 0;
4033    } else if (c2 == 0x21) {
4034	/* JISX0208 Kigou */
4035	if (0x21==c1) {
4036	    if (alpha_f&2) {
4037		c2 = 0;
4038		c1 = SP;
4039	    } else if (alpha_f&4) {
4040		(*o_zconv)(0, SP);
4041		(*o_zconv)(0, SP);
4042		return;
4043	    }
4044	} else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
4045	    c2 =  0;
4046	    c1 = fv[c1-0x20];
4047	}
4048    }
4049
4050    if (alpha_f&8 && c2 == 0) {
4051	/* HTML Entity */
4052	const char *entity = 0;
4053	switch (c1){
4054	case '>': entity = "&gt;"; break;
4055	case '<': entity = "&lt;"; break;
4056	case '\"': entity = "&quot;"; break;
4057	case '&': entity = "&amp;"; break;
4058	}
4059	if (entity){
4060	    while (*entity) (*o_zconv)(0, *entity++);
4061	    return;
4062	}
4063    }
4064
4065    if (alpha_f & 16) {
4066	/* JIS X 0208 Katakana to JIS X 0201 Katakana */
4067	if (c2 == 0x21) {
4068	    nkf_char c = 0;
4069	    switch (c1) {
4070	    case 0x23:
4071		/* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
4072		c = 0xA1;
4073		break;
4074	    case 0x56:
4075		/* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
4076		c = 0xA2;
4077		break;
4078	    case 0x57:
4079		/* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
4080		c = 0xA3;
4081		break;
4082	    case 0x22:
4083		/* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
4084		c = 0xA4;
4085		break;
4086	    case 0x26:
4087		/* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
4088		c = 0xA5;
4089		break;
4090	    case 0x3C:
4091		/* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
4092		c = 0xB0;
4093		break;
4094	    case 0x2B:
4095		/* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
4096		c = 0xDE;
4097		break;
4098	    case 0x2C:
4099		/* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
4100		c = 0xDF;
4101		break;
4102	    }
4103	    if (c) {
4104		(*o_zconv)(JIS_X_0201_1976_K, c);
4105		return;
4106	    }
4107	} else if (c2 == 0x25) {
4108	    /* JISX0208 Katakana */
4109	    static const int fullwidth_to_halfwidth[] =
4110	    {
4111		0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
4112		0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
4113		0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
4114		0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
4115		0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
4116		0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
4117		0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
4118		0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
4119		0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
4120		0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
4121		0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x365F,
4122		0x375F, 0x385F, 0x395F, 0x3A5F, 0x3E5F, 0x425F, 0x445F, 0x0000
4123	    };
4124	    if (fullwidth_to_halfwidth[c1-0x20]){
4125		c2 = fullwidth_to_halfwidth[c1-0x20];
4126		(*o_zconv)(JIS_X_0201_1976_K, c2>>8);
4127		if (c2 & 0xFF) {
4128		    (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
4129		}
4130		return;
4131	    }
4132	} else if (c2 == 0 && nkf_char_unicode_p(c1) &&
4133	    ((c1&VALUE_MASK) == 0x3099 || (c1&VALUE_MASK) == 0x309A)) { /* $B9g@.MQByE@!&H>ByE@(B */
4134	    (*o_zconv)(JIS_X_0201_1976_K, 0x5E + (c1&VALUE_MASK) - 0x3099);
4135	    return;
4136	}
4137    }
4138    (*o_zconv)(c2,c1);
4139}
4140
4141
/*
 * rot13(c): rotate the letters A-Z and a-z by 13 places; every other
 * value is returned unchanged.
 * The argument is parenthesized at each use so that an expression such
 * as (x & 0x7f) expands correctly; it is still evaluated more than once,
 * so it must not have side effects.
 */
#define rot13(c)  ( \
		   ((c) < 'A')   ? (c): \
		   ((c) <= 'M')  ? ((c) + 13): \
		   ((c) <= 'Z')  ? ((c) - 13): \
		   ((c) < 'a')   ? (c): \
		   ((c) <= 'm')  ? ((c) + 13): \
		   ((c) <= 'z')  ? ((c) - 13): \
		   (c) \
		  )

/*
 * rot47(c): rotate the values '!'..'~' by 47 places; every other value
 * is returned unchanged.  Same argument rules as rot13().
 */
#define  rot47(c) ( \
		   ((c) < '!')  ? (c): \
		   ((c) <= 'O') ? ((c) + 47) : \
		   ((c) <= '~') ? ((c) - 47) : \
		   (c) \
		  )
4158
4159static void
4160rot_conv(nkf_char c2, nkf_char c1)
4161{
4162    if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
4163	c1 = rot13(c1);
4164    } else if (c2) {
4165	c1 = rot47(c1);
4166	c2 = rot47(c2);
4167    }
4168    (*o_rot_conv)(c2,c1);
4169}
4170
4171static void
4172hira_conv(nkf_char c2, nkf_char c1)
4173{
4174    if (hira_f & 1) {
4175	if (c2 == 0x25) {
4176	    if (0x20 < c1 && c1 < 0x74) {
4177		c2 = 0x24;
4178		(*o_hira_conv)(c2,c1);
4179		return;
4180	    } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
4181		c2 = 0;
4182		c1 = nkf_char_unicode_new(0x3094);
4183		(*o_hira_conv)(c2,c1);
4184		return;
4185	    }
4186	} else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
4187	    c1 += 2;
4188	    (*o_hira_conv)(c2,c1);
4189	    return;
4190	}
4191    }
4192    if (hira_f & 2) {
4193	if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
4194	    c2 = 0x25;
4195	    c1 = 0x74;
4196	} else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
4197	    c2 = 0x25;
4198	} else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
4199	    c1 -= 2;
4200	}
4201    }
4202    (*o_hira_conv)(c2,c1);
4203}
4204
4205
4206static void
4207iso2022jp_check_conv(nkf_char c2, nkf_char c1)
4208{
4209#define RANGE_NUM_MAX 18
4210    static const nkf_char range[RANGE_NUM_MAX][2] = {
4211	{0x222f, 0x2239,},
4212	{0x2242, 0x2249,},
4213	{0x2251, 0x225b,},
4214	{0x226b, 0x2271,},
4215	{0x227a, 0x227d,},
4216	{0x2321, 0x232f,},
4217	{0x233a, 0x2340,},
4218	{0x235b, 0x2360,},
4219	{0x237b, 0x237e,},
4220	{0x2474, 0x247e,},
4221	{0x2577, 0x257e,},
4222	{0x2639, 0x2640,},
4223	{0x2659, 0x267e,},
4224	{0x2742, 0x2750,},
4225	{0x2772, 0x277e,},
4226	{0x2841, 0x287e,},
4227	{0x4f54, 0x4f7e,},
4228	{0x7425, 0x747e},
4229    };
4230    nkf_char i;
4231    nkf_char start, end, c;
4232
4233    if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
4234	c2 = GETA1;
4235	c1 = GETA2;
4236    }
4237    if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
4238	c2 = GETA1;
4239	c1 = GETA2;
4240    }
4241
4242    for (i = 0; i < RANGE_NUM_MAX; i++) {
4243	start = range[i][0];
4244	end   = range[i][1];
4245	c     = (c2 << 8) + c1;
4246	if (c >= start && c <= end) {
4247	    c2 = GETA1;
4248	    c1 = GETA2;
4249	}
4250    }
4251    (*o_iso2022jp_check_conv)(c2,c1);
4252}
4253
4254
/* This converts  =?ISO-2022-JP?B?HOGE HOGE?= */

/*
 * Encoded-word prefixes recognized by mime_begin_strict(), terminated by
 * NULL.  "\075" is '='.  The tables that follow have one entry per
 * pattern, in the same order.
 */
static const unsigned char *mime_pattern[] = {
    (const unsigned char *)"\075?EUC-JP?B?",
    (const unsigned char *)"\075?SHIFT_JIS?B?",
    (const unsigned char *)"\075?ISO-8859-1?Q?",
    (const unsigned char *)"\075?ISO-8859-1?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?B?",
    (const unsigned char *)"\075?ISO-2022-JP?Q?",
#if defined(UTF8_INPUT_ENABLE)
    (const unsigned char *)"\075?UTF-8?B?",
    (const unsigned char *)"\075?UTF-8?Q?",
#endif
    (const unsigned char *)"\075?US-ASCII?Q?",
    NULL
};


/*
 * Marker used to raise the priority of the matching input code: the
 * converter that mime_begin_strict() installs for each pattern
 * (0 where there is none).
 */
nkf_char (*mime_priority_func[])(nkf_char c2, nkf_char c1, nkf_char c0) = {
    e_iconv, s_iconv, 0, 0, 0, 0, 0,
#if defined(UTF8_INPUT_ENABLE)
    w_iconv, w_iconv,
#endif
    0,
};

/* Encoding constant for each pattern, terminated by 0. */
static const nkf_char mime_encode[] = {
    EUC_JP, SHIFT_JIS, ISO_8859_1, ISO_8859_1, JIS_X_0208, JIS_X_0201_1976_K, JIS_X_0201_1976_K,
#if defined(UTF8_INPUT_ENABLE)
    UTF_8, UTF_8,
#endif
    ASCII,
    0
};

/* Transfer encoding letter ('B' or 'Q') for each pattern, terminated by 0. */
static const nkf_char mime_encode_method[] = {
    'B', 'B','Q', 'B', 'B', 'B', 'Q',
#if defined(UTF8_INPUT_ENABLE)
    'B', 'Q',
#endif
    'Q',
    0
};
4300
4301
/* MIME preprocessor fifo */

#define MIME_BUF_SIZE   (1024)    /* 2^n ring buffer */
#define MIME_BUF_MASK   (MIME_BUF_SIZE-1)
/* Ring-buffer slot for position n; n is wrapped with MIME_BUF_MASK. */
#define mime_input_buf(n)        mime_input_state.buf[(n)&MIME_BUF_MASK]
/*
 * The three positions are free-running counters; they are only reduced
 * modulo the buffer size when a slot is accessed through mime_input_buf().
 */
static struct {
    unsigned char buf[MIME_BUF_SIZE];
    unsigned int  top;
    unsigned int  last;  /* decoded */
    unsigned int  input; /* undecoded */
} mime_input_state;
/* Converter that was active before MIME decoding started; put back by
   unswitch_mime_getc(). */
static nkf_char (*mime_iconv_back)(nkf_char c2,nkf_char c1,nkf_char c0) = NULL;

/* Size of the recovery buffer in mime_begin_strict() and the scan limit
   in mime_begin(). */
#define MAXRECOVER 20
4316
4317static void
4318mime_input_buf_unshift(nkf_char c)
4319{
4320    mime_input_buf(--mime_input_state.top) = (unsigned char)c;
4321}
4322
4323static nkf_char
4324mime_ungetc(nkf_char c, ARG_UNUSED FILE *f)
4325{
4326    mime_input_buf_unshift(c);
4327    return c;
4328}
4329
4330static nkf_char
4331mime_ungetc_buf(nkf_char c, FILE *f)
4332{
4333    if (mimebuf_f)
4334	(*i_mungetc_buf)(c,f);
4335    else
4336	mime_input_buf(--mime_input_state.input) = (unsigned char)c;
4337    return c;
4338}
4339
4340static nkf_char
4341mime_getc_buf(FILE *f)
4342{
4343    /* we don't keep eof of mime_input_buf, becase it contains ?= as
4344       a terminator. It was checked in mime_integrity. */
4345    return ((mimebuf_f)?
4346	    (*i_mgetc_buf)(f):mime_input_buf(mime_input_state.input++));
4347}
4348
4349static void
4350switch_mime_getc(void)
4351{
4352    if (i_getc!=mime_getc) {
4353	i_mgetc = i_getc; i_getc = mime_getc;
4354	i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
4355	if(mime_f==STRICT_MIME) {
4356	    i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf;
4357	    i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf;
4358	}
4359    }
4360}
4361
4362static void
4363unswitch_mime_getc(void)
4364{
4365    if(mime_f==STRICT_MIME) {
4366	i_mgetc = i_mgetc_buf;
4367	i_mungetc = i_mungetc_buf;
4368    }
4369    i_getc = i_mgetc;
4370    i_ungetc = i_mungetc;
4371    if(mime_iconv_back)set_iconv(FALSE, mime_iconv_back);
4372    mime_iconv_back = NULL;
4373}
4374
4375static nkf_char
4376mime_integrity(FILE *f, const unsigned char *p)
4377{
4378    nkf_char c,d;
4379    unsigned int q;
4380    /* In buffered mode, read until =? or NL or buffer full
4381     */
4382    mime_input_state.input = mime_input_state.top;
4383    mime_input_state.last = mime_input_state.top;
4384
4385    while(*p) mime_input_buf(mime_input_state.input++) = *p++;
4386    d = 0;
4387    q = mime_input_state.input;
4388    while((c=(*i_getc)(f))!=EOF) {
4389	if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
4390	    break;   /* buffer full */
4391	}
4392	if (c=='=' && d=='?') {
4393	    /* checked. skip header, start decode */
4394	    mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4395	    /* mime_last_input = mime_input_state.input; */
4396	    mime_input_state.input = q;
4397	    switch_mime_getc();
4398	    return 1;
4399	}
4400	if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4401	    break;
4402	/* Should we check length mod 4? */
4403	mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4404	d=c;
4405    }
4406    /* In case of Incomplete MIME, no MIME decode  */
4407    mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4408    mime_input_state.last = mime_input_state.input;     /* point undecoded buffer */
4409    mime_decode_mode = 1;              /* no decode on mime_input_buf last in mime_getc */
4410    switch_mime_getc();         /* anyway we need buffered getc */
4411    return 1;
4412}
4413
/*
 * Strict recognition of a MIME encoded-word header; called after "=?"
 * has been read.
 *
 * Input is compared, case-insensitively via nkf_toupper(), against the
 * entries of mime_pattern.  When the current pattern stops matching, the
 * following patterns are searched for one that shares the prefix matched
 * so far and also accepts the new character.  If none is left, the
 * characters consumed so far are written through oconv, the offending
 * character is pushed back, and it is returned.
 *
 * On a match, mime_decode_mode is set from the pattern ('B' or 'Q'), the
 * current converter is saved in mime_iconv_back and the converter listed
 * for the pattern is installed.  For 'B' words in buffered mode the
 * result of mime_integrity() is returned; otherwise the MIME reader is
 * installed and the last character read is returned.
 */
static nkf_char
mime_begin_strict(FILE *f)
{
    nkf_char c1 = 0;
    int i,j,k;
    const unsigned char *p,*q;
    nkf_char r[MAXRECOVER];    /* recovery buffer, max mime pattern length */

    mime_decode_mode = FALSE;
    /* =? has been checked */
    j = 0;
    p = mime_pattern[j];
    r[0]='='; r[1]='?';

    for(i=2;p[i]>SP;i++) {                   /* start at =? */
	if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
	    /* pattern fails, try next one */
	    q = p;
	    while (mime_pattern[++j]) {
		p = mime_pattern[j];
		/* the candidate must agree with everything matched so far */
		for(k=2;k<i;k++)              /* assume length(p) > i */
		    if (p[k]!=q[k]) break;
		if (k==i && nkf_toupper(c1)==p[k]) break;
	    }
	    p = mime_pattern[j];
	    /* `continue` also runs i++, so matching resumes after c1 */
	    if (p) continue;  /* found next one, continue */
	    /* all fails, output from recovery buffer */
	    (*i_ungetc)(c1,f);
	    for(j=0;j<i;j++) {
		(*oconv)(0,r[j]);
	    }
	    return c1;
	}
    }
    /* p[i-2] is the character just before the pattern's final '?' */
    mime_decode_mode = p[i-2];

    mime_iconv_back = iconv;
    set_iconv(FALSE, mime_priority_func[j]);
    clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME);

    if (mime_decode_mode=='B') {
	mimebuf_f = unbuf_f;
	if (!unbuf_f) {
	    /* do MIME integrity check */
	    return mime_integrity(f,mime_pattern[j]);
	}
    }
    switch_mime_getc();
    mimebuf_f = TRUE;
    return c1;
}
4465
/*
 * Non-strict recognition of a MIME encoded-word header; called after
 * "=?" has been read.
 *
 * Up to MAXRECOVER characters are copied into the MIME ring buffer while
 * looking for "?B?" or "?Q?" (either case) after a charset name made of
 * alphanumerics, '-', '_', spaces and line breaks.  When the header is
 * accepted, mime_decode_mode is 'B' or 'Q' and the buffered header is
 * discarded.  Otherwise mime_decode_mode is set to 1, so that the
 * buffered characters are re-read without decoding.  The MIME reader is
 * installed in both cases.
 *
 * Returns the last character read.
 */
static nkf_char
mime_begin(FILE *f)
{
    nkf_char c1 = 0;
    int i,k;

    /* In NONSTRICT mode, only =? is checked. In case of failure, we  */
    /* re-read and convert again from mime_buffer.  */

    /* =? has been checked */
    k = mime_input_state.last;	/* remembered so the header can be dropped */
    mime_input_buf(mime_input_state.last++)='='; mime_input_buf(mime_input_state.last++)='?';
    for(i=2;i<MAXRECOVER;i++) {                   /* start at =? */
	/* We accept any character type even if it is broken by new lines */
	c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	if (c1==LF||c1==SP||c1==CR||
	    c1=='-'||c1=='_'||is_alnum(c1)) continue;
	if (c1=='=') {
	    /* Failed. But this could be another MIME preamble */
	    (*i_ungetc)(c1,f);
	    mime_input_state.last--;
	    break;
	}
	if (c1!='?') break;
	else {
	    /* c1=='?' */
	    c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	    if (!(++i<MAXRECOVER) || c1==EOF) break;
	    if (c1=='b'||c1=='B') {
		mime_decode_mode = 'B';
	    } else if (c1=='q'||c1=='Q') {
		mime_decode_mode = 'Q';
	    } else {
		break;
	    }
	    c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
	    if (!(++i<MAXRECOVER) || c1==EOF) break;
	    if (c1!='?') {
		/* the encoding letter was not followed by '?' */
		mime_decode_mode = FALSE;
	    }
	    break;
	}
    }
    switch_mime_getc();
    if (!mime_decode_mode) {
	/* false MIME preamble, restart from mime_buffer */
	mime_decode_mode = 1;  /* no decode, but read from the mime_buffer */
	/* Since we are in MIME mode until buffer becomes empty,    */
	/* we never go into mime_begin again for a while.           */
	return c1;
    }
    /* discard mime preamble, and goto MIME mode */
    mime_input_state.last = k;
    /* do no MIME integrity check */
    return c1;   /* used only for checking EOF */
}
4522
4523#ifdef CHECK_OPTION
4524static void
4525no_putc(ARG_UNUSED nkf_char c)
4526{
4527    ;
4528}
4529
4530static void
4531debug(const char *str)
4532{
4533    if (debug_f){
4534	fprintf(stderr, "%s\n", str ? str : "NULL");
4535    }
4536}
4537#endif
4538
4539static void
4540set_input_codename(const char *codename)
4541{
4542    if (!input_codename) {
4543	input_codename = codename;
4544    } else if (strcmp(codename, input_codename) != 0) {
4545	input_codename = "";
4546    }
4547}
4548
4549static const char*
4550get_guessed_code(void)
4551{
4552    if (input_codename && !*input_codename) {
4553	input_codename = "BINARY";
4554    } else {
4555	struct input_code *p = find_inputcode_byfunc(iconv);
4556	if (!input_codename) {
4557	    input_codename = "ASCII";
4558	} else if (strcmp(input_codename, "Shift_JIS") == 0) {
4559	    if (p->score & (SCORE_DEPEND|SCORE_CP932))
4560		input_codename = "CP932";
4561	} else if (strcmp(input_codename, "EUC-JP") == 0) {
4562	    if (p->score & SCORE_X0213)
4563		input_codename = "EUC-JIS-2004";
4564	    else if (p->score & (SCORE_X0212))
4565		input_codename = "EUCJP-MS";
4566	    else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4567		input_codename = "CP51932";
4568	} else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4569	    if (p->score & (SCORE_KANA))
4570		input_codename = "CP50221";
4571	    else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4572		input_codename = "CP50220";
4573	}
4574    }
4575    return input_codename;
4576}
4577
4578#if !defined(PERL_XS) && !defined(WIN32DLL)
4579static void
4580print_guessed_code(char *filename)
4581{
4582    if (filename != NULL) printf("%s: ", filename);
4583    if (input_codename && !*input_codename) {
4584	printf("BINARY\n");
4585    } else {
4586	input_codename = get_guessed_code();
4587	if (guess_f == 1) {
4588	    printf("%s\n", input_codename);
4589	} else {
4590	    printf("%s%s%s%s\n",
4591		   input_codename,
4592		   iconv != w_iconv16 && iconv != w_iconv32 ? "" :
4593		   input_endian == ENDIAN_LITTLE ? " LE" :
4594		   input_endian == ENDIAN_BIG ? " BE" :
4595		   "[BUG]",
4596		   input_bom_f ? " (BOM)" : "",
4597		   input_eol == CR   ? " (CR)" :
4598		   input_eol == LF   ? " (LF)" :
4599		   input_eol == CRLF ? " (CRLF)" :
4600		   input_eol == EOF  ? " (MIXED NL)" :
4601		   "");
4602	}
4603    }
4604}
4605#endif /*WIN32DLL*/
4606
4607#ifdef INPUT_OPTION
4608
4609static nkf_char
4610hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4611{
4612    nkf_char c1, c2, c3;
4613    c1 = (*g)(f);
4614    if (c1 != ch){
4615	return c1;
4616    }
4617    c2 = (*g)(f);
4618    if (!nkf_isxdigit(c2)){
4619	(*u)(c2, f);
4620	return c1;
4621    }
4622    c3 = (*g)(f);
4623    if (!nkf_isxdigit(c3)){
4624	(*u)(c2, f);
4625	(*u)(c3, f);
4626	return c1;
4627    }
4628    return (hex2bin(c2) << 4) | hex2bin(c3);
4629}
4630
4631static nkf_char
4632cap_getc(FILE *f)
4633{
4634    return hex_getc(':', f, i_cgetc, i_cungetc);
4635}
4636
4637static nkf_char
4638cap_ungetc(nkf_char c, FILE *f)
4639{
4640    return (*i_cungetc)(c, f);
4641}
4642
4643static nkf_char
4644url_getc(FILE *f)
4645{
4646    return hex_getc('%', f, i_ugetc, i_uungetc);
4647}
4648
4649static nkf_char
4650url_ungetc(nkf_char c, FILE *f)
4651{
4652    return (*i_uungetc)(c, f);
4653}
4654#endif
4655
4656#ifdef NUMCHAR_OPTION
4657static nkf_char
4658numchar_getc(FILE *f)
4659{
4660    nkf_char (*g)(FILE *) = i_ngetc;
4661    nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4662    int i = 0, j;
4663    nkf_char buf[12];
4664    nkf_char c = -1;
4665
4666    buf[i] = (*g)(f);
4667    if (buf[i] == '&'){
4668	buf[++i] = (*g)(f);
4669	if (buf[i] == '#'){
4670	    c = 0;
4671	    buf[++i] = (*g)(f);
4672	    if (buf[i] == 'x' || buf[i] == 'X'){
4673		for (j = 0; j < 7; j++){
4674		    buf[++i] = (*g)(f);
4675		    if (!nkf_isxdigit(buf[i])){
4676			if (buf[i] != ';'){
4677			    c = -1;
4678			}
4679			break;
4680		    }
4681		    c <<= 4;
4682		    c |= hex2bin(buf[i]);
4683		}
4684	    }else{
4685		for (j = 0; j < 8; j++){
4686		    if (j){
4687			buf[++i] = (*g)(f);
4688		    }
4689		    if (!nkf_isdigit(buf[i])){
4690			if (buf[i] != ';'){
4691			    c = -1;
4692			}
4693			break;
4694		    }
4695		    c *= 10;
4696		    c += hex2bin(buf[i]);
4697		}
4698	    }
4699	}
4700    }
4701    if (c != -1){
4702	return nkf_char_unicode_new(c);
4703    }
4704    while (i > 0){
4705	(*u)(buf[i], f);
4706	--i;
4707    }
4708    return buf[0];
4709}
4710
4711static nkf_char
4712numchar_ungetc(nkf_char c, FILE *f)
4713{
4714    return (*i_nungetc)(c, f);
4715}
4716#endif
4717
4718#ifdef UNICODE_NORMALIZATION
4719
/*
 * Input filter that composes decomposed (NFD) byte sequences into their
 * composed (NFC) form using normalization_table[].
 *
 * A character that is EOF, above 0xFF, or of the form 10xxxxxx is returned
 * immediately.  Otherwise it starts a candidate sequence in
 * nkf_state->nfc_buf, and the table is searched by bisection on the .nfd
 * byte strings, reading further input bytes on demand for the comparison.
 * On a match the buffer content is replaced by the entry's .nfc bytes.
 *
 * On exit, everything in the buffer except its first element is pushed
 * back (last element first) and that first element is returned.
 *
 * NOTE(review): the bisection presumably requires normalization_table to
 * be sorted by its .nfd bytes -- confirm against the table definition.
 */
static nkf_char
nfc_getc(FILE *f)
{
    nkf_char (*g)(FILE *f) = i_nfc_getc;
    nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
    nkf_buf_t *buf = nkf_state->nfc_buf;
    const unsigned char *array;
    int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
    nkf_char c = (*g)(f);

    /* EOF, non-byte values and 10xxxxxx bytes never start a table entry */
    if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;

    nkf_buf_push(buf, c);
    do {
	while (lower <= upper) {
	    int mid = (lower+upper) / 2;
	    int len;
	    array = normalization_table[mid].nfd;
	    /* compare the candidate against entry `mid`, byte by byte */
	    for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
		if (len >= nkf_buf_length(buf)) {
		    /* need one more input byte to continue the comparison */
		    c = (*g)(f);
		    if (c == EOF) {
			len = 0;
			lower = 1, upper = 0;	/* empty range: ends both loops */
			break;
		    }
		    nkf_buf_push(buf, c);
		}
		if (array[len] != nkf_buf_at(buf, len)) {
		    /* mismatch: narrow the search range */
		    if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
		    else  upper = mid - 1;
		    len = 0;	/* 0 marks "no match" for the test below */
		    break;
		}
	    }
	    if (len > 0) {
		/* full match: replace the buffered bytes by the composed form */
		int i;
		array = normalization_table[mid].nfc;
		nkf_buf_clear(buf);
		for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
		    nkf_buf_push(buf, array[i]);
		break;
	    }
	}
	/* lower/upper are not reset here, so after a match the search */
	/* resumes on the replaced buffer within the current range */
    } while (lower <= upper);

    /* return the first buffered byte; push the rest back, last first */
    while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
    c = nkf_buf_pop(buf);

    return c;
}
4771
4772static nkf_char
4773nfc_ungetc(nkf_char c, FILE *f)
4774{
4775    return (*i_nfc_ungetc)(c, f);
4776}
4777#endif /* UNICODE_NORMALIZATION */
4778
4779
4780static nkf_char
4781base64decode(nkf_char c)
4782{
4783    int             i;
4784    if (c > '@') {
4785	if (c < '[') {
4786	    i = c - 'A';                        /* A..Z 0-25 */
4787	} else if (c == '_') {
4788	    i = '?'         /* 63 */ ;          /* _  63 */
4789	} else {
4790	    i = c - 'G'     /* - 'a' + 26 */ ;  /* a..z 26-51 */
4791	}
4792    } else if (c > '/') {
4793	i = c - '0' + '4'   /* - '0' + 52 */ ;  /* 0..9 52-61 */
4794    } else if (c == '+' || c == '-') {
4795	i = '>'             /* 62 */ ;          /* + and -  62 */
4796    } else {
4797	i = '?'             /* 63 */ ;          /* / 63 */
4798    }
4799    return (i);
4800}
4801
/*
 * getc replacement used while a MIME encoded word is being decoded.
 *
 * Order of work:
 *   1. If the MIME input FIFO holds data, return its next byte.
 *   2. If mime_decode_mode is 1 or FALSE, leave MIME mode
 *      (unswitch_mime_getc) and read from the normal input.
 *   3. 'Q': decode one quoted-printable unit ("_" -> SP, "=XX" -> byte,
 *      "=" + control character -> soft line break, "?=" -> end of word).
 *   4. 'B': read four Base64 characters, decode up to three bytes into the
 *      FIFO and return the first one ("?=" -> end of word).
 *
 * At the end of a word ("?=") the following linear white space is
 * collected.  It is dropped when the next character is '=', and otherwise
 * pushed back so it is read normally.
 *
 * exit_mode is the decode mode to fall back to on leaving the word: the
 * current mode when mimebuf_f == FIXED_MIME, FALSE otherwise.
 *
 * Returns the next decoded character, or EOF.
 */
static nkf_char
mime_getc(FILE *f)
{
    nkf_char c1, c2, c3, c4, cc;
    nkf_char t1, t2, t3, t4, mode, exit_mode;
    nkf_char lwsp_count;
    char *lwsp_buf;
    char *lwsp_buf_new;
    nkf_char lwsp_size = 128;

    if (mime_input_state.top != mime_input_state.last) {  /* Something is in FIFO */
	return  mime_input_buf(mime_input_state.top++);
    }
    if (mime_decode_mode==1 ||mime_decode_mode==FALSE) {
	/* FIFO drained and nothing to decode: back to plain input */
	mime_decode_mode=FALSE;
	unswitch_mime_getc();
	return (*i_getc)(f);
    }

    if (mimebuf_f == FIXED_MIME)
	exit_mode = mime_decode_mode;
    else
	exit_mode = FALSE;
    if (mime_decode_mode == 'Q') {
	if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
      restart_mime_q:
	if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
	if (c1<=SP || DEL<=c1) {
	    mime_decode_mode = exit_mode; /* prepare for quit */
	    return c1;
	}
	/* ordinary character: anything but '=' and (outside FIXED_MIME) '?' */
	if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
	    return c1;
	}

	mime_decode_mode = exit_mode; /* prepare for quit */
	if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
	if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
	    /* end Q encoding */
	    input_mode = exit_mode;
	    lwsp_count = 0;
	    lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
	    /* collect the white space that follows the encoded word */
	    while ((c1=(*i_getc)(f))!=EOF) {
		switch (c1) {
		case LF:
		case CR:
		    if (c1==LF) {
			/* LF + blank is a folded line: treat as one SP */
			if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			    i_ungetc(SP,f);
			    continue;
			} else {
			    i_ungetc(c1,f);
			}
			c1 = LF;
		    } else {
			/* CR LF + blank is a folded line as well */
			if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
			    if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
				i_ungetc(SP,f);
				continue;
			    } else {
				i_ungetc(c1,f);
			    }
			    i_ungetc(LF,f);
			} else {
			    i_ungetc(c1,f);
			}
			c1 = CR;
		    }
		    break;
		case SP:
		case TAB:
		    lwsp_buf[lwsp_count] = (unsigned char)c1;
		    /* grow once the count passes lwsp_size; the +5 slack */
		    /* covers the writes made before the growth */
		    if (lwsp_count++>lwsp_size){
			lwsp_size <<= 1;
			lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
			lwsp_buf = lwsp_buf_new;
		    }
		    continue;
		}
		break;
	    }
	    /* Unless the white space is followed by '=', push it back: */
	    /* c1 first, then the buffer from its end, so that          */
	    /* lwsp_buf[0] is the character returned now.               */
	    if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
		i_ungetc(c1,f);
		for(lwsp_count--;lwsp_count>0;lwsp_count--)
		    i_ungetc(lwsp_buf[lwsp_count],f);
		c1 = lwsp_buf[0];
	    }
	    nkf_xfree(lwsp_buf);
	    return c1;
	}
	if (c1=='='&&c2<SP) { /* this is soft wrap */
	    while((c1 =  (*i_mgetc)(f)) <=SP) {
		if (c1 == EOF) return (EOF);
	    }
	    mime_decode_mode = 'Q'; /* still in MIME */
	    goto restart_mime_q;
	}
	if (c1=='?') {
	    /* '?' not followed by '=': literal '?', re-read c2 later */
	    mime_decode_mode = 'Q'; /* still in MIME */
	    (*i_mungetc)(c2,f);
	    return c1;
	}
	if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
	if (c2<=SP) return c2;
	mime_decode_mode = 'Q'; /* still in MIME */
	/* "=XX": c2 and c3 are used as hex digits without validation */
	return ((hex2bin(c2)<<4) + hex2bin(c3));
    }

    if (mime_decode_mode != 'B') {
	mime_decode_mode = FALSE;
	return (*i_mgetc)(f);
    }


    /* Base64 encoding */
    /*
       MIME allows line break in the middle of
       Base64, but we are very pessimistic in decoding
       in unbuf mode because MIME encoded code may be broken by
       less or editor's control sequence (such as ESC-[-K in unbuffered
       mode. ignore incomplete MIME.
     */
    mode = mime_decode_mode;
    mime_decode_mode = exit_mode;  /* prepare for quit */

    /* skip leading control characters and spaces */
    while ((c1 = (*i_mgetc)(f))<=SP) {
	if (c1==EOF)
	    return (EOF);
    }
  mime_c2_retry:
    if ((c2 = (*i_mgetc)(f))<=SP) {
	if (c2==EOF)
	    return (EOF);
	/* outside STRICT_MIME, white space inside a quantum is skipped */
	if (mime_f != STRICT_MIME) goto mime_c2_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c2;
    }
    if ((c1 == '?') && (c2 == '=')) {
	/* end of the Base64 encoded word */
	input_mode = ASCII;
	lwsp_count = 0;
	lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
	/* collect the white space that follows the encoded word */
	while ((c1=(*i_getc)(f))!=EOF) {
	    switch (c1) {
	    case LF:
	    case CR:
		if (c1==LF) {
		    /* LF + blank is a folded line: treat as one SP */
		    if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			i_ungetc(SP,f);
			continue;
		    } else {
			i_ungetc(c1,f);
		    }
		    c1 = LF;
		} else {
		    /* NOTE(review): unlike the Q branch above, this does */
		    /* not test for LF after CR; it reads one more        */
		    /* character and ungets LF unconditionally -- confirm */
		    /* whether this is intended.                          */
		    if ((c1=(*i_getc)(f))!=EOF) {
			if (c1==SP) {
			    i_ungetc(SP,f);
			    continue;
			} else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
			    i_ungetc(SP,f);
			    continue;
			} else {
			    i_ungetc(c1,f);
			}
			i_ungetc(LF,f);
		    } else {
			i_ungetc(c1,f);
		    }
		    c1 = CR;
		}
		break;
	    case SP:
	    case TAB:
		lwsp_buf[lwsp_count] = (unsigned char)c1;
		/* grow once the count passes lwsp_size (see Q branch) */
		if (lwsp_count++>lwsp_size){
		    lwsp_size <<= 1;
		    lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
		    lwsp_buf = lwsp_buf_new;
		}
		continue;
	    }
	    break;
	}
	/* push the white space back unless it is followed by '=' */
	if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
	    i_ungetc(c1,f);
	    for(lwsp_count--;lwsp_count>0;lwsp_count--)
		i_ungetc(lwsp_buf[lwsp_count],f);
	    c1 = lwsp_buf[0];
	}
	nkf_xfree(lwsp_buf);
	return c1;
    }
  mime_c3_retry:
    if ((c3 = (*i_mgetc)(f))<=SP) {
	if (c3==EOF)
	    return (EOF);
	if (mime_f != STRICT_MIME) goto mime_c3_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c3;
    }
  mime_c4_retry:
    if ((c4 = (*i_mgetc)(f))<=SP) {
	if (c4==EOF)
	    return (EOF);
	if (mime_f != STRICT_MIME) goto mime_c4_retry;
	if (mimebuf_f!=FIXED_MIME) input_mode = ASCII;
	return c4;
    }

    mime_decode_mode = mode; /* still in MIME sigh... */

    /* BASE 64 decoding */

    /* four 6-bit values -> up to three bytes; '=' marks padding */
    t1 = 0x3f & base64decode(c1);
    t2 = 0x3f & base64decode(c2);
    t3 = 0x3f & base64decode(c3);
    t4 = 0x3f & base64decode(c4);
    cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
    if (c2 != '=') {
	mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
	if (c3 != '=') {
	    mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	    cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
	    if (c4 != '=')
		mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
	}
    } else {
	return c1;
    }
    /* decoded bytes were queued in the FIFO; hand out the first one */
    return  mime_input_buf(mime_input_state.top++);
}
5034
/* Base64 alphabet: index is the 6-bit value, element is the output character. */
static const char basis_64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Capacity of the MIME output look-behind buffer (buf has one extra slot). */
#define MIMEOUT_BUF_LENGTH 74
/* Characters held back by mime_putc() until it decides how to emit them. */
static struct {
    unsigned char buf[MIMEOUT_BUF_LENGTH+1];
    int count;	/* number of valid bytes in buf */
} mimeout_state;
5043
5044/*nkf_char mime_lastchar2, mime_lastchar1;*/
5045
/*
 * Start a MIME encoded word for the output encoding `mode`.
 *
 * `mode` is looked up in mime_encode[]; the matching index selects the
 * preamble string from mime_pattern[] and the initial encoder state from
 * mime_encode_method[] (stored in mimeout_mode).  If the current line is
 * already longer than 45 columns, a line break plus one SP is emitted
 * first.  Leading white space held in mimeout_state.buf goes out raw in
 * front of the preamble; the rest of the buffer is then fed back through
 * mime_putc() so it is encoded inside the new word.
 *
 * NOTE(review): when no entry matches, p stays mime_pattern[0] while
 * mimeout_mode is read from the index of the terminating entry -- confirm
 * that the parallel tables share the same terminator position.
 */
static void
open_mime(nkf_char mode)
{
    const unsigned char *p;
    int i;
    int j;
    p  = mime_pattern[0];
    for(i=0;mime_pattern[i];i++) {
	if (mode == mime_encode[i]) {
	    p = mime_pattern[i];
	    break;
	}
    }
    mimeout_mode = mime_encode_method[i];
    i = 0;	/* from here on i indexes mimeout_state.buf */
    if (base64_count>45) {
	/* line is getting long: fold before opening the word */
	if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
	    (*o_mputc)(mimeout_state.buf[i]);
	    i++;
	}
	put_newline(o_mputc);
	(*o_mputc)(SP);
	base64_count = 1;
	/* one buffered white space is replaced by the SP just written */
	if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
	    i++;
	}
    }
    /* remaining leading white space goes out unencoded */
    for (;i<mimeout_state.count;i++) {
	if (nkf_isspace(mimeout_state.buf[i])) {
	    (*o_mputc)(mimeout_state.buf[i]);
	    base64_count ++;
	} else {
	    break;
	}
    }
    /* emit the preamble string */
    while(*p) {
	(*o_mputc)(*p++);
	base64_count ++;
    }
    /* clear the count before replaying: mime_putc() appends to this buffer */
    j = mimeout_state.count;
    mimeout_state.count = 0;
    for (;i<j;i++) {
	mime_putc(mimeout_state.buf[i]);
    }
}
5091
5092static void
5093mime_prechar(nkf_char c2, nkf_char c1)
5094{
5095    if (mimeout_mode > 0){
5096	if (c2 == EOF){
5097	    if (base64_count + mimeout_state.count/3*4> 73){
5098		(*o_base64conv)(EOF,0);
5099		oconv_newline(o_base64conv);
5100		(*o_base64conv)(0,SP);
5101		base64_count = 1;
5102	    }
5103	} else {
5104	    if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
5105		(*o_base64conv)(EOF,0);
5106		oconv_newline(o_base64conv);
5107		(*o_base64conv)(0,SP);
5108		base64_count = 1;
5109		mimeout_mode = -1;
5110	    }
5111	}
5112    } else if (c2) {
5113	if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
5114	    mimeout_mode =  (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
5115	    open_mime(output_mode);
5116	    (*o_base64conv)(EOF,0);
5117	    oconv_newline(o_base64conv);
5118	    (*o_base64conv)(0,SP);
5119	    base64_count = 1;
5120	    mimeout_mode = -1;
5121	}
5122    }
5123}
5124
5125static void
5126close_mime(void)
5127{
5128    (*o_mputc)('?');
5129    (*o_mputc)('=');
5130    base64_count += 2;
5131    mimeout_mode = 0;
5132}
5133
5134static void
5135eof_mime(void)
5136{
5137    switch(mimeout_mode) {
5138    case 'Q':
5139    case 'B':
5140	break;
5141    case 2:
5142	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
5143	(*o_mputc)('=');
5144	(*o_mputc)('=');
5145	base64_count += 3;
5146	break;
5147    case 1:
5148	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
5149	(*o_mputc)('=');
5150	base64_count += 2;
5151	break;
5152    }
5153    if (mimeout_mode > 0) {
5154	if (mimeout_f!=FIXED_MIME) {
5155	    close_mime();
5156	} else if (mimeout_mode != 'Q')
5157	    mimeout_mode = 'B';
5158    }
5159}
5160
5161static void
5162mimeout_addchar(nkf_char c)
5163{
5164    switch(mimeout_mode) {
5165    case 'Q':
5166	if (c==CR||c==LF) {
5167	    (*o_mputc)(c);
5168	    base64_count = 0;
5169	} else if(!nkf_isalnum(c)) {
5170	    (*o_mputc)('=');
5171	    (*o_mputc)(bin2hex(((c>>4)&0xf)));
5172	    (*o_mputc)(bin2hex((c&0xf)));
5173	    base64_count += 3;
5174	} else {
5175	    (*o_mputc)(c);
5176	    base64_count++;
5177	}
5178	break;
5179    case 'B':
5180	nkf_state->mimeout_state=c;
5181	(*o_mputc)(basis_64[c>>2]);
5182	mimeout_mode=2;
5183	base64_count ++;
5184	break;
5185    case 2:
5186	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
5187	nkf_state->mimeout_state=c;
5188	mimeout_mode=1;
5189	base64_count ++;
5190	break;
5191    case 1:
5192	(*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
5193	(*o_mputc)(basis_64[c & 0x3F]);
5194	mimeout_mode='B';
5195	base64_count += 2;
5196	break;
5197    default:
5198	(*o_mputc)(c);
5199	base64_count++;
5200	break;
5201    }
5202}
5203
5204static void
5205mime_putc(nkf_char c)
5206{
5207    int i, j;
5208    nkf_char lastchar;
5209
5210    if (mimeout_f == FIXED_MIME){
5211	if (mimeout_mode == 'Q'){
5212	    if (base64_count > 71){
5213		if (c!=CR && c!=LF) {
5214		    (*o_mputc)('=');
5215		    put_newline(o_mputc);
5216		}
5217		base64_count = 0;
5218	    }
5219	}else{
5220	    if (base64_count > 71){
5221		eof_mime();
5222		put_newline(o_mputc);
5223		base64_count = 0;
5224	    }
5225	    if (c == EOF) { /* c==EOF */
5226		eof_mime();
5227	    }
5228	}
5229	if (c != EOF) { /* c==EOF */
5230	    mimeout_addchar(c);
5231	}
5232	return;
5233    }
5234
5235    /* mimeout_f != FIXED_MIME */
5236
5237    if (c == EOF) { /* c==EOF */
5238	if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
5239	j = mimeout_state.count;
5240	mimeout_state.count = 0;
5241	i = 0;
5242	if (mimeout_mode > 0) {
5243	    if (!nkf_isblank(mimeout_state.buf[j-1])) {
5244		for (;i<j;i++) {
5245		    if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
5246			break;
5247		    }
5248		    mimeout_addchar(mimeout_state.buf[i]);
5249		}
5250		eof_mime();
5251		for (;i<j;i++) {
5252		    mimeout_addchar(mimeout_state.buf[i]);
5253		}
5254	    } else {
5255		for (;i<j;i++) {
5256		    mimeout_addchar(mimeout_state.buf[i]);
5257		}
5258		eof_mime();
5259	    }
5260	} else {
5261	    for (;i<j;i++) {
5262		mimeout_addchar(mimeout_state.buf[i]);
5263	    }
5264	}
5265	return;
5266    }
5267
5268    if (mimeout_state.count > 0){
5269	lastchar = mimeout_state.buf[mimeout_state.count - 1];
5270    }else{
5271	lastchar = -1;
5272    }
5273
5274    if (mimeout_mode=='Q') {
5275	if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
5276	    if (c == CR || c == LF) {
5277		close_mime();
5278		(*o_mputc)(c);
5279		base64_count = 0;
5280		return;
5281	    } else if (c <= SP) {
5282		close_mime();
5283		if (base64_count > 70) {
5284		    put_newline(o_mputc);
5285		    base64_count = 0;
5286		}
5287		if (!nkf_isblank(c)) {
5288		    (*o_mputc)(SP);
5289		    base64_count++;
5290		}
5291	    } else {
5292		if (base64_count > 70) {
5293		    close_mime();
5294		    put_newline(o_mputc);
5295		    (*o_mputc)(SP);
5296		    base64_count = 1;
5297		    open_mime(output_mode);
5298		}
5299		if (!nkf_noescape_mime(c)) {
5300		    mimeout_addchar(c);
5301		    return;
5302		}
5303	    }
5304	    if (c != 0x1B) {
5305		(*o_mputc)(c);
5306		base64_count++;
5307		return;
5308	    }
5309	}
5310    }
5311
5312    if (mimeout_mode <= 0) {
5313	if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5314		    output_mode == UTF_8)) {
5315	    if (nkf_isspace(c)) {
5316		int flag = 0;
5317		if (mimeout_mode == -1) {
5318		    flag = 1;
5319		}
5320		if (c==CR || c==LF) {
5321		    if (flag) {
5322			open_mime(output_mode);
5323			output_mode = 0;
5324		    } else {
5325			base64_count = 0;
5326		    }
5327		}
5328		for (i=0;i<mimeout_state.count;i++) {
5329		    (*o_mputc)(mimeout_state.buf[i]);
5330		    if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
5331			base64_count = 0;
5332		    }else{
5333			base64_count++;
5334		    }
5335		}
5336		if (flag) {
5337		    eof_mime();
5338		    base64_count = 0;
5339		    mimeout_mode = 0;
5340		}
5341		mimeout_state.buf[0] = (char)c;
5342		mimeout_state.count = 1;
5343	    }else{
5344		if (base64_count > 1
5345		    && base64_count + mimeout_state.count > 76
5346		    && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
5347		    static const char *str = "boundary=\"";
5348		    static int len = 10;
5349		    i = 0;
5350
5351		    for (; i < mimeout_state.count - len; ++i) {
5352			if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
5353			    i += len - 2;
5354			    break;
5355			}
5356		    }
5357
5358		    if (i == 0 || i == mimeout_state.count - len) {
5359			put_newline(o_mputc);
5360			base64_count = 0;
5361			if (!nkf_isspace(mimeout_state.buf[0])){
5362			    (*o_mputc)(SP);
5363			    base64_count++;
5364			}
5365		    }
5366		    else {
5367			int j;
5368			for (j = 0; j <= i; ++j) {
5369			    (*o_mputc)(mimeout_state.buf[j]);
5370			}
5371			put_newline(o_mputc);
5372			base64_count = 1;
5373			for (; j <= mimeout_state.count; ++j) {
5374			    mimeout_state.buf[j - i] = mimeout_state.buf[j];
5375			}
5376			mimeout_state.count -= i;
5377		    }
5378		}
5379		mimeout_state.buf[mimeout_state.count++] = (char)c;
5380		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5381		    open_mime(output_mode);
5382		}
5383	    }
5384	    return;
5385	}else{
5386	    if (lastchar==CR || lastchar == LF){
5387		for (i=0;i<mimeout_state.count;i++) {
5388		    (*o_mputc)(mimeout_state.buf[i]);
5389		}
5390		base64_count = 0;
5391		mimeout_state.count = 0;
5392	    }
5393	    if (lastchar==SP) {
5394		for (i=0;i<mimeout_state.count-1;i++) {
5395		    (*o_mputc)(mimeout_state.buf[i]);
5396		    base64_count++;
5397		}
5398		mimeout_state.buf[0] = SP;
5399		mimeout_state.count = 1;
5400	    }
5401	    open_mime(output_mode);
5402	}
5403    }else{
5404	/* mimeout_mode == 'B', 1, 2 */
5405	if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5406		    output_mode == UTF_8)) {
5407	    if (lastchar == CR || lastchar == LF){
5408		if (nkf_isblank(c)) {
5409		    for (i=0;i<mimeout_state.count;i++) {
5410			mimeout_addchar(mimeout_state.buf[i]);
5411		    }
5412		    mimeout_state.count = 0;
5413		} else {
5414		    eof_mime();
5415		    for (i=0;i<mimeout_state.count;i++) {
5416			(*o_mputc)(mimeout_state.buf[i]);
5417		    }
5418		    base64_count = 0;
5419		    mimeout_state.count = 0;
5420		}
5421		mimeout_state.buf[mimeout_state.count++] = (char)c;
5422		return;
5423	    }
5424	    if (nkf_isspace(c)) {
5425		for (i=0;i<mimeout_state.count;i++) {
5426		    if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5427			eof_mime();
5428			for (i=0;i<mimeout_state.count;i++) {
5429			    (*o_mputc)(mimeout_state.buf[i]);
5430			    base64_count++;
5431			}
5432			mimeout_state.count = 0;
5433		    }
5434		}
5435		mimeout_state.buf[mimeout_state.count++] = (char)c;
5436		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5437		    eof_mime();
5438		    for (i=0;i<mimeout_state.count;i++) {
5439			(*o_mputc)(mimeout_state.buf[i]);
5440			base64_count++;
5441		    }
5442		    mimeout_state.count = 0;
5443		}
5444		return;
5445	    }
5446	    if (mimeout_state.count>0 && SP<c && c!='=') {
5447		mimeout_state.buf[mimeout_state.count++] = (char)c;
5448		if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5449		    j = mimeout_state.count;
5450		    mimeout_state.count = 0;
5451		    for (i=0;i<j;i++) {
5452			mimeout_addchar(mimeout_state.buf[i]);
5453		    }
5454		}
5455		return;
5456	    }
5457	}
5458    }
5459    if (mimeout_state.count>0) {
5460	j = mimeout_state.count;
5461	mimeout_state.count = 0;
5462	for (i=0;i<j;i++) {
5463	    if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5464		break;
5465	    mimeout_addchar(mimeout_state.buf[i]);
5466	}
5467	if (i<j) {
5468	    eof_mime();
5469	    base64_count=0;
5470	    for (;i<j;i++) {
5471		(*o_mputc)(mimeout_state.buf[i]);
5472	    }
5473	    open_mime(output_mode);
5474	}
5475    }
5476    mimeout_addchar(c);
5477}
5478
5479static void
5480base64_conv(nkf_char c2, nkf_char c1)
5481{
5482    mime_prechar(c2, c1);
5483    (*o_base64conv)(c2,c1);
5484}
5485
5486#ifdef HAVE_ICONV_H
/*
 * State of one iconv-based conversion: the descriptor plus the staging
 * buffers for undecoded input and converted output.
 * (The declaration previously lacked the typedef name and the closing ';'.)
 */
typedef struct nkf_iconv_t {
    iconv_t cd;			/* descriptor returned by iconv_open() */
    char *input_buffer;		/* bytes read but not yet converted */
    size_t input_buffer_size;	/* capacity of input_buffer */
    char *output_buffer;	/* converted bytes awaiting output */
    size_t output_buffer_size;	/* capacity of output_buffer */
} nkf_iconv_t;
5494
5495static nkf_iconv_t
5496nkf_iconv_new(char *tocode, char *fromcode)
5497{
5498    nkf_iconv_t converter;
5499
5500    converter->input_buffer_size = IOBUF_SIZE;
5501    converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5502    converter->output_buffer_size = IOBUF_SIZE * 2;
5503    converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5504    converter->cd = iconv_open(tocode, fromcode);
5505    if (converter->cd == (iconv_t)-1)
5506    {
5507	switch (errno) {
5508	case EINVAL:
5509	    perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5510	    return -1;
5511	default:
5512	    perror("can't iconv_open");
5513	}
5514    }
5515}
5516
5517static size_t
5518nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5519{
5520    size_t invalid = (size_t)0;
5521    char *input_buffer = converter->input_buffer;
5522    size_t input_length = (size_t)0;
5523    char *output_buffer = converter->output_buffer;
5524    size_t output_length = converter->output_buffer_size;
5525    int c;
5526
5527    do {
5528	if (c != EOF) {
5529	    while ((c = (*i_getc)(f)) != EOF) {
5530		input_buffer[input_length++] = c;
5531		if (input_length < converter->input_buffer_size) break;
5532	    }
5533	}
5534
5535	size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5536	while (output_length-- > 0) {
5537	    (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5538	}
5539	if (ret == (size_t) - 1) {
5540	    switch (errno) {
5541	    case EINVAL:
5542		if (input_buffer != converter->input_buffer)
5543		    memmove(converter->input_buffer, input_buffer, input_length);
5544		break;
5545	    case E2BIG:
5546		converter->output_buffer_size *= 2;
5547		output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5548		if (output_buffer == NULL) {
5549		    perror("can't realloc");
5550		    return -1;
5551		}
5552		converter->output_buffer = output_buffer;
5553		break;
5554	    default:
5555		perror("can't iconv");
5556		return -1;
5557	    }
5558	} else {
5559	    invalid += ret;
5560	}
5561    } while (1);
5562
5563    return invalid;
5564}
5565
5566
5567static void
5568nkf_iconv_close(nkf_iconv_t *convert)
5569{
5570    nkf_xfree(converter->inbuf);
5571    nkf_xfree(converter->outbuf);
5572    iconv_close(converter->cd);
5573}
5574#endif
5575
5576
/*
 * Reset the converter's file-scope state to its defaults: per-encoding
 * detection status, option flags, MIME and folding state, the chain of
 * converter/getc/putc function pointers, and the guessed/selected encoding
 * pointers.  Ends with nkf_state_init() and, for WIN32DLL builds,
 * reinitdll().
 */
static void
reinit(void)
{
    /* reset the detection status of every known input encoding */
    {
	struct input_code *p = input_code_list;
	while (p->name){
	    status_reinit(p++);
	}
    }
    /* option flags */
    unbuf_f = FALSE;
    estab_f = FALSE;
    nop_f = FALSE;
    binmode_f = TRUE;
    rot_f = FALSE;
    hira_f = FALSE;
    alpha_f = FALSE;
    mime_f = MIME_DECODE_DEFAULT;
    mime_decode_f = FALSE;
    mimebuf_f = FALSE;
    broken_f = FALSE;
    iso8859_f = FALSE;
    mimeout_f = FALSE;
    x0201_f = NKF_UNSPECIFIED;
    iso2022jp_f = FALSE;
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
    ms_ucs_map_f = UCS_MAP_ASCII;
#endif
#ifdef UTF8_INPUT_ENABLE
    no_cp932ext_f = FALSE;
    no_best_fit_chars_f = FALSE;
    encode_fallback = NULL;
    unicode_subchar  = '?';
    input_endian = ENDIAN_BIG;
#endif
#ifdef UTF8_OUTPUT_ENABLE
    output_bom_f = FALSE;
    output_endian = ENDIAN_BIG;
#endif
#ifdef UNICODE_NORMALIZATION
    nfc_f = FALSE;
#endif
#ifdef INPUT_OPTION
    cap_f = FALSE;
    url_f = FALSE;
    numchar_f = FALSE;
#endif
#ifdef CHECK_OPTION
    noout_f = FALSE;
    debug_f = FALSE;
#endif
    guess_f = 0;
#ifdef EXEC_IO
    exec_f = 0;
#endif
#ifdef SHIFTJIS_CP932
    cp51932_f = TRUE;
    cp932inv_f = TRUE;
#endif
#ifdef X0212_ENABLE
    x0212_f = FALSE;
    x0213_f = FALSE;
#endif
    /* clear all 256 entries of prefix_table */
    {
	int i;
	for (i = 0; i < 256; i++){
	    prefix_table[i] = 0;
	}
    }
    /* buffering, MIME output and folding state */
    hold_count = 0;
    mimeout_state.count = 0;
    mimeout_mode = 0;
    base64_count = 0;
    f_line = 0;
    f_prev = 0;
    fold_preserve_f = FALSE;
    fold_f = FALSE;
    fold_len = 0;
    kanji_intro = DEFAULT_J;
    ascii_intro = DEFAULT_R;
    fold_margin  = FOLD_MARGIN;
    /* detach every optional output converter stage */
    o_zconv = no_connection;
    o_fconv = no_connection;
    o_eol_conv = no_connection;
    o_rot_conv = no_connection;
    o_hira_conv = no_connection;
    o_base64conv = no_connection;
    o_iso2022jp_check_conv = no_connection;
    /* character I/O goes straight to the std_* functions */
    o_putc = std_putc;
    i_getc = std_getc;
    i_ungetc = std_ungetc;
    i_bgetc = std_getc;
    i_bungetc = std_ungetc;
    o_mputc = std_putc;
    i_mgetc = std_getc;
    i_mungetc  = std_ungetc;
    i_mgetc_buf = std_getc;
    i_mungetc_buf = std_ungetc;
    /* conversion modes and per-stream state */
    output_mode = ASCII;
    input_mode =  ASCII;
    mime_decode_mode = FALSE;
    file_out_f = FALSE;
    eolmode_f = 0;
    input_eol = 0;
    prev_cr = 0;
    option_mode = 0;
    z_prev2=0,z_prev1=0;
#ifdef CHECK_OPTION
    iconv_for_check = 0;
#endif
    /* forget any guessed or selected encodings */
    input_codename = NULL;
    input_encoding = NULL;
    output_encoding = NULL;
    nkf_state_init();
#ifdef WIN32DLL
    reinitdll();
#endif /*WIN32DLL*/
}
5694
5695static int
5696module_connection(void)
5697{
5698    if (input_encoding) set_input_encoding(input_encoding);
5699    if (!output_encoding) {
5700	output_encoding = nkf_default_encoding();
5701    }
5702    if (!output_encoding) {
5703	if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5704	else return -1;
5705    }
5706    set_output_encoding(output_encoding);
5707    oconv = nkf_enc_to_oconv(output_encoding);
5708    o_putc = std_putc;
5709    if (nkf_enc_unicode_p(output_encoding))
5710	output_mode = UTF_8;
5711
5712	if (x0201_f == NKF_UNSPECIFIED) {
5713		x0201_f = X0201_DEFAULT;
5714	}
5715
5716    /* replace continucation module, from output side */
5717
5718    /* output redicrection */
5719#ifdef CHECK_OPTION
5720    if (noout_f || guess_f){
5721	o_putc = no_putc;
5722    }
5723#endif
5724    if (mimeout_f) {
5725	o_mputc = o_putc;
5726	o_putc = mime_putc;
5727	if (mimeout_f == TRUE) {
5728	    o_base64conv = oconv; oconv = base64_conv;
5729	}
5730	/* base64_count = 0; */
5731    }
5732
5733    if (eolmode_f || guess_f) {
5734	o_eol_conv = oconv; oconv = eol_conv;
5735    }
5736    if (rot_f) {
5737	o_rot_conv = oconv; oconv = rot_conv;
5738    }
5739    if (iso2022jp_f) {
5740	o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv;
5741    }
5742    if (hira_f) {
5743	o_hira_conv = oconv; oconv = hira_conv;
5744    }
5745    if (fold_f) {
5746	o_fconv = oconv; oconv = fold_conv;
5747	f_line = 0;
5748    }
5749    if (alpha_f || x0201_f) {
5750	o_zconv = oconv; oconv = z_conv;
5751    }
5752
5753    i_getc = std_getc;
5754    i_ungetc = std_ungetc;
5755    /* input redicrection */
5756#ifdef INPUT_OPTION
5757    if (cap_f){
5758	i_cgetc = i_getc; i_getc = cap_getc;
5759	i_cungetc = i_ungetc; i_ungetc= cap_ungetc;
5760    }
5761    if (url_f){
5762	i_ugetc = i_getc; i_getc = url_getc;
5763	i_uungetc = i_ungetc; i_ungetc= url_ungetc;
5764    }
5765#endif
5766#ifdef NUMCHAR_OPTION
5767    if (numchar_f){
5768	i_ngetc = i_getc; i_getc = numchar_getc;
5769	i_nungetc = i_ungetc; i_ungetc= numchar_ungetc;
5770    }
5771#endif
5772#ifdef UNICODE_NORMALIZATION
5773    if (nfc_f){
5774	i_nfc_getc = i_getc; i_getc = nfc_getc;
5775	i_nfc_ungetc = i_ungetc; i_ungetc= nfc_ungetc;
5776    }
5777#endif
5778    if (mime_f && mimebuf_f==FIXED_MIME) {
5779	i_mgetc = i_getc; i_getc = mime_getc;
5780	i_mungetc = i_ungetc; i_ungetc = mime_ungetc;
5781    }
5782    if (broken_f & 1) {
5783	i_bgetc = i_getc; i_getc = broken_getc;
5784	i_bungetc = i_ungetc; i_ungetc = broken_ungetc;
5785    }
5786    if (input_encoding) {
5787	set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5788    } else {
5789	set_iconv(FALSE, e_iconv);
5790    }
5791
5792    {
5793	struct input_code *p = input_code_list;
5794	while (p->name){
5795	    status_reinit(p++);
5796	}
5797    }
5798    return 0;
5799}
5800
5801/*
5802   Conversion main loop. Code detection only.
5803 */
5804
5805#if !defined(PERL_XS) && !defined(WIN32DLL)
5806static nkf_char
5807noconvert(FILE *f)
5808{
5809    nkf_char    c;
5810
5811    if (nop_f == 2)
5812	module_connection();
5813    while ((c = (*i_getc)(f)) != EOF)
5814	(*o_putc)(c);
5815    (*o_putc)(EOF);
5816    return 1;
5817}
5818#endif
5819
/*
 * Control-flow shorthands for the byte loop in kanji_convert().
 *
 * NEXT, SKIP and MORE end in a bare `continue` and LAST is a bare `break`, so
 * each acts on the innermost loop enclosing the point of use.  They cannot be
 * wrapped in do { } while (0), because `continue` / `break` would then bind
 * to that wrapper instead.  SKIP and MORE expand to two statements: never use
 * one as the sole body of an unbraced if/else, or its `continue` becomes
 * unconditional.
 */
#define NEXT continue        /* leave c2 untouched and read the next byte */
#define SKIP c2=0;continue        /* clear the pending byte in c2 and read the next byte */
#define MORE c2=c1;continue  /* keep c1 as the pending byte in c2; one more byte is needed */
#define SEND (void)0         /* no-op: fall out of the if/else chain to the code that outputs c2 and c1 */
#define LAST break           /* leave the enclosing loop (go to closing) */
/*
 * Switch the input mode after an escape sequence.  Expands in a scope that
 * must provide a variable named shift_mode, which is reset to 0.  The input
 * code name is set to "ISO-2022-JP" whatever `mode` is.
 */
#define set_input_mode(mode) do { \
    input_mode = mode; \
    shift_mode = 0; \
    set_input_codename("ISO-2022-JP"); \
    debug("ISO-2022-JP"); \
} while (0)
5831
/*
 * Main conversion loop: read stream `f` through i_getc, interpret escape
 * sequences / shift states / MIME starts, and hand characters to iconv or
 * oconv.
 *
 * Steps:
 *   1. Reset input_mode and output_mode to ASCII and call module_connection();
 *      if that fails, return -1 (after printing a message to stderr unless
 *      PERL_XS or WIN32DLL is defined).
 *   2. Call check_bom(f).
 *   3. If iconv is w_iconv32 or w_iconv16, consume fixed 4- or 2-byte units
 *      in a dedicated loop and jump to the epilogue.
 *   4. Otherwise run the byte loop.  c2 holds a pending first byte (0 when
 *      there is none) and c1 the byte just read.  The if/else chain either
 *      restarts the loop (NEXT / SKIP / MORE), leaves it (LAST), or falls
 *      through (SEND) to the switch on input_mode that outputs c2/c1.
 *   5. Epilogue: call (*iconv)(EOF, 0, 0); if no input code name has been
 *      recorded and is_8bit is set, record the name of the input_code_list
 *      entry with the lowest score.
 *
 * Returns 0 on completion, -1 when module_connection() fails.
 */
static int
kanji_convert(FILE *f)
{
    nkf_char c1=0, c2=0, c3=0, c4=0;
    int shift_mode = 0; /* 0, 1, 2, 3 */
    int g2 = 0;
    /* TRUE when an input encoding was given and nkf_enc_asciicompat() is
       false for it; SI / SO / ESC are then only interpreted while
       mime_decode_mode is set. */
    int is_8bit = FALSE;

    if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
	is_8bit = TRUE;
    }

    input_mode = ASCII;
    output_mode = ASCII;

    if (module_connection() < 0) {
#if !defined(PERL_XS) && !defined(WIN32DLL)
	fprintf(stderr, "no output encoding given\n");
#endif
	return -1;
    }
    check_bom(f);

#ifdef UTF8_INPUT_ENABLE
    if(iconv == w_iconv32){
	/* UTF-32 input: four bytes per unit.  The loop stops at the first
	   EOF, so an incomplete trailing unit is not converted. */
	while ((c1 = (*i_getc)(f)) != EOF &&
	       (c2 = (*i_getc)(f)) != EOF &&
	       (c3 = (*i_getc)(f)) != EOF &&
	       (c4 = (*i_getc)(f)) != EOF) {
	    nkf_char c5, c6, c7, c8;
	    if (nkf_iconv_utf_32(c1, c2, c3, c4) == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
		/* Read the following unit and try to combine it with this
		   one.  A non-zero result from the combine call means the
		   pair was not handled: push the second unit back (reverse
		   order) and emit the first unit alone. */
		if ((c5 = (*i_getc)(f)) != EOF &&
		    (c6 = (*i_getc)(f)) != EOF &&
		    (c7 = (*i_getc)(f)) != EOF &&
		    (c8 = (*i_getc)(f)) != EOF) {
		    if (nkf_iconv_utf_32_combine(c1, c2, c3, c4, c5, c6, c7, c8)) {
			(*i_ungetc)(c8, f);
			(*i_ungetc)(c7, f);
			(*i_ungetc)(c6, f);
			(*i_ungetc)(c5, f);
			nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
		    }
		} else {
		    nkf_iconv_utf_32_nocombine(c1, c2, c3, c4);
		}
	    }
	}
	goto finished;
    }
    else if (iconv == w_iconv16) {
	/* UTF-16 input: two bytes per unit; a second unit is read when the
	   converter asks for two more bytes or for a combining check. */
	while ((c1 = (*i_getc)(f)) != EOF &&
	       (c2 = (*i_getc)(f)) != EOF) {
	    size_t ret = nkf_iconv_utf_16(c1, c2, 0, 0);
	    if (ret == NKF_ICONV_NEED_TWO_MORE_BYTES &&
		(c3 = (*i_getc)(f)) != EOF &&
		(c4 = (*i_getc)(f)) != EOF) {
		nkf_iconv_utf_16(c1, c2, c3, c4);
	    } else if (ret == (size_t)NKF_ICONV_WAIT_COMBINING_CHAR) {
		if ((c3 = (*i_getc)(f)) != EOF &&
		    (c4 = (*i_getc)(f)) != EOF) {
		    if (nkf_iconv_utf_16_combine(c1, c2, c3, c4)) {
			/* not combined: push the second unit back and
			   emit the first unit alone */
			(*i_ungetc)(c4, f);
			(*i_ungetc)(c3, f);
			nkf_iconv_utf_16_nocombine(c1, c2);
		    }
		} else {
		    nkf_iconv_utf_16_nocombine(c1, c2);
		}
	    }
	}
	goto finished;
    }
#endif

    /* Byte loop for all other inputs. */
    while ((c1 = (*i_getc)(f)) != EOF) {
	/* Feed the byte to code_status(); with INPUT_CODE_FIX defined this
	   is done only while no input encoding has been given. */
#ifdef INPUT_CODE_FIX
	if (!input_encoding)
#endif
	    code_status(c1);
	if (c2) {
	    /* second byte */
	    if (c2 > ((input_encoding && nkf_enc_cp5022x_p(input_encoding)) ? 0x92 : DEL)) {
		/* in case of 8th bit is on */
		if (!estab_f&&!mime_decode_mode) {
		    /* in case of not established yet */
		    /* It is still ambiguous */
		    if (h_conv(f, c2, c1)==EOF) {
			LAST;
		    }
		    else {
			SKIP;
		    }
		}
		else {
		    /* in case of already established */
		    if (c1 < 0x40) {
			/* ignore bogus code */
			SKIP;
		    } else {
			SEND;
		    }
		}
	    }
	    else {
		/* 2nd byte of 7 bit code or SJIS */
		SEND;
	    }
	}
	else if (nkf_char_unicode_p(c1)) {
	    /* value already satisfies nkf_char_unicode_p(): pass it straight
	       to the output converter */
	    (*oconv)(0, c1);
	    NEXT;
	}
	else {
	    /* first byte */
	    if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
		/* CP5022x */
		MORE;
	    }else if (input_codename && input_codename[0] == 'I' &&
		    0xA1 <= c1 && c1 <= 0xDF) {
		/* JIS X 0201 Katakana in 8bit JIS */
		c2 = JIS_X_0201_1976_K;
		c1 &= 0x7f;
		SEND;
	    } else if (c1 > DEL) {
		/* 8 bit code */
		if (!estab_f && !iso8859_f) {
		    /* not established yet */
		    MORE;
		} else { /* estab_f==TRUE */
		    if (iso8859_f) {
			c2 = ISO_8859_1;
			c1 &= 0x7f;
			SEND;
		    }
		    else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
			     (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
			/* JIS X 0201 */
			c2 = JIS_X_0201_1976_K;
			c1 &= 0x7f;
			SEND;
		    }
		    else {
			/* already established */
			MORE;
		    }
		}
	    } else if (SP < c1 && c1 < DEL) {
		/* in case of Roman characters */
		if (shift_mode) {
		    /* output 1 shifted byte */
		    if (iso8859_f) {
			c2 = ISO_8859_1;
			SEND;
		    } else if (nkf_byte_jisx0201_katakana_p(c1)){
			/* output 1 shifted byte */
			c2 = JIS_X_0201_1976_K;
			SEND;
		    } else {
			/* look like bogus code */
			SKIP;
		    }
		} else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
			   input_mode == JIS_X_0213_1 || input_mode == JIS_X_0213_2) {
		    /* in case of Kanji shifted */
		    MORE;
		} else if (c1 == '=' && mime_f && !mime_decode_mode) {
		    /* Check MIME code */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			(*oconv)(0, '=');
			LAST;
		    } else if (c1 == '?') {
			/* =? is mime conversion start sequence */
			/* The `if (...) LAST;` statements below are
			   unbraced; each following SKIP is a separate
			   statement that runs when LAST did not break. */
			if(mime_f == STRICT_MIME) {
			    /* check in real detail */
			    if (mime_begin_strict(f) == EOF)
				LAST;
			    SKIP;
			} else if (mime_begin(f) == EOF)
			    LAST;
			SKIP;
		    } else {
			(*oconv)(0, '=');
			(*i_ungetc)(c1,f);
			SKIP;
		    }
		} else {
		    /* normal ASCII code */
		    SEND;
		}
	    } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
		shift_mode = 0;
		SKIP;
	    } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
		shift_mode = 1;
		SKIP;
	    } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
		/* Escape sequence: dispatch on the byte(s) after ESC. */
		if ((c1 = (*i_getc)(f)) == EOF) {
		    (*oconv)(0, ESC);
		    LAST;
		}
		else if (c1 == '&') {
		    /* IRR */
		    /* the byte after "ESC &" is read and discarded */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			LAST;
		    } else {
			SKIP;
		    }
		}
		else if (c1 == '$') {
		    /* GZDMx */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			/* don't send bogus code
			   (*oconv)(0, ESC);
			   (*oconv)(0, '$'); */
			LAST;
		    } else if (c1 == '@' || c1 == 'B') {
			/* JIS X 0208 */
			set_input_mode(JIS_X_0208);
			SKIP;
		    } else if (c1 == '(') {
			/* GZDM4 */
			if ((c1 = (*i_getc)(f)) == EOF) {
			    /* don't send bogus code
			       (*oconv)(0, ESC);
			       (*oconv)(0, '$');
			       (*oconv)(0, '(');
			     */
			    LAST;
			} else if (c1 == '@'|| c1 == 'B') {
			    /* JIS X 0208 */
			    set_input_mode(JIS_X_0208);
			    SKIP;
#ifdef X0212_ENABLE
			} else if (c1 == 'D'){
			    set_input_mode(JIS_X_0212);
			    SKIP;
#endif /* X0212_ENABLE */
			} else if (c1 == 'O' || c1 == 'Q'){
			    set_input_mode(JIS_X_0213_1);
			    SKIP;
			} else if (c1 == 'P'){
			    set_input_mode(JIS_X_0213_2);
			    SKIP;
			} else {
			    /* could be some special code */
			    (*oconv)(0, ESC);
			    (*oconv)(0, '$');
			    (*oconv)(0, '(');
			    (*oconv)(0, c1);
			    SKIP;
			}
		    } else if (broken_f&0x2) {
			/* accept any ESC-(-x as broken code ... */
			/* unlike set_input_mode(), this does not record
			   an input code name */
			input_mode = JIS_X_0208;
			shift_mode = 0;
			SKIP;
		    } else {
			(*oconv)(0, ESC);
			(*oconv)(0, '$');
			(*oconv)(0, c1);
			SKIP;
		    }
		} else if (c1 == '(') {
		    /* GZD4 */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			/* don't send bogus code
			   (*oconv)(0, ESC);
			   (*oconv)(0, '('); */
			LAST;
		    }
		    else if (c1 == 'I') {
			/* JIS X 0201 Katakana */
			/* set_input_mode() resets shift_mode to 0, so it
			   is set to 1 afterwards */
			set_input_mode(JIS_X_0201_1976_K);
			shift_mode = 1;
			SKIP;
		    }
		    else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
			/* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
			set_input_mode(ASCII);
			SKIP;
		    }
		    else if (broken_f&0x2) {
			set_input_mode(ASCII);
			SKIP;
		    }
		    else {
			(*oconv)(0, ESC);
			(*oconv)(0, '(');
			SEND;
		    }
		}
		else if (c1 == '.') {
		    /* G2D6 */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			LAST;
		    }
		    else if (c1 == 'A') {
			/* ISO-8859-1 */
			g2 = ISO_8859_1;
			SKIP;
		    }
		    else {
			(*oconv)(0, ESC);
			(*oconv)(0, '.');
			SEND;
		    }
		}
		else if (c1 == 'N') {
		    /* SS2 */
		    /* NOTE(review): the byte after 'N' is read without an
		       EOF check.  In the else-branch below that byte is
		       pushed back and is also still in c1 when SEND runs,
		       while 'N' itself is never passed to oconv -- this
		       looks like it could output the following byte twice
		       and drop the 'N'; confirm the intended behavior. */
		    c1 = (*i_getc)(f);
		    if (g2 == ISO_8859_1) {
			c2 = ISO_8859_1;
			SEND;
		    }else{
			(*i_ungetc)(c1, f);
			/* lonely ESC  */
			(*oconv)(0, ESC);
			SEND;
		    }
		}
		else {
		    /* lonely ESC  */
		    (*oconv)(0, ESC);
		    SEND;
		}
	    } else if (c1 == ESC && iconv == s_iconv) {
		/* ESC in Shift_JIS */
		/* reached only when the previous ESC branch was not taken,
		   i.e. is_8bit is set and mime_decode_mode is not */
		if ((c1 = (*i_getc)(f)) == EOF) {
		    (*oconv)(0, ESC);
		    LAST;
		} else if (c1 == '$') {
		    /* J-PHONE emoji */
		    if ((c1 = (*i_getc)(f)) == EOF) {
			LAST;
		    } else if (('E' <= c1 && c1 <= 'G') ||
			       ('O' <= c1 && c1 <= 'Q')) {
			/*
			   NUM : 0 1 2 3 4 5
			   BYTE: G E F O P Q
			   C%7 : 1 6 0 2 3 4
			   C%7 : 0 1 2 3 4 5 6
			   NUM : 2 0 3 4 5 X 1
			 */
			static const nkf_char jphone_emoji_first_table[7] =
			{0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
			c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
			/* This LAST breaks out of the byte loop. */
			if ((c1 = (*i_getc)(f)) == EOF) LAST;
			while (SP <= c1 && c1 <= 'z') {
			    (*oconv)(0, c1 + c3);
			    /* Here LAST (`break`) leaves only this inner
			       while; the SKIP after it then resumes the
			       byte loop. */
			    if ((c1 = (*i_getc)(f)) == EOF) LAST;
			}
			/* the byte that ended the run above is not sent */
			SKIP;
		    }
		    else {
			(*oconv)(0, ESC);
			(*oconv)(0, '$');
			SEND;
		    }
		}
		else {
		    /* lonely ESC  */
		    (*oconv)(0, ESC);
		    SEND;
		}
	    } else if (c1 == LF || c1 == CR) {
		/* When neither condition below holds, no macro runs and
		   control falls through to the send switch. */
		if (broken_f&4) {
		    input_mode = ASCII; set_iconv(FALSE, 0);
		    SEND;
		} else if (mime_decode_f && !mime_decode_mode){
		    /* A line break followed by SP is dropped: the SP is
		       pushed back and the loop restarts without sending
		       the line break. */
		    if (c1 == LF) {
			if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
			    i_ungetc(SP,f);
			    continue;
			} else {
			    i_ungetc(c1,f);
			}
			c1 = LF;
			SEND;
		    } else  { /* if (c1 == CR)*/
			/* NOTE(review): when the byte after CR is neither
			   SP nor LF, it is pushed back and then LF is pushed
			   back as well -- confirm that pushing the extra LF
			   is intended. */
			if ((c1=(*i_getc)(f))!=EOF) {
			    if (c1==SP) {
				i_ungetc(SP,f);
				continue;
			    } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
				i_ungetc(SP,f);
				continue;
			    } else {
				i_ungetc(c1,f);
			    }
			    i_ungetc(LF,f);
			} else {
			    i_ungetc(c1,f);
			}
			c1 = CR;
			SEND;
		    }
		}
	    } else
		SEND;
	}
	/* send: */
	/* Output stage: reached whenever the chain above ended in SEND (or
	   fell through).  Dispatch on the current input mode. */
	switch(input_mode){
	case ASCII:
	    /* A negative return from iconv asks for more input bytes; the
	       cases below read them and call the converter again. */
	    switch ((*iconv)(c2, c1, 0)) {  /* can be EUC / SJIS / UTF-8 */
	    case -2:
		/* 4 bytes UTF-8 */
		/* third and fourth bytes are packed as (c3 << 8) | c4 */
		if ((c3 = (*i_getc)(f)) != EOF) {
		    code_status(c3);
		    c3 <<= 8;
		    if ((c4 = (*i_getc)(f)) != EOF) {
			code_status(c4);
			(*iconv)(c2, c1, c3|c4);
		    }
		}
		break;
	    case -3:
		/* 4 bytes UTF-8 (check combining character) */
		/* if combining is not possible, push back whatever was
		   read and emit the first character alone */
		if ((c3 = (*i_getc)(f)) != EOF) {
		    if ((c4 = (*i_getc)(f)) != EOF) {
			if (w_iconv_combine(c2, c1, 0, c3, c4, 0)) {
			    (*i_ungetc)(c4, f);
			    (*i_ungetc)(c3, f);
			    w_iconv_nocombine(c2, c1, 0);
			}
		    } else {
			(*i_ungetc)(c3, f);
			w_iconv_nocombine(c2, c1, 0);
		    }
		} else {
		    w_iconv_nocombine(c2, c1, 0);
		}
		break;
	    case -1:
		/* 3 bytes EUC or UTF-8 */
		if ((c3 = (*i_getc)(f)) != EOF) {
		    code_status(c3);
		    if ((*iconv)(c2, c1, c3) == -3) {
			/* 6 bytes UTF-8 (check combining character) */
			/* on any short read or failed combine, push back
			   the bytes read so far (reverse order) and emit
			   the 3-byte character alone */
			nkf_char c5, c6;
			if ((c4 = (*i_getc)(f)) != EOF) {
			    if ((c5 = (*i_getc)(f)) != EOF) {
				if ((c6 = (*i_getc)(f)) != EOF) {
				    if (w_iconv_combine(c2, c1, c3, c4, c5, c6)) {
					(*i_ungetc)(c6, f);
					(*i_ungetc)(c5, f);
					(*i_ungetc)(c4, f);
					w_iconv_nocombine(c2, c1, c3);
				    }
				} else {
				    (*i_ungetc)(c5, f);
				    (*i_ungetc)(c4, f);
				    w_iconv_nocombine(c2, c1, c3);
				}
			    } else {
				(*i_ungetc)(c4, f);
				w_iconv_nocombine(c2, c1, c3);
			    }
			} else {
			    w_iconv_nocombine(c2, c1, c3);
			}
		    }
		}
		break;
	    }
	    break;
	case JIS_X_0208:
	case JIS_X_0213_1:
	    if (ms_ucs_map_f &&
		0x7F <= c2 && c2 <= 0x92 &&
		0x21 <= c1 && c1 <= 0x7E) {
		/* CP932 UDC */
		/* map (c2, c1) to a code point starting at 0xE000,
		   94 cells per row */
		c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
		c2 = 0;
	    }
	    (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
	    break;
#ifdef X0212_ENABLE
	case JIS_X_0212:
	    (*oconv)(PREFIX_EUCG3 | c2, c1);
	    break;
#endif /* X0212_ENABLE */
	case JIS_X_0213_2:
	    (*oconv)(PREFIX_EUCG3 | c2, c1);
	    break;
	default:
	    (*oconv)(input_mode, c1);  /* other special case */
	}

	/* character handled: clear the pending bytes for the next one */
	c2 = 0;
	c3 = 0;
	continue;
	/* goto next_word */
    }

finished:
    /* epilogue */
    /* signal end of input to the input converter */
    (*iconv)(EOF, 0, 0);
    if (!input_codename)
    {
	if (is_8bit) {
	    /* No code name was recorded: pick the input_code_list entry
	       with the lowest score (the first one wins ties). */
	    struct input_code *p = input_code_list;
	    struct input_code *result = p;
	    while (p->name){
		if (p->score < result->score) result = p;
		++p;
	    }
	    set_input_codename(result->name);
#ifdef CHECK_OPTION
	    debug(result->name);
#endif
	}
    }
    return 0;
}
6346
6347/*
6348 * int options(unsigned char *cp)
6349 *
6350 * return values:
6351 *    0: success
6352 *   -1: ArgumentError
6353 */
6354static int
6355options(unsigned char *cp)
6356{
6357    nkf_char i, j;
6358    unsigned char *p;
6359    unsigned char *cp_back = NULL;
6360    nkf_encoding *enc;
6361
6362    if (option_mode==1)
6363	return 0;
6364    while(*cp && *cp++!='-');
6365    while (*cp || cp_back) {
6366	if(!*cp){
6367	    cp = cp_back;
6368	    cp_back = NULL;
6369	    continue;
6370	}
6371	p = 0;
6372	switch (*cp++) {
6373	case '-':  /* literal options */
6374	    if (!*cp || *cp == SP) {        /* ignore the rest of arguments */
6375		option_mode = 1;
6376		return 0;
6377	    }
6378	    for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
6379		p = (unsigned char *)long_option[i].name;
6380		for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
6381		if (*p == cp[j] || cp[j] == SP){
6382		    p = &cp[j] + 1;
6383		    break;
6384		}
6385		p = 0;
6386	    }
6387	    if (p == 0) {
6388#if !defined(PERL_XS) && !defined(WIN32DLL)
6389		fprintf(stderr, "unknown long option: --%s\n", cp);
6390#endif
6391		return -1;
6392	    }
6393	    while(*cp && *cp != SP && cp++);
6394	    if (long_option[i].alias[0]){
6395		cp_back = cp;
6396		cp = (unsigned char *)long_option[i].alias;
6397	    }else{
6398#ifndef PERL_XS
6399		if (strcmp(long_option[i].name, "help") == 0){
6400		    usage();
6401		    exit(EXIT_SUCCESS);
6402		}
6403#endif
6404		if (strcmp(long_option[i].name, "ic=") == 0){
6405		    enc = nkf_enc_find((char *)p);
6406		    if (!enc) continue;
6407		    input_encoding = enc;
6408		    continue;
6409		}
6410		if (strcmp(long_option[i].name, "oc=") == 0){
6411		    enc = nkf_enc_find((char *)p);
6412		    /* if (enc <= 0) continue; */
6413		    if (!enc) continue;
6414		    output_encoding = enc;
6415		    continue;
6416		}
6417		if (strcmp(long_option[i].name, "guess=") == 0){
6418		    if (p[0] == '0' || p[0] == '1') {
6419			guess_f = 1;
6420		    } else {
6421			guess_f = 2;
6422		    }
6423		    continue;
6424		}
6425#ifdef OVERWRITE
6426		if (strcmp(long_option[i].name, "overwrite") == 0){
6427		    file_out_f = TRUE;
6428		    overwrite_f = TRUE;
6429		    preserve_time_f = TRUE;
6430		    continue;
6431		}
6432		if (strcmp(long_option[i].name, "overwrite=") == 0){
6433		    file_out_f = TRUE;
6434		    overwrite_f = TRUE;
6435		    preserve_time_f = TRUE;
6436		    backup_f = TRUE;
6437		    backup_suffix = (char *)p;
6438		    continue;
6439		}
6440		if (strcmp(long_option[i].name, "in-place") == 0){
6441		    file_out_f = TRUE;
6442		    overwrite_f = TRUE;
6443		    preserve_time_f = FALSE;
6444		    continue;
6445		}
6446		if (strcmp(long_option[i].name, "in-place=") == 0){
6447		    file_out_f = TRUE;
6448		    overwrite_f = TRUE;
6449		    preserve_time_f = FALSE;
6450		    backup_f = TRUE;
6451		    backup_suffix = (char *)p;
6452		    continue;
6453		}
6454#endif
6455#ifdef INPUT_OPTION
6456		if (strcmp(long_option[i].name, "cap-input") == 0){
6457		    cap_f = TRUE;
6458		    continue;
6459		}
6460		if (strcmp(long_option[i].name, "url-input") == 0){
6461		    url_f = TRUE;
6462		    continue;
6463		}
6464#endif
6465#ifdef NUMCHAR_OPTION
6466		if (strcmp(long_option[i].name, "numchar-input") == 0){
6467		    numchar_f = TRUE;
6468		    continue;
6469		}
6470#endif
6471#ifdef CHECK_OPTION
6472		if (strcmp(long_option[i].name, "no-output") == 0){
6473		    noout_f = TRUE;
6474		    continue;
6475		}
6476		if (strcmp(long_option[i].name, "debug") == 0){
6477		    debug_f = TRUE;
6478		    continue;
6479		}
6480#endif
6481		if (strcmp(long_option[i].name, "cp932") == 0){
6482#ifdef SHIFTJIS_CP932
6483		    cp51932_f = TRUE;
6484		    cp932inv_f = -TRUE;
6485#endif
6486#ifdef UTF8_OUTPUT_ENABLE
6487		    ms_ucs_map_f = UCS_MAP_CP932;
6488#endif
6489		    continue;
6490		}
6491		if (strcmp(long_option[i].name, "no-cp932") == 0){
6492#ifdef SHIFTJIS_CP932
6493		    cp51932_f = FALSE;
6494		    cp932inv_f = FALSE;
6495#endif
6496#ifdef UTF8_OUTPUT_ENABLE
6497		    ms_ucs_map_f = UCS_MAP_ASCII;
6498#endif
6499		    continue;
6500		}
6501#ifdef SHIFTJIS_CP932
6502		if (strcmp(long_option[i].name, "cp932inv") == 0){
6503		    cp932inv_f = -TRUE;
6504		    continue;
6505		}
6506#endif
6507
6508#ifdef X0212_ENABLE
6509		if (strcmp(long_option[i].name, "x0212") == 0){
6510		    x0212_f = TRUE;
6511		    continue;
6512		}
6513#endif
6514
6515#ifdef EXEC_IO
6516		if (strcmp(long_option[i].name, "exec-in") == 0){
6517		    exec_f = 1;
6518		    return 0;
6519		}
6520		if (strcmp(long_option[i].name, "exec-out") == 0){
6521		    exec_f = -1;
6522		    return 0;
6523		}
6524#endif
6525#if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6526		if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6527		    no_cp932ext_f = TRUE;
6528		    continue;
6529		}
6530		if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6531		    no_best_fit_chars_f = TRUE;
6532		    continue;
6533		}
6534		if (strcmp(long_option[i].name, "fb-skip") == 0){
6535		    encode_fallback = NULL;
6536		    continue;
6537		}
6538		if (strcmp(long_option[i].name, "fb-html") == 0){
6539		    encode_fallback = encode_fallback_html;
6540		    continue;
6541		}
6542		if (strcmp(long_option[i].name, "fb-xml") == 0){
6543		    encode_fallback = encode_fallback_xml;
6544		    continue;
6545		}
6546		if (strcmp(long_option[i].name, "fb-java") == 0){
6547		    encode_fallback = encode_fallback_java;
6548		    continue;
6549		}
6550		if (strcmp(long_option[i].name, "fb-perl") == 0){
6551		    encode_fallback = encode_fallback_perl;
6552		    continue;
6553		}
6554		if (strcmp(long_option[i].name, "fb-subchar") == 0){
6555		    encode_fallback = encode_fallback_subchar;
6556		    continue;
6557		}
6558		if (strcmp(long_option[i].name, "fb-subchar=") == 0){
6559		    encode_fallback = encode_fallback_subchar;
6560		    unicode_subchar = 0;
6561		    if (p[0] != '0'){
6562			/* decimal number */
6563			for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6564			    unicode_subchar *= 10;
6565			    unicode_subchar += hex2bin(p[i]);
6566			}
6567		    }else if(p[1] == 'x' || p[1] == 'X'){
6568			/* hexadecimal number */
6569			for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6570			    unicode_subchar <<= 4;
6571			    unicode_subchar |= hex2bin(p[i]);
6572			}
6573		    }else{
6574			/* octal number */
6575			for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6576			    unicode_subchar *= 8;
6577			    unicode_subchar += hex2bin(p[i]);
6578			}
6579		    }
6580		    w16e_conv(unicode_subchar, &i, &j);
6581		    unicode_subchar = i<<8 | j;
6582		    continue;
6583		}
6584#endif
6585#ifdef UTF8_OUTPUT_ENABLE
6586		if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6587		    ms_ucs_map_f = UCS_MAP_MS;
6588		    continue;
6589		}
6590#endif
6591#ifdef UNICODE_NORMALIZATION
6592		if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6593		    nfc_f = TRUE;
6594		    continue;
6595		}
6596#endif
6597		if (strcmp(long_option[i].name, "prefix=") == 0){
6598		    if (nkf_isgraph(p[0])){
6599			for (i = 1; nkf_isgraph(p[i]); i++){
6600			    prefix_table[p[i]] = p[0];
6601			}
6602		    }
6603		    continue;
6604		}
6605#if !defined(PERL_XS) && !defined(WIN32DLL)
6606		fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6607#endif
6608		return -1;
6609	    }
6610	    continue;
6611	case 'b':           /* buffered mode */
6612	    unbuf_f = FALSE;
6613	    continue;
6614	case 'u':           /* non bufferd mode */
6615	    unbuf_f = TRUE;
6616	    continue;
6617	case 't':           /* transparent mode */
6618	    if (*cp=='1') {
6619		/* alias of -t */
6620		cp++;
6621		nop_f = TRUE;
6622	    } else if (*cp=='2') {
6623		/*
6624		 * -t with put/get
6625		 *
6626		 * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6627		 *
6628		 */
6629		cp++;
6630		nop_f = 2;
6631	    } else
6632		nop_f = TRUE;
6633	    continue;
6634	case 'j':           /* JIS output */
6635	case 'n':
6636	    output_encoding = nkf_enc_from_index(ISO_2022_JP);
6637	    continue;
6638	case 'e':           /* AT&T EUC output */
6639	    output_encoding = nkf_enc_from_index(EUCJP_NKF);
6640	    continue;
6641	case 's':           /* SJIS output */
6642	    output_encoding = nkf_enc_from_index(SHIFT_JIS);
6643	    continue;
6644	case 'l':           /* ISO8859 Latin-1 support, no conversion */
6645	    iso8859_f = TRUE;  /* Only compatible with ISO-2022-JP */
6646	    input_encoding = nkf_enc_from_index(ISO_8859_1);
6647	    continue;
6648	case 'i':           /* Kanji IN ESC-$-@/B */
6649	    if (*cp=='@'||*cp=='B')
6650		kanji_intro = *cp++;
6651	    continue;
6652	case 'o':           /* ASCII IN ESC-(-J/B/H */
6653	    /* ESC ( H was used in initial JUNET messages */
6654	    if (*cp=='J'||*cp=='B'||*cp=='H')
6655		ascii_intro = *cp++;
6656	    continue;
6657	case 'h':
6658	    /*
6659	       bit:1   katakana->hiragana
6660	       bit:2   hiragana->katakana
6661	     */
6662	    if ('9'>= *cp && *cp>='0')
6663		hira_f |= (*cp++ -'0');
6664	    else
6665		hira_f |= 1;
6666	    continue;
6667	case 'r':
6668	    rot_f = TRUE;
6669	    continue;
6670#if defined(MSDOS) || defined(__OS2__)
6671	case 'T':
6672	    binmode_f = FALSE;
6673	    continue;
6674#endif
6675#ifndef PERL_XS
6676	case 'V':
6677	    show_configuration();
6678	    exit(EXIT_SUCCESS);
6679	    break;
6680	case 'v':
6681	    version();
6682	    exit(EXIT_SUCCESS);
6683	    break;
6684#endif
6685#ifdef UTF8_OUTPUT_ENABLE
6686	case 'w':           /* UTF-{8,16,32} output */
6687	    if (cp[0] == '8') {
6688		cp++;
6689		if (cp[0] == '0'){
6690		    cp++;
6691		    output_encoding = nkf_enc_from_index(UTF_8N);
6692		} else {
6693		    output_bom_f = TRUE;
6694		    output_encoding = nkf_enc_from_index(UTF_8_BOM);
6695		}
6696	    } else {
6697		int enc_idx;
6698		if ('1'== cp[0] && '6'==cp[1]) {
6699		    cp += 2;
6700		    enc_idx = UTF_16;
6701		} else if ('3'== cp[0] && '2'==cp[1]) {
6702		    cp += 2;
6703		    enc_idx = UTF_32;
6704		} else {
6705		    output_encoding = nkf_enc_from_index(UTF_8);
6706		    continue;
6707		}
6708		if (cp[0]=='L') {
6709		    cp++;
6710		    output_endian = ENDIAN_LITTLE;
6711		    output_bom_f = TRUE;
6712		} else if (cp[0] == 'B') {
6713		    cp++;
6714		    output_bom_f = TRUE;
6715		}
6716		if (cp[0] == '0'){
6717		    output_bom_f = FALSE;
6718		    cp++;
6719		    enc_idx = enc_idx == UTF_16
6720			? (output_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6721			: (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6722		} else {
6723		    enc_idx = enc_idx == UTF_16
6724			? (output_endian == ENDIAN_LITTLE ? UTF_16LE_BOM : UTF_16BE_BOM)
6725			: (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6726		}
6727		output_encoding = nkf_enc_from_index(enc_idx);
6728	    }
6729	    continue;
6730#endif
6731#ifdef UTF8_INPUT_ENABLE
6732	case 'W':           /* UTF input */
6733	    if (cp[0] == '8') {
6734		cp++;
6735		input_encoding = nkf_enc_from_index(UTF_8);
6736	    }else{
6737		int enc_idx;
6738		if ('1'== cp[0] && '6'==cp[1]) {
6739		    cp += 2;
6740		    input_endian = ENDIAN_BIG;
6741		    enc_idx = UTF_16;
6742		} else if ('3'== cp[0] && '2'==cp[1]) {
6743		    cp += 2;
6744		    input_endian = ENDIAN_BIG;
6745		    enc_idx = UTF_32;
6746		} else {
6747		    input_encoding = nkf_enc_from_index(UTF_8);
6748		    continue;
6749		}
6750		if (cp[0]=='L') {
6751		    cp++;
6752		    input_endian = ENDIAN_LITTLE;
6753		} else if (cp[0] == 'B') {
6754		    cp++;
6755		    input_endian = ENDIAN_BIG;
6756		}
6757		enc_idx = (enc_idx == UTF_16
6758		    ? (input_endian == ENDIAN_LITTLE ? UTF_16LE : UTF_16BE)
6759		    : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6760		input_encoding = nkf_enc_from_index(enc_idx);
6761	    }
6762	    continue;
6763#endif
6764	    /* Input code assumption */
6765	case 'J':   /* ISO-2022-JP input */
6766	    input_encoding = nkf_enc_from_index(ISO_2022_JP);
6767	    continue;
6768	case 'E':   /* EUC-JP input */
6769	    input_encoding = nkf_enc_from_index(EUCJP_NKF);
6770	    continue;
6771	case 'S':   /* Shift_JIS input */
6772	    input_encoding = nkf_enc_from_index(SHIFT_JIS);
6773	    continue;
6774	case 'Z':   /* Convert X0208 alphabet to asii */
6775	    /* alpha_f
6776	       bit:0   Convert JIS X 0208 Alphabet to ASCII
6777	       bit:1   Convert Kankaku to one space
6778	       bit:2   Convert Kankaku to two spaces
6779	       bit:3   Convert HTML Entity
6780	       bit:4   Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6781	     */
6782	    while ('0'<= *cp && *cp <='4') {
6783		alpha_f |= 1 << (*cp++ - '0');
6784	    }
6785	    alpha_f |= 1;
6786	    continue;
6787	case 'x':   /* Convert X0201 kana to X0208 or X0201 Conversion */
6788	    x0201_f = FALSE;    /* No X0201->X0208 conversion */
6789	    /* accept  X0201
6790	       ESC-(-I     in JIS, EUC, MS Kanji
6791	       SI/SO       in JIS, EUC, MS Kanji
6792	       SS2         in EUC, JIS, not in MS Kanji
6793	       MS Kanji (0xa0-0xdf)
6794	       output  X0201
6795	       ESC-(-I     in JIS (0x20-0x5f)
6796	       SS2         in EUC (0xa0-0xdf)
6797	       0xa0-0xd    in MS Kanji (0xa0-0xdf)
6798	     */
6799	    continue;
6800	case 'X':   /* Convert X0201 kana to X0208 */
6801	    x0201_f = TRUE;
6802	    continue;
6803	case 'F':   /* prserve new lines */
6804	    fold_preserve_f = TRUE;
6805	case 'f':   /* folding -f60 or -f */
6806	    fold_f = TRUE;
6807	    fold_len = 0;
6808	    while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6809		fold_len *= 10;
6810		fold_len += *cp++ - '0';
6811	    }
6812	    if (!(0<fold_len && fold_len<BUFSIZ))
6813		fold_len = DEFAULT_FOLD;
6814	    if (*cp=='-') {
6815		fold_margin = 0;
6816		cp++;
6817		while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6818		    fold_margin *= 10;
6819		    fold_margin += *cp++ - '0';
6820		}
6821	    }
6822	    continue;
6823	case 'm':   /* MIME support */
6824	    /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6825	    if (*cp=='B'||*cp=='Q') {
6826		mime_decode_mode = *cp++;
6827		mimebuf_f = FIXED_MIME;
6828	    } else if (*cp=='N') {
6829		mime_f = TRUE; cp++;
6830	    } else if (*cp=='S') {
6831		mime_f = STRICT_MIME; cp++;
6832	    } else if (*cp=='0') {
6833		mime_decode_f = FALSE;
6834		mime_f = FALSE; cp++;
6835	    } else {
6836		mime_f = STRICT_MIME;
6837	    }
6838	    continue;
6839	case 'M':   /* MIME output */
6840	    if (*cp=='B') {
6841		mimeout_mode = 'B';
6842		mimeout_f = FIXED_MIME; cp++;
6843	    } else if (*cp=='Q') {
6844		mimeout_mode = 'Q';
6845		mimeout_f = FIXED_MIME; cp++;
6846	    } else {
6847		mimeout_f = TRUE;
6848	    }
6849	    continue;
6850	case 'B':   /* Broken JIS support */
6851	    /*  bit:0   no ESC JIS
6852	       bit:1   allow any x on ESC-(-x or ESC-$-x
6853	       bit:2   reset to ascii on NL
6854	     */
6855	    if ('9'>= *cp && *cp>='0')
6856		broken_f |= 1<<(*cp++ -'0');
6857	    else
6858		broken_f |= TRUE;
6859	    continue;
6860#ifndef PERL_XS
6861	case 'O':/* for Output file */
6862	    file_out_f = TRUE;
6863	    continue;
6864#endif
6865	case 'c':/* add cr code */
6866	    eolmode_f = CRLF;
6867	    continue;
6868	case 'd':/* delete cr code */
6869	    eolmode_f = LF;
6870	    continue;
6871	case 'I':   /* ISO-2022-JP output */
6872	    iso2022jp_f = TRUE;
6873	    continue;
6874	case 'L':  /* line mode */
6875	    if (*cp=='u') {         /* unix */
6876		eolmode_f = LF; cp++;
6877	    } else if (*cp=='m') { /* mac */
6878		eolmode_f = CR; cp++;
6879	    } else if (*cp=='w') { /* windows */
6880		eolmode_f = CRLF; cp++;
6881	    } else if (*cp=='0') { /* no conversion  */
6882		eolmode_f = 0; cp++;
6883	    }
6884	    continue;
6885#ifndef PERL_XS
6886	case 'g':
6887	    if ('2' <= *cp && *cp <= '9') {
6888		guess_f = 2;
6889		cp++;
6890	    } else if (*cp == '0' || *cp == '1') {
6891		guess_f = 1;
6892		cp++;
6893	    } else {
6894		guess_f = 1;
6895	    }
6896	    continue;
6897#endif
6898	case SP:
6899	    /* multiple options in one string are allowed for the Perl module */
6900	    while(*cp && *cp++!='-');
6901	    continue;
6902	default:
6903#if !defined(PERL_XS) && !defined(WIN32DLL)
6904	    fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6905#endif
6906	    /* bogus option but ignored */
6907	    return -1;
6908	}
6909    }
6910    return 0;
6911}
6912
6913#ifdef WIN32DLL
6914#include "nkf32dll.c"
6915#elif defined(PERL_XS)
6916#else /* WIN32DLL */
6917int
6918main(int argc, char **argv)
6919{
6920    FILE  *fin;
6921    unsigned char  *cp;
6922
6923    char *outfname = NULL;
6924    char *origfname;
6925
6926#ifdef EASYWIN /*Easy Win */
6927    _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6928#endif
6929#ifdef DEFAULT_CODE_LOCALE
6930    setlocale(LC_CTYPE, "");
6931#endif
6932    nkf_state_init();
6933
6934    for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6935	cp = (unsigned char *)*argv;
6936	options(cp);
6937#ifdef EXEC_IO
6938	if (exec_f){
6939	    int fds[2], pid;
6940	    if (pipe(fds) < 0 || (pid = fork()) < 0){
6941		abort();
6942	    }
6943	    if (pid == 0){
6944		if (exec_f > 0){
6945		    close(fds[0]);
6946		    dup2(fds[1], 1);
6947		}else{
6948		    close(fds[1]);
6949		    dup2(fds[0], 0);
6950		}
6951		execvp(argv[1], &argv[1]);
6952	    }
6953	    if (exec_f > 0){
6954		close(fds[1]);
6955		dup2(fds[0], 0);
6956	    }else{
6957		close(fds[0]);
6958		dup2(fds[1], 1);
6959	    }
6960	    argc = 0;
6961	    break;
6962	}
6963#endif
6964    }
6965
6966    if (guess_f) {
6967#ifdef CHECK_OPTION
6968	int debug_f_back = debug_f;
6969#endif
6970#ifdef EXEC_IO
6971	int exec_f_back = exec_f;
6972#endif
6973#ifdef X0212_ENABLE
6974	int x0212_f_back = x0212_f;
6975#endif
6976	int x0213_f_back = x0213_f;
6977	int guess_f_back = guess_f;
6978	reinit();
6979	guess_f = guess_f_back;
6980	mime_f = FALSE;
6981#ifdef CHECK_OPTION
6982	debug_f = debug_f_back;
6983#endif
6984#ifdef EXEC_IO
6985	exec_f = exec_f_back;
6986#endif
6987	x0212_f = x0212_f_back;
6988	x0213_f = x0213_f_back;
6989    }
6990
6991    if (binmode_f == TRUE)
6992#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6993	if (freopen("","wb",stdout) == NULL)
6994	    return (-1);
6995#else
6996    setbinmode(stdout);
6997#endif
6998
6999    if (unbuf_f)
7000	setbuf(stdout, (char *) NULL);
7001    else
7002	setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
7003
7004    if (argc == 0) {
7005	if (binmode_f == TRUE)
7006#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7007	    if (freopen("","rb",stdin) == NULL) return (-1);
7008#else
7009	setbinmode(stdin);
7010#endif
7011	setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
7012	if (nop_f)
7013	    noconvert(stdin);
7014	else {
7015	    kanji_convert(stdin);
7016	    if (guess_f) print_guessed_code(NULL);
7017	}
7018    } else {
7019	int nfiles = argc;
7020	int is_argument_error = FALSE;
7021	while (argc--) {
7022	    input_codename = NULL;
7023	    input_eol = 0;
7024#ifdef CHECK_OPTION
7025	    iconv_for_check = 0;
7026#endif
7027	    if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
7028		perror(*(argv-1));
7029		is_argument_error = TRUE;
7030		continue;
7031	    } else {
7032#ifdef OVERWRITE
7033		int fd = 0;
7034		int fd_backup = 0;
7035#endif
7036
7037		/* reopen file for stdout */
7038		if (file_out_f == TRUE) {
7039#ifdef OVERWRITE
7040		    if (overwrite_f){
7041			outfname = nkf_xmalloc(strlen(origfname)
7042					  + strlen(".nkftmpXXXXXX")
7043					  + 1);
7044			strcpy(outfname, origfname);
7045#ifdef MSDOS
7046			{
7047			    int i;
7048			    for (i = strlen(outfname); i; --i){
7049				if (outfname[i - 1] == '/'
7050				    || outfname[i - 1] == '\\'){
7051				    break;
7052				}
7053			    }
7054			    outfname[i] = '\0';
7055			}
7056			strcat(outfname, "ntXXXXXX");
7057			mktemp(outfname);
7058			fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
7059				  S_IREAD | S_IWRITE);
7060#else
7061			strcat(outfname, ".nkftmpXXXXXX");
7062			fd = mkstemp(outfname);
7063#endif
7064			if (fd < 0
7065			    || (fd_backup = dup(fileno(stdout))) < 0
7066			    || dup2(fd, fileno(stdout)) < 0
7067			   ){
7068			    perror(origfname);
7069			    return -1;
7070			}
7071		    }else
7072#endif
7073		    if(argc == 1) {
7074			outfname = *argv++;
7075			argc--;
7076		    } else {
7077			outfname = "nkf.out";
7078		    }
7079
7080		    if(freopen(outfname, "w", stdout) == NULL) {
7081			perror (outfname);
7082			return (-1);
7083		    }
7084		    if (binmode_f == TRUE) {
7085#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7086			if (freopen("","wb",stdout) == NULL)
7087			    return (-1);
7088#else
7089			setbinmode(stdout);
7090#endif
7091		    }
7092		}
7093		if (binmode_f == TRUE)
7094#if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
7095		    if (freopen("","rb",fin) == NULL)
7096			return (-1);
7097#else
7098		setbinmode(fin);
7099#endif
7100		setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
7101		if (nop_f)
7102		    noconvert(fin);
7103		else {
7104		    char *filename = NULL;
7105		    kanji_convert(fin);
7106		    if (nfiles > 1) filename = origfname;
7107		    if (guess_f) print_guessed_code(filename);
7108		}
7109		fclose(fin);
7110#ifdef OVERWRITE
7111		if (overwrite_f) {
7112		    struct stat     sb;
7113#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7114		    time_t tb[2];
7115#else
7116		    struct utimbuf  tb;
7117#endif
7118
7119		    fflush(stdout);
7120		    close(fd);
7121		    if (dup2(fd_backup, fileno(stdout)) < 0){
7122			perror("dup2");
7123		    }
7124		    if (stat(origfname, &sb)) {
7125			fprintf(stderr, "Can't stat %s\n", origfname);
7126		    }
7127		    /* $B%Q!<%_%C%7%g%s$rI|85(B */
7128		    if (chmod(outfname, sb.st_mode)) {
7129			fprintf(stderr, "Can't set permission %s\n", outfname);
7130		    }
7131
7132		    /* $B%?%$%`%9%?%s%W$rI|85(B */
7133		    if(preserve_time_f){
7134#if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
7135			tb[0] = tb[1] = sb.st_mtime;
7136			if (utime(outfname, tb)) {
7137			    fprintf(stderr, "Can't set timestamp %s\n", outfname);
7138			}
7139#else
7140			tb.actime  = sb.st_atime;
7141			tb.modtime = sb.st_mtime;
7142			if (utime(outfname, &tb)) {
7143			    fprintf(stderr, "Can't set timestamp %s\n", outfname);
7144			}
7145#endif
7146		    }
7147		    if(backup_f){
7148			char *backup_filename = get_backup_filename(backup_suffix, origfname);
7149#ifdef MSDOS
7150			unlink(backup_filename);
7151#endif
7152			if (rename(origfname, backup_filename)) {
7153			    perror(backup_filename);
7154			    fprintf(stderr, "Can't rename %s to %s\n",
7155				    origfname, backup_filename);
7156			}
7157			nkf_xfree(backup_filename);
7158		    }else{
7159#ifdef MSDOS
7160			if (unlink(origfname)){
7161			    perror(origfname);
7162			}
7163#endif
7164		    }
7165		    if (rename(outfname, origfname)) {
7166			perror(origfname);
7167			fprintf(stderr, "Can't rename %s to %s\n",
7168				outfname, origfname);
7169		    }
7170		    nkf_xfree(outfname);
7171		}
7172#endif
7173	    }
7174	}
7175	if (is_argument_error)
7176	    return(-1);
7177    }
7178#ifdef EASYWIN /*Easy Win */
7179    if (file_out_f == FALSE)
7180	scanf("%d",&end_check);
7181    else
7182	fclose(stdout);
7183#else /* for Other OS */
7184    if (file_out_f == TRUE)
7185	fclose(stdout);
7186#endif /*Easy Win */
7187    return (0);
7188}
7189#endif /* WIN32DLL */
7190