1/*
2 * Copyright (C) 1999-2002, 2004-2007 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21/* This file defines all the converters. */
22
23
/*
 * Our own notion of wide character, as UCS-4, according to ISO-10646-1.
 * This is the type that xxx_mbtowc produces and xxx_wctomb consumes.
 */
typedef unsigned int ucs4_t;

/* State used by a conversion. 0 denotes the initial state. */
typedef unsigned int state_t;

/*
 * iconv_t is an opaque type. This is the real iconv_t type: a pointer to
 * the conversion descriptor, struct conv_struct.
 */
typedef struct conv_struct *conv_t;
32
33/*
34 * Data type for conversion multibyte -> unicode
35 */
36struct mbtowc_funcs {
37  int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n);
38  /*
39   * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
40   * converts the byte sequence starting at s to a wide character. Up to n bytes
41   * are available at s. n is >= 1.
42   * Result is number of bytes consumed (if a wide character was read),
43   * or -1 if invalid, or -2 if n too small, or -2-(number of bytes consumed)
44   * if only a shift sequence was read.
45   */
46  int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
47  /*
48   * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
49   * returns to the initial state and stores the pending wide character, if any.
50   * Result is 1 (if a wide character was read) or 0 if none was pending.
51   */
52};
53
/* Return code if invalid. (xxx_mbtowc) */
#define RET_ILSEQ      (-1)
/*
 * Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
 * RET_TOOFEW(0) is -2, the "n too small" result with nothing consumed.
 * The argument is parenthesized, so any integer expression may be passed.
 */
#define RET_TOOFEW(n)  (-2-(n))
58
59/*
60 * Data type for conversion unicode -> multibyte
61 */
62struct wctomb_funcs {
63  int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n);
64  /*
65   * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
66   * converts the wide character wc to the character set xxx, and stores the
67   * result beginning at r. Up to n bytes may be written at r. n is >= 1.
68   * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
69   */
70  int (*xxx_reset) (conv_t conv, unsigned char *r, int n);
71  /*
72   * int xxx_reset (conv_t conv, unsigned char *r, int n)
73   * stores a shift sequences returning to the initial state beginning at r.
74   * Up to n bytes may be written at r. n is >= 0.
75   * Result is number of bytes written, or -2 if n too small.
76   */
77};
78
/* Return code if invalid. (xxx_wctomb) */
#define RET_ILUNI      (-1)
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   (-2)
83
84/*
85 * Contents of a conversion descriptor.
86 */
87struct conv_struct {
88  struct loop_funcs lfuncs;
89  /* Input (conversion multibyte -> unicode) */
90  int iindex;
91  struct mbtowc_funcs ifuncs;
92  state_t istate;
93  /* Output (conversion unicode -> multibyte) */
94  int oindex;
95  struct wctomb_funcs ofuncs;
96  int oflags;
97  state_t ostate;
98  /* Operation flags */
99  int transliterate;
100  int discard_ilseq;
101  #ifndef LIBICONV_PLUG
102  struct iconv_fallbacks fallbacks;
103  struct iconv_hooks hooks;
104  #endif
105};
106
107/*
108 * Include all the converters.
109 */
110
111#include "ascii.h"
112
113/* General multi-byte encodings */
114#include "utf8.h"
115#include "ucs2.h"
116#include "ucs2be.h"
117#include "ucs2le.h"
118#include "ucs4.h"
119#include "ucs4be.h"
120#include "ucs4le.h"
121#include "utf16.h"
122#include "utf16be.h"
123#include "utf16le.h"
124#include "utf32.h"
125#include "utf32be.h"
126#include "utf32le.h"
127#include "utf7.h"
128#include "ucs2internal.h"
129#include "ucs2swapped.h"
130#include "ucs4internal.h"
131#include "ucs4swapped.h"
132#include "c99.h"
133#include "java.h"
134
135/* 8-bit encodings */
136#include "iso8859_1.h"
137#include "iso8859_2.h"
138#include "iso8859_3.h"
139#include "iso8859_4.h"
140#include "iso8859_5.h"
141#include "iso8859_6.h"
142#include "iso8859_7.h"
143#include "iso8859_8.h"
144#include "iso8859_9.h"
145#include "iso8859_10.h"
146#include "iso8859_11.h"
147#include "iso8859_13.h"
148#include "iso8859_14.h"
149#include "iso8859_15.h"
150#include "iso8859_16.h"
151#include "koi8_r.h"
152#include "koi8_u.h"
153#include "koi8_ru.h"
154#include "cp1250.h"
155#include "cp1251.h"
156#include "cp1252.h"
157#include "cp1253.h"
158#include "cp1254.h"
159#include "cp1255.h"
160#include "cp1256.h"
161#include "cp1257.h"
162#include "cp1258.h"
163#include "cp850.h"
164#include "cp862.h"
165#include "cp866.h"
166#include "mac_roman.h"
167#include "mac_centraleurope.h"
168#include "mac_iceland.h"
169#include "mac_croatian.h"
170#include "mac_romania.h"
171#include "mac_cyrillic.h"
172#include "mac_ukraine.h"
173#include "mac_greek.h"
174#include "mac_turkish.h"
175#include "mac_hebrew.h"
176#include "mac_arabic.h"
177#include "mac_thai.h"
178#include "hp_roman8.h"
179#include "nextstep.h"
180#include "armscii_8.h"
181#include "georgian_academy.h"
182#include "georgian_ps.h"
183#include "koi8_t.h"
184#include "pt154.h"
185#include "rk1048.h"
186#include "mulelao.h"
187#include "cp1133.h"
188#include "tis620.h"
189#include "cp874.h"
190#include "viscii.h"
191#include "tcvn.h"
192
193/* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
194
/*
 * Pair of 16-bit values: an index into a larger table plus a 16-bit mask
 * recording which entries are in use.
 * NOTE(review): presumably consumed by the lookup tables in the CJK headers
 * included below -- confirm against those headers.
 */
typedef struct {
  unsigned short indx; /* index into big table */
  unsigned short used; /* bitmask of used entries */
} Summary16;
199
200#include "iso646_jp.h"
201#include "jisx0201.h"
202#include "jisx0208.h"
203#include "jisx0212.h"
204
205#include "iso646_cn.h"
206#include "gb2312.h"
207#include "isoir165.h"
208/*#include "gb12345.h"*/
209#include "gbk.h"
210#include "cns11643.h"
211#include "big5.h"
212
213#include "ksc5601.h"
214#include "johab_hangul.h"
215
216/* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
217
218#include "euc_jp.h"
219#include "sjis.h"
220#include "cp932.h"
221#include "iso2022_jp.h"
222#include "iso2022_jp1.h"
223#include "iso2022_jp2.h"
224
225#include "euc_cn.h"
226#include "ces_gbk.h"
227#include "cp936.h"
228#include "gb18030.h"
229#include "iso2022_cn.h"
230#include "iso2022_cnext.h"
231#include "hz.h"
232#include "euc_tw.h"
233#include "ces_big5.h"
234#include "cp950.h"
235#include "big5hkscs1999.h"
236#include "big5hkscs2001.h"
237#include "big5hkscs2004.h"
238
239#include "euc_kr.h"
240#include "cp949.h"
241#include "johab.h"
242#include "iso2022_kr.h"
243
244/* Encodings used by system dependent locales. */
245
246#ifdef USE_AIX
247#include "cp856.h"
248#include "cp922.h"
249#include "cp943.h"
250#include "cp1046.h"
251#include "cp1124.h"
252#include "cp1129.h"
253#include "cp1161.h"
254#include "cp1162.h"
255#include "cp1163.h"
256#endif
257
258#ifdef USE_OSF1
259#include "dec_kanji.h"
260#include "dec_hanyu.h"
261#endif
262
263#ifdef USE_DOS
264#include "cp437.h"
265#include "cp737.h"
266#include "cp775.h"
267#include "cp852.h"
268#include "cp853.h"
269#include "cp855.h"
270#include "cp857.h"
271#include "cp858.h"
272#include "cp860.h"
273#include "cp861.h"
274#include "cp863.h"
275#include "cp864.h"
276#include "cp865.h"
277#include "cp869.h"
278#include "cp1125.h"
279#endif
280
281#ifdef USE_EXTRA
282#include "euc_jisx0213.h"
283#include "shift_jisx0213.h"
284#include "iso2022_jp3.h"
285#include "big5_2003.h"
286#include "tds565.h"
287#include "atarist.h"
288#include "riscos1.h"
289#endif
290
291