1/* Multibyte Character Functions.
2   Copyright (C) 1998 Free Software Foundation, Inc.
3
4This file is part of GNU CC.
5
6GNU CC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11GNU CC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU CC; see the file COPYING.  If not, write to
18the Free Software Foundation, 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA.  */
20
21/* These functions are used to manipulate multibyte characters.  */
22
23/* Note regarding cross compilation:
24
25   In general translation of multibyte characters to wide characters can
26   only work in a native compiler since the translation function (mbtowc)
27   needs to know about both the source and target character encoding.  However,
28   this particular implementation for JIS, SJIS and EUCJP source characters
29   will work for any compiler with a newlib target.  Other targets may also
30   work provided that their wchar_t implementation is 2 bytes and the encoding
31   leaves the source character values unchanged (except for removing the
32   state shifting markers).  */
33
34#ifdef MULTIBYTE_CHARS
35#include "config.h"
36#include "system.h"
37#include "mbchar.h"
38#include <locale.h>
39
/* Class of a single input byte as seen by the "C-JIS" decoder in
   local_mbtowc.  The value is used as the column index into
   JIS_state_table and JIS_action_table, so the order of the enumerators
   must match the column order of those tables.  */
typedef enum
{
  ESCAPE,       /* the byte JIS_ESC_CHAR */
  DOLLAR,       /* '$' */
  BRACKET,      /* '(' */
  AT,           /* '@' */
  B,            /* 'B' */
  J,            /* 'J' */
  NUL,          /* '\0' */
  JIS_CHAR,     /* any other byte for which ISJIS is true */
  OTHER,        /* any byte not covered above */
  JIS_C_NUM     /* number of classes; used as a table dimension */
} JIS_CHAR_TYPE;
44
/* States of the "C-JIS" decoder in local_mbtowc.  The value is used as
   the row index into JIS_state_table and JIS_action_table, so the order
   of the enumerators must match the row order of those tables.  */
typedef enum
{
  ASCII,        /* initial state: single-byte text */
  A_ESC,        /* ESCAPE seen while in ASCII */
  A_ESC_DL,     /* ESCAPE DOLLAR seen while in ASCII */
  JIS,          /* two-byte mode, at a character boundary */
  JIS_1,        /* two-byte mode, first byte seen */
  JIS_2,        /* two-byte mode, both bytes seen; looking ahead */
  J_ESC,        /* ESCAPE seen while in JIS */
  J_ESC_BR,     /* ESCAPE BRACKET seen while in JIS */
  J2_ESC,       /* ESCAPE seen while in JIS_2 */
  J2_ESC_BR,    /* ESCAPE BRACKET seen while in JIS_2 */
  INV,          /* invalid input; always paired with the ERROR action */
  JIS_S_NUM     /* number of states; used as a table dimension */
} JIS_STATE;
50
/* Actions the "C-JIS" decoder in local_mbtowc performs after classifying
   an input byte; these are the element values of JIS_action_table.  */
typedef enum
{
  COPYA,        /* store one byte as the result and stop */
  COPYJ,        /* store a two-byte character; trailing escape consumed */
  COPYJ2,       /* store a two-byte character; lookahead byte not consumed */
  MAKE_A,       /* handled like MAKE_J; not used in JIS_action_table */
  MAKE_J,       /* mark the byte after the current one as character start */
  NOOP,         /* nothing to do yet; keep scanning */
  EMPTY,        /* store a null wide character and stop */
  ERROR         /* invalid sequence; the conversion fails */
} JIS_ACTION;
55
/*****************************************************************************
 * state/action tables for processing JIS encoding
 * Where possible, switches to JIS are grouped with the following JIS
 * characters and switches to ASCII are grouped with the preceding JIS
 * characters.
 * Thus, maximum returned length is:
 *   3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
 *****************************************************************************/
63static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
64/*            ESCAPE DOLLAR   BRACKET   AT     B      J     NUL JIS_CHAR OTHER*/
65/*ASCII*/   { A_ESC, ASCII,   ASCII,    ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
66/*A_ESC*/   { ASCII, A_ESC_DL,ASCII,    ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
67/*A_ESC_DL*/{ ASCII, ASCII,   ASCII,    JIS,   JIS,   ASCII, ASCII,ASCII,ASCII},
68/*JIS*/     { J_ESC, JIS_1,   JIS_1,    JIS_1, JIS_1, JIS_1, INV,  JIS_1,INV },
69/*JIS_1*/   { INV,   JIS_2,   JIS_2,    JIS_2, JIS_2, JIS_2, INV,  JIS_2,INV },
70/*JIS_2*/   { J2_ESC,JIS,     JIS,      JIS,   JIS,   JIS,   INV,  JIS,  JIS },
71/*J_ESC*/   { INV,   INV,     J_ESC_BR, INV,   INV,   INV,   INV,  INV,  INV },
72/*J_ESC_BR*/{ INV,   INV,     INV,      INV,   ASCII, ASCII, INV,  INV,  INV },
73/*J2_ESC*/  { INV,   INV,     J2_ESC_BR,INV,   INV,   INV,   INV,  INV,  INV },
74/*J2_ESC_BR*/{INV,   INV,     INV,      INV,   ASCII, ASCII, INV,  INV,  INV },
75};
76
77static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
78/*            ESCAPE DOLLAR BRACKET AT     B       J      NUL  JIS_CHAR OTHER */
79/*ASCII */   {NOOP,  COPYA, COPYA, COPYA,  COPYA,  COPYA, EMPTY, COPYA, COPYA},
80/*A_ESC */   {COPYA, NOOP,  COPYA, COPYA,  COPYA,  COPYA, COPYA, COPYA, COPYA},
81/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
82/*JIS */     {NOOP,  NOOP,  NOOP,  NOOP,   NOOP,   NOOP,  ERROR, NOOP,  ERROR },
83/*JIS_1 */   {ERROR, NOOP,  NOOP,  NOOP,   NOOP,   NOOP,  ERROR, NOOP,  ERROR },
84/*JIS_2 */   {NOOP,  COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
85/*J_ESC */   {ERROR, ERROR, NOOP,  ERROR,  ERROR,  ERROR, ERROR, ERROR, ERROR },
86/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR,  NOOP,   NOOP,  ERROR, ERROR, ERROR },
87/*J2_ESC */  {ERROR, ERROR, NOOP,  ERROR,  ERROR,  ERROR, ERROR, ERROR, ERROR },
88/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR,  COPYJ,  COPYJ, ERROR, ERROR, ERROR },
89};
90
91
/* Name of the encoding used for multibyte characters.  local_mbtowc and
   local_mb_cur_max recognize "C-SJIS", "C-EUCJP" and "C-JIS"; a null
   pointer or a name of at most one character selects the default
   ("C" or unknown locale) handling.  */
char *literal_codeset = NULL;
93
94int
95local_mbtowc (pwc, s, n)
96     wchar_t       *pwc;
97     const char    *s;
98     size_t         n;
99{
100  static JIS_STATE save_state = ASCII;
101  JIS_STATE curr_state = save_state;
102  unsigned char *t = (unsigned char *)s;
103
104  if (s != NULL && n == 0)
105    return -1;
106
107  if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
108    {
109      /* This must be the "C" locale or unknown locale -- fall thru */
110    }
111  else if (! strcmp (literal_codeset, "C-SJIS"))
112    {
113      int char1;
114      if (s == NULL)
115        return 0;  /* not state-dependent */
116      char1 = *t;
117      if (ISSJIS1 (char1))
118        {
119          int char2 = t[1];
120          if (n <= 1)
121            return -1;
122          if (ISSJIS2 (char2))
123            {
124	      if (pwc != NULL)
125		*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
126              return 2;
127            }
128	  return -1;
129        }
130      if (pwc != NULL)
131	*pwc = (wchar_t)*t;
132      if (*t == '\0')
133	return 0;
134      return 1;
135    }
136  else if (! strcmp (literal_codeset, "C-EUCJP"))
137    {
138      int char1;
139      if (s == NULL)
140        return 0;  /* not state-dependent */
141      char1 = *t;
142      if (ISEUCJP (char1))
143        {
144          int char2 = t[1];
145          if (n <= 1)
146            return -1;
147          if (ISEUCJP (char2))
148            {
149	      if (pwc != NULL)
150		*pwc = (((wchar_t)*t) << 8) + (wchar_t)(*(t+1));
151              return 2;
152            }
153	  return -1;
154        }
155      if (pwc != NULL)
156	*pwc = (wchar_t)*t;
157      if (*t == '\0')
158	return 0;
159      return 1;
160    }
161  else if (! strcmp (literal_codeset, "C-JIS"))
162    {
163      JIS_ACTION action;
164      JIS_CHAR_TYPE ch;
165      unsigned char *ptr;
166      int i, curr_ch;
167
168      if (s == NULL)
169	{
170	  save_state = ASCII;
171	  return 1;  /* state-dependent */
172	}
173
174      ptr = t;
175
176      for (i = 0; i < n; ++i)
177        {
178          curr_ch = t[i];
179          switch (curr_ch)
180            {
181	    case JIS_ESC_CHAR:
182              ch = ESCAPE;
183              break;
184	    case '$':
185              ch = DOLLAR;
186              break;
187            case '@':
188              ch = AT;
189              break;
190            case '(':
191	      ch = BRACKET;
192              break;
193            case 'B':
194              ch = B;
195              break;
196            case 'J':
197              ch = J;
198              break;
199            case '\0':
200              ch = NUL;
201              break;
202            default:
203              if (ISJIS (curr_ch))
204                ch = JIS_CHAR;
205              else
206                ch = OTHER;
207	    }
208
209          action = JIS_action_table[curr_state][ch];
210          curr_state = JIS_state_table[curr_state][ch];
211
212          switch (action)
213            {
214            case NOOP:
215              break;
216            case EMPTY:
217	      if (pwc != NULL)
218		*pwc = (wchar_t)0;
219	      save_state = curr_state;
220              return i;
221            case COPYA:
222	      if (pwc != NULL)
223		*pwc = (wchar_t)*ptr;
224	      save_state = curr_state;
225              return (i + 1);
226            case COPYJ:
227	      if (pwc != NULL)
228		*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
229	      save_state = curr_state;
230              return (i + 1);
231            case COPYJ2:
232	      if (pwc != NULL)
233		*pwc = (((wchar_t)*ptr) << 8) + (wchar_t)(*(ptr+1));
234	      save_state = curr_state;
235              return (ptr - t) + 2;
236            case MAKE_A:
237            case MAKE_J:
238              ptr = (char *)(t + i + 1);
239              break;
240            case ERROR:
241            default:
242              return -1;
243            }
244        }
245
246      return -1;  /* n < bytes needed */
247    }
248
249#ifdef CROSS_COMPILE
250  if (s == NULL)
251    return 0;  /* not state-dependent */
252  if (pwc != NULL)
253    *pwc = *s;
254  return 1;
255#else
256  /* This must be the "C" locale or unknown locale. */
257  return mbtowc (pwc, s, n);
258#endif
259}
260
/* Return what local_mbtowc returns for the multibyte character at S
   (at most N bytes examined) when no wide character is stored.  */
int
local_mblen (s, n)
     const char    *s;
     size_t         n;
{
  int length;

  /* A null destination asks only for the length.  */
  length = local_mbtowc (NULL, s, n);

  return length;
}
268
269int
270local_mb_cur_max ()
271{
272  if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
273    ;
274  else if (! strcmp (literal_codeset, "C-SJIS"))
275    return 2;
276  else if (! strcmp (literal_codeset, "C-EUCJP"))
277    return 2;
278  else if (! strcmp (literal_codeset, "C-JIS"))
279    return 8; /* 3 + 2 + 3 */
280
281#ifdef CROSS_COMPILE
282  return 1;
283#else
284  if (MB_CUR_MAX > 0)
285    return MB_CUR_MAX;
286
287  return 1; /* default */
288#endif
289}
290#endif /* MULTIBYTE_CHARS */
291