1/* Copyright 1994 NEC Corporation, Tokyo, Japan.
2 *
3 * Permission to use, copy, modify, distribute and sell this software
4 * and its documentation for any purpose is hereby granted without
5 * fee, provided that the above copyright notice appear in all copies
6 * and that both that copyright notice and this permission notice
7 * appear in supporting documentation, and that the name of NEC
8 * Corporation not be used in advertising or publicity pertaining to
9 * distribution of the software without specific, written prior
10 * permission.  NEC Corporation makes no representations about the
11 * suitability of this software for any purpose.  It is provided "as
12 * is" without express or implied warranty.
13 *
14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 * PERFORMANCE OF THIS SOFTWARE.
21 */
22
/* RCS identification string; left out when building under lint or CodeCenter. */
#if !defined(lint) && !defined(__CODECENTER__)
static char rcsid[]="@(#) 102.1 $Id: kana.c 10525 2004-12-23 21:23:50Z korli $";
#endif
26
27/* LINTLIBRARY */
28#include	"RKintern.h"
29
/*
 * Output formats selected by the `format' argument of RkwCvtSuuji().
 */
#define SUUJI_THROUGH		0	/* copy every digit exactly as it appears in the source */
#define SUUJI_HANKAKU		1	/* ASCII digits ('0' + value) */
#define SUUJI_ZENKAKU		2	/* full-width digits, looked up in hiragana[] */
#define SUUJI_SIMPLEKANJI	3	/* one suujinew[] character per digit, no units */
#define SUUJI_FULLKANJI		4	/* suujinew[] digits with kurai3new[]/kurai4[] units */
#define SUUJI_FULLKANJITRAD	5	/* suujiold[] digits with kurai3old[]/kurai4[] units */
#define SUUJI_WITHKANJIUNIT	6	/* full-width digits with a kurai4[] unit per 4 digits */
#define SUUJI_WITHCOMMA		7	/* full-width digits with a comma every 3 digits */
38
39#if 0
/* hiragana
 *	Lookup table used when converting half-width characters to
 *	full-width ones.  It is indexed by a single byte value and yields
 *	a two-byte code; 0x0000 means "no mapping".
 *	RkCvtZen() indexes it with plain ASCII bytes and with the byte that
 *	follows a 0x8e prefix (half-width katakana); RkwCvtSuuji() uses it
 *	to obtain full-width digits and the full-width comma.
 */
static
WCHAR_T
hiragana[] =
{
/* 0x00 */
	0x0000,	0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/* 0x10 */
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/* 0x20 */
	0xa1a1, 0xa1aa, 0xa1ed, 0xa1f4,		0xa1f0, 0xa1f3, 0xa1f5, 0xa1c7,
	0xa1ca, 0xa1cb, 0xa1f6, 0xa1dc,		0xa1a4, 0xa1dd, 0xa1a5, 0xa1bf,
/* 0x30 */
	0xa3b0, 0xa3b1, 0xa3b2, 0xa3b3,		0xa3b4, 0xa3b5, 0xa3b6, 0xa3b7,
	0xa3b8, 0xa3b9, 0xa1a7, 0xa1a8,		0xa1e3, 0xa1e1, 0xa1e4, 0xa1a9,
/* 0x40 */
	0xa1f7, 0xa3c1, 0xa3c2, 0xa3c3,		0xa3c4, 0xa3c5, 0xa3c6, 0xa3c7,
	0xa3c8, 0xa3c9, 0xa3ca, 0xa3cb,		0xa3cc, 0xa3cd, 0xa3ce, 0xa3cf,
/* 0x50 */
	0xa3d0, 0xa3d1, 0xa3d2, 0xa3d3,		0xa3d4, 0xa3d5, 0xa3d6, 0xa3d7,
	0xa3d8, 0xa3d9, 0xa3da, 0xa1ce,		0xa1ef, 0xa1cf, 0xa1b0, 0xa1b2,
/* 0x60 */
	0xa1c6, 0xa3e1, 0xa3e2, 0xa3e3,		0xa3e4, 0xa3e5, 0xa3e6, 0xa3e7,
	0xa3e8, 0xa3e9, 0xa3ea, 0xa3eb,		0xa3ec, 0xa3ed, 0xa3ee, 0xa3ef,
/* 0x70 */
	0xa3f0, 0xa3f1, 0xa3f2, 0xa3f3,		0xa3f4, 0xa3f5, 0xa3f6, 0xa3f7,
	0xa3f8, 0xa3f9, 0xa3fa, 0xa1d0,		0xa1c3, 0xa1d1, 0xa1c1, 0xa2a2,
/*0x80 */
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0x90 */
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0xa0 */
	0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7,		0xa1a2, 0xa1a6, 0xa4f2, 0xa4a1,
	0xa4a3, 0xa4a5, 0xa4a7, 0xa4a9,		0xa4e3, 0xa4e5, 0xa4e7, 0xa4c3,
/*0xb0 */
	0xa1bc, 0xa4a2, 0xa4a4, 0xa4a6,		0xa4a8, 0xa4aa, 0xa4ab, 0xa4ad,
	0xa4af, 0xa4b1, 0xa4b3, 0xa4b5,		0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd,
/*0xc0 */
	0xa4bf, 0xa4c1, 0xa4c4, 0xa4c6,		0xa4c8, 0xa4ca, 0xa4cb, 0xa4cc,
	0xa4cd, 0xa4ce, 0xa4cf, 0xa4d2,		0xa4d5, 0xa4d8, 0xa4db, 0xa4de,
/*0xd0 */
	0xa4df, 0xa4e0, 0xa4e1, 0xa4e2,		0xa4e4, 0xa4e6, 0xa4e8, 0xa4e9,
	0xa4ea, 0xa4eb, 0xa4ec, 0xa4ed,		0xa4ef, 0xa4f3, 0xa1ab, 0xa1ac,
/* 0xe0 */
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/* 0xf0 */
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
};
96
/* hankaku
 *	Lookup table used by RkCvtHan() when converting full-width
 *	characters to half-width ones; 0x0000 means "no mapping".
 *	  - For a 0xa1-row character the index is the second byte with
 *	    its top bit cleared (lo & 0x7f); the entry is either an ASCII
 *	    character or a two-byte 0x8eXX code.
 *	  - For a 0xa4/0xa5-row (hiragana/katakana) character the index
 *	    is the second byte itself.  An entry whose high byte is zero
 *	    is a single kana byte; an entry with a non-zero high byte
 *	    (e.g. 0xb6de) holds two bytes, a kana followed by 0xde or
 *	    0xdf, and RkCvtHan() emits each of them behind its own 0x8e
 *	    prefix.
 */
static
WCHAR_T
hankaku[] = {
/*0x00*/
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0x10*/
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0x20*/
	0x0000,    ' ', 0x8ea4, 0x8ea1,		   ',',    '.', 0x8ea5,    ':',
	   ';',    '?',    '!', 0x8ede,		0x8edf, 0x0000, 0x0000, 0x0000,
/*0x30*/
	   '^', 0x0000,    '_', 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x8eb0, 0x0000, 0x0000,    '/',
/*0x40*/
	0x0000,    '~', 0x0000,    '|',		0x0000, 0x0000,   '\'',   '\'',
	   '"',    '"',    '(',    ')',		   '[',    ']',    '[',    ']',
/*0x50*/
	'{',    '}', 0x0000, 0x0000,		0x0000, 0x0000, 0x8ea2, 0x8ea3,
	0x0000, 0x0000, 0x0000, 0x0000,		   '+',    '-', 0x0000, 0x0000,
/*0x60*/
	0x0000,    '=', 0x0000,    '<',		   '>', 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000,   '\\',
/*0x70*/
	    '$',0x0000, 0x0000,    '%',		   '#',    '&',    '*',    '@',
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0x80*/
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0x90*/
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
/*0xa0*/
	0x0020, 0x00a7, 0x00b1, 0x00a8, 	0x00b2, 0x00a9, 0x00b3, 0x00aa,
	0x00b4, 0x00ab, 0x00b5, 0x00b6, 	0xb6de, 0x00b7, 0xb7de, 0x00b8,
/*0xb0*/
	0xb8de, 0x00b9, 0xb9de, 0x00ba, 	0xbade, 0x00bb, 0xbbde, 0x00bc,
	0xbcde, 0x00bd, 0xbdde, 0x00be, 	0xbede, 0x00bf, 0xbfde, 0x00c0,
/*0xc0*/
	0xc0de, 0x00c1, 0xc1de, 0x00af, 	0x00c2, 0xc2de, 0x00c3, 0xc3de,
	0x00c4, 0xc4de, 0x00c5, 0x00c6, 	0x00c7, 0x00c8, 0x00c9, 0x00ca,
/*0xd0*/
	0xcade, 0xcadf, 0x00cb, 0xcbde, 	0xcbdf, 0x00cc, 0xccde, 0xccdf,
	0x00cd, 0xcdde, 0xcddf, 0x00ce, 	0xcede, 0xcedf, 0x00cf, 0x00d0,
/*0xe0*/
	0x00d1, 0x00d2, 0x00d3, 0x00ac, 	0x00d4, 0x00ad, 0x00d5, 0x00ae,
	0x00d6, 0x00d7, 0x00d8, 0x00d9, 	0x00da, 0x00db, 0x00dc, 0x00dc,
/*0xf0*/
	0x00b2, 0x00b4, 0x00a6, 0x00dd,		0xb3de, 0x00b6, 0x00b9, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000,		0x0000, 0x0000, 0x0000, 0x0000,
};
149
150#endif
151
152#ifdef OBSOLETE_RKKANA
153
/*
 * ADDCODE (obsolete macro form) -- append the low `length' bytes of
 * `code' to the output, most significant byte first.
 *
 * If `length' bytes fit into the remaining space `maxdst', `maxdst' is
 * reduced and `count' increased by `length' whether or not `dst' is
 * NULL; the bytes are stored, and `dst' advanced, only when `dst' is
 * non-NULL.  Nothing happens when the bytes do not fit.
 *
 * All arguments except `length' must be modifiable lvalues: `code' is
 * shifted right as its bytes are consumed.  The switch falls through
 * on purpose so that `length' selects how many bytes are written.
 */
#define	ADDCODE(dst, maxdst, count, code, length) {\
    if ( (unsigned long)(length) <= (unsigned long)(maxdst) ) {\
	(maxdst) -= (length); (count) += (length);\
	if ( (dst) ) {\
	    (dst) += (length);\
	    switch((length)) {\
	    case 4:	*--(dst) = (code)&255; (code) >>= 8;\
	    case 3:	*--(dst) = (code)&255; (code) >>= 8;\
	    case 2:	*--(dst) = (code)&255; (code) >>= 8;\
	    case 1:	*--(dst) = (code)&255; (code) >>= 8;\
	    };\
	    (dst) += (length);\
	};\
    };\
}
169
170#else /* !OBSOLETE_RKKANA */
171
172#include "RKproto.h"
/* Forward declarations for the file-local helpers. */
/* Stores the low `length' bytes of `code' at dst; see the definition below. */
static int _ADDCODE(unsigned char *dst, int maxdst, int count, unsigned long code, int length);
/* Counts the characters in an EUC byte string of at most `bytelen' bytes. */
static int euccharlen(unsigned char *s, int bytelen);
/* Converts a WCHAR_T string to Shift-JIS (defined only with USE_SJIS_TEXT_DIC). */
static int Wcstosjis(char *dst, int dstlen, WCHAR_T *src, int srclen);
176
/*
 * _ADDCODE -- store the low `length' bytes of `code' at `dst', most
 * significant byte first.
 *
 *   dst     destination, or NULL to only check whether the bytes fit
 *   maxdst  number of bytes still free at dst
 *   count   running total kept by the caller; unused here, kept so the
 *           ADDCODE() macro can pass its arguments straight through
 *   code    value whose low-order bytes are emitted
 *   length  number of bytes to emit, 1..4
 *
 * Returns `length' when the bytes fit (whether or not dst is NULL) and
 * 0 when nothing was stored: maxdst is too small or negative, or length
 * is outside 1..4.  The caller advances its own pointer and counters.
 */
static int
_ADDCODE(unsigned char *dst, int maxdst, int count, unsigned long code, int length)
{
  int i;

  (void)count;
  /* Compare as signed ints: casting to unsigned long would make a
     negative maxdst look like a huge amount of free space. */
  if (length < 1 || length > 4 || maxdst < length)
    return 0;
  if (dst) {
    for (i = length - 1; i >= 0; i--) {
      dst[i] = (unsigned char)code;
      code >>= 8;
    }
  }
  return length;
}
196
/*
 * ADDCODE -- append the low `length' bytes of `code' to the output by
 * way of _ADDCODE().
 *
 * When the bytes fit, `maxdst' is reduced and `count' increased by the
 * number of bytes, and `dst' is advanced if it is non-NULL.  A NULL
 * `dst' therefore still counts the bytes, as the obsolete macro form and
 * ADDWCODE() do.  `dst', `maxdst' and `count' must be modifiable lvalues.
 * The expansion is a brace block, matching ADDWCODE().
 */
#define ADDCODE(dst, maxdst, count, code, length) \
{ int llen = _ADDCODE((dst), (maxdst), (count), (unsigned long)(code), (length)); \
  if (llen > 0) { if (dst) { (dst) += llen; } (maxdst) -= llen; (count) += llen; }}
200
201#endif /* !OBSOLETE_RKKANA */
202
/*
 * ADDWCODE -- append one wide character `code' to the output.
 *
 * While `maxdst' is positive, one slot is consumed (`maxdst' decremented,
 * `count' incremented) and, if `dst' is non-NULL, `code' is stored there
 * and `dst' advanced.  With no room left the macro does nothing.
 * The expansion is a plain brace block: some call sites use it without a
 * trailing semicolon directly in front of `else'.
 */
#define	ADDWCODE(dst, maxdst, count, code) {\
    if ( (maxdst) >= 1 ) {\
	(count)++ ; (maxdst)-- ;\
	if ( (dst) ) {\
	    *(dst) = (code); (dst)++;\
	}\
    }\
}
211
212#if 0
213
214static int
215euccharlen(unsigned char *s, int bytelen)
216{
217  unsigned char	ch;
218  int		res = 0;
219
220  while ((ch = *s++) && bytelen--) {
221    res++;
222    if (ch & 0x80) {
223      if (ch == RK_SS3) {
224	s++;
225	bytelen--;
226      }
227      s++;
228      bytelen--;
229    }
230  }
231  return res;
232}
233
/* RkCvtZen
 *	Convert half-width characters (ASCII and half-width katakana) in
 *	an EUC byte string to the corresponding full-width characters.
 *	A dakuten/handakuten mark following a kana is folded into it.
 *
 *	zen	destination buffer (may be NULL, in which case nothing is
 *		stored)
 *	maxzen	size of zen; one byte is reserved for the terminating NUL
 *	han	source bytes
 *	maxhan	number of source bytes
 *
 *	Returns the value accumulated in `count' by ADDCODE(); returns 0
 *	at once when maxzen leaves no room for any character.
 *
 *	NOTE(review): the bytes following a lead byte are read without
 *	being checked against the end of the input (H).
 */
int RkCvtZen (unsigned char *, int, unsigned char *, int);

int
RkCvtZen(unsigned char *zen, int maxzen, unsigned char *han, int maxhan)
{
  unsigned char	*z = zen;
  unsigned char	*h = han;
  unsigned char	*H = han + maxhan;
  WCHAR_T		hi, lo;
  unsigned 	byte;
  int 		count = 0;
  unsigned long	code;

  if ( --maxzen <= 0 )
    return count;
  while ( h < H ) {
    hi = *h++;
    byte = 2;
    if ( hi == 0x8e ) {	/* half-width katakana */
      /* look the kana byte up; keep the original two bytes if unmapped */
      if ( !(code = hiragana[lo = *h++]) )
	code = (hi<<8)|lo;
      byte = (code>>8) ? 2 : 1;
      if ( (code>>8) == 0xa4 ) {
	/* the table yields a 0xa4xx code; setting bit 8 makes it 0xa5xx */
	code |= 0x100;
	/* the kana may be followed by a dakuten/handakuten mark */
	if ( h + 1 < H && h[0] == 0x8e ) {
	  lo = h[1];
	  switch( LOMASK(code) ) {
	  case 0xa6: /* u */
	    if ( lo == 0xde ) code = 0xa5f4, h += 2;
	    break;
	    /* ha row: 0xdf adds 2; otherwise fall through to the 0xde test */
	    case 0xcf: case 0xd2: case 0xd5: case 0xd8: case 0xdb:
	    if ( lo == 0xdf ) {
	      code += 2, h += 2;
	      break;
	    };
	  case 0xab: case 0xad: case 0xaf: case 0xb1: case 0xb3: /* ka row */
	  case 0xb5: case 0xb7: case 0xb9: case 0xbb: case 0xbd: /* sa row */
	  case 0xbf: case 0xc1: case 0xc4: case 0xc6: case 0xc8: /* ta row */
	    if ( lo == 0xde ) {
	      code += 1, h += 2;
	      break;
	    };
	  };
	};
      };
    }
    else if (hi == 0x8f) {
      /* three-byte character: emit the 0x8f prefix, then the two bytes */
      ADDCODE(z, maxzen, count, hi, 1);
      code = (((WCHAR_T) h[0]) << 8) | ((WCHAR_T) h[1]); h += 2;
      byte = 2;
    } else if ( hi & 0x80 )
      /* any other two-byte character is copied unchanged */
      code = (hi<<8)|*h++;
    else {
      /* 7-bit byte: widen it if the table has a mapping */
      if ( !(code = hiragana[hi]) )
	code = hi;
      byte = (code>>8) ? 2 : 1;
    }
    ADDCODE(z, maxzen, count, code, byte);
  };
  if ( z )
    *z = 0;
  return count;
}
302
/* RkCvtHan
 *	Convert full-width symbols, alphanumerics and kana in an EUC byte
 *	string to half-width characters where a mapping exists; everything
 *	else is copied unchanged.
 *
 *	han	destination buffer (may be NULL, in which case nothing is
 *		stored)
 *	maxhan	size of han; one byte is reserved for the terminating NUL
 *	zen	source bytes
 *	maxzen	number of source bytes
 *
 *	Returns the value accumulated in `count' by ADDCODE(); returns 0
 *	at once when maxhan leaves no room for any character.
 *
 *	NOTE(review): the bytes following a lead byte are read without
 *	being checked against the end of the input (Z).
 */
int RkCvtHan (unsigned char *, int, unsigned char *, int);

int
RkCvtHan(unsigned char *han, int maxhan, unsigned char *zen, int maxzen)
{
  unsigned char	*h = han;
  unsigned char	*z = zen;
  unsigned char	*Z = zen + maxzen;
  WCHAR_T		hi, lo;
  WCHAR_T		byte;
  int 		count = 0;
  unsigned long	code;

  if ( --maxhan <= 0 )
    return 0;
  while ( z < Z ) {
    hi = *z++;
    byte = 1;
    switch(hi) {
    case	0xa1:	/* symbols */
      lo = *z++;
      /* table is indexed by the second byte with its top bit cleared */
      if ( !(code = hankaku[lo&0x7f]) )
	code = (hi<<8)|lo;
      byte = (code>>8) ? 2 : 1;
      break;
    case	0xa3:	/* alphanumerics */
      lo = *z++;
      if ( 0xb0 <= lo && lo <= 0xb9 ) code = (lo - 0xb0) + '0';
      else
	if ( 0xc1 <= lo && lo <= 0xda ) code = (lo - 0xc1) + 'A';
	else
	  if ( 0xe1 <= lo && lo <= 0xfa ) code = (lo - 0xe1) + 'a';
	  else
	    code = (hi<<8)|lo, byte = 2;
      break;
    case	0xa4:	/* hiragana */
    case	0xa5:	/* katakana */
      lo = *z++;
      /* the mapping is used up to 0xf3 in row 0xa4 and 0xf6 in row 0xa5 */
      if ( (code = hankaku[lo]) &&
	  (lo <= (WCHAR_T)(hi == 0xa4 ? 0xf3 : 0xf6)) ) {
	if ( code>>8 ) {
	  /* two-byte entry: give each byte its own 0x8e prefix */
	  code = 0x8e000000|((code>>8)<<16)|0x00008e00|LOMASK(code);
	  byte = 4;
	}
	else {
	  code = 0x00008e00|LOMASK(code);
	  byte = 2;
	};
      }
      else
	code = (hi<<8)|lo, byte = 2;
      break;
    default:
      if (hi == 0x8f) {
	/* three-byte character: emit the 0x8f prefix, then the two bytes */
	ADDCODE(h, maxhan, count, hi, 1);
	code = (((WCHAR_T) z[0]) << 8) | ((WCHAR_T) z[1]); z += 2;
	byte = 2;
      }
      else if ( hi & 0x80 ) { 	/* kanji */
	code = (hi<<8)|(*z++);
	byte = 2;
      }
      else
	switch(hi) {
	  /* disabled mappings of ASCII punctuation to 0x8eXX codes:
	    case	',':	code = 0x8ea4; byte = 2; break;
	    case	'-': 	code = 0x8eb0; byte = 2; break;
	    case	'.': 	code = 0x8ea1; byte = 2; break;
	    */
	default:	code = hi; break;
	};
      break;
    };
    ADDCODE(h, maxhan, count, code, byte);
  };
  if ( h )
    *h = 0;
  return count;
}
385
386/* RkCvtKana/RkCvtHira
387 *	zenkaku hiragana wo katakana ni suru
388 */
389int RkCvtKana (unsigned char *, int, unsigned char *, int);
390
391int
392RkCvtKana(unsigned char *kana, int maxkana, unsigned char *hira, int maxhira)
393{
394  unsigned char	*k = kana;
395  unsigned char	*h = hira;
396  unsigned char	*H = hira + maxhira;
397  WCHAR_T		hi;
398  WCHAR_T		byte;
399  int 		count = 0;
400  unsigned long	code;
401
402  if ( --maxkana <= 0 )
403    return 0;
404  while ( h < H ) {
405    hi = *h++;
406    if (hi == 0x8f) {
407      ADDCODE(k, maxkana, count, hi, 1);
408      code = (((WCHAR_T) h[0]) << 8) | ((WCHAR_T) h[1]); h += 2;
409      byte = 2;
410    }
411    else if ( hi & 0x80 ) {
412      int	dakuon;
413
414      code = (hi == 0xa4) ? (0xa500|(*h++)) : ((hi<<8)|(*h++));
415      byte = 2;
416      /* hiragana U + " */
417      dakuon = ( h + 1 < H && ((((WCHAR_T) h[0])<<8)| ((WCHAR_T) h[1])) == 0xa1ab );
418      if ( hi == 0xa4 && code == 0xa5a6 && dakuon ) {
419	code = 0xa5f4;
420	h += 2;
421      };
422    } else
423      code = hi, byte = 1;
424    ADDCODE(k, maxkana, count, code, byte);
425  };
426  if ( k )
427    *k = 0;
428  return count;
429}
430
431int RkCvtHira (unsigned char *, int, unsigned char *, int);
432
433int
434RkCvtHira(unsigned char *hira, int maxhira, unsigned char *kana, int maxkana)
435{
436  unsigned char	*h = hira;
437  unsigned char	*k = kana;
438  unsigned char	*K = kana + maxkana;
439  WCHAR_T		hi;
440  WCHAR_T		byte;
441  int 			count = 0;
442  unsigned long		code;
443
444  if ( --maxhira <= 0 )
445    return 0;
446  while ( k < K ) {
447    hi = *k++;
448    if (hi == 0x8f) {
449      ADDCODE(h, maxhira, count, hi, 1);
450      code = (((WCHAR_T) k[0]) << 8) | ((WCHAR_T) k[1]); k += 2;
451      byte = 2;
452    } else if ( hi & 0x80 ) {
453      code = (hi == 0xa5) ? (0xa400|(*k++)) : ((hi<<8)|(*k++));
454      byte = 2;
455      /* katakana U + " */
456      if ( code == 0xa4f4 ) {	/* u no dakuon */
457	code = 0xa4a6a1ab;
458	byte = 4;
459      } else if ( code == 0xa4f5 )
460	code = 0xa4ab;
461      else if ( code == 0xa4f6 )
462	code = 0xa4b1;
463    } else
464      code = hi, byte = 1;
465    ADDCODE(h, maxhira, count, code, byte);
466  };
467  if ( h )
468    *h = 0;
469  return count;
470}
471
472int RkCvtNone (unsigned char *, int, unsigned char *, int);
473
474int
475RkCvtNone(unsigned char *dst, int maxdst, unsigned char *src, int maxsrc)
476{
477  unsigned char	*d = dst;
478  unsigned char	*s = src;
479  unsigned char	*S = src + maxsrc;
480  WCHAR_T		byte;
481  int 		count = 0;
482  unsigned long	code;
483
484  if ( --maxdst <= 0 )
485    return 0;
486  while ( s < S ) {
487    code = *s++;
488    byte = 1;
489    if (code == 0x8f) {
490      ADDCODE(d, maxdst, count, code, 1);
491      code = (((WCHAR_T) s[0]) << 8) | ((WCHAR_T) s[1]); s += 2;
492      byte = 2;
493    } else if ( code & 0x80 )
494      code = (code<<8)|(*s++), byte = 2;
495    ADDCODE(d, maxdst, count, code, byte);
496  };
497  if ( d )
498    *d = 0;
499  return count;
500}
501
502#ifdef USE_SJIS_TEXT_DIC
503exp(int maxwc)
504{
505    WCHAR_T *e = wc_return, *ee = wc_return + maxwc;
506    unsigned char	*s = (unsigned char *)sj;
507    unsigned char	*S = (unsigned char *)sj + maxsj;
508    unsigned short	hi, lo;
509    unsigned short	byte;
510    int 		count = 0;
511    unsigned long	code;
512
513    if ( --maxwc <= 0 )
514	return 0;
515
516    while ( s < S ) {
517	hi = *s++;
518	if ( hi <= 0x7f )  			/* ascii */
519	    code = hi, byte = 1;
520	else
521	if ( 0xa0 <= hi && hi <= 0xdf ) 	/* hankaku katakana */
522	    code = hi, byte = 2;
523        else
524        if (0xf0 <= hi && hi <= 0xfc) {		/* gaiji */
525            hi -= 0xf0;
526            hi = 2*hi + 0x21;
527            if ((lo = *s++) <= 0x9e) {
528                if (lo < 0x80)
529                    lo++;
530                lo -= 0x20;
531            }
532            else {
533                hi++;
534                lo -= 0x7e;
535            }
536            code = 0x8000 | (hi<<8) | lo, byte = 3;
537        }
538	else {
539	    hi -= (hi <= 0x9f) ?  0x80 : 0xc0;
540	    hi = 2*hi + 0x20;
541	    if ( (lo = *s++) <= 0x9e ) {	/* kisuu ku */
542		hi--;
543		if ( 0x80 <= lo ) lo--;
544		lo -= (0x40 - 0x21);
545	    }
546	    else 			/* guusuu ku */
547		lo -= (0x9f - 0x21);
548	    code = 0x8080|(hi<<8)|lo, byte = 2;
549	};
550	if (wc_return && e < ee) {
551	  *e++ = (WCHAR_T)code;
552	}
553    };
554    if (wc_return && e && e < ee) {
555      *e = 0;
556    }
557    return count;
558}
559#endif /* USE_SJIS_TEXT_DIC */
560
561/* RkCvtWide
562 *
563 */
564int
565RkCvtWide(WCHAR_T *dst, int maxdst, char *src, int maxsrc)
566{
567#ifdef USE_SJIS_TEXT_DIC
568  return SJistowcs(dst, maxdst, src, maxsrc);
569#else /* !USE_SJIS_TEXT_DIC, that is, EUC */
570  WCHAR_T		*d = dst;
571  unsigned char	*s = (unsigned char *)src;
572  unsigned char	*S = (unsigned char *)src + maxsrc;
573  int 		count = 0;
574  unsigned long	code;
575
576    if ( --maxdst <= 0 )
577	return count;
578    while ( s < S )
579    {
580	code = *s++;
581	if ( code & 0x80 )
582	{
583	    switch(code)
584	    {
585	    case RK_SS2:	/* hankaku katakana */
586		code = 0x0080|(s[0]&0x7f);
587		s++;
588		break;
589	    case RK_SS3:	/* gaiji */
590		code = 0x8000|(((s[0]<<8)|s[1])&0x7f7f);
591		s += 2;
592		break;
593	    default:
594		code = 0x8080|(((s[-1]<<8)|s[0])&0x7f7f);
595                s += 1;
596            };
597        };
598	ADDWCODE(d, maxdst, count, (WCHAR_T)code);
599    };
600    if ( d )
601	*d = 0;
602    return count;
603#endif /* !USE_SJIS_TEXT_DIC */
604}
605
606#ifdef USE_SJIS_TEXT_DIC
607/*
608  Wcstosjis -- To convert WCHAR_T string to SJIS string.
609
610   This function should not copy after NULL character even if
611   the srclen is too large
612 */
613
614static int
615Wcstosjis(char *dst, int dstlen, WCHAR_T *src, int srclen)
616{
617  register int i, j;
618  unsigned char *sjise_kanjip, sjise_area[2];
619  WCHAR_T codeset;
620  register WCHAR_T wc;
621
622  sjise_kanjip = sjise_area;
623
624  for (i = 0, j = 0; src[i] != (WCHAR_T)0 && j < dstlen && i < srclen ; i++) {
625    wc = src[i];
626    codeset = wc & 0x8080; /* to know what codeset is `wc' */
627
628    switch (codeset) {
629    case 0x0000: /* ASCII */
630      /* continue to the following line ... */
631    case 0x0080: /* Codeset 2, that is Katakana */
632      if (dst) dst[j++] = (unsigned char)wc;
633      break;
634
635    case 0x8080: /* Codeset 1, that is Kanji */
636      if (j + 2 > dstlen) {
637	return j; /* overflow */
638      }
639      sjise_kanjip[0] = ((wc >> 8) & 0x7f);
640      sjise_kanjip[1] = (wc & 0x7f);
641      sjise_kanjip[1] = ((sjise_kanjip[0] % 2) ?
642			 (sjise_kanjip[1] + 0x1f +
643			  ((sjise_kanjip[1] < 0x60) ? 0 : 1)) :
644			 (sjise_kanjip[1] + 0x7e));
645      sjise_kanjip[0] = ((sjise_kanjip[0] < 0x5f) ?
646			 ((sjise_kanjip[0] - 0x21) / 2 + 0x81) :
647			 ((sjise_kanjip[0] - 0x5f) / 2 + 0xe0));
648      if (dst) {
649	dst[j++] = sjise_kanjip[0];
650	dst[j++] = sjise_kanjip[1];
651      }
652      break;
653
654    case 0x8000: /* Codeset 3 */
655      if (j + 2 > dstlen) {
656	return j; /* overflow */
657      }
658      sjise_kanjip[0] = ((wc >> 8) & 0x7f);
659      sjise_kanjip[1] = (wc & 0x7fb);
660      sjise_kanjip[1] = ((sjise_kanjip[0] % 2) ?
661			 (sjise_kanjip[1] + 0x1f +
662			  ((sjise_kanjip[1] < 0x60) ? 0 : 1)) :
663			 (sjise_kanjip[1] + 0x7e));
664      sjise_kanjip[0] = ((sjise_kanjip[0] - 0x21) / 2 + 0xf0);
665      if (dst) {
666	dst[j++] = sjise_kanjip[0];
667	dst[j++] = sjise_kanjip[1];
668      }
669      break;
670    }
671  }
672  if (j < dstlen && dst) {
673    dst[j] = '\0';
674  }
675  return j;
676}
677#endif /* USE_SJIS_TEXT_DIC */
678
679/* RkCvtNarrow
680 *
681 */
682int
683RkCvtNarrow(char *dst, int maxdst, WCHAR_T *src, int maxsrc)
684{
685#ifdef USE_SJIS_TEXT_DIC
686  return Wcstosjis(dst, maxdst, src, maxsrc);
687#else /* !USE_SJIS_TEXT_DIC */
688  unsigned char	*d = (unsigned char *)dst;
689  WCHAR_T		*s = src;
690  WCHAR_T		*S = src + maxsrc;
691  int 		count = 0;
692  long		code;
693  int		byte;
694
695    if ( --maxdst <= 0 )
696	return count;
697    while ( s < S )
698    {
699	code = *s++;
700	switch(code&0x8080)
701	{
702	case 0x0000:
703	    code &= 0xff;
704	    byte = 1;
705	    break;
706	case 0x0080:
707	    code &= 0xff;
708	    code |= 0x8e00;
709	    byte = 2;
710	    break;
711	case 0x8000:
712	    code &= 0xffff;
713	    code |= 0x8f8080;
714	    byte = 3;
715	    break;
716	case 0x8080:
717	    code &= 0xffff;
718	    byte = 2;
719	    break;
720        };
721	ADDCODE(d, maxdst, count, code, byte);
722    };
723    if ( d )
724	*d = 0;
725    return count;
726#endif /* !USE_SJIS_TEXT_DIC */
727}
728
729#ifdef notdef
730/* RkEuc
731 * 	shift jis --> euc
732 */
733int
734RkCvtEuc(unsigned char *euc, int maxeuc, unsigned char *sj, int maxsj)
735{
736  unsigned char	*e = euc;
737  unsigned char	*s = sj;
738  unsigned char	*S = sj + maxsj;
739  WCHAR_T		hi, lo;
740  WCHAR_T		byte;
741  int 		count = 0;
742  unsigned long	code;
743
744  if ( --maxeuc <= 0 )
745    return 0;
746
747  while ( s < S ) {
748    hi = *s++;
749    if ( hi <= 0x7f )  			/* ascii */
750      code = hi, byte = 1;
751    else
752      if ( 0xa0 <= hi && hi <= 0xdf ) 	/* hankaku katakana */
753	code = 0x8e00|hi, byte = 2;
754      else {
755	hi -= (hi <= 0x9f) ?  0x80 : 0xc0;
756	hi = 2*hi + 0x20;
757	if ( (lo = *s++) <= 0x9e ) {	/* kisuu ku */
758	  hi--;
759	  if ( 0x80 <= lo ) lo--;
760	  lo -= (0x40 - 0x21);
761	} else 			/* guusuu ku */
762	  lo -= (0x9f - 0x21);
763	code = 0x8080|(hi<<8)|lo, byte = 2;
764      };
765    ADDCODE(e, maxeuc, count, code, byte);
766  };
767  if ( e )
768    *e = 0;
769  return count;
770}
771#endif /* notdef */
772
/* RkCvtSuuji
 * 	Convert Arabic numerals to kanji numerals.
 *
 *	The tables below hold the two-byte character codes used by
 *	RkwCvtSuuji().
 */
/* numerals for the digits 0..9 (SUUJI_SIMPLEKANJI, SUUJI_FULLKANJI) */
static WCHAR_T suujinew[] = {
  0xa1bb, 0xb0ec, 0xc6f3, 0xbbb0, 0xbbcd,
  0xb8de, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5,
};
/* numerals for the digits 0..9 (SUUJI_FULLKANJITRAD); entries 1, 2, 3
   and 5 differ from suujinew[] */
static WCHAR_T suujiold[] = {
  0xa1bb, 0xb0ed, 0xc6f5, 0xbbb2, 0xbbcd,
  0xb8e0, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5,
};
/* unit character for each group of four digits, indexed by group number
   counted from the least significant group; 0 means "no unit" */
static WCHAR_T kurai4[] = {
  0, 0xcbfc, 0xb2af, 0xc3fb, 0xb5fe, 0,
};

/* unit character for each position inside a four-digit group, indexed
   from the least significant digit; the two tables are identical */
static WCHAR_T kurai3new[] = { 0, 0xbdbd, 0xc9b4, 0xc0e9, };
static WCHAR_T kurai3old[] = { 0, 0xbdbd, 0xc9b4, 0xc0e9, };
790
791int
792RkwCvtSuuji(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format)
793{
794  int	count;
795  int	i, j, k;
796  int	digit[4], pend;
797  WCHAR_T	code, tmp;
798  WCHAR_T	*d = dst;
799  WCHAR_T	*s = src + maxsrc - 1;
800
801  if ( --maxdst <= 0 )
802    return 0;
803  /* Í­¸ú¤Ê·å¿ô¤ò¿ô¤¨¤ë */
804  pend = 0;
805  for ( count = k = 0; s >= src; k++ ) {
806    int	dec, thru = *s;
807
808    if ( thru & 0x8080 ) {
809      if ( !((WCHAR_T)0xa3b0 <= *s && *s <= (WCHAR_T)0xa3b9) )
810	break;
811      dec = *s-- - 0xa3b0;
812    }
813    else {
814      if ( !((WCHAR_T)'0' <= *s && *s <= (WCHAR_T)'9') )
815	break;
816      dec = *s-- - '0';
817    }
818
819    switch(format) {
820      /* simple */
821    case SUUJI_THROUGH:	/* sanyou suuji */
822      code = thru;
823      ADDWCODE(d, maxdst, count, code);
824      break;
825    case SUUJI_HANKAKU:	/* sanyou suuji */
826      code = dec + '0';
827      if (code == thru) {
828	return 0;
829      }
830      ADDWCODE(d, maxdst, count, code);
831      break;
832    case SUUJI_ZENKAKU:	/* sanyou suuji */
833      code = hiragana[dec + '0'];
834      if (code == thru) {
835	return 0;
836      }
837      ADDWCODE(d, maxdst, count, code);
838      break;
839      /* kanji kurai dori */
840    case SUUJI_SIMPLEKANJI:	/* kanji suuji */
841      code = suujinew[dec];
842      ADDWCODE(d, maxdst, count, code);
843      break;
844    case SUUJI_FULLKANJI:
845    case SUUJI_FULLKANJITRAD:
846    case SUUJI_WITHKANJIUNIT:	/* 12 O 3456 M 7890 */
847      digit[pend++] = dec;
848      if ( pend == 4 ) {
849	while ( pend > 0 && digit[pend - 1] == 0 )
850	  pend--;
851	if ( pend ) {
852	  /* kurai wo shuturyoku */
853	  code = kurai4[k/4];
854	  if (code)
855	    ADDWCODE(d, maxdst, count, code)
856	  else
857	    if ( k >= 4 )
858	      return 0;
859
860	  for ( i = 0; i < pend; i++ )
861	    switch(format) {
862	    case SUUJI_FULLKANJI:
863	      if ( digit[i] ) {
864		code = kurai3new[i];
865		if (code)
866		  ADDWCODE(d, maxdst, count, code);
867		if ( i == 0 || (digit[i] > 1) ) {
868		  code = suujinew[digit[i]];
869		  ADDWCODE(d, maxdst, count, code);
870		}
871	      }
872	      break;
873	    case SUUJI_FULLKANJITRAD:
874	      if ( digit[i] ) {
875		code = kurai3old[i];
876		if (code)
877		  ADDWCODE(d, maxdst, count, code);
878		code = suujiold[digit[i]];
879		ADDWCODE(d, maxdst, count, code);
880	      };
881	      break;
882	    case SUUJI_WITHKANJIUNIT:
883	      code = hiragana[digit[i]+'0'];
884	      ADDWCODE(d, maxdst, count, code);
885	      break;
886	    }
887	}
888	pend = 0;
889      }
890      break;
891    case SUUJI_WITHCOMMA: /* 1,234,567,890 */
892      if ( k && k%3 == 0 ) {
893	code = hiragana[','];
894	ADDWCODE(d, maxdst, count, code);
895      }
896      code = hiragana[dec + '0'];
897      ADDWCODE(d, maxdst, count, code);
898      break;
899    default:
900      return 0;
901    };
902  };
903
904  if (format == SUUJI_FULLKANJI || format == SUUJI_FULLKANJITRAD ||
905      format == SUUJI_WITHKANJIUNIT) {
906    while ( pend > 0 && digit[pend - 1] == 0 )
907      pend--;
908    if ( pend ) {
909      code = kurai4[k/4];
910      if (code)
911	ADDWCODE(d, maxdst, count, code)
912      else
913	if ( k >= 4 )
914	  return 0;
915      for ( i = 0; i < pend; i++ )
916	switch(format) {
917	case SUUJI_FULLKANJI:
918	  if ( digit[i] ) {
919	    code = kurai3new[i];
920	    if (code)
921	      ADDWCODE(d, maxdst, count, code);
922	    if ( i == 0 || (digit[i] > 1) ) {
923	      code = suujinew[digit[i]];
924	      ADDWCODE(d, maxdst, count, code);
925	    };
926	  };
927	  break;
928	case SUUJI_FULLKANJITRAD:
929	  if ( digit[i] ) {
930	    code = kurai3old[i];
931	    if (code)
932	      ADDWCODE(d, maxdst, count, code);
933	    code = suujiold[digit[i]];
934	    ADDWCODE(d, maxdst, count, code);
935	  };
936	  break;
937	case SUUJI_WITHKANJIUNIT:
938	  code = hiragana[digit[i]+'0'];
939	  ADDWCODE(d, maxdst, count, code);
940	  break;
941	}
942    }
943  }
944
945  if ( dst ) {
946    *d = 0;
947    for ( i = 0, j = count - 1; i < j; i++, j-- ) {
948      tmp = dst[i]; dst[i] = dst[j]; dst[j] = tmp;
949    }
950  }
951  return count;
952}
953
/* Wide character versions of the conversion functions */

/* size in bytes of each temporary EUC buffer used by the RkwCvt* wrappers */
#define CBUFSIZE     512
/* Converters between WCHAR_T strings and EUC byte strings, defined
   elsewhere.  As used below, both return a pointer into their
   destination buffer, from which the caller derives the output length. */
unsigned char	*ustoeuc();
WCHAR_T		*euctous();
959
960int RkwCvtHan (WCHAR_T *, int, WCHAR_T *, int);
961
962int
963RkwCvtHan(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen)
964{
965  int len;
966#ifndef USE_MALLOC_FOR_BIG_ARRAY
967  unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE];
968#else
969  unsigned char *cbuf, *cbuf2;
970  cbuf = (unsigned char *)malloc(CBUFSIZE);
971  cbuf2 = (unsigned char *)malloc(CBUFSIZE);
972  if (!cbuf || !cbuf2) {
973    if (cbuf) (void)free((char *)cbuf);
974    if (cbuf2) (void)free((char *)cbuf2);
975    return 0;
976  }
977#endif
978
979  len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf;
980  len = RkCvtHan(cbuf2, CBUFSIZE, cbuf, len);
981  if (len > 0) {
982    if (dst) {
983      len = euctous(cbuf2, len, dst, maxdst) - dst;
984    }
985    else {
986      len = euccharlen(cbuf2, len);
987    }
988  }
989#ifdef USE_MALLOC_FOR_BIG_ARRAY
990  (void)free((char *)cbuf);
991  (void)free((char *)cbuf2);
992#endif
993  return len;
994}
995
996int RkwCvtHira (WCHAR_T *, int, WCHAR_T *, int);
997
998int
999RkwCvtHira(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen)
1000{
1001  int len;
1002#ifndef USE_MALLOC_FOR_BIG_ARRAY
1003  unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE];
1004#else
1005  unsigned char *cbuf, *cbuf2;
1006  cbuf = (unsigned char *)malloc(CBUFSIZE);
1007  cbuf2 = (unsigned char *)malloc(CBUFSIZE);
1008  if (!cbuf || !cbuf2) {
1009    if (cbuf) (void)free((char *)cbuf);
1010    if (cbuf2) (void)free((char *)cbuf2);
1011    return 0;
1012  }
1013#endif
1014
1015  len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf;
1016  len = RkCvtHira(cbuf2, CBUFSIZE, cbuf, len);
1017  if (len > 0) {
1018    if (dst) {
1019      len = euctous(cbuf2, len, dst, maxdst) - dst;
1020    }
1021    else {
1022      len = euccharlen(cbuf2, len);
1023    }
1024  }
1025#ifdef USE_MALLOC_FOR_BIG_ARRAY
1026  (void)free((char *)cbuf);
1027  (void)free((char *)cbuf2);
1028#endif
1029  return len;
1030}
1031
1032int RkwCvtKana (WCHAR_T *, int, WCHAR_T *, int);
1033
1034int
1035RkwCvtKana(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen)
1036{
1037  unsigned int	len;
1038#ifndef USE_MALLOC_FOR_BIG_ARRAY
1039  unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE];
1040#else
1041  unsigned char *cbuf, *cbuf2;
1042  cbuf = (unsigned char *)malloc(CBUFSIZE);
1043  cbuf2 = (unsigned char *)malloc(CBUFSIZE);
1044  if (!cbuf || !cbuf2) {
1045    if (cbuf) (void)free((char *)cbuf);
1046    if (cbuf2) (void)free((char *)cbuf2);
1047    return 0;
1048  }
1049#endif
1050
1051  len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf;
1052  len = RkCvtKana(cbuf2, CBUFSIZE, cbuf, len);
1053  if (len > 0) {
1054    if (dst) {
1055      len = euctous(cbuf2, len, dst, maxdst) - dst;
1056    }
1057    else {
1058      len = euccharlen(cbuf2, len);
1059    }
1060  }
1061#ifdef USE_MALLOC_FOR_BIG_ARRAY
1062  (void)free((char *)cbuf);
1063  (void)free((char *)cbuf2);
1064#endif
1065  return len;
1066}
1067
1068int RkwCvtZen (WCHAR_T *, int, WCHAR_T *, int);
1069
1070int
1071RkwCvtZen(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen)
1072{
1073  int len;
1074#ifndef USE_MALLOC_FOR_BIG_ARRAY
1075  unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE];
1076#else
1077  unsigned char *cbuf, *cbuf2;
1078  cbuf = (unsigned char *)malloc(CBUFSIZE);
1079  cbuf2 = (unsigned char *)malloc(CBUFSIZE);
1080  if (!cbuf || !cbuf2) {
1081    if (cbuf) (void)free((char *)cbuf);
1082    if (cbuf2) (void)free((char *)cbuf2);
1083    return 0;
1084  }
1085#endif
1086
1087  len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf;
1088  len = RkCvtZen(cbuf2, CBUFSIZE, cbuf, len);
1089  if (len > 0) {
1090    if (dst) {
1091      len = euctous(cbuf2, len, dst, maxdst) - dst;
1092    }
1093    else {
1094      len = euccharlen(cbuf2, len);
1095    }
1096  }
1097#ifdef USE_MALLOC_FOR_BIG_ARRAY
1098  (void)free((char *)cbuf);
1099  (void)free((char *)cbuf2);
1100#endif
1101  return len;
1102}
1103
1104int RkwCvtNone (WCHAR_T *, int, WCHAR_T *, int);
1105
1106int
1107RkwCvtNone(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen)
1108{
1109  int i;
1110  int len = (maxdst < srclen) ? maxdst : srclen;
1111
1112  if (dst) {
1113    for (i = 0 ; i < len ; i++) {
1114      *dst++ = *src++;
1115    }
1116    *dst = *src;
1117  }
1118  return len;
1119}
1120#endif
1121