1/* Copyright 1994 NEC Corporation, Tokyo, Japan. 2 * 3 * Permission to use, copy, modify, distribute and sell this software 4 * and its documentation for any purpose is hereby granted without 5 * fee, provided that the above copyright notice appear in all copies 6 * and that both that copyright notice and this permission notice 7 * appear in supporting documentation, and that the name of NEC 8 * Corporation not be used in advertising or publicity pertaining to 9 * distribution of the software without specific, written prior 10 * permission. NEC Corporation makes no representations about the 11 * suitability of this software for any purpose. It is provided "as 12 * is" without express or implied warranty. 13 * 14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR 17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 20 * PERFORMANCE OF THIS SOFTWARE. 21 */ 22 23#if !defined(lint) && !defined(__CODECENTER__) 24static char rcsid[]="@(#) 102.1 $Id: kana.c 10525 2004-12-23 21:23:50Z korli $"; 25#endif 26 27/* LINTLIBRARY */ 28#include "RKintern.h" 29 30#define SUUJI_THROUGH 0 31#define SUUJI_HANKAKU 1 32#define SUUJI_ZENKAKU 2 33#define SUUJI_SIMPLEKANJI 3 34#define SUUJI_FULLKANJI 4 35#define SUUJI_FULLKANJITRAD 5 36#define SUUJI_WITHKANJIUNIT 6 37#define SUUJI_WITHCOMMA 7 38 39#if 0 40/* RkCvtZen 41 * hankaku moji wo zenkaku moji ni suru 42 */ 43static 44WCHAR_T 45hiragana[] = 46{ 47/* 0x00 */ 48 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 49 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 50/* 0x10 */ 51 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 52 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 53/* 0x20 */ 54 0xa1a1, 0xa1aa, 0xa1ed, 0xa1f4, 0xa1f0, 0xa1f3, 0xa1f5, 0xa1c7, 55 0xa1ca, 0xa1cb, 0xa1f6, 0xa1dc, 0xa1a4, 0xa1dd, 0xa1a5, 0xa1bf, 56/* 0x30 */ 57 0xa3b0, 0xa3b1, 0xa3b2, 0xa3b3, 0xa3b4, 0xa3b5, 0xa3b6, 0xa3b7, 58 0xa3b8, 0xa3b9, 0xa1a7, 0xa1a8, 0xa1e3, 0xa1e1, 0xa1e4, 0xa1a9, 59/* 0x40 */ 60 0xa1f7, 0xa3c1, 0xa3c2, 0xa3c3, 0xa3c4, 0xa3c5, 0xa3c6, 0xa3c7, 61 0xa3c8, 0xa3c9, 0xa3ca, 0xa3cb, 0xa3cc, 0xa3cd, 0xa3ce, 0xa3cf, 62/* 0x50 */ 63 0xa3d0, 0xa3d1, 0xa3d2, 0xa3d3, 0xa3d4, 0xa3d5, 0xa3d6, 0xa3d7, 64 0xa3d8, 0xa3d9, 0xa3da, 0xa1ce, 0xa1ef, 0xa1cf, 0xa1b0, 0xa1b2, 65/* 0x60 */ 66 0xa1c6, 0xa3e1, 0xa3e2, 0xa3e3, 0xa3e4, 0xa3e5, 0xa3e6, 0xa3e7, 67 0xa3e8, 0xa3e9, 0xa3ea, 0xa3eb, 0xa3ec, 0xa3ed, 0xa3ee, 0xa3ef, 68/* 0x70 */ 69 0xa3f0, 0xa3f1, 0xa3f2, 0xa3f3, 0xa3f4, 0xa3f5, 0xa3f6, 0xa3f7, 70 0xa3f8, 0xa3f9, 0xa3fa, 0xa1d0, 0xa1c3, 0xa1d1, 0xa1c1, 0xa2a2, 71/*0x80 */ 72 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 73 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 74/*0x90 */ 75 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 76 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 77/*0xa0 */ 78 0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa4f2, 0xa4a1, 79 0xa4a3, 0xa4a5, 0xa4a7, 0xa4a9, 0xa4e3, 0xa4e5, 0xa4e7, 0xa4c3, 80/*0xb0 */ 81 0xa1bc, 0xa4a2, 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ad, 82 0xa4af, 0xa4b1, 0xa4b3, 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 83/*0xc0 */ 84 0xa4bf, 0xa4c1, 0xa4c4, 0xa4c6, 0xa4c8, 0xa4ca, 0xa4cb, 0xa4cc, 85 0xa4cd, 0xa4ce, 0xa4cf, 0xa4d2, 0xa4d5, 0xa4d8, 0xa4db, 0xa4de, 86/*0xd0 */ 87 0xa4df, 0xa4e0, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e6, 0xa4e8, 0xa4e9, 88 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ed, 0xa4ef, 0xa4f3, 0xa1ab, 0xa1ac, 89/* 0xe0 */ 90 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 91 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 92/* 0xf0 */ 93 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 94 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 95}; 96 97static 98WCHAR_T 99hankaku[] = { 100/*0x00*/ 101 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 102 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 103/*0x10*/ 104 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 105 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 106/*0x20*/ 107 0x0000, ' ', 0x8ea4, 0x8ea1, ',', '.', 0x8ea5, ':', 108 ';', '?', '!', 0x8ede, 0x8edf, 0x0000, 0x0000, 0x0000, 109/*0x30*/ 110 '^', 0x0000, '_', 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 111 0x0000, 0x0000, 0x0000, 0x0000, 0x8eb0, 0x0000, 0x0000, '/', 112/*0x40*/ 113 0x0000, '~', 0x0000, '|', 0x0000, 0x0000, '\'', '\'', 114 '"', '"', '(', ')', '[', ']', '[', ']', 115/*0x50*/ 116 '{', '}', 0x0000, 0x0000, 0x0000, 0x0000, 0x8ea2, 0x8ea3, 117 0x0000, 0x0000, 0x0000, 0x0000, '+', '-', 0x0000, 0x0000, 118/*0x60*/ 119 0x0000, '=', 0x0000, '<', '>', 0x0000, 0x0000, 0x0000, 120 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, '\\', 121/*0x70*/ 122 '$',0x0000, 0x0000, '%', '#', '&', '*', '@', 123 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 124/*0x80*/ 125 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 126 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 127/*0x90*/ 128 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 129 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 130/*0xa0*/ 131 0x0020, 0x00a7, 0x00b1, 0x00a8, 0x00b2, 0x00a9, 0x00b3, 0x00aa, 132 0x00b4, 0x00ab, 0x00b5, 0x00b6, 0xb6de, 0x00b7, 0xb7de, 0x00b8, 133/*0xb0*/ 134 0xb8de, 0x00b9, 0xb9de, 0x00ba, 0xbade, 0x00bb, 0xbbde, 0x00bc, 135 0xbcde, 0x00bd, 0xbdde, 0x00be, 0xbede, 0x00bf, 0xbfde, 0x00c0, 136/*0xc0*/ 137 0xc0de, 0x00c1, 0xc1de, 0x00af, 0x00c2, 0xc2de, 0x00c3, 0xc3de, 138 0x00c4, 0xc4de, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 139/*0xd0*/ 140 0xcade, 0xcadf, 0x00cb, 0xcbde, 0xcbdf, 0x00cc, 0xccde, 0xccdf, 141 0x00cd, 0xcdde, 0xcddf, 0x00ce, 0xcede, 0xcedf, 0x00cf, 0x00d0, 142/*0xe0*/ 143 0x00d1, 0x00d2, 0x00d3, 0x00ac, 0x00d4, 0x00ad, 0x00d5, 0x00ae, 144 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dc, 145/*0xf0*/ 146 0x00b2, 0x00b4, 0x00a6, 0x00dd, 0xb3de, 0x00b6, 0x00b9, 0x0000, 147 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 148}; 149 150#endif 151 152#ifdef OBSOLETE_RKKANA 153 154#define ADDCODE(dst, maxdst, count, code, length) {\ 155 if ( (unsigned long)(length) <= (unsigned long)(maxdst) ) {\ 156 (maxdst) -= (length); (count) += (length);\ 157 if ( (dst) ) {\ 158 (dst) += (length);\ 159 switch((length)) {\ 160 case 4: *--(dst) = (code)&255; (code) >>= 8;\ 161 case 3: *--(dst) = (code)&255; (code) >>= 8;\ 162 case 2: *--(dst) = (code)&255; (code) >>= 8;\ 163 case 1: *--(dst) = (code)&255; (code) >>= 8;\ 164 };\ 165 (dst) += (length);\ 166 };\ 167 };\ 168} 169 170#else /* !OBSOLETE_RKKANA */ 171 172#include "RKproto.h" 173static int _ADDCODE(unsigned char *dst, int maxdst, int count, unsigned long code, int length); 174static int euccharlen(unsigned char *s, int bytelen); 175static int Wcstosjis(char *dst, int dstlen, WCHAR_T *src, int srclen); 176 177static int 178_ADDCODE(unsigned char *dst, int maxdst, int count, unsigned long code, int length) 179{ 180 if ((unsigned long)length <= (unsigned long)maxdst) { 181 maxdst -= length; 182 count += length; 183 if (dst) { 184 dst += length; 185 switch (length) { 186 case 4: *--dst = (unsigned char)code; code >>= 8; 187 case 3: *--dst = (unsigned char)code; code >>= 8; 188 case 2: *--dst = (unsigned char)code; code >>= 8; 189 case 1: *--dst = (unsigned char)code; code >>= 8; 190 } 191 } 192 return length; 193 } 194 return 0; 195} 196 197#define ADDCODE(dst, maxdst, count, code, length) \ 198{ int llen = _ADDCODE(dst, maxdst, count, (unsigned long) code, length); \ 199 if (llen > 0 && (dst)) { (dst) += llen; (maxdst) -= llen; (count) += llen; }} 200 201#endif /* !OBSOLETE_RKKANA */ 202 203#define ADDWCODE(dst, maxdst, count, code) {\ 204 if ( (maxdst) > 0 ) {\ 205 (maxdst)-- ; (count)++ ;\ 206 if ( (dst) ) {\ 207 *(dst)++ = (code);\ 208 }\ 209 }\ 210} 211 212#if 0 213 214static int 215euccharlen(unsigned char *s, int bytelen) 216{ 217 unsigned char ch; 218 int res = 0; 219 220 while ((ch = *s++) && bytelen--) { 221 res++; 222 if (ch & 0x80) { 223 if (ch == RK_SS3) { 224 s++; 225 bytelen--; 226 } 227 s++; 228 bytelen--; 229 } 230 } 231 return res; 232} 233 234/* RkCvtZen 235 * hankaku moji(ASCII+katakana) wo taiou suru zenkaku moji ni suru 236 * dakuten,handakuten shori mo okonau. 237 */ 238int RkCvtZen (unsigned char *, int, unsigned char *, int); 239 240int 241RkCvtZen(unsigned char *zen, int maxzen, unsigned char *han, int maxhan) 242{ 243 unsigned char *z = zen; 244 unsigned char *h = han; 245 unsigned char *H = han + maxhan; 246 WCHAR_T hi, lo; 247 unsigned byte; 248 int count = 0; 249 unsigned long code; 250 251 if ( --maxzen <= 0 ) 252 return count; 253 while ( h < H ) { 254 hi = *h++; 255 byte = 2; 256 if ( hi == 0x8e ) { /* hankaku katakana */ 257 if ( !(code = hiragana[lo = *h++]) ) 258 code = (hi<<8)|lo; 259 byte = (code>>8) ? 2 : 1; 260 if ( (code>>8) == 0xa4 ) { 261 code |= 0x100; 262 /* dakuten/handakuten ga tuku baai */ 263 if ( h + 1 < H && h[0] == 0x8e ) { 264 lo = h[1]; 265 switch( LOMASK(code) ) { 266 case 0xa6: /* u */ 267 if ( lo == 0xde ) code = 0xa5f4, h += 2; 268 break; 269 /* ha */case 0xcf: case 0xd2: case 0xd5: case 0xd8: case 0xdb: 270 if ( lo == 0xdf ) { 271 code += 2, h += 2; 272 break; 273 }; 274 case 0xab: case 0xad: case 0xaf: case 0xb1: case 0xb3: /* ka */ 275 case 0xb5: case 0xb7: case 0xb9: case 0xbb: case 0xbd: /* sa */ 276 case 0xbf: case 0xc1: case 0xc4: case 0xc6: case 0xc8: /* ta */ 277 if ( lo == 0xde ) { 278 code += 1, h += 2; 279 break; 280 }; 281 }; 282 }; 283 }; 284 } 285 else if (hi == 0x8f) { 286 ADDCODE(z, maxzen, count, hi, 1); 287 code = (((WCHAR_T) h[0]) << 8) | ((WCHAR_T) h[1]); h += 2; 288 byte = 2; 289 } else if ( hi & 0x80 ) 290 code = (hi<<8)|*h++; 291 else { 292 if ( !(code = hiragana[hi]) ) 293 code = hi; 294 byte = (code>>8) ? 2 : 1; 295 } 296 ADDCODE(z, maxzen, count, code, byte); 297 }; 298 if ( z ) 299 *z = 0; 300 return count; 301} 302 303/* RkCvtHan 304 * zenkaku kana moji wo hankaku moji ni suru 305 */ 306int RkCvtHan (unsigned char *, int, unsigned char *, int); 307 308int 309RkCvtHan(unsigned char *han, int maxhan, unsigned char *zen, int maxzen) 310{ 311 unsigned char *h = han; 312 unsigned char *z = zen; 313 unsigned char *Z = zen + maxzen; 314 WCHAR_T hi, lo; 315 WCHAR_T byte; 316 int count = 0; 317 unsigned long code; 318 319 if ( --maxhan <= 0 ) 320 return 0; 321 while ( z < Z ) { 322 hi = *z++; 323 byte = 1; 324 switch(hi) { 325 case 0xa1: /* kigou */ 326 lo = *z++; 327 if ( !(code = hankaku[lo&0x7f]) ) 328 code = (hi<<8)|lo; 329 byte = (code>>8) ? 2 : 1; 330 break; 331 case 0xa3: /* eisuuji */ 332 lo = *z++; 333 if ( 0xb0 <= lo && lo <= 0xb9 ) code = (lo - 0xb0) + '0'; 334 else 335 if ( 0xc1 <= lo && lo <= 0xda ) code = (lo - 0xc1) + 'A'; 336 else 337 if ( 0xe1 <= lo && lo <= 0xfa ) code = (lo - 0xe1) + 'a'; 338 else 339 code = (hi<<8)|lo, byte = 2; 340 break; 341 case 0xa4: /* hiragana */ 342 case 0xa5: /* katakana */ 343 lo = *z++; 344 if ( (code = hankaku[lo]) && 345 (lo <= (WCHAR_T)(hi == 0xa4 ? 0xf3 : 0xf6)) ) { 346 if ( code>>8 ) { 347 code = 0x8e000000|((code>>8)<<16)|0x00008e00|LOMASK(code); 348 byte = 4; 349 } 350 else { 351 code = 0x00008e00|LOMASK(code); 352 byte = 2; 353 }; 354 } 355 else 356 code = (hi<<8)|lo, byte = 2; 357 break; 358 default: 359 if (hi == 0x8f) { 360 ADDCODE(h, maxhan, count, hi, 1); 361 code = (((WCHAR_T) z[0]) << 8) | ((WCHAR_T) z[1]); z += 2; 362 byte = 2; 363 } 364 else if ( hi & 0x80 ) { /* kanji */ 365 code = (hi<<8)|(*z++); 366 byte = 2; 367 } 368 else 369 switch(hi) { 370 /* 371 case ',': code = 0x8ea4; byte = 2; break; 372 case '-': code = 0x8eb0; byte = 2; break; 373 case '.': code = 0x8ea1; byte = 2; break; 374 */ 375 default: code = hi; break; 376 }; 377 break; 378 }; 379 ADDCODE(h, maxhan, count, code, byte); 380 }; 381 if ( h ) 382 *h = 0; 383 return count; 384} 385 386/* RkCvtKana/RkCvtHira 387 * zenkaku hiragana wo katakana ni suru 388 */ 389int RkCvtKana (unsigned char *, int, unsigned char *, int); 390 391int 392RkCvtKana(unsigned char *kana, int maxkana, unsigned char *hira, int maxhira) 393{ 394 unsigned char *k = kana; 395 unsigned char *h = hira; 396 unsigned char *H = hira + maxhira; 397 WCHAR_T hi; 398 WCHAR_T byte; 399 int count = 0; 400 unsigned long code; 401 402 if ( --maxkana <= 0 ) 403 return 0; 404 while ( h < H ) { 405 hi = *h++; 406 if (hi == 0x8f) { 407 ADDCODE(k, maxkana, count, hi, 1); 408 code = (((WCHAR_T) h[0]) << 8) | ((WCHAR_T) h[1]); h += 2; 409 byte = 2; 410 } 411 else if ( hi & 0x80 ) { 412 int dakuon; 413 414 code = (hi == 0xa4) ? (0xa500|(*h++)) : ((hi<<8)|(*h++)); 415 byte = 2; 416 /* hiragana U + " */ 417 dakuon = ( h + 1 < H && ((((WCHAR_T) h[0])<<8)| ((WCHAR_T) h[1])) == 0xa1ab ); 418 if ( hi == 0xa4 && code == 0xa5a6 && dakuon ) { 419 code = 0xa5f4; 420 h += 2; 421 }; 422 } else 423 code = hi, byte = 1; 424 ADDCODE(k, maxkana, count, code, byte); 425 }; 426 if ( k ) 427 *k = 0; 428 return count; 429} 430 431int RkCvtHira (unsigned char *, int, unsigned char *, int); 432 433int 434RkCvtHira(unsigned char *hira, int maxhira, unsigned char *kana, int maxkana) 435{ 436 unsigned char *h = hira; 437 unsigned char *k = kana; 438 unsigned char *K = kana + maxkana; 439 WCHAR_T hi; 440 WCHAR_T byte; 441 int count = 0; 442 unsigned long code; 443 444 if ( --maxhira <= 0 ) 445 return 0; 446 while ( k < K ) { 447 hi = *k++; 448 if (hi == 0x8f) { 449 ADDCODE(h, maxhira, count, hi, 1); 450 code = (((WCHAR_T) k[0]) << 8) | ((WCHAR_T) k[1]); k += 2; 451 byte = 2; 452 } else if ( hi & 0x80 ) { 453 code = (hi == 0xa5) ? (0xa400|(*k++)) : ((hi<<8)|(*k++)); 454 byte = 2; 455 /* katakana U + " */ 456 if ( code == 0xa4f4 ) { /* u no dakuon */ 457 code = 0xa4a6a1ab; 458 byte = 4; 459 } else if ( code == 0xa4f5 ) 460 code = 0xa4ab; 461 else if ( code == 0xa4f6 ) 462 code = 0xa4b1; 463 } else 464 code = hi, byte = 1; 465 ADDCODE(h, maxhira, count, code, byte); 466 }; 467 if ( h ) 468 *h = 0; 469 return count; 470} 471 472int RkCvtNone (unsigned char *, int, unsigned char *, int); 473 474int 475RkCvtNone(unsigned char *dst, int maxdst, unsigned char *src, int maxsrc) 476{ 477 unsigned char *d = dst; 478 unsigned char *s = src; 479 unsigned char *S = src + maxsrc; 480 WCHAR_T byte; 481 int count = 0; 482 unsigned long code; 483 484 if ( --maxdst <= 0 ) 485 return 0; 486 while ( s < S ) { 487 code = *s++; 488 byte = 1; 489 if (code == 0x8f) { 490 ADDCODE(d, maxdst, count, code, 1); 491 code = (((WCHAR_T) s[0]) << 8) | ((WCHAR_T) s[1]); s += 2; 492 byte = 2; 493 } else if ( code & 0x80 ) 494 code = (code<<8)|(*s++), byte = 2; 495 ADDCODE(d, maxdst, count, code, byte); 496 }; 497 if ( d ) 498 *d = 0; 499 return count; 500} 501 502#ifdef USE_SJIS_TEXT_DIC 503exp(int maxwc) 504{ 505 WCHAR_T *e = wc_return, *ee = wc_return + maxwc; 506 unsigned char *s = (unsigned char *)sj; 507 unsigned char *S = (unsigned char *)sj + maxsj; 508 unsigned short hi, lo; 509 unsigned short byte; 510 int count = 0; 511 unsigned long code; 512 513 if ( --maxwc <= 0 ) 514 return 0; 515 516 while ( s < S ) { 517 hi = *s++; 518 if ( hi <= 0x7f ) /* ascii */ 519 code = hi, byte = 1; 520 else 521 if ( 0xa0 <= hi && hi <= 0xdf ) /* hankaku katakana */ 522 code = hi, byte = 2; 523 else 524 if (0xf0 <= hi && hi <= 0xfc) { /* gaiji */ 525 hi -= 0xf0; 526 hi = 2*hi + 0x21; 527 if ((lo = *s++) <= 0x9e) { 528 if (lo < 0x80) 529 lo++; 530 lo -= 0x20; 531 } 532 else { 533 hi++; 534 lo -= 0x7e; 535 } 536 code = 0x8000 | (hi<<8) | lo, byte = 3; 537 } 538 else { 539 hi -= (hi <= 0x9f) ? 0x80 : 0xc0; 540 hi = 2*hi + 0x20; 541 if ( (lo = *s++) <= 0x9e ) { /* kisuu ku */ 542 hi--; 543 if ( 0x80 <= lo ) lo--; 544 lo -= (0x40 - 0x21); 545 } 546 else /* guusuu ku */ 547 lo -= (0x9f - 0x21); 548 code = 0x8080|(hi<<8)|lo, byte = 2; 549 }; 550 if (wc_return && e < ee) { 551 *e++ = (WCHAR_T)code; 552 } 553 }; 554 if (wc_return && e && e < ee) { 555 *e = 0; 556 } 557 return count; 558} 559#endif /* USE_SJIS_TEXT_DIC */ 560 561/* RkCvtWide 562 * 563 */ 564int 565RkCvtWide(WCHAR_T *dst, int maxdst, char *src, int maxsrc) 566{ 567#ifdef USE_SJIS_TEXT_DIC 568 return SJistowcs(dst, maxdst, src, maxsrc); 569#else /* !USE_SJIS_TEXT_DIC, that is, EUC */ 570 WCHAR_T *d = dst; 571 unsigned char *s = (unsigned char *)src; 572 unsigned char *S = (unsigned char *)src + maxsrc; 573 int count = 0; 574 unsigned long code; 575 576 if ( --maxdst <= 0 ) 577 return count; 578 while ( s < S ) 579 { 580 code = *s++; 581 if ( code & 0x80 ) 582 { 583 switch(code) 584 { 585 case RK_SS2: /* hankaku katakana */ 586 code = 0x0080|(s[0]&0x7f); 587 s++; 588 break; 589 case RK_SS3: /* gaiji */ 590 code = 0x8000|(((s[0]<<8)|s[1])&0x7f7f); 591 s += 2; 592 break; 593 default: 594 code = 0x8080|(((s[-1]<<8)|s[0])&0x7f7f); 595 s += 1; 596 }; 597 }; 598 ADDWCODE(d, maxdst, count, (WCHAR_T)code); 599 }; 600 if ( d ) 601 *d = 0; 602 return count; 603#endif /* !USE_SJIS_TEXT_DIC */ 604} 605 606#ifdef USE_SJIS_TEXT_DIC 607/* 608 Wcstosjis -- To convert WCHAR_T string to SJIS string. 609 610 This function should not copy after NULL character even if 611 the srclen is too large 612 */ 613 614static int 615Wcstosjis(char *dst, int dstlen, WCHAR_T *src, int srclen) 616{ 617 register int i, j; 618 unsigned char *sjise_kanjip, sjise_area[2]; 619 WCHAR_T codeset; 620 register WCHAR_T wc; 621 622 sjise_kanjip = sjise_area; 623 624 for (i = 0, j = 0; src[i] != (WCHAR_T)0 && j < dstlen && i < srclen ; i++) { 625 wc = src[i]; 626 codeset = wc & 0x8080; /* to know what codeset is `wc' */ 627 628 switch (codeset) { 629 case 0x0000: /* ASCII */ 630 /* continue to the following line ... */ 631 case 0x0080: /* Codeset 2, that is Katakana */ 632 if (dst) dst[j++] = (unsigned char)wc; 633 break; 634 635 case 0x8080: /* Codeset 1, that is Kanji */ 636 if (j + 2 > dstlen) { 637 return j; /* overflow */ 638 } 639 sjise_kanjip[0] = ((wc >> 8) & 0x7f); 640 sjise_kanjip[1] = (wc & 0x7f); 641 sjise_kanjip[1] = ((sjise_kanjip[0] % 2) ? 642 (sjise_kanjip[1] + 0x1f + 643 ((sjise_kanjip[1] < 0x60) ? 0 : 1)) : 644 (sjise_kanjip[1] + 0x7e)); 645 sjise_kanjip[0] = ((sjise_kanjip[0] < 0x5f) ? 646 ((sjise_kanjip[0] - 0x21) / 2 + 0x81) : 647 ((sjise_kanjip[0] - 0x5f) / 2 + 0xe0)); 648 if (dst) { 649 dst[j++] = sjise_kanjip[0]; 650 dst[j++] = sjise_kanjip[1]; 651 } 652 break; 653 654 case 0x8000: /* Codeset 3 */ 655 if (j + 2 > dstlen) { 656 return j; /* overflow */ 657 } 658 sjise_kanjip[0] = ((wc >> 8) & 0x7f); 659 sjise_kanjip[1] = (wc & 0x7fb); 660 sjise_kanjip[1] = ((sjise_kanjip[0] % 2) ? 661 (sjise_kanjip[1] + 0x1f + 662 ((sjise_kanjip[1] < 0x60) ? 0 : 1)) : 663 (sjise_kanjip[1] + 0x7e)); 664 sjise_kanjip[0] = ((sjise_kanjip[0] - 0x21) / 2 + 0xf0); 665 if (dst) { 666 dst[j++] = sjise_kanjip[0]; 667 dst[j++] = sjise_kanjip[1]; 668 } 669 break; 670 } 671 } 672 if (j < dstlen && dst) { 673 dst[j] = '\0'; 674 } 675 return j; 676} 677#endif /* USE_SJIS_TEXT_DIC */ 678 679/* RkCvtNarrow 680 * 681 */ 682int 683RkCvtNarrow(char *dst, int maxdst, WCHAR_T *src, int maxsrc) 684{ 685#ifdef USE_SJIS_TEXT_DIC 686 return Wcstosjis(dst, maxdst, src, maxsrc); 687#else /* !USE_SJIS_TEXT_DIC */ 688 unsigned char *d = (unsigned char *)dst; 689 WCHAR_T *s = src; 690 WCHAR_T *S = src + maxsrc; 691 int count = 0; 692 long code; 693 int byte; 694 695 if ( --maxdst <= 0 ) 696 return count; 697 while ( s < S ) 698 { 699 code = *s++; 700 switch(code&0x8080) 701 { 702 case 0x0000: 703 code &= 0xff; 704 byte = 1; 705 break; 706 case 0x0080: 707 code &= 0xff; 708 code |= 0x8e00; 709 byte = 2; 710 break; 711 case 0x8000: 712 code &= 0xffff; 713 code |= 0x8f8080; 714 byte = 3; 715 break; 716 case 0x8080: 717 code &= 0xffff; 718 byte = 2; 719 break; 720 }; 721 ADDCODE(d, maxdst, count, code, byte); 722 }; 723 if ( d ) 724 *d = 0; 725 return count; 726#endif /* !USE_SJIS_TEXT_DIC */ 727} 728 729#ifdef notdef 730/* RkEuc 731 * shift jis --> euc 732 */ 733int 734RkCvtEuc(unsigned char *euc, int maxeuc, unsigned char *sj, int maxsj) 735{ 736 unsigned char *e = euc; 737 unsigned char *s = sj; 738 unsigned char *S = sj + maxsj; 739 WCHAR_T hi, lo; 740 WCHAR_T byte; 741 int count = 0; 742 unsigned long code; 743 744 if ( --maxeuc <= 0 ) 745 return 0; 746 747 while ( s < S ) { 748 hi = *s++; 749 if ( hi <= 0x7f ) /* ascii */ 750 code = hi, byte = 1; 751 else 752 if ( 0xa0 <= hi && hi <= 0xdf ) /* hankaku katakana */ 753 code = 0x8e00|hi, byte = 2; 754 else { 755 hi -= (hi <= 0x9f) ? 0x80 : 0xc0; 756 hi = 2*hi + 0x20; 757 if ( (lo = *s++) <= 0x9e ) { /* kisuu ku */ 758 hi--; 759 if ( 0x80 <= lo ) lo--; 760 lo -= (0x40 - 0x21); 761 } else /* guusuu ku */ 762 lo -= (0x9f - 0x21); 763 code = 0x8080|(hi<<8)|lo, byte = 2; 764 }; 765 ADDCODE(e, maxeuc, count, code, byte); 766 }; 767 if ( e ) 768 *e = 0; 769 return count; 770} 771#endif /* notdef */ 772 773/* RkCvtSuuji 774 * arabia suuji wo kansuuji ni kaeru 775 */ 776static WCHAR_T suujinew[] = { 777 0xa1bb, 0xb0ec, 0xc6f3, 0xbbb0, 0xbbcd, 778 0xb8de, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5, 779}; 780static WCHAR_T suujiold[] = { 781 0xa1bb, 0xb0ed, 0xc6f5, 0xbbb2, 0xbbcd, 782 0xb8e0, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5, 783}; 784static WCHAR_T kurai4[] = { 785 0, 0xcbfc, 0xb2af, 0xc3fb, 0xb5fe, 0, 786}; 787 788static WCHAR_T kurai3new[] = { 0, 0xbdbd, 0xc9b4, 0xc0e9, }; 789static WCHAR_T kurai3old[] = { 0, 0xbdbd, 0xc9b4, 0xc0e9, }; 790 791int 792RkwCvtSuuji(WCHAR_T *dst, int maxdst, WCHAR_T *src, int maxsrc, int format) 793{ 794 int count; 795 int i, j, k; 796 int digit[4], pend; 797 WCHAR_T code, tmp; 798 WCHAR_T *d = dst; 799 WCHAR_T *s = src + maxsrc - 1; 800 801 if ( --maxdst <= 0 ) 802 return 0; 803 /* ͸ú¤Ê·å¿ô¤ò¿ô¤¨¤ë */ 804 pend = 0; 805 for ( count = k = 0; s >= src; k++ ) { 806 int dec, thru = *s; 807 808 if ( thru & 0x8080 ) { 809 if ( !((WCHAR_T)0xa3b0 <= *s && *s <= (WCHAR_T)0xa3b9) ) 810 break; 811 dec = *s-- - 0xa3b0; 812 } 813 else { 814 if ( !((WCHAR_T)'0' <= *s && *s <= (WCHAR_T)'9') ) 815 break; 816 dec = *s-- - '0'; 817 } 818 819 switch(format) { 820 /* simple */ 821 case SUUJI_THROUGH: /* sanyou suuji */ 822 code = thru; 823 ADDWCODE(d, maxdst, count, code); 824 break; 825 case SUUJI_HANKAKU: /* sanyou suuji */ 826 code = dec + '0'; 827 if (code == thru) { 828 return 0; 829 } 830 ADDWCODE(d, maxdst, count, code); 831 break; 832 case SUUJI_ZENKAKU: /* sanyou suuji */ 833 code = hiragana[dec + '0']; 834 if (code == thru) { 835 return 0; 836 } 837 ADDWCODE(d, maxdst, count, code); 838 break; 839 /* kanji kurai dori */ 840 case SUUJI_SIMPLEKANJI: /* kanji suuji */ 841 code = suujinew[dec]; 842 ADDWCODE(d, maxdst, count, code); 843 break; 844 case SUUJI_FULLKANJI: 845 case SUUJI_FULLKANJITRAD: 846 case SUUJI_WITHKANJIUNIT: /* 12 O 3456 M 7890 */ 847 digit[pend++] = dec; 848 if ( pend == 4 ) { 849 while ( pend > 0 && digit[pend - 1] == 0 ) 850 pend--; 851 if ( pend ) { 852 /* kurai wo shuturyoku */ 853 code = kurai4[k/4]; 854 if (code) 855 ADDWCODE(d, maxdst, count, code) 856 else 857 if ( k >= 4 ) 858 return 0; 859 860 for ( i = 0; i < pend; i++ ) 861 switch(format) { 862 case SUUJI_FULLKANJI: 863 if ( digit[i] ) { 864 code = kurai3new[i]; 865 if (code) 866 ADDWCODE(d, maxdst, count, code); 867 if ( i == 0 || (digit[i] > 1) ) { 868 code = suujinew[digit[i]]; 869 ADDWCODE(d, maxdst, count, code); 870 } 871 } 872 break; 873 case SUUJI_FULLKANJITRAD: 874 if ( digit[i] ) { 875 code = kurai3old[i]; 876 if (code) 877 ADDWCODE(d, maxdst, count, code); 878 code = suujiold[digit[i]]; 879 ADDWCODE(d, maxdst, count, code); 880 }; 881 break; 882 case SUUJI_WITHKANJIUNIT: 883 code = hiragana[digit[i]+'0']; 884 ADDWCODE(d, maxdst, count, code); 885 break; 886 } 887 } 888 pend = 0; 889 } 890 break; 891 case SUUJI_WITHCOMMA: /* 1,234,567,890 */ 892 if ( k && k%3 == 0 ) { 893 code = hiragana[',']; 894 ADDWCODE(d, maxdst, count, code); 895 } 896 code = hiragana[dec + '0']; 897 ADDWCODE(d, maxdst, count, code); 898 break; 899 default: 900 return 0; 901 }; 902 }; 903 904 if (format == SUUJI_FULLKANJI || format == SUUJI_FULLKANJITRAD || 905 format == SUUJI_WITHKANJIUNIT) { 906 while ( pend > 0 && digit[pend - 1] == 0 ) 907 pend--; 908 if ( pend ) { 909 code = kurai4[k/4]; 910 if (code) 911 ADDWCODE(d, maxdst, count, code) 912 else 913 if ( k >= 4 ) 914 return 0; 915 for ( i = 0; i < pend; i++ ) 916 switch(format) { 917 case SUUJI_FULLKANJI: 918 if ( digit[i] ) { 919 code = kurai3new[i]; 920 if (code) 921 ADDWCODE(d, maxdst, count, code); 922 if ( i == 0 || (digit[i] > 1) ) { 923 code = suujinew[digit[i]]; 924 ADDWCODE(d, maxdst, count, code); 925 }; 926 }; 927 break; 928 case SUUJI_FULLKANJITRAD: 929 if ( digit[i] ) { 930 code = kurai3old[i]; 931 if (code) 932 ADDWCODE(d, maxdst, count, code); 933 code = suujiold[digit[i]]; 934 ADDWCODE(d, maxdst, count, code); 935 }; 936 break; 937 case SUUJI_WITHKANJIUNIT: 938 code = hiragana[digit[i]+'0']; 939 ADDWCODE(d, maxdst, count, code); 940 break; 941 } 942 } 943 } 944 945 if ( dst ) { 946 *d = 0; 947 for ( i = 0, j = count - 1; i < j; i++, j-- ) { 948 tmp = dst[i]; dst[i] = dst[j]; dst[j] = tmp; 949 } 950 } 951 return count; 952} 953 954/* ¥ï¥¤¥É¥¥ã¥é¥¯¥¿Âбþ´Ø¿ô */ 955 956#define CBUFSIZE 512 957unsigned char *ustoeuc(); 958WCHAR_T *euctous(); 959 960int RkwCvtHan (WCHAR_T *, int, WCHAR_T *, int); 961 962int 963RkwCvtHan(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen) 964{ 965 int len; 966#ifndef USE_MALLOC_FOR_BIG_ARRAY 967 unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE]; 968#else 969 unsigned char *cbuf, *cbuf2; 970 cbuf = (unsigned char *)malloc(CBUFSIZE); 971 cbuf2 = (unsigned char *)malloc(CBUFSIZE); 972 if (!cbuf || !cbuf2) { 973 if (cbuf) (void)free((char *)cbuf); 974 if (cbuf2) (void)free((char *)cbuf2); 975 return 0; 976 } 977#endif 978 979 len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf; 980 len = RkCvtHan(cbuf2, CBUFSIZE, cbuf, len); 981 if (len > 0) { 982 if (dst) { 983 len = euctous(cbuf2, len, dst, maxdst) - dst; 984 } 985 else { 986 len = euccharlen(cbuf2, len); 987 } 988 } 989#ifdef USE_MALLOC_FOR_BIG_ARRAY 990 (void)free((char *)cbuf); 991 (void)free((char *)cbuf2); 992#endif 993 return len; 994} 995 996int RkwCvtHira (WCHAR_T *, int, WCHAR_T *, int); 997 998int 999RkwCvtHira(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen) 1000{ 1001 int len; 1002#ifndef USE_MALLOC_FOR_BIG_ARRAY 1003 unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE]; 1004#else 1005 unsigned char *cbuf, *cbuf2; 1006 cbuf = (unsigned char *)malloc(CBUFSIZE); 1007 cbuf2 = (unsigned char *)malloc(CBUFSIZE); 1008 if (!cbuf || !cbuf2) { 1009 if (cbuf) (void)free((char *)cbuf); 1010 if (cbuf2) (void)free((char *)cbuf2); 1011 return 0; 1012 } 1013#endif 1014 1015 len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf; 1016 len = RkCvtHira(cbuf2, CBUFSIZE, cbuf, len); 1017 if (len > 0) { 1018 if (dst) { 1019 len = euctous(cbuf2, len, dst, maxdst) - dst; 1020 } 1021 else { 1022 len = euccharlen(cbuf2, len); 1023 } 1024 } 1025#ifdef USE_MALLOC_FOR_BIG_ARRAY 1026 (void)free((char *)cbuf); 1027 (void)free((char *)cbuf2); 1028#endif 1029 return len; 1030} 1031 1032int RkwCvtKana (WCHAR_T *, int, WCHAR_T *, int); 1033 1034int 1035RkwCvtKana(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen) 1036{ 1037 unsigned int len; 1038#ifndef USE_MALLOC_FOR_BIG_ARRAY 1039 unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE]; 1040#else 1041 unsigned char *cbuf, *cbuf2; 1042 cbuf = (unsigned char *)malloc(CBUFSIZE); 1043 cbuf2 = (unsigned char *)malloc(CBUFSIZE); 1044 if (!cbuf || !cbuf2) { 1045 if (cbuf) (void)free((char *)cbuf); 1046 if (cbuf2) (void)free((char *)cbuf2); 1047 return 0; 1048 } 1049#endif 1050 1051 len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf; 1052 len = RkCvtKana(cbuf2, CBUFSIZE, cbuf, len); 1053 if (len > 0) { 1054 if (dst) { 1055 len = euctous(cbuf2, len, dst, maxdst) - dst; 1056 } 1057 else { 1058 len = euccharlen(cbuf2, len); 1059 } 1060 } 1061#ifdef USE_MALLOC_FOR_BIG_ARRAY 1062 (void)free((char *)cbuf); 1063 (void)free((char *)cbuf2); 1064#endif 1065 return len; 1066} 1067 1068int RkwCvtZen (WCHAR_T *, int, WCHAR_T *, int); 1069 1070int 1071RkwCvtZen(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen) 1072{ 1073 int len; 1074#ifndef USE_MALLOC_FOR_BIG_ARRAY 1075 unsigned char cbuf[CBUFSIZE], cbuf2[CBUFSIZE]; 1076#else 1077 unsigned char *cbuf, *cbuf2; 1078 cbuf = (unsigned char *)malloc(CBUFSIZE); 1079 cbuf2 = (unsigned char *)malloc(CBUFSIZE); 1080 if (!cbuf || !cbuf2) { 1081 if (cbuf) (void)free((char *)cbuf); 1082 if (cbuf2) (void)free((char *)cbuf2); 1083 return 0; 1084 } 1085#endif 1086 1087 len = ustoeuc(src, srclen, cbuf, CBUFSIZE) - cbuf; 1088 len = RkCvtZen(cbuf2, CBUFSIZE, cbuf, len); 1089 if (len > 0) { 1090 if (dst) { 1091 len = euctous(cbuf2, len, dst, maxdst) - dst; 1092 } 1093 else { 1094 len = euccharlen(cbuf2, len); 1095 } 1096 } 1097#ifdef USE_MALLOC_FOR_BIG_ARRAY 1098 (void)free((char *)cbuf); 1099 (void)free((char *)cbuf2); 1100#endif 1101 return len; 1102} 1103 1104int RkwCvtNone (WCHAR_T *, int, WCHAR_T *, int); 1105 1106int 1107RkwCvtNone(WCHAR_T *dst, int maxdst, WCHAR_T *src, int srclen) 1108{ 1109 int i; 1110 int len = (maxdst < srclen) ? maxdst : srclen; 1111 1112 if (dst) { 1113 for (i = 0 ; i < len ; i++) { 1114 *dst++ = *src++; 1115 } 1116 *dst = *src; 1117 } 1118 return len; 1119} 1120#endif 1121