1/* Copyright 1992 NEC Corporation, Tokyo, Japan. 2 * 3 * Permission to use, copy, modify, distribute and sell this software 4 * and its documentation for any purpose is hereby granted without 5 * fee, provided that the above copyright notice appear in all copies 6 * and that both that copyright notice and this permission notice 7 * appear in supporting documentation, and that the name of NEC 8 * Corporation not be used in advertising or publicity pertaining to 9 * distribution of the software without specific, written prior 10 * permission. NEC Corporation makes no representations about the 11 * suitability of this software for any purpose. It is provided "as 12 * is" without express or implied warranty. 13 * 14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR 17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 20 * PERFORMANCE OF THIS SOFTWARE. 21 */ 22//////////////////////////////////////////////////////////////////////// 23// This source cdde is Modified 1998 by T.Murai for kanBe. 24//////////////////////////////////////////////////////////////////////// 25 26#if !defined(lint) && !defined(__CODECENTER__) 27static char rcsid[]="@(#) 102.1 $Id: RKroma.c 10525 2004-12-23 21:23:50Z korli $"; 28#endif 29 30#include "canna.h" 31// There is Exported Symbols !! 32#include <stdlib.h> 33#include <string.h> 34#include <fcntl.h> 35#include <unistd.h> 36 37#define S2TOS(s2) (((unsigned short)(s2)[0]<<8)|(s2)[1]) 38 39#ifdef WIN 40#define JAPANESE_SORT 41#endif 42 43#ifdef JAPANESE_SORT 44 45struct romaRec { 46 unsigned char *roma; 47 unsigned char bang; 48}; 49 50static int findRoma(struct RkRxDic *rdic, struct rstat *m, unsigned char c, int n, int flg); 51static unsigned char *getKana(struct RkRxDic *rdic, int p, int flags); 52static unsigned char *getTSU(struct RkRxDic *rdic, int flags); 53static unsigned char *getTemp(struct RkRxDic *rdic, int p); 54 55int 56compar(struct romaRec *p, struct romaRec *q) 57{ 58 unsigned char *s = p->roma; 59 unsigned char *t = q->roma; 60 61 while ( *s == *t ) 62 if (*s) 63 s++, t++; 64 else 65 return 0; 66 return ((int)*s) - ((int)*t); 67} 68#endif /* JAPANESE_SORT */ 69 70#define ROMDICHEADERLEN 6 71 72struct RkRxDic * 73RkwOpenRoma(char *romaji) 74{ 75 struct RkRxDic *rdic; 76#ifdef JAPANESE_SORT 77 struct romaRec *tmp_rdic; 78#endif 79 80 rdic = (struct RkRxDic *)malloc(sizeof(struct RkRxDic)); 81 if (rdic) { 82 int dic; 83 unsigned char header[ROMDICHEADERLEN]; 84 unsigned char *s; 85 int i, sz, open_flags = O_RDONLY; 86 87#ifdef O_BINARY 88 open_flags |= O_BINARY; 89#endif 90 if ( (dic = open((char *)romaji, open_flags)) < 0 ) { 91 free(rdic); 92 return((struct RkRxDic *)0); 93 } 94/* magic no shougou */ 95 if ( read(dic, (char *)header, ROMDICHEADERLEN) != ROMDICHEADERLEN || 96 (strncmp((char *)header, "RD", 2) && 97 strncmp((char *)header, "KP", 2)) ) { 98 (void)close(dic); 99 free(rdic); 100 return((struct RkRxDic *)0); 101 } 102 if ( !strncmp((char *)header, "KP", 2) ) { 103 rdic->dic = RX_KPDIC; 104 } 105 else { 106 rdic->dic = RX_RXDIC; 107 } 108 rdic->nr_strsz = S2TOS(header + 2); 109 rdic->nr_nkey = S2TOS(header + 4); 110 if (rdic->nr_strsz > 0) { 111 rdic->nr_string = 112 (unsigned char *)malloc((unsigned int)rdic->nr_strsz); 113 114 if ( !rdic->nr_string ) { 115 (void)close(dic); 116 free(rdic); 117 return((struct RkRxDic *)0); 118 } 119 120 sz = read(dic, (char *)rdic->nr_string, rdic->nr_strsz); 121 (void)close(dic); 122 if ( sz != rdic->nr_strsz ) { 123 free(rdic->nr_string); 124 free(rdic); 125 return((struct RkRxDic *)0); 126 } 127 } 128 else { 129 rdic->nr_string = (unsigned char *)0; 130 } 131 132 if (rdic->nr_nkey > 0) { 133 rdic->nr_keyaddr = 134 (unsigned char **)calloc((unsigned)rdic->nr_nkey, 135 sizeof(unsigned char *)); 136 if ( !rdic->nr_keyaddr ) { 137 free(rdic->nr_string); 138 free(rdic); 139 return((struct RkRxDic *)0); 140 } 141 } 142 else { 143 rdic->nr_keyaddr = (unsigned char **)0; 144 } 145 146 s = rdic->nr_string; 147 148 /* �������������������������������������������� */ 149 if (rdic->dic == RX_KPDIC) { /* KPDIC ���� nr_string �������������������������������� */ 150 rdic->nr_bchars = s; 151 while (*s++) 152 /* EMPTY */ 153 ; 154 155 /* ���������������������������������������������������������������������������������������������������� */ 156 if (*rdic->nr_string && rdic->nr_nkey > 0) { 157 rdic->nr_brules = (unsigned char *)calloc((unsigned)rdic->nr_nkey, 158 sizeof(unsigned char)); 159 } 160 else { 161 rdic->nr_brules = (unsigned char *)0; 162 } 163 } 164 else { 165 rdic->nr_brules = (unsigned char *)0; 166 } 167 168 /* �������������������������������� */ 169 for ( i = 0; i < rdic->nr_nkey; i++ ) { 170 rdic->nr_keyaddr[i] = s; 171 while (*s++) 172 /* EMPTY */ 173 ; 174 while (*s++) 175 /* EMPTY */ 176 ; 177 if (rdic->dic == RX_KPDIC) { 178 while ( *s > 0x19 ) s++; 179 if (*s) { /* ���������������������������� */ 180 if (rdic->nr_brules) { 181 rdic->nr_brules[i] = (unsigned char)1; 182 } 183 *s = (unsigned char)'\0'; 184 } 185 s++; 186 } 187 } 188 189#ifdef JAPANESE_SORT 190 tmp_rdic = (struct romaRec *)calloc((unsigned)rdic->nr_nkey, 191 sizeof(struct romaRec)); 192 if (!tmp_rdic) { 193 if (rdic->nr_string) 194 free(rdic->nr_string); 195 if (rdic->nr_keyaddr) 196 free(rdic->nr_keyaddr); 197 if (rdic->nr_brules) 198 free(rdic->nr_brules); 199 free(rdic); 200 return (struct RkRxDic *)NULL; 201 } 202 203 for (i = 0; i < rdic->nr_nkey; i++) { 204 tmp_rdic[i].roma = rdic->nr_keyaddr[i]; 205 if (rdic->nr_brules) 206 tmp_rdic[i].bang = rdic->nr_brules[i]; 207 } 208 209 qsort((char *)tmp_rdic, rdic->nr_nkey, sizeof(struct romaRec), 210 (int (*) (const void *, const void *)))compar; 211 212 for (i = 0; i < rdic->nr_nkey; i++) { 213 rdic->nr_keyaddr[i] = tmp_rdic[i].roma; 214 if (rdic->nr_brules) 215 rdic->nr_brules[i] = tmp_rdic[i].bang; 216 } 217 free ((char *)tmp_rdic); 218#endif /* JAPANESE_SORT */ 219 } 220 return((struct RkRxDic *)rdic); 221} 222/* RkCloseRoma 223 * romaji henkan table wo tojiru 224 */ 225void 226RkwCloseRoma(struct RkRxDic *rdic) 227{ 228 if ( rdic ) { 229 if (rdic->nr_string) free(rdic->nr_string); 230 if (rdic->nr_keyaddr) free(rdic->nr_keyaddr); 231 if (rdic->nr_brules) free(rdic->nr_brules); 232 free(rdic); 233 }; 234} 235 236struct RkRxDic * 237RkOpenRoma(char *romaji) 238{ 239 return RkwOpenRoma(romaji); 240} 241 242void 243RkCloseRoma(struct RkRxDic *rdic) 244{ 245 RkwCloseRoma(rdic); 246} 247 248/* RkMapRoma 249 * key no sentou wo saichou itti hou ni yori,henkan suru 250 */ 251#define xkey(roma, line, n) ((roma)->nr_keyaddr[line][n]) 252 253struct rstat { 254 int start, end; /* match sury key no hanni */ 255}; 256 257static 258int 259findRoma(struct RkRxDic *rdic, struct rstat *m, unsigned char c, int n, int flg) 260{ 261 register int s, e; 262 263 if (flg && 'A' <= c && c <= 'Z') { 264 c += 'a' - 'A'; 265 } 266 for(s = m->start; s < m->end; s++) 267 if( c == xkey(rdic, s, n) ) 268 break; 269 for(e = s; e < m->end; e++) 270 if( c != xkey(rdic, e, n) ) 271 break; 272 m->start = s; 273 m->end = e; 274 return e - s; 275} 276static 277unsigned char * 278getKana(struct RkRxDic *rdic, int p, int flags) 279{ 280 register unsigned char *kana; 281 int klen; 282 static unsigned char tmp[256]; 283 284 for (kana = rdic->nr_keyaddr[p] ; *kana++ ; ) 285 /* EMPTY */ 286 ; 287 288 klen = strlen((char *)kana); 289 switch(flags&RK_XFERMASK) { 290 default: 291 (void)RkCvtNone(tmp, sizeof(tmp), kana, klen); 292 return tmp; 293 case RK_XFER: 294 (void)RkCvtHira(tmp, sizeof(tmp), kana, klen); 295 return tmp; 296 case RK_HFER: 297 (void)RkCvtHan(tmp, sizeof(tmp), kana, klen); 298 return tmp; 299 case RK_KFER: 300 (void)RkCvtKana(tmp, sizeof(tmp), kana, klen); 301 return tmp; 302 case RK_ZFER: 303 (void)RkCvtZen(tmp, sizeof(tmp), kana, klen); 304 return tmp; 305 }; 306} 307 308inline 309unsigned char * 310getRoma(struct RkRxDic *rdic, int p) 311{ 312 return rdic->nr_keyaddr[p]; 313} 314 315/*ARGSUSED*/ 316static 317unsigned char * 318getTSU(struct RkRxDic *rdic, int flags) 319{ 320 static unsigned char hira_tsu[] = {0xa4, 0xc3, 0}; 321 static unsigned char kana_tsu[] = {0xa5, 0xc3, 0}; 322 static unsigned char han_tsu[] = {0x8e, 0xaf, 0}; 323 324 switch(flags&RK_XFERMASK) { 325 default: return hira_tsu; 326 case RK_HFER: return han_tsu; 327 case RK_KFER: return kana_tsu; 328 }; 329} 330 331int 332RkMapRoma(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int maxsrc, int flags, int *status) 333{ 334 register int i; 335 unsigned char *roma; 336 unsigned char *kana = src; 337 int count = 0; 338 int byte; 339 int found = 1; 340 struct rstat *m; 341 struct rstat match[256]; 342 343 if ( rdic ) { 344 m = match; 345 m->start = 0; 346 m->end = rdic->nr_nkey; 347 for (i = 0; (flags & RK_FLUSH) || i < maxsrc; i++) { 348 m[1] = m[0]; 349 m++; 350 switch((i < maxsrc) ? findRoma(rdic, m, src[i], i, 0) : 0) { 351 case 0: 352 while (--m > match && xkey(rdic, m->start, m - match)) 353 /* EMPTY */ 354 ; 355 if(m == match) { /* table ni nakatta tokino shori */ 356 kana = src; 357 count = (maxsrc <= 0)? 0 : (*src & 0x80)? 2 : 1; 358 if( (flags & RK_SOKON) && 359 (match[1].start < rdic->nr_nkey) && 360 (2 <= maxsrc) && 361 (src[0] == src[1]) && 362 (i == 1)) { 363 kana = getTSU(rdic, flags); 364 /* tsu ha jisho ni aru kao wo suru */ 365 byte = strlen((char *)kana); 366 } 367 else { 368 static unsigned char tmp[256]; 369 370 switch(flags&RK_XFERMASK) { 371 default: 372 byte = RkCvtNone(tmp, sizeof(tmp), src, count); 373 break; 374 case RK_XFER: 375 byte = RkCvtHira(tmp, sizeof(tmp), src, count); 376 break; 377 case RK_HFER: 378 byte = RkCvtHan(tmp, sizeof(tmp), src, count); 379 break; 380 case RK_KFER: 381 byte = RkCvtKana(tmp, sizeof(tmp), src, count); 382 break; 383 case RK_ZFER: 384 byte = RkCvtZen(tmp, sizeof(tmp), src, count); 385 break; 386 }; 387 kana = tmp; 388 found = -1; 389 }; 390 } 391 else { /* 'n' nado no shori: saitan no monowo toru */ 392 kana = getKana(rdic, m->start, flags); 393 byte = strlen((char *)kana); 394 count = m - match; 395 } 396 goto done; 397 case 1: /* determined uniquely */ 398 /* key no hou ga nagai baai */ 399 roma = getRoma(rdic, m->start); 400 if ( roma[i + 1] ) /* waiting suffix */ 401 continue; 402 kana = getKana(rdic, m->start, flags); 403 byte = strlen((char *)kana); 404 count = i + 1; 405 goto done; 406 }; 407 }; 408 byte = 0; 409 } 410 else 411 byte = (maxsrc <= 0) ? 0 : (*src & 0x80) ? 2 : 1; 412done: 413 *status = found*byte; 414 if ( byte + 1 <= maxdst ) { 415 if ( dst ) { 416 while ( byte-- ) 417 *dst++ = *kana++; 418 *dst = 0; 419 }; 420 }; 421 return count; 422} 423 424inline 425unsigned char * 426getrawKana(struct RkRxDic *rdic, int p) 427{ 428 register unsigned char *kana; 429 430 for (kana = rdic->nr_keyaddr[p] ; *kana++ ; ) 431 /* EMPTY */ 432 ; 433 434 return kana; 435} 436 437static 438unsigned char * 439getTemp(struct RkRxDic *rdic, int p) 440{ 441 register unsigned char *kana; 442 443 if (rdic->dic != RX_KPDIC) { 444 return (unsigned char *)0; 445 } 446 kana = rdic->nr_keyaddr[p]; 447 while (*kana++) 448 /* EMPTY */ 449 ; 450 while (*kana++) 451 /* EMPTY */ 452 ; 453 454 return kana; 455} 456 457 458int 459RkMapPhonogram(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int srclen, unsigned key, int flags, int *used_len_return, int *dst_len_return, int *tmp_len_return, int *rule_id_inout) 460{ 461 register int i; 462 unsigned char *roma, *temp; 463 unsigned char *kana = src; 464 int count = 0; 465 int byte; 466 int found = 1; 467 int templen, lastrule; 468 struct rstat *m; 469 struct rstat match[256]; 470 471 if ( rdic ) { 472 if (rdic->dic == RX_KPDIC 473 && rule_id_inout && (lastrule = *rule_id_inout)) { 474 if (!key) { 475 if (rdic->nr_brules && rdic->nr_brules[lastrule] && 476 !(flags & RK_FLUSH)) { 477 /* ������������! ������������������������������������������������������������������������������������������������ 478 ���������������������������������������������������������������������������������������������������������������� 479 ���� key ���������������������������������������������������������������������������������������������������� 480 ���������������������������������������������������������������������������������������������������� */ 481 /* RK_FLUSH ������������������������������������������������������������ */ 482 byte = count = 0; 483 templen = 0; 484 found = 0; 485 goto done; 486 } 487 }else{ 488 lastrule--; 489 if (lastrule < rdic->nr_nkey && rdic->nr_brules) { 490 if (rdic->nr_brules[lastrule]) { 491 unsigned char *p; 492 493 for (p = rdic->nr_bchars ; *p ; p++) { 494 if (key == *p) { 495 unsigned char *origin = getTemp(rdic, lastrule), *ret; 496 int dstlen = 0, tmplen; 497 498 ret = dst; 499 for (i = 0 ; i < maxdst && *origin ; i++) { 500 origin++; 501 } 502 if (i + 1 == srclen) { 503 /* ���������������������������������������� */ 504 origin = rdic->nr_keyaddr[lastrule]; 505 506 for (i = 0 ; i < maxdst && *origin ; i++) { 507 *dst++ = *origin++; 508 } 509 tmplen = ++i; 510 if (i < maxdst) { 511 *dst++ = key; 512 *dst = (unsigned char)0; 513 } 514 if (used_len_return) *used_len_return = srclen; 515 if (*ret & 0x80) { /* very dependent on Japanese EUC */ 516 if (*ret == 0x8f) { 517 dstlen++; 518 } 519 dstlen++; 520 } 521 dstlen++; 522 if (dst_len_return) *dst_len_return = dstlen; 523 if (tmp_len_return) *tmp_len_return = tmplen - dstlen; 524 *rule_id_inout = 0; 525 goto return_found; 526 } 527 } 528 } 529 } 530 } 531 } 532 } 533 m = match; 534 m->start = 0; 535 m->end = rdic->nr_nkey; 536 for (i = 0; (flags & RK_FLUSH) || i < srclen; i++) { 537 m[1] = m[0]; 538 m++; 539 switch((i < srclen) ? 540 findRoma(rdic, m, src[i], i, flags & RK_IGNORECASE) : 0) { 541 case 0: 542 while (--m > match && xkey(rdic, m->start, m - match)) 543 /* EMPTY */ 544 ; 545 if(m == match) { /* ���������������������������������������������������� */ 546 count = (*src & 0x80) ? 2 : 1; 547 if (srclen < count) { 548 count = 0; 549 } 550 if( (rdic->dic == RX_RXDIC) && /* tt ������������(����������������) */ 551 (flags & RK_SOKON) && 552 (match[1].start < rdic->nr_nkey) && 553 (2 <= srclen) && 554 (src[0] == src[1]) && 555 (i == 1)) { 556 kana = getTSU(rdic, flags); 557 /* tsu ha jisho ni aru kao wo suru */ 558 byte = strlen((char *)kana); 559 templen = 0; 560 if (rule_id_inout) *rule_id_inout = 0; 561 } 562 else { /* ���������������������������������������������������� */ 563 byte = count; 564 templen = 0; 565 kana = src; 566 found = 0; 567 } 568 } 569 else { /* 'n' ��������������������: �������������������������������� */ 570 kana = getrawKana(rdic, m->start); 571 byte = strlen((char *)kana); 572 temp = getTemp(rdic, m->start); 573 templen = temp ? strlen((char *)temp) : 0; 574 count = m - match; 575 if (rule_id_inout) { 576 if (byte == 0 && templen > 0) { 577 *rule_id_inout = m->start + 1; 578 } 579 else { 580 *rule_id_inout = 0; 581 } 582 } 583 } 584 goto done; 585 case 1: /* �������������������������������������������������������� */ 586 /* key no hou ga nagai baai */ 587 roma = getRoma(rdic, m->start); 588 if ( roma[i + 1] ) /* waiting suffix */ 589 continue; 590 kana = getrawKana(rdic, m->start); 591 byte = strlen((char *)kana); 592 temp = getTemp(rdic, m->start); 593 templen = temp ? strlen((char *)temp) : 0; 594 count = i + 1; 595 if (rule_id_inout) { 596 if (byte == 0 && templen > 0) { 597 *rule_id_inout = m->start + 1; 598 } 599 else { 600 *rule_id_inout = 0; 601 } 602 } 603 goto done; 604 } 605 } 606 byte = count = 0; 607 templen = 0; 608 } 609 else { 610 byte = (*src & 0x80) ? 2 : 1; 611 if (srclen < byte) { 612 byte = 0; 613 } 614 count = byte; 615 kana = src; 616 templen = 0; 617 found = 0; 618 } 619 done: 620 621 if (dst_len_return) { 622 *dst_len_return = byte; 623 } 624 if (used_len_return) { 625 *used_len_return = count; 626 } 627 if (tmp_len_return) { 628 *tmp_len_return = templen; 629 } 630 if ( byte < maxdst ) { 631 if ( dst ) { 632 int ii; 633 for (ii = 0 ; ii < byte ; ii++) 634 *dst++ = *kana++; 635 *dst = 0; 636 } 637 if (byte + templen < maxdst) { 638 if (dst) { 639 while (templen--) { 640 *dst++ = *temp++; 641 } 642 *dst = 0; 643 } 644 } 645 } 646 return_found: 647 return found; 648} 649 650/* RkCvtRoma 651 */ 652int 653RkCvtRoma(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int maxsrc, unsigned flags) 654{ 655 register unsigned char *d = dst; 656 register unsigned char *s = src; 657 register unsigned char *S = src + maxsrc; 658 int count = 0; 659 unsigned xp = 0; 660 unsigned char key; 661#ifndef WIN 662 unsigned char xxxx[64], yyyy[64]; 663#else 664 unsigned char *xxxx, *yyyy; 665 xxxx = (unsigned char *)malloc(64); 666 yyyy = (unsigned char *)malloc(64); 667 if (!xxxx || !yyyy) { 668 if (xxxx) { 669 free(xxxx); 670 } 671 if (yyyy) { 672 free(yyyy); 673 } 674 return count; 675 } 676#endif 677 678 if (!(maxdst <= 0 || maxsrc < 0)) { 679 while ( s < S ) { 680 int ulen, dlen, tlen, rule = 0; 681 unsigned dontflush = RK_FLUSH; 682 683 key = xxxx[xp++] = *s++; 684 flush: 685 do { 686 RkMapPhonogram(rdic, d, maxdst, xxxx, xp, (unsigned)key, 687 flags & ~dontflush, &ulen, &dlen, &tlen, &rule); 688 689 if ( dlen + 1 <= maxdst ) { 690 maxdst -= dlen; count += dlen; 691 if ( dst ) { 692 d += dlen; 693 (void)strncpy((char *)yyyy, (char *)d, tlen); 694 } 695 } 696 697 if (ulen < (int)xp) { 698 strncpy((char *)yyyy + tlen, (char *)xxxx + ulen, xp - ulen); 699 } 700 strncpy((char *)xxxx, (char *)yyyy, tlen + xp - ulen); 701 xp = tlen + xp - ulen; 702 key = 0; 703 } while (ulen > 0); 704 if (s == S && dontflush) { 705 dontflush = 0; 706 goto flush; 707 } 708 } 709 } 710#ifdef WIN 711 free(yyyy); 712 free(xxxx); 713#endif 714 return count; 715} 716