1/* Copyright 1992 NEC Corporation, Tokyo, Japan.
2 *
3 * Permission to use, copy, modify, distribute and sell this software
4 * and its documentation for any purpose is hereby granted without
5 * fee, provided that the above copyright notice appear in all copies
6 * and that both that copyright notice and this permission notice
7 * appear in supporting documentation, and that the name of NEC
8 * Corporation not be used in advertising or publicity pertaining to
9 * distribution of the software without specific, written prior
10 * permission.  NEC Corporation makes no representations about the
11 * suitability of this software for any purpose.  It is provided "as
12 * is" without express or implied warranty.
13 *
14 * NEC CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
16 * NO EVENT SHALL NEC CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
20 * PERFORMANCE OF THIS SOFTWARE.
21 */
22////////////////////////////////////////////////////////////////////////
// This source code was modified in 1998 by T.Murai for kanBe.
24////////////////////////////////////////////////////////////////////////
25
26#if !defined(lint) && !defined(__CODECENTER__)
27static char rcsid[]="@(#) 102.1 $Id: RKroma.c 10525 2004-12-23 21:23:50Z korli $";
28#endif
29
30#include "canna.h"
31// There is Exported Symbols !!
32#include <stdlib.h>
33#include <string.h>
34#include <fcntl.h>
35#include <unistd.h>
36
/* Combine two consecutive bytes, s2[0] as the high byte and s2[1] as the
 * low byte, into a single 16-bit value. */
#define S2TOS(s2)	(((unsigned short)(s2)[0]<<8)|(s2)[1])
38
39#ifdef WIN
40#define JAPANESE_SORT
41#endif
42
43#ifdef JAPANESE_SORT
44
/* Temporary record used while sorting: one rule's key string together
 * with the flag byte that belongs to the same rule. */
struct romaRec {
  unsigned char *roma;
  unsigned char bang;
};

static int findRoma(struct RkRxDic *rdic, struct rstat *m, unsigned char c, int n, int flg);
static unsigned char *getKana(struct RkRxDic *rdic, int p, int flags);
static unsigned char *getTSU(struct RkRxDic *rdic, int flags);
static unsigned char *getTemp(struct RkRxDic *rdic, int p);

/*
 * compar
 *	Compare the roma strings of two records byte by byte, treating
 *	each byte as an unsigned value.
 *
 *	Returns 0 when both strings are identical, otherwise the difference
 *	between the first pair of bytes that differ (negative when p sorts
 *	before q, positive when it sorts after).
 */
int
compar(struct romaRec *p, struct romaRec *q)
{
  const unsigned char *lhs = p->roma;
  const unsigned char *rhs = q->roma;

  for (;; lhs++, rhs++) {
    if (*lhs != *rhs)
      return (int)*lhs - (int)*rhs;
    if (*lhs == '\0')
      return 0;		/* reached the end of both strings */
  }
}
68#endif /* JAPANESE_SORT */
69
70#define ROMDICHEADERLEN 6
71
72struct RkRxDic *
73RkwOpenRoma(char *romaji)
74{
75    struct RkRxDic	*rdic;
76#ifdef JAPANESE_SORT
77    struct romaRec *tmp_rdic;
78#endif
79
80    rdic = (struct RkRxDic *)malloc(sizeof(struct RkRxDic));
81    if (rdic) {
82	int	dic;
83	unsigned char	header[ROMDICHEADERLEN];
84	unsigned char	*s;
85	int	i, sz, open_flags = O_RDONLY;
86
87#ifdef O_BINARY
88	open_flags |= O_BINARY;
89#endif
90	if ( (dic = open((char *)romaji, open_flags)) < 0 ) {
91		free(rdic);
92		return((struct RkRxDic *)0);
93	}
94/* magic no shougou */
95	if ( read(dic, (char *)header, ROMDICHEADERLEN) != ROMDICHEADERLEN ||
96	     (strncmp((char *)header, "RD", 2) &&
97	      strncmp((char *)header, "KP", 2)) ) {
98		(void)close(dic);
99		free(rdic);
100		return((struct RkRxDic *)0);
101	}
102	if ( !strncmp((char *)header, "KP", 2) ) {
103	  rdic->dic = RX_KPDIC;
104	}
105	else {
106	  rdic->dic = RX_RXDIC;
107	}
108	rdic->nr_strsz = S2TOS(header + 2);
109	rdic->nr_nkey  = S2TOS(header + 4);
110	if (rdic->nr_strsz > 0) {
111	  rdic->nr_string =
112	    (unsigned char *)malloc((unsigned int)rdic->nr_strsz);
113
114	  if ( !rdic->nr_string ) {
115		(void)close(dic);
116		free(rdic);
117		return((struct RkRxDic *)0);
118	  }
119
120	  sz = read(dic, (char *)rdic->nr_string, rdic->nr_strsz);
121	  (void)close(dic);
122	  if ( sz != rdic->nr_strsz ) {
123	    free(rdic->nr_string);
124	    free(rdic);
125	    return((struct RkRxDic *)0);
126	  }
127	}
128	else {
129	  rdic->nr_string = (unsigned char *)0;
130	}
131
132	if (rdic->nr_nkey > 0) {
133	  rdic->nr_keyaddr =
134	    (unsigned char **)calloc((unsigned)rdic->nr_nkey,
135				     sizeof(unsigned char *));
136	  if ( !rdic->nr_keyaddr ) {
137	    free(rdic->nr_string);
138	    free(rdic);
139	    return((struct RkRxDic *)0);
140	  }
141	}
142	else {
143	  rdic->nr_keyaddr = (unsigned char **)0;
144	}
145
146	s = rdic->nr_string;
147
148	/* �������������������������������������������� */
149	if (rdic->dic == RX_KPDIC) { /* KPDIC ���� nr_string �������������������������������� */
150	  rdic->nr_bchars = s;
151	  while (*s++)
152	    /* EMPTY */
153	    ;
154
155	  /* ���������������������������������������������������������������������������������������������������� */
156	  if (*rdic->nr_string && rdic->nr_nkey > 0) {
157	    rdic->nr_brules = (unsigned char *)calloc((unsigned)rdic->nr_nkey,
158                                                       sizeof(unsigned char));
159	  }
160	  else {
161	    rdic->nr_brules = (unsigned char *)0;
162	  }
163	}
164	else {
165	  rdic->nr_brules = (unsigned char *)0;
166	}
167
168	/* �������������������������������� */
169	for ( i = 0; i < rdic->nr_nkey; i++ ) {
170	    rdic->nr_keyaddr[i] = s;
171	    while (*s++)
172	      /* EMPTY */
173	      ;
174	    while (*s++)
175	      /* EMPTY */
176	      ;
177	    if (rdic->dic == RX_KPDIC) {
178	      while ( *s > 0x19 ) s++;
179	      if (*s) { /* ���������������������������� */
180		if (rdic->nr_brules) {
181		  rdic->nr_brules[i] = (unsigned char)1;
182		}
183		*s = (unsigned char)'\0';
184	      }
185	      s++;
186	    }
187	}
188
189#ifdef JAPANESE_SORT
190	tmp_rdic = (struct romaRec *)calloc((unsigned)rdic->nr_nkey,
191                                              sizeof(struct romaRec));
192	if (!tmp_rdic) {
193          if (rdic->nr_string)
194            free(rdic->nr_string);
195          if (rdic->nr_keyaddr)
196            free(rdic->nr_keyaddr);
197	  if (rdic->nr_brules)
198            free(rdic->nr_brules);
199	  free(rdic);
200	  return (struct RkRxDic *)NULL;
201	}
202
203        for (i = 0; i < rdic->nr_nkey; i++) {
204	  tmp_rdic[i].roma = rdic->nr_keyaddr[i];
205	  if (rdic->nr_brules)
206	    tmp_rdic[i].bang = rdic->nr_brules[i];
207	}
208
209        qsort((char *)tmp_rdic, rdic->nr_nkey, sizeof(struct romaRec),
210                (int (*) (const void *, const void *)))compar;
211
212        for (i = 0; i < rdic->nr_nkey; i++) {
213	  rdic->nr_keyaddr[i] = tmp_rdic[i].roma;
214	  if (rdic->nr_brules)
215	    rdic->nr_brules[i]  = tmp_rdic[i].bang;
216	}
217	free ((char *)tmp_rdic);
218#endif /* JAPANESE_SORT */
219    }
220    return((struct RkRxDic *)rdic);
221}
222/* RkCloseRoma
223 *	romaji henkan table wo tojiru
224 */
225void
226RkwCloseRoma(struct RkRxDic *rdic)
227{
228    if ( rdic ) {
229        if (rdic->nr_string) free(rdic->nr_string);
230        if (rdic->nr_keyaddr) free(rdic->nr_keyaddr);
231	if (rdic->nr_brules) free(rdic->nr_brules);
232	free(rdic);
233    };
234}
235
/*
 * RkOpenRoma
 *	Alias for RkwOpenRoma(): passes romaji through unchanged and
 *	returns whatever RkwOpenRoma() returns.
 */
struct RkRxDic *
RkOpenRoma(char *romaji)
{
  return RkwOpenRoma(romaji);
}
241
/*
 * RkCloseRoma
 *	Alias for RkwCloseRoma(): passes rdic through unchanged.
 */
void
RkCloseRoma(struct RkRxDic *rdic)
{
  RkwCloseRoma(rdic);
}
247
/* RkMapRoma
 *	Convert the head of the key using the longest-match method.
 */
/* Byte number n of the key string of rule number `line' in table roma. */
#define	xkey(roma, line, n) 	((roma)->nr_keyaddr[line][n])

/* A run of consecutive rule numbers: start is the first one, end is one
 * past the last (findRoma scans start <= s < end). */
struct rstat {
    int	start, end;	/* range of the keys that match */
};
256
257static
258int
259findRoma(struct RkRxDic *rdic, struct rstat *m, unsigned char c, int n, int flg)
260{
261    register int	s, e;
262
263    if (flg && 'A' <= c && c <= 'Z') {
264      c += 'a' - 'A';
265    }
266    for(s = m->start; s < m->end; s++)
267	if( c == xkey(rdic, s, n) )
268	    break;
269    for(e = s; e < m->end; e++)
270	if( c != xkey(rdic, e, n) )
271	    break;
272    m->start	= s;
273    m->end	= e;
274    return e - s;
275}
276static
277unsigned char	*
278getKana(struct RkRxDic *rdic, int p, int flags)
279{
280    register unsigned char	*kana;
281    int				klen;
282    static unsigned  char	tmp[256];
283
284    for (kana = rdic->nr_keyaddr[p] ; *kana++ ; )
285      /* EMPTY */
286      ;
287
288    klen = strlen((char *)kana);
289    switch(flags&RK_XFERMASK) {
290    default:
291	(void)RkCvtNone(tmp, sizeof(tmp), kana, klen);
292	return tmp;
293    case RK_XFER:
294	(void)RkCvtHira(tmp, sizeof(tmp), kana, klen);
295	return tmp;
296    case RK_HFER:
297	(void)RkCvtHan(tmp, sizeof(tmp), kana, klen);
298	return tmp;
299    case RK_KFER:
300	(void)RkCvtKana(tmp, sizeof(tmp), kana, klen);
301	return tmp;
302    case RK_ZFER:
303	(void)RkCvtZen(tmp, sizeof(tmp), kana, klen);
304	return tmp;
305    };
306}
307
308inline
309unsigned char	*
310getRoma(struct RkRxDic *rdic, int p)
311{
312    return rdic->nr_keyaddr[p];
313}
314
315/*ARGSUSED*/
316static
317unsigned char	*
318getTSU(struct RkRxDic *rdic, int flags)
319{
320    static unsigned char  hira_tsu[] = {0xa4, 0xc3, 0};
321    static unsigned char  kana_tsu[] = {0xa5, 0xc3, 0};
322    static unsigned char  han_tsu[] =  {0x8e, 0xaf, 0};
323
324    switch(flags&RK_XFERMASK) {
325    default:	  return hira_tsu;
326    case RK_HFER: return han_tsu;
327    case RK_KFER: return kana_tsu;
328    };
329}
330
/*
 * RkMapRoma
 *	Convert the head of src with table rdic by longest match and store
 *	the result in dst.
 *
 *	rdic    conversion table; when null, nothing is looked up
 *	dst     output buffer (may be null, in which case nothing is stored)
 *	maxdst  size of dst; the result is stored only when it fits together
 *	        with a terminating NUL
 *	src     input bytes
 *	maxsrc  number of input bytes
 *	flags   RK_FLUSH keeps the scan going past the end of src so that a
 *	        pending prefix is resolved; RK_SOKON enables the doubled
 *	        letter rule below; flags & RK_XFERMASK selects the RkCvt*
 *	        routine applied to the output
 *	status  receives the output length in bytes, negated when no rule
 *	        applied and the leading character of src was passed through
 *	        an RkCvt* routine instead
 *
 *	Returns count: i + 1 or the matched prefix length when a rule
 *	applied, 1 or 2 (by the high bit of *src) when nothing matched, and
 *	0 when the scan ended undecided or rdic is null.
 */
int
RkMapRoma(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int maxsrc, int flags, int *status)
{
    register int	i;
    unsigned char	*roma;
    unsigned char	*kana = src;
    int			count = 0;
    int			byte;
    int			found = 1;
    struct rstat *m;
    struct rstat match[256];

    if ( rdic ) {
	/* match[k] is the range of rules whose key starts with the first
	   k bytes of src; match[0] covers the whole table */
	m = match;
	m->start = 0;
	m->end = rdic->nr_nkey;
	for (i = 0; (flags & RK_FLUSH) || i < maxsrc;  i++) {
	    m[1] = m[0];
	    m++;
	    /* past the end of src (only reachable with RK_FLUSH) counts as
	       "no rule matches" */
	    switch((i < maxsrc) ? findRoma(rdic, m, src[i], i, 0) : 0) {
	    case	0:
		/* back up to the longest prefix for which the first
		   candidate key ends exactly there */
		while (--m > match && xkey(rdic, m->start, m - match))
		  /* EMPTY */
		  ;
		if(m == match) { /* handling when nothing in the table matched */
		    kana = src;
		    count = (maxsrc <= 0)? 0 : (*src & 0x80)? 2 : 1;
		    if( (flags & RK_SOKON) &&
			(match[1].start < rdic->nr_nkey) &&
			(2 <= maxsrc) &&
			(src[0] == src[1]) &&
			(i == 1)) {
			kana = getTSU(rdic, flags);
		    /* pretend that tsu is in the dictionary */
			byte = strlen((char *)kana);
		    }
		    else {
			static unsigned char	tmp[256];

			switch(flags&RK_XFERMASK) {
			default:
			    byte = RkCvtNone(tmp, sizeof(tmp), src, count);
			    break;
			case RK_XFER:
			    byte = RkCvtHira(tmp, sizeof(tmp), src, count);
			    break;
			case RK_HFER:
			    byte = RkCvtHan(tmp, sizeof(tmp), src, count);
			    break;
			case RK_KFER:
			    byte = RkCvtKana(tmp, sizeof(tmp), src, count);
			    break;
			case RK_ZFER:
			    byte = RkCvtZen(tmp, sizeof(tmp), src, count);
			    break;
			};
			kana = tmp;
			found = -1;	/* makes *status negative below */
		    };
		}
		else {  /* handling of 'n' and the like: take the shortest one */
		    kana = getKana(rdic, m->start, flags);
		    byte = strlen((char *)kana);
		    count = m - match;
		}
		goto done;
	    case	1:	/* determined uniquely */
	    /* the case where the key is longer than the input so far */
		roma = getRoma(rdic, m->start);
		if ( roma[i + 1] ) 	/* waiting suffix */
		    continue;
		kana = getKana(rdic, m->start, flags);
		byte = strlen((char *)kana);
		count = i + 1;
		goto done;
	    };
	    /* two or more candidates remain: look at the next byte */
	};
	/* input used up without a decision */
	byte = 0;
    }
    else
	byte = (maxsrc <= 0) ? 0 : (*src & 0x80) ? 2 : 1;
done:
    *status = found*byte;
    if ( byte + 1 <= maxdst ) {
	if ( dst ) {
	    while ( byte-- )
		*dst++ = *kana++;
	    *dst = 0;
	};
    };
    return count;
}
423
424inline
425unsigned char	*
426getrawKana(struct RkRxDic *rdic, int p)
427{
428  register unsigned char	*kana;
429
430  for (kana = rdic->nr_keyaddr[p] ; *kana++ ; )
431    /* EMPTY */
432    ;
433
434  return kana;
435}
436
437static
438unsigned char	*
439getTemp(struct RkRxDic *rdic, int p)
440{
441  register unsigned char	*kana;
442
443  if (rdic->dic != RX_KPDIC) {
444    return (unsigned char *)0;
445  }
446  kana = rdic->nr_keyaddr[p];
447  while (*kana++)
448    /* EMPTY */
449    ;
450  while (*kana++)
451    /* EMPTY */
452    ;
453
454  return kana;
455}
456
457
458int
459RkMapPhonogram(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int srclen, unsigned key, int flags, int *used_len_return, int *dst_len_return, int *tmp_len_return, int *rule_id_inout)
460{
461  register int	i;
462  unsigned char	*roma, *temp;
463  unsigned char	*kana = src;
464  int			count = 0;
465  int			byte;
466  int			found = 1;
467  int templen, lastrule;
468  struct rstat *m;
469  struct rstat match[256];
470
471  if ( rdic ) {
472    if (rdic->dic == RX_KPDIC
473	&& rule_id_inout && (lastrule = *rule_id_inout)) {
474      if (!key) {
475	if (rdic->nr_brules && rdic->nr_brules[lastrule] &&
476	    !(flags & RK_FLUSH)) {
477	  /* ������������! ������������������������������������������������������������������������������������������������
478             ����������������������������������������������������������������������������������������������������������������
479             ���� key ����������������������������������������������������������������������������������������������������
480             ���������������������������������������������������������������������������������������������������� */
481	  /* RK_FLUSH ������������������������������������������������������������ */
482	  byte = count = 0;
483	  templen = 0;
484	  found = 0;
485	  goto done;
486	}
487      }else{
488	lastrule--;
489	if (lastrule < rdic->nr_nkey && rdic->nr_brules) {
490	  if (rdic->nr_brules[lastrule]) {
491	    unsigned char *p;
492
493	    for (p = rdic->nr_bchars ; *p ; p++) {
494	      if (key == *p) {
495		unsigned char *origin = getTemp(rdic, lastrule), *ret;
496		int dstlen = 0, tmplen;
497
498		ret = dst;
499		for (i = 0 ; i < maxdst && *origin ; i++) {
500		  origin++;
501		}
502		if (i + 1 == srclen) {
503		  /* ���������������������������������������� */
504		  origin = rdic->nr_keyaddr[lastrule];
505
506		  for (i = 0 ; i < maxdst && *origin ; i++) {
507		    *dst++ = *origin++;
508		  }
509		  tmplen = ++i;
510		  if (i < maxdst) {
511		    *dst++ = key;
512		    *dst = (unsigned char)0;
513		  }
514		  if (used_len_return) *used_len_return = srclen;
515		  if (*ret & 0x80) { /* very dependent on Japanese EUC */
516		    if (*ret == 0x8f) {
517		      dstlen++;
518		    }
519		    dstlen++;
520		  }
521		  dstlen++;
522		  if (dst_len_return) *dst_len_return = dstlen;
523		  if (tmp_len_return) *tmp_len_return = tmplen - dstlen;
524		  *rule_id_inout = 0;
525		  goto return_found;
526		}
527	      }
528	    }
529	  }
530	}
531      }
532    }
533    m = match;
534    m->start = 0;
535    m->end = rdic->nr_nkey;
536    for (i = 0; (flags & RK_FLUSH) || i < srclen;  i++) {
537      m[1] = m[0];
538      m++;
539      switch((i < srclen) ?
540	     findRoma(rdic, m, src[i], i, flags & RK_IGNORECASE) : 0) {
541      case	0:
542	while (--m > match && xkey(rdic, m->start, m - match))
543	  /* EMPTY */
544	  ;
545	if(m == match) { /* ���������������������������������������������������� */
546	  count = (*src & 0x80) ? 2 : 1;
547	  if (srclen < count) {
548	    count = 0;
549	  }
550	  if( (rdic->dic == RX_RXDIC) && /* tt ������������(����������������) */
551	     (flags & RK_SOKON) &&
552	     (match[1].start < rdic->nr_nkey) &&
553	     (2 <= srclen) &&
554	     (src[0] == src[1]) &&
555	     (i == 1)) {
556	    kana = getTSU(rdic, flags);
557	    /* tsu ha jisho ni aru kao wo suru */
558	    byte = strlen((char *)kana);
559	    templen = 0;
560	    if (rule_id_inout) *rule_id_inout = 0;
561	  }
562	  else { /* ���������������������������������������������������� */
563	    byte = count;
564	    templen = 0;
565	    kana = src;
566	    found = 0;
567	  }
568	}
569	else {  /* 'n' ��������������������: �������������������������������� */
570	  kana = getrawKana(rdic, m->start);
571	  byte = strlen((char *)kana);
572	  temp = getTemp(rdic, m->start);
573	  templen = temp ? strlen((char *)temp) : 0;
574	  count = m - match;
575	  if (rule_id_inout) {
576	    if (byte == 0 && templen > 0) {
577	      *rule_id_inout = m->start + 1;
578	    }
579	    else {
580	      *rule_id_inout = 0;
581	    }
582	  }
583	}
584	goto done;
585      case	1: /* �������������������������������������������������������� */
586	/* key no hou ga nagai baai */
587	roma = getRoma(rdic, m->start);
588	if ( roma[i + 1] ) 	/* waiting suffix */
589	  continue;
590	kana = getrawKana(rdic, m->start);
591	byte = strlen((char *)kana);
592	temp = getTemp(rdic, m->start);
593	templen = temp ? strlen((char *)temp) : 0;
594	count = i + 1;
595	if (rule_id_inout) {
596	  if (byte == 0 && templen > 0) {
597	    *rule_id_inout = m->start + 1;
598	  }
599	  else {
600	    *rule_id_inout = 0;
601	  }
602	}
603	goto done;
604      }
605    }
606    byte = count = 0;
607    templen = 0;
608  }
609  else {
610    byte = (*src & 0x80) ? 2 : 1;
611    if (srclen < byte) {
612      byte = 0;
613    }
614    count = byte;
615    kana = src;
616    templen = 0;
617    found = 0;
618  }
619 done:
620
621  if (dst_len_return) {
622    *dst_len_return = byte;
623  }
624  if (used_len_return) {
625    *used_len_return = count;
626  }
627  if (tmp_len_return) {
628    *tmp_len_return = templen;
629  }
630  if ( byte < maxdst ) {
631    if ( dst ) {
632      int ii;
633      for (ii = 0 ; ii < byte ; ii++)
634	*dst++ = *kana++;
635      *dst = 0;
636    }
637    if (byte + templen < maxdst) {
638      if (dst) {
639	while (templen--) {
640	  *dst++ = *temp++;
641	}
642	*dst = 0;
643      }
644    }
645  }
646 return_found:
647  return found;
648}
649
/* RkCvtRoma
 *	Convert the maxsrc bytes at src with table rdic by feeding them one
 *	at a time to RkMapPhonogram, and collect the output in dst.
 *
 *	Returns count, the sum of the output lengths that were accepted
 *	(those for which dlen + 1 <= maxdst still held).  Nothing is
 *	converted when maxdst <= 0 or maxsrc < 0; under WIN, 0 is also
 *	returned when the work buffers cannot be allocated.
 *
 *	NOTE(review): xxxx and yyyy are 64 bytes each and xp is not checked
 *	against that size here -- confirm that pending input can never grow
 *	that long.
 *	NOTE(review): when dst is null, yyyy is not filled from d but is
 *	still copied back into xxxx -- confirm whether a null dst is a
 *	supported call.
 */
int
RkCvtRoma(struct RkRxDic *rdic, unsigned char *dst, int maxdst, unsigned char *src, int maxsrc, unsigned flags)
{
    register unsigned char	*d = dst;
    register unsigned char	*s = src;
    register unsigned char	*S = src + maxsrc;
    int count = 0;
    unsigned xp = 0;	/* number of pending bytes held in xxxx */
    unsigned char key;
#ifndef WIN
    unsigned char xxxx[64], yyyy[64];	/* pending input / scratch copy */
#else
    unsigned char *xxxx, *yyyy;
    xxxx = (unsigned char *)malloc(64);
    yyyy = (unsigned char *)malloc(64);
    if (!xxxx || !yyyy) {
      if (xxxx) {
	free(xxxx);
      }
      if (yyyy) {
	free(yyyy);
      }
      return count;
    }
#endif

    if (!(maxdst <= 0 || maxsrc < 0)) {
      while ( s < S ) {
	int ulen, dlen, tlen, rule = 0;
	/* RK_FLUSH is masked out of flags until the input is used up */
	unsigned dontflush = RK_FLUSH;

	key = xxxx[xp++] = *s++;
      flush:
	do {
	  RkMapPhonogram(rdic, d, maxdst, xxxx, xp, (unsigned)key,
			 flags & ~dontflush, &ulen, &dlen, &tlen, &rule);

	  if ( dlen + 1 <= maxdst ) {
	    maxdst -= dlen; count += dlen;
	    if ( dst ) {
	      d += dlen;
	      /* the tlen bytes stored after the output go back into the
		 pending input */
	      (void)strncpy((char *)yyyy, (char *)d, tlen);
	    }
	  }

	  /* append the pending bytes that were not consumed, then make
	     the result the new pending input */
	  if (ulen < (int)xp) {
	    strncpy((char *)yyyy + tlen, (char *)xxxx + ulen, xp - ulen);
	  }
	  strncpy((char *)xxxx, (char *)yyyy, tlen + xp - ulen);
	  xp = tlen + xp - ulen;
	  key = 0;	/* the typed key is passed on the first call only */
	} while (ulen > 0);
	/* last input byte taken: run the loop once more with the caller's
	   flags unmasked */
	if (s == S && dontflush) {
	  dontflush = 0;
	  goto flush;
	}
      }
    }
#ifdef WIN
    free(yyyy);
    free(xxxx);
#endif
    return count;
}
716