1/* mbutil.c -- readline multibyte character utility functions */
2
3/* Copyright (C) 2001 Free Software Foundation, Inc.
4
5   This file is part of the GNU Readline Library, a library for
6   reading lines of text with interactive input and history editing.
7
8   The GNU Readline Library is free software; you can redistribute it
9   and/or modify it under the terms of the GNU General Public License
10   as published by the Free Software Foundation; either version 2, or
11   (at your option) any later version.
12
13   The GNU Readline Library is distributed in the hope that it will be
14   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   The GNU General Public License is often shipped with GNU software, and
19   is generally kept in a file called COPYING or LICENSE.  If you do not
20   have a copy of the license, write to the Free Software Foundation,
21   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22#define READLINE_LIBRARY
23
24#if defined (HAVE_CONFIG_H)
25#  include <config.h>
26#endif
27
28#include <sys/types.h>
29#include <fcntl.h>
30#include "posixjmp.h"
31
32#if defined (HAVE_UNISTD_H)
33#  include <unistd.h>	   /* for _POSIX_VERSION */
34#endif /* HAVE_UNISTD_H */
35
36#if defined (HAVE_STDLIB_H)
37#  include <stdlib.h>
38#else
39#  include "ansi_stdlib.h"
40#endif /* HAVE_STDLIB_H */
41
42#include <stdio.h>
43#include <ctype.h>
44
45/* System-specific feature definitions and include files. */
46#include "rldefs.h"
47#include "rlmbutil.h"
48
49#if defined (TIOCSTAT_IN_SYS_IOCTL)
50#  include <sys/ioctl.h>
51#endif /* TIOCSTAT_IN_SYS_IOCTL */
52
53/* Some standard library routines. */
54#include "readline.h"
55
56#include "rlprivate.h"
57#include "xmalloc.h"
58
/* This definition lives here so that the readline and history libraries
   can share it.  It starts out as 1 when the library is built without
   HANDLE_MULTIBYTE and as 0 when multibyte handling is compiled in. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66
67/* **************************************************************** */
68/*								    */
69/*		Multibyte Character Utility Functions		    */
70/*								    */
71/* **************************************************************** */
72
73#if defined(HANDLE_MULTIBYTE)
74
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.  A negative SEED is treated as 0 and
   a COUNT <= 0 returns SEED unchanged.  If SEED falls inside a multibyte
   character, advancing to the start of the following character uses up
   one step of COUNT.  Bytes that cannot be decoded are each treated as a
   single character.  When FIND_NON_ZERO is non-zero, zero-width
   characters are not counted, and any zero-width characters found at the
   resulting position are skipped as well.  The scan never moves past the
   terminating null byte.  If SEED lies beyond the end of STRING the seed
   is treated as a single byte and SEED + 1 is returned. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, length;
  mbstate_t ps;
  int point;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  point = _rl_adjust_point (string, seed, &ps);
  /* _rl_adjust_point returns -1 when SEED is not inside STRING.  There
     is nothing to decode in that case, so step over a single byte
     instead of reading beyond the terminator. */
  if (point < 0)
    return seed + 1;
  point += seed;

  /* If this is true, SEED did not point at the first byte of a
     character: the adjustment already moved us forward one character. */
  if (seed < point)
    count--;

  length = strlen (string);

  /* Stop explicitly at the end of the string.  Calling mbrtowc with a
     zero byte count returns (size_t)-2 on many C libraries, which would
     otherwise be mistaken for an invalid byte and stepped over. */
  while (count > 0 && (size_t)point < length)
    {
      tmp = mbrtowc (&wc, string + point, length - (size_t)point, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* Invalid bytes: assume one byte represents one character. */
          point++;
          count--;
          /* The conversion state is undefined after an error; reset it. */
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (tmp == (size_t)0)
        break;          /* found '\0' char */
      else
        {
          /* Valid character: zero-width ones only count when the caller
             did not ask for non-zero-width characters. */
          point += (int)tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters sitting at the final position.  WC is
         only examined after mbrtowc reports a successfully converted,
         non-null character. */
      while ((size_t)point < length)
        {
          tmp = mbrtowc (&wc, string + point, length - (size_t)point, &ps);
          if (tmp == (size_t)0 || tmp == (size_t)-1 || tmp == (size_t)-2
              || wcwidth (wc) != 0)
            break;
          point += (int)tmp;
        }
    }

  return point;
}
140
/* Return the byte offset of the start of the character that precedes
   byte offset SEED in STRING.  A negative SEED returns 0; a SEED beyond
   the end of STRING returns the length of STRING.  The string is decoded
   from its beginning, remembering the start of the last character seen
   before SEED.  When FIND_NON_ZERO is non-zero, validly decoded
   zero-width characters are not remembered, so the result is the start
   of the last non-zero-width character.  A byte that cannot be decoded
   is treated as a single one-byte character. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* The bytes are invalid or too few to compose a multibyte
             character, so assume the first byte represents a single
             character. */
          tmp = 1;
          /* Clear the conversion state, which is undefined after an
             error. */
          memset (&ps, 0, sizeof (mbstate_t));
          /* That byte is being treated as a character in its own right,
             so it must be remembered as a candidate result too;
             otherwise the caller would jump back over it. */
          prev = point;
        }
      else if (tmp == 0)
        break;                  /* Found '\0' char. */
      else if (find_non_zero == 0 || wcwidth (wc) != 0)
        prev = point;

      point += tmp;
    }

  return prev;
}
191
/* Return the number of bytes in the multibyte character starting at SRC.
   Returns 0 if SRC points at the terminating null character, -1 if the
   bytes at SRC form an invalid multibyte sequence, and -2 if they form
   only an incomplete multibyte character.  In the -1 and -2 cases the
   conversion state *PS is reset (when PS is non-null). */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t tmp, len;

  len = strlen (src);
  /* Handle the empty string here.  mbrlen would be called with a byte
     count of zero, for which many C libraries return (size_t)-2 rather
     than 0, and the end of the string would be reported as an
     incomplete character. */
  if (len == 0)
    return 0;

  tmp = mbrlen ((const char *)src, len, ps);
  if (tmp == (size_t)(-2) || tmp == (size_t)(-1))
    {
      /* Too short (-2) or invalid (-1) to compose a multibyte character;
         reinitialize the conversion state before reporting it. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (tmp == (size_t)(-2)) ? -2 : -1;
    }

  return (int)tmp;
}
225
/* Compare the character at BUF1[POS1] with the one at BUF2[POS2], using
   PS1 and PS2 as the respective conversion states.  Return 1 when both
   characters have the same positive byte length and identical bytes;
   return 0 otherwise. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, offset;

  /* The second character is only measured when the first one is usable. */
  len1 = _rl_get_char_len (buf1 + pos1, ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (buf2 + pos2, ps2);
  if (len2 <= 0 || len1 != len2)
    return 0;

  /* Same length: the characters match only if every byte does. */
  for (offset = 0; offset < len1; offset++)
    {
      if (buf1[pos1 + offset] != buf2[pos2 + offset])
        return 0;
    }

  return 1;
}
251
/* Walk STRING from its beginning, one character at a time, until the
   first character boundary at or after byte offset POINT, updating the
   conversion state PS along the way.  Return how many bytes that
   boundary lies beyond POINT (adjusted_point - point, never negative).
   Return -1 if POINT is invalid, i.e. negative or greater than the
   length of STRING. */
int
_rl_adjust_point(string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  int total, offset;
  size_t consumed;

  total = strlen (string);
  if (point < 0 || point > total)
    return -1;

  for (offset = 0; offset < point; )
    {
      consumed = mbrlen (string + offset, total - offset, ps);
      if (consumed == (size_t)-1 || consumed == (size_t)-2)
        {
          /* The conversion state is undefined after a failed
             conversion, so start over from the initial state. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
          /* Invalid or incomplete sequence: count the first byte as a
             character of its own. */
          offset++;
        }
      else
        /* A null character still occupies one byte. */
        offset += (consumed == 0) ? 1 : consumed;
    }

  return (offset - point);
}
295
/* Return 1 if the LENGTH bytes of MBCHAR are found in STRING starting at
   byte offset SEED, and 0 otherwise.  END bounds the comparison: when
   fewer than LENGTH bytes lie between SEED and END, the result is 0. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  /* Advance while the bytes agree; a full match runs OFFSET up to LENGTH. */
  offset = 0;
  while (offset < length && mbchar[offset] == string[seed + offset])
    offset++;

  return (offset >= length) ? 1 : 0;
}
313#endif /* HANDLE_MULTIBYTE */
314
/* Return the byte offset in STRING that lies COUNT characters after the
   byte offset SEED.  With multibyte support compiled in, the work is
   handed to _rl_find_next_mbchar_internal, and FLAGS set to
   MB_FIND_NONZERO makes it look for non-zero-width characters.  Without
   multibyte support every character is one byte, so the answer is
   simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  return (seed + count);
#else
  return _rl_find_next_mbchar_internal (string, seed, count, flags);
#endif
}
330
/* Return the byte offset in STRING at which the character preceding byte
   offset SEED starts; the result is <= SEED.  With multibyte support
   compiled in, the work is handed to _rl_find_prev_mbchar_internal, and
   FLAGS set to MB_FIND_NONZERO makes it look for non-zero-width
   characters.  Without multibyte support the previous character is the
   previous byte, except that a SEED of 0 stays at 0. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if !defined (HANDLE_MULTIBYTE)
  if (seed == 0)
    return seed;
  return seed - 1;
#else
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#endif
}
346