1119610Sache/* mbutil.c -- readline multibyte character utility functions */
2119610Sache
3157184Sache/* Copyright (C) 2001-2005 Free Software Foundation, Inc.
4119610Sache
5119610Sache   This file is part of the GNU Readline Library, a library for
6119610Sache   reading lines of text with interactive input and history editing.
7119610Sache
8119610Sache   The GNU Readline Library is free software; you can redistribute it
9119610Sache   and/or modify it under the terms of the GNU General Public License
10119610Sache   as published by the Free Software Foundation; either version 2, or
11119610Sache   (at your option) any later version.
12119610Sache
13119610Sache   The GNU Readline Library is distributed in the hope that it will be
14119610Sache   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15119610Sache   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16119610Sache   GNU General Public License for more details.
17119610Sache
18119610Sache   The GNU General Public License is often shipped with GNU software, and
19119610Sache   is generally kept in a file called COPYING or LICENSE.  If you do not
20119610Sache   have a copy of the license, write to the Free Software Foundation,
21119610Sache   59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22119610Sache#define READLINE_LIBRARY
23119610Sache
24119610Sache#if defined (HAVE_CONFIG_H)
25119610Sache#  include <config.h>
26119610Sache#endif
27119610Sache
28119610Sache#include <sys/types.h>
29119610Sache#include <fcntl.h>
30119610Sache#include "posixjmp.h"
31119610Sache
32119610Sache#if defined (HAVE_UNISTD_H)
33119610Sache#  include <unistd.h>	   /* for _POSIX_VERSION */
34119610Sache#endif /* HAVE_UNISTD_H */
35119610Sache
36119610Sache#if defined (HAVE_STDLIB_H)
37119610Sache#  include <stdlib.h>
38119610Sache#else
39119610Sache#  include "ansi_stdlib.h"
40119610Sache#endif /* HAVE_STDLIB_H */
41119610Sache
42119610Sache#include <stdio.h>
43119610Sache#include <ctype.h>
44119610Sache
45119610Sache/* System-specific feature definitions and include files. */
46119610Sache#include "rldefs.h"
47119610Sache#include "rlmbutil.h"
48119610Sache
49119610Sache#if defined (TIOCSTAT_IN_SYS_IOCTL)
50119610Sache#  include <sys/ioctl.h>
51119610Sache#endif /* TIOCSTAT_IN_SYS_IOCTL */
52119610Sache
53119610Sache/* Some standard library routines. */
54119610Sache#include "readline.h"
55119610Sache
56119610Sache#include "rlprivate.h"
57119610Sache#include "xmalloc.h"
58119610Sache
/* Defined in this file so that both the readline and history libraries
   can share it.  Initialized to 1 when the library is built without
   HANDLE_MULTIBYTE and to 0 when it is built with it. */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66119610Sache
67119610Sache/* **************************************************************** */
68119610Sache/*								    */
69119610Sache/*		Multibyte Character Utility Functions		    */
70119610Sache/*								    */
71119610Sache/* **************************************************************** */
72119610Sache
73119610Sache#if defined(HANDLE_MULTIBYTE)
74119610Sache
/* Starting at byte offset SEED in STRING, move forward COUNT multibyte
   characters and return the resulting byte offset.

   A negative SEED is treated as 0, and a COUNT <= 0 returns SEED
   unchanged.  If SEED falls inside a multibyte character, moving to the
   next character boundary uses up one of COUNT.  Bytes that do not form
   a valid character are each counted as one character.  When
   FIND_NON_ZERO is non-zero, zero-width characters (wcwidth () == 0) are
   not counted, and any zero-width characters that immediately follow the
   final position are skipped as well.

   The scan never moves past the terminating NUL of STRING.  If SEED lies
   beyond the end of STRING, nothing is scanned and SEED + 1 is returned
   (the out-of-range position is treated as a single byte). */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp;
  mbstate_t ps;
  int point, adjust, length;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* _rl_adjust_point returns -1 when SEED is past the end of STRING.
     Adding that to SEED would start the scan at a bogus offset and read
     outside the string, so bail out before touching any bytes. */
  adjust = _rl_adjust_point (string, seed, &ps);
  if (adjust < 0)
    return seed + 1;

  /* SEED is known to be within STRING here, so the length can be
     computed once instead of calling strlen on every iteration. */
  length = strlen (string);

  point = seed + adjust;
  /* A positive adjustment means SEED did not point at the first byte of
     a character; moving to the next boundary consumes one of COUNT. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      /* Stop at the terminating NUL.  Calling mbrtowc with zero bytes
         available would be reported as an incomplete character and
         would push POINT beyond the end of the string. */
      if (point >= length)
        break;

      tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
      if (MB_INVALIDCH (tmp))
        {
          /* Invalid bytes: assume a single byte represents a character. */
          point++;
          count--;
          /* The conversion state is unreliable after a failure. */
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid character: step over it, but only count it if the
             caller wants zero-width characters counted or it has a
             non-zero width. */
          point += tmp;
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip any zero-width characters following the final position.
         WC is only examined after a successful conversion; an invalid
         or null result ends the skip without reading WC. */
      while (point < length)
        {
          tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
          if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp) || wcwidth (wc) != 0)
            break;
          point += tmp;
        }
    }

  return point;
}
142119610Sache
/* Return the byte offset in STRING at which the character preceding
   byte offset SEED starts.  The string is scanned from its beginning,
   remembering the start of the most recent character seen before SEED.
   When FIND_NON_ZERO is non-zero, only characters with a non-zero
   wcwidth () are remembered.  A negative SEED returns 0; a SEED larger
   than the length of STRING returns that length. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int total, cursor, last_start;

  memset (&state, 0, sizeof (mbstate_t));
  total = strlen (string);

  if (seed < 0)
    return 0;
  if (total < seed)
    return total;

  last_start = 0;
  for (cursor = 0; cursor < seed; cursor += nbytes)
    {
      nbytes = mbrtowc (&wch, string + cursor, total - cursor, &state);

      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* The bytes are invalid or too short to form a multibyte
             character: treat the first byte as a character of its own. */
          nbytes = 1;
          /* The conversion state is undefined after a failure, so
             start over with a clean one. */
          memset (&state, 0, sizeof (mbstate_t));
          /* That lone byte is assumed to be a non-zero-width character,
             so it always becomes the remembered position. */
          last_start = cursor;
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* Found a '\0' character; stop scanning. */
      else if (find_non_zero == 0 || wcwidth (wch) != 0)
        last_start = cursor;
    }

  return last_start;
}
197119610Sache
/* Measure the multibyte character starting at SRC using mbrlen (), with
   strlen (SRC) as the number of bytes available and PS as the
   conversion state.  Returns the number of bytes in the character when
   mbrlen reports a positive length, 0 when it reports 0, -1 when it
   reports an invalid multibyte sequence, and -2 when it reports an
   incomplete multibyte character.  In the two error cases *PS is reset
   to the initial state if PS is non-null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;
  int failure;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  /* Translate the two mbrlen error codes into their int equivalents. */
  if (nbytes == (size_t)(-2))
    failure = -2;               /* too few bytes to complete a character */
  else if (nbytes == (size_t)(-1))
    failure = -1;               /* invalid multibyte sequence */
  else
    failure = 0;

  if (failure != 0)
    {
      /* Re-initialize the conversion state after a failed conversion. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return failure;
    }

  return (int)nbytes;
}
231119610Sache
/* Compare the multibyte character at BUF1[POS1] with the one at
   BUF2[POS2], using PS1 and PS2 as the respective conversion states.
   Returns 1 when both characters have the same positive byte length and
   identical bytes, and 0 otherwise (including when either length, as
   reported by _rl_get_char_len, is not positive). */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  /* The second character is measured only if the first one is usable,
     so PS2 is left untouched when the first measurement fails. */
  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  /* Same length: the characters match iff every byte matches. */
  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
257119610Sache
/* Walk STRING character by character from its beginning until reaching
   or passing byte offset POINT, updating the conversion state PS along
   the way.  The position where the walk stops (the adjusted point) is
   always >= POINT; the return value is the difference
   (adjusted point - POINT), which is 0 when POINT already sits on a
   character boundary.  Returns -1 if POINT is invalid, i.e. negative or
   greater than the length of STRING. */
int
_rl_adjust_point(string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes, step;
  int total, cursor;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  cursor = 0;
  while (cursor < point)
    {
      nbytes = mbrlen (string + cursor, total - cursor, ps);

      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* The bytes are invalid or too short to form a multibyte
             character: treat the first byte as a character of its own. */
          step = 1;
          /* The conversion state is undefined after a failure, so
             reset it when the caller supplied one. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (nbytes))
        step = 1;               /* a null character still occupies one byte */
      else
        step = nbytes;

      cursor += step;
    }

  return (cursor - point);
}
301119610Sache
/* Return 1 if the LENGTH bytes of MBCHAR are found in STRING starting
   at byte offset SEED, and 0 otherwise.  END bounds the comparison: if
   fewer than LENGTH bytes lie between SEED and END, the result is 0
   without looking at any bytes. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  /* Advance while the bytes agree; reaching LENGTH means a full match. */
  offset = 0;
  while (offset < length && string[seed + offset] == mbchar[offset])
    offset++;

  return (offset < length) ? 0 : 1;
}
319157184Sache
320157184Sachewchar_t
321157184Sache_rl_char_value (buf, ind)
322157184Sache     char *buf;
323157184Sache     int ind;
324157184Sache{
325157184Sache  size_t tmp;
326157184Sache  wchar_t wc;
327157184Sache  mbstate_t ps;
328157184Sache  int l;
329157184Sache
330157184Sache  if (MB_LEN_MAX == 1 || rl_byte_oriented)
331157184Sache    return ((wchar_t) buf[ind]);
332157184Sache  l = strlen (buf);
333157184Sache  if (ind >= l - 1)
334157184Sache    return ((wchar_t) buf[ind]);
335157184Sache  memset (&ps, 0, sizeof (mbstate_t));
336157184Sache  tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337157184Sache  if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338157184Sache    return ((wchar_t) buf[ind]);
339157184Sache  return wc;
340157184Sache}
341119610Sache#endif /* HANDLE_MULTIBYTE */
342119610Sache
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.  With HANDLE_MULTIBYTE the work is
   delegated to _rl_find_next_mbchar_internal, passing FLAGS as its
   find-non-zero argument, so a non-zero FLAGS (e.g. MB_FIND_NONZERO)
   makes it look for non-zero-width characters.  Without
   HANDLE_MULTIBYTE every character is one byte and the result is simply
   SEED + COUNT.  The #undef keeps any macro of the same name from
   affecting this definition. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
358119610Sache
/* Return the byte offset in STRING where the character preceding byte
   offset SEED starts.  With HANDLE_MULTIBYTE the work is delegated to
   _rl_find_prev_mbchar_internal, passing FLAGS as its find-non-zero
   argument, so a non-zero FLAGS (e.g. MB_FIND_NONZERO) makes it look for
   non-zero-width characters.  Without HANDLE_MULTIBYTE the result is
   SEED - 1, except that a SEED of 0 is returned unchanged.  The #undef
   keeps any macro of the same name from affecting this definition. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  /* One byte per character: step back one byte unless already at 0. */
  prev = seed;
  if (seed != 0)
    prev = seed - 1;
#endif

  return prev;
}
374