1/* mbutil.c -- readline multibyte character utility functions */ 2 3/* Copyright (C) 2001-2005 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library, a library for 6 reading lines of text with interactive input and history editing. 7 8 The GNU Readline Library is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License 10 as published by the Free Software Foundation; either version 2, or 11 (at your option) any later version. 12 13 The GNU Readline Library is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 The GNU General Public License is often shipped with GNU software, and 19 is generally kept in a file called COPYING or LICENSE. If you do not 20 have a copy of the license, write to the Free Software Foundation, 21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22#define READLINE_LIBRARY 23 24#if defined (HAVE_CONFIG_H) 25# include <config.h> 26#endif 27 28#include <sys/types.h> 29#include <fcntl.h> 30#include "posixjmp.h" 31 32#if defined (HAVE_UNISTD_H) 33# include <unistd.h> /* for _POSIX_VERSION */ 34#endif /* HAVE_UNISTD_H */ 35 36#if defined (HAVE_STDLIB_H) 37# include <stdlib.h> 38#else 39# include "ansi_stdlib.h" 40#endif /* HAVE_STDLIB_H */ 41 42#include <stdio.h> 43#include <ctype.h> 44 45/* System-specific feature definitions and include files. */ 46#include "rldefs.h" 47#include "rlmbutil.h" 48 49#if defined (TIOCSTAT_IN_SYS_IOCTL) 50# include <sys/ioctl.h> 51#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52 53/* Some standard library routines. */ 54#include "readline.h" 55 56#include "rlprivate.h" 57#include "xmalloc.h" 58 59/* Declared here so it can be shared between the readline and history 60 libraries. */ 61#if defined (HANDLE_MULTIBYTE) 62int rl_byte_oriented = 0; 63#else 64int rl_byte_oriented = 1; 65#endif 66 67/* **************************************************************** */ 68/* */ 69/* Multibyte Character Utility Functions */ 70/* */ 71/* **************************************************************** */ 72 73#if defined(HANDLE_MULTIBYTE) 74 75static int 76_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77 char *string; 78 int seed, count, find_non_zero; 79{ 80 size_t tmp, len; 81 mbstate_t ps; 82 int point; 83 wchar_t wc; 84 85 tmp = 0; 86 87 memset(&ps, 0, sizeof (mbstate_t)); 88 if (seed < 0) 89 seed = 0; 90 if (count <= 0) 91 return seed; 92 93 point = seed + _rl_adjust_point (string, seed, &ps); 94 /* if this is true, means that seed was not pointed character 95 started byte. So correct the point and consume count */ 96 if (seed < point) 97 count--; 98 99 while (count > 0) 100 { 101 len = strlen (string + point); 102 if (len == 0) 103 break; 104 tmp = mbrtowc (&wc, string+point, len, &ps); 105 if (MB_INVALIDCH ((size_t)tmp)) 106 { 107 /* invalid bytes. asume a byte represents a character */ 108 point++; 109 count--; 110 /* reset states. */ 111 memset(&ps, 0, sizeof(mbstate_t)); 112 } 113 else if (MB_NULLWCH (tmp)) 114 break; /* found wide '\0' */ 115 else 116 { 117 /* valid bytes */ 118 point += tmp; 119 if (find_non_zero) 120 { 121 if (wcwidth (wc) == 0) 122 continue; 123 else 124 count--; 125 } 126 else 127 count--; 128 } 129 } 130 131 if (find_non_zero) 132 { 133 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 134 while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0 && wcwidth (wc) == 0) 135 { 136 point += tmp; 137 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 138 } 139 } 140 141 return point; 142} 143 144static int 145_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 146 char *string; 147 int seed, find_non_zero; 148{ 149 mbstate_t ps; 150 int prev, non_zero_prev, point, length; 151 size_t tmp; 152 wchar_t wc; 153 154 memset(&ps, 0, sizeof(mbstate_t)); 155 length = strlen(string); 156 157 if (seed < 0) 158 return 0; 159 else if (length < seed) 160 return length; 161 162 prev = non_zero_prev = point = 0; 163 while (point < seed) 164 { 165 tmp = mbrtowc (&wc, string + point, length - point, &ps); 166 if (MB_INVALIDCH ((size_t)tmp)) 167 { 168 /* in this case, bytes are invalid or shorted to compose 169 multibyte char, so assume that the first byte represents 170 a single character anyway. */ 171 tmp = 1; 172 /* clear the state of the byte sequence, because 173 in this case effect of mbstate is undefined */ 174 memset(&ps, 0, sizeof (mbstate_t)); 175 176 /* Since we're assuming that this byte represents a single 177 non-zero-width character, don't forget about it. */ 178 prev = point; 179 } 180 else if (MB_NULLWCH (tmp)) 181 break; /* Found '\0' char. Can this happen? */ 182 else 183 { 184 if (find_non_zero) 185 { 186 if (wcwidth (wc) != 0) 187 prev = point; 188 } 189 else 190 prev = point; 191 } 192 193 point += tmp; 194 } 195 196 return prev; 197} 198 199/* return the number of bytes parsed from the multibyte sequence starting 200 at src, if a non-L'\0' wide character was recognized. It returns 0, 201 if a L'\0' wide character was recognized. It returns (size_t)(-1), 202 if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 203 if it couldn't parse a complete multibyte character. */ 204int 205_rl_get_char_len (src, ps) 206 char *src; 207 mbstate_t *ps; 208{ 209 size_t tmp; 210 211 tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 212 if (tmp == (size_t)(-2)) 213 { 214 /* shorted to compose multibyte char */ 215 if (ps) 216 memset (ps, 0, sizeof(mbstate_t)); 217 return -2; 218 } 219 else if (tmp == (size_t)(-1)) 220 { 221 /* invalid to compose multibyte char */ 222 /* initialize the conversion state */ 223 if (ps) 224 memset (ps, 0, sizeof(mbstate_t)); 225 return -1; 226 } 227 else if (tmp == (size_t)0) 228 return 0; 229 else 230 return (int)tmp; 231} 232 233/* compare the specified two characters. If the characters matched, 234 return 1. Otherwise return 0. */ 235int 236_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2) 237 char *buf1; 238 int pos1; 239 mbstate_t *ps1; 240 char *buf2; 241 int pos2; 242 mbstate_t *ps2; 243{ 244 int i, w1, w2; 245 246 if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 247 (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 248 (w1 != w2) || 249 (buf1[pos1] != buf2[pos2])) 250 return 0; 251 252 for (i = 1; i < w1; i++) 253 if (buf1[pos1+i] != buf2[pos2+i]) 254 return 0; 255 256 return 1; 257} 258 259/* adjust pointed byte and find mbstate of the point of string. 260 adjusted point will be point <= adjusted_point, and returns 261 differences of the byte(adjusted_point - point). 262 if point is invalied (point < 0 || more than string length), 263 it returns -1 */ 264int 265_rl_adjust_point(string, point, ps) 266 char *string; 267 int point; 268 mbstate_t *ps; 269{ 270 size_t tmp = 0; 271 int length; 272 int pos = 0; 273 274 length = strlen(string); 275 if (point < 0) 276 return -1; 277 if (length < point) 278 return -1; 279 280 while (pos < point) 281 { 282 tmp = mbrlen (string + pos, length - pos, ps); 283 if (MB_INVALIDCH ((size_t)tmp)) 284 { 285 /* in this case, bytes are invalid or shorted to compose 286 multibyte char, so assume that the first byte represents 287 a single character anyway. */ 288 pos++; 289 /* clear the state of the byte sequence, because 290 in this case effect of mbstate is undefined */ 291 if (ps) 292 memset (ps, 0, sizeof (mbstate_t)); 293 } 294 else if (MB_NULLWCH (tmp)) 295 pos++; 296 else 297 pos += tmp; 298 } 299 300 return (pos - point); 301} 302 303int 304_rl_is_mbchar_matched (string, seed, end, mbchar, length) 305 char *string; 306 int seed, end; 307 char *mbchar; 308 int length; 309{ 310 int i; 311 312 if ((end - seed) < length) 313 return 0; 314 315 for (i = 0; i < length; i++) 316 if (string[seed + i] != mbchar[i]) 317 return 0; 318 return 1; 319} 320 321wchar_t 322_rl_char_value (buf, ind) 323 char *buf; 324 int ind; 325{ 326 size_t tmp; 327 wchar_t wc; 328 mbstate_t ps; 329 int l; 330 331 if (MB_LEN_MAX == 1 || rl_byte_oriented) 332 return ((wchar_t) buf[ind]); 333 l = strlen (buf); 334 if (ind >= l - 1) 335 return ((wchar_t) buf[ind]); 336 memset (&ps, 0, sizeof (mbstate_t)); 337 tmp = mbrtowc (&wc, buf + ind, l - ind, &ps); 338 if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp)) 339 return ((wchar_t) buf[ind]); 340 return wc; 341} 342#endif /* HANDLE_MULTIBYTE */ 343 344/* Find next `count' characters started byte point of the specified seed. 345 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 346 characters. */ 347#undef _rl_find_next_mbchar 348int 349_rl_find_next_mbchar (string, seed, count, flags) 350 char *string; 351 int seed, count, flags; 352{ 353#if defined (HANDLE_MULTIBYTE) 354 return _rl_find_next_mbchar_internal (string, seed, count, flags); 355#else 356 return (seed + count); 357#endif 358} 359 360/* Find previous character started byte point of the specified seed. 361 Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 362 we look for non-zero-width multibyte characters. */ 363#undef _rl_find_prev_mbchar 364int 365_rl_find_prev_mbchar (string, seed, flags) 366 char *string; 367 int seed, flags; 368{ 369#if defined (HANDLE_MULTIBYTE) 370 return _rl_find_prev_mbchar_internal (string, seed, flags); 371#else 372 return ((seed == 0) ? seed : seed - 1); 373#endif 374} 375