/* mbutil.c revision 136647 */
1/* mbutil.c -- readline multibyte character utility functions */ 2 3/* Copyright (C) 2001-2004 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library, a library for 6 reading lines of text with interactive input and history editing. 7 8 The GNU Readline Library is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License 10 as published by the Free Software Foundation; either version 2, or 11 (at your option) any later version. 12 13 The GNU Readline Library is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 The GNU General Public License is often shipped with GNU software, and 19 is generally kept in a file called COPYING or LICENSE. If you do not 20 have a copy of the license, write to the Free Software Foundation, 21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22#define READLINE_LIBRARY 23 24#if defined (HAVE_CONFIG_H) 25# include <config.h> 26#endif 27 28#include <sys/types.h> 29#include <fcntl.h> 30#include "posixjmp.h" 31 32#if defined (HAVE_UNISTD_H) 33# include <unistd.h> /* for _POSIX_VERSION */ 34#endif /* HAVE_UNISTD_H */ 35 36#if defined (HAVE_STDLIB_H) 37# include <stdlib.h> 38#else 39# include "ansi_stdlib.h" 40#endif /* HAVE_STDLIB_H */ 41 42#include <stdio.h> 43#include <ctype.h> 44 45/* System-specific feature definitions and include files. */ 46#include "rldefs.h" 47#include "rlmbutil.h" 48 49#if defined (TIOCSTAT_IN_SYS_IOCTL) 50# include <sys/ioctl.h> 51#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52 53/* Some standard library routines. */ 54#include "readline.h" 55 56#include "rlprivate.h" 57#include "xmalloc.h" 58 59/* Declared here so it can be shared between the readline and history 60 libraries. 
*/ 61#if defined (HANDLE_MULTIBYTE) 62int rl_byte_oriented = 0; 63#else 64int rl_byte_oriented = 1; 65#endif 66 67/* **************************************************************** */ 68/* */ 69/* Multibyte Character Utility Functions */ 70/* */ 71/* **************************************************************** */ 72 73#if defined(HANDLE_MULTIBYTE) 74 75static int 76_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77 char *string; 78 int seed, count, find_non_zero; 79{ 80 size_t tmp = 0; 81 mbstate_t ps; 82 int point = 0; 83 wchar_t wc; 84 85 memset(&ps, 0, sizeof (mbstate_t)); 86 if (seed < 0) 87 seed = 0; 88 if (count <= 0) 89 return seed; 90 91 point = seed + _rl_adjust_point(string, seed, &ps); 92 /* if this is true, means that seed was not pointed character 93 started byte. So correct the point and consume count */ 94 if (seed < point) 95 count--; 96 97 while (count > 0) 98 { 99 tmp = mbrtowc (&wc, string+point, strlen(string + point), &ps); 100 if (MB_INVALIDCH ((size_t)tmp)) 101 { 102 /* invalid bytes. asume a byte represents a character */ 103 point++; 104 count--; 105 /* reset states. 
*/ 106 memset(&ps, 0, sizeof(mbstate_t)); 107 } 108 else if (MB_NULLWCH (tmp)) 109 break; /* found wide '\0' */ 110 else 111 { 112 /* valid bytes */ 113 point += tmp; 114 if (find_non_zero) 115 { 116 if (wcwidth (wc) == 0) 117 continue; 118 else 119 count--; 120 } 121 else 122 count--; 123 } 124 } 125 126 if (find_non_zero) 127 { 128 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 129 while (tmp > 0 && wcwidth (wc) == 0) 130 { 131 point += tmp; 132 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 133 if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp)) 134 break; 135 } 136 } 137 return point; 138} 139 140static int 141_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 142 char *string; 143 int seed, find_non_zero; 144{ 145 mbstate_t ps; 146 int prev, non_zero_prev, point, length; 147 size_t tmp; 148 wchar_t wc; 149 150 memset(&ps, 0, sizeof(mbstate_t)); 151 length = strlen(string); 152 153 if (seed < 0) 154 return 0; 155 else if (length < seed) 156 return length; 157 158 prev = non_zero_prev = point = 0; 159 while (point < seed) 160 { 161 tmp = mbrtowc (&wc, string + point, length - point, &ps); 162 if (MB_INVALIDCH ((size_t)tmp)) 163 { 164 /* in this case, bytes are invalid or shorted to compose 165 multibyte char, so assume that the first byte represents 166 a single character anyway. */ 167 tmp = 1; 168 /* clear the state of the byte sequence, because 169 in this case effect of mbstate is undefined */ 170 memset(&ps, 0, sizeof (mbstate_t)); 171 172 /* Since we're assuming that this byte represents a single 173 non-zero-width character, don't forget about it. */ 174 prev = point; 175 } 176 else if (MB_NULLWCH (tmp)) 177 break; /* Found '\0' char. Can this happen? 
*/ 178 else 179 { 180 if (find_non_zero) 181 { 182 if (wcwidth (wc) != 0) 183 prev = point; 184 } 185 else 186 prev = point; 187 } 188 189 point += tmp; 190 } 191 192 return prev; 193} 194 195/* return the number of bytes parsed from the multibyte sequence starting 196 at src, if a non-L'\0' wide character was recognized. It returns 0, 197 if a L'\0' wide character was recognized. It returns (size_t)(-1), 198 if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 199 if it couldn't parse a complete multibyte character. */ 200int 201_rl_get_char_len (src, ps) 202 char *src; 203 mbstate_t *ps; 204{ 205 size_t tmp; 206 207 tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 208 if (tmp == (size_t)(-2)) 209 { 210 /* shorted to compose multibyte char */ 211 if (ps) 212 memset (ps, 0, sizeof(mbstate_t)); 213 return -2; 214 } 215 else if (tmp == (size_t)(-1)) 216 { 217 /* invalid to compose multibyte char */ 218 /* initialize the conversion state */ 219 if (ps) 220 memset (ps, 0, sizeof(mbstate_t)); 221 return -1; 222 } 223 else if (tmp == (size_t)0) 224 return 0; 225 else 226 return (int)tmp; 227} 228 229/* compare the specified two characters. If the characters matched, 230 return 1. Otherwise return 0. */ 231int 232_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2) 233 char *buf1; 234 int pos1; 235 mbstate_t *ps1; 236 char *buf2; 237 int pos2; 238 mbstate_t *ps2; 239{ 240 int i, w1, w2; 241 242 if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 243 (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 244 (w1 != w2) || 245 (buf1[pos1] != buf2[pos2])) 246 return 0; 247 248 for (i = 1; i < w1; i++) 249 if (buf1[pos1+i] != buf2[pos2+i]) 250 return 0; 251 252 return 1; 253} 254 255/* adjust pointed byte and find mbstate of the point of string. 256 adjusted point will be point <= adjusted_point, and returns 257 differences of the byte(adjusted_point - point). 
258 if point is invalied (point < 0 || more than string length), 259 it returns -1 */ 260int 261_rl_adjust_point(string, point, ps) 262 char *string; 263 int point; 264 mbstate_t *ps; 265{ 266 size_t tmp = 0; 267 int length; 268 int pos = 0; 269 270 length = strlen(string); 271 if (point < 0) 272 return -1; 273 if (length < point) 274 return -1; 275 276 while (pos < point) 277 { 278 tmp = mbrlen (string + pos, length - pos, ps); 279 if (MB_INVALIDCH ((size_t)tmp)) 280 { 281 /* in this case, bytes are invalid or shorted to compose 282 multibyte char, so assume that the first byte represents 283 a single character anyway. */ 284 pos++; 285 /* clear the state of the byte sequence, because 286 in this case effect of mbstate is undefined */ 287 if (ps) 288 memset (ps, 0, sizeof (mbstate_t)); 289 } 290 else if (MB_NULLWCH (tmp)) 291 pos++; 292 else 293 pos += tmp; 294 } 295 296 return (pos - point); 297} 298 299int 300_rl_is_mbchar_matched (string, seed, end, mbchar, length) 301 char *string; 302 int seed, end; 303 char *mbchar; 304 int length; 305{ 306 int i; 307 308 if ((end - seed) < length) 309 return 0; 310 311 for (i = 0; i < length; i++) 312 if (string[seed + i] != mbchar[i]) 313 return 0; 314 return 1; 315} 316#endif /* HANDLE_MULTIBYTE */ 317 318/* Find next `count' characters started byte point of the specified seed. 319 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 320 characters. */ 321#undef _rl_find_next_mbchar 322int 323_rl_find_next_mbchar (string, seed, count, flags) 324 char *string; 325 int seed, count, flags; 326{ 327#if defined (HANDLE_MULTIBYTE) 328 return _rl_find_next_mbchar_internal (string, seed, count, flags); 329#else 330 return (seed + count); 331#endif 332} 333 334/* Find previous character started byte point of the specified seed. 335 Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 336 we look for non-zero-width multibyte characters. 
*/ 337#undef _rl_find_prev_mbchar 338int 339_rl_find_prev_mbchar (string, seed, flags) 340 char *string; 341 int seed, flags; 342{ 343#if defined (HANDLE_MULTIBYTE) 344 return _rl_find_prev_mbchar_internal (string, seed, flags); 345#else 346 return ((seed == 0) ? seed : seed - 1); 347#endif 348} 349