1/* mbutil.c -- readline multibyte character utility functions */ 2 3/* Copyright (C) 2001 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library, a library for 6 reading lines of text with interactive input and history editing. 7 8 The GNU Readline Library is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License 10 as published by the Free Software Foundation; either version 2, or 11 (at your option) any later version. 12 13 The GNU Readline Library is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 The GNU General Public License is often shipped with GNU software, and 19 is generally kept in a file called COPYING or LICENSE. If you do not 20 have a copy of the license, write to the Free Software Foundation, 21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22#define READLINE_LIBRARY 23 24#if defined (HAVE_CONFIG_H) 25# include <config.h> 26#endif 27 28#include <sys/types.h> 29#include <fcntl.h> 30#include "posixjmp.h" 31 32#if defined (HAVE_UNISTD_H) 33# include <unistd.h> /* for _POSIX_VERSION */ 34#endif /* HAVE_UNISTD_H */ 35 36#if defined (HAVE_STDLIB_H) 37# include <stdlib.h> 38#else 39# include "ansi_stdlib.h" 40#endif /* HAVE_STDLIB_H */ 41 42#include <stdio.h> 43#include <ctype.h> 44 45/* System-specific feature definitions and include files. */ 46#include "rldefs.h" 47#include "rlmbutil.h" 48 49#if defined (TIOCSTAT_IN_SYS_IOCTL) 50# include <sys/ioctl.h> 51#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52 53/* Some standard library routines. */ 54#include "readline.h" 55 56#include "rlprivate.h" 57#include "xmalloc.h" 58 59/* Declared here so it can be shared between the readline and history 60 libraries. */ 61#if defined (HANDLE_MULTIBYTE) 62int rl_byte_oriented = 0; 63#else 64int rl_byte_oriented = 1; 65#endif 66 67/* **************************************************************** */ 68/* */ 69/* Multibyte Character Utility Functions */ 70/* */ 71/* **************************************************************** */ 72 73#if defined(HANDLE_MULTIBYTE) 74 75static int 76_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77 char *string; 78 int seed, count, find_non_zero; 79{ 80 size_t tmp = 0; 81 mbstate_t ps; 82 int point = 0; 83 wchar_t wc; 84 85 memset(&ps, 0, sizeof (mbstate_t)); 86 if (seed < 0) 87 seed = 0; 88 if (count <= 0) 89 return seed; 90 91 point = seed + _rl_adjust_point(string, seed, &ps); 92 /* if this is true, means that seed was not pointed character 93 started byte. So correct the point and consume count */ 94 if (seed < point) 95 count --; 96 97 while (count > 0) 98 { 99 tmp = mbrtowc (&wc, string+point, strlen(string + point), &ps); 100 if ((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 101 { 102 /* invalid bytes. asume a byte represents a character */ 103 point++; 104 count--; 105 /* reset states. */ 106 memset(&ps, 0, sizeof(mbstate_t)); 107 } 108 else if (tmp == (size_t)0) 109 /* found '\0' char */ 110 break; 111 else 112 { 113 /* valid bytes */ 114 point += tmp; 115 if (find_non_zero) 116 { 117 if (wcwidth (wc) == 0) 118 continue; 119 else 120 count--; 121 } 122 else 123 count--; 124 } 125 } 126 127 if (find_non_zero) 128 { 129 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 130 while (wcwidth (wc) == 0) 131 { 132 point += tmp; 133 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 134 if (tmp == (size_t)(0) || tmp == (size_t)(-1) || tmp == (size_t)(-2)) 135 break; 136 } 137 } 138 return point; 139} 140 141static int 142_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 143 char *string; 144 int seed, find_non_zero; 145{ 146 mbstate_t ps; 147 int prev, non_zero_prev, point, length; 148 size_t tmp; 149 wchar_t wc; 150 151 memset(&ps, 0, sizeof(mbstate_t)); 152 length = strlen(string); 153 154 if (seed < 0) 155 return 0; 156 else if (length < seed) 157 return length; 158 159 prev = non_zero_prev = point = 0; 160 while (point < seed) 161 { 162 tmp = mbrtowc (&wc, string + point, length - point, &ps); 163 if ((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 164 { 165 /* in this case, bytes are invalid or shorted to compose 166 multibyte char, so assume that the first byte represents 167 a single character anyway. */ 168 tmp = 1; 169 /* clear the state of the byte sequence, because 170 in this case effect of mbstate is undefined */ 171 memset(&ps, 0, sizeof (mbstate_t)); 172 } 173 else if (tmp == 0) 174 break; /* Found '\0' char. Can this happen? */ 175 else 176 { 177 if (find_non_zero) 178 { 179 if (wcwidth (wc) != 0) 180 prev = point; 181 } 182 else 183 prev = point; 184 } 185 186 point += tmp; 187 } 188 189 return prev; 190} 191 192/* return the number of bytes parsed from the multibyte sequence starting 193 at src, if a non-L'\0' wide character was recognized. It returns 0, 194 if a L'\0' wide character was recognized. It returns (size_t)(-1), 195 if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 196 if it couldn't parse a complete multibyte character. */ 197int 198_rl_get_char_len (src, ps) 199 char *src; 200 mbstate_t *ps; 201{ 202 size_t tmp; 203 204 tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 205 if (tmp == (size_t)(-2)) 206 { 207 /* shorted to compose multibyte char */
| 1/* mbutil.c -- readline multibyte character utility functions */ 2 3/* Copyright (C) 2001 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library, a library for 6 reading lines of text with interactive input and history editing. 7 8 The GNU Readline Library is free software; you can redistribute it 9 and/or modify it under the terms of the GNU General Public License 10 as published by the Free Software Foundation; either version 2, or 11 (at your option) any later version. 12 13 The GNU Readline Library is distributed in the hope that it will be 14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 The GNU General Public License is often shipped with GNU software, and 19 is generally kept in a file called COPYING or LICENSE. If you do not 20 have a copy of the license, write to the Free Software Foundation, 21 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22#define READLINE_LIBRARY 23 24#if defined (HAVE_CONFIG_H) 25# include <config.h> 26#endif 27 28#include <sys/types.h> 29#include <fcntl.h> 30#include "posixjmp.h" 31 32#if defined (HAVE_UNISTD_H) 33# include <unistd.h> /* for _POSIX_VERSION */ 34#endif /* HAVE_UNISTD_H */ 35 36#if defined (HAVE_STDLIB_H) 37# include <stdlib.h> 38#else 39# include "ansi_stdlib.h" 40#endif /* HAVE_STDLIB_H */ 41 42#include <stdio.h> 43#include <ctype.h> 44 45/* System-specific feature definitions and include files. */ 46#include "rldefs.h" 47#include "rlmbutil.h" 48 49#if defined (TIOCSTAT_IN_SYS_IOCTL) 50# include <sys/ioctl.h> 51#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52 53/* Some standard library routines. */ 54#include "readline.h" 55 56#include "rlprivate.h" 57#include "xmalloc.h" 58 59/* Declared here so it can be shared between the readline and history 60 libraries. */ 61#if defined (HANDLE_MULTIBYTE) 62int rl_byte_oriented = 0; 63#else 64int rl_byte_oriented = 1; 65#endif 66 67/* **************************************************************** */ 68/* */ 69/* Multibyte Character Utility Functions */ 70/* */ 71/* **************************************************************** */ 72 73#if defined(HANDLE_MULTIBYTE) 74 75static int 76_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77 char *string; 78 int seed, count, find_non_zero; 79{ 80 size_t tmp = 0; 81 mbstate_t ps; 82 int point = 0; 83 wchar_t wc; 84 85 memset(&ps, 0, sizeof (mbstate_t)); 86 if (seed < 0) 87 seed = 0; 88 if (count <= 0) 89 return seed; 90 91 point = seed + _rl_adjust_point(string, seed, &ps); 92 /* if this is true, means that seed was not pointed character 93 started byte. So correct the point and consume count */ 94 if (seed < point) 95 count --; 96 97 while (count > 0) 98 { 99 tmp = mbrtowc (&wc, string+point, strlen(string + point), &ps); 100 if ((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 101 { 102 /* invalid bytes. asume a byte represents a character */ 103 point++; 104 count--; 105 /* reset states. */ 106 memset(&ps, 0, sizeof(mbstate_t)); 107 } 108 else if (tmp == (size_t)0) 109 /* found '\0' char */ 110 break; 111 else 112 { 113 /* valid bytes */ 114 point += tmp; 115 if (find_non_zero) 116 { 117 if (wcwidth (wc) == 0) 118 continue; 119 else 120 count--; 121 } 122 else 123 count--; 124 } 125 } 126 127 if (find_non_zero) 128 { 129 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 130 while (wcwidth (wc) == 0) 131 { 132 point += tmp; 133 tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 134 if (tmp == (size_t)(0) || tmp == (size_t)(-1) || tmp == (size_t)(-2)) 135 break; 136 } 137 } 138 return point; 139} 140 141static int 142_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 143 char *string; 144 int seed, find_non_zero; 145{ 146 mbstate_t ps; 147 int prev, non_zero_prev, point, length; 148 size_t tmp; 149 wchar_t wc; 150 151 memset(&ps, 0, sizeof(mbstate_t)); 152 length = strlen(string); 153 154 if (seed < 0) 155 return 0; 156 else if (length < seed) 157 return length; 158 159 prev = non_zero_prev = point = 0; 160 while (point < seed) 161 { 162 tmp = mbrtowc (&wc, string + point, length - point, &ps); 163 if ((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 164 { 165 /* in this case, bytes are invalid or shorted to compose 166 multibyte char, so assume that the first byte represents 167 a single character anyway. */ 168 tmp = 1; 169 /* clear the state of the byte sequence, because 170 in this case effect of mbstate is undefined */ 171 memset(&ps, 0, sizeof (mbstate_t)); 172 } 173 else if (tmp == 0) 174 break; /* Found '\0' char. Can this happen? */ 175 else 176 { 177 if (find_non_zero) 178 { 179 if (wcwidth (wc) != 0) 180 prev = point; 181 } 182 else 183 prev = point; 184 } 185 186 point += tmp; 187 } 188 189 return prev; 190} 191 192/* return the number of bytes parsed from the multibyte sequence starting 193 at src, if a non-L'\0' wide character was recognized. It returns 0, 194 if a L'\0' wide character was recognized. It returns (size_t)(-1), 195 if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 196 if it couldn't parse a complete multibyte character. */ 197int 198_rl_get_char_len (src, ps) 199 char *src; 200 mbstate_t *ps; 201{ 202 size_t tmp; 203 204 tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 205 if (tmp == (size_t)(-2)) 206 { 207 /* shorted to compose multibyte char */
|
208 memset (ps, 0, sizeof(mbstate_t));
| 208 if (ps) 209 memset (ps, 0, sizeof(mbstate_t));
|
209 return -2; 210 } 211 else if (tmp == (size_t)(-1)) 212 { 213 /* invalid to compose multibyte char */ 214 /* initialize the conversion state */
| 210 return -2; 211 } 212 else if (tmp == (size_t)(-1)) 213 { 214 /* invalid to compose multibyte char */ 215 /* initialize the conversion state */
|
215 memset (ps, 0, sizeof(mbstate_t));
| 216 if (ps) 217 memset (ps, 0, sizeof(mbstate_t));
|
216 return -1; 217 } 218 else if (tmp == (size_t)0) 219 return 0; 220 else 221 return (int)tmp; 222} 223 224/* compare the specified two characters. If the characters matched, 225 return 1. Otherwise return 0. */ 226int 227_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
| 218 return -1; 219 } 220 else if (tmp == (size_t)0) 221 return 0; 222 else 223 return (int)tmp; 224} 225 226/* compare the specified two characters. If the characters matched, 227 return 1. Otherwise return 0. */ 228int 229_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
|
228 char *buf1, *buf2; 229 mbstate_t *ps1, *ps2; 230 int pos1, pos2;
| 230 char *buf1; 231 int pos1; 232 mbstate_t *ps1; 233 char *buf2; 234 int pos2; 235 mbstate_t *ps2;
|
231{ 232 int i, w1, w2; 233 234 if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 235 (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 236 (w1 != w2) || 237 (buf1[pos1] != buf2[pos2])) 238 return 0; 239 240 for (i = 1; i < w1; i++) 241 if (buf1[pos1+i] != buf2[pos2+i]) 242 return 0; 243 244 return 1; 245} 246 247/* adjust pointed byte and find mbstate of the point of string. 248 adjusted point will be point <= adjusted_point, and returns 249 differences of the byte(adjusted_point - point). 250 if point is invalied (point < 0 || more than string length), 251 it returns -1 */ 252int 253_rl_adjust_point(string, point, ps) 254 char *string; 255 int point; 256 mbstate_t *ps; 257{ 258 size_t tmp = 0; 259 int length; 260 int pos = 0; 261 262 length = strlen(string); 263 if (point < 0) 264 return -1; 265 if (length < point) 266 return -1; 267 268 while (pos < point) 269 { 270 tmp = mbrlen (string + pos, length - pos, ps); 271 if((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 272 { 273 /* in this case, bytes are invalid or shorted to compose 274 multibyte char, so assume that the first byte represents 275 a single character anyway. */ 276 pos++; 277 /* clear the state of the byte sequence, because 278 in this case effect of mbstate is undefined */
| 236{ 237 int i, w1, w2; 238 239 if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 240 (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 241 (w1 != w2) || 242 (buf1[pos1] != buf2[pos2])) 243 return 0; 244 245 for (i = 1; i < w1; i++) 246 if (buf1[pos1+i] != buf2[pos2+i]) 247 return 0; 248 249 return 1; 250} 251 252/* adjust pointed byte and find mbstate of the point of string. 253 adjusted point will be point <= adjusted_point, and returns 254 differences of the byte(adjusted_point - point). 255 if point is invalied (point < 0 || more than string length), 256 it returns -1 */ 257int 258_rl_adjust_point(string, point, ps) 259 char *string; 260 int point; 261 mbstate_t *ps; 262{ 263 size_t tmp = 0; 264 int length; 265 int pos = 0; 266 267 length = strlen(string); 268 if (point < 0) 269 return -1; 270 if (length < point) 271 return -1; 272 273 while (pos < point) 274 { 275 tmp = mbrlen (string + pos, length - pos, ps); 276 if((size_t)(tmp) == (size_t)-1 || (size_t)(tmp) == (size_t)-2) 277 { 278 /* in this case, bytes are invalid or shorted to compose 279 multibyte char, so assume that the first byte represents 280 a single character anyway. */ 281 pos++; 282 /* clear the state of the byte sequence, because 283 in this case effect of mbstate is undefined */
|
279 memset (ps, 0, sizeof (mbstate_t));
| 284 if (ps) 285 memset (ps, 0, sizeof (mbstate_t));
|
280 }
| 286 }
|
| 287 else if (tmp == 0) 288 pos++;
|
281 else 282 pos += tmp; 283 } 284 285 return (pos - point); 286} 287 288int 289_rl_is_mbchar_matched (string, seed, end, mbchar, length) 290 char *string; 291 int seed, end; 292 char *mbchar; 293 int length; 294{ 295 int i; 296 297 if ((end - seed) < length) 298 return 0; 299 300 for (i = 0; i < length; i++) 301 if (string[seed + i] != mbchar[i]) 302 return 0; 303 return 1; 304} 305#endif /* HANDLE_MULTIBYTE */ 306 307/* Find next `count' characters started byte point of the specified seed. 308 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 309 characters. */ 310#undef _rl_find_next_mbchar 311int 312_rl_find_next_mbchar (string, seed, count, flags) 313 char *string; 314 int seed, count, flags; 315{ 316#if defined (HANDLE_MULTIBYTE) 317 return _rl_find_next_mbchar_internal (string, seed, count, flags); 318#else 319 return (seed + count); 320#endif 321} 322 323/* Find previous character started byte point of the specified seed. 324 Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 325 we look for non-zero-width multibyte characters. */ 326#undef _rl_find_prev_mbchar 327int 328_rl_find_prev_mbchar (string, seed, flags) 329 char *string; 330 int seed, flags; 331{ 332#if defined (HANDLE_MULTIBYTE) 333 return _rl_find_prev_mbchar_internal (string, seed, flags); 334#else 335 return ((seed == 0) ? seed : seed - 1); 336#endif 337}
| 289 else 290 pos += tmp; 291 } 292 293 return (pos - point); 294} 295 296int 297_rl_is_mbchar_matched (string, seed, end, mbchar, length) 298 char *string; 299 int seed, end; 300 char *mbchar; 301 int length; 302{ 303 int i; 304 305 if ((end - seed) < length) 306 return 0; 307 308 for (i = 0; i < length; i++) 309 if (string[seed + i] != mbchar[i]) 310 return 0; 311 return 1; 312} 313#endif /* HANDLE_MULTIBYTE */ 314 315/* Find next `count' characters started byte point of the specified seed. 316 If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 317 characters. */ 318#undef _rl_find_next_mbchar 319int 320_rl_find_next_mbchar (string, seed, count, flags) 321 char *string; 322 int seed, count, flags; 323{ 324#if defined (HANDLE_MULTIBYTE) 325 return _rl_find_next_mbchar_internal (string, seed, count, flags); 326#else 327 return (seed + count); 328#endif 329} 330 331/* Find previous character started byte point of the specified seed. 332 Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 333 we look for non-zero-width multibyte characters. */ 334#undef _rl_find_prev_mbchar 335int 336_rl_find_prev_mbchar (string, seed, flags) 337 char *string; 338 int seed, flags; 339{ 340#if defined (HANDLE_MULTIBYTE) 341 return _rl_find_prev_mbchar_internal (string, seed, flags); 342#else 343 return ((seed == 0) ? seed : seed - 1); 344#endif 345}
|