1119610Sache/* mbutil.c -- readline multibyte character utility functions */ 2119610Sache 3157184Sache/* Copyright (C) 2001-2005 Free Software Foundation, Inc. 4119610Sache 5119610Sache This file is part of the GNU Readline Library, a library for 6119610Sache reading lines of text with interactive input and history editing. 7119610Sache 8119610Sache The GNU Readline Library is free software; you can redistribute it 9119610Sache and/or modify it under the terms of the GNU General Public License 10119610Sache as published by the Free Software Foundation; either version 2, or 11119610Sache (at your option) any later version. 12119610Sache 13119610Sache The GNU Readline Library is distributed in the hope that it will be 14119610Sache useful, but WITHOUT ANY WARRANTY; without even the implied warranty 15119610Sache of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16119610Sache GNU General Public License for more details. 17119610Sache 18119610Sache The GNU General Public License is often shipped with GNU software, and 19119610Sache is generally kept in a file called COPYING or LICENSE. If you do not 20119610Sache have a copy of the license, write to the Free Software Foundation, 21119610Sache 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ 22119610Sache#define READLINE_LIBRARY 23119610Sache 24119610Sache#if defined (HAVE_CONFIG_H) 25119610Sache# include <config.h> 26119610Sache#endif 27119610Sache 28119610Sache#include <sys/types.h> 29119610Sache#include <fcntl.h> 30119610Sache#include "posixjmp.h" 31119610Sache 32119610Sache#if defined (HAVE_UNISTD_H) 33119610Sache# include <unistd.h> /* for _POSIX_VERSION */ 34119610Sache#endif /* HAVE_UNISTD_H */ 35119610Sache 36119610Sache#if defined (HAVE_STDLIB_H) 37119610Sache# include <stdlib.h> 38119610Sache#else 39119610Sache# include "ansi_stdlib.h" 40119610Sache#endif /* HAVE_STDLIB_H */ 41119610Sache 42119610Sache#include <stdio.h> 43119610Sache#include <ctype.h> 44119610Sache 45119610Sache/* System-specific feature definitions and include files. */ 46119610Sache#include "rldefs.h" 47119610Sache#include "rlmbutil.h" 48119610Sache 49119610Sache#if defined (TIOCSTAT_IN_SYS_IOCTL) 50119610Sache# include <sys/ioctl.h> 51119610Sache#endif /* TIOCSTAT_IN_SYS_IOCTL */ 52119610Sache 53119610Sache/* Some standard library routines. */ 54119610Sache#include "readline.h" 55119610Sache 56119610Sache#include "rlprivate.h" 57119610Sache#include "xmalloc.h" 58119610Sache 59119610Sache/* Declared here so it can be shared between the readline and history 60119610Sache libraries. */ 61119610Sache#if defined (HANDLE_MULTIBYTE) 62119610Sacheint rl_byte_oriented = 0; 63119610Sache#else 64119610Sacheint rl_byte_oriented = 1; 65119610Sache#endif 66119610Sache 67119610Sache/* **************************************************************** */ 68119610Sache/* */ 69119610Sache/* Multibyte Character Utility Functions */ 70119610Sache/* */ 71119610Sache/* **************************************************************** */ 72119610Sache 73119610Sache#if defined(HANDLE_MULTIBYTE) 74119610Sache 75119610Sachestatic int 76119610Sache_rl_find_next_mbchar_internal (string, seed, count, find_non_zero) 77119610Sache char *string; 78119610Sache int seed, count, find_non_zero; 79119610Sache{ 80157184Sache size_t tmp; 81119610Sache mbstate_t ps; 82157184Sache int point; 83119610Sache wchar_t wc; 84119610Sache 85157184Sache tmp = 0; 86157184Sache 87119610Sache memset(&ps, 0, sizeof (mbstate_t)); 88119610Sache if (seed < 0) 89119610Sache seed = 0; 90119610Sache if (count <= 0) 91119610Sache return seed; 92119610Sache 93157184Sache point = seed + _rl_adjust_point (string, seed, &ps); 94119610Sache /* if this is true, means that seed was not pointed character 95119610Sache started byte. So correct the point and consume count */ 96119610Sache if (seed < point) 97136644Sache count--; 98119610Sache 99119610Sache while (count > 0) 100119610Sache { 101119610Sache tmp = mbrtowc (&wc, string+point, strlen(string + point), &ps); 102136644Sache if (MB_INVALIDCH ((size_t)tmp)) 103119610Sache { 104119610Sache /* invalid bytes. asume a byte represents a character */ 105119610Sache point++; 106119610Sache count--; 107119610Sache /* reset states. */ 108119610Sache memset(&ps, 0, sizeof(mbstate_t)); 109119610Sache } 110136644Sache else if (MB_NULLWCH (tmp)) 111136644Sache break; /* found wide '\0' */ 112119610Sache else 113119610Sache { 114119610Sache /* valid bytes */ 115119610Sache point += tmp; 116119610Sache if (find_non_zero) 117119610Sache { 118119610Sache if (wcwidth (wc) == 0) 119119610Sache continue; 120119610Sache else 121119610Sache count--; 122119610Sache } 123119610Sache else 124119610Sache count--; 125119610Sache } 126119610Sache } 127119610Sache 128119610Sache if (find_non_zero) 129119610Sache { 130119610Sache tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 131136647Sache while (tmp > 0 && wcwidth (wc) == 0) 132119610Sache { 133119610Sache point += tmp; 134119610Sache tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps); 135136647Sache if (MB_NULLWCH (tmp) || MB_INVALIDCH (tmp)) 136119610Sache break; 137119610Sache } 138119610Sache } 139157184Sache 140157184Sache return point; 141119610Sache} 142119610Sache 143119610Sachestatic int 144119610Sache_rl_find_prev_mbchar_internal (string, seed, find_non_zero) 145119610Sache char *string; 146119610Sache int seed, find_non_zero; 147119610Sache{ 148119610Sache mbstate_t ps; 149119610Sache int prev, non_zero_prev, point, length; 150119610Sache size_t tmp; 151119610Sache wchar_t wc; 152119610Sache 153119610Sache memset(&ps, 0, sizeof(mbstate_t)); 154119610Sache length = strlen(string); 155119610Sache 156119610Sache if (seed < 0) 157119610Sache return 0; 158119610Sache else if (length < seed) 159119610Sache return length; 160119610Sache 161119610Sache prev = non_zero_prev = point = 0; 162119610Sache while (point < seed) 163119610Sache { 164119610Sache tmp = mbrtowc (&wc, string + point, length - point, &ps); 165136644Sache if (MB_INVALIDCH ((size_t)tmp)) 166119610Sache { 167119610Sache /* in this case, bytes are invalid or shorted to compose 168119610Sache multibyte char, so assume that the first byte represents 169119610Sache a single character anyway. */ 170119610Sache tmp = 1; 171119610Sache /* clear the state of the byte sequence, because 172119610Sache in this case effect of mbstate is undefined */ 173119610Sache memset(&ps, 0, sizeof (mbstate_t)); 174136644Sache 175136644Sache /* Since we're assuming that this byte represents a single 176136644Sache non-zero-width character, don't forget about it. */ 177136644Sache prev = point; 178119610Sache } 179136644Sache else if (MB_NULLWCH (tmp)) 180119610Sache break; /* Found '\0' char. Can this happen? */ 181119610Sache else 182119610Sache { 183119610Sache if (find_non_zero) 184119610Sache { 185119610Sache if (wcwidth (wc) != 0) 186119610Sache prev = point; 187119610Sache } 188119610Sache else 189119610Sache prev = point; 190119610Sache } 191119610Sache 192119610Sache point += tmp; 193119610Sache } 194119610Sache 195119610Sache return prev; 196119610Sache} 197119610Sache 198119610Sache/* return the number of bytes parsed from the multibyte sequence starting 199119610Sache at src, if a non-L'\0' wide character was recognized. It returns 0, 200119610Sache if a L'\0' wide character was recognized. It returns (size_t)(-1), 201119610Sache if an invalid multibyte sequence was encountered. It returns (size_t)(-2) 202119610Sache if it couldn't parse a complete multibyte character. */ 203119610Sacheint 204119610Sache_rl_get_char_len (src, ps) 205119610Sache char *src; 206119610Sache mbstate_t *ps; 207119610Sache{ 208119610Sache size_t tmp; 209119610Sache 210119610Sache tmp = mbrlen((const char *)src, (size_t)strlen (src), ps); 211119610Sache if (tmp == (size_t)(-2)) 212119610Sache { 213119610Sache /* shorted to compose multibyte char */ 214125759Sache if (ps) 215125759Sache memset (ps, 0, sizeof(mbstate_t)); 216119610Sache return -2; 217119610Sache } 218119610Sache else if (tmp == (size_t)(-1)) 219119610Sache { 220119610Sache /* invalid to compose multibyte char */ 221119610Sache /* initialize the conversion state */ 222125759Sache if (ps) 223125759Sache memset (ps, 0, sizeof(mbstate_t)); 224119610Sache return -1; 225119610Sache } 226119610Sache else if (tmp == (size_t)0) 227119610Sache return 0; 228119610Sache else 229119610Sache return (int)tmp; 230119610Sache} 231119610Sache 232119610Sache/* compare the specified two characters. If the characters matched, 233119610Sache return 1. Otherwise return 0. */ 234119610Sacheint 235119610Sache_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2) 236125759Sache char *buf1; 237125759Sache int pos1; 238125759Sache mbstate_t *ps1; 239125759Sache char *buf2; 240125759Sache int pos2; 241125759Sache mbstate_t *ps2; 242119610Sache{ 243119610Sache int i, w1, w2; 244119610Sache 245119610Sache if ((w1 = _rl_get_char_len (&buf1[pos1], ps1)) <= 0 || 246119610Sache (w2 = _rl_get_char_len (&buf2[pos2], ps2)) <= 0 || 247119610Sache (w1 != w2) || 248119610Sache (buf1[pos1] != buf2[pos2])) 249119610Sache return 0; 250119610Sache 251119610Sache for (i = 1; i < w1; i++) 252119610Sache if (buf1[pos1+i] != buf2[pos2+i]) 253119610Sache return 0; 254119610Sache 255119610Sache return 1; 256119610Sache} 257119610Sache 258119610Sache/* adjust pointed byte and find mbstate of the point of string. 259119610Sache adjusted point will be point <= adjusted_point, and returns 260119610Sache differences of the byte(adjusted_point - point). 261119610Sache if point is invalied (point < 0 || more than string length), 262119610Sache it returns -1 */ 263119610Sacheint 264119610Sache_rl_adjust_point(string, point, ps) 265119610Sache char *string; 266119610Sache int point; 267119610Sache mbstate_t *ps; 268119610Sache{ 269119610Sache size_t tmp = 0; 270119610Sache int length; 271119610Sache int pos = 0; 272119610Sache 273119610Sache length = strlen(string); 274119610Sache if (point < 0) 275119610Sache return -1; 276119610Sache if (length < point) 277119610Sache return -1; 278119610Sache 279119610Sache while (pos < point) 280119610Sache { 281119610Sache tmp = mbrlen (string + pos, length - pos, ps); 282136644Sache if (MB_INVALIDCH ((size_t)tmp)) 283119610Sache { 284119610Sache /* in this case, bytes are invalid or shorted to compose 285119610Sache multibyte char, so assume that the first byte represents 286119610Sache a single character anyway. */ 287119610Sache pos++; 288119610Sache /* clear the state of the byte sequence, because 289119610Sache in this case effect of mbstate is undefined */ 290125759Sache if (ps) 291125759Sache memset (ps, 0, sizeof (mbstate_t)); 292119610Sache } 293136644Sache else if (MB_NULLWCH (tmp)) 294125759Sache pos++; 295119610Sache else 296119610Sache pos += tmp; 297119610Sache } 298119610Sache 299119610Sache return (pos - point); 300119610Sache} 301119610Sache 302119610Sacheint 303119610Sache_rl_is_mbchar_matched (string, seed, end, mbchar, length) 304119610Sache char *string; 305119610Sache int seed, end; 306119610Sache char *mbchar; 307119610Sache int length; 308119610Sache{ 309119610Sache int i; 310119610Sache 311119610Sache if ((end - seed) < length) 312119610Sache return 0; 313119610Sache 314119610Sache for (i = 0; i < length; i++) 315119610Sache if (string[seed + i] != mbchar[i]) 316119610Sache return 0; 317119610Sache return 1; 318119610Sache} 319157184Sache 320157184Sachewchar_t 321157184Sache_rl_char_value (buf, ind) 322157184Sache char *buf; 323157184Sache int ind; 324157184Sache{ 325157184Sache size_t tmp; 326157184Sache wchar_t wc; 327157184Sache mbstate_t ps; 328157184Sache int l; 329157184Sache 330157184Sache if (MB_LEN_MAX == 1 || rl_byte_oriented) 331157184Sache return ((wchar_t) buf[ind]); 332157184Sache l = strlen (buf); 333157184Sache if (ind >= l - 1) 334157184Sache return ((wchar_t) buf[ind]); 335157184Sache memset (&ps, 0, sizeof (mbstate_t)); 336157184Sache tmp = mbrtowc (&wc, buf + ind, l - ind, &ps); 337157184Sache if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp)) 338157184Sache return ((wchar_t) buf[ind]); 339157184Sache return wc; 340157184Sache} 341119610Sache#endif /* HANDLE_MULTIBYTE */ 342119610Sache 343119610Sache/* Find next `count' characters started byte point of the specified seed. 344119610Sache If flags is MB_FIND_NONZERO, we look for non-zero-width multibyte 345119610Sache characters. */ 346119610Sache#undef _rl_find_next_mbchar 347119610Sacheint 348119610Sache_rl_find_next_mbchar (string, seed, count, flags) 349119610Sache char *string; 350119610Sache int seed, count, flags; 351119610Sache{ 352119610Sache#if defined (HANDLE_MULTIBYTE) 353119610Sache return _rl_find_next_mbchar_internal (string, seed, count, flags); 354119610Sache#else 355119610Sache return (seed + count); 356119610Sache#endif 357119610Sache} 358119610Sache 359119610Sache/* Find previous character started byte point of the specified seed. 360119610Sache Returned point will be point <= seed. If flags is MB_FIND_NONZERO, 361119610Sache we look for non-zero-width multibyte characters. */ 362119610Sache#undef _rl_find_prev_mbchar 363119610Sacheint 364119610Sache_rl_find_prev_mbchar (string, seed, flags) 365119610Sache char *string; 366119610Sache int seed, flags; 367119610Sache{ 368119610Sache#if defined (HANDLE_MULTIBYTE) 369119610Sache return _rl_find_prev_mbchar_internal (string, seed, flags); 370119610Sache#else 371119610Sache return ((seed == 0) ? seed : seed - 1); 372119610Sache#endif 373119610Sache} 374