1/* rlmbutil.h -- utility functions for multibyte characters. */ 2 3/* Copyright (C) 2001-2015 Free Software Foundation, Inc. 4 5 This file is part of the GNU Readline Library (Readline), a library 6 for reading lines of text with interactive input and history editing. 7 8 Readline is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 Readline is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Readline. If not, see <http://www.gnu.org/licenses/>. 20*/ 21 22#if !defined (_RL_MBUTIL_H_) 23#define _RL_MBUTIL_H_ 24 25#include "rlstdc.h" 26 27/************************************************/ 28/* check multibyte capability for I18N code */ 29/************************************************/ 30 31/* For platforms which support the ISO C amendment 1 functionality we 32 support user defined character classes. */ 33 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ 34#if defined (HAVE_WCTYPE_H) && defined (HAVE_WCHAR_H) && defined (HAVE_LOCALE_H) 35# include <wchar.h> 36# include <wctype.h> 37# if defined (HAVE_ISWCTYPE) && \ 38 defined (HAVE_ISWLOWER) && \ 39 defined (HAVE_ISWUPPER) && \ 40 defined (HAVE_MBSRTOWCS) && \ 41 defined (HAVE_MBRTOWC) && \ 42 defined (HAVE_MBRLEN) && \ 43 defined (HAVE_TOWLOWER) && \ 44 defined (HAVE_TOWUPPER) && \ 45 defined (HAVE_WCHAR_T) && \ 46 defined (HAVE_WCWIDTH) 47 /* system is supposed to support XPG5 */ 48# define HANDLE_MULTIBYTE 1 49# endif 50#endif 51 52/* If we don't want multibyte chars even on a system that supports them, let 53 the configuring user turn multibyte support off. */ 54#if defined (NO_MULTIBYTE_SUPPORT) 55# undef HANDLE_MULTIBYTE 56#endif 57 58/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 59#if HANDLE_MULTIBYTE && !defined (HAVE_MBSTATE_T) 60# define wcsrtombs(dest, src, len, ps) (wcsrtombs) (dest, src, len, 0) 61# define mbsrtowcs(dest, src, len, ps) (mbsrtowcs) (dest, src, len, 0) 62# define wcrtomb(s, wc, ps) (wcrtomb) (s, wc, 0) 63# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) 64# define mbrlen(s, n, ps) (mbrlen) (s, n, 0) 65# define mbstate_t int 66#endif 67 68/* Make sure MB_LEN_MAX is at least 16 on systems that claim to be able to 69 handle multibyte chars (some systems define MB_LEN_MAX as 1) */ 70#ifdef HANDLE_MULTIBYTE 71# include <limits.h> 72# if defined(MB_LEN_MAX) && (MB_LEN_MAX < 16) 73# undef MB_LEN_MAX 74# endif 75# if !defined (MB_LEN_MAX) 76# define MB_LEN_MAX 16 77# endif 78#endif 79 80/************************************************/ 81/* end of multibyte capability checks for I18N */ 82/************************************************/ 83 84/* 85 * Flags for _rl_find_prev_mbchar and _rl_find_next_mbchar: 86 * 87 * MB_FIND_ANY find any multibyte character 88 * MB_FIND_NONZERO find a non-zero-width multibyte character 89 */ 90 91#define MB_FIND_ANY 0x00 92#define MB_FIND_NONZERO 0x01 93 94extern int _rl_find_prev_mbchar PARAMS((char *, int, int)); 95extern int _rl_find_next_mbchar PARAMS((char *, int, int, int)); 96 97#ifdef HANDLE_MULTIBYTE 98 99extern int _rl_compare_chars PARAMS((char *, int, mbstate_t *, char *, int, mbstate_t *)); 100extern int _rl_get_char_len PARAMS((char *, mbstate_t *)); 101extern int _rl_adjust_point PARAMS((char *, int, mbstate_t *)); 102 103extern int _rl_read_mbchar PARAMS((char *, int)); 104extern int _rl_read_mbstring PARAMS((int, char *, int)); 105 106extern int _rl_is_mbchar_matched PARAMS((char *, int, int, char *, int)); 107 108extern wchar_t _rl_char_value PARAMS((char *, int)); 109extern int _rl_walphabetic PARAMS((wchar_t)); 110 111#define _rl_to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc)) 112#define _rl_to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc)) 113 114#define MB_NEXTCHAR(b,s,c,f) \ 115 ((MB_CUR_MAX > 1 && rl_byte_oriented == 0) \ 116 ? _rl_find_next_mbchar ((b), (s), (c), (f)) \ 117 : ((s) + (c))) 118#define MB_PREVCHAR(b,s,f) \ 119 ((MB_CUR_MAX > 1 && rl_byte_oriented == 0) \ 120 ? _rl_find_prev_mbchar ((b), (s), (f)) \ 121 : ((s) - 1)) 122 123#define MB_INVALIDCH(x) ((x) == (size_t)-1 || (x) == (size_t)-2) 124#define MB_NULLWCH(x) ((x) == 0) 125 126/* Try and shortcut the printable ascii characters to cut down the number of 127 calls to a libc wcwidth() */ 128static inline int 129_rl_wcwidth (wc) 130 wchar_t wc; 131{ 132 switch (wc) 133 { 134 case ' ': case '!': case '"': case '#': case '%': 135 case '&': case '\'': case '(': case ')': case '*': 136 case '+': case ',': case '-': case '.': case '/': 137 case '0': case '1': case '2': case '3': case '4': 138 case '5': case '6': case '7': case '8': case '9': 139 case ':': case ';': case '<': case '=': case '>': 140 case '?': 141 case 'A': case 'B': case 'C': case 'D': case 'E': 142 case 'F': case 'G': case 'H': case 'I': case 'J': 143 case 'K': case 'L': case 'M': case 'N': case 'O': 144 case 'P': case 'Q': case 'R': case 'S': case 'T': 145 case 'U': case 'V': case 'W': case 'X': case 'Y': 146 case 'Z': 147 case '[': case '\\': case ']': case '^': case '_': 148 case 'a': case 'b': case 'c': case 'd': case 'e': 149 case 'f': case 'g': case 'h': case 'i': case 'j': 150 case 'k': case 'l': case 'm': case 'n': case 'o': 151 case 'p': case 'q': case 'r': case 's': case 't': 152 case 'u': case 'v': case 'w': case 'x': case 'y': 153 case 'z': case '{': case '|': case '}': case '~': 154 return 1; 155 default: 156 return wcwidth (wc); 157 } 158} 159 160/* Unicode combining characters range from U+0300 to U+036F */ 161#define UNICODE_COMBINING_CHAR(x) ((x) >= 768 && (x) <= 879) 162 163#if defined (WCWIDTH_BROKEN) 164# define WCWIDTH(wc) ((_rl_utf8locale && UNICODE_COMBINING_CHAR(wc)) ? 0 : _rl_wcwidth(wc)) 165#else 166# define WCWIDTH(wc) _rl_wcwidth(wc) 167#endif 168 169#if defined (WCWIDTH_BROKEN) 170# define IS_COMBINING_CHAR(x) (WCWIDTH(x) == 0 && iswcntrl(x) == 0) 171#else 172# define IS_COMBINING_CHAR(x) (WCWIDTH(x) == 0) 173#endif 174 175#define UTF8_SINGLEBYTE(c) (((c) & 0x80) == 0) 176#define UTF8_MBFIRSTCHAR(c) (((c) & 0xc0) == 0xc0) 177#define UTF8_MBCHAR(c) (((c) & 0xc0) == 0x80) 178 179#else /* !HANDLE_MULTIBYTE */ 180 181#undef MB_LEN_MAX 182#undef MB_CUR_MAX 183 184#define MB_LEN_MAX 1 185#define MB_CUR_MAX 1 186 187#define _rl_find_prev_mbchar(b, i, f) (((i) == 0) ? (i) : ((i) - 1)) 188#define _rl_find_next_mbchar(b, i1, i2, f) ((i1) + (i2)) 189 190#define _rl_char_value(buf,ind) ((buf)[(ind)]) 191 192#define _rl_walphabetic(c) (rl_alphabetic (c)) 193 194#define _rl_to_wupper(c) (_rl_to_upper (c)) 195#define _rl_to_wlower(c) (_rl_to_lower (c)) 196 197#define MB_NEXTCHAR(b,s,c,f) ((s) + (c)) 198#define MB_PREVCHAR(b,s,f) ((s) - 1) 199 200#define MB_INVALIDCH(x) (0) 201#define MB_NULLWCH(x) (0) 202 203#define UTF8_SINGLEBYTE(c) (1) 204 205#if !defined (HAVE_WCHAR_T) && !defined (wchar_t) 206# define wchar_t int 207#endif 208 209#endif /* !HANDLE_MULTIBYTE */ 210 211extern int rl_byte_oriented; 212 213#endif /* _RL_MBUTIL_H_ */ 214