1313981Spfg/* $NetBSD: chartype.c,v 1.23 2016/02/28 23:02:24 christos Exp $ */ 2276881Sbapt 3276881Sbapt/*- 4276881Sbapt * Copyright (c) 2009 The NetBSD Foundation, Inc. 5276881Sbapt * All rights reserved. 6276881Sbapt * 7276881Sbapt * Redistribution and use in source and binary forms, with or without 8276881Sbapt * modification, are permitted provided that the following conditions 9276881Sbapt * are met: 10276881Sbapt * 1. Redistributions of source code must retain the above copyright 11276881Sbapt * notice, this list of conditions and the following disclaimer. 12276881Sbapt * 2. Redistributions in binary form must reproduce the above copyright 13276881Sbapt * notice, this list of conditions and the following disclaimer in the 14276881Sbapt * documentation and/or other materials provided with the distribution. 15276881Sbapt * 16276881Sbapt * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17276881Sbapt * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18276881Sbapt * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19276881Sbapt * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20276881Sbapt * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21276881Sbapt * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22276881Sbapt * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23276881Sbapt * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24276881Sbapt * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25276881Sbapt * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26276881Sbapt * POSSIBILITY OF SUCH DAMAGE. 27276881Sbapt */ 28276881Sbapt 29276881Sbapt/* 30276881Sbapt * chartype.c: character classification and meta information 31276881Sbapt */ 32276881Sbapt#include "config.h" 33276881Sbapt#if !defined(lint) && !defined(SCCSID) 34313981Spfg__RCSID("$NetBSD: chartype.c,v 1.23 2016/02/28 23:02:24 christos Exp $"); 35276881Sbapt#endif /* not lint && not SCCSID */ 36276881Sbapt#include <sys/cdefs.h> 37276881Sbapt__FBSDID("$FreeBSD: stable/11/lib/libedit/chartype.c 343460 2019-01-25 22:52:49Z jilles $"); 38276881Sbapt 39313981Spfg#include <ctype.h> 40343091Sbapt#include <limits.h> 41276881Sbapt#include <stdlib.h> 42313981Spfg#include <string.h> 43276881Sbapt 44313981Spfg#include "el.h" 45313981Spfg 46276881Sbapt#define CT_BUFSIZ ((size_t)1024) 47276881Sbapt 48276881Sbapt#ifdef WIDECHAR 49283084Sbaptprotected int 50283084Sbaptct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize) 51276881Sbapt{ 52276881Sbapt void *p; 53283084Sbapt 54283084Sbapt if (csize <= conv->csize) 55283084Sbapt return 0; 56283084Sbapt 57283084Sbapt conv->csize = csize; 58283084Sbapt 59283084Sbapt p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff)); 60283084Sbapt if (p == NULL) { 61283084Sbapt conv->csize = 0; 62283084Sbapt el_free(conv->cbuff); 63283084Sbapt conv->cbuff = NULL; 64283084Sbapt return -1; 65276881Sbapt } 66283084Sbapt conv->cbuff = p; 67283084Sbapt return 0; 68283084Sbapt} 69276881Sbapt 70283084Sbaptprotected int 71283084Sbaptct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize) 72283084Sbapt{ 73283084Sbapt void *p; 74283084Sbapt 75313981Spfg if (wsize <= conv->wsize) 76283084Sbapt return 0; 77283084Sbapt 78283084Sbapt conv->wsize = wsize; 79283084Sbapt 80283084Sbapt p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff)); 81283084Sbapt if (p == NULL) { 82283084Sbapt conv->wsize = 0; 83283084Sbapt el_free(conv->wbuff); 84283084Sbapt conv->wbuff = NULL; 85283084Sbapt return -1; 86276881Sbapt } 87283084Sbapt conv->wbuff = p; 88283084Sbapt return 0; 89276881Sbapt} 90276881Sbapt 91276881Sbapt 92276881Sbaptpublic char * 93276881Sbaptct_encode_string(const Char *s, ct_buffer_t *conv) 94276881Sbapt{ 95276881Sbapt char *dst; 96283084Sbapt ssize_t used; 97276881Sbapt 98276881Sbapt if (!s) 99276881Sbapt return NULL; 100276881Sbapt 101276881Sbapt dst = conv->cbuff; 102283084Sbapt for (;;) { 103283084Sbapt used = (ssize_t)(dst - conv->cbuff); 104283084Sbapt if ((conv->csize - (size_t)used) < 5) { 105283084Sbapt if (ct_conv_cbuff_resize(conv, 106283084Sbapt conv->csize + CT_BUFSIZ) == -1) 107276881Sbapt return NULL; 108276881Sbapt dst = conv->cbuff + used; 109276881Sbapt } 110283084Sbapt if (!*s) 111283084Sbapt break; 112276881Sbapt used = ct_encode_char(dst, (size_t)5, *s); 113276881Sbapt if (used == -1) /* failed to encode, need more buffer space */ 114276881Sbapt abort(); 115276881Sbapt ++s; 116276881Sbapt dst += used; 117276881Sbapt } 118276881Sbapt *dst = '\0'; 119276881Sbapt return conv->cbuff; 120276881Sbapt} 121276881Sbapt 122276881Sbaptpublic Char * 123276881Sbaptct_decode_string(const char *s, ct_buffer_t *conv) 124276881Sbapt{ 125283084Sbapt size_t len; 126276881Sbapt 127276881Sbapt if (!s) 128276881Sbapt return NULL; 129276881Sbapt 130276881Sbapt len = ct_mbstowcs(NULL, s, (size_t)0); 131276881Sbapt if (len == (size_t)-1) 132276881Sbapt return NULL; 133283084Sbapt 134283084Sbapt if (conv->wsize < ++len) 135283084Sbapt if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1) 136283084Sbapt return NULL; 137283084Sbapt 138276881Sbapt ct_mbstowcs(conv->wbuff, s, conv->wsize); 139276881Sbapt return conv->wbuff; 140276881Sbapt} 141276881Sbapt 142276881Sbapt 143276881Sbaptprotected Char ** 144276881Sbaptct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 145276881Sbapt{ 146276881Sbapt size_t bufspace; 147276881Sbapt int i; 148276881Sbapt Char *p; 149276881Sbapt Char **wargv; 150276881Sbapt ssize_t bytes; 151276881Sbapt 152276881Sbapt /* Make sure we have enough space in the conversion buffer to store all 153276881Sbapt * the argv strings. */ 154276881Sbapt for (i = 0, bufspace = 0; i < argc; ++i) 155276881Sbapt bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; 156283084Sbapt if (conv->wsize < ++bufspace) 157283084Sbapt if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1) 158283084Sbapt return NULL; 159276881Sbapt 160343460Sjilles wargv = el_malloc((size_t)(argc + 1) * sizeof(*wargv)); 161276881Sbapt 162276881Sbapt for (i = 0, p = conv->wbuff; i < argc; ++i) { 163276881Sbapt if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 164276881Sbapt wargv[i] = NULL; 165276881Sbapt continue; 166276881Sbapt } else { 167276881Sbapt wargv[i] = p; 168276881Sbapt bytes = (ssize_t)mbstowcs(p, argv[i], bufspace); 169276881Sbapt } 170276881Sbapt if (bytes == -1) { 171276881Sbapt el_free(wargv); 172276881Sbapt return NULL; 173276881Sbapt } else 174276881Sbapt bytes++; /* include '\0' in the count */ 175276881Sbapt bufspace -= (size_t)bytes; 176276881Sbapt p += bytes; 177276881Sbapt } 178343460Sjilles wargv[i] = NULL; 179276881Sbapt 180276881Sbapt return wargv; 181276881Sbapt} 182276881Sbapt 183276881Sbapt 184276881Sbaptprotected size_t 185276881Sbaptct_enc_width(Char c) 186276881Sbapt{ 187343091Sbapt mbstate_t ps = (mbstate_t){{0}}; 188343091Sbapt size_t len; 189343091Sbapt char cbuf[MB_LEN_MAX]; 190343091Sbapt len = ct_wcrtomb(cbuf, c, &ps); 191343091Sbapt if (len == (size_t)-1) 192343091Sbapt return (0); 193343091Sbapt return (len); 194276881Sbapt} 195276881Sbapt 196276881Sbaptprotected ssize_t 197276881Sbaptct_encode_char(char *dst, size_t len, Char c) 198276881Sbapt{ 199276881Sbapt ssize_t l = 0; 200276881Sbapt if (len < ct_enc_width(c)) 201276881Sbapt return -1; 202276881Sbapt l = ct_wctomb(dst, c); 203276881Sbapt 204276881Sbapt if (l < 0) { 205276881Sbapt ct_wctomb_reset; 206276881Sbapt l = 0; 207276881Sbapt } 208276881Sbapt return l; 209276881Sbapt} 210313981Spfg 211313981Spfgsize_t 212313981Spfgct_mbrtowc(wchar_t *wc, const char *s, size_t n) 213313981Spfg{ 214313981Spfg mbstate_t mbs; 215313981Spfg /* This only works because UTF-8 is stateless */ 216313981Spfg memset(&mbs, 0, sizeof(mbs)); 217313981Spfg return mbrtowc(wc, s, n, &mbs); 218313981Spfg} 219313981Spfg 220313981Spfg#else 221313981Spfg 222313981Spfgsize_t 223313981Spfgct_mbrtowc(wchar_t *wc, const char *s, size_t n) 224325391Spfg{ 225313981Spfg if (s == NULL) 226313981Spfg return 0; 227313981Spfg if (n == 0) 228313981Spfg return (size_t)-2; 229313981Spfg if (wc != NULL) 230313981Spfg *wc = *s; 231313981Spfg return *s != '\0'; 232313981Spfg} 233276881Sbapt#endif 234276881Sbapt 235276881Sbaptprotected const Char * 236276881Sbaptct_visual_string(const Char *s) 237276881Sbapt{ 238276881Sbapt static Char *buff = NULL; 239276881Sbapt static size_t buffsize = 0; 240276881Sbapt void *p; 241276881Sbapt Char *dst; 242276881Sbapt ssize_t used = 0; 243276881Sbapt 244276881Sbapt if (!s) 245276881Sbapt return NULL; 246276881Sbapt if (!buff) { 247276881Sbapt buffsize = CT_BUFSIZ; 248276881Sbapt buff = el_malloc(buffsize * sizeof(*buff)); 249276881Sbapt } 250276881Sbapt dst = buff; 251276881Sbapt while (*s) { 252276881Sbapt used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s); 253276881Sbapt if (used == -1) { /* failed to encode, need more buffer space */ 254276881Sbapt used = dst - buff; 255276881Sbapt buffsize += CT_BUFSIZ; 256276881Sbapt p = el_realloc(buff, buffsize * sizeof(*buff)); 257276881Sbapt if (p == NULL) 258276881Sbapt goto out; 259276881Sbapt buff = p; 260276881Sbapt dst = buff + used; 261276881Sbapt /* don't increment s here - we want to retry it! */ 262276881Sbapt } 263276881Sbapt else 264276881Sbapt ++s; 265276881Sbapt dst += used; 266276881Sbapt } 267276881Sbapt if (dst >= (buff + buffsize)) { /* sigh */ 268276881Sbapt buffsize += 1; 269276881Sbapt p = el_realloc(buff, buffsize * sizeof(*buff)); 270276881Sbapt if (p == NULL) 271276881Sbapt goto out; 272276881Sbapt buff = p; 273276881Sbapt dst = buff + buffsize - 1; 274276881Sbapt } 275276881Sbapt *dst = 0; 276276881Sbapt return buff; 277276881Sbaptout: 278276881Sbapt el_free(buff); 279276881Sbapt buffsize = 0; 280276881Sbapt return NULL; 281276881Sbapt} 282276881Sbapt 283276881Sbapt 284276881Sbapt 285276881Sbaptprotected int 286276881Sbaptct_visual_width(Char c) 287276881Sbapt{ 288276881Sbapt int t = ct_chr_class(c); 289276881Sbapt switch (t) { 290276881Sbapt case CHTYPE_ASCIICTL: 291276881Sbapt return 2; /* ^@ ^? etc. */ 292276881Sbapt case CHTYPE_TAB: 293276881Sbapt return 1; /* Hmm, this really need to be handled outside! */ 294276881Sbapt case CHTYPE_NL: 295276881Sbapt return 0; /* Should this be 1 instead? */ 296276881Sbapt#ifdef WIDECHAR 297276881Sbapt case CHTYPE_PRINT: 298276881Sbapt return wcwidth(c); 299276881Sbapt case CHTYPE_NONPRINT: 300276881Sbapt if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 301276881Sbapt return 8; /* \U+12345 */ 302276881Sbapt else 303276881Sbapt return 7; /* \U+1234 */ 304276881Sbapt#else 305276881Sbapt case CHTYPE_PRINT: 306276881Sbapt return 1; 307276881Sbapt case CHTYPE_NONPRINT: 308276881Sbapt return 4; /* \123 */ 309276881Sbapt#endif 310276881Sbapt default: 311276881Sbapt return 0; /* should not happen */ 312276881Sbapt } 313276881Sbapt} 314276881Sbapt 315276881Sbapt 316276881Sbaptprotected ssize_t 317276881Sbaptct_visual_char(Char *dst, size_t len, Char c) 318276881Sbapt{ 319276881Sbapt int t = ct_chr_class(c); 320276881Sbapt switch (t) { 321276881Sbapt case CHTYPE_TAB: 322276881Sbapt case CHTYPE_NL: 323276881Sbapt case CHTYPE_ASCIICTL: 324276881Sbapt if (len < 2) 325276881Sbapt return -1; /* insufficient space */ 326276881Sbapt *dst++ = '^'; 327276881Sbapt if (c == '\177') 328276881Sbapt *dst = '?'; /* DEL -> ^? */ 329276881Sbapt else 330276881Sbapt *dst = c | 0100; /* uncontrolify it */ 331276881Sbapt return 2; 332276881Sbapt case CHTYPE_PRINT: 333276881Sbapt if (len < 1) 334276881Sbapt return -1; /* insufficient space */ 335276881Sbapt *dst = c; 336276881Sbapt return 1; 337276881Sbapt case CHTYPE_NONPRINT: 338276881Sbapt /* we only use single-width glyphs for display, 339276881Sbapt * so this is right */ 340276881Sbapt if ((ssize_t)len < ct_visual_width(c)) 341276881Sbapt return -1; /* insufficient space */ 342276881Sbapt#ifdef WIDECHAR 343276881Sbapt *dst++ = '\\'; 344276881Sbapt *dst++ = 'U'; 345276881Sbapt *dst++ = '+'; 346276881Sbapt#define tohexdigit(v) "0123456789ABCDEF"[v] 347276881Sbapt if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 348276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 349276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 350276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 351276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 352276881Sbapt *dst = tohexdigit(((unsigned int) c ) & 0xf); 353276881Sbapt return c > 0xffff ? 8 : 7; 354276881Sbapt#else 355276881Sbapt *dst++ = '\\'; 356313981Spfg#define tooctaldigit(v) (Char)((v) + '0') 357276881Sbapt *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); 358276881Sbapt *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); 359276881Sbapt *dst++ = tooctaldigit(((unsigned int) c ) & 0x7); 360276881Sbapt#endif 361276881Sbapt /*FALLTHROUGH*/ 362276881Sbapt /* these two should be handled outside this function */ 363276881Sbapt default: /* we should never hit the default */ 364276881Sbapt return 0; 365276881Sbapt } 366276881Sbapt} 367276881Sbapt 368276881Sbapt 369276881Sbapt 370276881Sbapt 371276881Sbaptprotected int 372276881Sbaptct_chr_class(Char c) 373276881Sbapt{ 374276881Sbapt if (c == '\t') 375276881Sbapt return CHTYPE_TAB; 376276881Sbapt else if (c == '\n') 377276881Sbapt return CHTYPE_NL; 378276881Sbapt else if (IsASCII(c) && Iscntrl(c)) 379276881Sbapt return CHTYPE_ASCIICTL; 380276881Sbapt else if (Isprint(c)) 381276881Sbapt return CHTYPE_PRINT; 382276881Sbapt else 383276881Sbapt return CHTYPE_NONPRINT; 384276881Sbapt} 385