/* chartype.c revision 283084 */
1283084Sbapt/* $NetBSD: chartype.c,v 1.12 2015/02/22 02:16:19 christos Exp $ */ 2276881Sbapt 3276881Sbapt/*- 4276881Sbapt * Copyright (c) 2009 The NetBSD Foundation, Inc. 5276881Sbapt * All rights reserved. 6276881Sbapt * 7276881Sbapt * Redistribution and use in source and binary forms, with or without 8276881Sbapt * modification, are permitted provided that the following conditions 9276881Sbapt * are met: 10276881Sbapt * 1. Redistributions of source code must retain the above copyright 11276881Sbapt * notice, this list of conditions and the following disclaimer. 12276881Sbapt * 2. Redistributions in binary form must reproduce the above copyright 13276881Sbapt * notice, this list of conditions and the following disclaimer in the 14276881Sbapt * documentation and/or other materials provided with the distribution. 15276881Sbapt * 16276881Sbapt * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17276881Sbapt * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18276881Sbapt * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19276881Sbapt * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20276881Sbapt * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21276881Sbapt * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22276881Sbapt * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23276881Sbapt * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24276881Sbapt * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25276881Sbapt * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26276881Sbapt * POSSIBILITY OF SUCH DAMAGE. 
27276881Sbapt */ 28276881Sbapt 29276881Sbapt/* 30276881Sbapt * chartype.c: character classification and meta information 31276881Sbapt */ 32276881Sbapt#include "config.h" 33276881Sbapt#if !defined(lint) && !defined(SCCSID) 34283084Sbapt__RCSID("$NetBSD: chartype.c,v 1.12 2015/02/22 02:16:19 christos Exp $"); 35276881Sbapt#endif /* not lint && not SCCSID */ 36276881Sbapt#include <sys/cdefs.h> 37276881Sbapt__FBSDID("$FreeBSD: head/lib/libedit/chartype.c 283084 2015-05-18 22:03:05Z bapt $"); 38276881Sbapt 39276881Sbapt#include "el.h" 40276881Sbapt#include <stdlib.h> 41276881Sbapt 42276881Sbapt#define CT_BUFSIZ ((size_t)1024) 43276881Sbapt 44276881Sbapt#ifdef WIDECHAR 45283084Sbaptprotected int 46283084Sbaptct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize) 47276881Sbapt{ 48276881Sbapt void *p; 49283084Sbapt 50283084Sbapt if (csize <= conv->csize) 51283084Sbapt return 0; 52283084Sbapt 53283084Sbapt conv->csize = csize; 54283084Sbapt 55283084Sbapt p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff)); 56283084Sbapt if (p == NULL) { 57283084Sbapt conv->csize = 0; 58283084Sbapt el_free(conv->cbuff); 59283084Sbapt conv->cbuff = NULL; 60283084Sbapt return -1; 61276881Sbapt } 62283084Sbapt conv->cbuff = p; 63283084Sbapt return 0; 64283084Sbapt} 65276881Sbapt 66283084Sbaptprotected int 67283084Sbaptct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize) 68283084Sbapt{ 69283084Sbapt void *p; 70283084Sbapt 71283084Sbapt if (wsize <= conv->wsize) 72283084Sbapt return 0; 73283084Sbapt 74283084Sbapt conv->wsize = wsize; 75283084Sbapt 76283084Sbapt p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff)); 77283084Sbapt if (p == NULL) { 78283084Sbapt conv->wsize = 0; 79283084Sbapt el_free(conv->wbuff); 80283084Sbapt conv->wbuff = NULL; 81283084Sbapt return -1; 82276881Sbapt } 83283084Sbapt conv->wbuff = p; 84283084Sbapt return 0; 85276881Sbapt} 86276881Sbapt 87276881Sbapt 88276881Sbaptpublic char * 89276881Sbaptct_encode_string(const Char *s, ct_buffer_t *conv) 
90276881Sbapt{ 91276881Sbapt char *dst; 92283084Sbapt ssize_t used; 93276881Sbapt 94276881Sbapt if (!s) 95276881Sbapt return NULL; 96276881Sbapt 97276881Sbapt dst = conv->cbuff; 98283084Sbapt for (;;) { 99283084Sbapt used = (ssize_t)(dst - conv->cbuff); 100283084Sbapt if ((conv->csize - (size_t)used) < 5) { 101283084Sbapt if (ct_conv_cbuff_resize(conv, 102283084Sbapt conv->csize + CT_BUFSIZ) == -1) 103276881Sbapt return NULL; 104276881Sbapt dst = conv->cbuff + used; 105276881Sbapt } 106283084Sbapt if (!*s) 107283084Sbapt break; 108276881Sbapt used = ct_encode_char(dst, (size_t)5, *s); 109276881Sbapt if (used == -1) /* failed to encode, need more buffer space */ 110276881Sbapt abort(); 111276881Sbapt ++s; 112276881Sbapt dst += used; 113276881Sbapt } 114276881Sbapt *dst = '\0'; 115276881Sbapt return conv->cbuff; 116276881Sbapt} 117276881Sbapt 118276881Sbaptpublic Char * 119276881Sbaptct_decode_string(const char *s, ct_buffer_t *conv) 120276881Sbapt{ 121283084Sbapt size_t len; 122276881Sbapt 123276881Sbapt if (!s) 124276881Sbapt return NULL; 125276881Sbapt 126276881Sbapt len = ct_mbstowcs(NULL, s, (size_t)0); 127276881Sbapt if (len == (size_t)-1) 128276881Sbapt return NULL; 129283084Sbapt 130283084Sbapt if (conv->wsize < ++len) 131283084Sbapt if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1) 132283084Sbapt return NULL; 133283084Sbapt 134276881Sbapt ct_mbstowcs(conv->wbuff, s, conv->wsize); 135276881Sbapt return conv->wbuff; 136276881Sbapt} 137276881Sbapt 138276881Sbapt 139276881Sbaptprotected Char ** 140276881Sbaptct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 141276881Sbapt{ 142276881Sbapt size_t bufspace; 143276881Sbapt int i; 144276881Sbapt Char *p; 145276881Sbapt Char **wargv; 146276881Sbapt ssize_t bytes; 147276881Sbapt 148276881Sbapt /* Make sure we have enough space in the conversion buffer to store all 149276881Sbapt * the argv strings. */ 150276881Sbapt for (i = 0, bufspace = 0; i < argc; ++i) 151276881Sbapt bufspace += argv[i] ? 
strlen(argv[i]) + 1 : 0; 152283084Sbapt if (conv->wsize < ++bufspace) 153283084Sbapt if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1) 154283084Sbapt return NULL; 155276881Sbapt 156276881Sbapt wargv = el_malloc((size_t)argc * sizeof(*wargv)); 157276881Sbapt 158276881Sbapt for (i = 0, p = conv->wbuff; i < argc; ++i) { 159276881Sbapt if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 160276881Sbapt wargv[i] = NULL; 161276881Sbapt continue; 162276881Sbapt } else { 163276881Sbapt wargv[i] = p; 164276881Sbapt bytes = (ssize_t)mbstowcs(p, argv[i], bufspace); 165276881Sbapt } 166276881Sbapt if (bytes == -1) { 167276881Sbapt el_free(wargv); 168276881Sbapt return NULL; 169276881Sbapt } else 170276881Sbapt bytes++; /* include '\0' in the count */ 171276881Sbapt bufspace -= (size_t)bytes; 172276881Sbapt p += bytes; 173276881Sbapt } 174276881Sbapt 175276881Sbapt return wargv; 176276881Sbapt} 177276881Sbapt 178276881Sbapt 179276881Sbaptprotected size_t 180276881Sbaptct_enc_width(Char c) 181276881Sbapt{ 182276881Sbapt /* UTF-8 encoding specific values */ 183276881Sbapt if (c < 0x80) 184276881Sbapt return 1; 185276881Sbapt else if (c < 0x0800) 186276881Sbapt return 2; 187276881Sbapt else if (c < 0x10000) 188276881Sbapt return 3; 189276881Sbapt else if (c < 0x110000) 190276881Sbapt return 4; 191276881Sbapt else 192276881Sbapt return 0; /* not a valid codepoint */ 193276881Sbapt} 194276881Sbapt 195276881Sbaptprotected ssize_t 196276881Sbaptct_encode_char(char *dst, size_t len, Char c) 197276881Sbapt{ 198276881Sbapt ssize_t l = 0; 199276881Sbapt if (len < ct_enc_width(c)) 200276881Sbapt return -1; 201276881Sbapt l = ct_wctomb(dst, c); 202276881Sbapt 203276881Sbapt if (l < 0) { 204276881Sbapt ct_wctomb_reset; 205276881Sbapt l = 0; 206276881Sbapt } 207276881Sbapt return l; 208276881Sbapt} 209276881Sbapt#endif 210276881Sbapt 211276881Sbaptprotected const Char * 212276881Sbaptct_visual_string(const Char *s) 213276881Sbapt{ 214276881Sbapt static Char *buff = NULL; 
215276881Sbapt static size_t buffsize = 0; 216276881Sbapt void *p; 217276881Sbapt Char *dst; 218276881Sbapt ssize_t used = 0; 219276881Sbapt 220276881Sbapt if (!s) 221276881Sbapt return NULL; 222276881Sbapt if (!buff) { 223276881Sbapt buffsize = CT_BUFSIZ; 224276881Sbapt buff = el_malloc(buffsize * sizeof(*buff)); 225276881Sbapt } 226276881Sbapt dst = buff; 227276881Sbapt while (*s) { 228276881Sbapt used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s); 229276881Sbapt if (used == -1) { /* failed to encode, need more buffer space */ 230276881Sbapt used = dst - buff; 231276881Sbapt buffsize += CT_BUFSIZ; 232276881Sbapt p = el_realloc(buff, buffsize * sizeof(*buff)); 233276881Sbapt if (p == NULL) 234276881Sbapt goto out; 235276881Sbapt buff = p; 236276881Sbapt dst = buff + used; 237276881Sbapt /* don't increment s here - we want to retry it! */ 238276881Sbapt } 239276881Sbapt else 240276881Sbapt ++s; 241276881Sbapt dst += used; 242276881Sbapt } 243276881Sbapt if (dst >= (buff + buffsize)) { /* sigh */ 244276881Sbapt buffsize += 1; 245276881Sbapt p = el_realloc(buff, buffsize * sizeof(*buff)); 246276881Sbapt if (p == NULL) 247276881Sbapt goto out; 248276881Sbapt buff = p; 249276881Sbapt dst = buff + buffsize - 1; 250276881Sbapt } 251276881Sbapt *dst = 0; 252276881Sbapt return buff; 253276881Sbaptout: 254276881Sbapt el_free(buff); 255276881Sbapt buffsize = 0; 256276881Sbapt return NULL; 257276881Sbapt} 258276881Sbapt 259276881Sbapt 260276881Sbapt 261276881Sbaptprotected int 262276881Sbaptct_visual_width(Char c) 263276881Sbapt{ 264276881Sbapt int t = ct_chr_class(c); 265276881Sbapt switch (t) { 266276881Sbapt case CHTYPE_ASCIICTL: 267276881Sbapt return 2; /* ^@ ^? etc. */ 268276881Sbapt case CHTYPE_TAB: 269276881Sbapt return 1; /* Hmm, this really need to be handled outside! */ 270276881Sbapt case CHTYPE_NL: 271276881Sbapt return 0; /* Should this be 1 instead? 
*/ 272276881Sbapt#ifdef WIDECHAR 273276881Sbapt case CHTYPE_PRINT: 274276881Sbapt return wcwidth(c); 275276881Sbapt case CHTYPE_NONPRINT: 276276881Sbapt if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 277276881Sbapt return 8; /* \U+12345 */ 278276881Sbapt else 279276881Sbapt return 7; /* \U+1234 */ 280276881Sbapt#else 281276881Sbapt case CHTYPE_PRINT: 282276881Sbapt return 1; 283276881Sbapt case CHTYPE_NONPRINT: 284276881Sbapt return 4; /* \123 */ 285276881Sbapt#endif 286276881Sbapt default: 287276881Sbapt return 0; /* should not happen */ 288276881Sbapt } 289276881Sbapt} 290276881Sbapt 291276881Sbapt 292276881Sbaptprotected ssize_t 293276881Sbaptct_visual_char(Char *dst, size_t len, Char c) 294276881Sbapt{ 295276881Sbapt int t = ct_chr_class(c); 296276881Sbapt switch (t) { 297276881Sbapt case CHTYPE_TAB: 298276881Sbapt case CHTYPE_NL: 299276881Sbapt case CHTYPE_ASCIICTL: 300276881Sbapt if (len < 2) 301276881Sbapt return -1; /* insufficient space */ 302276881Sbapt *dst++ = '^'; 303276881Sbapt if (c == '\177') 304276881Sbapt *dst = '?'; /* DEL -> ^? 
*/ 305276881Sbapt else 306276881Sbapt *dst = c | 0100; /* uncontrolify it */ 307276881Sbapt return 2; 308276881Sbapt case CHTYPE_PRINT: 309276881Sbapt if (len < 1) 310276881Sbapt return -1; /* insufficient space */ 311276881Sbapt *dst = c; 312276881Sbapt return 1; 313276881Sbapt case CHTYPE_NONPRINT: 314276881Sbapt /* we only use single-width glyphs for display, 315276881Sbapt * so this is right */ 316276881Sbapt if ((ssize_t)len < ct_visual_width(c)) 317276881Sbapt return -1; /* insufficient space */ 318276881Sbapt#ifdef WIDECHAR 319276881Sbapt *dst++ = '\\'; 320276881Sbapt *dst++ = 'U'; 321276881Sbapt *dst++ = '+'; 322276881Sbapt#define tohexdigit(v) "0123456789ABCDEF"[v] 323276881Sbapt if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 324276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 325276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 326276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 327276881Sbapt *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 328276881Sbapt *dst = tohexdigit(((unsigned int) c ) & 0xf); 329276881Sbapt return c > 0xffff ? 
8 : 7; 330276881Sbapt#else 331276881Sbapt *dst++ = '\\'; 332276881Sbapt#define tooctaldigit(v) ((v) + '0') 333276881Sbapt *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); 334276881Sbapt *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); 335276881Sbapt *dst++ = tooctaldigit(((unsigned int) c ) & 0x7); 336276881Sbapt#endif 337276881Sbapt /*FALLTHROUGH*/ 338276881Sbapt /* these two should be handled outside this function */ 339276881Sbapt default: /* we should never hit the default */ 340276881Sbapt return 0; 341276881Sbapt } 342276881Sbapt} 343276881Sbapt 344276881Sbapt 345276881Sbapt 346276881Sbapt 347276881Sbaptprotected int 348276881Sbaptct_chr_class(Char c) 349276881Sbapt{ 350276881Sbapt if (c == '\t') 351276881Sbapt return CHTYPE_TAB; 352276881Sbapt else if (c == '\n') 353276881Sbapt return CHTYPE_NL; 354276881Sbapt else if (IsASCII(c) && Iscntrl(c)) 355276881Sbapt return CHTYPE_ASCIICTL; 356276881Sbapt else if (Isprint(c)) 357276881Sbapt return CHTYPE_PRINT; 358276881Sbapt else 359276881Sbapt return CHTYPE_NONPRINT; 360276881Sbapt} 361