1326324Sbrooks/* $NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $ */ 2241236Sbrooks 3241236Sbrooks/*- 4241236Sbrooks * Copyright (c) 1989, 1993 5241236Sbrooks * The Regents of the University of California. All rights reserved. 6241236Sbrooks * 7241236Sbrooks * Redistribution and use in source and binary forms, with or without 8241236Sbrooks * modification, are permitted provided that the following conditions 9241236Sbrooks * are met: 10241236Sbrooks * 1. Redistributions of source code must retain the above copyright 11241236Sbrooks * notice, this list of conditions and the following disclaimer. 12241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright 13241236Sbrooks * notice, this list of conditions and the following disclaimer in the 14241236Sbrooks * documentation and/or other materials provided with the distribution. 15241236Sbrooks * 3. Neither the name of the University nor the names of its contributors 16241236Sbrooks * may be used to endorse or promote products derived from this software 17241236Sbrooks * without specific prior written permission. 18241236Sbrooks * 19241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20241236Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21241236Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22241236Sbrooks * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23241236Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24241236Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25241236Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26241236Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27241236Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28241236Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29241236Sbrooks * SUCH DAMAGE. 30241236Sbrooks */ 31241236Sbrooks 32241236Sbrooks/*- 33241236Sbrooks * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc. 34241236Sbrooks * All rights reserved. 35241236Sbrooks * 36241236Sbrooks * Redistribution and use in source and binary forms, with or without 37241236Sbrooks * modification, are permitted provided that the following conditions 38241236Sbrooks * are met: 39241236Sbrooks * 1. Redistributions of source code must retain the above copyright 40241236Sbrooks * notice, this list of conditions and the following disclaimer. 41241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright 42241236Sbrooks * notice, this list of conditions and the following disclaimer in the 43241236Sbrooks * documentation and/or other materials provided with the distribution. 44241236Sbrooks * 45241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 46241236Sbrooks * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 47241236Sbrooks * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48241236Sbrooks * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 49241236Sbrooks * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 50241236Sbrooks * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 51241236Sbrooks * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 52241236Sbrooks * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 53241236Sbrooks * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 54241236Sbrooks * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 55241236Sbrooks * POSSIBILITY OF SUCH DAMAGE. 56241236Sbrooks */ 57241236Sbrooks 58241236Sbrooks#include <sys/cdefs.h> 59241236Sbrooks#if defined(LIBC_SCCS) && !defined(lint) 60326324Sbrooks__RCSID("$NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $"); 61241236Sbrooks#endif /* LIBC_SCCS and not lint */ 62248302Sbrooks#ifdef __FBSDID 63244401Sbrooks__FBSDID("$FreeBSD: stable/10/contrib/libc-vis/vis.c 326324 2017-11-28 17:20:53Z brooks $"); 64248302Sbrooks#define _DIAGASSERT(x) assert(x) 65248302Sbrooks#endif 66241236Sbrooks 67241236Sbrooks#include "namespace.h" 68241236Sbrooks#include <sys/types.h> 69248302Sbrooks#include <sys/param.h> 70241236Sbrooks 71241236Sbrooks#include <assert.h> 72241236Sbrooks#include <vis.h> 73241236Sbrooks#include <errno.h> 74241236Sbrooks#include <stdlib.h> 75248302Sbrooks#include <wchar.h> 76248302Sbrooks#include <wctype.h> 77241236Sbrooks 78241236Sbrooks#ifdef __weak_alias 79241236Sbrooks__weak_alias(strvisx,_strvisx) 80241236Sbrooks#endif 81241236Sbrooks 82241236Sbrooks#if !HAVE_VIS || !HAVE_SVIS 83241236Sbrooks#include <ctype.h> 84241236Sbrooks#include <limits.h> 85241236Sbrooks#include <stdio.h> 86241236Sbrooks#include <string.h> 87241236Sbrooks 88248302Sbrooks/* 89248302Sbrooks * The reason for going through the trouble to deal with character encodings 90248302Sbrooks * in vis(3), is that we use this to safe encode output of commands. This 91248302Sbrooks * safe encoding varies depending on the character set. For example if we 92248302Sbrooks * display ps output in French, we don't want to display French characters 93248302Sbrooks * as M-foo. 94248302Sbrooks */ 95241236Sbrooks 96248302Sbrooksstatic wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *); 97248302Sbrooks 98241236Sbrooks#undef BELL 99248302Sbrooks#define BELL L'\a' 100326324Sbrooks 101326324Sbrooks#if defined(LC_C_LOCALE) 102326324Sbrooks#define iscgraph(c) isgraph_l(c, LC_C_LOCALE) 103326324Sbrooks#else 104326324Sbrooks/* Keep it simple for now, no locale stuff */ 105326324Sbrooks#define iscgraph(c) isgraph(c) 106326324Sbrooks#ifdef notyet 107326324Sbrooks#include <locale.h> 108326324Sbrooksstatic int 109326324Sbrooksiscgraph(int c) { 110326324Sbrooks int rv; 111326324Sbrooks char *ol; 112241236Sbrooks 113326324Sbrooks ol = setlocale(LC_CTYPE, "C"); 114326324Sbrooks rv = isgraph(c); 115326324Sbrooks if (ol) 116326324Sbrooks setlocale(LC_CTYPE, ol); 117326324Sbrooks return rv; 118326324Sbrooks} 119326324Sbrooks#endif 120326324Sbrooks#endif 121326324Sbrooks 122326324Sbrooks#define ISGRAPH(flags, c) \ 123326324Sbrooks (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c)) 124326324Sbrooks 125248302Sbrooks#define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7') 126248302Sbrooks#define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n') 127248302Sbrooks#define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r') 128248302Sbrooks#define xtoa(c) L"0123456789abcdef"[c] 129248302Sbrooks#define XTOA(c) L"0123456789ABCDEF"[c] 130241236Sbrooks 131326324Sbrooks#define MAXEXTRAS 30 132241236Sbrooks 133326324Sbrooksstatic const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~"; 134326324Sbrooksstatic const wchar_t char_glob[] = L"*?[#"; 135326324Sbrooks 136248302Sbrooks#if !HAVE_NBTOOL_CONFIG_H 137248302Sbrooks#ifndef __NetBSD__ 138248302Sbrooks/* 139248302Sbrooks * On NetBSD MB_LEN_MAX is currently 32 which does not fit on any integer 140248302Sbrooks * integral type and it is probably wrong, since currently the maximum 141248302Sbrooks * number of bytes and character needs is 6. Until this is fixed, the 142248302Sbrooks * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and 143248302Sbrooks * the assertion is commented out. 144248302Sbrooks */ 145248302Sbrooks#ifdef __FreeBSD__ 146248302Sbrooks/* 147248302Sbrooks * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel 148248302Sbrooks * mode. 149248302Sbrooks */ 150248302Sbrooks#ifndef CTASSERT 151248302Sbrooks#define CTASSERT(x) _CTASSERT(x, __LINE__) 152248302Sbrooks#define _CTASSERT(x, y) __CTASSERT(x, y) 153248302Sbrooks#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] 154248302Sbrooks#endif 155248302Sbrooks#endif /* __FreeBSD__ */ 156248302SbrooksCTASSERT(MB_LEN_MAX <= sizeof(uint64_t)); 157248302Sbrooks#endif /* !__NetBSD__ */ 158248302Sbrooks#endif 159241236Sbrooks 160241236Sbrooks/* 161241236Sbrooks * This is do_hvis, for HTTP style (RFC 1808) 162241236Sbrooks */ 163248302Sbrooksstatic wchar_t * 164248302Sbrooksdo_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 165241236Sbrooks{ 166248302Sbrooks if (iswalnum(c) 167241236Sbrooks /* safe */ 168248302Sbrooks || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+' 169241236Sbrooks /* extra */ 170248302Sbrooks || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')' 171248302Sbrooks || c == L',') 172248302Sbrooks dst = do_svis(dst, c, flags, nextc, extra); 173248302Sbrooks else { 174248302Sbrooks *dst++ = L'%'; 175241236Sbrooks *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); 176241236Sbrooks *dst++ = xtoa((unsigned int)c & 0xf); 177241236Sbrooks } 178241236Sbrooks 179241236Sbrooks return dst; 180241236Sbrooks} 181241236Sbrooks 182241236Sbrooks/* 183241236Sbrooks * This is do_mvis, for Quoted-Printable MIME (RFC 2045) 184241236Sbrooks * NB: No handling of long lines or CRLF. 185241236Sbrooks */ 186248302Sbrooksstatic wchar_t * 187248302Sbrooksdo_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 188241236Sbrooks{ 189248302Sbrooks if ((c != L'\n') && 190241236Sbrooks /* Space at the end of the line */ 191248302Sbrooks ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) || 192241236Sbrooks /* Out of range */ 193248302Sbrooks (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || 194248302Sbrooks /* Specific char to be escaped */ 195248302Sbrooks wcschr(L"#$@[\\]^`{|}~", c) != NULL)) { 196248302Sbrooks *dst++ = L'='; 197241236Sbrooks *dst++ = XTOA(((unsigned int)c >> 4) & 0xf); 198241236Sbrooks *dst++ = XTOA((unsigned int)c & 0xf); 199248302Sbrooks } else 200248302Sbrooks dst = do_svis(dst, c, flags, nextc, extra); 201241236Sbrooks return dst; 202241236Sbrooks} 203241236Sbrooks 204241236Sbrooks/* 205248302Sbrooks * Output single byte of multibyte character. 206241236Sbrooks */ 207248302Sbrooksstatic wchar_t * 208248302Sbrooksdo_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra) 209241236Sbrooks{ 210248302Sbrooks if (flags & VIS_CSTYLE) { 211241236Sbrooks switch (c) { 212248302Sbrooks case L'\n': 213248302Sbrooks *dst++ = L'\\'; *dst++ = L'n'; 214241236Sbrooks return dst; 215248302Sbrooks case L'\r': 216248302Sbrooks *dst++ = L'\\'; *dst++ = L'r'; 217241236Sbrooks return dst; 218248302Sbrooks case L'\b': 219248302Sbrooks *dst++ = L'\\'; *dst++ = L'b'; 220241236Sbrooks return dst; 221241236Sbrooks case BELL: 222248302Sbrooks *dst++ = L'\\'; *dst++ = L'a'; 223241236Sbrooks return dst; 224248302Sbrooks case L'\v': 225248302Sbrooks *dst++ = L'\\'; *dst++ = L'v'; 226241236Sbrooks return dst; 227248302Sbrooks case L'\t': 228248302Sbrooks *dst++ = L'\\'; *dst++ = L't'; 229241236Sbrooks return dst; 230248302Sbrooks case L'\f': 231248302Sbrooks *dst++ = L'\\'; *dst++ = L'f'; 232241236Sbrooks return dst; 233248302Sbrooks case L' ': 234248302Sbrooks *dst++ = L'\\'; *dst++ = L's'; 235241236Sbrooks return dst; 236248302Sbrooks case L'\0': 237248302Sbrooks *dst++ = L'\\'; *dst++ = L'0'; 238248302Sbrooks if (iswoctal(nextc)) { 239248302Sbrooks *dst++ = L'0'; 240248302Sbrooks *dst++ = L'0'; 241241236Sbrooks } 242241236Sbrooks return dst; 243326324Sbrooks /* We cannot encode these characters in VIS_CSTYLE 244326324Sbrooks * because they special meaning */ 245326324Sbrooks case L'n': 246326324Sbrooks case L'r': 247326324Sbrooks case L'b': 248326324Sbrooks case L'a': 249326324Sbrooks case L'v': 250326324Sbrooks case L't': 251326324Sbrooks case L'f': 252326324Sbrooks case L's': 253326324Sbrooks case L'0': 254326324Sbrooks case L'M': 255326324Sbrooks case L'^': 256326324Sbrooks case L'$': /* vis(1) -l */ 257326324Sbrooks break; 258241236Sbrooks default: 259326324Sbrooks if (ISGRAPH(flags, c) && !iswoctal(c)) { 260248302Sbrooks *dst++ = L'\\'; 261248302Sbrooks *dst++ = c; 262241236Sbrooks return dst; 263241236Sbrooks } 264241236Sbrooks } 265241236Sbrooks } 266248302Sbrooks if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) { 267248302Sbrooks *dst++ = L'\\'; 268248302Sbrooks *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0'; 269248302Sbrooks *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0'; 270248302Sbrooks *dst++ = (c & 07) + L'0'; 271241236Sbrooks } else { 272248302Sbrooks if ((flags & VIS_NOSLASH) == 0) 273248302Sbrooks *dst++ = L'\\'; 274241236Sbrooks 275241236Sbrooks if (c & 0200) { 276248302Sbrooks c &= 0177; 277248302Sbrooks *dst++ = L'M'; 278241236Sbrooks } 279241236Sbrooks 280248302Sbrooks if (iswcntrl(c)) { 281248302Sbrooks *dst++ = L'^'; 282241236Sbrooks if (c == 0177) 283248302Sbrooks *dst++ = L'?'; 284241236Sbrooks else 285248302Sbrooks *dst++ = c + L'@'; 286241236Sbrooks } else { 287248302Sbrooks *dst++ = L'-'; 288248302Sbrooks *dst++ = c; 289241236Sbrooks } 290241236Sbrooks } 291248302Sbrooks 292241236Sbrooks return dst; 293241236Sbrooks} 294241236Sbrooks 295248302Sbrooks/* 296248302Sbrooks * This is do_vis, the central code of vis. 297248302Sbrooks * dst: Pointer to the destination buffer 298248302Sbrooks * c: Character to encode 299248302Sbrooks * flags: Flags word 300248302Sbrooks * nextc: The character following 'c' 301248302Sbrooks * extra: Pointer to the list of extra characters to be 302248302Sbrooks * backslash-protected. 303248302Sbrooks */ 304248302Sbrooksstatic wchar_t * 305248302Sbrooksdo_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) 306248302Sbrooks{ 307248302Sbrooks int iswextra, i, shft; 308248302Sbrooks uint64_t bmsk, wmsk; 309241236Sbrooks 310248302Sbrooks iswextra = wcschr(extra, c) != NULL; 311326324Sbrooks if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) || 312248302Sbrooks ((flags & VIS_SAFE) && iswsafe(c)))) { 313248302Sbrooks *dst++ = c; 314248302Sbrooks return dst; 315248302Sbrooks } 316248302Sbrooks 317248302Sbrooks /* See comment in istrsenvisx() output loop, below. */ 318248302Sbrooks wmsk = 0; 319248302Sbrooks for (i = sizeof(wmsk) - 1; i >= 0; i--) { 320248302Sbrooks shft = i * NBBY; 321248302Sbrooks bmsk = (uint64_t)0xffLL << shft; 322248302Sbrooks wmsk |= bmsk; 323248302Sbrooks if ((c & wmsk) || i == 0) 324248302Sbrooks dst = do_mbyte(dst, (wint_t)( 325248302Sbrooks (uint64_t)(c & bmsk) >> shft), 326248302Sbrooks flags, nextc, iswextra); 327248302Sbrooks } 328248302Sbrooks 329248302Sbrooks return dst; 330248302Sbrooks} 331248302Sbrooks 332248302Sbrookstypedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *); 333248302Sbrooks 334241236Sbrooks/* 335241236Sbrooks * Return the appropriate encoding function depending on the flags given. 336241236Sbrooks */ 337241236Sbrooksstatic visfun_t 338248302Sbrooksgetvisfun(int flags) 339241236Sbrooks{ 340248302Sbrooks if (flags & VIS_HTTPSTYLE) 341241236Sbrooks return do_hvis; 342248302Sbrooks if (flags & VIS_MIMESTYLE) 343241236Sbrooks return do_mvis; 344241236Sbrooks return do_svis; 345241236Sbrooks} 346241236Sbrooks 347241236Sbrooks/* 348248302Sbrooks * Expand list of extra characters to not visually encode. 349241236Sbrooks */ 350248302Sbrooksstatic wchar_t * 351248302Sbrooksmakeextralist(int flags, const char *src) 352241236Sbrooks{ 353248302Sbrooks wchar_t *dst, *d; 354248302Sbrooks size_t len; 355326324Sbrooks const wchar_t *s; 356326324Sbrooks mbstate_t mbstate; 357241236Sbrooks 358326324Sbrooks bzero(&mbstate, sizeof(mbstate)); 359248302Sbrooks len = strlen(src); 360248302Sbrooks if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL) 361241236Sbrooks return NULL; 362248302Sbrooks 363326324Sbrooks if ((flags & VIS_NOLOCALE) || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) { 364248302Sbrooks size_t i; 365248302Sbrooks for (i = 0; i < len; i++) 366326324Sbrooks dst[i] = (wchar_t)(u_char)src[i]; 367248302Sbrooks d = dst + len; 368248302Sbrooks } else 369248302Sbrooks d = dst + wcslen(dst); 370248302Sbrooks 371326324Sbrooks if (flags & VIS_GLOB) 372326324Sbrooks for (s = char_glob; *s; *d++ = *s++) 373326324Sbrooks continue; 374241236Sbrooks 375326324Sbrooks if (flags & VIS_SHELL) 376326324Sbrooks for (s = char_shell; *s; *d++ = *s++) 377326324Sbrooks continue; 378326324Sbrooks 379248302Sbrooks if (flags & VIS_SP) *d++ = L' '; 380248302Sbrooks if (flags & VIS_TAB) *d++ = L'\t'; 381248302Sbrooks if (flags & VIS_NL) *d++ = L'\n'; 382326324Sbrooks if (flags & VIS_DQ) *d++ = L'"'; 383248302Sbrooks if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\'; 384248302Sbrooks *d = L'\0'; 385241236Sbrooks 386248302Sbrooks return dst; 387241236Sbrooks} 388241236Sbrooks 389241236Sbrooks/* 390248302Sbrooks * istrsenvisx() 391248302Sbrooks * The main internal function. 392248302Sbrooks * All user-visible functions call this one. 393241236Sbrooks */ 394241236Sbrooksstatic int 395326324Sbrooksistrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength, 396248302Sbrooks int flags, const char *mbextra, int *cerr_ptr) 397241236Sbrooks{ 398248302Sbrooks wchar_t *dst, *src, *pdst, *psrc, *start, *extra; 399248302Sbrooks size_t len, olen; 400248302Sbrooks uint64_t bmsk, wmsk; 401248302Sbrooks wint_t c; 402241236Sbrooks visfun_t f; 403326324Sbrooks int clen = 0, cerr, error = -1, i, shft; 404326324Sbrooks char *mbdst, *mdst; 405248302Sbrooks ssize_t mbslength, maxolen; 406326324Sbrooks mbstate_t mbstate; 407241236Sbrooks 408326324Sbrooks _DIAGASSERT(mbdstp != NULL); 409272753Sbrooks _DIAGASSERT(mbsrc != NULL || mblength == 0); 410248302Sbrooks _DIAGASSERT(mbextra != NULL); 411248302Sbrooks 412326324Sbrooks mbslength = (ssize_t)mblength; 413248302Sbrooks /* 414326324Sbrooks * When inputing a single character, must also read in the 415326324Sbrooks * next character for nextc, the look-ahead character. 416326324Sbrooks */ 417326324Sbrooks if (mbslength == 1) 418326324Sbrooks mbslength++; 419326324Sbrooks 420326324Sbrooks /* 421248302Sbrooks * Input (mbsrc) is a char string considered to be multibyte 422248302Sbrooks * characters. The input loop will read this string pulling 423248302Sbrooks * one character, possibly multiple bytes, from mbsrc and 424248302Sbrooks * converting each to wchar_t in src. 425248302Sbrooks * 426248302Sbrooks * The vis conversion will be done using the wide char 427248302Sbrooks * wchar_t string. 428248302Sbrooks * 429248302Sbrooks * This will then be converted back to a multibyte string to 430248302Sbrooks * return to the caller. 431248302Sbrooks */ 432248302Sbrooks 433248302Sbrooks /* Allocate space for the wide char strings */ 434248302Sbrooks psrc = pdst = extra = NULL; 435326324Sbrooks mdst = NULL; 436326324Sbrooks if ((psrc = calloc(mbslength + 1, sizeof(*psrc))) == NULL) 437248302Sbrooks return -1; 438326324Sbrooks if ((pdst = calloc((16 * mbslength) + 1, sizeof(*pdst))) == NULL) 439248302Sbrooks goto out; 440326324Sbrooks if (*mbdstp == NULL) { 441326324Sbrooks if ((mdst = calloc((16 * mbslength) + 1, sizeof(*mdst))) == NULL) 442326324Sbrooks goto out; 443326324Sbrooks *mbdstp = mdst; 444326324Sbrooks } 445326324Sbrooks 446326324Sbrooks mbdst = *mbdstp; 447248302Sbrooks dst = pdst; 448248302Sbrooks src = psrc; 449248302Sbrooks 450326324Sbrooks if (flags & VIS_NOLOCALE) { 451326324Sbrooks /* Do one byte at a time conversion */ 452326324Sbrooks cerr = 1; 453326324Sbrooks } else { 454326324Sbrooks /* Use caller's multibyte conversion error flag. */ 455326324Sbrooks cerr = cerr_ptr ? *cerr_ptr : 0; 456326324Sbrooks } 457248302Sbrooks 458248302Sbrooks /* 459248302Sbrooks * Input loop. 460248302Sbrooks * Handle up to mblength characters (not bytes). We do not 461248302Sbrooks * stop at NULs because we may be processing a block of data 462248302Sbrooks * that includes NULs. 463248302Sbrooks */ 464326324Sbrooks bzero(&mbstate, sizeof(mbstate)); 465248302Sbrooks while (mbslength > 0) { 466248302Sbrooks /* Convert one multibyte character to wchar_t. */ 467248302Sbrooks if (!cerr) 468326324Sbrooks clen = mbrtowc(src, mbsrc, MB_LEN_MAX, &mbstate); 469248302Sbrooks if (cerr || clen < 0) { 470248302Sbrooks /* Conversion error, process as a byte instead. */ 471248302Sbrooks *src = (wint_t)(u_char)*mbsrc; 472248302Sbrooks clen = 1; 473248302Sbrooks cerr = 1; 474248302Sbrooks } 475326324Sbrooks if (clen == 0) { 476248302Sbrooks /* 477248302Sbrooks * NUL in input gives 0 return value. process 478248302Sbrooks * as single NUL byte and keep going. 479248302Sbrooks */ 480248302Sbrooks clen = 1; 481326324Sbrooks } 482248302Sbrooks /* Advance buffer character pointer. */ 483248302Sbrooks src++; 484248302Sbrooks /* Advance input pointer by number of bytes read. */ 485248302Sbrooks mbsrc += clen; 486248302Sbrooks /* Decrement input byte count. */ 487248302Sbrooks mbslength -= clen; 488241236Sbrooks } 489248302Sbrooks len = src - psrc; 490248302Sbrooks src = psrc; 491326324Sbrooks 492248302Sbrooks /* 493248302Sbrooks * In the single character input case, we will have actually 494248302Sbrooks * processed two characters, c and nextc. Reset len back to 495248302Sbrooks * just a single character. 496248302Sbrooks */ 497248302Sbrooks if (mblength < len) 498248302Sbrooks len = mblength; 499248302Sbrooks 500248302Sbrooks /* Convert extra argument to list of characters for this mode. */ 501248302Sbrooks extra = makeextralist(flags, mbextra); 502248302Sbrooks if (!extra) { 503248302Sbrooks if (dlen && *dlen == 0) { 504248302Sbrooks errno = ENOSPC; 505248302Sbrooks goto out; 506248302Sbrooks } 507326324Sbrooks *mbdst = '\0'; /* can't create extra, return "" */ 508248302Sbrooks error = 0; 509248302Sbrooks goto out; 510248302Sbrooks } 511248302Sbrooks 512248302Sbrooks /* Look up which processing function to call. */ 513248302Sbrooks f = getvisfun(flags); 514248302Sbrooks 515248302Sbrooks /* 516248302Sbrooks * Main processing loop. 517248302Sbrooks * Call do_Xvis processing function one character at a time 518248302Sbrooks * with next character available for look-ahead. 519248302Sbrooks */ 520248302Sbrooks for (start = dst; len > 0; len--) { 521248302Sbrooks c = *src++; 522248302Sbrooks dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra); 523241236Sbrooks if (dst == NULL) { 524241236Sbrooks errno = ENOSPC; 525248302Sbrooks goto out; 526241236Sbrooks } 527241236Sbrooks } 528248302Sbrooks 529248302Sbrooks /* Terminate the string in the buffer. */ 530248302Sbrooks *dst = L'\0'; 531248302Sbrooks 532248302Sbrooks /* 533248302Sbrooks * Output loop. 534248302Sbrooks * Convert wchar_t string back to multibyte output string. 535248302Sbrooks * If we have hit a multi-byte conversion error on input, 536248302Sbrooks * output byte-by-byte here. Else use wctomb(). 537248302Sbrooks */ 538248302Sbrooks len = wcslen(start); 539248302Sbrooks maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1); 540248302Sbrooks olen = 0; 541326324Sbrooks bzero(&mbstate, sizeof(mbstate)); 542248302Sbrooks for (dst = start; len > 0; len--) { 543248302Sbrooks if (!cerr) 544326324Sbrooks clen = wcrtomb(mbdst, *dst, &mbstate); 545248302Sbrooks if (cerr || clen < 0) { 546248302Sbrooks /* 547248302Sbrooks * Conversion error, process as a byte(s) instead. 548248302Sbrooks * Examine each byte and higher-order bytes for 549248302Sbrooks * data. E.g., 550248302Sbrooks * 0x000000000000a264 -> a2 64 551248302Sbrooks * 0x000000001f00a264 -> 1f 00 a2 64 552248302Sbrooks */ 553248302Sbrooks clen = 0; 554248302Sbrooks wmsk = 0; 555248302Sbrooks for (i = sizeof(wmsk) - 1; i >= 0; i--) { 556248302Sbrooks shft = i * NBBY; 557248302Sbrooks bmsk = (uint64_t)0xffLL << shft; 558248302Sbrooks wmsk |= bmsk; 559248302Sbrooks if ((*dst & wmsk) || i == 0) 560248302Sbrooks mbdst[clen++] = (char)( 561248302Sbrooks (uint64_t)(*dst & bmsk) >> 562248302Sbrooks shft); 563248302Sbrooks } 564248302Sbrooks cerr = 1; 565248302Sbrooks } 566248302Sbrooks /* If this character would exceed our output limit, stop. */ 567248302Sbrooks if (olen + clen > (size_t)maxolen) 568248302Sbrooks break; 569248302Sbrooks /* Advance output pointer by number of bytes written. */ 570248302Sbrooks mbdst += clen; 571248302Sbrooks /* Advance buffer character pointer. */ 572248302Sbrooks dst++; 573248302Sbrooks /* Incrment output character count. */ 574248302Sbrooks olen += clen; 575241236Sbrooks } 576248302Sbrooks 577248302Sbrooks /* Terminate the output string. */ 578248302Sbrooks *mbdst = '\0'; 579248302Sbrooks 580326324Sbrooks if (flags & VIS_NOLOCALE) { 581326324Sbrooks /* Pass conversion error flag out. */ 582326324Sbrooks if (cerr_ptr) 583326324Sbrooks *cerr_ptr = cerr; 584326324Sbrooks } 585248302Sbrooks 586248302Sbrooks free(extra); 587248302Sbrooks free(pdst); 588248302Sbrooks free(psrc); 589248302Sbrooks 590248302Sbrooks return (int)olen; 591248302Sbrooksout: 592248302Sbrooks free(extra); 593248302Sbrooks free(pdst); 594248302Sbrooks free(psrc); 595326324Sbrooks free(mdst); 596248302Sbrooks return error; 597241236Sbrooks} 598272753Sbrooks 599272753Sbrooksstatic int 600326324Sbrooksistrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc, 601272753Sbrooks int flags, const char *mbextra, int *cerr_ptr) 602272753Sbrooks{ 603326324Sbrooks return istrsenvisx(mbdstp, dlen, mbsrc, 604272753Sbrooks mbsrc != NULL ? strlen(mbsrc) : 0, flags, mbextra, cerr_ptr); 605272753Sbrooks} 606272753Sbrooks 607248302Sbrooks#endif 608241236Sbrooks 609248302Sbrooks#if !HAVE_SVIS 610248302Sbrooks/* 611248302Sbrooks * The "svis" variants all take an "extra" arg that is a pointer 612248302Sbrooks * to a NUL-terminated list of characters to be encoded, too. 613248302Sbrooks * These functions are useful e. g. to encode strings in such a 614248302Sbrooks * way so that they are not interpreted by a shell. 615248302Sbrooks */ 616248302Sbrooks 617248302Sbrookschar * 618248302Sbrookssvis(char *mbdst, int c, int flags, int nextc, const char *mbextra) 619248302Sbrooks{ 620248302Sbrooks char cc[2]; 621248302Sbrooks int ret; 622248302Sbrooks 623248302Sbrooks cc[0] = c; 624248302Sbrooks cc[1] = nextc; 625248302Sbrooks 626326324Sbrooks ret = istrsenvisx(&mbdst, NULL, cc, 1, flags, mbextra, NULL); 627248302Sbrooks if (ret < 0) 628248302Sbrooks return NULL; 629248302Sbrooks return mbdst + ret; 630248302Sbrooks} 631248302Sbrooks 632248302Sbrookschar * 633248302Sbrookssnvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra) 634248302Sbrooks{ 635248302Sbrooks char cc[2]; 636248302Sbrooks int ret; 637248302Sbrooks 638248302Sbrooks cc[0] = c; 639248302Sbrooks cc[1] = nextc; 640248302Sbrooks 641326324Sbrooks ret = istrsenvisx(&mbdst, &dlen, cc, 1, flags, mbextra, NULL); 642248302Sbrooks if (ret < 0) 643248302Sbrooks return NULL; 644248302Sbrooks return mbdst + ret; 645248302Sbrooks} 646248302Sbrooks 647241236Sbrooksint 648248302Sbrooksstrsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra) 649241236Sbrooks{ 650326324Sbrooks return istrsenvisxl(&mbdst, NULL, mbsrc, flags, mbextra, NULL); 651241236Sbrooks} 652241236Sbrooks 653241236Sbrooksint 654248302Sbrooksstrsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra) 655241236Sbrooks{ 656326324Sbrooks return istrsenvisxl(&mbdst, &dlen, mbsrc, flags, mbextra, NULL); 657241236Sbrooks} 658241236Sbrooks 659248302Sbrooksint 660248302Sbrooksstrsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra) 661241236Sbrooks{ 662326324Sbrooks return istrsenvisx(&mbdst, NULL, mbsrc, len, flags, mbextra, NULL); 663241236Sbrooks} 664241236Sbrooks 665241236Sbrooksint 666248302Sbrooksstrsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 667248302Sbrooks const char *mbextra) 668241236Sbrooks{ 669326324Sbrooks return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra, NULL); 670241236Sbrooks} 671241236Sbrooks 672241236Sbrooksint 673248302Sbrooksstrsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 674248302Sbrooks const char *mbextra, int *cerr_ptr) 675241236Sbrooks{ 676326324Sbrooks return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra, cerr_ptr); 677241236Sbrooks} 678241236Sbrooks#endif 679241236Sbrooks 680241236Sbrooks#if !HAVE_VIS 681241236Sbrooks/* 682241236Sbrooks * vis - visually encode characters 683241236Sbrooks */ 684248302Sbrookschar * 685248302Sbrooksvis(char *mbdst, int c, int flags, int nextc) 686241236Sbrooks{ 687248302Sbrooks char cc[2]; 688248302Sbrooks int ret; 689241236Sbrooks 690248302Sbrooks cc[0] = c; 691248302Sbrooks cc[1] = nextc; 692241236Sbrooks 693326324Sbrooks ret = istrsenvisx(&mbdst, NULL, cc, 1, flags, "", NULL); 694248302Sbrooks if (ret < 0) 695241236Sbrooks return NULL; 696248302Sbrooks return mbdst + ret; 697241236Sbrooks} 698241236Sbrooks 699241236Sbrookschar * 700248302Sbrooksnvis(char *mbdst, size_t dlen, int c, int flags, int nextc) 701241236Sbrooks{ 702248302Sbrooks char cc[2]; 703248302Sbrooks int ret; 704241236Sbrooks 705248302Sbrooks cc[0] = c; 706248302Sbrooks cc[1] = nextc; 707248302Sbrooks 708326324Sbrooks ret = istrsenvisx(&mbdst, &dlen, cc, 1, flags, "", NULL); 709248302Sbrooks if (ret < 0) 710248302Sbrooks return NULL; 711248302Sbrooks return mbdst + ret; 712241236Sbrooks} 713241236Sbrooks 714241236Sbrooks/* 715248302Sbrooks * strvis - visually encode characters from src into dst 716241236Sbrooks * 717241236Sbrooks * Dst must be 4 times the size of src to account for possible 718241236Sbrooks * expansion. The length of dst, not including the trailing NULL, 719241236Sbrooks * is returned. 720241236Sbrooks */ 721241236Sbrooks 722241236Sbrooksint 723248302Sbrooksstrvis(char *mbdst, const char *mbsrc, int flags) 724241236Sbrooks{ 725326324Sbrooks return istrsenvisxl(&mbdst, NULL, mbsrc, flags, "", NULL); 726241236Sbrooks} 727241236Sbrooks 728241236Sbrooksint 729248302Sbrooksstrnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags) 730241236Sbrooks{ 731326324Sbrooks return istrsenvisxl(&mbdst, &dlen, mbsrc, flags, "", NULL); 732241236Sbrooks} 733241236Sbrooks 734326324Sbrooksint 735326324Sbrooksstravis(char **mbdstp, const char *mbsrc, int flags) 736326324Sbrooks{ 737326324Sbrooks *mbdstp = NULL; 738326324Sbrooks return istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL); 739326324Sbrooks} 740326324Sbrooks 741248302Sbrooks/* 742248302Sbrooks * strvisx - visually encode characters from src into dst 743248302Sbrooks * 744248302Sbrooks * Dst must be 4 times the size of src to account for possible 745248302Sbrooks * expansion. The length of dst, not including the trailing NULL, 746248302Sbrooks * is returned. 747248302Sbrooks * 748248302Sbrooks * Strvisx encodes exactly len characters from src into dst. 749248302Sbrooks * This is useful for encoding a block of data. 750248302Sbrooks */ 751248302Sbrooks 752248302Sbrooksint 753248302Sbrooksstrvisx(char *mbdst, const char *mbsrc, size_t len, int flags) 754241236Sbrooks{ 755326324Sbrooks return istrsenvisx(&mbdst, NULL, mbsrc, len, flags, "", NULL); 756241236Sbrooks} 757241236Sbrooks 758241236Sbrooksint 759248302Sbrooksstrnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags) 760241236Sbrooks{ 761326324Sbrooks return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", NULL); 762241236Sbrooks} 763241236Sbrooks 764241236Sbrooksint 765248302Sbrooksstrenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, 766248302Sbrooks int *cerr_ptr) 767241236Sbrooks{ 768326324Sbrooks return istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr); 769241236Sbrooks} 770241236Sbrooks#endif 771