1326324Sbrooks/*	$NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $	*/
2241236Sbrooks
3241236Sbrooks/*-
4241236Sbrooks * Copyright (c) 1989, 1993
5241236Sbrooks *	The Regents of the University of California.  All rights reserved.
6241236Sbrooks *
7241236Sbrooks * Redistribution and use in source and binary forms, with or without
8241236Sbrooks * modification, are permitted provided that the following conditions
9241236Sbrooks * are met:
10241236Sbrooks * 1. Redistributions of source code must retain the above copyright
11241236Sbrooks *    notice, this list of conditions and the following disclaimer.
12241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
13241236Sbrooks *    notice, this list of conditions and the following disclaimer in the
14241236Sbrooks *    documentation and/or other materials provided with the distribution.
15241236Sbrooks * 3. Neither the name of the University nor the names of its contributors
16241236Sbrooks *    may be used to endorse or promote products derived from this software
17241236Sbrooks *    without specific prior written permission.
18241236Sbrooks *
19241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20241236Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21241236Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22241236Sbrooks * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23241236Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24241236Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25241236Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26241236Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27241236Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28241236Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29241236Sbrooks * SUCH DAMAGE.
30241236Sbrooks */
31241236Sbrooks
32241236Sbrooks/*-
33241236Sbrooks * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34241236Sbrooks * All rights reserved.
35241236Sbrooks *
36241236Sbrooks * Redistribution and use in source and binary forms, with or without
37241236Sbrooks * modification, are permitted provided that the following conditions
38241236Sbrooks * are met:
39241236Sbrooks * 1. Redistributions of source code must retain the above copyright
40241236Sbrooks *    notice, this list of conditions and the following disclaimer.
41241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
42241236Sbrooks *    notice, this list of conditions and the following disclaimer in the
43241236Sbrooks *    documentation and/or other materials provided with the distribution.
44241236Sbrooks *
45241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46241236Sbrooks * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47241236Sbrooks * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48241236Sbrooks * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49241236Sbrooks * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50241236Sbrooks * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51241236Sbrooks * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52241236Sbrooks * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53241236Sbrooks * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54241236Sbrooks * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55241236Sbrooks * POSSIBILITY OF SUCH DAMAGE.
56241236Sbrooks */
57241236Sbrooks
58241236Sbrooks#include <sys/cdefs.h>
59241236Sbrooks#if defined(LIBC_SCCS) && !defined(lint)
60326324Sbrooks__RCSID("$NetBSD: vis.c,v 1.74 2017/11/27 16:37:21 christos Exp $");
61241236Sbrooks#endif /* LIBC_SCCS and not lint */
62248302Sbrooks#ifdef __FBSDID
63244401Sbrooks__FBSDID("$FreeBSD: stable/10/contrib/libc-vis/vis.c 326324 2017-11-28 17:20:53Z brooks $");
64248302Sbrooks#define	_DIAGASSERT(x)	assert(x)
65248302Sbrooks#endif
66241236Sbrooks
67241236Sbrooks#include "namespace.h"
68241236Sbrooks#include <sys/types.h>
69248302Sbrooks#include <sys/param.h>
70241236Sbrooks
71241236Sbrooks#include <assert.h>
72241236Sbrooks#include <vis.h>
73241236Sbrooks#include <errno.h>
74241236Sbrooks#include <stdlib.h>
75248302Sbrooks#include <wchar.h>
76248302Sbrooks#include <wctype.h>
77241236Sbrooks
78241236Sbrooks#ifdef __weak_alias
79241236Sbrooks__weak_alias(strvisx,_strvisx)
80241236Sbrooks#endif
81241236Sbrooks
82241236Sbrooks#if !HAVE_VIS || !HAVE_SVIS
83241236Sbrooks#include <ctype.h>
84241236Sbrooks#include <limits.h>
85241236Sbrooks#include <stdio.h>
86241236Sbrooks#include <string.h>
87241236Sbrooks
/*
 * The reason for going through the trouble to deal with character encodings
 * in vis(3) is that we use this to safely encode the output of commands.
 * This safe encoding varies depending on the character set. For example, if
 * we display ps output in French, we don't want to display French characters
 * as M-foo.
 */
95241236Sbrooks
/* do_svis() is called by do_hvis() and do_mvis() before its definition. */
static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);

#undef BELL
#define BELL L'\a'

/*
 * iscgraph(c): isgraph() evaluated in the "C" locale when the platform
 * provides LC_C_LOCALE, otherwise plain isgraph().
 */
#if defined(LC_C_LOCALE)
#define iscgraph(c)      isgraph_l((c), LC_C_LOCALE)
#else
/* Keep it simple for now, no locale stuff */
#define iscgraph(c)	isgraph(c)
#ifdef notyet
/*
 * NOTE(review): this alternative is never compiled.  As written it could
 * not be enabled without rework, because the iscgraph() macro defined just
 * above would be expanded inside the function definition below.
 */
#include <locale.h>
static int
iscgraph(int c) {
	int rv;
	char *ol;

	ol = setlocale(LC_CTYPE, "C");
	rv = isgraph(c);
	if (ol)
		setlocale(LC_CTYPE, ol);
	return rv;
}
#endif
#endif

/* Graphic-character test: C-locale bytes under VIS_NOLOCALE, else wide. */
#define ISGRAPH(flags, c) \
    (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c))

/*
 * Small classification and formatting helpers.  Every use of the argument
 * and every expansion is parenthesized so that the macros stay correct when
 * handed an expression (e.g. a conditional) rather than a plain identifier.
 */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])

/*
 * Number of wchar_t slots makeextralist() allocates beyond the length of
 * the caller's extra string; it must cover everything that function can
 * append (the two tables below, the per-flag characters and the NUL).
 */
#define MAXEXTRAS	30

/* Characters added to the extra list by VIS_SHELL and VIS_GLOB. */
static const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~";
static const wchar_t char_glob[] = L"*?[#";
135326324Sbrooks
136248302Sbrooks#if !HAVE_NBTOOL_CONFIG_H
137248302Sbrooks#ifndef __NetBSD__
/*
 * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
 * integral type, and it is probably wrong, since currently the maximum
 * number of bytes a character needs is 6. Until this is fixed, the
 * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
 * the assertion is commented out.
 */
145248302Sbrooks#ifdef __FreeBSD__
146248302Sbrooks/*
147248302Sbrooks * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
148248302Sbrooks * mode.
149248302Sbrooks */
150248302Sbrooks#ifndef CTASSERT
151248302Sbrooks#define CTASSERT(x)             _CTASSERT(x, __LINE__)
152248302Sbrooks#define _CTASSERT(x, y)         __CTASSERT(x, y)
153248302Sbrooks#define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
154248302Sbrooks#endif
155248302Sbrooks#endif /* __FreeBSD__ */
156248302SbrooksCTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
157248302Sbrooks#endif /* !__NetBSD__ */
158248302Sbrooks#endif
159241236Sbrooks
/*
 * do_hvis: encode one character HTTP style (RFC 1808).
 *
 * Alphanumerics and the punctuation characters $ - _ . + ! * ' ( ) , are
 * handed to do_svis(); anything else is written as '%' followed by two
 * lower-case hex digits built from the low eight bits of c.
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int code;
	int plain;

	switch (c) {
	/* safe */
	case L'$': case L'-': case L'_': case L'.': case L'+':
	/* extra */
	case L'!': case L'*': case L'\'': case L'(': case L')':
	case L',':
		plain = 1;
		break;
	default:
		plain = iswalnum(c) != 0;
		break;
	}

	if (plain)
		return do_svis(dst, c, flags, nextc, extra);

	code = (unsigned int)c;
	*dst++ = L'%';
	*dst++ = xtoa((code >> 4) & 0xf);
	*dst++ = xtoa(code & 0xf);
	return dst;
}
181241236Sbrooks
/*
 * do_mvis: encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A newline is never quoted.  Otherwise the character is written as '='
 * plus two upper-case hex digits (low eight bits of c) when it is
 *  - white space directly followed by CR or LF,
 *  - not white space and below 33, equal to 61 ('='), or above 126, or
 *  - one of the characters in the explicit list below.
 * Everything else is handed to do_svis().
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int code;
	int quote;

	if (c == L'\n')
		quote = 0;
	else if (wcschr(L"#$@[\\]^`{|}~", c) != NULL)
		/* Specific char to be escaped */
		quote = 1;
	else if (iswspace(c))
		/* Space at the end of the line */
		quote = (nextc == L'\r' || nextc == L'\n');
	else
		/* Out of range */
		quote = (c < 33 || c == 61 || c > 126);

	if (!quote)
		return do_svis(dst, c, flags, nextc, extra);

	code = (unsigned int)c;
	*dst++ = L'=';
	*dst++ = XTOA((code >> 4) & 0xf);
	*dst++ = XTOA(code & 0xf);
	return dst;
}
203241236Sbrooks
204241236Sbrooks/*
205248302Sbrooks * Output single byte of multibyte character.
206241236Sbrooks */
207248302Sbrooksstatic wchar_t *
208248302Sbrooksdo_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
209241236Sbrooks{
210248302Sbrooks	if (flags & VIS_CSTYLE) {
211241236Sbrooks		switch (c) {
212248302Sbrooks		case L'\n':
213248302Sbrooks			*dst++ = L'\\'; *dst++ = L'n';
214241236Sbrooks			return dst;
215248302Sbrooks		case L'\r':
216248302Sbrooks			*dst++ = L'\\'; *dst++ = L'r';
217241236Sbrooks			return dst;
218248302Sbrooks		case L'\b':
219248302Sbrooks			*dst++ = L'\\'; *dst++ = L'b';
220241236Sbrooks			return dst;
221241236Sbrooks		case BELL:
222248302Sbrooks			*dst++ = L'\\'; *dst++ = L'a';
223241236Sbrooks			return dst;
224248302Sbrooks		case L'\v':
225248302Sbrooks			*dst++ = L'\\'; *dst++ = L'v';
226241236Sbrooks			return dst;
227248302Sbrooks		case L'\t':
228248302Sbrooks			*dst++ = L'\\'; *dst++ = L't';
229241236Sbrooks			return dst;
230248302Sbrooks		case L'\f':
231248302Sbrooks			*dst++ = L'\\'; *dst++ = L'f';
232241236Sbrooks			return dst;
233248302Sbrooks		case L' ':
234248302Sbrooks			*dst++ = L'\\'; *dst++ = L's';
235241236Sbrooks			return dst;
236248302Sbrooks		case L'\0':
237248302Sbrooks			*dst++ = L'\\'; *dst++ = L'0';
238248302Sbrooks			if (iswoctal(nextc)) {
239248302Sbrooks				*dst++ = L'0';
240248302Sbrooks				*dst++ = L'0';
241241236Sbrooks			}
242241236Sbrooks			return dst;
243326324Sbrooks		/* We cannot encode these characters in VIS_CSTYLE
244326324Sbrooks		 * because they special meaning */
245326324Sbrooks		case L'n':
246326324Sbrooks		case L'r':
247326324Sbrooks		case L'b':
248326324Sbrooks		case L'a':
249326324Sbrooks		case L'v':
250326324Sbrooks		case L't':
251326324Sbrooks		case L'f':
252326324Sbrooks		case L's':
253326324Sbrooks		case L'0':
254326324Sbrooks		case L'M':
255326324Sbrooks		case L'^':
256326324Sbrooks		case L'$': /* vis(1) -l */
257326324Sbrooks			break;
258241236Sbrooks		default:
259326324Sbrooks			if (ISGRAPH(flags, c) && !iswoctal(c)) {
260248302Sbrooks				*dst++ = L'\\';
261248302Sbrooks				*dst++ = c;
262241236Sbrooks				return dst;
263241236Sbrooks			}
264241236Sbrooks		}
265241236Sbrooks	}
266248302Sbrooks	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
267248302Sbrooks		*dst++ = L'\\';
268248302Sbrooks		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
269248302Sbrooks		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
270248302Sbrooks		*dst++ =			     (c	      & 07) + L'0';
271241236Sbrooks	} else {
272248302Sbrooks		if ((flags & VIS_NOSLASH) == 0)
273248302Sbrooks			*dst++ = L'\\';
274241236Sbrooks
275241236Sbrooks		if (c & 0200) {
276248302Sbrooks			c &= 0177;
277248302Sbrooks			*dst++ = L'M';
278241236Sbrooks		}
279241236Sbrooks
280248302Sbrooks		if (iswcntrl(c)) {
281248302Sbrooks			*dst++ = L'^';
282241236Sbrooks			if (c == 0177)
283248302Sbrooks				*dst++ = L'?';
284241236Sbrooks			else
285248302Sbrooks				*dst++ = c + L'@';
286241236Sbrooks		} else {
287248302Sbrooks			*dst++ = L'-';
288248302Sbrooks			*dst++ = c;
289241236Sbrooks		}
290241236Sbrooks	}
291248302Sbrooks
292241236Sbrooks	return dst;
293241236Sbrooks}
294241236Sbrooks
295248302Sbrooks/*
296248302Sbrooks * This is do_vis, the central code of vis.
297248302Sbrooks * dst:	      Pointer to the destination buffer
298248302Sbrooks * c:	      Character to encode
299248302Sbrooks * flags:     Flags word
300248302Sbrooks * nextc:     The character following 'c'
301248302Sbrooks * extra:     Pointer to the list of extra characters to be
302248302Sbrooks *	      backslash-protected.
303248302Sbrooks */
304248302Sbrooksstatic wchar_t *
305248302Sbrooksdo_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
306248302Sbrooks{
307248302Sbrooks	int iswextra, i, shft;
308248302Sbrooks	uint64_t bmsk, wmsk;
309241236Sbrooks
310248302Sbrooks	iswextra = wcschr(extra, c) != NULL;
311326324Sbrooks	if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) ||
312248302Sbrooks	    ((flags & VIS_SAFE) && iswsafe(c)))) {
313248302Sbrooks		*dst++ = c;
314248302Sbrooks		return dst;
315248302Sbrooks	}
316248302Sbrooks
317248302Sbrooks	/* See comment in istrsenvisx() output loop, below. */
318248302Sbrooks	wmsk = 0;
319248302Sbrooks	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
320248302Sbrooks		shft = i * NBBY;
321248302Sbrooks		bmsk = (uint64_t)0xffLL << shft;
322248302Sbrooks		wmsk |= bmsk;
323248302Sbrooks		if ((c & wmsk) || i == 0)
324248302Sbrooks			dst = do_mbyte(dst, (wint_t)(
325248302Sbrooks			    (uint64_t)(c & bmsk) >> shft),
326248302Sbrooks			    flags, nextc, iswextra);
327248302Sbrooks	}
328248302Sbrooks
329248302Sbrooks	return dst;
330248302Sbrooks}
331248302Sbrooks
332248302Sbrookstypedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
333248302Sbrooks
334241236Sbrooks/*
335241236Sbrooks * Return the appropriate encoding function depending on the flags given.
336241236Sbrooks */
337241236Sbrooksstatic visfun_t
338248302Sbrooksgetvisfun(int flags)
339241236Sbrooks{
340248302Sbrooks	if (flags & VIS_HTTPSTYLE)
341241236Sbrooks		return do_hvis;
342248302Sbrooks	if (flags & VIS_MIMESTYLE)
343241236Sbrooks		return do_mvis;
344241236Sbrooks	return do_svis;
345241236Sbrooks}
346241236Sbrooks
347241236Sbrooks/*
348248302Sbrooks * Expand list of extra characters to not visually encode.
349241236Sbrooks */
350248302Sbrooksstatic wchar_t *
351248302Sbrooksmakeextralist(int flags, const char *src)
352241236Sbrooks{
353248302Sbrooks	wchar_t *dst, *d;
354248302Sbrooks	size_t len;
355326324Sbrooks	const wchar_t *s;
356326324Sbrooks	mbstate_t mbstate;
357241236Sbrooks
358326324Sbrooks	bzero(&mbstate, sizeof(mbstate));
359248302Sbrooks	len = strlen(src);
360248302Sbrooks	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
361241236Sbrooks		return NULL;
362248302Sbrooks
363326324Sbrooks	if ((flags & VIS_NOLOCALE) || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) {
364248302Sbrooks		size_t i;
365248302Sbrooks		for (i = 0; i < len; i++)
366326324Sbrooks			dst[i] = (wchar_t)(u_char)src[i];
367248302Sbrooks		d = dst + len;
368248302Sbrooks	} else
369248302Sbrooks		d = dst + wcslen(dst);
370248302Sbrooks
371326324Sbrooks	if (flags & VIS_GLOB)
372326324Sbrooks		for (s = char_glob; *s; *d++ = *s++)
373326324Sbrooks			continue;
374241236Sbrooks
375326324Sbrooks	if (flags & VIS_SHELL)
376326324Sbrooks		for (s = char_shell; *s; *d++ = *s++)
377326324Sbrooks			continue;
378326324Sbrooks
379248302Sbrooks	if (flags & VIS_SP) *d++ = L' ';
380248302Sbrooks	if (flags & VIS_TAB) *d++ = L'\t';
381248302Sbrooks	if (flags & VIS_NL) *d++ = L'\n';
382326324Sbrooks	if (flags & VIS_DQ) *d++ = L'"';
383248302Sbrooks	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
384248302Sbrooks	*d = L'\0';
385241236Sbrooks
386248302Sbrooks	return dst;
387241236Sbrooks}
388241236Sbrooks
389241236Sbrooks/*
390248302Sbrooks * istrsenvisx()
391248302Sbrooks * 	The main internal function.
392248302Sbrooks *	All user-visible functions call this one.
393241236Sbrooks */
394241236Sbrooksstatic int
395326324Sbrooksistrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
396248302Sbrooks    int flags, const char *mbextra, int *cerr_ptr)
397241236Sbrooks{
398248302Sbrooks	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
399248302Sbrooks	size_t len, olen;
400248302Sbrooks	uint64_t bmsk, wmsk;
401248302Sbrooks	wint_t c;
402241236Sbrooks	visfun_t f;
403326324Sbrooks	int clen = 0, cerr, error = -1, i, shft;
404326324Sbrooks	char *mbdst, *mdst;
405248302Sbrooks	ssize_t mbslength, maxolen;
406326324Sbrooks	mbstate_t mbstate;
407241236Sbrooks
408326324Sbrooks	_DIAGASSERT(mbdstp != NULL);
409272753Sbrooks	_DIAGASSERT(mbsrc != NULL || mblength == 0);
410248302Sbrooks	_DIAGASSERT(mbextra != NULL);
411248302Sbrooks
412326324Sbrooks	mbslength = (ssize_t)mblength;
413248302Sbrooks	/*
414326324Sbrooks	 * When inputing a single character, must also read in the
415326324Sbrooks	 * next character for nextc, the look-ahead character.
416326324Sbrooks	 */
417326324Sbrooks	if (mbslength == 1)
418326324Sbrooks		mbslength++;
419326324Sbrooks
420326324Sbrooks	/*
421248302Sbrooks	 * Input (mbsrc) is a char string considered to be multibyte
422248302Sbrooks	 * characters.  The input loop will read this string pulling
423248302Sbrooks	 * one character, possibly multiple bytes, from mbsrc and
424248302Sbrooks	 * converting each to wchar_t in src.
425248302Sbrooks	 *
426248302Sbrooks	 * The vis conversion will be done using the wide char
427248302Sbrooks	 * wchar_t string.
428248302Sbrooks	 *
429248302Sbrooks	 * This will then be converted back to a multibyte string to
430248302Sbrooks	 * return to the caller.
431248302Sbrooks	 */
432248302Sbrooks
433248302Sbrooks	/* Allocate space for the wide char strings */
434248302Sbrooks	psrc = pdst = extra = NULL;
435326324Sbrooks	mdst = NULL;
436326324Sbrooks	if ((psrc = calloc(mbslength + 1, sizeof(*psrc))) == NULL)
437248302Sbrooks		return -1;
438326324Sbrooks	if ((pdst = calloc((16 * mbslength) + 1, sizeof(*pdst))) == NULL)
439248302Sbrooks		goto out;
440326324Sbrooks	if (*mbdstp == NULL) {
441326324Sbrooks		if ((mdst = calloc((16 * mbslength) + 1, sizeof(*mdst))) == NULL)
442326324Sbrooks			goto out;
443326324Sbrooks		*mbdstp = mdst;
444326324Sbrooks	}
445326324Sbrooks
446326324Sbrooks	mbdst = *mbdstp;
447248302Sbrooks	dst = pdst;
448248302Sbrooks	src = psrc;
449248302Sbrooks
450326324Sbrooks	if (flags & VIS_NOLOCALE) {
451326324Sbrooks		/* Do one byte at a time conversion */
452326324Sbrooks		cerr = 1;
453326324Sbrooks	} else {
454326324Sbrooks		/* Use caller's multibyte conversion error flag. */
455326324Sbrooks		cerr = cerr_ptr ? *cerr_ptr : 0;
456326324Sbrooks	}
457248302Sbrooks
458248302Sbrooks	/*
459248302Sbrooks	 * Input loop.
460248302Sbrooks	 * Handle up to mblength characters (not bytes).  We do not
461248302Sbrooks	 * stop at NULs because we may be processing a block of data
462248302Sbrooks	 * that includes NULs.
463248302Sbrooks	 */
464326324Sbrooks	bzero(&mbstate, sizeof(mbstate));
465248302Sbrooks	while (mbslength > 0) {
466248302Sbrooks		/* Convert one multibyte character to wchar_t. */
467248302Sbrooks		if (!cerr)
468326324Sbrooks			clen = mbrtowc(src, mbsrc, MB_LEN_MAX, &mbstate);
469248302Sbrooks		if (cerr || clen < 0) {
470248302Sbrooks			/* Conversion error, process as a byte instead. */
471248302Sbrooks			*src = (wint_t)(u_char)*mbsrc;
472248302Sbrooks			clen = 1;
473248302Sbrooks			cerr = 1;
474248302Sbrooks		}
475326324Sbrooks		if (clen == 0) {
476248302Sbrooks			/*
477248302Sbrooks			 * NUL in input gives 0 return value. process
478248302Sbrooks			 * as single NUL byte and keep going.
479248302Sbrooks			 */
480248302Sbrooks			clen = 1;
481326324Sbrooks		}
482248302Sbrooks		/* Advance buffer character pointer. */
483248302Sbrooks		src++;
484248302Sbrooks		/* Advance input pointer by number of bytes read. */
485248302Sbrooks		mbsrc += clen;
486248302Sbrooks		/* Decrement input byte count. */
487248302Sbrooks		mbslength -= clen;
488241236Sbrooks	}
489248302Sbrooks	len = src - psrc;
490248302Sbrooks	src = psrc;
491326324Sbrooks
492248302Sbrooks	/*
493248302Sbrooks	 * In the single character input case, we will have actually
494248302Sbrooks	 * processed two characters, c and nextc.  Reset len back to
495248302Sbrooks	 * just a single character.
496248302Sbrooks	 */
497248302Sbrooks	if (mblength < len)
498248302Sbrooks		len = mblength;
499248302Sbrooks
500248302Sbrooks	/* Convert extra argument to list of characters for this mode. */
501248302Sbrooks	extra = makeextralist(flags, mbextra);
502248302Sbrooks	if (!extra) {
503248302Sbrooks		if (dlen && *dlen == 0) {
504248302Sbrooks			errno = ENOSPC;
505248302Sbrooks			goto out;
506248302Sbrooks		}
507326324Sbrooks		*mbdst = '\0';	/* can't create extra, return "" */
508248302Sbrooks		error = 0;
509248302Sbrooks		goto out;
510248302Sbrooks	}
511248302Sbrooks
512248302Sbrooks	/* Look up which processing function to call. */
513248302Sbrooks	f = getvisfun(flags);
514248302Sbrooks
515248302Sbrooks	/*
516248302Sbrooks	 * Main processing loop.
517248302Sbrooks	 * Call do_Xvis processing function one character at a time
518248302Sbrooks	 * with next character available for look-ahead.
519248302Sbrooks	 */
520248302Sbrooks	for (start = dst; len > 0; len--) {
521248302Sbrooks		c = *src++;
522248302Sbrooks		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
523241236Sbrooks		if (dst == NULL) {
524241236Sbrooks			errno = ENOSPC;
525248302Sbrooks			goto out;
526241236Sbrooks		}
527241236Sbrooks	}
528248302Sbrooks
529248302Sbrooks	/* Terminate the string in the buffer. */
530248302Sbrooks	*dst = L'\0';
531248302Sbrooks
532248302Sbrooks	/*
533248302Sbrooks	 * Output loop.
534248302Sbrooks	 * Convert wchar_t string back to multibyte output string.
535248302Sbrooks	 * If we have hit a multi-byte conversion error on input,
536248302Sbrooks	 * output byte-by-byte here.  Else use wctomb().
537248302Sbrooks	 */
538248302Sbrooks	len = wcslen(start);
539248302Sbrooks	maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
540248302Sbrooks	olen = 0;
541326324Sbrooks	bzero(&mbstate, sizeof(mbstate));
542248302Sbrooks	for (dst = start; len > 0; len--) {
543248302Sbrooks		if (!cerr)
544326324Sbrooks			clen = wcrtomb(mbdst, *dst, &mbstate);
545248302Sbrooks		if (cerr || clen < 0) {
546248302Sbrooks			/*
547248302Sbrooks			 * Conversion error, process as a byte(s) instead.
548248302Sbrooks			 * Examine each byte and higher-order bytes for
549248302Sbrooks			 * data.  E.g.,
550248302Sbrooks			 *	0x000000000000a264 -> a2 64
551248302Sbrooks			 *	0x000000001f00a264 -> 1f 00 a2 64
552248302Sbrooks			 */
553248302Sbrooks			clen = 0;
554248302Sbrooks			wmsk = 0;
555248302Sbrooks			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
556248302Sbrooks				shft = i * NBBY;
557248302Sbrooks				bmsk = (uint64_t)0xffLL << shft;
558248302Sbrooks				wmsk |= bmsk;
559248302Sbrooks				if ((*dst & wmsk) || i == 0)
560248302Sbrooks					mbdst[clen++] = (char)(
561248302Sbrooks					    (uint64_t)(*dst & bmsk) >>
562248302Sbrooks					    shft);
563248302Sbrooks			}
564248302Sbrooks			cerr = 1;
565248302Sbrooks		}
566248302Sbrooks		/* If this character would exceed our output limit, stop. */
567248302Sbrooks		if (olen + clen > (size_t)maxolen)
568248302Sbrooks			break;
569248302Sbrooks		/* Advance output pointer by number of bytes written. */
570248302Sbrooks		mbdst += clen;
571248302Sbrooks		/* Advance buffer character pointer. */
572248302Sbrooks		dst++;
573248302Sbrooks		/* Incrment output character count. */
574248302Sbrooks		olen += clen;
575241236Sbrooks	}
576248302Sbrooks
577248302Sbrooks	/* Terminate the output string. */
578248302Sbrooks	*mbdst = '\0';
579248302Sbrooks
580326324Sbrooks	if (flags & VIS_NOLOCALE) {
581326324Sbrooks		/* Pass conversion error flag out. */
582326324Sbrooks		if (cerr_ptr)
583326324Sbrooks			*cerr_ptr = cerr;
584326324Sbrooks	}
585248302Sbrooks
586248302Sbrooks	free(extra);
587248302Sbrooks	free(pdst);
588248302Sbrooks	free(psrc);
589248302Sbrooks
590248302Sbrooks	return (int)olen;
591248302Sbrooksout:
592248302Sbrooks	free(extra);
593248302Sbrooks	free(pdst);
594248302Sbrooks	free(psrc);
595326324Sbrooks	free(mdst);
596248302Sbrooks	return error;
597241236Sbrooks}
598272753Sbrooks
/*
 * istrsenvisxl: istrsenvisx() for a NUL-terminated source string.  The
 * input length is strlen(mbsrc), or 0 when mbsrc is NULL.
 */
static int
istrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t srclen = 0;

	if (mbsrc != NULL)
		srclen = strlen(mbsrc);

	return istrsenvisx(mbdstp, dlen, mbsrc, srclen, flags, mbextra,
	    cerr_ptr);
}
606272753Sbrooks
607248302Sbrooks#endif
608241236Sbrooks
609248302Sbrooks#if !HAVE_SVIS
/*
 *	The "svis" variants all take an "extra" arg that is a pointer
 *	to a NUL-terminated list of characters to be encoded, too.
 *	These functions are useful, e.g., to encode strings in such a
 *	way that they are not interpreted by a shell.
 */
616248302Sbrooks
/*
 * svis: encode the single character c (with look-ahead nextc) into mbdst,
 * also encoding the characters listed in mbextra.  Returns a pointer to
 * the end of the encoded text, or NULL if istrsenvisx() fails.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* istrsenvisx() reads the look-ahead character from pair[1]. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
631248302Sbrooks
/*
 * snvis: like svis(), with the destination buffer size given by dlen.
 * Returns a pointer to the end of the encoded text, or NULL if
 * istrsenvisx() fails.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* istrsenvisx() reads the look-ahead character from pair[1]. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
646248302Sbrooks
/*
 * strsvis: encode the NUL-terminated string mbsrc into mbdst, also
 * encoding the characters in mbextra.  No output size is given.
 * Returns the result of istrsenvisxl().
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, mbextra, NULL);
}
652241236Sbrooks
/*
 * strsnvis: like strsvis(), with the destination buffer size given by
 * dlen.  Returns the result of istrsenvisxl().
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisxl(&out, &room, mbsrc, flags, mbextra, NULL);
}
658241236Sbrooks
/*
 * strsvisx: encode exactly len bytes of mbsrc into mbdst, also encoding
 * the characters in mbextra.  No output size is given.
 * Returns the result of istrsenvisx().
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, mbextra, NULL);
}
664241236Sbrooks
/*
 * strsnvisx: like strsvisx(), with the destination buffer size given by
 * dlen.  Returns the result of istrsenvisx().
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, mbextra, NULL);
}
671241236Sbrooks
/*
 * strsenvisx: like strsnvisx(), additionally handing the caller's
 * multibyte conversion error flag (cerr_ptr) to istrsenvisx().
 * Returns the result of istrsenvisx().
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, mbextra, cerr_ptr);
}
678241236Sbrooks#endif
679241236Sbrooks
680241236Sbrooks#if !HAVE_VIS
/*
 * vis - visually encode a single character
 *
 *	Encodes c (with look-ahead nextc) into mbdst using an empty extra
 *	list.  Returns a pointer to the end of the encoded text, or NULL
 *	if istrsenvisx() fails.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* istrsenvisx() reads the look-ahead character from pair[1]. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
698241236Sbrooks
/*
 * nvis: like vis(), with the destination buffer size given by dlen.
 * Returns a pointer to the end of the encoded text, or NULL if
 * istrsenvisx() fails.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* istrsenvisx() reads the look-ahead character from pair[1]. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
713241236Sbrooks
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 */

int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, "", NULL);
}
727241236Sbrooks
/*
 * strnvis: like strvis(), with the destination buffer size given by dlen.
 * Returns the result of istrsenvisxl().
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisxl(&out, &room, mbsrc, flags, "", NULL);
}
733241236Sbrooks
/*
 * stravis: encode the NUL-terminated string mbsrc into a buffer allocated
 * by istrsenvisx().  *mbdstp is cleared first, which is what requests the
 * allocation; the buffer pointer is handed back through *mbdstp.
 * Returns the result of istrsenvisxl().
 */
int
stravis(char **mbdstp, const char *mbsrc, int flags)
{
	int n;

	*mbdstp = NULL;
	n = istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL);
	return n;
}
740326324Sbrooks
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len bytes from src into dst, so it can
 *	be used to encode a block of data that contains NULs.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, "", NULL);
}
757241236Sbrooks
/*
 * strnvisx: like strvisx(), with the destination buffer size given by
 * dlen.  Returns the result of istrsenvisx().
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, "", NULL);
}
763241236Sbrooks
/*
 * strenvisx: like strnvisx(), additionally handing the caller's multibyte
 * conversion error flag (cerr_ptr) to istrsenvisx().
 * Returns the result of istrsenvisx().
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	char *out = mbdst;
	size_t room = dlen;

	return istrsenvisx(&out, &room, mbsrc, len, flags, "", cerr_ptr);
}
770241236Sbrooks#endif
771