1272753Sbrooks/*	$NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $	*/
2241236Sbrooks
3241236Sbrooks/*-
4241236Sbrooks * Copyright (c) 1989, 1993
5241236Sbrooks *	The Regents of the University of California.  All rights reserved.
6241236Sbrooks *
7241236Sbrooks * Redistribution and use in source and binary forms, with or without
8241236Sbrooks * modification, are permitted provided that the following conditions
9241236Sbrooks * are met:
10241236Sbrooks * 1. Redistributions of source code must retain the above copyright
11241236Sbrooks *    notice, this list of conditions and the following disclaimer.
12241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
13241236Sbrooks *    notice, this list of conditions and the following disclaimer in the
14241236Sbrooks *    documentation and/or other materials provided with the distribution.
15241236Sbrooks * 3. Neither the name of the University nor the names of its contributors
16241236Sbrooks *    may be used to endorse or promote products derived from this software
17241236Sbrooks *    without specific prior written permission.
18241236Sbrooks *
19241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20241236Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21241236Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22241236Sbrooks * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23241236Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24241236Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25241236Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26241236Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27241236Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28241236Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29241236Sbrooks * SUCH DAMAGE.
30241236Sbrooks */
31241236Sbrooks
32241236Sbrooks/*-
33241236Sbrooks * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34241236Sbrooks * All rights reserved.
35241236Sbrooks *
36241236Sbrooks * Redistribution and use in source and binary forms, with or without
37241236Sbrooks * modification, are permitted provided that the following conditions
38241236Sbrooks * are met:
39241236Sbrooks * 1. Redistributions of source code must retain the above copyright
40241236Sbrooks *    notice, this list of conditions and the following disclaimer.
41241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
42241236Sbrooks *    notice, this list of conditions and the following disclaimer in the
43241236Sbrooks *    documentation and/or other materials provided with the distribution.
44241236Sbrooks *
45241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46241236Sbrooks * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47241236Sbrooks * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48241236Sbrooks * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49241236Sbrooks * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50241236Sbrooks * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51241236Sbrooks * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52241236Sbrooks * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53241236Sbrooks * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54241236Sbrooks * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55241236Sbrooks * POSSIBILITY OF SUCH DAMAGE.
56241236Sbrooks */
57241236Sbrooks
58241236Sbrooks#include <sys/cdefs.h>
59241236Sbrooks#if defined(LIBC_SCCS) && !defined(lint)
60272753Sbrooks__RCSID("$NetBSD: vis.c,v 1.62 2014/09/08 17:35:01 christos Exp $");
61241236Sbrooks#endif /* LIBC_SCCS and not lint */
62248302Sbrooks#ifdef __FBSDID
63244401Sbrooks__FBSDID("$FreeBSD: releng/10.3/contrib/libc-vis/vis.c 272753 2014-10-08 15:44:11Z brooks $");
64248302Sbrooks#define	_DIAGASSERT(x)	assert(x)
65248302Sbrooks#endif
66241236Sbrooks
67241236Sbrooks#include "namespace.h"
68241236Sbrooks#include <sys/types.h>
69248302Sbrooks#include <sys/param.h>
70241236Sbrooks
71241236Sbrooks#include <assert.h>
72241236Sbrooks#include <vis.h>
73241236Sbrooks#include <errno.h>
74241236Sbrooks#include <stdlib.h>
75248302Sbrooks#include <wchar.h>
76248302Sbrooks#include <wctype.h>
77241236Sbrooks
78241236Sbrooks#ifdef __weak_alias
79241236Sbrooks__weak_alias(strvisx,_strvisx)
80241236Sbrooks#endif
81241236Sbrooks
82241236Sbrooks#if !HAVE_VIS || !HAVE_SVIS
83241236Sbrooks#include <ctype.h>
84241236Sbrooks#include <limits.h>
85241236Sbrooks#include <stdio.h>
86241236Sbrooks#include <string.h>
87241236Sbrooks
/*
 * The reason for going through the trouble of dealing with character
 * encodings in vis(3) is that we use it to safely encode the output of
 * commands. This safe encoding varies depending on the character set.
 * For example, if we display ps output in French, we don't want to
 * display French characters as M-foo.
 */
95241236Sbrooks
/* Forward declaration: do_hvis() and do_mvis() delegate to do_svis(). */
static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);

/*
 * Character-class helpers.  These are macros that may evaluate their
 * argument more than once, so never pass an expression with side effects.
 * Every use of the argument is parenthesized so that compound expressions
 * (e.g. a conditional expression) are classified correctly.
 */
#undef BELL
#define BELL L'\a'

/* True if c, truncated to an unsigned char, is an octal digit. */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
/* True for the whitespace do_svis() passes through when not in "extra". */
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
/* True for the control characters do_svis() passes through with VIS_SAFE. */
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Map a nibble (0..15) to its lowercase / uppercase hexadecimal digit. */
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])

/*
 * Slack, in wide characters, that makeextralist() allocates on top of the
 * caller's list for the flag-selected extras and the terminating NUL.
 */
#define MAXEXTRAS	10
108241236Sbrooks
109248302Sbrooks#if !HAVE_NBTOOL_CONFIG_H
110248302Sbrooks#ifndef __NetBSD__
/*
 * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
 * integral type and is probably wrong, since the maximum number of
 * bytes a character currently needs is 6. Until this is fixed, the
 * loops below use sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
 * the assertion is disabled there.
 */
118248302Sbrooks#ifdef __FreeBSD__
119248302Sbrooks/*
120248302Sbrooks * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
121248302Sbrooks * mode.
122248302Sbrooks */
123248302Sbrooks#ifndef CTASSERT
124248302Sbrooks#define CTASSERT(x)             _CTASSERT(x, __LINE__)
125248302Sbrooks#define _CTASSERT(x, y)         __CTASSERT(x, y)
126248302Sbrooks#define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
127248302Sbrooks#endif
128248302Sbrooks#endif /* __FreeBSD__ */
129248302SbrooksCTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
130248302Sbrooks#endif /* !__NetBSD__ */
131248302Sbrooks#endif
132241236Sbrooks
/*
 * do_hvis - encode one character for HTTP-style (RFC 1808) output.
 *
 * Alphanumerics and the punctuation listed below are handed on to
 * do_svis().  Every other character is written as '%' followed by two
 * lowercase hexadecimal digits taken from the low eight bits of c.
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int byte;

	if (!iswalnum(c)) {
		switch (c) {
		/* safe */
		case L'$': case L'-': case L'_': case L'.': case L'+':
		/* extra */
		case L'!': case L'*': case L'\'': case L'(': case L')':
		case L',':
			break;
		default:
			/* Not allowed verbatim: emit %xx. */
			byte = (unsigned int)c;
			*dst++ = L'%';
			*dst++ = xtoa((byte >> 4) & 0xf);
			*dst++ = xtoa(byte & 0xf);
			return dst;
		}
	}

	return do_svis(dst, c, flags, nextc, extra);
}
154241236Sbrooks
/*
 * do_mvis - encode one character for Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A newline is never escaped here.  Otherwise a character is written as
 * '=' plus two uppercase hexadecimal digits (low eight bits of c) when it
 * is whitespace directly before CR or LF, is outside the printable range
 * handled below, or is one of the specifically listed characters.
 * Anything else is handed on to do_svis().
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	const wchar_t *special = L"#$@[\\]^`{|}~";
	unsigned int byte;
	int escape;

	if (c == L'\n')
		escape = 0;
	else if (iswspace(c))
		/* Space at the end of the line, or a listed character. */
		escape = (nextc == L'\r' || nextc == L'\n') ||
		    wcschr(special, c) != NULL;
	else
		/* Out of range (61 is '='), or a listed character. */
		escape = (c < 33 || c == 61 || c > 126) ||
		    wcschr(special, c) != NULL;

	if (!escape)
		return do_svis(dst, c, flags, nextc, extra);

	byte = (unsigned int)c;
	*dst++ = L'=';
	*dst++ = XTOA((byte >> 4) & 0xf);
	*dst++ = XTOA(byte & 0xf);
	return dst;
}
176241236Sbrooks
177241236Sbrooks/*
178248302Sbrooks * Output single byte of multibyte character.
179241236Sbrooks */
180248302Sbrooksstatic wchar_t *
181248302Sbrooksdo_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
182241236Sbrooks{
183248302Sbrooks	if (flags & VIS_CSTYLE) {
184241236Sbrooks		switch (c) {
185248302Sbrooks		case L'\n':
186248302Sbrooks			*dst++ = L'\\'; *dst++ = L'n';
187241236Sbrooks			return dst;
188248302Sbrooks		case L'\r':
189248302Sbrooks			*dst++ = L'\\'; *dst++ = L'r';
190241236Sbrooks			return dst;
191248302Sbrooks		case L'\b':
192248302Sbrooks			*dst++ = L'\\'; *dst++ = L'b';
193241236Sbrooks			return dst;
194241236Sbrooks		case BELL:
195248302Sbrooks			*dst++ = L'\\'; *dst++ = L'a';
196241236Sbrooks			return dst;
197248302Sbrooks		case L'\v':
198248302Sbrooks			*dst++ = L'\\'; *dst++ = L'v';
199241236Sbrooks			return dst;
200248302Sbrooks		case L'\t':
201248302Sbrooks			*dst++ = L'\\'; *dst++ = L't';
202241236Sbrooks			return dst;
203248302Sbrooks		case L'\f':
204248302Sbrooks			*dst++ = L'\\'; *dst++ = L'f';
205241236Sbrooks			return dst;
206248302Sbrooks		case L' ':
207248302Sbrooks			*dst++ = L'\\'; *dst++ = L's';
208241236Sbrooks			return dst;
209248302Sbrooks		case L'\0':
210248302Sbrooks			*dst++ = L'\\'; *dst++ = L'0';
211248302Sbrooks			if (iswoctal(nextc)) {
212248302Sbrooks				*dst++ = L'0';
213248302Sbrooks				*dst++ = L'0';
214241236Sbrooks			}
215241236Sbrooks			return dst;
216241236Sbrooks		default:
217248302Sbrooks			if (iswgraph(c)) {
218248302Sbrooks				*dst++ = L'\\';
219248302Sbrooks				*dst++ = c;
220241236Sbrooks				return dst;
221241236Sbrooks			}
222241236Sbrooks		}
223241236Sbrooks	}
224248302Sbrooks	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
225248302Sbrooks		*dst++ = L'\\';
226248302Sbrooks		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
227248302Sbrooks		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
228248302Sbrooks		*dst++ =			     (c	      & 07) + L'0';
229241236Sbrooks	} else {
230248302Sbrooks		if ((flags & VIS_NOSLASH) == 0)
231248302Sbrooks			*dst++ = L'\\';
232241236Sbrooks
233241236Sbrooks		if (c & 0200) {
234248302Sbrooks			c &= 0177;
235248302Sbrooks			*dst++ = L'M';
236241236Sbrooks		}
237241236Sbrooks
238248302Sbrooks		if (iswcntrl(c)) {
239248302Sbrooks			*dst++ = L'^';
240241236Sbrooks			if (c == 0177)
241248302Sbrooks				*dst++ = L'?';
242241236Sbrooks			else
243248302Sbrooks				*dst++ = c + L'@';
244241236Sbrooks		} else {
245248302Sbrooks			*dst++ = L'-';
246248302Sbrooks			*dst++ = c;
247241236Sbrooks		}
248241236Sbrooks	}
249248302Sbrooks
250241236Sbrooks	return dst;
251241236Sbrooks}
252241236Sbrooks
253248302Sbrooks/*
254248302Sbrooks * This is do_vis, the central code of vis.
255248302Sbrooks * dst:	      Pointer to the destination buffer
256248302Sbrooks * c:	      Character to encode
257248302Sbrooks * flags:     Flags word
258248302Sbrooks * nextc:     The character following 'c'
259248302Sbrooks * extra:     Pointer to the list of extra characters to be
260248302Sbrooks *	      backslash-protected.
261248302Sbrooks */
262248302Sbrooksstatic wchar_t *
263248302Sbrooksdo_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
264248302Sbrooks{
265248302Sbrooks	int iswextra, i, shft;
266248302Sbrooks	uint64_t bmsk, wmsk;
267241236Sbrooks
268248302Sbrooks	iswextra = wcschr(extra, c) != NULL;
269248302Sbrooks	if (!iswextra && (iswgraph(c) || iswwhite(c) ||
270248302Sbrooks	    ((flags & VIS_SAFE) && iswsafe(c)))) {
271248302Sbrooks		*dst++ = c;
272248302Sbrooks		return dst;
273248302Sbrooks	}
274248302Sbrooks
275248302Sbrooks	/* See comment in istrsenvisx() output loop, below. */
276248302Sbrooks	wmsk = 0;
277248302Sbrooks	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
278248302Sbrooks		shft = i * NBBY;
279248302Sbrooks		bmsk = (uint64_t)0xffLL << shft;
280248302Sbrooks		wmsk |= bmsk;
281248302Sbrooks		if ((c & wmsk) || i == 0)
282248302Sbrooks			dst = do_mbyte(dst, (wint_t)(
283248302Sbrooks			    (uint64_t)(c & bmsk) >> shft),
284248302Sbrooks			    flags, nextc, iswextra);
285248302Sbrooks	}
286248302Sbrooks
287248302Sbrooks	return dst;
288248302Sbrooks}
289248302Sbrooks
290248302Sbrookstypedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
291248302Sbrooks
292241236Sbrooks/*
293241236Sbrooks * Return the appropriate encoding function depending on the flags given.
294241236Sbrooks */
295241236Sbrooksstatic visfun_t
296248302Sbrooksgetvisfun(int flags)
297241236Sbrooks{
298248302Sbrooks	if (flags & VIS_HTTPSTYLE)
299241236Sbrooks		return do_hvis;
300248302Sbrooks	if (flags & VIS_MIMESTYLE)
301241236Sbrooks		return do_mvis;
302241236Sbrooks	return do_svis;
303241236Sbrooks}
304241236Sbrooks
305241236Sbrooks/*
306248302Sbrooks * Expand list of extra characters to not visually encode.
307241236Sbrooks */
308248302Sbrooksstatic wchar_t *
309248302Sbrooksmakeextralist(int flags, const char *src)
310241236Sbrooks{
311248302Sbrooks	wchar_t *dst, *d;
312248302Sbrooks	size_t len;
313241236Sbrooks
314248302Sbrooks	len = strlen(src);
315248302Sbrooks	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
316241236Sbrooks		return NULL;
317248302Sbrooks
318248302Sbrooks	if (mbstowcs(dst, src, len) == (size_t)-1) {
319248302Sbrooks		size_t i;
320248302Sbrooks		for (i = 0; i < len; i++)
321248302Sbrooks			dst[i] = (wint_t)(u_char)src[i];
322248302Sbrooks		d = dst + len;
323248302Sbrooks	} else
324248302Sbrooks		d = dst + wcslen(dst);
325248302Sbrooks
326248302Sbrooks	if (flags & VIS_GLOB) {
327248302Sbrooks		*d++ = L'*';
328248302Sbrooks		*d++ = L'?';
329248302Sbrooks		*d++ = L'[';
330248302Sbrooks		*d++ = L'#';
331241236Sbrooks	}
332241236Sbrooks
333248302Sbrooks	if (flags & VIS_SP) *d++ = L' ';
334248302Sbrooks	if (flags & VIS_TAB) *d++ = L'\t';
335248302Sbrooks	if (flags & VIS_NL) *d++ = L'\n';
336248302Sbrooks	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
337248302Sbrooks	*d = L'\0';
338241236Sbrooks
339248302Sbrooks	return dst;
340241236Sbrooks}
341241236Sbrooks
342241236Sbrooks/*
343248302Sbrooks * istrsenvisx()
344248302Sbrooks * 	The main internal function.
345248302Sbrooks *	All user-visible functions call this one.
346241236Sbrooks */
347241236Sbrooksstatic int
348248302Sbrooksistrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
349248302Sbrooks    int flags, const char *mbextra, int *cerr_ptr)
350241236Sbrooks{
351248302Sbrooks	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
352248302Sbrooks	size_t len, olen;
353248302Sbrooks	uint64_t bmsk, wmsk;
354248302Sbrooks	wint_t c;
355241236Sbrooks	visfun_t f;
356248302Sbrooks	int clen = 0, cerr = 0, error = -1, i, shft;
357248302Sbrooks	ssize_t mbslength, maxolen;
358241236Sbrooks
359248302Sbrooks	_DIAGASSERT(mbdst != NULL);
360272753Sbrooks	_DIAGASSERT(mbsrc != NULL || mblength == 0);
361248302Sbrooks	_DIAGASSERT(mbextra != NULL);
362248302Sbrooks
363248302Sbrooks	/*
364248302Sbrooks	 * Input (mbsrc) is a char string considered to be multibyte
365248302Sbrooks	 * characters.  The input loop will read this string pulling
366248302Sbrooks	 * one character, possibly multiple bytes, from mbsrc and
367248302Sbrooks	 * converting each to wchar_t in src.
368248302Sbrooks	 *
369248302Sbrooks	 * The vis conversion will be done using the wide char
370248302Sbrooks	 * wchar_t string.
371248302Sbrooks	 *
372248302Sbrooks	 * This will then be converted back to a multibyte string to
373248302Sbrooks	 * return to the caller.
374248302Sbrooks	 */
375248302Sbrooks
376248302Sbrooks	/* Allocate space for the wide char strings */
377248302Sbrooks	psrc = pdst = extra = NULL;
378248302Sbrooks	if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
379248302Sbrooks		return -1;
380248302Sbrooks	if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
381248302Sbrooks		goto out;
382248302Sbrooks	dst = pdst;
383248302Sbrooks	src = psrc;
384248302Sbrooks
385248302Sbrooks	/* Use caller's multibyte conversion error flag. */
386248302Sbrooks	if (cerr_ptr)
387248302Sbrooks		cerr = *cerr_ptr;
388248302Sbrooks
389248302Sbrooks	/*
390248302Sbrooks	 * Input loop.
391248302Sbrooks	 * Handle up to mblength characters (not bytes).  We do not
392248302Sbrooks	 * stop at NULs because we may be processing a block of data
393248302Sbrooks	 * that includes NULs.
394248302Sbrooks	 */
395248302Sbrooks	mbslength = (ssize_t)mblength;
396248302Sbrooks	/*
397248302Sbrooks	 * When inputing a single character, must also read in the
398248302Sbrooks	 * next character for nextc, the look-ahead character.
399248302Sbrooks	 */
400248302Sbrooks	if (mbslength == 1)
401248302Sbrooks		mbslength++;
402248302Sbrooks	while (mbslength > 0) {
403248302Sbrooks		/* Convert one multibyte character to wchar_t. */
404248302Sbrooks		if (!cerr)
405248302Sbrooks			clen = mbtowc(src, mbsrc, MB_LEN_MAX);
406248302Sbrooks		if (cerr || clen < 0) {
407248302Sbrooks			/* Conversion error, process as a byte instead. */
408248302Sbrooks			*src = (wint_t)(u_char)*mbsrc;
409248302Sbrooks			clen = 1;
410248302Sbrooks			cerr = 1;
411248302Sbrooks		}
412248302Sbrooks		if (clen == 0)
413248302Sbrooks			/*
414248302Sbrooks			 * NUL in input gives 0 return value. process
415248302Sbrooks			 * as single NUL byte and keep going.
416248302Sbrooks			 */
417248302Sbrooks			clen = 1;
418248302Sbrooks		/* Advance buffer character pointer. */
419248302Sbrooks		src++;
420248302Sbrooks		/* Advance input pointer by number of bytes read. */
421248302Sbrooks		mbsrc += clen;
422248302Sbrooks		/* Decrement input byte count. */
423248302Sbrooks		mbslength -= clen;
424241236Sbrooks	}
425248302Sbrooks	len = src - psrc;
426248302Sbrooks	src = psrc;
427248302Sbrooks	/*
428248302Sbrooks	 * In the single character input case, we will have actually
429248302Sbrooks	 * processed two characters, c and nextc.  Reset len back to
430248302Sbrooks	 * just a single character.
431248302Sbrooks	 */
432248302Sbrooks	if (mblength < len)
433248302Sbrooks		len = mblength;
434248302Sbrooks
435248302Sbrooks	/* Convert extra argument to list of characters for this mode. */
436248302Sbrooks	extra = makeextralist(flags, mbextra);
437248302Sbrooks	if (!extra) {
438248302Sbrooks		if (dlen && *dlen == 0) {
439248302Sbrooks			errno = ENOSPC;
440248302Sbrooks			goto out;
441248302Sbrooks		}
442248302Sbrooks		*mbdst = '\0';		/* can't create extra, return "" */
443248302Sbrooks		error = 0;
444248302Sbrooks		goto out;
445248302Sbrooks	}
446248302Sbrooks
447248302Sbrooks	/* Look up which processing function to call. */
448248302Sbrooks	f = getvisfun(flags);
449248302Sbrooks
450248302Sbrooks	/*
451248302Sbrooks	 * Main processing loop.
452248302Sbrooks	 * Call do_Xvis processing function one character at a time
453248302Sbrooks	 * with next character available for look-ahead.
454248302Sbrooks	 */
455248302Sbrooks	for (start = dst; len > 0; len--) {
456248302Sbrooks		c = *src++;
457248302Sbrooks		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
458241236Sbrooks		if (dst == NULL) {
459241236Sbrooks			errno = ENOSPC;
460248302Sbrooks			goto out;
461241236Sbrooks		}
462241236Sbrooks	}
463248302Sbrooks
464248302Sbrooks	/* Terminate the string in the buffer. */
465248302Sbrooks	*dst = L'\0';
466248302Sbrooks
467248302Sbrooks	/*
468248302Sbrooks	 * Output loop.
469248302Sbrooks	 * Convert wchar_t string back to multibyte output string.
470248302Sbrooks	 * If we have hit a multi-byte conversion error on input,
471248302Sbrooks	 * output byte-by-byte here.  Else use wctomb().
472248302Sbrooks	 */
473248302Sbrooks	len = wcslen(start);
474248302Sbrooks	maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1);
475248302Sbrooks	olen = 0;
476248302Sbrooks	for (dst = start; len > 0; len--) {
477248302Sbrooks		if (!cerr)
478248302Sbrooks			clen = wctomb(mbdst, *dst);
479248302Sbrooks		if (cerr || clen < 0) {
480248302Sbrooks			/*
481248302Sbrooks			 * Conversion error, process as a byte(s) instead.
482248302Sbrooks			 * Examine each byte and higher-order bytes for
483248302Sbrooks			 * data.  E.g.,
484248302Sbrooks			 *	0x000000000000a264 -> a2 64
485248302Sbrooks			 *	0x000000001f00a264 -> 1f 00 a2 64
486248302Sbrooks			 */
487248302Sbrooks			clen = 0;
488248302Sbrooks			wmsk = 0;
489248302Sbrooks			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
490248302Sbrooks				shft = i * NBBY;
491248302Sbrooks				bmsk = (uint64_t)0xffLL << shft;
492248302Sbrooks				wmsk |= bmsk;
493248302Sbrooks				if ((*dst & wmsk) || i == 0)
494248302Sbrooks					mbdst[clen++] = (char)(
495248302Sbrooks					    (uint64_t)(*dst & bmsk) >>
496248302Sbrooks					    shft);
497248302Sbrooks			}
498248302Sbrooks			cerr = 1;
499248302Sbrooks		}
500248302Sbrooks		/* If this character would exceed our output limit, stop. */
501248302Sbrooks		if (olen + clen > (size_t)maxolen)
502248302Sbrooks			break;
503248302Sbrooks		/* Advance output pointer by number of bytes written. */
504248302Sbrooks		mbdst += clen;
505248302Sbrooks		/* Advance buffer character pointer. */
506248302Sbrooks		dst++;
507248302Sbrooks		/* Incrment output character count. */
508248302Sbrooks		olen += clen;
509241236Sbrooks	}
510248302Sbrooks
511248302Sbrooks	/* Terminate the output string. */
512248302Sbrooks	*mbdst = '\0';
513248302Sbrooks
514248302Sbrooks	/* Pass conversion error flag out. */
515248302Sbrooks	if (cerr_ptr)
516248302Sbrooks		*cerr_ptr = cerr;
517248302Sbrooks
518248302Sbrooks	free(extra);
519248302Sbrooks	free(pdst);
520248302Sbrooks	free(psrc);
521248302Sbrooks
522248302Sbrooks	return (int)olen;
523248302Sbrooksout:
524248302Sbrooks	free(extra);
525248302Sbrooks	free(pdst);
526248302Sbrooks	free(psrc);
527248302Sbrooks	return error;
528241236Sbrooks}
529272753Sbrooks
/*
 * istrsenvisxl - istrsenvisx() for NUL-terminated input.
 *
 * The input length is taken with strlen(); a NULL mbsrc is passed on
 * with a length of 0.  Returns what istrsenvisx() returns.
 */
static int
istrsenvisxl(char *mbdst, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t srclen = 0;

	if (mbsrc != NULL)
		srclen = strlen(mbsrc);

	return istrsenvisx(mbdst, dlen, mbsrc, srclen, flags, mbextra,
	    cerr_ptr);
}
537272753Sbrooks
538248302Sbrooks#endif
539241236Sbrooks
540248302Sbrooks#if !HAVE_SVIS
/*
 *	The "svis" variants all take an "extra" arg that is a pointer
 *	to a NUL-terminated list of characters to be encoded, too.
 *	These functions are useful, e.g., for encoding strings in such
 *	a way that they are not interpreted by a shell.
 */
547248302Sbrooks
/*
 * svis - encode the single character c, with nextc as look-ahead and
 * mbextra as the list of extra characters to encode.
 *
 * Returns a pointer just past the last byte stored in mbdst, or NULL
 * if istrsenvisx() fails.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* The character and its look-ahead travel as a 2-byte block. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
562248302Sbrooks
/*
 * snvis - like svis(), but dlen gives the size of mbdst in bytes.
 *
 * Returns a pointer just past the last byte stored in mbdst, or NULL
 * if istrsenvisx() fails.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* The character and its look-ahead travel as a 2-byte block. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
577248302Sbrooks
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst, also
 * encoding the characters listed in mbextra.  No destination size is
 * passed.  Returns the result of istrsenvisxl().
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	int nwritten;

	nwritten = istrsenvisxl(mbdst, NULL, mbsrc, flags, mbextra, NULL);
	return nwritten;
}
583241236Sbrooks
/*
 * strsnvis - like strsvis(), but dlen gives the size of mbdst in bytes.
 * Returns the result of istrsenvisxl().
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	int nwritten;

	nwritten = istrsenvisxl(mbdst, &dlen, mbsrc, flags, mbextra, NULL);
	return nwritten;
}
589241236Sbrooks
/*
 * strsvisx - encode exactly len bytes of mbsrc into mbdst, also encoding
 * the characters listed in mbextra.  No destination size is passed.
 * Returns the result of istrsenvisx().
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	int nwritten;

	nwritten = istrsenvisx(mbdst, NULL, mbsrc, len, flags, mbextra, NULL);
	return nwritten;
}
595241236Sbrooks
/*
 * strsnvisx - like strsvisx(), but dlen gives the size of mbdst in bytes.
 * Returns the result of istrsenvisx().
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	int nwritten;

	nwritten = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, NULL);
	return nwritten;
}
602241236Sbrooks
/*
 * strsenvisx - like strsnvisx(), with the multibyte conversion error
 * flag carried in and out through cerr_ptr.
 * Returns the result of istrsenvisx().
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	int nwritten;

	nwritten = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra,
	    cerr_ptr);
	return nwritten;
}
609241236Sbrooks#endif
610241236Sbrooks
611241236Sbrooks#if !HAVE_VIS
/*
 * vis - visually encode the single character c, with nextc as
 * look-ahead and an empty list of extra characters.
 *
 * Returns a pointer just past the last byte stored in mbdst, or NULL
 * if istrsenvisx() fails.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* The character and its look-ahead travel as a 2-byte block. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(mbdst, NULL, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
629241236Sbrooks
/*
 * nvis - like vis(), but dlen gives the size of mbdst in bytes.
 *
 * Returns a pointer just past the last byte stored in mbdst, or NULL
 * if istrsenvisx() fails.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	/* The character and its look-ahead travel as a 2-byte block. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(mbdst, &dlen, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
644241236Sbrooks
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 */

int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	int nwritten;

	/* No destination size and no extra characters. */
	nwritten = istrsenvisxl(mbdst, NULL, mbsrc, flags, "", NULL);
	return nwritten;
}
658241236Sbrooks
/*
 * strnvis - like strvis(), but dlen gives the size of mbdst in bytes.
 * Returns the result of istrsenvisxl().
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	int nwritten;

	nwritten = istrsenvisxl(mbdst, &dlen, mbsrc, flags, "", NULL);
	return nwritten;
}
664241236Sbrooks
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	int nwritten;

	/* No destination size and no extra characters. */
	nwritten = istrsenvisx(mbdst, NULL, mbsrc, len, flags, "", NULL);
	return nwritten;
}
681241236Sbrooks
/*
 * strnvisx - like strvisx(), but dlen gives the size of mbdst in bytes.
 * Returns the result of istrsenvisx().
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	int nwritten;

	nwritten = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", NULL);
	return nwritten;
}
687241236Sbrooks
/*
 * strenvisx - like strnvisx(), with the multibyte conversion error flag
 * carried in and out through cerr_ptr.
 * Returns the result of istrsenvisx().
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	int nwritten;

	nwritten = istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr);
	return nwritten;
}
694241236Sbrooks#endif
695