1/*	$NetBSD: vis.c,v 1.44 2011/03/12 19:52:48 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1989, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*-
33 * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55 * POSSIBILITY OF SUCH DAMAGE.
56 */
57
58#include "config.h"
59
60#if defined(LIBC_SCCS) && !defined(lint)
61__RCSID("$NetBSD: vis.c,v 1.44 2011/03/12 19:52:48 christos Exp $");
62#endif /* LIBC_SCCS and not lint */
63
64#include <sys/types.h>
65
66#include <assert.h>
67#include <vis.h>
68#include <errno.h>
69#include <stdlib.h>
70
71#ifdef __weak_alias
72__weak_alias(strvisx,_strvisx)
73#endif
74
75#if !HAVE_VIS || !HAVE_SVIS
76#include <ctype.h>
77#include <limits.h>
78#include <stdio.h>
79#include <string.h>
80
81static char *do_svis(char *, size_t *, int, int, int, const char *);
82
/*
 * Small character-classification helpers.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (conditionals, bitwise operators, ...) expand as intended.  The
 * classification macros may evaluate their argument more than once, so
 * arguments must not have side effects.
 */
#undef BELL
#define BELL '\a'

/* True if c, viewed as an unsigned byte, is one of the digits '0'..'7'. */
#define isoctal(c)	\
	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
/* True for space, horizontal tab and newline. */
#define iswhite(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
/* True for backspace, bell and carriage return. */
#define issafe(c)	((c) == '\b' || (c) == BELL || (c) == '\r')
/* Map a value in the range 0..15 to its lower-case hexadecimal digit. */
#define xtoa(c)		("0123456789abcdef"[(c)])
/* Map a value in the range 0..15 to its upper-case hexadecimal digit. */
#define XTOA(c)		("0123456789ABCDEF"[(c)])

/* Headroom MAKEEXTRALIST reserves for the flag-dependent extra characters. */
#define MAXEXTRAS	5
93
/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *
 * Allocate a new NUL-terminated string in `extra' holding a copy of
 * `orig_str' followed by the characters selected by `flag':
 *	VIS_SP		appends ' '
 *	VIS_TAB		appends '\t'
 *	VIS_NL		appends '\n'
 *	VIS_NOSLASH	clear: appends '\\'
 * The buffer is sized strlen(orig_str) + 1 + MAXEXTRAS.  On allocation
 * failure `extra' is set to NULL and nothing else happens; otherwise the
 * caller owns the buffer and must free() it.
 *
 * All arguments are parenthesized and the temporaries carry a mel_ prefix
 * so that neither compound flag expressions nor caller variable names can
 * interfere with the expansion.  `extra' must be a modifiable lvalue.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const char *mel_src_ = (orig_str);				      \
	const size_t mel_len_ = strlen(mel_src_);			      \
	char *mel_end_;							      \
	(extra) = malloc(mel_len_ + 1 + MAXEXTRAS);			      \
	if ((extra) == NULL)						      \
		break;							      \
	memcpy((extra), mel_src_, mel_len_);				      \
	mel_end_ = (extra) + mel_len_;					      \
	if ((flag) & VIS_SP)						      \
		*mel_end_++ = ' ';					      \
	if ((flag) & VIS_TAB)						      \
		*mel_end_++ = '\t';					      \
	if ((flag) & VIS_NL)						      \
		*mel_end_++ = '\n';					      \
	if (((flag) & VIS_NOSLASH) == 0)				      \
		*mel_end_++ = '\\';					      \
	*mel_end_ = '\0';						      \
} while (/*CONSTCOND*/0)
112
/*
 * do_hvis - encode one character HTTP style (RFC 1808).
 *
 * ASCII alphanumerics and the punctuation characters $ - _ . + ! * ' ( ) ,
 * are handed to do_svis; every other character is written as '%' followed
 * by two lower-case hexadecimal digits.
 *
 * dst:	      Where to write the encoding
 * dlen:      Remaining output budget, or NULL for no limit
 * c:	      Character to encode
 * flag:      Flag word (forwarded to do_svis)
 * nextc:     The character following 'c' (forwarded to do_svis)
 * extra:     Extra characters to protect (forwarded to do_svis)
 *
 * Returns the position just past the bytes written, or NULL when the
 * budget cannot hold the encoding.
 */
static char *
do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	unsigned int bits;
	int passthrough;

	switch (c) {
	/* safe */
	case '$': case '-': case '_': case '.': case '+':
	/* extra */
	case '!': case '*': case '\'': case '(': case ')': case ',':
		passthrough = 1;
		break;
	default:
		passthrough = isascii(c) && isalnum(c);
		break;
	}

	if (passthrough)
		return do_svis(dst, dlen, c, flag, nextc, extra);

	/* A %XX escape always takes three bytes. */
	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}

	bits = (unsigned int)c;
	dst[0] = '%';
	dst[1] = xtoa((bits >> 4) & 0xf);
	dst[2] = xtoa(bits & 0xf);
	return dst + 3;
}
140
/*
 * do_mvis - encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A character other than newline is written as '=' followed by two
 * upper-case hexadecimal digits when it is
 *   - whitespace directly followed by CR or LF,
 *   - not whitespace and below 33, equal to 61 ('='), or above 126, or
 *   - found by strchr() in the string "#$@[\]^`{|}~".
 * Everything else is handed to do_svis.
 *
 * Returns the position just past the bytes written, or NULL when the
 * budget in *dlen cannot hold the encoding.
 */
static char *
do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
{
	const int space = isspace(c);
	const int at_eol = (nextc == '\r' || nextc == '\n');
	int quote = 0;

	if (c != '\n') {
		if (space && at_eol)
			quote = 1;	/* Space at the end of the line */
		else if (!space && (c < 33 || c == 61 || c > 126))
			quote = 1;	/* Out of range */
		else if (strchr("#$@[\\]^`{|}~", c) != NULL)
			quote = 1;	/* Specific char to be escaped */
	}

	if (!quote)
		return do_svis(dst, dlen, c, flag, nextc, extra);

	/* An =XX escape always takes three bytes. */
	if (dlen != NULL) {
		if (*dlen < 3)
			return NULL;
		*dlen -= 3;
	}
	dst[0] = '=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
168
169/*
170 * This is do_vis, the central code of vis.
171 * dst:	      Pointer to the destination buffer
172 * c:	      Character to encode
173 * flag:      Flag word
174 * nextc:     The character following 'c'
175 * extra:     Pointer to the list of extra characters to be
176 *	      backslash-protected.
177 */
178static char *
179do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
180{
181	int isextra;
182	size_t odlen = dlen ? *dlen : 0;
183
184	isextra = strchr(extra, c) != NULL;
185#define HAVE(x) \
186	do { \
187		if (dlen) { \
188			if (*dlen < (x)) \
189				goto out; \
190			*dlen -= (x); \
191		} \
192	} while (/*CONSTCOND*/0)
193	if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
194	    ((flag & VIS_SAFE) && issafe(c)))) {
195		HAVE(1);
196		*dst++ = c;
197		return dst;
198	}
199	if (flag & VIS_CSTYLE) {
200		HAVE(2);
201		switch (c) {
202		case '\n':
203			*dst++ = '\\'; *dst++ = 'n';
204			return dst;
205		case '\r':
206			*dst++ = '\\'; *dst++ = 'r';
207			return dst;
208		case '\b':
209			*dst++ = '\\'; *dst++ = 'b';
210			return dst;
211		case BELL:
212			*dst++ = '\\'; *dst++ = 'a';
213			return dst;
214		case '\v':
215			*dst++ = '\\'; *dst++ = 'v';
216			return dst;
217		case '\t':
218			*dst++ = '\\'; *dst++ = 't';
219			return dst;
220		case '\f':
221			*dst++ = '\\'; *dst++ = 'f';
222			return dst;
223		case ' ':
224			*dst++ = '\\'; *dst++ = 's';
225			return dst;
226		case '\0':
227			*dst++ = '\\'; *dst++ = '0';
228			if (isoctal(nextc)) {
229				HAVE(2);
230				*dst++ = '0';
231				*dst++ = '0';
232			}
233			return dst;
234		default:
235			if (isgraph(c)) {
236				*dst++ = '\\'; *dst++ = c;
237				return dst;
238			}
239			if (dlen)
240				*dlen = odlen;
241		}
242	}
243	if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
244		HAVE(4);
245		*dst++ = '\\';
246		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
247		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
248		*dst++ =			     (c	      & 07) + '0';
249	} else {
250		if ((flag & VIS_NOSLASH) == 0) {
251			HAVE(1);
252			*dst++ = '\\';
253		}
254
255		if (c & 0200) {
256			HAVE(1);
257			c &= 0177; *dst++ = 'M';
258		}
259
260		if (iscntrl(c)) {
261			HAVE(2);
262			*dst++ = '^';
263			if (c == 0177)
264				*dst++ = '?';
265			else
266				*dst++ = c + '@';
267		} else {
268			HAVE(2);
269			*dst++ = '-'; *dst++ = c;
270		}
271	}
272	return dst;
273out:
274	*dlen = odlen;
275	return NULL;
276}
277
278typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
279
280/*
281 * Return the appropriate encoding function depending on the flags given.
282 */
283static visfun_t
284getvisfun(int flag)
285{
286	if (flag & VIS_HTTPSTYLE)
287		return do_hvis;
288	if (flag & VIS_MIMESTYLE)
289		return do_mvis;
290	return do_svis;
291}
292
293/*
294 * isnvis - visually encode characters, also encoding the characters
295 *	  pointed to by `extra'
296 */
297static char *
298isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
299{
300	char *nextra = NULL;
301	visfun_t f;
302
303	_DIAGASSERT(dst != NULL);
304	_DIAGASSERT(extra != NULL);
305	MAKEEXTRALIST(flag, nextra, extra);
306	if (!nextra) {
307		if (dlen && *dlen == 0) {
308			errno = ENOSPC;
309			return NULL;
310		}
311		*dst = '\0';		/* can't create nextra, return "" */
312		return dst;
313	}
314	f = getvisfun(flag);
315	dst = (*f)(dst, dlen, c, flag, nextc, nextra);
316	free(nextra);
317	if (dst == NULL || (dlen && *dlen == 0)) {
318		errno = ENOSPC;
319		return NULL;
320	}
321	*dst = '\0';
322	return dst;
323}
324
/*
 * svis - encode the single character c into dst with no output limit
 * (a NULL length pointer is handed to isnvis).  Returns isnvis's result.
 */
char *
svis(char *dst, int c, int flag, int nextc, const char *extra)
{
	char *end;

	end = isnvis(dst, NULL, c, flag, nextc, extra);
	return end;
}
330
/*
 * snvis - like svis, but dlen is handed to isnvis as the output budget
 * for dst.  Returns isnvis's result.
 */
char *
snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
{
	size_t room = dlen;

	return isnvis(dst, &room, c, flag, nextc, extra);
}
336
337
338/*
339 * strsvis, strsvisx - visually encode characters from src into dst
340 *
341 *	Extra is a pointer to a \0-terminated list of characters to
342 *	be encoded, too. These functions are useful e. g. to
343 *	encode strings in such a way so that they are not interpreted
344 *	by a shell.
345 *
346 *	Dst must be 4 times the size of src to account for possible
347 *	expansion.  The length of dst, not including the trailing NULL,
348 *	is returned.
349 *
350 *	Strsvisx encodes exactly len bytes from src into dst.
351 *	This is useful for encoding a block of data.
352 */
353static int
354istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra)
355{
356	int c;
357	char *start;
358	char *nextra = NULL;
359	const unsigned char *src = (const unsigned char *)csrc;
360	visfun_t f;
361
362	_DIAGASSERT(dst != NULL);
363	_DIAGASSERT(src != NULL);
364	_DIAGASSERT(extra != NULL);
365	MAKEEXTRALIST(flag, nextra, extra);
366	if (!nextra) {
367		*dst = '\0';		/* can't create nextra, return "" */
368		return 0;
369	}
370	f = getvisfun(flag);
371	for (start = dst; (c = *src++) != '\0'; /* empty */) {
372		dst = (*f)(dst, dlen, c, flag, *src, nextra);
373		if (dst == NULL) {
374			errno = ENOSPC;
375			return -1;
376		}
377	}
378	free(nextra);
379	if (dlen && *dlen == 0) {
380		errno = ENOSPC;
381		return -1;
382	}
383	*dst = '\0';
384	return (int)(dst - start);
385}
386
/*
 * strsvis - encode the NUL-terminated string csrc into dst with no output
 * limit (a NULL length pointer is handed to istrsnvis).  Returns
 * istrsnvis's result.
 */
int
strsvis(char *dst, const char *csrc, int flag, const char *extra)
{
	int written;

	written = istrsnvis(dst, NULL, csrc, flag, extra);
	return written;
}
392
/*
 * strsnvis - like strsvis, but dlen is handed to istrsnvis as the output
 * budget for dst.  Returns istrsnvis's result.
 */
int
strsnvis(char *dst, size_t dlen, const char *csrc, int flag, const char *extra)
{
	size_t room = dlen;

	return istrsnvis(dst, &room, csrc, flag, extra);
}
398
399static int
400istrsnvisx(char *dst, size_t *dlen, const char *csrc, size_t len, int flag,
401    const char *extra)
402{
403	unsigned char c;
404	char *start;
405	char *nextra = NULL;
406	const unsigned char *src = (const unsigned char *)csrc;
407	visfun_t f;
408
409	_DIAGASSERT(dst != NULL);
410	_DIAGASSERT(src != NULL);
411	_DIAGASSERT(extra != NULL);
412	MAKEEXTRALIST(flag, nextra, extra);
413	if (! nextra) {
414		if (dlen && *dlen == 0) {
415			errno = ENOSPC;
416			return -1;
417		}
418		*dst = '\0';		/* can't create nextra, return "" */
419		return 0;
420	}
421
422	f = getvisfun(flag);
423	for (start = dst; len > 0; len--) {
424		c = *src++;
425		dst = (*f)(dst, dlen, c, flag, len > 1 ? *src : '\0', nextra);
426		if (dst == NULL) {
427			errno = ENOSPC;
428			return -1;
429		}
430	}
431	free(nextra);
432	if (dlen && *dlen == 0) {
433		errno = ENOSPC;
434		return -1;
435	}
436	*dst = '\0';
437	return (int)(dst - start);
438}
439
/*
 * strsvisx - encode exactly len bytes of csrc into dst with no output
 * limit (a NULL length pointer is handed to istrsnvisx).  Returns
 * istrsnvisx's result.
 */
int
strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra)
{
	int written;

	written = istrsnvisx(dst, NULL, csrc, len, flag, extra);
	return written;
}
445
/*
 * strsnvisx - like strsvisx, but dlen is handed to istrsnvisx as the
 * output budget for dst.  Returns istrsnvisx's result.
 */
int
strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag,
    const char *extra)
{
	size_t room = dlen;

	return istrsnvisx(dst, &room, csrc, len, flag, extra);
}
452#endif
453
454#if !HAVE_VIS
455/*
456 * vis - visually encode characters
457 */
458static char *
459invis(char *dst, size_t *dlen, int c, int flag, int nextc)
460{
461	char *extra = NULL;
462	unsigned char uc = (unsigned char)c;
463	visfun_t f;
464
465	_DIAGASSERT(dst != NULL);
466
467	MAKEEXTRALIST(flag, extra, "");
468	if (! extra) {
469		if (dlen && *dlen == 0) {
470			errno = ENOSPC;
471			return NULL;
472		}
473		*dst = '\0';		/* can't create extra, return "" */
474		return dst;
475	}
476	f = getvisfun(flag);
477	dst = (*f)(dst, dlen, uc, flag, nextc, extra);
478	free(extra);
479	if (dst == NULL || (dlen && *dlen == 0)) {
480		errno = ENOSPC;
481		return NULL;
482	}
483	*dst = '\0';
484	return dst;
485}
486
/*
 * vis - encode the single character c into dst with no output limit
 * (a NULL length pointer is handed to invis).  Returns invis's result.
 */
char *
vis(char *dst, int c, int flag, int nextc)
{
	char *end;

	end = invis(dst, NULL, c, flag, nextc);
	return end;
}
492
/*
 * nvis - like vis, but dlen is handed to invis as the output budget for
 * dst.  Returns invis's result.
 */
char *
nvis(char *dst, size_t dlen, int c, int flag, int nextc)
{
	size_t room = dlen;

	return invis(dst, &room, c, flag, nextc);
}
498
499
/*
 * strvis, strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src (plus one byte for the
 *	terminator) to account for possible expansion.  The length of
 *	dst, not including the trailing NUL, is returned.
 *
 *	Strvisx encodes exactly len bytes from src into dst.
 *	This is useful for encoding a block of data.
 *
 * istrnvis is the common worker for strvis and strnvis.  dlen points to
 * the space remaining in dst, or is NULL for no limit.  On lack of space
 * errno is set to ENOSPC and -1 is returned.
 */
static int
istrnvis(char *dst, size_t *dlen, const char *src, int flag)
{
	/*
	 * With no room at all not even the terminating NUL fits, so fail
	 * before anything is stored into dst.
	 */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvis appends the flag-selected characters to whatever extra
	 * list it is given, so an empty list yields the same set as building
	 * one here first; this avoids a redundant malloc/free per call.
	 */
	return istrsnvis(dst, dlen, src, flag, "");
}
529
/*
 * strvis - encode the NUL-terminated string src into dst with no output
 * limit (a NULL length pointer is handed to istrnvis).  Returns
 * istrnvis's result.
 */
int
strvis(char *dst, const char *src, int flag)
{
	int written;

	written = istrnvis(dst, NULL, src, flag);
	return written;
}
535
/*
 * strnvis - like strvis, but dlen is handed to istrnvis as the output
 * budget for dst.  Returns istrnvis's result.
 */
int
strnvis(char *dst, size_t dlen, const char *src, int flag)
{
	size_t room = dlen;

	return istrnvis(dst, &room, src, flag);
}
541
/*
 * istrnvisx - common worker for strvisx and strnvisx: encode exactly len
 * bytes of src into dst.  dlen points to the space remaining in dst, or
 * is NULL for no limit.  On lack of space errno is set to ENOSPC and -1
 * is returned; otherwise the result of istrsnvisx is returned.
 */
static int
istrnvisx(char *dst, size_t *dlen, const char *src, size_t len, int flag)
{
	/*
	 * With no room at all not even the terminating NUL fits, so fail
	 * before anything is stored into dst.
	 */
	if (dlen && *dlen == 0) {
		errno = ENOSPC;
		return -1;
	}
	/*
	 * istrsnvisx appends the flag-selected characters to whatever extra
	 * list it is given, so an empty list yields the same set as building
	 * one here first; this avoids a redundant malloc/free per call.
	 */
	return istrsnvisx(dst, dlen, src, len, flag, "");
}
561
/*
 * strvisx - encode exactly len bytes of src into dst with no output limit
 * (a NULL length pointer is handed to istrnvisx).  Returns istrnvisx's
 * result.
 */
int
strvisx(char *dst, const char *src, size_t len, int flag)
{
	int written;

	written = istrnvisx(dst, NULL, src, len, flag);
	return written;
}
567
/*
 * strnvisx - like strvisx, but dlen is handed to istrnvisx as the output
 * budget for dst.  Returns istrnvisx's result.
 */
int
strnvisx(char *dst, size_t dlen, const char *src, size_t len, int flag)
{
	size_t room = dlen;

	return istrnvisx(dst, &room, src, len, flag);
}
573
574#endif
575