1301679Sbrooks/*	$NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $	*/
2241236Sbrooks
3241236Sbrooks/*-
4241236Sbrooks * Copyright (c) 1989, 1993
5241236Sbrooks *	The Regents of the University of California.  All rights reserved.
6241236Sbrooks *
7241236Sbrooks * Redistribution and use in source and binary forms, with or without
8241236Sbrooks * modification, are permitted provided that the following conditions
9241236Sbrooks * are met:
10241236Sbrooks * 1. Redistributions of source code must retain the above copyright
11241236Sbrooks *    notice, this list of conditions and the following disclaimer.
12241236Sbrooks * 2. Redistributions in binary form must reproduce the above copyright
13241236Sbrooks *    notice, this list of conditions and the following disclaimer in the
14241236Sbrooks *    documentation and/or other materials provided with the distribution.
15241236Sbrooks * 3. Neither the name of the University nor the names of its contributors
16241236Sbrooks *    may be used to endorse or promote products derived from this software
17241236Sbrooks *    without specific prior written permission.
18241236Sbrooks *
19241236Sbrooks * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20241236Sbrooks * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21241236Sbrooks * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22241236Sbrooks * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23241236Sbrooks * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24241236Sbrooks * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25241236Sbrooks * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26241236Sbrooks * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27241236Sbrooks * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28241236Sbrooks * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29241236Sbrooks * SUCH DAMAGE.
30241236Sbrooks */
31241236Sbrooks
32241236Sbrooks#include <sys/cdefs.h>
33241236Sbrooks#if defined(LIBC_SCCS) && !defined(lint)
34241236Sbrooks#if 0
35241236Sbrooksstatic char sccsid[] = "@(#)unvis.c	8.1 (Berkeley) 6/4/93";
36241236Sbrooks#else
37301679Sbrooks__RCSID("$NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $");
38241236Sbrooks#endif
39241236Sbrooks#endif /* LIBC_SCCS and not lint */
40244401Sbrooks__FBSDID("$FreeBSD$");
41241236Sbrooks
42241236Sbrooks#include "namespace.h"
43241236Sbrooks#include <sys/types.h>
44241236Sbrooks
45241236Sbrooks#include <assert.h>
46241236Sbrooks#include <ctype.h>
47241236Sbrooks#include <stdint.h>
48241236Sbrooks#include <stdio.h>
49241236Sbrooks#include <errno.h>
50241236Sbrooks#include <vis.h>
51241236Sbrooks
52244401Sbrooks#define	_DIAGASSERT(x)	assert(x)
53244401Sbrooks
54244401Sbrooks/*
55244401Sbrooks * Return the number of elements in a statically-allocated array,
56244401Sbrooks * __x.
57244401Sbrooks */
58244401Sbrooks#define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))
59244401Sbrooks
60241236Sbrooks#ifdef __weak_alias
61241236Sbrooks__weak_alias(strnunvisx,_strnunvisx)
62241236Sbrooks#endif
63241236Sbrooks
64241236Sbrooks#if !HAVE_VIS
/*
 * Decoder states.  unvis() keeps the current state in the low byte of
 * *astate between calls; S_GROUND (0) is the idle state, so a
 * zero-initialised state variable starts the decoder correctly.
 */
enum {
	S_GROUND = 0,	/* haven't seen escape char */
	S_START,	/* start decoding special sequence */
	S_META,		/* metachar started (M) */
	S_META1,	/* metachar more, regular char (-) */
	S_CTRL,		/* control char started (^) */
	S_OCTAL2,	/* octal digit 2 */
	S_OCTAL3,	/* octal digit 3 */
	S_HEX,		/* mandatory hex digit */
	S_HEX1,		/* http hex digit */
	S_HEX2,		/* http hex digit 2 */
	S_MIME1,	/* mime hex digit 1 */
	S_MIME2,	/* mime hex digit 2 */
	S_EATCRNL,	/* mime eating CRNL */
	S_AMP,		/* seen & */
	S_NUMBER,	/* collecting number */
	S_STRING	/* collecting string */
};
84241236Sbrooks
/*
 * Character-class helpers used by the decoder.  Each macro evaluates its
 * argument more than once, so pass only side-effect-free expressions.
 *
 *	isoctal(c)	true when c is an ASCII octal digit '0'..'7'
 *	xtod(c)		value of hex digit c, accepting either letter case
 *	XTOD(c)		value of hex digit c, upper-case letters only
 *
 * xtod() and XTOD() do not validate c; callers check isxdigit() first.
 * The argument is fully parenthesised so that expressions with
 * lower-precedence operators (e.g. a conditional) expand correctly.
 */
#define	isoctal(c)	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
#define	xtod(c)		(isdigit(c) ? ((c) - '0') : ((tolower(c) - 'a') + 10))
#define	XTOD(c)		(isdigit(c) ? ((c) - '0') : (((c) - 'A') + 10))
88241236Sbrooks
/*
 * Named character entities (RFC 1866) and the byte each one decodes to.
 *
 * Invariants the "&name;" lookup in unvis() depends on:
 *  - entries are kept in ascending byte order of name; the lookup only
 *    ever scans forward from its current position;
 *  - every name is at most 6 characters, so name[] always holds a
 *    terminating NUL.
 */
static const struct nv {
	char name[7];
	uint8_t value;
} nv[] = {
	{ .name = "AElig",  .value = 198 }, /* capital AE diphthong (ligature) */
	{ .name = "Aacute", .value = 193 }, /* capital A, acute accent */
	{ .name = "Acirc",  .value = 194 }, /* capital A, circumflex accent */
	{ .name = "Agrave", .value = 192 }, /* capital A, grave accent */
	{ .name = "Aring",  .value = 197 }, /* capital A, ring */
	{ .name = "Atilde", .value = 195 }, /* capital A, tilde */
	{ .name = "Auml",   .value = 196 }, /* capital A, dieresis or umlaut mark */
	{ .name = "Ccedil", .value = 199 }, /* capital C, cedilla */
	{ .name = "ETH",    .value = 208 }, /* capital Eth, Icelandic */
	{ .name = "Eacute", .value = 201 }, /* capital E, acute accent */
	{ .name = "Ecirc",  .value = 202 }, /* capital E, circumflex accent */
	{ .name = "Egrave", .value = 200 }, /* capital E, grave accent */
	{ .name = "Euml",   .value = 203 }, /* capital E, dieresis or umlaut mark */
	{ .name = "Iacute", .value = 205 }, /* capital I, acute accent */
	{ .name = "Icirc",  .value = 206 }, /* capital I, circumflex accent */
	{ .name = "Igrave", .value = 204 }, /* capital I, grave accent */
	{ .name = "Iuml",   .value = 207 }, /* capital I, dieresis or umlaut mark */
	{ .name = "Ntilde", .value = 209 }, /* capital N, tilde */
	{ .name = "Oacute", .value = 211 }, /* capital O, acute accent */
	{ .name = "Ocirc",  .value = 212 }, /* capital O, circumflex accent */
	{ .name = "Ograve", .value = 210 }, /* capital O, grave accent */
	{ .name = "Oslash", .value = 216 }, /* capital O, slash */
	{ .name = "Otilde", .value = 213 }, /* capital O, tilde */
	{ .name = "Ouml",   .value = 214 }, /* capital O, dieresis or umlaut mark */
	{ .name = "THORN",  .value = 222 }, /* capital THORN, Icelandic */
	{ .name = "Uacute", .value = 218 }, /* capital U, acute accent */
	{ .name = "Ucirc",  .value = 219 }, /* capital U, circumflex accent */
	{ .name = "Ugrave", .value = 217 }, /* capital U, grave accent */
	{ .name = "Uuml",   .value = 220 }, /* capital U, dieresis or umlaut mark */
	{ .name = "Yacute", .value = 221 }, /* capital Y, acute accent */
	{ .name = "aacute", .value = 225 }, /* small a, acute accent */
	{ .name = "acirc",  .value = 226 }, /* small a, circumflex accent */
	{ .name = "acute",  .value = 180 }, /* acute accent */
	{ .name = "aelig",  .value = 230 }, /* small ae diphthong (ligature) */
	{ .name = "agrave", .value = 224 }, /* small a, grave accent */
	{ .name = "amp",    .value =  38 }, /* ampersand */
	{ .name = "aring",  .value = 229 }, /* small a, ring */
	{ .name = "atilde", .value = 227 }, /* small a, tilde */
	{ .name = "auml",   .value = 228 }, /* small a, dieresis or umlaut mark */
	{ .name = "brvbar", .value = 166 }, /* broken (vertical) bar */
	{ .name = "ccedil", .value = 231 }, /* small c, cedilla */
	{ .name = "cedil",  .value = 184 }, /* cedilla */
	{ .name = "cent",   .value = 162 }, /* cent sign */
	{ .name = "copy",   .value = 169 }, /* copyright sign */
	{ .name = "curren", .value = 164 }, /* general currency sign */
	{ .name = "deg",    .value = 176 }, /* degree sign */
	{ .name = "divide", .value = 247 }, /* divide sign */
	{ .name = "eacute", .value = 233 }, /* small e, acute accent */
	{ .name = "ecirc",  .value = 234 }, /* small e, circumflex accent */
	{ .name = "egrave", .value = 232 }, /* small e, grave accent */
	{ .name = "eth",    .value = 240 }, /* small eth, Icelandic */
	{ .name = "euml",   .value = 235 }, /* small e, dieresis or umlaut mark */
	{ .name = "frac12", .value = 189 }, /* fraction one-half */
	{ .name = "frac14", .value = 188 }, /* fraction one-quarter */
	{ .name = "frac34", .value = 190 }, /* fraction three-quarters */
	{ .name = "gt",     .value =  62 }, /* greater than */
	{ .name = "iacute", .value = 237 }, /* small i, acute accent */
	{ .name = "icirc",  .value = 238 }, /* small i, circumflex accent */
	{ .name = "iexcl",  .value = 161 }, /* inverted exclamation mark */
	{ .name = "igrave", .value = 236 }, /* small i, grave accent */
	{ .name = "iquest", .value = 191 }, /* inverted question mark */
	{ .name = "iuml",   .value = 239 }, /* small i, dieresis or umlaut mark */
	{ .name = "laquo",  .value = 171 }, /* angle quotation mark, left */
	{ .name = "lt",     .value =  60 }, /* less than */
	{ .name = "macr",   .value = 175 }, /* macron */
	{ .name = "micro",  .value = 181 }, /* micro sign */
	{ .name = "middot", .value = 183 }, /* middle dot */
	{ .name = "nbsp",   .value = 160 }, /* no-break space */
	{ .name = "not",    .value = 172 }, /* not sign */
	{ .name = "ntilde", .value = 241 }, /* small n, tilde */
	{ .name = "oacute", .value = 243 }, /* small o, acute accent */
	{ .name = "ocirc",  .value = 244 }, /* small o, circumflex accent */
	{ .name = "ograve", .value = 242 }, /* small o, grave accent */
	{ .name = "ordf",   .value = 170 }, /* ordinal indicator, feminine */
	{ .name = "ordm",   .value = 186 }, /* ordinal indicator, masculine */
	{ .name = "oslash", .value = 248 }, /* small o, slash */
	{ .name = "otilde", .value = 245 }, /* small o, tilde */
	{ .name = "ouml",   .value = 246 }, /* small o, dieresis or umlaut mark */
	{ .name = "para",   .value = 182 }, /* pilcrow (paragraph sign) */
	{ .name = "plusmn", .value = 177 }, /* plus-or-minus sign */
	{ .name = "pound",  .value = 163 }, /* pound sterling sign */
	{ .name = "quot",   .value =  34 }, /* double quote */
	{ .name = "raquo",  .value = 187 }, /* angle quotation mark, right */
	{ .name = "reg",    .value = 174 }, /* registered sign */
	{ .name = "sect",   .value = 167 }, /* section sign */
	{ .name = "shy",    .value = 173 }, /* soft hyphen */
	{ .name = "sup1",   .value = 185 }, /* superscript one */
	{ .name = "sup2",   .value = 178 }, /* superscript two */
	{ .name = "sup3",   .value = 179 }, /* superscript three */
	{ .name = "szlig",  .value = 223 }, /* small sharp s, German (sz ligature) */
	{ .name = "thorn",  .value = 254 }, /* small thorn, Icelandic */
	{ .name = "times",  .value = 215 }, /* multiply sign */
	{ .name = "uacute", .value = 250 }, /* small u, acute accent */
	{ .name = "ucirc",  .value = 251 }, /* small u, circumflex accent */
	{ .name = "ugrave", .value = 249 }, /* small u, grave accent */
	{ .name = "uml",    .value = 168 }, /* umlaut (dieresis) */
	{ .name = "uuml",   .value = 252 }, /* small u, dieresis or umlaut mark */
	{ .name = "yacute", .value = 253 }, /* small y, acute accent */
	{ .name = "yen",    .value = 165 }, /* yen sign */
	{ .name = "yuml",   .value = 255 }, /* small y, dieresis or umlaut mark */
};
197241236Sbrooks
198241236Sbrooks/*
199241236Sbrooks * unvis - decode characters previously encoded by vis
200241236Sbrooks */
201241236Sbrooksint
202241236Sbrooksunvis(char *cp, int c, int *astate, int flag)
203241236Sbrooks{
204241236Sbrooks	unsigned char uc = (unsigned char)c;
205241236Sbrooks	unsigned char st, ia, is, lc;
206241236Sbrooks
207241236Sbrooks/*
208241236Sbrooks * Bottom 8 bits of astate hold the state machine state.
209241236Sbrooks * Top 8 bits hold the current character in the http 1866 nv string decoding
210241236Sbrooks */
211241236Sbrooks#define GS(a)		((a) & 0xff)
212241236Sbrooks#define SS(a, b)	(((uint32_t)(a) << 24) | (b))
213241236Sbrooks#define GI(a)		((uint32_t)(a) >> 24)
214241236Sbrooks
215241236Sbrooks	_DIAGASSERT(cp != NULL);
216241236Sbrooks	_DIAGASSERT(astate != NULL);
217241236Sbrooks	st = GS(*astate);
218241236Sbrooks
219241236Sbrooks	if (flag & UNVIS_END) {
220241236Sbrooks		switch (st) {
221241236Sbrooks		case S_OCTAL2:
222241236Sbrooks		case S_OCTAL3:
223241236Sbrooks		case S_HEX2:
224241236Sbrooks			*astate = SS(0, S_GROUND);
225241236Sbrooks			return UNVIS_VALID;
226241236Sbrooks		case S_GROUND:
227241236Sbrooks			return UNVIS_NOCHAR;
228241236Sbrooks		default:
229241236Sbrooks			return UNVIS_SYNBAD;
230241236Sbrooks		}
231241236Sbrooks	}
232241236Sbrooks
233241236Sbrooks	switch (st) {
234241236Sbrooks
235241236Sbrooks	case S_GROUND:
236241236Sbrooks		*cp = 0;
237241236Sbrooks		if ((flag & VIS_NOESCAPE) == 0 && c == '\\') {
238241236Sbrooks			*astate = SS(0, S_START);
239241236Sbrooks			return UNVIS_NOCHAR;
240241236Sbrooks		}
241241236Sbrooks		if ((flag & VIS_HTTP1808) && c == '%') {
242241236Sbrooks			*astate = SS(0, S_HEX1);
243241236Sbrooks			return UNVIS_NOCHAR;
244241236Sbrooks		}
245241236Sbrooks		if ((flag & VIS_HTTP1866) && c == '&') {
246241236Sbrooks			*astate = SS(0, S_AMP);
247241236Sbrooks			return UNVIS_NOCHAR;
248241236Sbrooks		}
249241236Sbrooks		if ((flag & VIS_MIMESTYLE) && c == '=') {
250241236Sbrooks			*astate = SS(0, S_MIME1);
251241236Sbrooks			return UNVIS_NOCHAR;
252241236Sbrooks		}
253241236Sbrooks		*cp = c;
254241236Sbrooks		return UNVIS_VALID;
255241236Sbrooks
256241236Sbrooks	case S_START:
257241236Sbrooks		switch(c) {
258241236Sbrooks		case '\\':
259241236Sbrooks			*cp = c;
260241236Sbrooks			*astate = SS(0, S_GROUND);
261241236Sbrooks			return UNVIS_VALID;
262241236Sbrooks		case '0': case '1': case '2': case '3':
263241236Sbrooks		case '4': case '5': case '6': case '7':
264241236Sbrooks			*cp = (c - '0');
265241236Sbrooks			*astate = SS(0, S_OCTAL2);
266241236Sbrooks			return UNVIS_NOCHAR;
267241236Sbrooks		case 'M':
268241236Sbrooks			*cp = (char)0200;
269241236Sbrooks			*astate = SS(0, S_META);
270241236Sbrooks			return UNVIS_NOCHAR;
271241236Sbrooks		case '^':
272241236Sbrooks			*astate = SS(0, S_CTRL);
273241236Sbrooks			return UNVIS_NOCHAR;
274241236Sbrooks		case 'n':
275241236Sbrooks			*cp = '\n';
276241236Sbrooks			*astate = SS(0, S_GROUND);
277241236Sbrooks			return UNVIS_VALID;
278241236Sbrooks		case 'r':
279241236Sbrooks			*cp = '\r';
280241236Sbrooks			*astate = SS(0, S_GROUND);
281241236Sbrooks			return UNVIS_VALID;
282241236Sbrooks		case 'b':
283241236Sbrooks			*cp = '\b';
284241236Sbrooks			*astate = SS(0, S_GROUND);
285241236Sbrooks			return UNVIS_VALID;
286241236Sbrooks		case 'a':
287241236Sbrooks			*cp = '\007';
288241236Sbrooks			*astate = SS(0, S_GROUND);
289241236Sbrooks			return UNVIS_VALID;
290241236Sbrooks		case 'v':
291241236Sbrooks			*cp = '\v';
292241236Sbrooks			*astate = SS(0, S_GROUND);
293241236Sbrooks			return UNVIS_VALID;
294241236Sbrooks		case 't':
295241236Sbrooks			*cp = '\t';
296241236Sbrooks			*astate = SS(0, S_GROUND);
297241236Sbrooks			return UNVIS_VALID;
298241236Sbrooks		case 'f':
299241236Sbrooks			*cp = '\f';
300241236Sbrooks			*astate = SS(0, S_GROUND);
301241236Sbrooks			return UNVIS_VALID;
302241236Sbrooks		case 's':
303241236Sbrooks			*cp = ' ';
304241236Sbrooks			*astate = SS(0, S_GROUND);
305241236Sbrooks			return UNVIS_VALID;
306241236Sbrooks		case 'E':
307241236Sbrooks			*cp = '\033';
308241236Sbrooks			*astate = SS(0, S_GROUND);
309241236Sbrooks			return UNVIS_VALID;
310241236Sbrooks		case 'x':
311241236Sbrooks			*astate = SS(0, S_HEX);
312241236Sbrooks			return UNVIS_NOCHAR;
313241236Sbrooks		case '\n':
314241236Sbrooks			/*
315241236Sbrooks			 * hidden newline
316241236Sbrooks			 */
317241236Sbrooks			*astate = SS(0, S_GROUND);
318241236Sbrooks			return UNVIS_NOCHAR;
319241236Sbrooks		case '$':
320241236Sbrooks			/*
321241236Sbrooks			 * hidden marker
322241236Sbrooks			 */
323241236Sbrooks			*astate = SS(0, S_GROUND);
324241236Sbrooks			return UNVIS_NOCHAR;
325301679Sbrooks		default:
326301679Sbrooks			if (isgraph(c)) {
327301679Sbrooks				*cp = c;
328301679Sbrooks				*astate = SS(0, S_GROUND);
329301679Sbrooks				return UNVIS_VALID;
330301679Sbrooks			}
331241236Sbrooks		}
332241236Sbrooks		goto bad;
333241236Sbrooks
334241236Sbrooks	case S_META:
335241236Sbrooks		if (c == '-')
336241236Sbrooks			*astate = SS(0, S_META1);
337241236Sbrooks		else if (c == '^')
338241236Sbrooks			*astate = SS(0, S_CTRL);
339241236Sbrooks		else
340241236Sbrooks			goto bad;
341241236Sbrooks		return UNVIS_NOCHAR;
342241236Sbrooks
343241236Sbrooks	case S_META1:
344241236Sbrooks		*astate = SS(0, S_GROUND);
345241236Sbrooks		*cp |= c;
346241236Sbrooks		return UNVIS_VALID;
347241236Sbrooks
348241236Sbrooks	case S_CTRL:
349241236Sbrooks		if (c == '?')
350241236Sbrooks			*cp |= 0177;
351241236Sbrooks		else
352241236Sbrooks			*cp |= c & 037;
353241236Sbrooks		*astate = SS(0, S_GROUND);
354241236Sbrooks		return UNVIS_VALID;
355241236Sbrooks
356241236Sbrooks	case S_OCTAL2:	/* second possible octal digit */
357241236Sbrooks		if (isoctal(uc)) {
358241236Sbrooks			/*
359241236Sbrooks			 * yes - and maybe a third
360241236Sbrooks			 */
361241236Sbrooks			*cp = (*cp << 3) + (c - '0');
362241236Sbrooks			*astate = SS(0, S_OCTAL3);
363241236Sbrooks			return UNVIS_NOCHAR;
364241236Sbrooks		}
365241236Sbrooks		/*
366241236Sbrooks		 * no - done with current sequence, push back passed char
367241236Sbrooks		 */
368241236Sbrooks		*astate = SS(0, S_GROUND);
369241236Sbrooks		return UNVIS_VALIDPUSH;
370241236Sbrooks
371241236Sbrooks	case S_OCTAL3:	/* third possible octal digit */
372241236Sbrooks		*astate = SS(0, S_GROUND);
373241236Sbrooks		if (isoctal(uc)) {
374241236Sbrooks			*cp = (*cp << 3) + (c - '0');
375241236Sbrooks			return UNVIS_VALID;
376241236Sbrooks		}
377241236Sbrooks		/*
378241236Sbrooks		 * we were done, push back passed char
379241236Sbrooks		 */
380241236Sbrooks		return UNVIS_VALIDPUSH;
381241236Sbrooks
382241236Sbrooks	case S_HEX:
383241236Sbrooks		if (!isxdigit(uc))
384241236Sbrooks			goto bad;
385241236Sbrooks		/*FALLTHROUGH*/
386241236Sbrooks	case S_HEX1:
387241236Sbrooks		if (isxdigit(uc)) {
388241236Sbrooks			*cp = xtod(uc);
389241236Sbrooks			*astate = SS(0, S_HEX2);
390241236Sbrooks			return UNVIS_NOCHAR;
391241236Sbrooks		}
392241236Sbrooks		/*
393241236Sbrooks		 * no - done with current sequence, push back passed char
394241236Sbrooks		 */
395241236Sbrooks		*astate = SS(0, S_GROUND);
396241236Sbrooks		return UNVIS_VALIDPUSH;
397241236Sbrooks
398241236Sbrooks	case S_HEX2:
399241236Sbrooks		*astate = S_GROUND;
400241236Sbrooks		if (isxdigit(uc)) {
401241236Sbrooks			*cp = xtod(uc) | (*cp << 4);
402241236Sbrooks			return UNVIS_VALID;
403241236Sbrooks		}
404241236Sbrooks		return UNVIS_VALIDPUSH;
405241236Sbrooks
406241236Sbrooks	case S_MIME1:
407241236Sbrooks		if (uc == '\n' || uc == '\r') {
408241236Sbrooks			*astate = SS(0, S_EATCRNL);
409241236Sbrooks			return UNVIS_NOCHAR;
410241236Sbrooks		}
411241236Sbrooks		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
412241236Sbrooks			*cp = XTOD(uc);
413241236Sbrooks			*astate = SS(0, S_MIME2);
414241236Sbrooks			return UNVIS_NOCHAR;
415241236Sbrooks		}
416241236Sbrooks		goto bad;
417241236Sbrooks
418241236Sbrooks	case S_MIME2:
419241236Sbrooks		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
420241236Sbrooks			*astate = SS(0, S_GROUND);
421241236Sbrooks			*cp = XTOD(uc) | (*cp << 4);
422241236Sbrooks			return UNVIS_VALID;
423241236Sbrooks		}
424241236Sbrooks		goto bad;
425241236Sbrooks
426241236Sbrooks	case S_EATCRNL:
427241236Sbrooks		switch (uc) {
428241236Sbrooks		case '\r':
429241236Sbrooks		case '\n':
430241236Sbrooks			return UNVIS_NOCHAR;
431241236Sbrooks		case '=':
432241236Sbrooks			*astate = SS(0, S_MIME1);
433241236Sbrooks			return UNVIS_NOCHAR;
434241236Sbrooks		default:
435241236Sbrooks			*cp = uc;
436241236Sbrooks			*astate = SS(0, S_GROUND);
437241236Sbrooks			return UNVIS_VALID;
438241236Sbrooks		}
439241236Sbrooks
440241236Sbrooks	case S_AMP:
441241236Sbrooks		*cp = 0;
442241236Sbrooks		if (uc == '#') {
443241236Sbrooks			*astate = SS(0, S_NUMBER);
444241236Sbrooks			return UNVIS_NOCHAR;
445241236Sbrooks		}
446241236Sbrooks		*astate = SS(0, S_STRING);
447241236Sbrooks		/*FALLTHROUGH*/
448241236Sbrooks
449241236Sbrooks	case S_STRING:
450241236Sbrooks		ia = *cp;		/* index in the array */
451241236Sbrooks		is = GI(*astate);	/* index in the string */
452241236Sbrooks		lc = is == 0 ? 0 : nv[ia].name[is - 1];	/* last character */
453241236Sbrooks
454241236Sbrooks		if (uc == ';')
455241236Sbrooks			uc = '\0';
456241236Sbrooks
457241236Sbrooks		for (; ia < __arraycount(nv); ia++) {
458241236Sbrooks			if (is != 0 && nv[ia].name[is - 1] != lc)
459241236Sbrooks				goto bad;
460241236Sbrooks			if (nv[ia].name[is] == uc)
461241236Sbrooks				break;
462241236Sbrooks		}
463241236Sbrooks
464241236Sbrooks		if (ia == __arraycount(nv))
465241236Sbrooks			goto bad;
466241236Sbrooks
467241236Sbrooks		if (uc != 0) {
468241236Sbrooks			*cp = ia;
469241236Sbrooks			*astate = SS(is + 1, S_STRING);
470241236Sbrooks			return UNVIS_NOCHAR;
471241236Sbrooks		}
472241236Sbrooks
473241236Sbrooks		*cp = nv[ia].value;
474241236Sbrooks		*astate = SS(0, S_GROUND);
475241236Sbrooks		return UNVIS_VALID;
476241236Sbrooks
477241236Sbrooks	case S_NUMBER:
478241236Sbrooks		if (uc == ';')
479241236Sbrooks			return UNVIS_VALID;
480241236Sbrooks		if (!isdigit(uc))
481241236Sbrooks			goto bad;
482241236Sbrooks		*cp += (*cp * 10) + uc - '0';
483241236Sbrooks		return UNVIS_NOCHAR;
484241236Sbrooks
485241236Sbrooks	default:
486241236Sbrooks	bad:
487241236Sbrooks		/*
488241236Sbrooks		 * decoder in unknown state - (probably uninitialized)
489241236Sbrooks		 */
490241236Sbrooks		*astate = SS(0, S_GROUND);
491241236Sbrooks		return UNVIS_SYNBAD;
492241236Sbrooks	}
493241236Sbrooks}
494241236Sbrooks
495241236Sbrooks/*
496241236Sbrooks * strnunvisx - decode src into dst
497241236Sbrooks *
498241236Sbrooks *	Number of chars decoded into dst is returned, -1 on error.
499241236Sbrooks *	Dst is null terminated.
500241236Sbrooks */
501241236Sbrooks
502241236Sbrooksint
503241236Sbrooksstrnunvisx(char *dst, size_t dlen, const char *src, int flag)
504241236Sbrooks{
505241236Sbrooks	char c;
506241236Sbrooks	char t = '\0', *start = dst;
507241236Sbrooks	int state = 0;
508241236Sbrooks
509241236Sbrooks	_DIAGASSERT(src != NULL);
510241236Sbrooks	_DIAGASSERT(dst != NULL);
511241236Sbrooks#define CHECKSPACE() \
512241236Sbrooks	do { \
513241236Sbrooks		if (dlen-- == 0) { \
514241236Sbrooks			errno = ENOSPC; \
515241236Sbrooks			return -1; \
516241236Sbrooks		} \
517241236Sbrooks	} while (/*CONSTCOND*/0)
518241236Sbrooks
519241236Sbrooks	while ((c = *src++) != '\0') {
520241236Sbrooks again:
521241236Sbrooks		switch (unvis(&t, c, &state, flag)) {
522241236Sbrooks		case UNVIS_VALID:
523241236Sbrooks			CHECKSPACE();
524241236Sbrooks			*dst++ = t;
525241236Sbrooks			break;
526241236Sbrooks		case UNVIS_VALIDPUSH:
527241236Sbrooks			CHECKSPACE();
528241236Sbrooks			*dst++ = t;
529241236Sbrooks			goto again;
530241236Sbrooks		case 0:
531241236Sbrooks		case UNVIS_NOCHAR:
532241236Sbrooks			break;
533241236Sbrooks		case UNVIS_SYNBAD:
534241236Sbrooks			errno = EINVAL;
535241236Sbrooks			return -1;
536241236Sbrooks		default:
537241236Sbrooks			_DIAGASSERT(/*CONSTCOND*/0);
538241236Sbrooks			errno = EINVAL;
539241236Sbrooks			return -1;
540241236Sbrooks		}
541241236Sbrooks	}
542241236Sbrooks	if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) {
543241236Sbrooks		CHECKSPACE();
544241236Sbrooks		*dst++ = t;
545241236Sbrooks	}
546241236Sbrooks	CHECKSPACE();
547241236Sbrooks	*dst = '\0';
548241236Sbrooks	return (int)(dst - start);
549241236Sbrooks}
550241236Sbrooks
/*
 * strunvisx - decode src into dst with no bound on the output size.
 *
 *	Same as strnunvisx() with the largest possible dlen; the caller
 *	must supply a dst large enough for the decoded string and its NUL.
 */
int
strunvisx(char *dst, const char *src, int flag)
{
	const size_t unbounded = SIZE_MAX;

	return strnunvisx(dst, unbounded, src, flag);
}
556241236Sbrooks
/*
 * strunvis - decode src into dst with default flags and no bound on
 * the output size.
 *
 *	Same as strnunvisx() with flag 0 and the largest possible dlen;
 *	the caller must supply a dst large enough for the decoded string
 *	and its NUL.
 */
int
strunvis(char *dst, const char *src)
{
	const size_t unbounded = SIZE_MAX;
	const int no_flags = 0;

	return strnunvisx(dst, unbounded, src, no_flags);
}
562241236Sbrooks
/*
 * strnunvis - decode src into dst (at most dlen bytes, NUL included)
 * with default flags.
 *
 *	Same as strnunvisx() with flag 0.
 */
int
strnunvis(char *dst, size_t dlen, const char *src)
{
	const int no_flags = 0;

	return strnunvisx(dst, dlen, src, no_flags);
}
568241236Sbrooks#endif
569