unvis.c revision 233294
146283Sdfr/*	$NetBSD: unvis.c,v 1.19 2000/01/22 22:19:13 mycroft Exp $	*/
246283Sdfr
346283Sdfr/*-
446283Sdfr * Copyright (c) 1989, 1993
546283Sdfr *	The Regents of the University of California.  All rights reserved.
646283Sdfr *
746283Sdfr * Redistribution and use in source and binary forms, with or without
846283Sdfr * modification, are permitted provided that the following conditions
946283Sdfr * are met:
1046283Sdfr * 1. Redistributions of source code must retain the above copyright
1146283Sdfr *    notice, this list of conditions and the following disclaimer.
1246283Sdfr * 2. Redistributions in binary form must reproduce the above copyright
1346283Sdfr *    notice, this list of conditions and the following disclaimer in the
1446283Sdfr *    documentation and/or other materials provided with the distribution.
1546283Sdfr * 3. Neither the name of the University nor the names of its contributors
1646283Sdfr *    may be used to endorse or promote products derived from this software
1746283Sdfr *    without specific prior written permission.
1846283Sdfr *
1946283Sdfr * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2046283Sdfr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2146283Sdfr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2246283Sdfr * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2346283Sdfr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2446283Sdfr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2546283Sdfr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2646283Sdfr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2746283Sdfr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2846283Sdfr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2946283Sdfr * SUCH DAMAGE.
3046283Sdfr */
3146283Sdfr
3246283Sdfr#if 1
3346283Sdfr#include <config.h>
3446283Sdfr#include "roken.h"
3546283Sdfr#ifndef _DIAGASSERT
3646283Sdfr#define _DIAGASSERT(X)
3746283Sdfr#endif
3846283Sdfr#else
3946283Sdfr#include <sys/cdefs.h>
4046283Sdfr#if defined(LIBC_SCCS) && !defined(lint)
4146283Sdfr#if 0
4246283Sdfrstatic char sccsid[] = "@(#)unvis.c	8.1 (Berkeley) 6/4/93";
4346283Sdfr#else
4446283Sdfr__RCSID("$NetBSD: unvis.c,v 1.19 2000/01/22 22:19:13 mycroft Exp $");
4546283Sdfr#endif
4646283Sdfr#endif /* LIBC_SCCS and not lint */
4746283Sdfr
4846283Sdfr#define __LIBC12_SOURCE__
4946283Sdfr
5046283Sdfr#include "namespace.h"
5146283Sdfr#endif
5246283Sdfr#include <sys/types.h>
5346283Sdfr
5446283Sdfr#include <assert.h>
5546283Sdfr#include <ctype.h>
5646283Sdfr#include <stdio.h>
5746283Sdfr#include <vis.h>
5846283Sdfr
5946283Sdfr#if 0
6046283Sdfr#ifdef __weak_alias
6146283Sdfr__weak_alias(strunvis,_strunvis)
6246283Sdfr__weak_alias(unvis,_unvis)
6346283Sdfr#endif
6446283Sdfr
6546283Sdfr__warn_references(unvis,
6646283Sdfr    "warning: reference to compatibility unvis(); include <vis.h> for correct reference")
6746283Sdfr#endif
6846283Sdfr
/*
 * States of the unvis decoder.  The current state is kept by the caller
 * in an int and handed back on every call, so the numeric values are
 * spelled out explicitly.
 */
enum {
	S_GROUND = 0,	/* not inside an escape sequence */
	S_START  = 1,	/* backslash seen, sequence type not yet known */
	S_META   = 2,	/* "\M" seen */
	S_META1  = 3,	/* "\M-" seen, next char is taken literally */
	S_CTRL   = 4,	/* "\^" or "\M^" seen */
	S_OCTAL2 = 5,	/* one octal digit seen, a second may follow */
	S_OCTAL3 = 6	/* two octal digits seen, a third may follow */
};

/* True when c, reduced to an unsigned char, is one of '0'..'7'. */
#define	isoctal(c)	('0' <= ((u_char)(c)) && ((u_char)(c)) <= '7')
8146283Sdfr
8246283SdfrROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
8346283Sdfr	rk_strunvis (char *, const char *);
8446283SdfrROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
8546283Sdfr	rk_unvis (char *, int, int *, int);
8646283Sdfr
8746283Sdfr/*
8846283Sdfr * unvis - decode characters previously encoded by vis
8946283Sdfr */
9046283Sdfr
9146283SdfrROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
9246283Sdfrrk_unvis(char *cp, int c, int *astate, int flag)
9346283Sdfr{
9446283Sdfr
9546283Sdfr	_DIAGASSERT(cp != NULL);
9646283Sdfr	_DIAGASSERT(astate != NULL);
9746283Sdfr
9846283Sdfr	if (flag & UNVIS_END) {
9946283Sdfr		if (*astate == S_OCTAL2 || *astate == S_OCTAL3) {
10046283Sdfr			*astate = S_GROUND;
10146283Sdfr			return (UNVIS_VALID);
10246283Sdfr		}
10346283Sdfr		return (*astate == S_GROUND ? UNVIS_NOCHAR : UNVIS_SYNBAD);
10446283Sdfr	}
10546283Sdfr
10646283Sdfr	switch (*astate) {
10746283Sdfr
10846283Sdfr	case S_GROUND:
10946283Sdfr		*cp = 0;
11046283Sdfr		if (c == '\\') {
11146283Sdfr			*astate = S_START;
11246283Sdfr			return (0);
11346283Sdfr		}
11446283Sdfr		*cp = c;
11546283Sdfr		return (UNVIS_VALID);
11646283Sdfr
11746283Sdfr	case S_START:
11846283Sdfr		switch(c) {
11946283Sdfr		case '\\':
12046283Sdfr			*cp = c;
12146283Sdfr			*astate = S_GROUND;
12246283Sdfr			return (UNVIS_VALID);
12346283Sdfr		case '0': case '1': case '2': case '3':
12446283Sdfr		case '4': case '5': case '6': case '7':
12546283Sdfr			*cp = (c - '0');
12646283Sdfr			*astate = S_OCTAL2;
12746283Sdfr			return (0);
12846283Sdfr		case 'M':
12946283Sdfr			*cp = (u_char)0200;
13046283Sdfr			*astate = S_META;
13146283Sdfr			return (0);
13246283Sdfr		case '^':
13346283Sdfr			*astate = S_CTRL;
13446283Sdfr			return (0);
13546283Sdfr		case 'n':
13646283Sdfr			*cp = '\n';
13746283Sdfr			*astate = S_GROUND;
13846283Sdfr			return (UNVIS_VALID);
13946283Sdfr		case 'r':
14046283Sdfr			*cp = '\r';
14146283Sdfr			*astate = S_GROUND;
14246283Sdfr			return (UNVIS_VALID);
14346283Sdfr		case 'b':
14446283Sdfr			*cp = '\b';
14546283Sdfr			*astate = S_GROUND;
14646283Sdfr			return (UNVIS_VALID);
14746283Sdfr		case 'a':
14846283Sdfr			*cp = '\007';
14946283Sdfr			*astate = S_GROUND;
15046283Sdfr			return (UNVIS_VALID);
15146283Sdfr		case 'v':
15246283Sdfr			*cp = '\v';
15346283Sdfr			*astate = S_GROUND;
15446283Sdfr			return (UNVIS_VALID);
15546283Sdfr		case 't':
15646283Sdfr			*cp = '\t';
15746283Sdfr			*astate = S_GROUND;
15846283Sdfr			return (UNVIS_VALID);
15946283Sdfr		case 'f':
16046283Sdfr			*cp = '\f';
16146283Sdfr			*astate = S_GROUND;
16246283Sdfr			return (UNVIS_VALID);
16346283Sdfr		case 's':
16446283Sdfr			*cp = ' ';
16546283Sdfr			*astate = S_GROUND;
16646283Sdfr			return (UNVIS_VALID);
16746283Sdfr		case 'E':
16846283Sdfr			*cp = '\033';
16946283Sdfr			*astate = S_GROUND;
17046283Sdfr			return (UNVIS_VALID);
17146283Sdfr		case '\n':
17246283Sdfr			/*
17346283Sdfr			 * hidden newline
17446283Sdfr			 */
17546283Sdfr			*astate = S_GROUND;
17646283Sdfr			return (UNVIS_NOCHAR);
17746283Sdfr		case '$':
17846283Sdfr			/*
17946283Sdfr			 * hidden marker
18046283Sdfr			 */
18146283Sdfr			*astate = S_GROUND;
18246283Sdfr			return (UNVIS_NOCHAR);
18346283Sdfr		}
18446283Sdfr		*astate = S_GROUND;
18546283Sdfr		return (UNVIS_SYNBAD);
18646283Sdfr
18746283Sdfr	case S_META:
18846283Sdfr		if (c == '-')
18946283Sdfr			*astate = S_META1;
19046283Sdfr		else if (c == '^')
19146283Sdfr			*astate = S_CTRL;
19246283Sdfr		else {
19346283Sdfr			*astate = S_GROUND;
19446283Sdfr			return (UNVIS_SYNBAD);
19546283Sdfr		}
19646283Sdfr		return (0);
19746283Sdfr
19846283Sdfr	case S_META1:
19946283Sdfr		*astate = S_GROUND;
20046283Sdfr		*cp |= c;
20146283Sdfr		return (UNVIS_VALID);
20246283Sdfr
20346283Sdfr	case S_CTRL:
20446283Sdfr		if (c == '?')
20546283Sdfr			*cp |= 0177;
20646283Sdfr		else
20746283Sdfr			*cp |= c & 037;
20846283Sdfr		*astate = S_GROUND;
20946283Sdfr		return (UNVIS_VALID);
21046283Sdfr
21146283Sdfr	case S_OCTAL2:	/* second possible octal digit */
21246283Sdfr		if (isoctal(c)) {
21346283Sdfr			/*
21446283Sdfr			 * yes - and maybe a third
21546283Sdfr			 */
21646283Sdfr			*cp = (*cp << 3) + (c - '0');
21746283Sdfr			*astate = S_OCTAL3;
21846283Sdfr			return (0);
21946283Sdfr		}
22046283Sdfr		/*
22146283Sdfr		 * no - done with current sequence, push back passed char
22246283Sdfr		 */
22346283Sdfr		*astate = S_GROUND;
22446283Sdfr		return (UNVIS_VALIDPUSH);
22546283Sdfr
22646283Sdfr	case S_OCTAL3:	/* third possible octal digit */
22746283Sdfr		*astate = S_GROUND;
22846283Sdfr		if (isoctal(c)) {
22946283Sdfr			*cp = (*cp << 3) + (c - '0');
23046283Sdfr			return (UNVIS_VALID);
23146283Sdfr		}
23246283Sdfr		/*
23346283Sdfr		 * we were done, push back passed char
23446283Sdfr		 */
23546283Sdfr		return (UNVIS_VALIDPUSH);
23646283Sdfr
23746283Sdfr	default:
23846283Sdfr		/*
23946283Sdfr		 * decoder in unknown state - (probably uninitialized)
24046283Sdfr		 */
24146283Sdfr		*astate = S_GROUND;
24246283Sdfr		return (UNVIS_SYNBAD);
24346283Sdfr	}
24446283Sdfr}
24546283Sdfr
24646283Sdfr/*
24746283Sdfr * strunvis - decode src into dst
24846283Sdfr *
24946283Sdfr *	Number of chars decoded into dst is returned, -1 on error.
25046283Sdfr *	Dst is null terminated.
25146283Sdfr */
25246283Sdfr
25346283SdfrROKEN_LIB_FUNCTION int ROKEN_LIB_CALL
25446283Sdfrrk_strunvis(char *dst, const char *src)
25546283Sdfr{
25646283Sdfr	char c;
25746283Sdfr	char *start = dst;
25846283Sdfr	int state = 0;
25946283Sdfr
26046283Sdfr	_DIAGASSERT(src != NULL);
26146283Sdfr	_DIAGASSERT(dst != NULL);
26246283Sdfr
26346283Sdfr	while ((c = *src++) != '\0') {
26446283Sdfr	again:
26546283Sdfr		switch (rk_unvis(dst, (unsigned char)c, &state, 0)) {
26646283Sdfr		case UNVIS_VALID:
26746283Sdfr			dst++;
26846283Sdfr			break;
26946283Sdfr		case UNVIS_VALIDPUSH:
27046283Sdfr			dst++;
27146283Sdfr			goto again;
27246283Sdfr		case 0:
27346283Sdfr		case UNVIS_NOCHAR:
27446283Sdfr			break;
27546283Sdfr		default:
27646283Sdfr			return (-1);
27746283Sdfr		}
27846283Sdfr	}
27946283Sdfr	if (unvis(dst, (unsigned char)c, &state, UNVIS_END) == UNVIS_VALID)
28046283Sdfr		dst++;
28146283Sdfr	*dst = '\0';
28246283Sdfr	return (dst - start);
28346283Sdfr}
28446283Sdfr