c_literal.c revision 11734:d29dc9c2b6c5
1189251Ssam/*
2189251Ssam * CDDL HEADER START
3189251Ssam *
4189251Ssam * The contents of this file are subject to the terms of the
5189251Ssam * Common Development and Distribution License (the "License").
6189251Ssam * You may not use this file except in compliance with the License.
7189251Ssam *
8189251Ssam * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9189251Ssam * or http://www.opensolaris.org/os/licensing.
10189251Ssam * See the License for the specific language governing permissions
11189251Ssam * and limitations under the License.
12189251Ssam *
13189251Ssam * When distributing Covered Code, include this CDDL HEADER in each
14189251Ssam * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15189251Ssam * If applicable, add the following below this CDDL HEADER, with the
16189251Ssam * fields enclosed by brackets "[]" replaced with your own identifying
17189251Ssam * information: Portions Copyright [yyyy] [name of copyright owner]
18189251Ssam *
19189251Ssam * CDDL HEADER END
20189251Ssam */
21189251Ssam
22189251Ssam/*
23189251Ssam * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24189251Ssam * Use is subject to license terms.
25189251Ssam */
26189251Ssam
27189251Ssam
28189251Ssam/*
29189251Ssam * Translate a string into C literal string constant notation.
30189251Ssam */
31189251Ssam
32189251Ssam#include	<stdio.h>
33189251Ssam#include	<ctype.h>
34189251Ssam#include	<_conv.h>
35189251Ssam#include	<c_literal_msg.h>
36189251Ssam
37189251Ssam
38189251Ssam/*
39189251Ssam * Convert characters to the form used by the C language to represent
40189251Ssam * literal strings:
41189251Ssam *	- Printable characters are shown as themselves
42189251Ssam *	- Convert special characters to their 2-character escaped forms:
43189251Ssam *		alert (bell)	\a
44189251Ssam *		backspace	\b
45189251Ssam *		formfeed	\f
46189251Ssam *		newline		\n
47189251Ssam *		return		\r
48189251Ssam *		horizontal tab	\t
49189251Ssam *		vertical tab	\v
50189251Ssam *		backspace	\\
51189251Ssam *		single quote	\'
52189251Ssam *		double quote	\"
53189251Ssam *	- Display other non-printable characters as 4-character escaped
54189251Ssam *		octal constants.
55189251Ssam *
56189251Ssam * entry:
57189251Ssam *	buf - Buffer of characters to be processed
58189251Ssam *	n # of characters in buf to be processed
59189251Ssam *	outfunc - Function to be called to move output characters.
60189251Ssam *	uvalue - User value. This argument is passed to outfunc without
61189251Ssam *		examination. The caller can use it to pass additional
62189251Ssam *		information required by the callback.
63189251Ssam *
64189251Ssam * exit:
65189251Ssam *	The string has been processed, with the resulting data passed
66189251Ssam *	to outfunc for processing.
67189251Ssam */
68189251Ssamvoid
69189251Ssamconv_str_to_c_literal(const char *buf, size_t n,
70189251Ssam    Conv_str_to_c_literal_func_t *outfunc, void *uvalue)
71189251Ssam{
72189251Ssam	char	bs_buf[2];	/* For two-character backslash codes */
73189251Ssam	char	octal_buf[10];	/* For \000 style octal constants */
74189251Ssam
75189251Ssam	bs_buf[0] = '\\';
76189251Ssam	while (n > 0) {
77189251Ssam		switch (*buf) {
78189251Ssam		case '\0':
79189251Ssam			bs_buf[1] = '0';
80189251Ssam			break;
81189251Ssam		case '\a':
82189251Ssam			bs_buf[1] = 'a';
83189251Ssam			break;
84189251Ssam		case '\b':
85189251Ssam			bs_buf[1] = 'b';
86189251Ssam			break;
87189251Ssam		case '\f':
88189251Ssam			bs_buf[1] = 'f';
89189251Ssam			break;
90189251Ssam		case '\n':
91189251Ssam			bs_buf[1] = 'n';
92			break;
93		case '\r':
94			bs_buf[1] = 'r';
95			break;
96		case '\t':
97			bs_buf[1] = 't';
98			break;
99		case '\v':
100			bs_buf[1] = 'v';
101			break;
102		case '\\':
103			bs_buf[1] = '\\';
104			break;
105		case '\'':
106			bs_buf[1] = '\'';
107			break;
108		case '"':
109			bs_buf[1] = '"';
110			break;
111		default:
112			bs_buf[1] = '\0';
113		}
114
115		if (bs_buf[1] != '\0') {
116			(*outfunc)(bs_buf, 2, uvalue);
117			buf++;
118			n--;
119		} else if (isprint(*buf)) {
120			/*
121			 * Output the entire sequence of printable
122			 * characters in a single shot.
123			 */
124			const char	*start = buf;
125			size_t		outlen = 0;
126
127			for (start = buf; (n > 0) && isprint(*buf); buf++, n--)
128				outlen++;
129			(*outfunc)(start, outlen, uvalue);
130		} else {
131			/* Generic unprintable character: Use octal notation */
132			(void) snprintf(octal_buf, sizeof (octal_buf),
133			    MSG_ORIG(MSG_FMT_OCTCONST), (uchar_t)*buf);
134			(*outfunc)(octal_buf, strlen(octal_buf), uvalue);
135			buf++;
136			n--;
137		}
138	}
139}
140
/*
 * Decode the body of a C style escape sequence. The caller has already
 * consumed the leading '\' character; this routine examines what
 * follows it and reports the character code the sequence stands for.
 *
 * entry:
 *	str - Address of a string pointer that references the first
 *		character following the backslash.
 *
 * exit:
 *	Returns the code of the translated character, or -1 if the
 *	character following the backslash does not start a recognized
 *	escape sequence.
 *
 *	Recognized sequences are the single letter escapes (a, b, f, n,
 *	r, t, v), the self-quoting characters (backslash, single quote,
 *	double quote), and octal constants of one to three digits. The
 *	value of an octal constant is not range checked, so results as
 *	large as 0777 are possible.
 *
 *	*str is updated in every case. On success it references the
 *	character following the escape sequence. On failure it references
 *	the character following the one that was rejected.
 */
int
conv_translate_c_esc(char **str)
{
	char	*cur = *str;
	int	code = *cur++;
	int	ndigits;

	if ((code >= '0') && (code <= '7')) {
		/*
		 * Octal constant: The leading digit has been consumed.
		 * Accept at most two more, stopping early at the first
		 * character that is not an octal digit.
		 */
		code -= '0';
		for (ndigits = 1; ndigits < 3; ndigits++) {
			if ((*cur < '0') || (*cur > '7'))
				break;
			code = (code * 8) + (*cur - '0');
			cur++;
		}
	} else {
		switch (code) {
		case 'a':
			code = '\a';
			break;
		case 'b':
			code = '\b';
			break;
		case 'f':
			code = '\f';
			break;
		case 'n':
			code = '\n';
			break;
		case 'r':
			code = '\r';
			break;
		case 't':
			code = '\t';
			break;
		case 'v':
			code = '\v';
			break;

		/*
		 * These characters stand for themselves. The backslash
		 * only serves to strip their special meaning, so code
		 * already holds the right answer. They are listed so
		 * that they are not rejected by the default case.
		 */
		case '\\':
		case '\'':
		case '"':
			break;

		default:
			code = -1;
			break;
		}
	}

	*str = cur;
	return (code);
}
222