dt_string.c revision 256281
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2173149Snyan
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */
2573149Snyan
2673149Snyan#include <strings.h>
2773149Snyan#include <stdlib.h>
2873149Snyan#include <errno.h>
2973149Snyan#include <ctype.h>
3073149Snyan
3173149Snyan#include <dt_string.h>
3273149Snyan
/*
 * Transform string s in place, converting each embedded C escape sequence
 * to the character it denotes.  For example, the two-character substring "\n"
 * is replaced by a single '\n' character.  The recognized sequences are:
 *
 *	\ooo		one to three octal digits
 *	\xh...		'x' followed by any number of hexadecimal digits; only
 *			the low-order bits that fit in a char are kept
 *	\a \b \f \n \r \t \v
 *	\" \\		the quote or backslash itself
 *
 * A backslash followed by any other character is copied through unchanged
 * (both the backslash and the character are kept).  A lone backslash at the
 * very end of the string is dropped.
 *
 * The output is never longer than the input, which is what makes the in-place
 * rewrite safe.  The length of the resulting string is returned; this count
 * includes any NUL bytes produced by sequences such as "\0".
 */
size_t
stresc2chr(char *s)
{
	char *p, *q, c;
	int esc = 0;
	unsigned int x;	/* unsigned so that long hex runs wrap, not overflow */

	for (p = q = s; (c = *p) != '\0'; p++) {
		if (!esc) {
			if (c == '\\')
				esc = 1;	/* defer until next character */
			else
				*q++ = c;
			continue;
		}

		esc = 0;

		switch (c) {
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			/*
			 * Consume up to two more octal digits.  p is left on
			 * the last digit consumed; the loop's p++ steps past.
			 */
			c -= '0';

			if (p[1] >= '0' && p[1] <= '7') {
				c = c * 8 + *++p - '0';

				if (p[1] >= '0' && p[1] <= '7')
					c = c * 8 + *++p - '0';
			}

			*q++ = c;
			break;

		case 'a':
			*q++ = '\a';
			break;
		case 'b':
			*q++ = '\b';
			break;
		case 'f':
			*q++ = '\f';
			break;
		case 'n':
			*q++ = '\n';
			break;
		case 'r':
			*q++ = '\r';
			break;
		case 't':
			*q++ = '\t';
			break;
		case 'v':
			*q++ = '\v';
			break;

		case 'x':
			for (x = 0; (c = *++p) != '\0'; ) {
				if (c >= '0' && c <= '9')
					x = x * 16 + (unsigned int)(c - '0');
				else if (c >= 'a' && c <= 'f')
					x = x * 16 + (unsigned int)(c - 'a' + 10);
				else if (c >= 'A' && c <= 'F')
					x = x * 16 + (unsigned int)(c - 'A' + 10);
				else
					break;
			}
			*q++ = (char)x;
			p--;	/* back up so the loop's p++ revisits *p */
			break;

		case '"':
		case '\\':
			*q++ = c;
			break;

		default:
			/* Not an escape we know: keep it verbatim. */
			*q++ = '\\';
			*q++ = c;
			break;
		}
	}

	*q = '\0';
	return ((size_t)(q - s));
}
129
/*
 * Return the character that follows the backslash in the two-character C
 * escape sequence for c, or '\0' if c has no two-character escape.
 */
static char
strchr2esc_letter(char c)
{
	switch (c) {
	case '\0':
		return ('0');
	case '\a':
		return ('a');
	case '\b':
		return ('b');
	case '\f':
		return ('f');
	case '\n':
		return ('n');
	case '\r':
		return ('r');
	case '\t':
		return ('t');
	case '\v':
		return ('v');
	case '"':
		return ('"');
	case '\\':
		return ('\\');
	default:
		return ('\0');
	}
}

/*
 * Create a copy of string s in which certain unprintable or special characters
 * have been converted to the string representation of their C escape sequence.
 * For example, the newline character is expanded to the string "\n".
 *
 * At most n bytes of s are examined.  Processing stops after the first NUL
 * byte within those n bytes; that NUL is itself emitted as "\0".  Characters
 * with a two-character escape use it, a space is copied as is, and any other
 * byte outside the range '!' through '~' is written as a backslash followed
 * by three octal digits.
 *
 * The result is a NUL-terminated string obtained from malloc() that the
 * caller must free(), or NULL if the allocation fails.
 */
char *
strchr2esc(const char *s, size_t n)
{
	const char *p, *end = s + n;
	char *q, *s2, c, e;
	unsigned char uc;
	size_t addl = 0;

	/*
	 * Sizing pass: count the extra bytes that escaping will add.  Stop at
	 * the first NUL, exactly as the copy pass below does, so that no byte
	 * beyond the terminator is ever read.
	 */
	for (p = s; p < end; p++) {
		c = *p;

		if (strchr2esc_letter(c) != '\0')
			addl++;		/* 1 add'l char needed to follow \ */
		else if (c != ' ' && (c < '!' || c > '~'))
			addl += 3;	/* 3 add'l chars following \ */

		if (c == '\0')
			break;
	}

	if ((s2 = malloc(n + addl + 1)) == NULL)
		return (NULL);

	for (p = s, q = s2; p < end; p++) {
		c = *p;
		e = strchr2esc_letter(c);

		if (e != '\0') {
			*q++ = '\\';
			*q++ = e;
		} else if (c != ' ' && (c < '!' || c > '~')) {
			/* Work on the unsigned value so shifts are portable. */
			uc = (unsigned char)c;
			*q++ = '\\';
			*q++ = (char)(((uc >> 6) & 3) + '0');
			*q++ = (char)(((uc >> 3) & 7) + '0');
			*q++ = (char)((uc & 7) + '0');
		} else {
			*q++ = c;
		}

		if (c == '\0')
			break; /* don't continue past \0 even if p < s + n */
	}

	*q = '\0';
	return (s2);
}
229
/*
 * Return a pointer to the final path component of s, i.e. the text following
 * the last '/' character, or s itself when it contains no '/'.  The result
 * points into s; nothing is copied.  The function is named strbasename rather
 * than basename to avoid conflicting with libgen.h's non-const function
 * prototype.
 */
const char *
strbasename(const char *s)
{
	const char *slash = strrchr(s, '/');

	return (slash != NULL ? slash + 1 : s);
}
245
/*
 * This function tests a string against the regular expression used for idents
 * and integers in the D lexer, and should match the superset of RGX_IDENT and
 * RGX_INT in dt_lex.l.  If an invalid character is found, the function returns
 * a pointer to it.  Otherwise NULL is returned for a valid string.  An empty
 * string is invalid and yields a pointer to its terminating NUL.
 *
 * The integer check is delegated to strtoull() with base 0, so decimal, octal
 * and hexadecimal forms are accepted.  Note that strtoull() also skips leading
 * white space and accepts an optional sign, so such strings pass as integers.
 * errno is reset to zero as a side effect of the check.
 */
const char *
strbadidnum(const char *s)
{
	const char *p;
	char *end;

	if (*s == '\0')
		return (s);

	errno = 0;
	(void) strtoull(s, &end, 0);

	if (errno == 0 && *end == '\0')
		return (NULL); /* matches RGX_INT */

	for (p = s; *p != '\0'; p++) {
		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char; a plain char may be negative for bytes with
		 * the high bit set, so convert before calling isalnum().
		 */
		if (isalnum((unsigned char)*p) == 0 && *p != '_' && *p != '`')
			return (p);
	}

	return (NULL); /* matches RGX_IDENT */
}
274
/*
 * Report whether s contains any glob metacharacter.  Returns 1 if one of
 * '[', '?', '*' or '\' appears anywhere in the string, and 0 if the string is
 * a plain literal.  See gmatch(3GEN) and sh(1) for the glob syntax definition.
 */
int
strisglob(const char *s)
{
	const char *p;

	for (p = s; *p != '\0'; p++) {
		switch (*p) {
		case '[':
		case '?':
		case '*':
		case '\\':
			return (1);
		default:
			break;
		}
	}

	return (0);
}
291
/*
 * Hyphenate a string in-place by converting any instances of "__" to "-",
 * which we use for probe names to improve readability, and return the string.
 *
 * The string is scanned left to right and pairs do not overlap: "____"
 * becomes "--" and "___" becomes "-_".  The rewrite is done in a single pass
 * with separate read and write cursors; because the output is never longer
 * than the input, the write cursor can never overtake the read cursor.
 */
char *
strhyphenate(char *s)
{
	char *src, *dst;

	for (src = dst = s; *src != '\0'; ) {
		if (src[0] == '_' && src[1] == '_') {
			*dst++ = '-';
			src += 2;
		} else {
			*dst++ = *src++;
		}
	}

	*dst = '\0';
	return (s);
}
309}
310