xstr.c revision 106296
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35
36__FBSDID("$FreeBSD: head/usr.bin/xstr/xstr.c 106296 2002-11-01 12:48:28Z tjr $");
37
38#ifndef lint
39static const char copyright[] =
40"@(#) Copyright (c) 1980, 1993\n\
41	The Regents of the University of California.  All rights reserved.\n";
42#endif
43
44#ifndef lint
45static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
46#endif
47
48#include <sys/types.h>
49
50#include <ctype.h>
51#include <err.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <signal.h>
55#include <string.h>
56#include <unistd.h>
57
58#include "pathnames.h"
59
60/*
61 * xstr - extract and hash strings in a C program
62 *
63 * Bill Joy UCB
64 * November, 1978
65 */
66
/*
 * Explicitly discard the value of an expression.  The argument is
 * parenthesized so that compound expressions are cast as a whole.
 */
#define	ignore(a)	((void)(a))

/* Count of characters requested through xgetc(). */
off_t	tellpt;

/* Offset that hashit() assigns to the next string it has not seen before. */
off_t	mesgpt;
/* Default name of the strings file; main() may repoint `strings`. */
char	cstrings[] =	"strings";
char	*strings =	cstrings;

int	cflg;		/* incremented for each -c option */
int	vflg;		/* incremented for each -v option */
int	readstd;	/* incremented for each "-" option (read stdin) */
78
/* Character and string helpers. */
char	lastchr(char *cp);
int	istail(char *str, char *of);
int	octdigit(char c);

/* Reading the existing strings file. */
int	fgetNUL(char *obuf, int rmdr, FILE *file);
int	xgetc(FILE *file);
void	inithash(void);

/* String table maintenance. */
off_t	hashit(char *str, int new);
off_t	yankstr(char **cpp);
void	flushsh(void);

/* Diagnostics. */
static void usage(void);
void	found(int new, off_t off, char *str);
void	prstr(char *cp);

/* Top-level processing, output generation and cleanup. */
void	process(const char *name);
void	xsdotc(void);
void	onintr(int);
98
99int
100main(int argc, char *argv[])
101{
102	int c;
103
104	while ((c = getopt(argc, argv, "-cv")) != -1)
105		switch (c) {
106		case '-':
107			readstd++;
108			break;
109		case 'c':
110			cflg++;
111			break;
112		case 'v':
113			vflg++;
114			break;
115		default:
116			usage();
117		}
118	argc -= optind;
119	argv += optind;
120
121	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
122		signal(SIGINT, onintr);
123	if (cflg || (argc == 0 && !readstd))
124		inithash();
125	else
126		strings = mktemp(strdup(_PATH_TMP));
127	while (readstd || argc > 0) {
128		if (freopen("x.c", "w", stdout) == NULL)
129			err(1, "x.c");
130		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
131			err(2, "%s", argv[0]);
132		process("x.c");
133		if (readstd == 0)
134			argc--, argv++;
135		else
136			readstd = 0;
137	};
138	flushsh();
139	if (cflg == 0)
140		xsdotc();
141	if (strings[0] == '/')
142		ignore(unlink(strings));
143	exit(0);
144}
145
/* Print the command synopsis on stderr and terminate with status 1. */
static void
usage(void)
{
	(void)fputs("usage: xstr [-v] [-c] [-] [name ...]\n", stderr);
	exit(1);
}
152
/* Current input line; yankstr() refills it when a string continues. */
char linebuf[BUFSIZ];

/*
 * Copy the C source on stdin to stdout, replacing every string literal
 * found outside a comment with "(&xstr[N])", where N is the offset that
 * yankstr() returns for it.
 *
 * Lines starting with '#' are copied through unchanged, except that
 * "# <digit>..." markers are rewritten as "#line <digit>...".
 *
 * name is used only in the diagnostic for a read error on stdin.  A write
 * error on stdout is reported and handled through onintr().
 */
void
process(const char *name)
{
	char *cp;
	int c;
	int incomm = 0;		/* non-zero while inside a C comment */
	int ret;

	printf("extern char\txstr[];\n");
	for (;;) {
		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
			if (ferror(stdin))
				err(3, "%s", name);
			break;
		}
		if (linebuf[0] == '#') {
			/*
			 * The cast keeps isdigit() defined for bytes that
			 * are negative when char is signed.
			 */
			if (linebuf[1] == ' ' &&
			    isdigit((unsigned char)linebuf[2]))
				printf("#line%s", &linebuf[1]);
			else
				printf("%s", linebuf);
			continue;
		}
		for (cp = linebuf; (c = *cp++);) switch (c) {

		case '"':
			if (incomm)
				goto def;
			/* -1 means input ended inside the literal. */
			if ((ret = (int) yankstr(&cp)) == -1)
				goto out;
			printf("(&xstr[%d])", ret);
			break;

		case '\'':
			if (incomm)
				goto def;
			/*
			 * Copy the quote and the character after it so that
			 * '"' is not taken for the start of a string.
			 */
			putchar(c);
			if (*cp)
				putchar(*cp++);
			break;

		case '/':
			if (incomm || *cp != '*')
				goto def;
			incomm = 1;
			cp++;
			printf("/*");
			continue;

		case '*':
			if (incomm && *cp == '/') {
				incomm = 0;
				cp++;
				printf("*/");
				continue;
			}
			goto def;

def:
		default:
			putchar(c);
			break;
		}
	}
out:
	if (ferror(stdout))
		warn("x.c"), onintr(0);
}
222
223off_t
224yankstr(char **cpp)
225{
226	char *cp = *cpp;
227	int c, ch;
228	char dbuf[BUFSIZ];
229	char *dp = dbuf;
230	char *tp;
231	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
232
233	while ((c = *cp++)) {
234		if (dp == dbuf + sizeof(dbuf) - 3)
235			errx(1, "message too long");
236		switch (c) {
237
238		case '"':
239			cp++;
240			goto out;
241
242		case '\\':
243			c = *cp++;
244			if (c == 0)
245				break;
246			if (c == '\n') {
247				if (fgets(linebuf, sizeof linebuf, stdin)
248				    == NULL) {
249					if (ferror(stdin))
250						err(3, "x.c");
251					return(-1);
252				}
253				cp = linebuf;
254				continue;
255			}
256			for (tp = tmp; (ch = *tp++); tp++)
257				if (c == ch) {
258					c = *tp;
259					goto gotc;
260				}
261			if (!octdigit(c)) {
262				*dp++ = '\\';
263				break;
264			}
265			c -= '0';
266			if (!octdigit(*cp))
267				break;
268			c <<= 3, c += *cp++ - '0';
269			if (!octdigit(*cp))
270				break;
271			c <<= 3, c += *cp++ - '0';
272			break;
273		}
274gotc:
275		*dp++ = c;
276	}
277out:
278	*cpp = --cp;
279	*dp = 0;
280	return (hashit(dbuf, 1));
281}
282
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range test is used instead of isdigit() so that negative char
 * values are never passed to a <ctype.h> function.
 */
int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
288
289void
290inithash(void)
291{
292	char buf[BUFSIZ];
293	FILE *mesgread = fopen(strings, "r");
294
295	if (mesgread == NULL)
296		return;
297	for (;;) {
298		mesgpt = tellpt;
299		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
300			break;
301		ignore(hashit(buf, 0));
302	}
303	ignore(fclose(mesgread));
304}
305
/*
 * Read characters from file through xgetc() into obuf until a NUL byte,
 * end of file, or rmdr - 1 characters have been stored; obuf is always
 * NUL-terminated.
 *
 * Returns 0 if the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		/* Check the remaining room before consuming a character. */
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;
	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
317
318int
319xgetc(FILE *file)
320{
321
322	tellpt++;
323	return (getc(file));
324}
325
/* Number of hash chains; hashit() indexes them with a 7-bit value. */
#define	BUCKETS	128

/* One entry of the string table. */
struct hash {
	off_t	hpt;		/* offset assigned to the string */
	char	*hstr;		/* heap copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* non-zero if flushsh() must write it */
};

/* Chain heads; only the hnext member of a head is followed. */
struct hash bucket[BUCKETS];
334
335off_t
336hashit(char *str, int new)
337{
338	int i;
339	struct hash *hp, *hp0;
340
341	hp = hp0 = &bucket[lastchr(str) & 0177];
342	while (hp->hnext) {
343		hp = hp->hnext;
344		i = istail(str, hp->hstr);
345		if (i >= 0)
346			return (hp->hpt + i);
347	}
348	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
349		errx(8, "calloc");
350	hp->hpt = mesgpt;
351	if (!(hp->hstr = strdup(str)))
352		err(1, NULL);
353	mesgpt += strlen(hp->hstr) + 1;
354	hp->hnext = hp0->hnext;
355	hp->hnew = new;
356	hp0->hnext = hp;
357	return (hp->hpt);
358}
359
360void
361flushsh(void)
362{
363	int i;
364	struct hash *hp;
365	FILE *mesgwrit;
366	int old = 0, new = 0;
367
368	for (i = 0; i < BUCKETS; i++)
369		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
370			if (hp->hnew)
371				new++;
372			else
373				old++;
374	if (new == 0 && old != 0)
375		return;
376	mesgwrit = fopen(strings, old ? "r+" : "w");
377	if (mesgwrit == NULL)
378		perror(strings), exit(4);
379	for (i = 0; i < BUCKETS; i++)
380		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
381			found(hp->hnew, hp->hpt, hp->hstr);
382			if (hp->hnew) {
383				fseek(mesgwrit, hp->hpt, 0);
384				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
385				if (ferror(mesgwrit))
386					err(4, "%s", strings);
387			}
388		}
389	if (fclose(mesgwrit) == EOF)
390		err(4, "%s", strings);
391}
392
393void
394found(int new, off_t off, char *str)
395{
396	if (vflg == 0)
397		return;
398	if (!new)
399		fprintf(stderr, "found at %d:", (int) off);
400	else
401		fprintf(stderr, "new at %d:", (int) off);
402	prstr(str);
403	fprintf(stderr, "\n");
404}
405
/*
 * Print the string cp on stderr in a visible form:
 *   - bytes below space are shown as '^' followed by (byte + '`'),
 *   - DEL (0177) is shown as "^?",
 *   - bytes with the high bit set (0200 and above) are shown as a
 *     three-digit octal escape,
 *   - everything else is printed as is.
 */
void
prstr(char *cp)
{
	int c;

	/* Mask to 0..0377 so signed chars do not compare as negative. */
	while ((c = (*cp++ & 0377)))
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
}
421
422void
423xsdotc(void)
424{
425	FILE *strf = fopen(strings, "r");
426	FILE *xdotcf;
427
428	if (strf == NULL)
429		err(5, "%s", strings);
430	xdotcf = fopen("xs.c", "w");
431	if (xdotcf == NULL)
432		err(6, "xs.c");
433	fprintf(xdotcf, "char\txstr[] = {\n");
434	for (;;) {
435		int i, c;
436
437		for (i = 0; i < 8; i++) {
438			c = getc(strf);
439			if (ferror(strf)) {
440				warn("%s", strings);
441				onintr(0);
442			}
443			if (feof(strf)) {
444				fprintf(xdotcf, "\n");
445				goto out;
446			}
447			fprintf(xdotcf, "0x%02x,", c);
448		}
449		fprintf(xdotcf, "\n");
450	}
451out:
452	fprintf(xdotcf, "};\n");
453	ignore(fclose(xdotcf));
454	ignore(fclose(strf));
455}
456
/*
 * Return the last character of the string cp, or NUL when the string
 * is empty.
 */
char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
465
/*
 * Test whether str is a tail (suffix) of the string `of`.
 *
 * Returns the index in `of` at which str begins when it is a tail, or -1
 * when it is not.  The lengths are compared as size_t before subtracting,
 * so the result never depends on an unsigned difference being converted
 * to a negative int.
 */
int
istail(char *str, char *of)
{
	size_t slen = strlen(str);
	size_t oflen = strlen(of);
	size_t d;

	if (slen > oflen)
		return (-1);
	d = oflen - slen;
	if (strcmp(&of[d], str) != 0)
		return (-1);
	return ((int)d);
}
475
476void
477onintr(int dummy __unused)
478{
479
480	ignore(signal(SIGINT, SIG_IGN));
481	if (strings[0] == '/')
482		ignore(unlink(strings));
483	ignore(unlink("x.c"));
484	ignore(unlink("xs.c"));
485	exit(7);
486}
487