1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31
32__FBSDID("$FreeBSD$");
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif
39
40#ifndef lint
41static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
42#endif
43
44#include <sys/types.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <signal.h>
51#include <string.h>
52#include <unistd.h>
53
54#include "pathnames.h"
55
56/*
57 * xstr - extract and hash strings in a C program
58 *
59 * Bill Joy UCB
60 * November, 1978
61 */
62
/* Explicitly discard the value of an expression (e.g. an unchecked return). */
#define	ignore(a)	((void) a)

/* Count of characters requested through xgetc(); tracks the read offset. */
static off_t	tellpt;

/* Offset that hashit() assigns to the next newly recorded string. */
static off_t	mesgpt;
/* Default name of the strings file. */
static char	cstrings[] =	"strings";
/* Name of the strings file in use; main() may point it at a temp file. */
static char	*strings =	cstrings;

static int	cflg;		/* incremented for each -c option */
static int	vflg;		/* incremented for each -v option; enables found() output */
static int	readstd;	/* incremented for the "-" option: read standard input */
74
/* Forward declarations of the file-local functions defined below. */
static char lastchr(char *);

static int fgetNUL(char *, int, FILE *);
static int istail(char *, char *);
static int octdigit(char);
static int xgetc(FILE *);

static off_t hashit(char *, int);
static off_t yankstr(char **);

static void usage(void);

static void flushsh(void);
static void found(int, off_t, char *);
static void inithash(void);
static void onintr(int);
static void process(const char *);
static void prstr(char *);
static void xsdotc(void);
94
95int
96main(int argc, char *argv[])
97{
98	int c;
99	int fdesc;
100
101	while ((c = getopt(argc, argv, "-cv")) != -1)
102		switch (c) {
103		case '-':
104			readstd++;
105			break;
106		case 'c':
107			cflg++;
108			break;
109		case 'v':
110			vflg++;
111			break;
112		default:
113			usage();
114		}
115	argc -= optind;
116	argv += optind;
117
118	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
119		signal(SIGINT, onintr);
120	if (cflg || (argc == 0 && !readstd))
121		inithash();
122	else {
123		strings = strdup(_PATH_TMP);
124		if (strings == NULL)
125			err(1, "strdup() failed");
126		fdesc = mkstemp(strings);
127		if (fdesc == -1)
128			err(1, "Unable to create temporary file");
129		close(fdesc);
130	}
131
132	while (readstd || argc > 0) {
133		if (freopen("x.c", "w", stdout) == NULL)
134			err(1, "x.c");
135		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
136			err(2, "%s", argv[0]);
137		process("x.c");
138		if (readstd == 0)
139			argc--, argv++;
140		else
141			readstd = 0;
142	};
143	flushsh();
144	if (cflg == 0)
145		xsdotc();
146	if (strings[0] == '/')
147		ignore(unlink(strings));
148	exit(0);
149}
150
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
157
/* Current input line; filled by fgets() in both process() and yankstr(). */
static char linebuf[BUFSIZ];
159
160static void
161process(const char *name)
162{
163	char *cp;
164	int c;
165	int incomm = 0;
166	int ret;
167
168	printf("extern char\txstr[];\n");
169	for (;;) {
170		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
171			if (ferror(stdin))
172				err(3, "%s", name);
173			break;
174		}
175		if (linebuf[0] == '#') {
176			if (linebuf[1] == ' ' && isdigit(linebuf[2]))
177				printf("#line%s", &linebuf[1]);
178			else
179				printf("%s", linebuf);
180			continue;
181		}
182		for (cp = linebuf; (c = *cp++);) switch (c) {
183
184		case '"':
185			if (incomm)
186				goto def;
187			if ((ret = (int) yankstr(&cp)) == -1)
188				goto out;
189			printf("(&xstr[%d])", ret);
190			break;
191
192		case '\'':
193			if (incomm)
194				goto def;
195			putchar(c);
196			if (*cp)
197				putchar(*cp++);
198			break;
199
200		case '/':
201			if (incomm || *cp != '*')
202				goto def;
203			incomm = 1;
204			cp++;
205			printf("/*");
206			continue;
207
208		case '*':
209			if (incomm && *cp == '/') {
210				incomm = 0;
211				cp++;
212				printf("*/");
213				continue;
214			}
215			goto def;
216
217def:
218		default:
219			putchar(c);
220			break;
221		}
222	}
223out:
224	if (ferror(stdout))
225		warn("x.c"), onintr(0);
226}
227
228static off_t
229yankstr(char **cpp)
230{
231	char *cp = *cpp;
232	int c, ch;
233	char dbuf[BUFSIZ];
234	char *dp = dbuf;
235	char *tp;
236	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
237
238	while ((c = *cp++)) {
239		if (dp == dbuf + sizeof(dbuf) - 3)
240			errx(1, "message too long");
241		switch (c) {
242
243		case '"':
244			cp++;
245			goto out;
246
247		case '\\':
248			c = *cp++;
249			if (c == 0)
250				break;
251			if (c == '\n') {
252				if (fgets(linebuf, sizeof linebuf, stdin)
253				    == NULL) {
254					if (ferror(stdin))
255						err(3, "x.c");
256					return(-1);
257				}
258				cp = linebuf;
259				continue;
260			}
261			for (tp = tmp; (ch = *tp++); tp++)
262				if (c == ch) {
263					c = *tp;
264					goto gotc;
265				}
266			if (!octdigit(c)) {
267				*dp++ = '\\';
268				break;
269			}
270			c -= '0';
271			if (!octdigit(*cp))
272				break;
273			c <<= 3, c += *cp++ - '0';
274			if (!octdigit(*cp))
275				break;
276			c <<= 3, c += *cp++ - '0';
277			break;
278		}
279gotc:
280		*dp++ = c;
281	}
282out:
283	*cpp = --cp;
284	*dp = 0;
285	return (hashit(dbuf, 1));
286}
287
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range test is used instead of isdigit(): passing a negative
 * char to the <ctype.h> functions is undefined behavior.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
293
294static void
295inithash(void)
296{
297	char buf[BUFSIZ];
298	FILE *mesgread = fopen(strings, "r");
299
300	if (mesgread == NULL)
301		return;
302	for (;;) {
303		mesgpt = tellpt;
304		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
305			break;
306		ignore(hashit(buf, 0));
307	}
308	ignore(fclose(mesgread));
309}
310
/*
 * Read characters from `file' into `obuf' until a NUL byte, end of file,
 * or until only one byte of the `rmdr'-byte buffer is left; the result is
 * always NUL-terminated.
 *
 * Returns 0 if the stream is at end of file or in error afterwards,
 * 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;

	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
322
323static int
324xgetc(FILE *file)
325{
326
327	tellpt++;
328	return (getc(file));
329}
330
/* Number of hash chains; hashit() indexes with (last character & 0177). */
#define	BUCKETS	128

/*
 * One recorded string.  Each bucket[] element acts as a list head:
 * hashit() and flushsh() use only its hnext field.
 */
static struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* strdup()'ed copy of the string */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* nonzero: flushsh() must write this entry */
} bucket[BUCKETS];
339
340static off_t
341hashit(char *str, int new)
342{
343	int i;
344	struct hash *hp, *hp0;
345
346	hp = hp0 = &bucket[lastchr(str) & 0177];
347	while (hp->hnext) {
348		hp = hp->hnext;
349		i = istail(str, hp->hstr);
350		if (i >= 0)
351			return (hp->hpt + i);
352	}
353	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
354		errx(8, "calloc");
355	hp->hpt = mesgpt;
356	if (!(hp->hstr = strdup(str)))
357		err(1, NULL);
358	mesgpt += strlen(hp->hstr) + 1;
359	hp->hnext = hp0->hnext;
360	hp->hnew = new;
361	hp0->hnext = hp;
362	return (hp->hpt);
363}
364
365static void
366flushsh(void)
367{
368	int i;
369	struct hash *hp;
370	FILE *mesgwrit;
371	int old = 0, new = 0;
372
373	for (i = 0; i < BUCKETS; i++)
374		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
375			if (hp->hnew)
376				new++;
377			else
378				old++;
379	if (new == 0 && old != 0)
380		return;
381	mesgwrit = fopen(strings, old ? "r+" : "w");
382	if (mesgwrit == NULL)
383		err(4, "%s", strings);
384	for (i = 0; i < BUCKETS; i++)
385		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
386			found(hp->hnew, hp->hpt, hp->hstr);
387			if (hp->hnew) {
388				fseek(mesgwrit, hp->hpt, 0);
389				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
390				if (ferror(mesgwrit))
391					err(4, "%s", strings);
392			}
393		}
394	if (fclose(mesgwrit) == EOF)
395		err(4, "%s", strings);
396}
397
398static void
399found(int new, off_t off, char *str)
400{
401	if (vflg == 0)
402		return;
403	if (!new)
404		fprintf(stderr, "found at %d:", (int) off);
405	else
406		fprintf(stderr, "new at %d:", (int) off);
407	prstr(str);
408	fprintf(stderr, "\n");
409}
410
/*
 * Print a string on stderr in printable form: characters below space as
 * '^' followed by the character plus '`', DEL as "^?", bytes with the
 * high bit set (0200-0377) as a three-digit octal escape, and everything
 * else unchanged.
 */
static void
prstr(char *cp)
{
	int c;

	/* Mask to 0..0377 so a signed char cannot yield a negative value. */
	while ((c = (*cp++ & 0377)))
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* includes 0200 itself */
			fprintf(stderr, "\\%03o", c);
		else
			fprintf(stderr, "%c", c);
}
426
427static void
428xsdotc(void)
429{
430	FILE *strf = fopen(strings, "r");
431	FILE *xdotcf;
432
433	if (strf == NULL)
434		err(5, "%s", strings);
435	xdotcf = fopen("xs.c", "w");
436	if (xdotcf == NULL)
437		err(6, "xs.c");
438	fprintf(xdotcf, "char\txstr[] = {\n");
439	for (;;) {
440		int i, c;
441
442		for (i = 0; i < 8; i++) {
443			c = getc(strf);
444			if (ferror(strf)) {
445				warn("%s", strings);
446				onintr(0);
447			}
448			if (feof(strf)) {
449				fprintf(xdotcf, "\n");
450				goto out;
451			}
452			fprintf(xdotcf, "0x%02x,", c);
453		}
454		fprintf(xdotcf, "\n");
455	}
456out:
457	fprintf(xdotcf, "};\n");
458	ignore(fclose(xdotcf));
459	ignore(fclose(strf));
460}
461
/*
 * Return the last character of the string, or NUL if it is empty.
 */
static char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
470
/*
 * If `str' is a tail (suffix) of `of', return the index in `of' where
 * the tail begins; otherwise return -1.
 */
static int
istail(char *str, char *of)
{
	size_t oflen = strlen(of);
	size_t slen = strlen(str);

	if (slen > oflen)
		return (-1);
	if (strcmp(of + (oflen - slen), str) != 0)
		return (-1);
	return ((int)(oflen - slen));
}
480
481static void
482onintr(int dummy __unused)
483{
484
485	ignore(signal(SIGINT, SIG_IGN));
486	if (strings[0] == '/')
487		ignore(unlink(strings));
488	ignore(unlink("x.c"));
489	ignore(unlink("xs.c"));
490	exit(7);
491}
492