1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33
34__FBSDID("$FreeBSD$");
35
36#ifndef lint
37static const char copyright[] =
38"@(#) Copyright (c) 1980, 1993\n\
39	The Regents of the University of California.  All rights reserved.\n";
40#endif
41
42#ifndef lint
43static const char sccsid[] = "@(#)xstr.c	8.1 (Berkeley) 6/9/93";
44#endif
45
46#include <sys/types.h>
47
48#include <ctype.h>
49#include <err.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <signal.h>
53#include <string.h>
54#include <unistd.h>
55
56#include "pathnames.h"
57
58/*
59 * xstr - extract and hash strings in a C program
60 *
61 * Bill Joy UCB
62 * November, 1978
63 */
64
/* Explicitly discard the value of an expression (e.g. an unchecked return). */
#define	ignore(a)	((void) a)

/* Count of characters fetched through xgetc(); inithash() uses it as the
 * offset of the next entry while reading an existing strings file. */
static off_t	tellpt;

/* Offset at which the next newly hashed string will be placed; hashit()
 * records it in the entry and advances it past the string and its NUL. */
static off_t	mesgpt;
/* Name of the strings file: "strings" by default, replaced in main() by a
 * temporary file path when -c is not given and there is input to process. */
static char	cstrings[] =	"strings";
static char	*strings =	cstrings;

static int	cflg;		/* incremented for each -c option */
static int	vflg;		/* incremented for each -v option; enables found() output */
static int	readstd;	/* non-zero while standard input is still to be processed */

static char lastchr(char *);

static int fgetNUL(char *, int, FILE *);
static int istail(char *, char *);
static int octdigit(char);
static int xgetc(FILE *);

static off_t hashit(char *, int);
static off_t yankstr(char **);

static void usage(void);

static void flushsh(void);
static void found(int, off_t, char *);
static void inithash(void);
static void onintr(int);
static void process(const char *);
static void prstr(char *);
static void xsdotc(void);
96
97int
98main(int argc, char *argv[])
99{
100	int c;
101	int fdesc;
102
103	while ((c = getopt(argc, argv, "-cv")) != -1)
104		switch (c) {
105		case '-':
106			readstd++;
107			break;
108		case 'c':
109			cflg++;
110			break;
111		case 'v':
112			vflg++;
113			break;
114		default:
115			usage();
116		}
117	argc -= optind;
118	argv += optind;
119
120	if (signal(SIGINT, SIG_IGN) == SIG_DFL)
121		signal(SIGINT, onintr);
122	if (cflg || (argc == 0 && !readstd))
123		inithash();
124	else {
125		strings = strdup(_PATH_TMP);
126		if (strings == NULL)
127			err(1, "strdup() failed");
128		fdesc = mkstemp(strings);
129		if (fdesc == -1)
130			err(1, "Unable to create temporary file");
131		close(fdesc);
132	}
133
134	while (readstd || argc > 0) {
135		if (freopen("x.c", "w", stdout) == NULL)
136			err(1, "x.c");
137		if (!readstd && freopen(argv[0], "r", stdin) == NULL)
138			err(2, "%s", argv[0]);
139		process("x.c");
140		if (readstd == 0)
141			argc--, argv++;
142		else
143			readstd = 0;
144	}
145	flushsh();
146	if (cflg == 0)
147		xsdotc();
148	if (strings[0] == '/')
149		ignore(unlink(strings));
150	exit(0);
151}
152
/*
 * Print the command synopsis on standard error and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: xstr [-cv] [-] [file ...]\n", stderr);
	exit(1);
}
159
/*
 * Current input line.  Filled by process(); yankstr() refills it when a
 * string literal continues onto the next line via backslash-newline.
 */
static char linebuf[BUFSIZ];
161
162static void
163process(const char *name)
164{
165	char *cp;
166	int c;
167	int incomm = 0;
168	int ret;
169
170	printf("extern char\txstr[];\n");
171	for (;;) {
172		if (fgets(linebuf, sizeof linebuf, stdin) == NULL) {
173			if (ferror(stdin))
174				err(3, "%s", name);
175			break;
176		}
177		if (linebuf[0] == '#') {
178			if (linebuf[1] == ' ' && isdigit(linebuf[2]))
179				printf("#line%s", &linebuf[1]);
180			else
181				printf("%s", linebuf);
182			continue;
183		}
184		for (cp = linebuf; (c = *cp++);) switch (c) {
185
186		case '"':
187			if (incomm)
188				goto def;
189			if ((ret = (int) yankstr(&cp)) == -1)
190				goto out;
191			printf("(&xstr[%d])", ret);
192			break;
193
194		case '\'':
195			if (incomm)
196				goto def;
197			putchar(c);
198			if (*cp)
199				putchar(*cp++);
200			break;
201
202		case '/':
203			if (incomm || *cp != '*')
204				goto def;
205			incomm = 1;
206			cp++;
207			printf("/*");
208			continue;
209
210		case '*':
211			if (incomm && *cp == '/') {
212				incomm = 0;
213				cp++;
214				printf("*/");
215				continue;
216			}
217			goto def;
218
219def:
220		default:
221			putchar(c);
222			break;
223		}
224	}
225out:
226	if (ferror(stdout))
227		warn("x.c"), onintr(0);
228}
229
230static off_t
231yankstr(char **cpp)
232{
233	char *cp = *cpp;
234	int c, ch;
235	char dbuf[BUFSIZ];
236	char *dp = dbuf;
237	char *tp;
238	static char tmp[] = "b\bt\tr\rn\nf\f\\\\\"\"";
239
240	while ((c = *cp++)) {
241		if (dp == dbuf + sizeof(dbuf) - 3)
242			errx(1, "message too long");
243		switch (c) {
244
245		case '"':
246			cp++;
247			goto out;
248
249		case '\\':
250			c = *cp++;
251			if (c == 0)
252				break;
253			if (c == '\n') {
254				if (fgets(linebuf, sizeof linebuf, stdin)
255				    == NULL) {
256					if (ferror(stdin))
257						err(3, "x.c");
258					return(-1);
259				}
260				cp = linebuf;
261				continue;
262			}
263			for (tp = tmp; (ch = *tp++); tp++)
264				if (c == ch) {
265					c = *tp;
266					goto gotc;
267				}
268			if (!octdigit(c)) {
269				*dp++ = '\\';
270				break;
271			}
272			c -= '0';
273			if (!octdigit(*cp))
274				break;
275			c <<= 3, c += *cp++ - '0';
276			if (!octdigit(*cp))
277				break;
278			c <<= 3, c += *cp++ - '0';
279			break;
280		}
281gotc:
282		*dp++ = c;
283	}
284out:
285	*cpp = --cp;
286	*dp = 0;
287	return (hashit(dbuf, 1));
288}
289
/*
 * Return 1 if c is an octal digit ('0' through '7'), 0 otherwise.
 *
 * A plain range test is used instead of isdigit() because c may be a
 * negative char, for which the <ctype.h> functions are undefined.
 */
static int
octdigit(char c)
{
	return (c >= '0' && c <= '7');
}
295
296static void
297inithash(void)
298{
299	char buf[BUFSIZ];
300	FILE *mesgread = fopen(strings, "r");
301
302	if (mesgread == NULL)
303		return;
304	for (;;) {
305		mesgpt = tellpt;
306		if (fgetNUL(buf, sizeof buf, mesgread) == 0)
307			break;
308		ignore(hashit(buf, 0));
309	}
310	ignore(fclose(mesgread));
311}
312
/*
 * Read characters from file into obuf until a NUL byte, end of file, or
 * until only the terminator fits in the rmdr-byte buffer.  The result is
 * always NUL-terminated.  Returns 0 once the stream is at end of file or
 * in error, 1 otherwise.
 */
static int
fgetNUL(char *obuf, int rmdr, FILE *file)
{
	char *out = obuf;
	int ch;

	for (;;) {
		/* Keep one byte in reserve for the terminator. */
		if (--rmdr <= 0)
			break;
		ch = xgetc(file);
		if (ch == 0 || ch == EOF)
			break;
		*out++ = ch;
	}
	*out = 0;

	if (feof(file) || ferror(file))
		return (0);
	return (1);
}
324
325static int
326xgetc(FILE *file)
327{
328
329	tellpt++;
330	return (getc(file));
331}
332
/* Number of hash chains; hashit() indexes with (last character & 0177). */
#define	BUCKETS	128

/*
 * One entry per distinct string.  The bucket[] elements themselves serve
 * only as list heads: just their hnext member is used.
 */
static struct hash {
	off_t	hpt;		/* offset of the string in the strings file */
	char	*hstr;		/* private copy of the string (from strdup) */
	struct	hash *hnext;	/* next entry on the same chain */
	short	hnew;		/* non-zero: flushsh() must write this string */
} bucket[BUCKETS];
341
342static off_t
343hashit(char *str, int new)
344{
345	int i;
346	struct hash *hp, *hp0;
347
348	hp = hp0 = &bucket[lastchr(str) & 0177];
349	while (hp->hnext) {
350		hp = hp->hnext;
351		i = istail(str, hp->hstr);
352		if (i >= 0)
353			return (hp->hpt + i);
354	}
355	if ((hp = (struct hash *) calloc(1, sizeof (*hp))) == NULL)
356		errx(8, "calloc");
357	hp->hpt = mesgpt;
358	if (!(hp->hstr = strdup(str)))
359		err(1, NULL);
360	mesgpt += strlen(hp->hstr) + 1;
361	hp->hnext = hp0->hnext;
362	hp->hnew = new;
363	hp0->hnext = hp;
364	return (hp->hpt);
365}
366
367static void
368flushsh(void)
369{
370	int i;
371	struct hash *hp;
372	FILE *mesgwrit;
373	int old = 0, new = 0;
374
375	for (i = 0; i < BUCKETS; i++)
376		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext)
377			if (hp->hnew)
378				new++;
379			else
380				old++;
381	if (new == 0 && old != 0)
382		return;
383	mesgwrit = fopen(strings, old ? "r+" : "w");
384	if (mesgwrit == NULL)
385		err(4, "%s", strings);
386	for (i = 0; i < BUCKETS; i++)
387		for (hp = bucket[i].hnext; hp != NULL; hp = hp->hnext) {
388			found(hp->hnew, hp->hpt, hp->hstr);
389			if (hp->hnew) {
390				fseek(mesgwrit, hp->hpt, 0);
391				ignore(fwrite(hp->hstr, strlen(hp->hstr) + 1, 1, mesgwrit));
392				if (ferror(mesgwrit))
393					err(4, "%s", strings);
394			}
395		}
396	if (fclose(mesgwrit) == EOF)
397		err(4, "%s", strings);
398}
399
400static void
401found(int new, off_t off, char *str)
402{
403	if (vflg == 0)
404		return;
405	if (!new)
406		fprintf(stderr, "found at %d:", (int) off);
407	else
408		fprintf(stderr, "new at %d:", (int) off);
409	prstr(str);
410	fprintf(stderr, "\n");
411}
412
/*
 * Print the string cp on standard error in a visible form: bytes below
 * space as ^ followed by the byte plus '`', DEL as ^?, bytes with the
 * high bit set as a three-digit octal escape, everything else as is.
 */
static void
prstr(char *cp)
{
	int c;

	while ((c = (*cp++ & 0377)) != 0) {
		if (c < ' ')
			fprintf(stderr, "^%c", c + '`');
		else if (c == 0177)
			fprintf(stderr, "^?");
		else if (c >= 0200)	/* 0200 itself is not ASCII either */
			fprintf(stderr, "\\%03o", (unsigned)c);
		else
			fprintf(stderr, "%c", c);
	}
}
428
429static void
430xsdotc(void)
431{
432	FILE *strf = fopen(strings, "r");
433	FILE *xdotcf;
434
435	if (strf == NULL)
436		err(5, "%s", strings);
437	xdotcf = fopen("xs.c", "w");
438	if (xdotcf == NULL)
439		err(6, "xs.c");
440	fprintf(xdotcf, "char\txstr[] = {\n");
441	for (;;) {
442		int i, c;
443
444		for (i = 0; i < 8; i++) {
445			c = getc(strf);
446			if (ferror(strf)) {
447				warn("%s", strings);
448				onintr(0);
449			}
450			if (feof(strf)) {
451				fprintf(xdotcf, "\n");
452				goto out;
453			}
454			fprintf(xdotcf, "0x%02x,", c);
455		}
456		fprintf(xdotcf, "\n");
457	}
458out:
459	fprintf(xdotcf, "};\n");
460	ignore(fclose(xdotcf));
461	ignore(fclose(strf));
462}
463
/*
 * Return the last character of the string cp, or NUL if it is empty.
 */
static char
lastchr(char *cp)
{
	size_t len = strlen(cp);

	if (len == 0)
		return ('\0');
	return (cp[len - 1]);
}
472
/*
 * If str is a suffix of `of`, return the index in `of` at which that
 * suffix starts (0 when the strings are equal); otherwise return -1.
 *
 * The lengths are compared as size_t before subtracting, so the result
 * never depends on an unsigned difference wrapping and being narrowed
 * to int.
 */
static int
istail(char *str, char *of)
{
	size_t slen = strlen(str);
	size_t oflen = strlen(of);
	size_t d;

	if (slen > oflen)
		return (-1);
	d = oflen - slen;
	if (strcmp(of + d, str) != 0)
		return (-1);
	return ((int)d);
}
482
483static void
484onintr(int dummy __unused)
485{
486
487	ignore(signal(SIGINT, SIG_IGN));
488	if (strings[0] == '/')
489		ignore(unlink(strings));
490	ignore(unlink("x.c"));
491	ignore(unlink("xs.c"));
492	exit(7);
493}
494