mksyntax.c revision 17538
1/*-
2 * Copyright (c) 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Kenneth Almquist.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	$Id: mksyntax.c,v 1.3 1996/08/11 22:50:59 ache Exp $
37 */
38
/*
 * Identification strings embedded in the object file.  Both are left
 * out when the source is processed with `lint' defined.
 */
#ifndef lint
static char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";

static char sccsid[] = "@(#)mksyntax.c	8.1 (Berkeley) 5/31/93";
#endif /* not lint */
48
49/*
50 * This program creates syntax.h and syntax.c.
51 */
52
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
55
56
/*
 * One row of a class table: the identifier that is emitted as a #define
 * and the text that is emitted next to it as a comment.
 */
struct synclass {
	char *name;		/* macro name written after "#define" */
	char *comment;		/* description written inside the comment */
};
61
62/* Syntax classes */
63struct synclass synclass[] = {
64	"CWORD",		"character is nothing special",
65	"CNL",		"newline character",
66	"CBACK",		"a backslash character",
67	"CSQUOTE",	"single quote",
68	"CDQUOTE",	"double quote",
69	"CENDQUOTE",	"a terminating quote",
70	"CBQUOTE",	"backwards single quote",
71	"CVAR",		"a dollar sign",
72	"CENDVAR",	"a '}' character",
73	"CLP",		"a left paren in arithmetic",
74	"CRP",		"a right paren in arithmetic",
75	"CEOF",		"end of file",
76	"CCTL",		"like CWORD, except it must be escaped",
77	"CSPCL",		"these terminate a word",
78	NULL, NULL
79};
80
81
82/*
83 * Syntax classes for is_ functions.  Warning:  if you add new classes
84 * you may have to change the definition of the is_in_name macro.
85 */
86struct synclass is_entry[] = {
87	"ISDIGIT",	"a digit",
88	"ISUPPER",	"an upper case letter",
89	"ISLOWER",	"a lower case letter",
90	"ISUNDER",	"an underscore",
91	"ISSPECL",	"the name of a special parameter",
92	NULL, NULL,
93};
94
/*
 * Banner comment written at the top of both generated files.
 */
char writer[] =
	"/*\n"
	" * This file was generated by the mksyntax program.\n"
	" */\n"
	"\n";
100
101
/* Output streams: cfile is syntax.c, hfile is syntax.h (opened in main). */
FILE *cfile;
FILE *hfile;

/*
 * The table currently being built; each slot holds the text that print()
 * emits for that entry.  Sized for the largest case main() accepts:
 * 9-bit chars, plus one extra slot.
 */
char *syntax[(1 << 9) + 1];

int base;		/* offset added to a character to index syntax[] */
int size;		/* number of values which a char variable can have */
int nbits;		/* number of bits in a character */
int digit_contig;	/* true if digits are contiguous */
109
110
111main() {
112	char c;
113	char d;
114	int sign;
115	int i;
116	char buf[80];
117	int pos;
118	static char digit[] = "0123456789";
119
120	/* Create output files */
121	if ((cfile = fopen("syntax.c", "w")) == NULL) {
122		perror("syntax.c");
123		exit(2);
124	}
125	if ((hfile = fopen("syntax.h", "w")) == NULL) {
126		perror("syntax.h");
127		exit(2);
128	}
129	fputs(writer, hfile);
130	fputs(writer, cfile);
131
132	/* Determine the characteristics of chars. */
133	c = -1;
134	if (c < 0)
135		sign = 1;
136	else
137		sign = 0;
138	for (nbits = 1 ; ; nbits++) {
139		d = (1 << nbits) - 1;
140		if (d == c)
141			break;
142	}
143	printf("%s %d bit chars\n", sign? "signed" : "unsigned", nbits);
144	if (nbits > 9) {
145		fputs("Characters can't have more than 9 bits\n", stderr);
146		exit(2);
147	}
148	size = (1 << nbits) + 1;
149	base = 1;
150	if (sign)
151		base += 1 << (nbits - 1);
152	digit_contig = 1;
153	for (i = 0 ; i < 10 ; i++) {
154		if (digit[i] != '0' + i)
155			digit_contig = 0;
156	}
157
158	fputs("#include <ctype.h>\n", hfile);
159	fputs("#include <sys/cdefs.h>\n", hfile);
160
161	/* Generate the #define statements in the header file */
162	fputs("/* Syntax classes */\n", hfile);
163	for (i = 0 ; synclass[i].name ; i++) {
164		sprintf(buf, "#define %s %d", synclass[i].name, i);
165		fputs(buf, hfile);
166		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
167			putc('\t', hfile);
168		fprintf(hfile, "/* %s */\n", synclass[i].comment);
169	}
170	putc('\n', hfile);
171	fputs("/* Syntax classes for is_ functions */\n", hfile);
172	for (i = 0 ; is_entry[i].name ; i++) {
173		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
174		fputs(buf, hfile);
175		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
176			putc('\t', hfile);
177		fprintf(hfile, "/* %s */\n", is_entry[i].comment);
178	}
179	putc('\n', hfile);
180	fprintf(hfile, "#define SYNBASE %d\n", base);
181	fprintf(hfile, "#define PEOF %d\n\n", -base);
182	putc('\n', hfile);
183	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
184	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
185	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
186	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
187	putc('\n', hfile);
188	output_type_macros();		/* is_digit, etc. */
189	putc('\n', hfile);
190
191	/* Generate the syntax tables. */
192	fputs("#include \"shell.h\"\n", cfile);
193	fputs("#include \"syntax.h\"\n\n", cfile);
194	init();
195	fputs("/* syntax table used when not in quotes */\n", cfile);
196	add("\n", "CNL");
197	add("\\", "CBACK");
198	add("'", "CSQUOTE");
199	add("\"", "CDQUOTE");
200	add("`", "CBQUOTE");
201	add("$", "CVAR");
202	add("}", "CENDVAR");
203	add("<>();&| \t", "CSPCL");
204	print("basesyntax");
205	init();
206	fputs("\n/* syntax table used when in double quotes */\n", cfile);
207	add("\n", "CNL");
208	add("\\", "CBACK");
209	add("\"", "CENDQUOTE");
210	add("`", "CBQUOTE");
211	add("$", "CVAR");
212	add("}", "CENDVAR");
213	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
214	print("dqsyntax");
215	init();
216	fputs("\n/* syntax table used when in single quotes */\n", cfile);
217	add("\n", "CNL");
218	add("'", "CENDQUOTE");
219	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
220	print("sqsyntax");
221	init();
222	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
223	add("\n", "CNL");
224	add("\\", "CBACK");
225	add("`", "CBQUOTE");
226	add("'", "CSQUOTE");
227	add("\"", "CDQUOTE");
228	add("$", "CVAR");
229	add("}", "CENDVAR");
230	add("(", "CLP");
231	add(")", "CRP");
232	print("arisyntax");
233	filltable("0");
234	fputs("\n/* character classification table */\n", cfile);
235	add("0123456789", "ISDIGIT");
236	add("abcdefghijklmnopqrstucvwxyz", "ISLOWER");
237	add("ABCDEFGHIJKLMNOPQRSTUCVWXYZ", "ISUPPER");
238	add("_", "ISUNDER");
239	add("#?$!-*@", "ISSPECL");
240	print("is_type");
241	if (! digit_contig)
242		digit_convert();
243	exit(0);
244}
245
246
247
248/*
249 * Clear the syntax table.
250 */
251
252filltable(dftval)
253	char *dftval;
254	{
255	int i;
256
257	for (i = 0 ; i < size ; i++)
258		syntax[i] = dftval;
259}
260
261
262/*
263 * Initialize the syntax table with default values.
264 */
265
266init() {
267	filltable("CWORD");
268	syntax[0] = "CEOF";
269	syntax[base + CTLESC] = "CCTL";
270	syntax[base + CTLVAR] = "CCTL";
271	syntax[base + CTLENDVAR] = "CCTL";
272	syntax[base + CTLBACKQ] = "CCTL";
273	syntax[base + CTLBACKQ + CTLQUOTE] = "CCTL";
274	syntax[base + CTLARI] = "CCTL";
275	syntax[base + CTLENDARI] = "CCTL";
276}
277
278
279/*
280 * Add entries to the syntax table.
281 */
282
283add(p, type)
284	char *p, *type;
285	{
286	while (*p)
287		syntax[*p++ + base] = type;
288}
289
290
291
292/*
293 * Output the syntax table.
294 */
295
296print(name)
297	char *name;
298	{
299	int i;
300	int col;
301
302	fprintf(hfile, "extern const char %s[];\n", name);
303	fprintf(cfile, "const char %s[%d] = {\n", name, size);
304	col = 0;
305	for (i = 0 ; i < size ; i++) {
306		if (i == 0) {
307			fputs("      ", cfile);
308		} else if ((i & 03) == 0) {
309			fputs(",\n      ", cfile);
310			col = 0;
311		} else {
312			putc(',', cfile);
313			while (++col < 9 * (i & 03))
314				putc(' ', cfile);
315		}
316		fputs(syntax[i], cfile);
317		col += strlen(syntax[i]);
318	}
319	fputs("\n};\n", cfile);
320}
321
322
323
/*
 * Text of the character classification macros (e.g. is_digit) that are
 * copied into the header file, one per line.  The list ends at the NULL
 * entry.  Entry 0 is the table-lookup form of is_digit;
 * output_type_macros() replaces it with a faster test when digits are
 * contiguous.
 */

char *macro[] = {
	/* is_digit: table lookup */
	"#define is_digit(c)\t"
	    "((is_type+SYNBASE)[c] & ISDIGIT)",
	/* is_alpha */
	"#define is_alpha(c)\t"
	    "((c) != PEOF && isalpha((unsigned char) (c)))",
	/* is_name */
	"#define is_name(c)\t"
	    "((c) != PEOF && ((c) == '_' || isalpha((unsigned char) (c))))",
	/* is_in_name */
	"#define is_in_name(c)\t"
	    "((c) != PEOF && ((c) == '_' || isalnum((unsigned char) (c))))",
	/* is_special */
	"#define is_special(c)\t"
	    "((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))",
	NULL
};
337
338output_type_macros() {
339	char **pp;
340
341	if (digit_contig)
342		macro[0] = "#define is_digit(c)\t((unsigned)((c) - '0') <= 9)";
343	for (pp = macro ; *pp ; pp++)
344		fprintf(hfile, "%s\n", *pp);
345	if (digit_contig)
346		fputs("#define digit_val(c)\t((c) - '0')\n", hfile);
347	else
348		fputs("#define digit_val(c)\t(digit_value[c])\n", hfile);
349}
350
351
352
353/*
354 * Output digit conversion table (if digits are not contiguous).
355 */
356
357digit_convert() {
358	int maxdigit;
359	static char digit[] = "0123456789";
360	char *p;
361	int i;
362
363	maxdigit = 0;
364	for (p = digit ; *p ; p++)
365		if (*p > maxdigit)
366			maxdigit = *p;
367	fputs("extern const char digit_value[];\n", hfile);
368	fputs("\n\nconst char digit_value[] = {\n", cfile);
369	for (i = 0 ; i <= maxdigit ; i++) {
370		for (p = digit ; *p && *p != i ; p++);
371		if (*p == '\0')
372			p = digit;
373		fprintf(cfile, "      %d,\n", p - digit);
374	}
375	fputs("};\n", cfile);
376}
377