mksyntax.c revision 17525
150276Speter/*-
2166124Srafan * Copyright (c) 1991, 1993
350276Speter *	The Regents of the University of California.  All rights reserved.
450276Speter *
550276Speter * This code is derived from software contributed to Berkeley by
650276Speter * Kenneth Almquist.
750276Speter *
850276Speter * Redistribution and use in source and binary forms, with or without
950276Speter * modification, are permitted provided that the following conditions
1050276Speter * are met:
1150276Speter * 1. Redistributions of source code must retain the above copyright
1250276Speter *    notice, this list of conditions and the following disclaimer.
1350276Speter * 2. Redistributions in binary form must reproduce the above copyright
1450276Speter *    notice, this list of conditions and the following disclaimer in the
1550276Speter *    documentation and/or other materials provided with the distribution.
1650276Speter * 3. All advertising materials mentioning features or use of this software
1750276Speter *    must display the following acknowledgement:
1850276Speter *	This product includes software developed by the University of
1950276Speter *	California, Berkeley and its contributors.
2050276Speter * 4. Neither the name of the University nor the names of its contributors
2150276Speter *    may be used to endorse or promote products derived from this software
2250276Speter *    without specific prior written permission.
2350276Speter *
2450276Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2550276Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2650276Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2750276Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2850276Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2950276Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30166124Srafan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3150276Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3250276Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3350276Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3450276Speter * SUCH DAMAGE.
3550276Speter *
3650276Speter *	$Id: mksyntax.c,v 1.2 1994/09/24 02:57:57 davidg Exp $
3750276Speter */
3850276Speter
#ifndef lint
/*
 * Identification strings kept in the object file; both are compiled out
 * when "lint" is defined.
 */
static char copyright[] =
	"@(#) Copyright (c) 1991, 1993\n"
	"	The Regents of the University of California.  All rights reserved.\n";

static char sccsid[] = "@(#)mksyntax.c	8.1 (Berkeley) 5/31/93";
#endif /* not lint */
4850276Speter
4950276Speter/*
5050276Speter * This program creates syntax.h and syntax.c.
5150276Speter */
5250276Speter
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
5576726Speter
56166124Srafan
/*
 * One named class: the identifier emitted into the generated header and
 * the text of the comment written next to it.  Both members only ever
 * point at string literals, hence the const qualification.
 */
struct synclass {
	const char *name;	/* macro name written after "#define" */
	const char *comment;	/* explanation emitted as a C comment */
};
6150276Speter
6250276Speter/* Syntax classes */
6350276Speterstruct synclass synclass[] = {
6450276Speter	"CWORD",		"character is nothing special",
6550276Speter	"CNL",		"newline character",
6650276Speter	"CBACK",		"a backslash character",
67166124Srafan	"CSQUOTE",	"single quote",
6850276Speter	"CDQUOTE",	"double quote",
6950276Speter	"CENDQUOTE",	"a terminating quote",
70166124Srafan	"CBQUOTE",	"backwards single quote",
7150276Speter	"CVAR",		"a dollar sign",
7250276Speter	"CENDVAR",	"a '}' character",
7350276Speter	"CLP",		"a left paren in arithmetic",
7450276Speter	"CRP",		"a right paren in arithmetic",
75166124Srafan	"CEOF",		"end of file",
7650276Speter	"CCTL",		"like CWORD, except it must be escaped",
7750276Speter	"CSPCL",		"these terminate a word",
78166124Srafan	NULL, NULL
7950276Speter};
8050276Speter
8150276Speter
8250276Speter/*
8350276Speter * Syntax classes for is_ functions.  Warning:  if you add new classes
8450276Speter * you may have to change the definition of the is_in_name macro.
8550276Speter */
8650276Speterstruct synclass is_entry[] = {
8750276Speter	"ISDIGIT",	"a digit",
88166124Srafan	"ISUPPER",	"an upper case letter",
89166124Srafan	"ISLOWER",	"a lower case letter",
9050276Speter	"ISUNDER",	"an underscore",
9150276Speter	"ISSPECL",	"the name of a special parameter",
9250276Speter	NULL, NULL,
9350276Speter};
94166124Srafan
9550276Speterchar writer[] = "\
9650276Speter/*\n\
9750276Speter * This file was generated by the mksyntax program.\n\
9850276Speter */\n\
9950276Speter\n";
10050276Speter
10150276Speter
/* Output streams: cfile is opened on syntax.c, hfile on syntax.h. */
FILE *cfile, *hfile;

/*
 * Table under construction.  Each slot holds the text that will be
 * written for that entry; 513 slots cover the largest table main()
 * accepts (9-bit characters plus one extra slot).
 */
char *syntax[513];

int base;		/* offset added to a character to index the table */
int size;		/* number of values which a char variable can have */
int nbits;		/* number of bits in a character */
int digit_contig;	/* true if digits are contiguous */
10950276Speter
11050276Speter
11150276Spetermain() {
11250276Speter	char c;
11350276Speter	char d;
11450276Speter	int sign;
11576726Speter	int i;
116166124Srafan	char buf[80];
11750276Speter	int pos;
118166124Srafan	static char digit[] = "0123456789";
11950276Speter
120166124Srafan	/* Create output files */
121166124Srafan	if ((cfile = fopen("syntax.c", "w")) == NULL) {
122166124Srafan		perror("syntax.c");
123166124Srafan		exit(2);
12450276Speter	}
12550276Speter	if ((hfile = fopen("syntax.h", "w")) == NULL) {
12650276Speter		perror("syntax.h");
12750276Speter		exit(2);
12850276Speter	}
12950276Speter	fputs(writer, hfile);
13050276Speter	fputs(writer, cfile);
131166124Srafan
13250276Speter	/* Determine the characteristics of chars. */
13350276Speter	c = -1;
13450276Speter	if (c < 0)
13550276Speter		sign = 1;
13650276Speter	else
13750276Speter		sign = 0;
13850276Speter	for (nbits = 1 ; ; nbits++) {
13950276Speter		d = (1 << nbits) - 1;
14050276Speter		if (d == c)
14150276Speter			break;
14250276Speter	}
14350276Speter	printf("%s %d bit chars\n", sign? "signed" : "unsigned", nbits);
14450276Speter	if (nbits > 9) {
14550276Speter		fputs("Characters can't have more than 9 bits\n", stderr);
14650276Speter		exit(2);
14750276Speter	}
14876726Speter	size = (1 << nbits) + 1;
149166124Srafan	base = 1;
15050276Speter	if (sign)
151166124Srafan		base += 1 << (nbits - 1);
15250276Speter	digit_contig = 1;
153166124Srafan	for (i = 0 ; i < 10 ; i++) {
154166124Srafan		if (digit[i] != '0' + i)
155166124Srafan			digit_contig = 0;
156166124Srafan	}
15750276Speter
15850276Speter	fputs("#include <ctype.h>\n", hfile);
15950276Speter	fputs("#include <sys/cdefs.h>\n", hfile);
16050276Speter
16150276Speter	/* Generate the #define statements in the header file */
16250276Speter	fputs("/* Syntax classes */\n", hfile);
16350276Speter	for (i = 0 ; synclass[i].name ; i++) {
164166124Srafan		sprintf(buf, "#define %s %d", synclass[i].name, i);
16550276Speter		fputs(buf, hfile);
16650276Speter		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
16750276Speter			putc('\t', hfile);
16850276Speter		fprintf(hfile, "/* %s */\n", synclass[i].comment);
16950276Speter	}
17050276Speter	putc('\n', hfile);
17150276Speter	fputs("/* Syntax classes for is_ functions */\n", hfile);
17250276Speter	for (i = 0 ; is_entry[i].name ; i++) {
17350276Speter		sprintf(buf, "#define %s %#o", is_entry[i].name, 1 << i);
17450276Speter		fputs(buf, hfile);
17550276Speter		for (pos = strlen(buf) ; pos < 32 ; pos = pos + 8 &~ 07)
17676726Speter			putc('\t', hfile);
177166124Srafan		fprintf(hfile, "/* %s */\n", is_entry[i].comment);
17850276Speter	}
179166124Srafan	putc('\n', hfile);
180166124Srafan	fprintf(hfile, "#define SYNBASE %d\n", base);
18150276Speter	fprintf(hfile, "#define PEOF %d\n\n", -base);
18250276Speter	putc('\n', hfile);
18350276Speter	fputs("#define BASESYNTAX (basesyntax + SYNBASE)\n", hfile);
184	fputs("#define DQSYNTAX (dqsyntax + SYNBASE)\n", hfile);
185	fputs("#define SQSYNTAX (sqsyntax + SYNBASE)\n", hfile);
186	fputs("#define ARISYNTAX (arisyntax + SYNBASE)\n", hfile);
187	putc('\n', hfile);
188	output_type_macros();		/* is_digit, etc. */
189	putc('\n', hfile);
190
191	/* Generate the syntax tables. */
192	fputs("#include \"shell.h\"\n", cfile);
193	fputs("#include \"syntax.h\"\n\n", cfile);
194	init();
195	fputs("/* syntax table used when not in quotes */\n", cfile);
196	add("\n", "CNL");
197	add("\\", "CBACK");
198	add("'", "CSQUOTE");
199	add("\"", "CDQUOTE");
200	add("`", "CBQUOTE");
201	add("$", "CVAR");
202	add("}", "CENDVAR");
203	add("<>();&| \t", "CSPCL");
204	print("basesyntax");
205	init();
206	fputs("\n/* syntax table used when in double quotes */\n", cfile);
207	add("\n", "CNL");
208	add("\\", "CBACK");
209	add("\"", "CENDQUOTE");
210	add("`", "CBQUOTE");
211	add("$", "CVAR");
212	add("}", "CENDVAR");
213	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
214	print("dqsyntax");
215	init();
216	fputs("\n/* syntax table used when in single quotes */\n", cfile);
217	add("\n", "CNL");
218	add("'", "CENDQUOTE");
219	add("!*?[=~:/", "CCTL");	/* ':/' for tilde - yuck */
220	print("sqsyntax");
221	init();
222	fputs("\n/* syntax table used when in arithmetic */\n", cfile);
223	add("\n", "CNL");
224	add("\\", "CBACK");
225	add("`", "CBQUOTE");
226	add("'", "CSQUOTE");
227	add("\"", "CDQUOTE");
228	add("$", "CVAR");
229	add("}", "CENDVAR");
230	add("(", "CLP");
231	add(")", "CRP");
232	print("arisyntax");
233	filltable("0");
234	fputs("\n/* character classification table */\n", cfile);
235	add("0123456789", "ISDIGIT");
236	add("abcdefghijklmnopqrstucvwxyz", "ISLOWER");
237	add("ABCDEFGHIJKLMNOPQRSTUCVWXYZ", "ISUPPER");
238	add("_", "ISUNDER");
239	add("#?$!-*@", "ISSPECL");
240	print("is_type");
241	if (! digit_contig)
242		digit_convert();
243	exit(0);
244}
245
246
247
248/*
249 * Clear the syntax table.
250 */
251
252filltable(dftval)
253	char *dftval;
254	{
255	int i;
256
257	for (i = 0 ; i < size ; i++)
258		syntax[i] = dftval;
259}
260
261
262/*
263 * Initialize the syntax table with default values.
264 */
265
266init() {
267	filltable("CWORD");
268	syntax[0] = "CEOF";
269	syntax[base + CTLESC] = "CCTL";
270	syntax[base + CTLVAR] = "CCTL";
271	syntax[base + CTLENDVAR] = "CCTL";
272	syntax[base + CTLBACKQ] = "CCTL";
273	syntax[base + CTLBACKQ + CTLQUOTE] = "CCTL";
274	syntax[base + CTLARI] = "CCTL";
275	syntax[base + CTLENDARI] = "CCTL";
276}
277
278
279/*
280 * Add entries to the syntax table.
281 */
282
283add(p, type)
284	char *p, *type;
285	{
286	while (*p)
287		syntax[*p++ + base] = type;
288}
289
290
291
292/*
293 * Output the syntax table.
294 */
295
296print(name)
297	char *name;
298	{
299	int i;
300	int col;
301
302	fprintf(hfile, "extern const char %s[];\n", name);
303	fprintf(cfile, "const char %s[%d] = {\n", name, size);
304	col = 0;
305	for (i = 0 ; i < size ; i++) {
306		if (i == 0) {
307			fputs("      ", cfile);
308		} else if ((i & 03) == 0) {
309			fputs(",\n      ", cfile);
310			col = 0;
311		} else {
312			putc(',', cfile);
313			while (++col < 9 * (i & 03))
314				putc(' ', cfile);
315		}
316		fputs(syntax[i], cfile);
317		col += strlen(syntax[i]);
318	}
319	fputs("\n};\n", cfile);
320}
321
322
323
/*
 * Character classification macros (e.g. is_digit) written to the header
 * by output_type_macros().  Entry 0 is the table-driven is_digit;
 * output_type_macros() swaps in a quicker range test when digits are
 * contiguous.  The list ends at the NULL entry.
 */

char *macro[] = {
	/* [0] is_digit: may be replaced at run time */
	"#define is_digit(c)\t((is_type+SYNBASE)[c] & ISDIGIT)",
	/* [1] is_alpha */
	"#define is_alpha(c)\t((c) != PEOF && isalpha((unsigned char) (c)))",
	/* [2] is_name */
	"#define is_name(c)\t((c) != PEOF && ((c) == '_' || isalpha((unsigned char) (c))))",
	/* [3] is_in_name */
	"#define is_in_name(c)\t((c) != PEOF && ((c) == '_' || isdigit((unsigned char) (c)) || isalpha((unsigned char) (c))))",
	/* [4] is_special */
	"#define is_special(c)\t((is_type+SYNBASE)[c] & (ISSPECL|ISDIGIT))",
	NULL
};
337
338output_type_macros() {
339	char **pp;
340
341	if (digit_contig)
342		macro[0] = "#define is_digit(c)\t((unsigned)((c) - '0') <= 9)";
343	for (pp = macro ; *pp ; pp++)
344		fprintf(hfile, "%s\n", *pp);
345	if (digit_contig)
346		fputs("#define digit_val(c)\t((c) - '0')\n", hfile);
347	else
348		fputs("#define digit_val(c)\t(digit_value[c])\n", hfile);
349}
350
351
352
353/*
354 * Output digit conversion table (if digits are not contiguous).
355 */
356
357digit_convert() {
358	int maxdigit;
359	static char digit[] = "0123456789";
360	char *p;
361	int i;
362
363	maxdigit = 0;
364	for (p = digit ; *p ; p++)
365		if (*p > maxdigit)
366			maxdigit = *p;
367	fputs("extern const char digit_value[];\n", hfile);
368	fputs("\n\nconst char digit_value[] = {\n", cfile);
369	for (i = 0 ; i <= maxdigit ; i++) {
370		for (p = digit ; *p && *p != i ; p++);
371		if (*p == '\0')
372			p = digit;
373		fprintf(cfile, "      %d,\n", p - digit);
374	}
375	fputs("};\n", cfile);
376}
377