checknr.c revision 92920
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#if 0
41#ifndef lint
42static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
43#endif /* not lint */
44#endif
45
46#include <sys/cdefs.h>
47__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 92920 2002-03-22 01:22:50Z imp $");
48
49/*
50 * checknr: check an nroff/troff input file for matching macro calls.
51 * we also attempt to match size and font changes, but only the embedded
52 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
53 * later but for now think of these restrictions as contributions to
54 * structured typesetting.
55 */
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59#include <ctype.h>
60
/* Fixed capacities of the static tables used below. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/* Forward declarations; every function is defined later in this file. */
void addcmd(char *line);
void addmac(const char *mac);
int binsrch(const char *mac);
void checkknown(char *mac);
void chkcmd(char *line, char *mac);
void complain(int i);
int eq(const char *s1, const char *s2);
void nomatch(char *mac);
void pe(int linen);
void process(FILE *f);
void prop(int i);
static void usage(void);
77
/*
 * One entry of the stack of openers seen so far and not yet closed.
 * An entry describes either a macro call taken from the br[] table or
 * an embedded \s / \f escape.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry in stk[] */
88
/*
 * The kinds of opening and closing brackets.
 * Each entry pairs the macro that opens a construct with the macro
 * that closes it.  The table ends at the first entry whose opbr is a
 * null pointer; the unused tail is available for pairs added later.
 */
struct brstr {
	const char *opbr;	/* name of the opening macro */
	const char *clbr;	/* name of the closing macro */
};

/* Indices of the two entries that are also used for \s and \f. */
#define SZ	0
#define FT	1

struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	/* end-of-table marker */
	{ NULL, NULL }
};
146
147/*
148 * All commands known to nroff, plus macro packages.
149 * Used so we can complain about unrecognized commands.
150 */
151const char *knowncmds[MAXCMDS] = {
152"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
153"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
154"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
155"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
156"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
157"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
158"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
159"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
160"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
161"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
162"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
163"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
164"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
165"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
166"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
167"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
168"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
169"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
170"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
171"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
172"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
173"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
174"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
175"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
176"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
177"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
178"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
179"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
180"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
181"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
182"yr", 0
183};
184
/* Program-wide state shared by the routines below. */

/* current line number in input file */
int lineno;
/* name of current file */
const char *cfilename;
/* number of files to process */
int nfiles;
/* -f: ignore \f */
int fflag;
/* -s: ignore \s */
int sflag;
/* size of knowncmds */
int ncmds;
/* slot in knowncmds found by binsrch */
int slot;
192
193int
194main(argc, argv)
195int argc;
196char **argv;
197{
198	FILE *f;
199	int i;
200	char *cp;
201	char b1[4];
202
203	/* Figure out how many known commands there are */
204	while (knowncmds[ncmds])
205		ncmds++;
206	while (argc > 1 && argv[1][0] == '-') {
207		switch(argv[1][1]) {
208
209		/* -a: add pairs of macros */
210		case 'a':
211			i = strlen(argv[1]) - 2;
212			if (i % 6 != 0)
213				usage();
214			/* look for empty macro slots */
215			for (i=0; br[i].opbr; i++)
216				;
217			for (cp=argv[1]+3; cp[-1]; cp += 6) {
218				br[i].opbr = strncpy(malloc(3), cp, 2);
219				br[i].clbr = strncpy(malloc(3), cp+3, 2);
220				addmac(br[i].opbr);	/* knows pairs are also known cmds */
221				addmac(br[i].clbr);
222				i++;
223			}
224			break;
225
226		/* -c: add known commands */
227		case 'c':
228			i = strlen(argv[1]) - 2;
229			if (i % 3 != 0)
230				usage();
231			for (cp=argv[1]+3; cp[-1]; cp += 3) {
232				if (cp[2] && cp[2] != '.')
233					usage();
234				strncpy(b1, cp, 2);
235				b1[2] = '\0';
236				addmac(b1);
237			}
238			break;
239
240		/* -f: ignore font changes */
241		case 'f':
242			fflag = 1;
243			break;
244
245		/* -s: ignore size changes */
246		case 's':
247			sflag = 1;
248			break;
249		default:
250			usage();
251		}
252		argc--; argv++;
253	}
254
255	nfiles = argc - 1;
256
257	if (nfiles > 0) {
258		for (i=1; i<argc; i++) {
259			cfilename = argv[i];
260			f = fopen(cfilename, "r");
261			if (f == NULL)
262				perror(cfilename);
263			else
264				process(f);
265		}
266	} else {
267		cfilename = "stdin";
268		process(stdin);
269	}
270	exit(0);
271}
272
/*
 * usage: print the command synopsis on the standard error output and
 * terminate the program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fputs(synopsis, stderr);
	exit(1);
}
280
281void
282process(f)
283FILE *f;
284{
285	int i, n;
286	char mac[5];	/* The current macro or nroff command */
287	int pl;
288	static char line[256];	/* the current line */
289
290	stktop = -1;
291	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
292		if (line[0] == '.') {
293			/*
294			 * find and isolate the macro/command name.
295			 */
296			strncpy(mac, line+1, 4);
297			if (isspace(mac[0])) {
298				pe(lineno);
299				printf("Empty command\n");
300			} else if (isspace(mac[1])) {
301				mac[1] = 0;
302			} else if (isspace(mac[2])) {
303				mac[2] = 0;
304			} else if (mac[0] != '\\' || mac[1] != '\"') {
305				pe(lineno);
306				printf("Command too long\n");
307			}
308
309			/*
310			 * Is it a known command?
311			 */
312			checkknown(mac);
313
314			/*
315			 * Should we add it?
316			 */
317			if (eq(mac, "de"))
318				addcmd(line);
319
320			chkcmd(line, mac);
321		}
322
323		/*
324		 * At this point we process the line looking
325		 * for \s and \f.
326		 */
327		for (i=0; line[i]; i++)
328			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
329				if (!sflag && line[++i]=='s') {
330					pl = line[++i];
331					if (isdigit(pl)) {
332						n = pl - '0';
333						pl = ' ';
334					} else
335						n = 0;
336					while (isdigit(line[++i]))
337						n = 10 * n + line[i] - '0';
338					i--;
339					if (n == 0) {
340						if (stk[stktop].opno == SZ) {
341							stktop--;
342						} else {
343							pe(lineno);
344							printf("unmatched \\s0\n");
345						}
346					} else {
347						stk[++stktop].opno = SZ;
348						stk[stktop].pl = pl;
349						stk[stktop].parm = n;
350						stk[stktop].lno = lineno;
351					}
352				} else if (!fflag && line[i]=='f') {
353					n = line[++i];
354					if (n == 'P') {
355						if (stk[stktop].opno == FT) {
356							stktop--;
357						} else {
358							pe(lineno);
359							printf("unmatched \\fP\n");
360						}
361					} else {
362						stk[++stktop].opno = FT;
363						stk[stktop].pl = 1;
364						stk[stktop].parm = n;
365						stk[stktop].lno = lineno;
366					}
367				}
368			}
369	}
370	/*
371	 * We've hit the end and look at all this stuff that hasn't been
372	 * matched yet!  Complain, complain.
373	 */
374	for (i=stktop; i>=0; i--) {
375		complain(i);
376	}
377}
378
379void
380complain(i)
381int i;
382{
383	pe(stk[i].lno);
384	printf("Unmatched ");
385	prop(i);
386	printf("\n");
387}
388
389void
390prop(i)
391int i;
392{
393	if (stk[i].pl == 0)
394		printf(".%s", br[stk[i].opno].opbr);
395	else switch(stk[i].opno) {
396	case SZ:
397		printf("\\s%c%d", stk[i].pl, stk[i].parm);
398		break;
399	case FT:
400		printf("\\f%c", stk[i].parm);
401		break;
402	default:
403		printf("Bug: stk[%d].opno = %d = .%s, .%s",
404			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
405	}
406}
407
408void
409chkcmd(line, mac)
410char *line __unused;
411char *mac;
412{
413	int i;
414
415	/*
416	 * Check to see if it matches top of stack.
417	 */
418	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
419		stktop--;	/* OK. Pop & forget */
420	else {
421		/* No. Maybe it's an opener */
422		for (i=0; br[i].opbr; i++) {
423			if (eq(mac, br[i].opbr)) {
424				/* Found. Push it. */
425				stktop++;
426				stk[stktop].opno = i;
427				stk[stktop].pl = 0;
428				stk[stktop].parm = 0;
429				stk[stktop].lno = lineno;
430				break;
431			}
432			/*
433			 * Maybe it's an unmatched closer.
434			 * NOTE: this depends on the fact
435			 * that none of the closers can be
436			 * openers too.
437			 */
438			if (eq(mac, br[i].clbr)) {
439				nomatch(mac);
440				break;
441			}
442		}
443	}
444}
445
446void
447nomatch(mac)
448char *mac;
449{
450	int i, j;
451
452	/*
453	 * Look for a match further down on stack
454	 * If we find one, it suggests that the stuff in
455	 * between is supposed to match itself.
456	 */
457	for (j=stktop; j>=0; j--)
458		if (eq(mac,br[stk[j].opno].clbr)) {
459			/* Found.  Make a good diagnostic. */
460			if (j == stktop-2) {
461				/*
462				 * Check for special case \fx..\fR and don't
463				 * complain.
464				 */
465				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
466				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
467					stktop = j -1;
468					return;
469				}
470				/*
471				 * We have two unmatched frobs.  Chances are
472				 * they were intended to match, so we mention
473				 * them together.
474				 */
475				pe(stk[j+1].lno);
476				prop(j+1);
477				printf(" does not match %d: ", stk[j+2].lno);
478				prop(j+2);
479				printf("\n");
480			} else for (i=j+1; i <= stktop; i++) {
481				complain(i);
482			}
483			stktop = j-1;
484			return;
485		}
486	/* Didn't find one.  Throw this away. */
487	pe(lineno);
488	printf("Unmatched .%s\n", mac);
489}
490
/*
 * eq: compare two NUL-terminated strings.
 * Returns 1 when they are identical and 0 otherwise.
 */
int
eq(const char *s1, const char *s2)
{
	int differ;

	differ = strcmp(s1, s2);
	return (differ ? 0 : 1);
}
498
499/* print the first part of an error message, given the line number */
500void
501pe(linen)
502int linen;
503{
504	if (nfiles > 1)
505		printf("%s: ", cfilename);
506	printf("%d: ", linen);
507}
508
509void
510checkknown(mac)
511char *mac;
512{
513
514	if (eq(mac, "."))
515		return;
516	if (binsrch(mac) >= 0)
517		return;
518	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
519		return;
520
521	pe(lineno);
522	printf("Unknown command: .%s\n", mac);
523}
524
525/*
526 * We have a .de xx line in "line".  Add xx to the list of known commands.
527 */
528void
529addcmd(line)
530char *line;
531{
532	char *mac;
533
534	/* grab the macro being defined */
535	mac = line+4;
536	while (isspace(*mac))
537		mac++;
538	if (*mac == 0) {
539		pe(lineno);
540		printf("illegal define: %s\n", line);
541		return;
542	}
543	mac[2] = 0;
544	if (isspace(mac[1]) || mac[1] == '\\')
545		mac[1] = 0;
546	if (ncmds >= MAXCMDS) {
547		printf("Only %d known commands allowed\n", MAXCMDS);
548		exit(1);
549	}
550	addmac(mac);
551}
552
553/*
554 * Add mac to the list.  We should really have some kind of tree
555 * structure here but this is a quick-and-dirty job and I just don't
556 * have time to mess with it.  (I wonder if this will come back to haunt
557 * me someday?)  Anyway, I claim that .de is fairly rare in user
558 * nroff programs, and the register loop below is pretty fast.
559 */
560void
561addmac(mac)
562const char *mac;
563{
564	const char **src, **dest, **loc;
565
566	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
567#ifdef DEBUG
568		printf("binsrch(%s) -> already in table\n", mac);
569#endif
570		return;
571	}
572	/* binsrch sets slot as a side effect */
573#ifdef DEBUG
574printf("binsrch(%s) -> %d\n", mac, slot);
575#endif
576	loc = &knowncmds[slot];
577	src = &knowncmds[ncmds-1];
578	dest = src+1;
579	while (dest > loc)
580		*dest-- = *src--;
581	*loc = strcpy(malloc(3), mac);
582	ncmds++;
583#ifdef DEBUG
584printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
585#endif
586}
587
588/*
589 * Do a binary search in knowncmds for mac.
590 * If found, return the index.  If not, return -1.
591 */
592int
593binsrch(mac)
594const char *mac;
595{
596	const char *p;	/* pointer to current cmd in list */
597	int d;		/* difference if any */
598	int mid;	/* mid point in binary search */
599	int top, bot;	/* boundaries of bin search, inclusive */
600
601	top = ncmds-1;
602	bot = 0;
603	while (top >= bot) {
604		mid = (top+bot)/2;
605		p = knowncmds[mid];
606		d = p[0] - mac[0];
607		if (d == 0)
608			d = p[1] - mac[1];
609		if (d == 0)
610			return mid;
611		if (d < 0)
612			bot = mid + 1;
613		else
614			top = mid - 1;
615	}
616	slot = bot;	/* place it would have gone */
617	return -1;
618}
619