/* checknr.c revision 115601 */
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
/*
 * Identification strings.  The copyright notice is compiled in unless
 * `lint' is defined; the historical SCCS id is disabled by the
 * surrounding `#if 0'; the revision tag is handed to __FBSDID, which
 * comes from <sys/cdefs.h>.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#if 0
#ifndef lint
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 115601 2003-06-01 06:15:30Z tjr $");
48
49/*
50 * checknr: check an nroff/troff input file for matching macro calls.
51 * we also attempt to match size and font changes, but only the embedded
52 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
53 * later but for now think of these restrictions as contributions to
54 * structured typesetting.
55 */
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59#include <ctype.h>
60
/* Fixed capacities of the statically sized tables in this file. */
enum {
	MAXSTK = 100,	/* entries in stk[], the stack of open constructs */
	MAXBR = 100,	/* entries in br[], the opener/closer pair table */
	MAXCMDS = 500	/* entries in knowncmds[], the known-command table */
};
64
/* Forward declarations for the routines defined further down. */
void addcmd(char *line);
void addmac(const char *mac);
int binsrch(const char *mac);
void checkknown(const char *mac);
void chkcmd(const char *line, const char *mac);
void complain(int i);
int eq(const char *s1, const char *s2);
void nomatch(const char *mac);
void pe(int linen);
void process(FILE *f);
void prop(int i);
static void usage(void);
77
/*
 * One entry per construct that has been opened and not yet closed.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the opener was seen on */
};

/* The stack itself, and the index of its top entry. */
struct stkstr stk[MAXSTK];
int stktop;
88
/*
 * Indices of the two entries in br[] that are also produced by the
 * embedded \s and \f escapes.
 */
#define SZ	0
#define FT	1

/*
 * An opening macro and the macro that closes it.
 */
struct brstr {
	const char *opbr;
	const char *clbr;
};

/*
 * The table of known pairs.  The first entry whose opbr is NULL marks
 * the end of the list; unused slots after the initialiser are NULL too,
 * which is where the -a option appends user-supplied pairs.
 */
struct brstr br[MAXBR] = {
	/* A few bare bones troff commands */
	{"sz",	"sz"},	/* SZ: also \s */
	{"ft",	"ft"},	/* FT: also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/* the -ms package */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	{NULL,	NULL}
};
146
147/*
148 * All commands known to nroff, plus macro packages.
149 * Used so we can complain about unrecognized commands.
150 */
151const char *knowncmds[MAXCMDS] = {
152"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
153"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
154"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
155"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
156"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
157"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
158"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
159"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
160"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
161"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
162"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
163"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
164"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
165"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
166"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
167"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
168"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
169"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
170"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
171"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
172"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
173"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
174"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
175"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
176"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
177"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
178"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
179"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
180"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
181"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
182"yr", 0
183};
184
/* State of the input currently being checked. */
int lineno = 0;			/* current line number in input file */
const char *cfilename = NULL;	/* name of current file */
int nfiles = 0;			/* number of files to process */

/* Command-line switches. */
int fflag = 0;			/* -f: ignore \f */
int sflag = 0;			/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
int ncmds = 0;			/* size of knowncmds */
int slot = 0;			/* slot in knowncmds found by binsrch */
192
193int
194main(int argc, char **argv)
195{
196	FILE *f;
197	int i;
198	char *cp;
199	char b1[4];
200
201	/* Figure out how many known commands there are */
202	while (knowncmds[ncmds])
203		ncmds++;
204	while (argc > 1 && argv[1][0] == '-') {
205		switch(argv[1][1]) {
206
207		/* -a: add pairs of macros */
208		case 'a':
209			i = strlen(argv[1]) - 2;
210			if (i % 6 != 0)
211				usage();
212			/* look for empty macro slots */
213			for (i=0; br[i].opbr; i++)
214				;
215			for (cp=argv[1]+3; cp[-1]; cp += 6) {
216				br[i].opbr = strncpy(malloc(3), cp, 2);
217				br[i].clbr = strncpy(malloc(3), cp+3, 2);
218				addmac(br[i].opbr);	/* knows pairs are also known cmds */
219				addmac(br[i].clbr);
220				i++;
221			}
222			break;
223
224		/* -c: add known commands */
225		case 'c':
226			i = strlen(argv[1]) - 2;
227			if (i % 3 != 0)
228				usage();
229			for (cp=argv[1]+3; cp[-1]; cp += 3) {
230				if (cp[2] && cp[2] != '.')
231					usage();
232				strncpy(b1, cp, 2);
233				b1[2] = '\0';
234				addmac(b1);
235			}
236			break;
237
238		/* -f: ignore font changes */
239		case 'f':
240			fflag = 1;
241			break;
242
243		/* -s: ignore size changes */
244		case 's':
245			sflag = 1;
246			break;
247		default:
248			usage();
249		}
250		argc--; argv++;
251	}
252
253	nfiles = argc - 1;
254
255	if (nfiles > 0) {
256		for (i=1; i<argc; i++) {
257			cfilename = argv[i];
258			f = fopen(cfilename, "r");
259			if (f == NULL)
260				perror(cfilename);
261			else {
262				process(f);
263				fclose(f);
264			}
265		}
266	} else {
267		cfilename = "stdin";
268		process(stdin);
269	}
270	exit(0);
271}
272
/*
 * usage: write the command synopsis to standard error and terminate
 * with exit status 1.  Does not return.
 */
static void
usage(void)
{
	fputs("usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n",
	    stderr);
	exit(1);
}
280
281void
282process(FILE *f)
283{
284	int i, n;
285	char mac[5];	/* The current macro or nroff command */
286	int pl;
287	static char line[256];	/* the current line */
288
289	stktop = -1;
290	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
291		if (line[0] == '.') {
292			/*
293			 * find and isolate the macro/command name.
294			 */
295			strncpy(mac, line+1, 4);
296			if (isspace(mac[0])) {
297				pe(lineno);
298				printf("Empty command\n");
299			} else if (isspace(mac[1])) {
300				mac[1] = 0;
301			} else if (isspace(mac[2])) {
302				mac[2] = 0;
303			} else if (mac[0] != '\\' || mac[1] != '\"') {
304				pe(lineno);
305				printf("Command too long\n");
306			}
307
308			/*
309			 * Is it a known command?
310			 */
311			checkknown(mac);
312
313			/*
314			 * Should we add it?
315			 */
316			if (eq(mac, "de"))
317				addcmd(line);
318
319			chkcmd(line, mac);
320		}
321
322		/*
323		 * At this point we process the line looking
324		 * for \s and \f.
325		 */
326		for (i=0; line[i]; i++)
327			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
328				if (!sflag && line[++i]=='s') {
329					pl = line[++i];
330					if (isdigit(pl)) {
331						n = pl - '0';
332						pl = ' ';
333					} else
334						n = 0;
335					while (isdigit(line[++i]))
336						n = 10 * n + line[i] - '0';
337					i--;
338					if (n == 0) {
339						if (stk[stktop].opno == SZ) {
340							stktop--;
341						} else {
342							pe(lineno);
343							printf("unmatched \\s0\n");
344						}
345					} else {
346						stk[++stktop].opno = SZ;
347						stk[stktop].pl = pl;
348						stk[stktop].parm = n;
349						stk[stktop].lno = lineno;
350					}
351				} else if (!fflag && line[i]=='f') {
352					n = line[++i];
353					if (n == 'P') {
354						if (stk[stktop].opno == FT) {
355							stktop--;
356						} else {
357							pe(lineno);
358							printf("unmatched \\fP\n");
359						}
360					} else {
361						stk[++stktop].opno = FT;
362						stk[stktop].pl = 1;
363						stk[stktop].parm = n;
364						stk[stktop].lno = lineno;
365					}
366				}
367			}
368	}
369	/*
370	 * We've hit the end and look at all this stuff that hasn't been
371	 * matched yet!  Complain, complain.
372	 */
373	for (i=stktop; i>=0; i--) {
374		complain(i);
375	}
376}
377
378void
379complain(int i)
380{
381	pe(stk[i].lno);
382	printf("Unmatched ");
383	prop(i);
384	printf("\n");
385}
386
387void
388prop(int i)
389{
390	if (stk[i].pl == 0)
391		printf(".%s", br[stk[i].opno].opbr);
392	else switch(stk[i].opno) {
393	case SZ:
394		printf("\\s%c%d", stk[i].pl, stk[i].parm);
395		break;
396	case FT:
397		printf("\\f%c", stk[i].parm);
398		break;
399	default:
400		printf("Bug: stk[%d].opno = %d = .%s, .%s",
401			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
402	}
403}
404
405void
406chkcmd(const char *line __unused, const char *mac)
407{
408	int i;
409
410	/*
411	 * Check to see if it matches top of stack.
412	 */
413	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
414		stktop--;	/* OK. Pop & forget */
415	else {
416		/* No. Maybe it's an opener */
417		for (i=0; br[i].opbr; i++) {
418			if (eq(mac, br[i].opbr)) {
419				/* Found. Push it. */
420				stktop++;
421				stk[stktop].opno = i;
422				stk[stktop].pl = 0;
423				stk[stktop].parm = 0;
424				stk[stktop].lno = lineno;
425				break;
426			}
427			/*
428			 * Maybe it's an unmatched closer.
429			 * NOTE: this depends on the fact
430			 * that none of the closers can be
431			 * openers too.
432			 */
433			if (eq(mac, br[i].clbr)) {
434				nomatch(mac);
435				break;
436			}
437		}
438	}
439}
440
441void
442nomatch(const char *mac)
443{
444	int i, j;
445
446	/*
447	 * Look for a match further down on stack
448	 * If we find one, it suggests that the stuff in
449	 * between is supposed to match itself.
450	 */
451	for (j=stktop; j>=0; j--)
452		if (eq(mac,br[stk[j].opno].clbr)) {
453			/* Found.  Make a good diagnostic. */
454			if (j == stktop-2) {
455				/*
456				 * Check for special case \fx..\fR and don't
457				 * complain.
458				 */
459				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
460				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
461					stktop = j -1;
462					return;
463				}
464				/*
465				 * We have two unmatched frobs.  Chances are
466				 * they were intended to match, so we mention
467				 * them together.
468				 */
469				pe(stk[j+1].lno);
470				prop(j+1);
471				printf(" does not match %d: ", stk[j+2].lno);
472				prop(j+2);
473				printf("\n");
474			} else for (i=j+1; i <= stktop; i++) {
475				complain(i);
476			}
477			stktop = j-1;
478			return;
479		}
480	/* Didn't find one.  Throw this away. */
481	pe(lineno);
482	printf("Unmatched .%s\n", mac);
483}
484
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
491
492/* print the first part of an error message, given the line number */
493void
494pe(int linen)
495{
496	if (nfiles > 1)
497		printf("%s: ", cfilename);
498	printf("%d: ", linen);
499}
500
501void
502checkknown(const char *mac)
503{
504
505	if (eq(mac, "."))
506		return;
507	if (binsrch(mac) >= 0)
508		return;
509	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
510		return;
511
512	pe(lineno);
513	printf("Unknown command: .%s\n", mac);
514}
515
516/*
517 * We have a .de xx line in "line".  Add xx to the list of known commands.
518 */
519void
520addcmd(char *line)
521{
522	char *mac;
523
524	/* grab the macro being defined */
525	mac = line+4;
526	while (isspace(*mac))
527		mac++;
528	if (*mac == 0) {
529		pe(lineno);
530		printf("illegal define: %s\n", line);
531		return;
532	}
533	mac[2] = 0;
534	if (isspace(mac[1]) || mac[1] == '\\')
535		mac[1] = 0;
536	if (ncmds >= MAXCMDS) {
537		printf("Only %d known commands allowed\n", MAXCMDS);
538		exit(1);
539	}
540	addmac(mac);
541}
542
543/*
544 * Add mac to the list.  We should really have some kind of tree
545 * structure here but this is a quick-and-dirty job and I just don't
546 * have time to mess with it.  (I wonder if this will come back to haunt
547 * me someday?)  Anyway, I claim that .de is fairly rare in user
548 * nroff programs, and the register loop below is pretty fast.
549 */
550void
551addmac(const char *mac)
552{
553	const char **src, **dest, **loc;
554
555	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
556#ifdef DEBUG
557		printf("binsrch(%s) -> already in table\n", mac);
558#endif
559		return;
560	}
561	/* binsrch sets slot as a side effect */
562#ifdef DEBUG
563printf("binsrch(%s) -> %d\n", mac, slot);
564#endif
565	loc = &knowncmds[slot];
566	src = &knowncmds[ncmds-1];
567	dest = src+1;
568	while (dest > loc)
569		*dest-- = *src--;
570	*loc = strcpy(malloc(3), mac);
571	ncmds++;
572#ifdef DEBUG
573printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
574#endif
575}
576
577/*
578 * Do a binary search in knowncmds for mac.
579 * If found, return the index.  If not, return -1.
580 */
581int
582binsrch(const char *mac)
583{
584	const char *p;	/* pointer to current cmd in list */
585	int d;		/* difference if any */
586	int mid;	/* mid point in binary search */
587	int top, bot;	/* boundaries of bin search, inclusive */
588
589	top = ncmds-1;
590	bot = 0;
591	while (top >= bot) {
592		mid = (top+bot)/2;
593		p = knowncmds[mid];
594		d = p[0] - mac[0];
595		if (d == 0)
596			d = p[1] - mac[1];
597		if (d == 0)
598			return mid;
599		if (d < 0)
600			bot = mid + 1;
601		else
602			top = mid - 1;
603	}
604	slot = bot;	/* place it would have gone */
605	return -1;
606}
607