1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1980, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#if 0
41#ifndef lint
42static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
43#endif /* not lint */
44#endif
45
46#include <sys/cdefs.h>
47__FBSDID("$FreeBSD: src/usr.bin/checknr/checknr.c,v 1.9 2004/07/15 04:42:47 tjr Exp $");
48
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
56#include <err.h>
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <ctype.h>
61
#define MAXSTK	100	/* capacity of stk[], the stack of open items */
#define MAXBR	100	/* capacity of br[], the bracket pair table */
#define MAXCMDS	500	/* capacity of knowncmds[] */
65
/* Forward declarations for the routines defined later in this file. */
void addcmd(char *line);
void addmac(const char *mac);
int binsrch(const char *mac);
void checkknown(const char *mac);
void chkcmd(const char *line, const char *mac);
void complain(int i);
int eq(const char *s1, const char *s2);
void nomatch(const char *mac);
void pe(int linen);
void process(FILE *f);
void prop(int i);
static void usage(void);
78
/*
 * One entry of the stack on which we remember the openers seen so far
 * that have not been closed yet.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-' or ' ' for \s, 1 for \f, 0 for a command */
	int parm;	/* parameter of the size or font change, 0 for a command */
	int lno;	/* input line number on which the opener appeared */
};

struct stkstr stk[MAXSTK];	/* the stack itself */
int stktop;			/* index of the top entry; process() resets it to -1 */
89
/*
 * Indices of the two entries in br[] that are also used for the embedded
 * \s (size) and \f (font) escapes.
 */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.  The list ends at the first
 * entry whose opbr is null; the unused tail of the array is where the
 * pairs given with -a are stored.
 */
struct brstr {
	const char *opbr;	/* name of the opening command */
	const char *clbr;	/* name of the command that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of list */
	{ 0, 0 }
};
147
148/*
149 * All commands known to nroff, plus macro packages.
150 * Used so we can complain about unrecognized commands.
151 */
152const char *knowncmds[MAXCMDS] = {
153"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
154"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
155"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
156"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
157"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
158"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
159"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
160"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
161"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
162"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
163"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
164"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
165"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
166"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
167"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
168"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
169"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
170"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
171"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
172"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
173"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
174"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
175"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
176"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
177"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
178"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
179"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
180"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
181"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
182"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
183"yr", 0
184};
185
/* Command line options. */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */

/* Where we are in the input; pe() uses these to prefix diagnostics. */
const char *cfilename;	/* name of current file */
int	lineno;		/* current line number in input file */
int	nfiles;		/* number of files to process */

/* State of the knowncmds table. */
int	ncmds;		/* number of entries in knowncmds */
int	slot;		/* insertion index left behind by binsrch on a miss */
193
194int
195main(int argc, char **argv)
196{
197	FILE *f;
198	int i;
199	char *cp;
200	char b1[4];
201
202	/* Figure out how many known commands there are */
203	while (knowncmds[ncmds])
204		ncmds++;
205	while (argc > 1 && argv[1][0] == '-') {
206		switch(argv[1][1]) {
207
208		/* -a: add pairs of macros */
209		case 'a':
210			i = strlen(argv[1]) - 2;
211			if (i % 6 != 0)
212				usage();
213			/* look for empty macro slots */
214			for (i=0; br[i].opbr; i++)
215				;
216			for (cp=argv[1]+3; cp[-1]; cp += 6) {
217				br[i].opbr = strncpy(malloc(3), cp, 2);
218				br[i].clbr = strncpy(malloc(3), cp+3, 2);
219				addmac(br[i].opbr);	/* knows pairs are also known cmds */
220				addmac(br[i].clbr);
221				i++;
222			}
223			break;
224
225		/* -c: add known commands */
226		case 'c':
227			i = strlen(argv[1]) - 2;
228			if (i % 3 != 0)
229				usage();
230			for (cp=argv[1]+3; cp[-1]; cp += 3) {
231				if (cp[2] && cp[2] != '.')
232					usage();
233				strncpy(b1, cp, 2);
234				b1[2] = '\0';
235				addmac(b1);
236			}
237			break;
238
239		/* -f: ignore font changes */
240		case 'f':
241			fflag = 1;
242			break;
243
244		/* -s: ignore size changes */
245		case 's':
246			sflag = 1;
247			break;
248		default:
249			usage();
250		}
251		argc--; argv++;
252	}
253
254	nfiles = argc - 1;
255
256	if (nfiles > 0) {
257		for (i=1; i<argc; i++) {
258			cfilename = argv[i];
259			f = fopen(cfilename, "r");
260			if (f == NULL)
261				warn("%s", cfilename);
262			else {
263				process(f);
264				fclose(f);
265			}
266		}
267	} else {
268		cfilename = "stdin";
269		process(stdin);
270	}
271	exit(0);
272}
273
/* Print the command synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fputs(synopsis, stderr);
	exit(1);
}
281
282void
283process(FILE *f)
284{
285	int i, n;
286	char mac[5];	/* The current macro or nroff command */
287	int pl;
288	static char line[256];	/* the current line */
289
290	stktop = -1;
291	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
292		if (line[0] == '.') {
293			/*
294			 * find and isolate the macro/command name.
295			 */
296			strncpy(mac, line+1, 4);
297			if (isspace(mac[0])) {
298				pe(lineno);
299				printf("Empty command\n");
300			} else if (isspace(mac[1])) {
301				mac[1] = 0;
302			} else if (isspace(mac[2])) {
303				mac[2] = 0;
304			} else if (mac[0] != '\\' || mac[1] != '\"') {
305				pe(lineno);
306				printf("Command too long\n");
307			}
308
309			/*
310			 * Is it a known command?
311			 */
312			checkknown(mac);
313
314			/*
315			 * Should we add it?
316			 */
317			if (eq(mac, "de"))
318				addcmd(line);
319
320			chkcmd(line, mac);
321		}
322
323		/*
324		 * At this point we process the line looking
325		 * for \s and \f.
326		 */
327		for (i=0; line[i]; i++)
328			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
329				if (!sflag && line[++i]=='s') {
330					pl = line[++i];
331					if (isdigit(pl)) {
332						n = pl - '0';
333						pl = ' ';
334					} else
335						n = 0;
336					while (isdigit(line[++i]))
337						n = 10 * n + line[i] - '0';
338					i--;
339					if (n == 0) {
340						if (stk[stktop].opno == SZ) {
341							stktop--;
342						} else {
343							pe(lineno);
344							printf("unmatched \\s0\n");
345						}
346					} else {
347						stk[++stktop].opno = SZ;
348						stk[stktop].pl = pl;
349						stk[stktop].parm = n;
350						stk[stktop].lno = lineno;
351					}
352				} else if (!fflag && line[i]=='f') {
353					n = line[++i];
354					if (n == 'P') {
355						if (stk[stktop].opno == FT) {
356							stktop--;
357						} else {
358							pe(lineno);
359							printf("unmatched \\fP\n");
360						}
361					} else {
362						stk[++stktop].opno = FT;
363						stk[stktop].pl = 1;
364						stk[stktop].parm = n;
365						stk[stktop].lno = lineno;
366					}
367				}
368			}
369	}
370	/*
371	 * We've hit the end and look at all this stuff that hasn't been
372	 * matched yet!  Complain, complain.
373	 */
374	for (i=stktop; i>=0; i--) {
375		complain(i);
376	}
377}
378
379void
380complain(int i)
381{
382	pe(stk[i].lno);
383	printf("Unmatched ");
384	prop(i);
385	printf("\n");
386}
387
388void
389prop(int i)
390{
391	if (stk[i].pl == 0)
392		printf(".%s", br[stk[i].opno].opbr);
393	else switch(stk[i].opno) {
394	case SZ:
395		printf("\\s%c%d", stk[i].pl, stk[i].parm);
396		break;
397	case FT:
398		printf("\\f%c", stk[i].parm);
399		break;
400	default:
401		printf("Bug: stk[%d].opno = %d = .%s, .%s",
402			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
403	}
404}
405
406void
407chkcmd(const char *line __unused, const char *mac)
408{
409	int i;
410
411	/*
412	 * Check to see if it matches top of stack.
413	 */
414	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415		stktop--;	/* OK. Pop & forget */
416	else {
417		/* No. Maybe it's an opener */
418		for (i=0; br[i].opbr; i++) {
419			if (eq(mac, br[i].opbr)) {
420				/* Found. Push it. */
421				stktop++;
422				stk[stktop].opno = i;
423				stk[stktop].pl = 0;
424				stk[stktop].parm = 0;
425				stk[stktop].lno = lineno;
426				break;
427			}
428			/*
429			 * Maybe it's an unmatched closer.
430			 * NOTE: this depends on the fact
431			 * that none of the closers can be
432			 * openers too.
433			 */
434			if (eq(mac, br[i].clbr)) {
435				nomatch(mac);
436				break;
437			}
438		}
439	}
440}
441
442void
443nomatch(const char *mac)
444{
445	int i, j;
446
447	/*
448	 * Look for a match further down on stack
449	 * If we find one, it suggests that the stuff in
450	 * between is supposed to match itself.
451	 */
452	for (j=stktop; j>=0; j--)
453		if (eq(mac,br[stk[j].opno].clbr)) {
454			/* Found.  Make a good diagnostic. */
455			if (j == stktop-2) {
456				/*
457				 * Check for special case \fx..\fR and don't
458				 * complain.
459				 */
460				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
461				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
462					stktop = j -1;
463					return;
464				}
465				/*
466				 * We have two unmatched frobs.  Chances are
467				 * they were intended to match, so we mention
468				 * them together.
469				 */
470				pe(stk[j+1].lno);
471				prop(j+1);
472				printf(" does not match %d: ", stk[j+2].lno);
473				prop(j+2);
474				printf("\n");
475			} else for (i=j+1; i <= stktop; i++) {
476				complain(i);
477			}
478			stktop = j-1;
479			return;
480		}
481	/* Didn't find one.  Throw this away. */
482	pe(lineno);
483	printf("Unmatched .%s\n", mac);
484}
485
/* eq: return 1 when the two strings are identical, 0 otherwise */
int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
492
493/* print the first part of an error message, given the line number */
494void
495pe(int linen)
496{
497	if (nfiles > 1)
498		printf("%s: ", cfilename);
499	printf("%d: ", linen);
500}
501
502void
503checkknown(const char *mac)
504{
505
506	if (eq(mac, "."))
507		return;
508	if (binsrch(mac) >= 0)
509		return;
510	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
511		return;
512
513	pe(lineno);
514	printf("Unknown command: .%s\n", mac);
515}
516
517/*
518 * We have a .de xx line in "line".  Add xx to the list of known commands.
519 */
520void
521addcmd(char *line)
522{
523	char *mac;
524
525	/* grab the macro being defined */
526	mac = line+4;
527	while (isspace(*mac))
528		mac++;
529	if (*mac == 0) {
530		pe(lineno);
531		printf("illegal define: %s\n", line);
532		return;
533	}
534	mac[2] = 0;
535	if (isspace(mac[1]) || mac[1] == '\\')
536		mac[1] = 0;
537	if (ncmds >= MAXCMDS) {
538		printf("Only %d known commands allowed\n", MAXCMDS);
539		exit(1);
540	}
541	addmac(mac);
542}
543
544/*
545 * Add mac to the list.  We should really have some kind of tree
546 * structure here but this is a quick-and-dirty job and I just don't
547 * have time to mess with it.  (I wonder if this will come back to haunt
548 * me someday?)  Anyway, I claim that .de is fairly rare in user
549 * nroff programs, and the register loop below is pretty fast.
550 */
551void
552addmac(const char *mac)
553{
554	const char **src, **dest, **loc;
555
556	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
557#ifdef DEBUG
558		printf("binsrch(%s) -> already in table\n", mac);
559#endif
560		return;
561	}
562	/* binsrch sets slot as a side effect */
563#ifdef DEBUG
564printf("binsrch(%s) -> %d\n", mac, slot);
565#endif
566	loc = &knowncmds[slot];
567	src = &knowncmds[ncmds-1];
568	dest = src+1;
569	while (dest > loc)
570		*dest-- = *src--;
571	*loc = strcpy(malloc(3), mac);
572	ncmds++;
573#ifdef DEBUG
574printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
575#endif
576}
577
578/*
579 * Do a binary search in knowncmds for mac.
580 * If found, return the index.  If not, return -1.
581 */
582int
583binsrch(const char *mac)
584{
585	const char *p;	/* pointer to current cmd in list */
586	int d;		/* difference if any */
587	int mid;	/* mid point in binary search */
588	int top, bot;	/* boundaries of bin search, inclusive */
589
590	top = ncmds-1;
591	bot = 0;
592	while (top >= bot) {
593		mid = (top+bot)/2;
594		p = knowncmds[mid];
595		d = p[0] - mac[0];
596		if (d == 0)
597			d = p[1] - mac[1];
598		if (d == 0)
599			return mid;
600		if (d < 0)
601			bot = mid + 1;
602		else
603			top = mid - 1;
604	}
605	slot = bot;	/* place it would have gone */
606	return -1;
607}
608