1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifndef lint
31static const char copyright[] =
32"@(#) Copyright (c) 1980, 1993\n\
33	The Regents of the University of California.  All rights reserved.\n";
34#endif /* not lint */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD$");
44
45/*
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * we also attempt to match size and font changes, but only the embedded
48 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
49 * later but for now think of these restrictions as contributions to
50 * structured typesetting.
51 */
52#include <err.h>
53#define _WITH_GETLINE
54#include <stdio.h>
55#include <stdlib.h>
56#include <string.h>
57#include <ctype.h>
58
59#define MAXSTK	100	/* Stack size */
60#define MAXBR	100	/* Max number of bracket pairs known */
61#define MAXCMDS	600	/* Max number of commands known */
62
/* Forward declarations of the file-local routines defined further down. */
static void	addcmd(char *line);
static void	addmac(const char *mac);
static int	binsrch(const char *mac);
static void	checkknown(const char *mac);
static void	chkcmd(const char *line, const char *mac);
static void	complain(int i);
static int	eq(const char *s1, const char *s2);
static void	nomatch(const char *mac);
static void	pe(int linen);
static void	process(FILE *f);
static void	prop(int i);
static void	usage(void);
75
76/*
77 * The stack on which we remember what we've seen so far.
78 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* parm to size, font, etc */
	int lno;	/* input line on which the opener was seen */
};

static struct stkstr stk[MAXSTK];	/* the stack proper */
static int stktop;	/* index of the top entry; process() resets it to -1 */
86
87/*
88 * The kinds of opening and closing brackets.
89 */
/*
 * Fixed positions of the two built-in entries: process() pushes these
 * indices for embedded \s and \f escapes and prop() switches on them.
 */
enum {
	SZ = 0,		/* size changes: .sz, also \s */
	FT = 1		/* font changes: .ft, also \f */
};

/*
 * Table of opener/closer macro pairs.  The used part ends at the first
 * entry whose opbr is NULL; main() appends pairs given with -a in the
 * unused tail.
 */
static struct brstr {
	const char *opbr;	/* macro that opens the bracket */
	const char *clbr;	/* macro that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	[SZ] = { .opbr = "sz", .clbr = "sz" },
	[FT] = { .opbr = "ft", .clbr = "ft" },
	/* the -mm package */
	{ .opbr = "AL", .clbr = "LE" },
	{ .opbr = "AS", .clbr = "AE" },
	{ .opbr = "BL", .clbr = "LE" },
	{ .opbr = "BS", .clbr = "BE" },
	{ .opbr = "DF", .clbr = "DE" },
	{ .opbr = "DL", .clbr = "LE" },
	{ .opbr = "DS", .clbr = "DE" },
	{ .opbr = "FS", .clbr = "FE" },
	{ .opbr = "ML", .clbr = "LE" },
	{ .opbr = "NS", .clbr = "NE" },
	{ .opbr = "RL", .clbr = "LE" },
	{ .opbr = "VL", .clbr = "LE" },
	/* the -ms package */
	{ .opbr = "AB", .clbr = "AE" },
	{ .opbr = "BD", .clbr = "DE" },
	{ .opbr = "CD", .clbr = "DE" },
	{ .opbr = "DS", .clbr = "DE" },
	{ .opbr = "FS", .clbr = "FE" },
	{ .opbr = "ID", .clbr = "DE" },
	{ .opbr = "KF", .clbr = "KE" },
	{ .opbr = "KS", .clbr = "KE" },
	{ .opbr = "LD", .clbr = "DE" },
	{ .opbr = "LG", .clbr = "NL" },
	{ .opbr = "QS", .clbr = "QE" },
	{ .opbr = "RS", .clbr = "RE" },
	{ .opbr = "SM", .clbr = "NL" },
	{ .opbr = "XA", .clbr = "XE" },
	{ .opbr = "XS", .clbr = "XE" },
	/* The -me package */
	{ .opbr = "(b", .clbr = ")b" },
	{ .opbr = "(c", .clbr = ")c" },
	{ .opbr = "(d", .clbr = ")d" },
	{ .opbr = "(f", .clbr = ")f" },
	{ .opbr = "(l", .clbr = ")l" },
	{ .opbr = "(q", .clbr = ")q" },
	{ .opbr = "(x", .clbr = ")x" },
	{ .opbr = "(z", .clbr = ")z" },
	/* The -mdoc package */
	{ .opbr = "Ao", .clbr = "Ac" },
	{ .opbr = "Bd", .clbr = "Ed" },
	{ .opbr = "Bk", .clbr = "Ek" },
	{ .opbr = "Bo", .clbr = "Bc" },
	{ .opbr = "Do", .clbr = "Dc" },
	{ .opbr = "Fo", .clbr = "Fc" },
	{ .opbr = "Oo", .clbr = "Oc" },
	{ .opbr = "Po", .clbr = "Pc" },
	{ .opbr = "Qo", .clbr = "Qc" },
	{ .opbr = "Rs", .clbr = "Re" },
	{ .opbr = "So", .clbr = "Sc" },
	{ .opbr = "Xo", .clbr = "Xc" },
	/* Things needed by preprocessors */
	{ .opbr = "EQ", .clbr = "EN" },
	{ .opbr = "TS", .clbr = "TE" },
	/* Refer */
	{ .opbr = "[", .clbr = "]" },
	/* end-of-table sentinel */
	{ .opbr = NULL, .clbr = NULL }
};
157
158/*
159 * All commands known to nroff, plus macro packages.
160 * Used so we can complain about unrecognized commands.
161 */
162static const char *knowncmds[MAXCMDS] = {
163"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
164"%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x",
165"(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C",
166"1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f",
167"@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL",
168"AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
169"B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd",
170"Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd",
171"Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd",
172"Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX",
173"Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG",
174"FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl",
175"Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" ,
176"ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF",
177"KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME",
178"MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm",
179"No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox",
180"P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po",
181"Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC",
182"RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3",
183"SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss",
184"St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP",
185"TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
186"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[",
187"[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}",
188"]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
189"b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc",
190"ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do",
191"ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc",
192"ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr",
193"fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy",
194"hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern",
195"kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li",
196"ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
197"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
198"of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp",
199"ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return",
200"rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk",
201"so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp",
202"tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr",
2030
204};
205
/* Position in the input, used by pe() when printing diagnostics. */
static const char *cfilename = NULL;	/* name of current file */
static int	lineno = 0;		/* current line number in input file */
static int	nfiles = 0;		/* number of files to process */

/* Command-line switches set by main(). */
static int	fflag = 0;		/* -f: ignore \f */
static int	sflag = 0;		/* -s: ignore \s */

/* Bookkeeping for the knowncmds table. */
static int	ncmds = 0;		/* number of entries in knowncmds */
static int	slot = 0;		/* insertion point left by a failed binsrch */
213
214int
215main(int argc, char **argv)
216{
217	FILE *f;
218	int i;
219	char *cp;
220	char b1[4];
221
222	/* Figure out how many known commands there are */
223	while (knowncmds[ncmds])
224		ncmds++;
225	while (argc > 1 && argv[1][0] == '-') {
226		switch(argv[1][1]) {
227
228		/* -a: add pairs of macros */
229		case 'a':
230			i = strlen(argv[1]) - 2;
231			if (i % 6 != 0)
232				usage();
233			/* look for empty macro slots */
234			for (i=0; br[i].opbr; i++)
235				;
236			for (cp=argv[1]+3; cp[-1]; cp += 6) {
237				char *tmp;
238
239				if (i >= MAXBR)
240					errx(1, "too many pairs");
241				if ((tmp = malloc(3)) == NULL)
242					err(1, "malloc");
243				strlcpy(tmp, cp, 3);
244				br[i].opbr = tmp;
245				if ((tmp = malloc(3)) == NULL)
246					err(1, "malloc");
247				strlcpy(tmp, cp+3, 3);
248				br[i].clbr = tmp;
249				addmac(br[i].opbr);	/* knows pairs are also known cmds */
250				addmac(br[i].clbr);
251				i++;
252			}
253			break;
254
255		/* -c: add known commands */
256		case 'c':
257			i = strlen(argv[1]) - 2;
258			if (i % 3 != 0)
259				usage();
260			for (cp=argv[1]+3; cp[-1]; cp += 3) {
261				if (cp[2] && cp[2] != '.')
262					usage();
263				strncpy(b1, cp, 2);
264				b1[2] = '\0';
265				addmac(b1);
266			}
267			break;
268
269		/* -f: ignore font changes */
270		case 'f':
271			fflag = 1;
272			break;
273
274		/* -s: ignore size changes */
275		case 's':
276			sflag = 1;
277			break;
278		default:
279			usage();
280		}
281		argc--; argv++;
282	}
283
284	nfiles = argc - 1;
285
286	if (nfiles > 0) {
287		for (i = 1; i < argc; i++) {
288			cfilename = argv[i];
289			f = fopen(cfilename, "r");
290			if (f == NULL)
291				warn("%s", cfilename);
292			else {
293				process(f);
294				fclose(f);
295			}
296		}
297	} else {
298		cfilename = "stdin";
299		process(stdin);
300	}
301	exit(0);
302}
303
/* Print the command synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fputs(synopsis, stderr);
	exit(1);
}
311
312static void
313process(FILE *f)
314{
315	int i, n;
316	char mac[64];	/* The current macro or nroff command */
317	char *line;
318	size_t linecap;
319	int pl;
320
321	line = NULL;
322	linecap = 0;
323	stktop = -1;
324	for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) {
325		if (line[0] == '.') {
326			/*
327			 * find and isolate the macro/command name.
328			 */
329			strncpy(mac, line+1, 4);
330			if (isspace(mac[0])) {
331				pe(lineno);
332				printf("Empty command\n");
333			} else if (isspace(mac[1])) {
334				mac[1] = 0;
335			} else if (isspace(mac[2])) {
336				mac[2] = 0;
337			} else if (mac[0] != '\\' || mac[1] != '\"') {
338				pe(lineno);
339				printf("Command too long\n");
340			}
341
342			/*
343			 * Is it a known command?
344			 */
345			checkknown(mac);
346
347			/*
348			 * Should we add it?
349			 */
350			if (eq(mac, "de"))
351				addcmd(line);
352
353			chkcmd(line, mac);
354		}
355
356		/*
357		 * At this point we process the line looking
358		 * for \s and \f.
359		 */
360		for (i = 0; line[i]; i++)
361			if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
362				if (!sflag && line[++i] == 's') {
363					pl = line[++i];
364					if (isdigit(pl)) {
365						n = pl - '0';
366						pl = ' ';
367					} else
368						n = 0;
369					while (isdigit(line[++i]))
370						n = 10 * n + line[i] - '0';
371					i--;
372					if (n == 0) {
373						if (stktop >= 0 &&
374						    stk[stktop].opno == SZ) {
375							stktop--;
376						} else {
377							pe(lineno);
378							printf("unmatched \\s0\n");
379						}
380					} else {
381						stk[++stktop].opno = SZ;
382						stk[stktop].pl = pl;
383						stk[stktop].parm = n;
384						stk[stktop].lno = lineno;
385					}
386				} else if (!fflag && line[i] == 'f') {
387					n = line[++i];
388					if (n == 'P') {
389						if (stktop >= 0 &&
390						    stk[stktop].opno == FT) {
391							stktop--;
392						} else {
393							pe(lineno);
394							printf("unmatched \\fP\n");
395						}
396					} else {
397						stk[++stktop].opno = FT;
398						stk[stktop].pl = 1;
399						stk[stktop].parm = n;
400						stk[stktop].lno = lineno;
401					}
402				}
403			}
404	}
405	free(line);
406	/*
407	 * We've hit the end and look at all this stuff that hasn't been
408	 * matched yet!  Complain, complain.
409	 */
410	for (i = stktop; i >= 0; i--) {
411		complain(i);
412	}
413}
414
415static void
416complain(int i)
417{
418	pe(stk[i].lno);
419	printf("Unmatched ");
420	prop(i);
421	printf("\n");
422}
423
424static void
425prop(int i)
426{
427	if (stk[i].pl == 0)
428		printf(".%s", br[stk[i].opno].opbr);
429	else switch(stk[i].opno) {
430	case SZ:
431		printf("\\s%c%d", stk[i].pl, stk[i].parm);
432		break;
433	case FT:
434		printf("\\f%c", stk[i].parm);
435		break;
436	default:
437		printf("Bug: stk[%d].opno = %d = .%s, .%s",
438			i, stk[i].opno, br[stk[i].opno].opbr,
439			br[stk[i].opno].clbr);
440	}
441}
442
443static void
444chkcmd(const char *line __unused, const char *mac)
445{
446	int i;
447
448	/*
449	 * Check to see if it matches top of stack.
450	 */
451	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
452		stktop--;	/* OK. Pop & forget */
453	else {
454		/* No. Maybe it's an opener */
455		for (i=0; br[i].opbr; i++) {
456			if (eq(mac, br[i].opbr)) {
457				/* Found. Push it. */
458				stktop++;
459				stk[stktop].opno = i;
460				stk[stktop].pl = 0;
461				stk[stktop].parm = 0;
462				stk[stktop].lno = lineno;
463				break;
464			}
465			/*
466			 * Maybe it's an unmatched closer.
467			 * NOTE: this depends on the fact
468			 * that none of the closers can be
469			 * openers too.
470			 */
471			if (eq(mac, br[i].clbr)) {
472				nomatch(mac);
473				break;
474			}
475		}
476	}
477}
478
479static void
480nomatch(const char *mac)
481{
482	int i, j;
483
484	/*
485	 * Look for a match further down on stack
486	 * If we find one, it suggests that the stuff in
487	 * between is supposed to match itself.
488	 */
489	for (j=stktop; j>=0; j--)
490		if (eq(mac,br[stk[j].opno].clbr)) {
491			/* Found.  Make a good diagnostic. */
492			if (j == stktop-2) {
493				/*
494				 * Check for special case \fx..\fR and don't
495				 * complain.
496				 */
497				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
498				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
499					stktop = j -1;
500					return;
501				}
502				/*
503				 * We have two unmatched frobs.  Chances are
504				 * they were intended to match, so we mention
505				 * them together.
506				 */
507				pe(stk[j+1].lno);
508				prop(j+1);
509				printf(" does not match %d: ", stk[j+2].lno);
510				prop(j+2);
511				printf("\n");
512			} else for (i=j+1; i <= stktop; i++) {
513				complain(i);
514			}
515			stktop = j-1;
516			return;
517		}
518	/* Didn't find one.  Throw this away. */
519	pe(lineno);
520	printf("Unmatched .%s\n", mac);
521}
522
/* eq: return 1 when s1 and s2 hold the same characters, 0 otherwise. */
static int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
529
530/* print the first part of an error message, given the line number */
531static void
532pe(int linen)
533{
534	if (nfiles > 1)
535		printf("%s: ", cfilename);
536	printf("%d: ", linen);
537}
538
539static void
540checkknown(const char *mac)
541{
542
543	if (eq(mac, "."))
544		return;
545	if (binsrch(mac) >= 0)
546		return;
547	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
548		return;
549
550	pe(lineno);
551	printf("Unknown command: .%s\n", mac);
552}
553
554/*
555 * We have a .de xx line in "line".  Add xx to the list of known commands.
556 */
557static void
558addcmd(char *line)
559{
560	char *mac;
561
562	/* grab the macro being defined */
563	mac = line+4;
564	while (isspace(*mac))
565		mac++;
566	if (*mac == 0) {
567		pe(lineno);
568		printf("illegal define: %s\n", line);
569		return;
570	}
571	mac[2] = 0;
572	if (isspace(mac[1]) || mac[1] == '\\')
573		mac[1] = 0;
574	if (ncmds >= MAXCMDS) {
575		printf("Only %d known commands allowed\n", MAXCMDS);
576		exit(1);
577	}
578	addmac(mac);
579}
580
581/*
582 * Add mac to the list.  We should really have some kind of tree
583 * structure here but this is a quick-and-dirty job and I just don't
584 * have time to mess with it.  (I wonder if this will come back to haunt
585 * me someday?)  Anyway, I claim that .de is fairly rare in user
586 * nroff programs, and the register loop below is pretty fast.
587 */
588static void
589addmac(const char *mac)
590{
591	const char **src, **dest, **loc;
592
593	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
594#ifdef DEBUG
595		printf("binsrch(%s) -> already in table\n", mac);
596#endif
597		return;
598	}
599	/* binsrch sets slot as a side effect */
600#ifdef DEBUG
601	printf("binsrch(%s) -> %d\n", mac, slot);
602#endif
603	loc = &knowncmds[slot];
604	src = &knowncmds[ncmds-1];
605	dest = src+1;
606	while (dest > loc)
607		*dest-- = *src--;
608	if ((*loc = strdup(mac)) == NULL)
609		err(1, "strdup");
610	ncmds++;
611#ifdef DEBUG
612	printf("after: %s %s %s %s %s, %d cmds\n",
613	    knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
614	    knowncmds[slot+1], knowncmds[slot+2], ncmds);
615#endif
616}
617
618/*
619 * Do a binary search in knowncmds for mac.
620 * If found, return the index.  If not, return -1.
621 */
622static int
623binsrch(const char *mac)
624{
625	const char *p;	/* pointer to current cmd in list */
626	int d;		/* difference if any */
627	int mid;	/* mid point in binary search */
628	int top, bot;	/* boundaries of bin search, inclusive */
629
630	top = ncmds-1;
631	bot = 0;
632	while (top >= bot) {
633		mid = (top+bot)/2;
634		p = knowncmds[mid];
635		d = p[0] - mac[0];
636		if (d == 0)
637			d = p[1] - mac[1];
638		if (d == 0)
639			return (mid);
640		if (d < 0)
641			bot = mid + 1;
642		else
643			top = mid - 1;
644	}
645	slot = bot;	/* place it would have gone */
646	return (-1);
647}
648