checknr.c revision 282437
1/*
2 * Copyright (c) 1980, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifndef lint
31static const char copyright[] =
32"@(#) Copyright (c) 1980, 1993\n\
33	The Regents of the University of California.  All rights reserved.\n";
34#endif /* not lint */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 282437 2015-05-04 22:05:12Z bapt $");
44
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP, respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
52#include <err.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <ctype.h>
57
/* Compile-time capacities of the fixed-size tables below. */
#define MAXSTK	100	/* entries in the stack of open constructs */
#define MAXBR	100	/* entries in the opener/closer pair table */
#define MAXCMDS	500	/* entries in the known-command table */

/* Forward declarations for the file-local helpers. */
static void addcmd(char *line);
static void addmac(const char *mac);
static int binsrch(const char *mac);
static void checkknown(const char *mac);
static void chkcmd(const char *line, const char *mac);
static void complain(int i);
static int eq(const char *s1, const char *s2);
static void nomatch(const char *mac);
static void pe(int linen);
static void process(FILE *f);
static void prop(int i);
static void usage(void);
74
/*
 * Stack of constructs that have been opened and not yet closed.
 * Each entry records which bracket pair it belongs to and where it
 * was seen, so an unmatched opener can be reported later.
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro */
	int parm;	/* argument of the size or font change */
	int lno;	/* input line on which it was pushed */
};
static struct stkstr stk[MAXSTK];
static int stktop;	/* index of the top entry in stk[] */
85
/*
 * Table of opener/closer pairs.  The first two entries are reserved
 * for size and font changes; SZ and FT are their indices.  The table
 * is scanned until the first entry whose opener is NULL, and -a adds
 * further pairs in the unused tail.
 */
#define SZ	0
#define FT	1
static struct brstr {
	const char *opbr;	/* name that opens the construct */
	const char *clbr;	/* name that closes it */
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },		/* SZ: also \s */
	{ "ft", "ft" },		/* FT: also \f */

	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },

	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },

	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },

	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },

	/* Refer */
	{ "[", "]" },

	/* end of table */
	{ NULL, NULL }
};
143
144/*
145 * All commands known to nroff, plus macro packages.
146 * Used so we can complain about unrecognized commands.
147 */
148static const char *knowncmds[MAXCMDS] = {
149"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
150"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
151"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
152"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
153"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
154"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
155"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
156"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
157"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
158"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
159"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
160"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
161"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
162"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
163"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
164"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
165"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
166"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
167"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
168"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
169"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
170"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
171"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
172"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
173"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
174"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
175"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
176"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
177"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
178"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
179"yr", 0
180};
181
/* Position reporting: where we are in the input. */
static const char *cfilename;	/* name of the file being checked */
static int	lineno;		/* line number within that file */
static int	nfiles;		/* how many files were named on the command line */

/* Command-line switches. */
static int	fflag;		/* -f: ignore \f */
static int	sflag;		/* -s: ignore \s */

/* State of the knowncmds table. */
static int	ncmds;		/* number of entries in use */
static int	slot;		/* insertion point left behind by a failed binsrch */
189
190int
191main(int argc, char **argv)
192{
193	FILE *f;
194	int i;
195	char *cp;
196	char b1[4];
197
198	/* Figure out how many known commands there are */
199	while (knowncmds[ncmds])
200		ncmds++;
201	while (argc > 1 && argv[1][0] == '-') {
202		switch(argv[1][1]) {
203
204		/* -a: add pairs of macros */
205		case 'a':
206			i = strlen(argv[1]) - 2;
207			if (i % 6 != 0)
208				usage();
209			/* look for empty macro slots */
210			for (i=0; br[i].opbr; i++)
211				;
212			for (cp=argv[1]+3; cp[-1]; cp += 6) {
213				br[i].opbr = strncpy(malloc(3), cp, 2);
214				br[i].clbr = strncpy(malloc(3), cp+3, 2);
215				addmac(br[i].opbr);	/* knows pairs are also known cmds */
216				addmac(br[i].clbr);
217				i++;
218			}
219			break;
220
221		/* -c: add known commands */
222		case 'c':
223			i = strlen(argv[1]) - 2;
224			if (i % 3 != 0)
225				usage();
226			for (cp=argv[1]+3; cp[-1]; cp += 3) {
227				if (cp[2] && cp[2] != '.')
228					usage();
229				strncpy(b1, cp, 2);
230				b1[2] = '\0';
231				addmac(b1);
232			}
233			break;
234
235		/* -f: ignore font changes */
236		case 'f':
237			fflag = 1;
238			break;
239
240		/* -s: ignore size changes */
241		case 's':
242			sflag = 1;
243			break;
244		default:
245			usage();
246		}
247		argc--; argv++;
248	}
249
250	nfiles = argc - 1;
251
252	if (nfiles > 0) {
253		for (i = 1; i < argc; i++) {
254			cfilename = argv[i];
255			f = fopen(cfilename, "r");
256			if (f == NULL)
257				warn("%s", cfilename);
258			else {
259				process(f);
260				fclose(f);
261			}
262		}
263	} else {
264		cfilename = "stdin";
265		process(stdin);
266	}
267	exit(0);
268}
269
/*
 * Print the command synopsis on standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fputs(synopsis, stderr);
	exit(1);
}
277
278static void
279process(FILE *f)
280{
281	int i, n;
282	char mac[5];	/* The current macro or nroff command */
283	int pl;
284	static char line[256];	/* the current line */
285
286	stktop = -1;
287	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
288		if (line[0] == '.') {
289			/*
290			 * find and isolate the macro/command name.
291			 */
292			strncpy(mac, line+1, 4);
293			if (isspace(mac[0])) {
294				pe(lineno);
295				printf("Empty command\n");
296			} else if (isspace(mac[1])) {
297				mac[1] = 0;
298			} else if (isspace(mac[2])) {
299				mac[2] = 0;
300			} else if (mac[0] != '\\' || mac[1] != '\"') {
301				pe(lineno);
302				printf("Command too long\n");
303			}
304
305			/*
306			 * Is it a known command?
307			 */
308			checkknown(mac);
309
310			/*
311			 * Should we add it?
312			 */
313			if (eq(mac, "de"))
314				addcmd(line);
315
316			chkcmd(line, mac);
317		}
318
319		/*
320		 * At this point we process the line looking
321		 * for \s and \f.
322		 */
323		for (i = 0; line[i]; i++)
324			if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
325				if (!sflag && line[++i] == 's') {
326					pl = line[++i];
327					if (isdigit(pl)) {
328						n = pl - '0';
329						pl = ' ';
330					} else
331						n = 0;
332					while (isdigit(line[++i]))
333						n = 10 * n + line[i] - '0';
334					i--;
335					if (n == 0) {
336						if (stk[stktop].opno == SZ) {
337							stktop--;
338						} else {
339							pe(lineno);
340							printf("unmatched \\s0\n");
341						}
342					} else {
343						stk[++stktop].opno = SZ;
344						stk[stktop].pl = pl;
345						stk[stktop].parm = n;
346						stk[stktop].lno = lineno;
347					}
348				} else if (!fflag && line[i] == 'f') {
349					n = line[++i];
350					if (n == 'P') {
351						if (stk[stktop].opno == FT) {
352							stktop--;
353						} else {
354							pe(lineno);
355							printf("unmatched \\fP\n");
356						}
357					} else {
358						stk[++stktop].opno = FT;
359						stk[stktop].pl = 1;
360						stk[stktop].parm = n;
361						stk[stktop].lno = lineno;
362					}
363				}
364			}
365	}
366	/*
367	 * We've hit the end and look at all this stuff that hasn't been
368	 * matched yet!  Complain, complain.
369	 */
370	for (i = stktop; i >= 0; i--) {
371		complain(i);
372	}
373}
374
375static void
376complain(int i)
377{
378	pe(stk[i].lno);
379	printf("Unmatched ");
380	prop(i);
381	printf("\n");
382}
383
384static void
385prop(int i)
386{
387	if (stk[i].pl == 0)
388		printf(".%s", br[stk[i].opno].opbr);
389	else switch (stk[i].opno) {
390	case SZ:
391		printf("\\s%c%d", stk[i].pl, stk[i].parm);
392		break;
393	case FT:
394		printf("\\f%c", stk[i].parm);
395		break;
396	default:
397		printf("Bug: stk[%d].opno = %d = .%s, .%s",
398			i, stk[i].opno, br[stk[i].opno].opbr,
399			br[stk[i].opno].clbr);
400	}
401}
402
403static void
404chkcmd(const char *line __unused, const char *mac)
405{
406	int i;
407
408	/*
409	 * Check to see if it matches top of stack.
410	 */
411	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
412		stktop--;	/* OK. Pop & forget */
413	else {
414		/* No. Maybe it's an opener */
415		for (i=0; br[i].opbr; i++) {
416			if (eq(mac, br[i].opbr)) {
417				/* Found. Push it. */
418				stktop++;
419				stk[stktop].opno = i;
420				stk[stktop].pl = 0;
421				stk[stktop].parm = 0;
422				stk[stktop].lno = lineno;
423				break;
424			}
425			/*
426			 * Maybe it's an unmatched closer.
427			 * NOTE: this depends on the fact
428			 * that none of the closers can be
429			 * openers too.
430			 */
431			if (eq(mac, br[i].clbr)) {
432				nomatch(mac);
433				break;
434			}
435		}
436	}
437}
438
439static void
440nomatch(const char *mac)
441{
442	int i, j;
443
444	/*
445	 * Look for a match further down on stack
446	 * If we find one, it suggests that the stuff in
447	 * between is supposed to match itself.
448	 */
449	for (j=stktop; j>=0; j--)
450		if (eq(mac,br[stk[j].opno].clbr)) {
451			/* Found.  Make a good diagnostic. */
452			if (j == stktop-2) {
453				/*
454				 * Check for special case \fx..\fR and don't
455				 * complain.
456				 */
457				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
458				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
459					stktop = j -1;
460					return;
461				}
462				/*
463				 * We have two unmatched frobs.  Chances are
464				 * they were intended to match, so we mention
465				 * them together.
466				 */
467				pe(stk[j+1].lno);
468				prop(j+1);
469				printf(" does not match %d: ", stk[j+2].lno);
470				prop(j+2);
471				printf("\n");
472			} else for (i=j+1; i <= stktop; i++) {
473				complain(i);
474			}
475			stktop = j-1;
476			return;
477		}
478	/* Didn't find one.  Throw this away. */
479	pe(lineno);
480	printf("Unmatched .%s\n", mac);
481}
482
/*
 * eq: compare two strings.
 * Returns 1 when s1 and s2 have identical contents, 0 otherwise.
 */
static int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
489
490/* print the first part of an error message, given the line number */
491static void
492pe(int linen)
493{
494	if (nfiles > 1)
495		printf("%s: ", cfilename);
496	printf("%d: ", linen);
497}
498
499static void
500checkknown(const char *mac)
501{
502
503	if (eq(mac, "."))
504		return;
505	if (binsrch(mac) >= 0)
506		return;
507	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
508		return;
509
510	pe(lineno);
511	printf("Unknown command: .%s\n", mac);
512}
513
514/*
515 * We have a .de xx line in "line".  Add xx to the list of known commands.
516 */
517static void
518addcmd(char *line)
519{
520	char *mac;
521
522	/* grab the macro being defined */
523	mac = line+4;
524	while (isspace(*mac))
525		mac++;
526	if (*mac == 0) {
527		pe(lineno);
528		printf("illegal define: %s\n", line);
529		return;
530	}
531	mac[2] = 0;
532	if (isspace(mac[1]) || mac[1] == '\\')
533		mac[1] = 0;
534	if (ncmds >= MAXCMDS) {
535		printf("Only %d known commands allowed\n", MAXCMDS);
536		exit(1);
537	}
538	addmac(mac);
539}
540
541/*
542 * Add mac to the list.  We should really have some kind of tree
543 * structure here but this is a quick-and-dirty job and I just don't
544 * have time to mess with it.  (I wonder if this will come back to haunt
545 * me someday?)  Anyway, I claim that .de is fairly rare in user
546 * nroff programs, and the register loop below is pretty fast.
547 */
548static void
549addmac(const char *mac)
550{
551	const char **src, **dest, **loc;
552
553	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
554#ifdef DEBUG
555		printf("binsrch(%s) -> already in table\n", mac);
556#endif
557		return;
558	}
559	/* binsrch sets slot as a side effect */
560#ifdef DEBUG
561printf("binsrch(%s) -> %d\n", mac, slot);
562#endif
563	loc = &knowncmds[slot];
564	src = &knowncmds[ncmds-1];
565	dest = src+1;
566	while (dest > loc)
567		*dest-- = *src--;
568	*loc = strcpy(malloc(3), mac);
569	ncmds++;
570#ifdef DEBUG
571	printf("after: %s %s %s %s %s, %d cmds\n",
572	    knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
573	    knowncmds[slot+1], knowncmds[slot+2], ncmds);
574#endif
575}
576
577/*
578 * Do a binary search in knowncmds for mac.
579 * If found, return the index.  If not, return -1.
580 */
581static int
582binsrch(const char *mac)
583{
584	const char *p;	/* pointer to current cmd in list */
585	int d;		/* difference if any */
586	int mid;	/* mid point in binary search */
587	int top, bot;	/* boundaries of bin search, inclusive */
588
589	top = ncmds-1;
590	bot = 0;
591	while (top >= bot) {
592		mid = (top+bot)/2;
593		p = knowncmds[mid];
594		d = p[0] - mac[0];
595		if (d == 0)
596			d = p[1] - mac[1];
597		if (d == 0)
598			return (mid);
599		if (d < 0)
600			bot = mid + 1;
601		else
602			top = mid - 1;
603	}
604	slot = bot;	/* place it would have gone */
605	return (-1);
606}
607