1/*	$NetBSD: checknr.c,v 1.25 2021/04/13 01:38:04 mrg Exp $	*/
2
3/*
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34__COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California.  All rights reserved.");
36#endif /* not lint */
37
38#ifndef lint
39#if 0
40static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
41#else
42__RCSID("$NetBSD: checknr.c,v 1.25 2021/04/13 01:38:04 mrg Exp $");
43#endif
44#endif /* not lint */
45
46/*
47 * checknr: check an nroff/troff input file for matching macro calls.
48 * we also attempt to match size and font changes, but only the embedded
49 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
50 * later but for now think of these restrictions as contributions to
51 * structured typesetting.
52 */
53#include <ctype.h>
54#include <err.h>
55#include <stdio.h>
56#include <stdlib.h>
57#include <string.h>
58
#define MAXSTK	100	/* Stack size: number of entries in stk[] */
#define MAXBR	100	/* Size of br[], counting the {0, 0} row that ends the table */
#define MAXCMDS	500	/* Size of knowncmds[]: max number of commands known */
62
/*
 * One remembered opener: a macro pushed by chkcmd(), or a \s / \f escape
 * pushed while scanning a line in process().
 */
struct stkstr {
	int opno;	/* index into br[] of the opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro request */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the opener was seen on */
};

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the newest entry; process() resets it to -1
 * (empty) before reading a file.
 */
static struct stkstr stk[MAXSTK];
static int stktop;
73
/*
 * The kinds of opening and closing brackets.
 *
 * Each row pairs an opening request with the request that closes it.
 * The table is scanned from the top until a row whose opbr is null, so
 * the {0, 0} row must remain the last used entry.  The -a option in
 * main() appends user supplied pairs in the unused rows after the
 * built-in ones.
 */
static struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	/* SZ and FT are the row indexes that process() pushes for \s and \f */
#define SZ	0
	{ "sz",	"sz"},	/* also \s */
#define FT	1
	{ "ft",	"ft"},	/* also \f */
	/* the -mm package */
	{"AL",	"LE"},
	{"AS",	"AE"},
	{"BL",	"LE"},
	{"BS",	"BE"},
	{"DF",	"DE"},
	{"DL",	"LE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ML",	"LE"},
	{"NS",	"NE"},
	{"RL",	"LE"},
	{"VL",	"LE"},
	/*
	 * the -ms package
	 * (DS/DE and FS/FE repeat rows already listed under -mm; chkcmd()
	 * stops at the first matching row, so the repeats are never chosen)
	 */
	{"AB",	"AE"},
	{"BD",	"DE"},
	{"CD",	"DE"},
	{"DS",	"DE"},
	{"FS",	"FE"},
	{"ID",	"DE"},
	{"KF",	"KE"},
	{"KS",	"KE"},
	{"LD",	"DE"},
	{"LG",	"NL"},
	{"QS",	"QE"},
	{"RS",	"RE"},
	{"SM",	"NL"},
	{"XA",	"XE"},
	{"XS",	"XE"},
	/* The -me package */
	{"(b",	")b"},
	{"(c",	")c"},
	{"(d",	")d"},
	{"(f",	")f"},
	{"(l",	")l"},
	{"(q",	")q"},
	{"(x",	")x"},
	{"(z",	")z"},
	/* The -mdoc package */
	{"Ao",  "Ac"},
	{"Bd",  "Ed"},
	{"Bk",  "Ek"},
	{"Bo",  "Bc"},
	{"Do",  "Dc"},
	{"Fo",  "Fc"},
	{"Oo",  "Oc"},
	{"Po",  "Pc"},
	{"Qo",  "Qc"},
	{"Rs",  "Re"},
	{"So",  "Sc"},
	{"Xo",  "Xc"},
	/* Things needed by preprocessors */
	{"EQ",	"EN"},
	{"TS",	"TE"},
	/* Refer */
	{"[",	"]"},
	/* end-of-table marker */
	{0,	0}
};
144
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * binsrch() does a binary search over this array comparing only the
 * first two characters of each entry, so the entries must stay in
 * ascending order of those two characters.  The trailing 0 is what
 * main() uses to count the initial entries into ncmds; addmac() inserts
 * further names at run time.
 */
static const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N",
"%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q",
"(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x",
")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D",
"@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p",
"@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT",
"AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" ,  "B1",
"B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf",
"Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT",
"Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc",
"Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er",
"Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ",
"FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx",
"H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM",
"IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF",
"MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd",
"Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op",
"Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY",
"Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql",
"Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT",
"Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM",
"SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
"T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ",
"TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo",
"Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>",
"[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am",
"ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx",
"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de",
"di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el",
"em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft",
"fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie",
"if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr",
"ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn",
"po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro",
"rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st",
"sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u",
"uf", "uh", "ul", "vs", "wh", "xp", "yr", 0
};
193
/* State shared by the routines below. */
static int lineno;		/* current line number in input file */
static const char *cfilename;	/* name of current file ("stdin" when reading stdin) */
static int nfiles;		/* number of files to process */
static int fflag;		/* -f: ignore \f */
static int sflag;		/* -s: ignore \s */
static int ncmds;		/* number of entries in use in knowncmds */
static int slot;		/* index where binsrch says a missing name belongs */

/* Forward declarations of the file-local routines. */
static void addcmd(char *);
static void addmac(const char *);
static int binsrch(const char *);
static void checkknown(const char *);
static void chkcmd(const char *);
static void complain(int);
static int eq(const char *, const char *);
static void nomatch(const char *);
static void pe(int);
static void process(FILE *);
static void prop(int);
static void usage(void) __dead;
214
215int
216main(int argc, char **argv)
217{
218	FILE *f;
219	int i;
220	char *cp;
221	char b1[4];
222
223	/* Figure out how many known commands there are */
224	while (knowncmds[ncmds])
225		ncmds++;
226	while (argc > 1 && argv[1][0] == '-') {
227		switch(argv[1][1]) {
228
229		/* -a: add pairs of macros */
230		case 'a':
231			i = strlen(argv[1]) - 2;
232			if (i % 6 != 0)
233				usage();
234			/* look for empty macro slots */
235			for (i=0; br[i].opbr; i++)
236				;
237			for (cp=argv[1]+3; cp[-1]; cp += 6) {
238				char *tmp;
239
240				if (i >= MAXBR)
241					errx(1, "too many pairs");
242				if ((tmp = malloc(3)) == NULL)
243					err(1, "malloc");
244				strlcpy(tmp, cp, 3);
245				br[i].opbr = tmp;
246				if ((tmp = malloc(3)) == NULL)
247					err(1, "malloc");
248				strlcpy(tmp, cp+3, 3);
249				br[i].clbr = tmp;
250				addmac(br[i].opbr);	/* knows pairs are also known cmds */
251				addmac(br[i].clbr);
252				i++;
253			}
254			break;
255
256		/* -c: add known commands */
257		case 'c':
258			i = strlen(argv[1]) - 2;
259			if (i % 3 != 0)
260				usage();
261			for (cp=argv[1]+3; cp[-1]; cp += 3) {
262				if (cp[2] && cp[2] != '.')
263					usage();
264				strncpy(b1, cp, 2);
265				addmac(b1);
266			}
267			break;
268
269		/* -f: ignore font changes */
270		case 'f':
271			fflag = 1;
272			break;
273
274		/* -s: ignore size changes */
275		case 's':
276			sflag = 1;
277			break;
278		default:
279			usage();
280		}
281		argc--; argv++;
282	}
283
284	nfiles = argc - 1;
285
286	if (nfiles > 0) {
287		for (i=1; i<argc; i++) {
288			cfilename = argv[i];
289			f = fopen(cfilename, "r");
290			if (f == NULL)
291				perror(cfilename);
292			else {
293				process(f);
294				fclose(f);
295			}
296		}
297	} else {
298		cfilename = "stdin";
299		process(stdin);
300	}
301	exit(0);
302}
303
/*
 * Write the command synopsis to stderr and terminate with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n",
	    prog);
	exit(1);
}
312
313static void
314process(FILE *f)
315{
316	int i, n;
317	char line[256];	/* the current line */
318	char mac[5];	/* The current macro or nroff command */
319	int pl;
320
321	stktop = -1;
322	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
323		if (line[0] == '.') {
324			/*
325			 * find and isolate the macro/command name.
326			 */
327			strncpy(mac, line+1, 4);
328			mac[4] = '\0';
329			if (isspace((unsigned char)mac[0])) {
330				pe(lineno);
331				printf("Empty command\n");
332			} else if (isspace((unsigned char)mac[1])) {
333				mac[1] = 0;
334			} else if (isspace((unsigned char)mac[2])) {
335				mac[2] = 0;
336			} else if (mac[0] != '\\' || mac[1] != '\"') {
337				pe(lineno);
338				printf("Command too long\n");
339			}
340
341			/*
342			 * Is it a known command?
343			 */
344			checkknown(mac);
345
346			/*
347			 * Should we add it?
348			 */
349			if (eq(mac, "de"))
350				addcmd(line);
351
352			chkcmd(mac);
353		}
354
355		/*
356		 * At this point we process the line looking
357		 * for \s and \f.
358		 */
359		for (i=0; line[i]; i++)
360			if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
361				if (!sflag && line[++i]=='s') {
362					pl = line[++i];
363					if (isdigit((unsigned char)pl)) {
364						n = pl - '0';
365						pl = ' ';
366					} else
367						n = 0;
368					while (isdigit((unsigned char)line[++i]))
369						n = 10 * n + line[i] - '0';
370					i--;
371					if (n == 0) {
372						if (stktop >= 0 &&
373						    stk[stktop].opno == SZ) {
374							stktop--;
375						} else {
376							pe(lineno);
377							printf("unmatched \\s0\n");
378						}
379					} else {
380						stk[++stktop].opno = SZ;
381						stk[stktop].pl = pl;
382						stk[stktop].parm = n;
383						stk[stktop].lno = lineno;
384					}
385				} else if (!fflag && line[i]=='f') {
386					n = line[++i];
387					if (n == 'P') {
388						if (stktop >= 0 &&
389						    stk[stktop].opno == FT) {
390							stktop--;
391						} else {
392							pe(lineno);
393							printf("unmatched \\fP\n");
394						}
395					} else {
396						stk[++stktop].opno = FT;
397						stk[stktop].pl = 1;
398						stk[stktop].parm = n;
399						stk[stktop].lno = lineno;
400					}
401				}
402			}
403	}
404	/*
405	 * We've hit the end and look at all this stuff that hasn't been
406	 * matched yet!  Complain, complain.
407	 */
408	for (i=stktop; i>=0; i--) {
409		complain(i);
410	}
411}
412
413static void
414complain(int i)
415{
416	pe(stk[i].lno);
417	printf("Unmatched ");
418	prop(i);
419	printf("\n");
420}
421
422static void
423prop(int i)
424{
425	if (stk[i].pl == 0)
426		printf(".%s", br[stk[i].opno].opbr);
427	else switch(stk[i].opno) {
428	case SZ:
429		printf("\\s%c%d", stk[i].pl, stk[i].parm);
430		break;
431	case FT:
432		printf("\\f%c", stk[i].parm);
433		break;
434	default:
435		printf("Bug: stk[%d].opno = %d = .%s, .%s",
436			i, stk[i].opno, br[stk[i].opno].opbr,
437			br[stk[i].opno].clbr);
438	}
439}
440
441static void
442chkcmd(const char *mac)
443{
444	int i;
445
446	/*
447	 * Check to see if it matches top of stack.
448	 */
449	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
450		stktop--;	/* OK. Pop & forget */
451	else {
452		/* No. Maybe it's an opener */
453		for (i=0; br[i].opbr; i++) {
454			if (eq(mac, br[i].opbr)) {
455				/* Found. Push it. */
456				stktop++;
457				stk[stktop].opno = i;
458				stk[stktop].pl = 0;
459				stk[stktop].parm = 0;
460				stk[stktop].lno = lineno;
461				break;
462			}
463			/*
464			 * Maybe it's an unmatched closer.
465			 * NOTE: this depends on the fact
466			 * that none of the closers can be
467			 * openers too.
468			 */
469			if (eq(mac, br[i].clbr)) {
470				nomatch(mac);
471				break;
472			}
473		}
474	}
475}
476
477static void
478nomatch(const char *mac)
479{
480	int i, j;
481
482	/*
483	 * Look for a match further down on stack
484	 * If we find one, it suggests that the stuff in
485	 * between is supposed to match itself.
486	 */
487	for (j=stktop; j>=0; j--)
488		if (eq(mac,br[stk[j].opno].clbr)) {
489			/* Found.  Make a good diagnostic. */
490			if (j == stktop-2) {
491				/*
492				 * Check for special case \fx..\fR and don't
493				 * complain.
494				 */
495				if (stk[j+1].opno==FT && stk[j+1].parm!='R'
496				 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
497					stktop = j -1;
498					return;
499				}
500				/*
501				 * We have two unmatched frobs.  Chances are
502				 * they were intended to match, so we mention
503				 * them together.
504				 */
505				pe(stk[j+1].lno);
506				prop(j+1);
507				printf(" does not match %d: ", stk[j+2].lno);
508				prop(j+2);
509				printf("\n");
510			} else for (i=j+1; i <= stktop; i++) {
511				complain(i);
512			}
513			stktop = j-1;
514			return;
515		}
516	/* Didn't find one.  Throw this away. */
517	pe(lineno);
518	printf("Unmatched .%s\n", mac);
519}
520
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
static int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
527
528/* print the first part of an error message, given the line number */
529static void
530pe(int pelineno)
531{
532	if (nfiles > 1)
533		printf("%s: ", cfilename);
534	printf("%d: ", pelineno);
535}
536
537static void
538checkknown(const char *mac)
539{
540
541	if (eq(mac, "."))
542		return;
543	if (binsrch(mac) >= 0)
544		return;
545	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
546		return;
547
548	pe(lineno);
549	printf("Unknown command: .%s\n", mac);
550}
551
552/*
553 * We have a .de xx line in "line".  Add xx to the list of known commands.
554 */
555static void
556addcmd(char *line)
557{
558	char *mac;
559
560	/* grab the macro being defined */
561	mac = line+4;
562	while (isspace((unsigned char)*mac))
563		mac++;
564	if (*mac == 0) {
565		pe(lineno);
566		printf("illegal define: %s\n", line);
567		return;
568	}
569	mac[2] = 0;
570	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
571		mac[1] = 0;
572	if (ncmds >= MAXCMDS) {
573		printf("Only %d known commands allowed\n", MAXCMDS);
574		exit(1);
575	}
576	addmac(mac);
577}
578
579/*
580 * Add mac to the list.  We should really have some kind of tree
581 * structure here but this is a quick-and-dirty job and I just don't
582 * have time to mess with it.  (I wonder if this will come back to haunt
583 * me someday?)  Anyway, I claim that .de is fairly rare in user
584 * nroff programs, and the register loop below is pretty fast.
585 */
586static void
587addmac(const char *mac)
588{
589	const char **src, **dest, **loc;
590
591	if (binsrch(mac) >= 0){	/* it's OK to redefine something */
592#ifdef DEBUG
593		printf("binsrch(%s) -> already in table\n", mac);
594#endif /* DEBUG */
595		return;
596	}
597	/* binsrch sets slot as a side effect */
598#ifdef DEBUG
599	printf("binsrch(%s) -> %d\n", mac, slot);
600#endif
601	loc = &knowncmds[slot];
602	src = &knowncmds[ncmds-1];
603	dest = src+1;
604	while (dest > loc)
605		*dest-- = *src--;
606	if ((*loc = strdup(mac)) == NULL)
607		err(1, "strdup");
608	ncmds++;
609#ifdef DEBUG
610	printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2],
611	    knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1],
612	    knowncmds[slot+2], ncmds);
613#endif
614}
615
616/*
617 * Do a binary search in knowncmds for mac.
618 * If found, return the index.  If not, return -1.
619 */
620static int
621binsrch(const char *mac)
622{
623	const char *p;	/* pointer to current cmd in list */
624	int d;		/* difference if any */
625	int mid;	/* mid point in binary search */
626	int top, bot;	/* boundaries of bin search, inclusive */
627
628	top = ncmds-1;
629	bot = 0;
630	while (top >= bot) {
631		mid = (top+bot)/2;
632		p = knowncmds[mid];
633		d = p[0] - mac[0];
634		if (d == 0)
635			d = p[1] - mac[1];
636		if (d == 0)
637			return mid;
638		if (d < 0)
639			bot = mid + 1;
640		else
641			top = mid - 1;
642	}
643	slot = bot;	/* place it would have gone */
644	return -1;
645}
646