Deleted Added
sdiff udiff text old ( 219096 ) new ( 219126 )
full compact
1/*
2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifndef lint
31static const char copyright[] =
32"@(#) Copyright (c) 1980, 1993\n\
33 The Regents of the University of California. All rights reserved.\n";
34#endif /* not lint */
35
36#if 0
37#ifndef lint
38static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
39#endif /* not lint */
40#endif
41
42#include <sys/cdefs.h>
43__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 219096 2011-02-28 10:03:48Z brucec $");
44
45/*
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * we also attempt to match size and font changes, but only the embedded
48 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
49 * later but for now think of these restrictions as contributions to
50 * structured typesetting.
51 */
52#include <err.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <ctype.h>
57
/* Capacities of the fixed-size tables used by this program. */
#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/* Forward declarations for the routines defined later in this file. */
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);
void	checkknown(const char *mac);
void	chkcmd(const char *line, const char *mac);
void	complain(int i);
int	eq(const char *s1, const char *s2);
void	nomatch(const char *mac);
void	pe(int linen);
void	process(FILE *f);
void	prop(int i);
static void	usage(void);
74
/*
 * Stack of constructs that have been opened but not yet closed.
 * stktop is the index of the newest entry; process() sets it to -1
 * (empty) before it starts reading a file.
 */
struct stkstr {
	int	opno;	/* number of opening bracket (index into br[]) */
	int	pl;	/* '+', '-', ' ' for \s, 1 for \f, 0 for a macro such as .ft */
	int	parm;	/* parm to size, font, etc */
	int	lno;	/* line number the thing came in on */
};

struct stkstr	stk[MAXSTK];
int		stktop;
85
86/*
87 * The kinds of opening and closing brackets.
88 */
89struct brstr {
90 const char *opbr; /* name of the macro that opens the construct */
91 const char *clbr; /* name of the macro that closes it */
92} br[MAXBR] = {
93 /* A few bare bones troff commands */
/* SZ and FT name the indices of the first two entries of this table. */
94#define SZ 0
95 {"sz", "sz"}, /* also \s */
96#define FT 1
97 {"ft", "ft"}, /* also \f */
98 /* the -mm package */
99 {"AL", "LE"},
100 {"AS", "AE"},
101 {"BL", "LE"},
102 {"BS", "BE"},
103 {"DF", "DE"},
104 {"DL", "LE"},
105 {"DS", "DE"},
106 {"FS", "FE"},
107 {"ML", "LE"},
108 {"NS", "NE"},
109 {"RL", "LE"},
110 {"VL", "LE"},
111 /* the -ms package */
/* DS/DE and FS/FE below repeat pairs already listed for -mm. */
112 {"AB", "AE"},
113 {"BD", "DE"},
114 {"CD", "DE"},
115 {"DS", "DE"},
116 {"FS", "FE"},
117 {"ID", "DE"},
118 {"KF", "KE"},
119 {"KS", "KE"},
120 {"LD", "DE"},
121 {"LG", "NL"},
122 {"QS", "QE"},
123 {"RS", "RE"},
124 {"SM", "NL"},
125 {"XA", "XE"},
126 {"XS", "XE"},
127 /* The -me package */
128 {"(b", ")b"},
129 {"(c", ")c"},
130 {"(d", ")d"},
131 {"(f", ")f"},
132 {"(l", ")l"},
133 {"(q", ")q"},
134 {"(x", ")x"},
135 {"(z", ")z"},
136 /* Things needed by preprocessors */
137 {"EQ", "EN"},
138 {"TS", "TE"},
139 /* Refer */
140 {"[", "]"},
/*
 * End marker: chkcmd() and the -a option in main() walk the table until
 * they reach an entry whose opbr is null; -a stores new pairs from here on.
 */
141 {0, 0}
142};
143
144/*
145 * All commands known to nroff, plus macro packages.
146 * Used so we can complain about unrecognized commands.
147 */
148const char *knowncmds[MAXCMDS] = {
/*
 * Entries must stay in ascending byte order: binsrch() binary-searches
 * this table on the first two characters of each name, and addmac()
 * inserts new names at the position binsrch() reports.  The trailing 0
 * is what main() looks for when it counts the initial entries into ncmds.
 */
149"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
150"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
151"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
152"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
153"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
154"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
155"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
156"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
157"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
158"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
159"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
160"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
161"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
162"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
163"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
164"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
165"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
166"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
167"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
168"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
169"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
170"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
171"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
172"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
173"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
174"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
175"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
176"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
177"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
178"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
179"yr", 0
180};
181
/* Program-wide state shared by the routines below. */
const char	*cfilename;	/* name of current file */
int		lineno;		/* current line number in input file */
int		nfiles;		/* number of files to process */
int		fflag, sflag;	/* -f: ignore \f, -s: ignore \s */
int		ncmds;		/* size of knowncmds */
int		slot;		/* slot in knowncmds found by binsrch */
189
190int
191main(int argc, char **argv)
192{
193 FILE *f;
194 int i;
195 char *cp;
196 char b1[4];
197
198 /* Figure out how many known commands there are */
199 while (knowncmds[ncmds])
200 ncmds++;
201 while (argc > 1 && argv[1][0] == '-') {
202 switch(argv[1][1]) {
203
204 /* -a: add pairs of macros */
205 case 'a':
206 i = strlen(argv[1]) - 2;
207 if (i % 6 != 0)
208 usage();
209 /* look for empty macro slots */
210 for (i=0; br[i].opbr; i++)
211 ;
212 for (cp=argv[1]+3; cp[-1]; cp += 6) {
213 br[i].opbr = strncpy(malloc(3), cp, 2);
214 br[i].clbr = strncpy(malloc(3), cp+3, 2);
215 addmac(br[i].opbr); /* knows pairs are also known cmds */
216 addmac(br[i].clbr);
217 i++;
218 }
219 break;
220
221 /* -c: add known commands */
222 case 'c':
223 i = strlen(argv[1]) - 2;
224 if (i % 3 != 0)
225 usage();
226 for (cp=argv[1]+3; cp[-1]; cp += 3) {
227 if (cp[2] && cp[2] != '.')
228 usage();
229 strncpy(b1, cp, 2);
230 b1[2] = '\0';
231 addmac(b1);
232 }
233 break;
234
235 /* -f: ignore font changes */
236 case 'f':
237 fflag = 1;
238 break;
239
240 /* -s: ignore size changes */
241 case 's':
242 sflag = 1;
243 break;
244 default:
245 usage();
246 }
247 argc--; argv++;
248 }
249
250 nfiles = argc - 1;
251
252 if (nfiles > 0) {
253 for (i=1; i<argc; i++) {
254 cfilename = argv[i];
255 f = fopen(cfilename, "r");
256 if (f == NULL)
257 warn("%s", cfilename);
258 else {
259 process(f);
260 fclose(f);
261 }
262 }
263 } else {
264 cfilename = "stdin";
265 process(stdin);
266 }
267 exit(0);
268}
269
/* usage: write the command synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	fputs(
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n",
	    stderr);
	exit(1);
}
277
278void
279process(FILE *f)
280{
281 int i, n;
282 char mac[5]; /* The current macro or nroff command */
283 int pl;
284 static char line[256]; /* the current line */
285
286 stktop = -1;
287 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
288 if (line[0] == '.') {
289 /*
290 * find and isolate the macro/command name.
291 */
292 strncpy(mac, line+1, 4);
293 if (isspace(mac[0])) {
294 pe(lineno);
295 printf("Empty command\n");
296 } else if (isspace(mac[1])) {
297 mac[1] = 0;
298 } else if (isspace(mac[2])) {
299 mac[2] = 0;
300 } else if (mac[0] != '\\' || mac[1] != '\"') {
301 pe(lineno);
302 printf("Command too long\n");
303 }
304
305 /*
306 * Is it a known command?
307 */
308 checkknown(mac);
309
310 /*
311 * Should we add it?
312 */
313 if (eq(mac, "de"))
314 addcmd(line);
315
316 chkcmd(line, mac);
317 }
318
319 /*
320 * At this point we process the line looking
321 * for \s and \f.
322 */
323 for (i=0; line[i]; i++)
324 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
325 if (!sflag && line[++i]=='s') {
326 pl = line[++i];
327 if (isdigit(pl)) {
328 n = pl - '0';
329 pl = ' ';
330 } else
331 n = 0;
332 while (isdigit(line[++i]))
333 n = 10 * n + line[i] - '0';
334 i--;
335 if (n == 0) {
336 if (stk[stktop].opno == SZ) {
337 stktop--;
338 } else {
339 pe(lineno);
340 printf("unmatched \\s0\n");
341 }
342 } else {
343 stk[++stktop].opno = SZ;
344 stk[stktop].pl = pl;
345 stk[stktop].parm = n;
346 stk[stktop].lno = lineno;
347 }
348 } else if (!fflag && line[i]=='f') {
349 n = line[++i];
350 if (n == 'P') {
351 if (stk[stktop].opno == FT) {
352 stktop--;
353 } else {
354 pe(lineno);
355 printf("unmatched \\fP\n");
356 }
357 } else {
358 stk[++stktop].opno = FT;
359 stk[stktop].pl = 1;
360 stk[stktop].parm = n;
361 stk[stktop].lno = lineno;
362 }
363 }
364 }
365 }
366 /*
367 * We've hit the end and look at all this stuff that hasn't been
368 * matched yet! Complain, complain.
369 */
370 for (i=stktop; i>=0; i--) {
371 complain(i);
372 }
373}
374
375void
376complain(int i)
377{
378 pe(stk[i].lno);
379 printf("Unmatched ");
380 prop(i);
381 printf("\n");
382}
383
384void
385prop(int i)
386{
387 if (stk[i].pl == 0)
388 printf(".%s", br[stk[i].opno].opbr);
389 else switch(stk[i].opno) {
390 case SZ:
391 printf("\\s%c%d", stk[i].pl, stk[i].parm);
392 break;
393 case FT:
394 printf("\\f%c", stk[i].parm);
395 break;
396 default:
397 printf("Bug: stk[%d].opno = %d = .%s, .%s",
398 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
399 }
400}
401
402void
403chkcmd(const char *line __unused, const char *mac)
404{
405 int i;
406
407 /*
408 * Check to see if it matches top of stack.
409 */
410 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
411 stktop--; /* OK. Pop & forget */
412 else {
413 /* No. Maybe it's an opener */
414 for (i=0; br[i].opbr; i++) {
415 if (eq(mac, br[i].opbr)) {
416 /* Found. Push it. */
417 stktop++;
418 stk[stktop].opno = i;
419 stk[stktop].pl = 0;
420 stk[stktop].parm = 0;
421 stk[stktop].lno = lineno;
422 break;
423 }
424 /*
425 * Maybe it's an unmatched closer.
426 * NOTE: this depends on the fact
427 * that none of the closers can be
428 * openers too.
429 */
430 if (eq(mac, br[i].clbr)) {
431 nomatch(mac);
432 break;
433 }
434 }
435 }
436}
437
438void
439nomatch(const char *mac)
440{
441 int i, j;
442
443 /*
444 * Look for a match further down on stack
445 * If we find one, it suggests that the stuff in
446 * between is supposed to match itself.
447 */
448 for (j=stktop; j>=0; j--)
449 if (eq(mac,br[stk[j].opno].clbr)) {
450 /* Found. Make a good diagnostic. */
451 if (j == stktop-2) {
452 /*
453 * Check for special case \fx..\fR and don't
454 * complain.
455 */
456 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
457 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
458 stktop = j -1;
459 return;
460 }
461 /*
462 * We have two unmatched frobs. Chances are
463 * they were intended to match, so we mention
464 * them together.
465 */
466 pe(stk[j+1].lno);
467 prop(j+1);
468 printf(" does not match %d: ", stk[j+2].lno);
469 prop(j+2);
470 printf("\n");
471 } else for (i=j+1; i <= stktop; i++) {
472 complain(i);
473 }
474 stktop = j-1;
475 return;
476 }
477 /* Didn't find one. Throw this away. */
478 pe(lineno);
479 printf("Unmatched .%s\n", mac);
480}
481
/* eq: return 1 if s1 and s2 hold the same string, 0 otherwise. */
int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return (0);
	return (1);
}
488
489/* print the first part of an error message, given the line number */
490void
491pe(int linen)
492{
493 if (nfiles > 1)
494 printf("%s: ", cfilename);
495 printf("%d: ", linen);
496}
497
498void
499checkknown(const char *mac)
500{
501
502 if (eq(mac, "."))
503 return;
504 if (binsrch(mac) >= 0)
505 return;
506 if (mac[0] == '\\' && mac[1] == '"') /* comments */
507 return;
508
509 pe(lineno);
510 printf("Unknown command: .%s\n", mac);
511}
512
513/*
514 * We have a .de xx line in "line". Add xx to the list of known commands.
515 */
516void
517addcmd(char *line)
518{
519 char *mac;
520
521 /* grab the macro being defined */
522 mac = line+4;
523 while (isspace(*mac))
524 mac++;
525 if (*mac == 0) {
526 pe(lineno);
527 printf("illegal define: %s\n", line);
528 return;
529 }
530 mac[2] = 0;
531 if (isspace(mac[1]) || mac[1] == '\\')
532 mac[1] = 0;
533 if (ncmds >= MAXCMDS) {
534 printf("Only %d known commands allowed\n", MAXCMDS);
535 exit(1);
536 }
537 addmac(mac);
538}
539
540/*
541 * Add mac to the list. We should really have some kind of tree
542 * structure here but this is a quick-and-dirty job and I just don't
543 * have time to mess with it. (I wonder if this will come back to haunt
544 * me someday?) Anyway, I claim that .de is fairly rare in user
545 * nroff programs, and the register loop below is pretty fast.
546 */
547void
548addmac(const char *mac)
549{
550 const char **src, **dest, **loc;
551
552 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
553#ifdef DEBUG
554 printf("binsrch(%s) -> already in table\n", mac);
555#endif
556 return;
557 }
558 /* binsrch sets slot as a side effect */
559#ifdef DEBUG
560printf("binsrch(%s) -> %d\n", mac, slot);
561#endif
562 loc = &knowncmds[slot];
563 src = &knowncmds[ncmds-1];
564 dest = src+1;
565 while (dest > loc)
566 *dest-- = *src--;
567 *loc = strcpy(malloc(3), mac);
568 ncmds++;
569#ifdef DEBUG
570printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
571#endif
572}
573
574/*
575 * Do a binary search in knowncmds for mac.
576 * If found, return the index. If not, return -1.
577 */
578int
579binsrch(const char *mac)
580{
581 const char *p; /* pointer to current cmd in list */
582 int d; /* difference if any */
583 int mid; /* mid point in binary search */
584 int top, bot; /* boundaries of bin search, inclusive */
585
586 top = ncmds-1;
587 bot = 0;
588 while (top >= bot) {
589 mid = (top+bot)/2;
590 p = knowncmds[mid];
591 d = p[0] - mac[0];
592 if (d == 0)
593 d = p[1] - mac[1];
594 if (d == 0)
595 return mid;
596 if (d < 0)
597 bot = mid + 1;
598 else
599 top = mid - 1;
600 }
601 slot = bot; /* place it would have gone */
602 return -1;
603}