/* checknr.c revision 92920 */
/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n\
	The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */

#if 0
#ifndef lint
static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif

#include <sys/cdefs.h>
/* Only expand the ident macro where <sys/cdefs.h> actually provides it. */
#ifdef __FBSDID
__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 92920 2002-03-22 01:22:50Z imp $");
#endif

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * we also attempt to match size and font changes, but only the embedded
 * kind. These must end in \s0 and \fP resp. Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

void addcmd(char *);
void addmac(const char *);
int binsrch(const char *);
void checkknown(char *);
void chkcmd(char *, char *);
void complain(int);
int eq(const char *, const char *);
void nomatch(char *);
void pe(int);
void process(FILE *);
void prop(int);
static void usage(void);
static int isend(int);
static char *savename(const char *);
static void stkpush(int, int, int);

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/*
 * The kinds of opening and closing brackets.
 * The list ends at the first entry whose opbr is null, so at least one
 * slot must always be left unused.
 */
struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{"sz", "sz"},	/* also \s */
#define FT	1
	{"ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"BD", "DE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	{"XA", "XE"},
	{"XS", "XE"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0}
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table, so it must stay ordered by the first
 * two characters of each name.
 */
const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int lineno;		/* current line number in input file */
const char *cfilename;	/* name of current file */
int nfiles;		/* number of files to process */
int fflag;		/* -f: ignore \f */
int sflag;		/* -s: ignore \s */
int ncmds;		/* size of knowncmds */
int slot;		/* slot in knowncmds found by binsrch */

/*
 * Parse the option arguments, then check every named file, or the
 * standard input when no file is named.  Always exits with status 0
 * unless usage() or a table overflow terminates the program first.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			/* each pair is exactly ".xx.yy": six characters */
			if ((strlen(argv[1]) - 2) % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot stays empty as terminator */
				if (i >= MAXBR - 1) {
					fprintf(stderr,
					    "Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savename(cp);
				br[i].clbr = savename(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			/* each command is exactly ".xx": three characters */
			if ((strlen(argv[1]) - 2) % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				/* don't hold one descriptor per input file */
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	fprintf(stderr,
	"usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
	exit(1);
}

/* True when c ends a command name: the string terminator or white space. */
static int
isend(int c)
{
	return (c == '\0' || isspace((unsigned char)c));
}

/*
 * Return a freshly allocated copy of at most the first two characters
 * of src, always NUL-terminated.  Exits if memory cannot be obtained.
 * The copies live in the tables for the rest of the run and are never
 * freed.
 */
static char *
savename(const char *src)
{
	char *copy;
	size_t len;

	copy = malloc(3);
	if (copy == NULL) {
		perror("malloc");
		exit(1);
	}
	for (len = 0; len < 2 && src[len] != '\0'; len++)
		copy[len] = src[len];
	copy[len] = '\0';
	return (copy);
}

/*
 * Push a new entry for the current input line on the stack.
 * Exits with status 1 rather than writing past the end of stk[].
 */
static void
stkpush(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("More than %d unmatched items; giving up\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * Check one input stream.  Lines starting with '.' are treated as
 * commands: the name is validated, ".de" definitions are recorded and
 * bracket pairs are matched.  Every line is then scanned for embedded
 * \s and \f escapes (unless disabled by -s / -f).  Whatever is still on
 * the stack at end of input is reported as unmatched.
 *
 * Input is read in pieces of at most sizeof(line)-1 characters, so a
 * longer physical line is seen, and counted, as several lines.
 */
void
process(FILE *f)
{
	int i, n, digit;
	char mac[5];	/* The current macro or nroff command */
	int pl;
	static char line[256];	/* the current line */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy zero-pads a short name but does not
			 * terminate a long one, hence the explicit NUL.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (isend(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isend(mac[1])) {
				mac[1] = 0;
			} else if (isend(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			/* only a backslash not preceded by one starts an escape */
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;
			/*
			 * Step onto the escape's name.  This happens whether
			 * or not -s was given, so that \f is still seen.
			 */
			i++;
			if (line[i] == '\0')
				break;	/* backslash was the last character */

			if (line[i] == 's' && !sflag) {
				if (line[i + 1] == '\0')
					break;	/* escape cut off by end of buffer */
				pl = (unsigned char)line[++i];
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				/* i is left on the last character consumed */
				while (isdigit((unsigned char)line[i + 1])) {
					digit = line[++i] - '0';
					if (n <= (INT_MAX - digit) / 10)
						n = 10 * n + digit;
					else
						n = INT_MAX;	/* saturate, don't overflow */
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					stkpush(SZ, pl, n);
				}
			} else if (line[i] == 'f' && !fflag) {
				if (line[i + 1] == '\0')
					break;	/* escape cut off by end of buffer */
				n = (unsigned char)line[++i];
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					stkpush(FT, 1, n);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* Report stack entry i as unmatched, prefixed by the line it came from. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it would have appeared in the input:
 * ".xx" for a macro, "\s<sign><n>" for a size change, "\f<c>" for a
 * font change.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket table: pop if it closes the
 * entry on top of the stack, push if it is an opener, and hand an
 * out-of-place closer to nomatch().  The line argument is not used.
 */
void
chkcmd(char *line, char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}

	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			stkpush(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * Closer mac did not match the top of the stack.  If an entry further
 * down is closed by it, report what lies in between and unwind to below
 * that entry; otherwise report mac itself as unmatched.
 */
void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop - 2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
				 && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j + 1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j + 2);
				printf("\n");
			} else for (i = j + 1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j - 1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * print the first part of an error message, given the line number.
 * The file name is included only when more than one file is checked.
 */
void
pe(int linen)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", linen);
}

/*
 * Complain unless mac is ".", a known command, or the start of a
 * comment (\").
 */
void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line is cut off right after the name, as a side effect.
 */
void
addcmd(char *line)
{
	char *mac;

	/*
	 * grab the macro being defined.  Start right behind ".de" rather
	 * than one further on: when the line ends there, the terminator
	 * sits at line[3] and must not be stepped over.
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/*
	 * Names have one or two characters.  Only touch mac[2] when
	 * mac[1] is a real character, so we never write past the string.
	 */
	if (mac[1] == '\0' || isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	else
		mac[2] = 0;
	addmac(mac);
}

/*
 * Add mac to the list, keeping the list sorted.  A name already present
 * is left alone.  Only the first two characters of mac are stored.
 * Exits with status 1 when the table is full.  We should really have
 * some kind of tree structure here but this is a quick-and-dirty job;
 * .de is fairly rare in user nroff programs.
 */
void
addmac(const char *mac)
{
	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	/* checked here so that -a and -c are covered as well as .de */
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a hole at slot by moving the tail up one place */
	memmove(&knowncmds[slot + 1], &knowncmds[slot],
	    (size_t)(ncmds - slot) * sizeof knowncmds[0]);
	knowncmds[slot] = savename(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: %s at %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1 and leave in "slot"
 * the index at which mac would have to be inserted.
 * Only the first two characters of each name are compared.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = bot + (top - bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}