checknr.c revision 115601
1/* 2 * Copyright (c) 1980, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#ifndef lint 35static const char copyright[] = 36"@(#) Copyright (c) 1980, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38#endif /* not lint */ 39 40#if 0 41#ifndef lint 42static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 43#endif /* not lint */ 44#endif 45 46#include <sys/cdefs.h> 47__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 115601 2003-06-01 06:15:30Z tjr $"); 48 49/* 50 * checknr: check an nroff/troff input file for matching macro calls. 51 * we also attempt to match size and font changes, but only the embedded 52 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 53 * later but for now think of these restrictions as contributions to 54 * structured typesetting. 55 */ 56#include <stdio.h> 57#include <stdlib.h> 58#include <string.h> 59#include <ctype.h> 60 61#define MAXSTK 100 /* Stack size */ 62#define MAXBR 100 /* Max number of bracket pairs known */ 63#define MAXCMDS 500 /* Max number of commands known */ 64 65void addcmd(char *); 66void addmac(const char *); 67int binsrch(const char *); 68void checkknown(const char *); 69void chkcmd(const char *, const char *); 70void complain(int); 71int eq(const char *, const char *); 72void nomatch(const char *); 73void pe(int); 74void process(FILE *); 75void prop(int); 76static void usage(void); 77 78/* 79 * The stack on which we remember what we've seen so far. 80 */ 81struct stkstr { 82 int opno; /* number of opening bracket */ 83 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 84 int parm; /* parm to size, font, etc */ 85 int lno; /* line number the thing came in in */ 86} stk[MAXSTK]; 87int stktop; 88 89/* 90 * The kinds of opening and closing brackets. 91 */ 92struct brstr { 93 const char *opbr; 94 const char *clbr; 95} br[MAXBR] = { 96 /* A few bare bones troff commands */ 97#define SZ 0 98 {"sz", "sz"}, /* also \s */ 99#define FT 1 100 {"ft", "ft"}, /* also \f */ 101 /* the -mm package */ 102 {"AL", "LE"}, 103 {"AS", "AE"}, 104 {"BL", "LE"}, 105 {"BS", "BE"}, 106 {"DF", "DE"}, 107 {"DL", "LE"}, 108 {"DS", "DE"}, 109 {"FS", "FE"}, 110 {"ML", "LE"}, 111 {"NS", "NE"}, 112 {"RL", "LE"}, 113 {"VL", "LE"}, 114 /* the -ms package */ 115 {"AB", "AE"}, 116 {"BD", "DE"}, 117 {"CD", "DE"}, 118 {"DS", "DE"}, 119 {"FS", "FE"}, 120 {"ID", "DE"}, 121 {"KF", "KE"}, 122 {"KS", "KE"}, 123 {"LD", "DE"}, 124 {"LG", "NL"}, 125 {"QS", "QE"}, 126 {"RS", "RE"}, 127 {"SM", "NL"}, 128 {"XA", "XE"}, 129 {"XS", "XE"}, 130 /* The -me package */ 131 {"(b", ")b"}, 132 {"(c", ")c"}, 133 {"(d", ")d"}, 134 {"(f", ")f"}, 135 {"(l", ")l"}, 136 {"(q", ")q"}, 137 {"(x", ")x"}, 138 {"(z", ")z"}, 139 /* Things needed by preprocessors */ 140 {"EQ", "EN"}, 141 {"TS", "TE"}, 142 /* Refer */ 143 {"[", "]"}, 144 {0, 0} 145}; 146 147/* 148 * All commands known to nroff, plus macro packages. 149 * Used so we can complain about unrecognized commands. 150 */ 151const char *knowncmds[MAXCMDS] = { 152"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t", 153"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", 154"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", 155"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", 156"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2", 157"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", 158"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", 159"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", 160"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID", 161"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", 162"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", 163"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", 164"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA", 165"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA", 166"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE", 167"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL", 168"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0", 169"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>", 170"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd", 171"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs", 172"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", 173"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", 174"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i", 175"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 176"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", 177"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", 178"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", 179"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb", 180"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", 181"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", 182"yr", 0 183}; 184 185int lineno; /* current line number in input file */ 186const char *cfilename; /* name of current file */ 187int nfiles; /* number of files to process */ 188int fflag; /* -f: ignore \f */ 189int sflag; /* -s: ignore \s */ 190int ncmds; /* size of knowncmds */ 191int slot; /* slot in knowncmds found by binsrch */ 192 193int 194main(int argc, char **argv) 195{ 196 FILE *f; 197 int i; 198 char *cp; 199 char b1[4]; 200 201 /* Figure out how many known commands there are */ 202 while (knowncmds[ncmds]) 203 ncmds++; 204 while (argc > 1 && argv[1][0] == '-') { 205 switch(argv[1][1]) { 206 207 /* -a: add pairs of macros */ 208 case 'a': 209 i = strlen(argv[1]) - 2; 210 if (i % 6 != 0) 211 usage(); 212 /* look for empty macro slots */ 213 for (i=0; br[i].opbr; i++) 214 ; 215 for (cp=argv[1]+3; cp[-1]; cp += 6) { 216 br[i].opbr = strncpy(malloc(3), cp, 2); 217 br[i].clbr = strncpy(malloc(3), cp+3, 2); 218 addmac(br[i].opbr); /* knows pairs are also known cmds */ 219 addmac(br[i].clbr); 220 i++; 221 } 222 break; 223 224 /* -c: add known commands */ 225 case 'c': 226 i = strlen(argv[1]) - 2; 227 if (i % 3 != 0) 228 usage(); 229 for (cp=argv[1]+3; cp[-1]; cp += 3) { 230 if (cp[2] && cp[2] != '.') 231 usage(); 232 strncpy(b1, cp, 2); 233 b1[2] = '\0'; 234 addmac(b1); 235 } 236 break; 237 238 /* -f: ignore font changes */ 239 case 'f': 240 fflag = 1; 241 break; 242 243 /* -s: ignore size changes */ 244 case 's': 245 sflag = 1; 246 break; 247 default: 248 usage(); 249 } 250 argc--; argv++; 251 } 252 253 nfiles = argc - 1; 254 255 if (nfiles > 0) { 256 for (i=1; i<argc; i++) { 257 cfilename = argv[i]; 258 f = fopen(cfilename, "r"); 259 if (f == NULL) 260 perror(cfilename); 261 else { 262 process(f); 263 fclose(f); 264 } 265 } 266 } else { 267 cfilename = "stdin"; 268 process(stdin); 269 } 270 exit(0); 271} 272 273static void 274usage(void) 275{ 276 fprintf(stderr, 277 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n"); 278 exit(1); 279} 280 281void 282process(FILE *f) 283{ 284 int i, n; 285 char mac[5]; /* The current macro or nroff command */ 286 int pl; 287 static char line[256]; /* the current line */ 288 289 stktop = -1; 290 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 291 if (line[0] == '.') { 292 /* 293 * find and isolate the macro/command name. 294 */ 295 strncpy(mac, line+1, 4); 296 if (isspace(mac[0])) { 297 pe(lineno); 298 printf("Empty command\n"); 299 } else if (isspace(mac[1])) { 300 mac[1] = 0; 301 } else if (isspace(mac[2])) { 302 mac[2] = 0; 303 } else if (mac[0] != '\\' || mac[1] != '\"') { 304 pe(lineno); 305 printf("Command too long\n"); 306 } 307 308 /* 309 * Is it a known command? 310 */ 311 checkknown(mac); 312 313 /* 314 * Should we add it? 315 */ 316 if (eq(mac, "de")) 317 addcmd(line); 318 319 chkcmd(line, mac); 320 } 321 322 /* 323 * At this point we process the line looking 324 * for \s and \f. 325 */ 326 for (i=0; line[i]; i++) 327 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 328 if (!sflag && line[++i]=='s') { 329 pl = line[++i]; 330 if (isdigit(pl)) { 331 n = pl - '0'; 332 pl = ' '; 333 } else 334 n = 0; 335 while (isdigit(line[++i])) 336 n = 10 * n + line[i] - '0'; 337 i--; 338 if (n == 0) { 339 if (stk[stktop].opno == SZ) { 340 stktop--; 341 } else { 342 pe(lineno); 343 printf("unmatched \\s0\n"); 344 } 345 } else { 346 stk[++stktop].opno = SZ; 347 stk[stktop].pl = pl; 348 stk[stktop].parm = n; 349 stk[stktop].lno = lineno; 350 } 351 } else if (!fflag && line[i]=='f') { 352 n = line[++i]; 353 if (n == 'P') { 354 if (stk[stktop].opno == FT) { 355 stktop--; 356 } else { 357 pe(lineno); 358 printf("unmatched \\fP\n"); 359 } 360 } else { 361 stk[++stktop].opno = FT; 362 stk[stktop].pl = 1; 363 stk[stktop].parm = n; 364 stk[stktop].lno = lineno; 365 } 366 } 367 } 368 } 369 /* 370 * We've hit the end and look at all this stuff that hasn't been 371 * matched yet! Complain, complain. 372 */ 373 for (i=stktop; i>=0; i--) { 374 complain(i); 375 } 376} 377 378void 379complain(int i) 380{ 381 pe(stk[i].lno); 382 printf("Unmatched "); 383 prop(i); 384 printf("\n"); 385} 386 387void 388prop(int i) 389{ 390 if (stk[i].pl == 0) 391 printf(".%s", br[stk[i].opno].opbr); 392 else switch(stk[i].opno) { 393 case SZ: 394 printf("\\s%c%d", stk[i].pl, stk[i].parm); 395 break; 396 case FT: 397 printf("\\f%c", stk[i].parm); 398 break; 399 default: 400 printf("Bug: stk[%d].opno = %d = .%s, .%s", 401 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); 402 } 403} 404 405void 406chkcmd(const char *line __unused, const char *mac) 407{ 408 int i; 409 410 /* 411 * Check to see if it matches top of stack. 412 */ 413 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 414 stktop--; /* OK. Pop & forget */ 415 else { 416 /* No. Maybe it's an opener */ 417 for (i=0; br[i].opbr; i++) { 418 if (eq(mac, br[i].opbr)) { 419 /* Found. Push it. */ 420 stktop++; 421 stk[stktop].opno = i; 422 stk[stktop].pl = 0; 423 stk[stktop].parm = 0; 424 stk[stktop].lno = lineno; 425 break; 426 } 427 /* 428 * Maybe it's an unmatched closer. 429 * NOTE: this depends on the fact 430 * that none of the closers can be 431 * openers too. 432 */ 433 if (eq(mac, br[i].clbr)) { 434 nomatch(mac); 435 break; 436 } 437 } 438 } 439} 440 441void 442nomatch(const char *mac) 443{ 444 int i, j; 445 446 /* 447 * Look for a match further down on stack 448 * If we find one, it suggests that the stuff in 449 * between is supposed to match itself. 450 */ 451 for (j=stktop; j>=0; j--) 452 if (eq(mac,br[stk[j].opno].clbr)) { 453 /* Found. Make a good diagnostic. */ 454 if (j == stktop-2) { 455 /* 456 * Check for special case \fx..\fR and don't 457 * complain. 458 */ 459 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 460 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 461 stktop = j -1; 462 return; 463 } 464 /* 465 * We have two unmatched frobs. Chances are 466 * they were intended to match, so we mention 467 * them together. 468 */ 469 pe(stk[j+1].lno); 470 prop(j+1); 471 printf(" does not match %d: ", stk[j+2].lno); 472 prop(j+2); 473 printf("\n"); 474 } else for (i=j+1; i <= stktop; i++) { 475 complain(i); 476 } 477 stktop = j-1; 478 return; 479 } 480 /* Didn't find one. Throw this away. */ 481 pe(lineno); 482 printf("Unmatched .%s\n", mac); 483} 484 485/* eq: are two strings equal? */ 486int 487eq(const char *s1, const char *s2) 488{ 489 return (strcmp(s1, s2) == 0); 490} 491 492/* print the first part of an error message, given the line number */ 493void 494pe(int linen) 495{ 496 if (nfiles > 1) 497 printf("%s: ", cfilename); 498 printf("%d: ", linen); 499} 500 501void 502checkknown(const char *mac) 503{ 504 505 if (eq(mac, ".")) 506 return; 507 if (binsrch(mac) >= 0) 508 return; 509 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 510 return; 511 512 pe(lineno); 513 printf("Unknown command: .%s\n", mac); 514} 515 516/* 517 * We have a .de xx line in "line". Add xx to the list of known commands. 518 */ 519void 520addcmd(char *line) 521{ 522 char *mac; 523 524 /* grab the macro being defined */ 525 mac = line+4; 526 while (isspace(*mac)) 527 mac++; 528 if (*mac == 0) { 529 pe(lineno); 530 printf("illegal define: %s\n", line); 531 return; 532 } 533 mac[2] = 0; 534 if (isspace(mac[1]) || mac[1] == '\\') 535 mac[1] = 0; 536 if (ncmds >= MAXCMDS) { 537 printf("Only %d known commands allowed\n", MAXCMDS); 538 exit(1); 539 } 540 addmac(mac); 541} 542 543/* 544 * Add mac to the list. We should really have some kind of tree 545 * structure here but this is a quick-and-dirty job and I just don't 546 * have time to mess with it. (I wonder if this will come back to haunt 547 * me someday?) Anyway, I claim that .de is fairly rare in user 548 * nroff programs, and the register loop below is pretty fast. 549 */ 550void 551addmac(const char *mac) 552{ 553 const char **src, **dest, **loc; 554 555 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 556#ifdef DEBUG 557 printf("binsrch(%s) -> already in table\n", mac); 558#endif 559 return; 560 } 561 /* binsrch sets slot as a side effect */ 562#ifdef DEBUG 563printf("binsrch(%s) -> %d\n", mac, slot); 564#endif 565 loc = &knowncmds[slot]; 566 src = &knowncmds[ncmds-1]; 567 dest = src+1; 568 while (dest > loc) 569 *dest-- = *src--; 570 *loc = strcpy(malloc(3), mac); 571 ncmds++; 572#ifdef DEBUG 573printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); 574#endif 575} 576 577/* 578 * Do a binary search in knowncmds for mac. 579 * If found, return the index. If not, return -1. 580 */ 581int 582binsrch(const char *mac) 583{ 584 const char *p; /* pointer to current cmd in list */ 585 int d; /* difference if any */ 586 int mid; /* mid point in binary search */ 587 int top, bot; /* boundaries of bin search, inclusive */ 588 589 top = ncmds-1; 590 bot = 0; 591 while (top >= bot) { 592 mid = (top+bot)/2; 593 p = knowncmds[mid]; 594 d = p[0] - mac[0]; 595 if (d == 0) 596 d = p[1] - mac[1]; 597 if (d == 0) 598 return mid; 599 if (d < 0) 600 bot = mid + 1; 601 else 602 top = mid - 1; 603 } 604 slot = bot; /* place it would have gone */ 605 return -1; 606} 607