/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n"
" The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */

#if 0
#ifndef lint
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif

#include <sys/cdefs.h>
#ifdef __FBSDID		/* only FreeBSD's <sys/cdefs.h> provides this macro */
__FBSDID("$FreeBSD: src/usr.bin/checknr/checknr.c,v 1.9 2004/07/15 04:42:47 tjr Exp $");
#endif

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

void addcmd(char *);
void addmac(const char *);
int binsrch(const char *);
void checkknown(const char *);
void chkcmd(const char *, const char *);
void complain(int);
int eq(const char *, const char *);
void nomatch(const char *);
void pe(int);
void process(FILE *);
void prop(int);
static int endofname(int);
static void push(int, int, int, int);
static char *savemac(const char *);
static void usage(void);

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the topmost entry, or -1 when the stack is empty.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is null; -a appends
 * further pairs in front of that terminator.
 */
struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{"sz", "sz"},	/* also \s */
#define FT	1
	{"ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"BD", "DE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	{"XA", "XE"},
	{"XS", "XE"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0}
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table, so it must stay ordered by the first
 * two characters of each name; addmac() inserts new names in place.
 */
const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;			/* current line number in input file */
const char *cfilename;		/* name of current file */
int	nfiles;			/* number of files to process */
int	fflag;			/* -f: ignore \f */
int	sflag;			/* -s: ignore \s */
int	ncmds;			/* size of knowncmds */
int	slot;			/* slot in knowncmds found by binsrch */

/*
 * Parse the options, then check each named file (or standard input when
 * no file is named).  Options must precede the file names; each one is a
 * separate argument of the form shown by usage().
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros, written as .xx.yy.xx.yy... */
		case 'a':
			i = strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay the null terminator */
				if (i >= MAXBR - 1)
					errx(1, "too many bracket pairs (limit %d)",
					    MAXBR - 1);
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands, written as .xx.xx.xx... */
		case 'c':
			i = strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				strncpy(b1, cp, 2);
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				warn("%s", cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the synopsis on standard error and exit with status 1. */
static void
usage(void)
{
	fprintf(stderr,
	"usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
	exit(1);
}

/*
 * True when c ends a macro name: white space, or the NUL found when the
 * last input line has no trailing newline.
 */
static int
endofname(int c)
{
	return (c == '\0' || isspace((unsigned char)c));
}

/*
 * Return a freshly allocated copy of at most the first two characters of
 * name; macro names are never longer than that.  Exits on allocation
 * failure.  The copy is never freed: it lives in br[] or knowncmds[] for
 * the rest of the run.
 */
static char *
savemac(const char *name)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL)
		err(1, NULL);
	copy[0] = name[0];
	copy[1] = (name[0] != '\0') ? name[1] : '\0';
	copy[2] = '\0';
	return (copy);
}

/*
 * Push one entry onto stk[].  Input nested deeper than MAXSTK levels is
 * reported and the program exits, rather than writing past the array.
 */
static void
push(int opno, int pl, int parm, int lno)
{
	if (stktop >= MAXSTK - 1) {
		pe(lno);
		printf("Nesting too deep (more than %d levels)\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lno;
}

/*
 * Check one input stream.  Every line that starts with '.' is treated as
 * a macro/command call: it is looked up in knowncmds[], recorded if it is
 * a .de definition, and matched against the bracket stack.  Every line is
 * then scanned for embedded \s and \f escapes (unless disabled by -s/-f).
 * Whatever is still on the stack at end of input is reported as unmatched.
 *
 * Lines are read into a 256-byte buffer, so a longer input line is seen
 * (and counted) as several lines.
 */
void
process(FILE *f)
{
	int i, n, c;
	char mac[5];	/* The current macro or nroff command */
	int pl;
	static char line[256];	/* the current line */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy() does not terminate when it copies all
			 * four characters, so do that by hand.
			 */
			strncpy(mac, line+1, 4);
			mac[4] = '\0';
			if (endofname(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (endofname(mac[1])) {
				mac[1] = 0;
			} else if (endofname(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i] != '\0'; i++) {
			/* only a backslash that is not itself escaped counts */
			if (line[i] != '\\' || (i > 0 && line[i-1] == '\\'))
				continue;
			c = (unsigned char)line[i+1];
			if (c == '\0')
				break;	/* lone backslash at end of input */
			/*
			 * Always consume the escape letter, so that -s does
			 * not stop \f from being recognized.
			 */
			i++;
			if (c == 's' && !sflag) {
				/*
				 * \sN, \s+N or \s-N: the character after 's'
				 * is either a sign (kept in pl) or the first
				 * digit; further digits extend the size.
				 */
				n = 0;
				pl = (unsigned char)line[i+1];
				if (pl != '\0') {
					i++;
					if (isdigit(pl)) {
						n = pl - '0';
						pl = ' ';
					}
					while (isdigit((unsigned char)line[i+1])) {
						i++;
						/* ignore digits that would overflow */
						if (n <= (INT_MAX - 9) / 10)
							n = 10 * n + (line[i] - '0');
					}
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n, lineno);
				}
			} else if (c == 'f' && !fflag) {
				n = (unsigned char)line[i+1];
				if (n == '\0')
					break;	/* \f cut off at end of input */
				i++;
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n, lineno);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/* Report stack entry i as unmatched, prefixed with the line it came from. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro call, "\s<sign><n>" for a size change, "\f<c>" for a font change.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match the command mac against the bracket stack: pop when it closes
 * the top entry, push when it is an opener, and hand it to nomatch()
 * when it is a closer that does not match the top.  Commands that are
 * neither openers nor closers are ignored.  The line argument is unused.
 */
void
chkcmd(const char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0, lineno);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * The closer mac did not match the top of the stack.  Search further
 * down; when an opener for it is found, report everything above that
 * opener and discard it together with the opener.  Otherwise report the
 * closer itself as unmatched.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop-2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
				 && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j+1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j+2);
				printf("\n");
			} else for (i = j+1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j-1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * Print the first part of an error message, given the line number.
 * The file name is included only when more than one file was named.
 */
void
pe(int linen)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", linen);
}

/*
 * Complain when mac is not in knowncmds[].  A lone "." and the comment
 * introducer \" are accepted silently.
 */
void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line must begin with ".de".  It is modified in place: it is cut
 * off right after the macro name.
 */
void
addcmd(char *line)
{
	char *mac;

	/*
	 * Grab the macro being defined.  Start right after ".de" so that
	 * a bare ".de" with nothing following is never read past its end.
	 */
	mac = line+3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* names are one or two characters; look at mac[1] before mac[2] */
	if (mac[1] == '\0' || isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	else
		mac[2] = 0;
	addmac(mac);	/* enforces the MAXCMDS limit */
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and shifting the tail of the table is pretty fast.
 *
 * A name that is already known is left alone.  Only the first two
 * characters of mac are stored.  When the table is full a message is
 * printed and the program exits with status 1.
 */
void
addmac(const char *mac)
{
	const char **loc;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail up one place */
	loc = &knowncmds[slot];
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof(*loc));
	*loc = savemac(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: knowncmds[%d] = %s, %d cmds\n", slot, knowncmds[slot],
	    ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1 and leave in the global
 * "slot" the index at which mac would have to be inserted.
 * Only the first two characters of each name are compared.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds-1;
	bot = 0;
	while (top >= bot) {
		mid = (top+bot)/2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}