/* checknr.c revision 282437 */
/*
 * Copyright (c) 1980, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1993\n"
"	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#if 0
#ifndef lint
static char sccsid[] = "@(#)checknr.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif

#include <sys/cdefs.h>
#ifdef __FBSDID		/* only FreeBSD's <sys/cdefs.h> provides this */
__FBSDID("$FreeBSD: head/usr.bin/checknr/checknr.c 282437 2015-05-04 22:05:12Z bapt $");
#endif

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * we also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 *
 * Every function in this file is static.  Define CHECKNR_NO_MAIN to leave
 * out main() so that a test harness can include this file and drive the
 * checker directly.
 */
#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

static void	addcmd(char *);
static void	addmac(const char *);
static int	binsrch(const char *);
static void	checkknown(const char *);
static void	chkcmd(const char *, const char *);
static void	complain(int);
static int	eq(const char *, const char *);
static char	*macdup(const char *);
static void	nomatch(const char *);
static void	pe(int);
static void	process(FILE *);
static void	prop(int);
static void	push(int, int, int);
static void	scanline(const char *);
static void	usage(void);

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the top entry, or -1 when the stack is empty.
 */
static struct stkstr {
	int	opno;	/* number of opening bracket */
	int	pl;	/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int	parm;	/* parm to size, font, etc */
	int	lno;	/* line number */
} stk[MAXSTK];
static int stktop;

/*
 * The kinds of opening and closing brackets.
 * The list ends at the first entry whose opbr is NULL; -a appends to it.
 */
static struct brstr {
	const char *opbr;
	const char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
#define SZ	0
	{"sz", "sz"},	/* also \s */
#define FT	1
	{"ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"BD", "DE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	{"XA", "XE"},
	{"XS", "XE"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0}
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * The list must stay sorted: binsrch() does a binary search over it and
 * addmac() inserts new names at the position binsrch() reports.
 */
static const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

static int	lineno;		/* current line number in input file */
static const char *cfilename;	/* name of current file */
static int	nfiles;		/* number of files to process */
static int	fflag;		/* -f: ignore \f */
static int	sflag;		/* -s: ignore \s */
static int	ncmds;		/* size of knowncmds */
static int	slot;		/* slot in knowncmds found by binsrch */

#ifndef CHECKNR_NO_MAIN
/*
 * Parse the leading option arguments, then check each named file, or
 * standard input when no file is named.  A file that cannot be opened is
 * reported with warn() and skipped.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	size_t len;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros, given as ".xx.yy" groups */
		case 'a':
			len = strlen(argv[1]) - 2;
			if (len % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay {0, 0} to end the list */
				if (i >= MAXBR - 1)
					errx(1, "Only %d bracket pairs allowed",
					    MAXBR - 1);
				br[i].opbr = macdup(cp);
				br[i].clbr = macdup(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands, given as ".xx" groups */
		case 'c':
			len = strlen(argv[1]) - 2;
			if (len % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				warn("%s", cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}
#endif /* !CHECKNR_NO_MAIN */

/* Print the synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	fprintf(stderr,
	"usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
	exit(1);
}

/*
 * Check one input stream.  Each line that starts with '.' is treated as a
 * macro/command call; every line is then scanned for \s and \f escapes.
 * Whatever is still on the stack at end of input is reported as unmatched.
 * Input is read in pieces of at most sizeof(line) - 1 characters, so a
 * longer line is seen (and counted) as several lines.
 */
static void
process(FILE *f)
{
	int i;
	char mac[5];	/* The current macro or nroff command */
	static char line[256];	/* the current line */

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy() does not terminate when it copies the
			 * full count, so terminate explicitly.
			 */
			strncpy(mac, line + 1, sizeof mac - 1);
			mac[sizeof mac - 1] = '\0';
			if (mac[0] == '\0' || isspace((unsigned char)mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isspace((unsigned char)mac[1])) {
				mac[1] = 0;
			} else if (isspace((unsigned char)mac[2])) {
				mac[2] = 0;
			} else if (mac[1] != '\0' && mac[2] != '\0' &&
			    (mac[0] != '\\' || mac[1] != '\"')) {
				/* three or more characters and not a comment */
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		scanline(line);
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * Scan one line for embedded size (\s) and font (\f) changes.
 * \sN with N != 0 and \fX with X != 'P' are pushed; \s0 and \fP pop a
 * matching entry or are reported as unmatched.  A backslash that directly
 * follows another backslash does not start an escape.  sflag and fflag
 * turn off the \s and \f checks independently of each other.
 */
static void
scanline(const char *line)
{
	int i, n, pl;

	for (i = 0; line[i]; i++) {
		if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
			continue;
		i++;			/* step onto the escape name */
		if (line[i] == '\0')
			break;		/* backslash was the last character */
		if (!sflag && line[i] == 's') {
			n = 0;
			pl = (unsigned char)line[i + 1];
			if (pl != '\0') {
				i++;
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				}
				/* i always rests on the last character consumed */
				while (isdigit((unsigned char)line[i + 1])) {
					i++;
					/* stop accumulating before int overflows */
					if (n <= (INT_MAX - 9) / 10)
						n = 10 * n + (line[i] - '0');
				}
			}
			if (n == 0) {
				if (stktop >= 0 && stk[stktop].opno == SZ) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\s0\n");
				}
			} else {
				push(SZ, pl, n);
			}
		} else if (!fflag && line[i] == 'f') {
			if (line[i + 1] == '\0')
				break;	/* "\f" cut off by end of input */
			n = (unsigned char)line[++i];
			if (n == 'P') {
				if (stktop >= 0 && stk[stktop].opno == FT) {
					stktop--;
				} else {
					pe(lineno);
					printf("unmatched \\fP\n");
				}
			} else {
				push(FT, 1, n);
			}
		}
	}
}

/*
 * Push a new entry, stamped with the current line number, onto the stack.
 * Exits with an error when the nesting exceeds MAXSTK entries.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1)
		errx(1, "%s: %d: more than %d unmatched openers",
		    cfilename, lineno, MAXSTK);
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/* Report stack entry i as unmatched. */
static void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a macro
 * call, "\sN" for a size change, "\fX" for a font change.
 */
static void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr,
		    br[stk[i].opno].clbr);
	}
}

/*
 * Match command "mac" against the bracket table: pop if it closes the top
 * of the stack, push if it is an opener, and hand it to nomatch() if it is
 * a closer for something other than the top.  "line" is not used.
 */
static void
chkcmd(const char *line, const char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}
	/* No. Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found. Push it. */
			push(i, 0, 0);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * "mac" is a closer that does not match the top of the stack.  Report it,
 * and resynchronize the stack if its opener is found further down.
 */
static void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found.  Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j + 1].opno == FT && stk[j + 1].parm != 'R' &&
			    stk[j + 2].opno == FT && stk[j + 2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j + 1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j + 2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
static int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * print the first part of an error message, given the line number.
 * The file name is included only when more than one file is being checked.
 */
static void
pe(int linen)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", linen);
}

/*
 * Complain if "mac" is not a known command.  "." and names that start a
 * comment (\") are accepted silently.
 */
static void
checkknown(const char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The line is cut off in place right after the macro name.
 */
static void
addcmd(char *line)
{
	char *mac;
	int i;

	/* skip over ".de" without stepping past the end of a short line */
	mac = line;
	for (i = 0; i < 3 && *mac != '\0'; i++)
		mac++;
	/* grab the macro being defined */
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* keep one or two characters; never write beyond the terminator */
	if (mac[1] != '\0') {
		if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
			mac[1] = 0;
		else
			mac[2] = 0;
	}
	addmac(mac);
}

/*
 * Return a freshly allocated copy of at most the first two characters of
 * "mac".  Exits with an error if memory cannot be allocated.
 */
static char *
macdup(const char *mac)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL)
		err(1, "malloc");
	copy[0] = mac[0];
	copy[1] = (mac[0] != '\0') ? mac[1] : '\0';
	copy[2] = '\0';
	return (copy);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the shift below is pretty fast.
 *
 * A name that is already present is left alone.  Only the first two
 * characters of "mac" are stored.  Prints a message and exits with
 * status 1 when the table already holds MAXCMDS names.
 */
static void
addmac(const char *mac)
{
	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	/* open a gap at slot by moving the tail up one place */
	memmove(&knowncmds[slot + 1], &knowncmds[slot],
	    (size_t)(ncmds - slot) * sizeof knowncmds[0]);
	knowncmds[slot] = macdup(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: %s at slot %d, %d cmds\n",
	    knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1 and leave in "slot" the
 * index at which mac would have to be inserted.
 * Only the first two characters of each name take part in the comparison.
 */
static int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return (mid);
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return (-1);
}