1/* $NetBSD: checknr.c,v 1.25 2021/04/13 01:38:04 mrg Exp $ */ 2 3/* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#ifndef lint 34__COPYRIGHT("@(#) Copyright (c) 1980, 1993\ 35 The Regents of the University of California. All rights reserved."); 36#endif /* not lint */ 37 38#ifndef lint 39#if 0 40static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; 41#else 42__RCSID("$NetBSD: checknr.c,v 1.25 2021/04/13 01:38:04 mrg Exp $"); 43#endif 44#endif /* not lint */ 45 46/* 47 * checknr: check an nroff/troff input file for matching macro calls. 48 * we also attempt to match size and font changes, but only the embedded 49 * kind. These must end in \s0 and \fP resp. Maybe more sophistication 50 * later but for now think of these restrictions as contributions to 51 * structured typesetting. 52 */ 53#include <ctype.h> 54#include <err.h> 55#include <stdio.h> 56#include <stdlib.h> 57#include <string.h> 58 59#define MAXSTK 100 /* Stack size */ 60#define MAXBR 100 /* Max number of bracket pairs known */ 61#define MAXCMDS 500 /* Max number of commands known */ 62 63/* 64 * The stack on which we remember what we've seen so far. 65 */ 66static struct stkstr { 67 int opno; /* number of opening bracket */ 68 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ 69 int parm; /* parm to size, font, etc */ 70 int lno; /* line number the thing came in in */ 71} stk[MAXSTK]; 72static int stktop; 73 74/* 75 * The kinds of opening and closing brackets. 76 */ 77static struct brstr { 78 const char *opbr; 79 const char *clbr; 80} br[MAXBR] = { 81 /* A few bare bones troff commands */ 82#define SZ 0 83 { "sz", "sz"}, /* also \s */ 84#define FT 1 85 { "ft", "ft"}, /* also \f */ 86 /* the -mm package */ 87 {"AL", "LE"}, 88 {"AS", "AE"}, 89 {"BL", "LE"}, 90 {"BS", "BE"}, 91 {"DF", "DE"}, 92 {"DL", "LE"}, 93 {"DS", "DE"}, 94 {"FS", "FE"}, 95 {"ML", "LE"}, 96 {"NS", "NE"}, 97 {"RL", "LE"}, 98 {"VL", "LE"}, 99 /* the -ms package */ 100 {"AB", "AE"}, 101 {"BD", "DE"}, 102 {"CD", "DE"}, 103 {"DS", "DE"}, 104 {"FS", "FE"}, 105 {"ID", "DE"}, 106 {"KF", "KE"}, 107 {"KS", "KE"}, 108 {"LD", "DE"}, 109 {"LG", "NL"}, 110 {"QS", "QE"}, 111 {"RS", "RE"}, 112 {"SM", "NL"}, 113 {"XA", "XE"}, 114 {"XS", "XE"}, 115 /* The -me package */ 116 {"(b", ")b"}, 117 {"(c", ")c"}, 118 {"(d", ")d"}, 119 {"(f", ")f"}, 120 {"(l", ")l"}, 121 {"(q", ")q"}, 122 {"(x", ")x"}, 123 {"(z", ")z"}, 124 /* The -mdoc package */ 125 {"Ao", "Ac"}, 126 {"Bd", "Ed"}, 127 {"Bk", "Ek"}, 128 {"Bo", "Bc"}, 129 {"Do", "Dc"}, 130 {"Fo", "Fc"}, 131 {"Oo", "Oc"}, 132 {"Po", "Pc"}, 133 {"Qo", "Qc"}, 134 {"Rs", "Re"}, 135 {"So", "Sc"}, 136 {"Xo", "Xc"}, 137 /* Things needed by preprocessors */ 138 {"EQ", "EN"}, 139 {"TS", "TE"}, 140 /* Refer */ 141 {"[", "]"}, 142 {0, 0} 143}; 144 145/* 146 * All commands known to nroff, plus macro packages. 147 * Used so we can complain about unrecognized commands. 148 */ 149static const char *knowncmds[MAXCMDS] = { 150"$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", 151"%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", 152"(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", 153")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", 154"@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", 155"@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", 156"AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B" , "B1", 157"B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", 158"Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", 159"Cd", "Cm", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", 160"Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", 161"EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", 162"Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", 163"FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", 164"H" , "HC", "HD", "HM", "HO", "HU", "I" , "ID", "IE", "IH", "IM", 165"IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB", 166"LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", 167"MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", 168"Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", 169"Os", "Ot", "Ox", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", 170"Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", 171"Qo", "Qq", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", 172"Re", "Rs", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", 173"SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", 174"T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", 175"TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", 176"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", 177"Xr", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", 178"[]", "\\{", "\\}", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", 179"ar", "as", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", 180"c.", "c2", "cc", "ce", "cf", "ch", "cs", "ct", "cu", "da", "de", 181"di", "dl", "dn", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", 182"em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo", "fp", "ft", 183"fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i" , "ie", 184"if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln", 185"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", 186"n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", 187"ns", "nx", "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", 188"po", "pp", "ps", "q" , "r" , "rb", "rd", "re", "rm", "rn", "ro", 189"rr", "rs", "rt", "sb", "sc", "sh", "sk", "so", "sp", "ss", "st", 190"sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "u", 191"uf", "uh", "ul", "vs", "wh", "xp", "yr", 0 192}; 193 194static int lineno; /* current line number in input file */ 195static const char *cfilename; /* name of current file */ 196static int nfiles; /* number of files to process */ 197static int fflag; /* -f: ignore \f */ 198static int sflag; /* -s: ignore \s */ 199static int ncmds; /* size of knowncmds */ 200static int slot; /* slot in knowncmds found by binsrch */ 201 202static void addcmd(char *); 203static void addmac(const char *); 204static int binsrch(const char *); 205static void checkknown(const char *); 206static void chkcmd(const char *); 207static void complain(int); 208static int eq(const char *, const char *); 209static void nomatch(const char *); 210static void pe(int); 211static void process(FILE *); 212static void prop(int); 213static void usage(void) __dead; 214 215int 216main(int argc, char **argv) 217{ 218 FILE *f; 219 int i; 220 char *cp; 221 char b1[4]; 222 223 /* Figure out how many known commands there are */ 224 while (knowncmds[ncmds]) 225 ncmds++; 226 while (argc > 1 && argv[1][0] == '-') { 227 switch(argv[1][1]) { 228 229 /* -a: add pairs of macros */ 230 case 'a': 231 i = strlen(argv[1]) - 2; 232 if (i % 6 != 0) 233 usage(); 234 /* look for empty macro slots */ 235 for (i=0; br[i].opbr; i++) 236 ; 237 for (cp=argv[1]+3; cp[-1]; cp += 6) { 238 char *tmp; 239 240 if (i >= MAXBR) 241 errx(1, "too many pairs"); 242 if ((tmp = malloc(3)) == NULL) 243 err(1, "malloc"); 244 strlcpy(tmp, cp, 3); 245 br[i].opbr = tmp; 246 if ((tmp = malloc(3)) == NULL) 247 err(1, "malloc"); 248 strlcpy(tmp, cp+3, 3); 249 br[i].clbr = tmp; 250 addmac(br[i].opbr); /* knows pairs are also known cmds */ 251 addmac(br[i].clbr); 252 i++; 253 } 254 break; 255 256 /* -c: add known commands */ 257 case 'c': 258 i = strlen(argv[1]) - 2; 259 if (i % 3 != 0) 260 usage(); 261 for (cp=argv[1]+3; cp[-1]; cp += 3) { 262 if (cp[2] && cp[2] != '.') 263 usage(); 264 strncpy(b1, cp, 2); 265 addmac(b1); 266 } 267 break; 268 269 /* -f: ignore font changes */ 270 case 'f': 271 fflag = 1; 272 break; 273 274 /* -s: ignore size changes */ 275 case 's': 276 sflag = 1; 277 break; 278 default: 279 usage(); 280 } 281 argc--; argv++; 282 } 283 284 nfiles = argc - 1; 285 286 if (nfiles > 0) { 287 for (i=1; i<argc; i++) { 288 cfilename = argv[i]; 289 f = fopen(cfilename, "r"); 290 if (f == NULL) 291 perror(cfilename); 292 else { 293 process(f); 294 fclose(f); 295 } 296 } 297 } else { 298 cfilename = "stdin"; 299 process(stdin); 300 } 301 exit(0); 302} 303 304static void 305usage(void) 306{ 307 (void)fprintf(stderr, 308 "usage: %s [-fs] [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] file\n", 309 getprogname()); 310 exit(1); 311} 312 313static void 314process(FILE *f) 315{ 316 int i, n; 317 char line[256]; /* the current line */ 318 char mac[5]; /* The current macro or nroff command */ 319 int pl; 320 321 stktop = -1; 322 for (lineno = 1; fgets(line, sizeof line, f); lineno++) { 323 if (line[0] == '.') { 324 /* 325 * find and isolate the macro/command name. 326 */ 327 strncpy(mac, line+1, 4); 328 mac[4] = '\0'; 329 if (isspace((unsigned char)mac[0])) { 330 pe(lineno); 331 printf("Empty command\n"); 332 } else if (isspace((unsigned char)mac[1])) { 333 mac[1] = 0; 334 } else if (isspace((unsigned char)mac[2])) { 335 mac[2] = 0; 336 } else if (mac[0] != '\\' || mac[1] != '\"') { 337 pe(lineno); 338 printf("Command too long\n"); 339 } 340 341 /* 342 * Is it a known command? 343 */ 344 checkknown(mac); 345 346 /* 347 * Should we add it? 348 */ 349 if (eq(mac, "de")) 350 addcmd(line); 351 352 chkcmd(mac); 353 } 354 355 /* 356 * At this point we process the line looking 357 * for \s and \f. 358 */ 359 for (i=0; line[i]; i++) 360 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) { 361 if (!sflag && line[++i]=='s') { 362 pl = line[++i]; 363 if (isdigit((unsigned char)pl)) { 364 n = pl - '0'; 365 pl = ' '; 366 } else 367 n = 0; 368 while (isdigit((unsigned char)line[++i])) 369 n = 10 * n + line[i] - '0'; 370 i--; 371 if (n == 0) { 372 if (stktop >= 0 && 373 stk[stktop].opno == SZ) { 374 stktop--; 375 } else { 376 pe(lineno); 377 printf("unmatched \\s0\n"); 378 } 379 } else { 380 stk[++stktop].opno = SZ; 381 stk[stktop].pl = pl; 382 stk[stktop].parm = n; 383 stk[stktop].lno = lineno; 384 } 385 } else if (!fflag && line[i]=='f') { 386 n = line[++i]; 387 if (n == 'P') { 388 if (stktop >= 0 && 389 stk[stktop].opno == FT) { 390 stktop--; 391 } else { 392 pe(lineno); 393 printf("unmatched \\fP\n"); 394 } 395 } else { 396 stk[++stktop].opno = FT; 397 stk[stktop].pl = 1; 398 stk[stktop].parm = n; 399 stk[stktop].lno = lineno; 400 } 401 } 402 } 403 } 404 /* 405 * We've hit the end and look at all this stuff that hasn't been 406 * matched yet! Complain, complain. 407 */ 408 for (i=stktop; i>=0; i--) { 409 complain(i); 410 } 411} 412 413static void 414complain(int i) 415{ 416 pe(stk[i].lno); 417 printf("Unmatched "); 418 prop(i); 419 printf("\n"); 420} 421 422static void 423prop(int i) 424{ 425 if (stk[i].pl == 0) 426 printf(".%s", br[stk[i].opno].opbr); 427 else switch(stk[i].opno) { 428 case SZ: 429 printf("\\s%c%d", stk[i].pl, stk[i].parm); 430 break; 431 case FT: 432 printf("\\f%c", stk[i].parm); 433 break; 434 default: 435 printf("Bug: stk[%d].opno = %d = .%s, .%s", 436 i, stk[i].opno, br[stk[i].opno].opbr, 437 br[stk[i].opno].clbr); 438 } 439} 440 441static void 442chkcmd(const char *mac) 443{ 444 int i; 445 446 /* 447 * Check to see if it matches top of stack. 448 */ 449 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) 450 stktop--; /* OK. Pop & forget */ 451 else { 452 /* No. Maybe it's an opener */ 453 for (i=0; br[i].opbr; i++) { 454 if (eq(mac, br[i].opbr)) { 455 /* Found. Push it. */ 456 stktop++; 457 stk[stktop].opno = i; 458 stk[stktop].pl = 0; 459 stk[stktop].parm = 0; 460 stk[stktop].lno = lineno; 461 break; 462 } 463 /* 464 * Maybe it's an unmatched closer. 465 * NOTE: this depends on the fact 466 * that none of the closers can be 467 * openers too. 468 */ 469 if (eq(mac, br[i].clbr)) { 470 nomatch(mac); 471 break; 472 } 473 } 474 } 475} 476 477static void 478nomatch(const char *mac) 479{ 480 int i, j; 481 482 /* 483 * Look for a match further down on stack 484 * If we find one, it suggests that the stuff in 485 * between is supposed to match itself. 486 */ 487 for (j=stktop; j>=0; j--) 488 if (eq(mac,br[stk[j].opno].clbr)) { 489 /* Found. Make a good diagnostic. */ 490 if (j == stktop-2) { 491 /* 492 * Check for special case \fx..\fR and don't 493 * complain. 494 */ 495 if (stk[j+1].opno==FT && stk[j+1].parm!='R' 496 && stk[j+2].opno==FT && stk[j+2].parm=='R') { 497 stktop = j -1; 498 return; 499 } 500 /* 501 * We have two unmatched frobs. Chances are 502 * they were intended to match, so we mention 503 * them together. 504 */ 505 pe(stk[j+1].lno); 506 prop(j+1); 507 printf(" does not match %d: ", stk[j+2].lno); 508 prop(j+2); 509 printf("\n"); 510 } else for (i=j+1; i <= stktop; i++) { 511 complain(i); 512 } 513 stktop = j-1; 514 return; 515 } 516 /* Didn't find one. Throw this away. */ 517 pe(lineno); 518 printf("Unmatched .%s\n", mac); 519} 520 521/* eq: are two strings equal? */ 522static int 523eq(const char *s1, const char *s2) 524{ 525 return strcmp(s1, s2) == 0; 526} 527 528/* print the first part of an error message, given the line number */ 529static void 530pe(int pelineno) 531{ 532 if (nfiles > 1) 533 printf("%s: ", cfilename); 534 printf("%d: ", pelineno); 535} 536 537static void 538checkknown(const char *mac) 539{ 540 541 if (eq(mac, ".")) 542 return; 543 if (binsrch(mac) >= 0) 544 return; 545 if (mac[0] == '\\' && mac[1] == '"') /* comments */ 546 return; 547 548 pe(lineno); 549 printf("Unknown command: .%s\n", mac); 550} 551 552/* 553 * We have a .de xx line in "line". Add xx to the list of known commands. 554 */ 555static void 556addcmd(char *line) 557{ 558 char *mac; 559 560 /* grab the macro being defined */ 561 mac = line+4; 562 while (isspace((unsigned char)*mac)) 563 mac++; 564 if (*mac == 0) { 565 pe(lineno); 566 printf("illegal define: %s\n", line); 567 return; 568 } 569 mac[2] = 0; 570 if (isspace((unsigned char)mac[1]) || mac[1] == '\\') 571 mac[1] = 0; 572 if (ncmds >= MAXCMDS) { 573 printf("Only %d known commands allowed\n", MAXCMDS); 574 exit(1); 575 } 576 addmac(mac); 577} 578 579/* 580 * Add mac to the list. We should really have some kind of tree 581 * structure here but this is a quick-and-dirty job and I just don't 582 * have time to mess with it. (I wonder if this will come back to haunt 583 * me someday?) Anyway, I claim that .de is fairly rare in user 584 * nroff programs, and the register loop below is pretty fast. 585 */ 586static void 587addmac(const char *mac) 588{ 589 const char **src, **dest, **loc; 590 591 if (binsrch(mac) >= 0){ /* it's OK to redefine something */ 592#ifdef DEBUG 593 printf("binsrch(%s) -> already in table\n", mac); 594#endif /* DEBUG */ 595 return; 596 } 597 /* binsrch sets slot as a side effect */ 598#ifdef DEBUG 599 printf("binsrch(%s) -> %d\n", mac, slot); 600#endif 601 loc = &knowncmds[slot]; 602 src = &knowncmds[ncmds-1]; 603 dest = src+1; 604 while (dest > loc) 605 *dest-- = *src--; 606 if ((*loc = strdup(mac)) == NULL) 607 err(1, "strdup"); 608 ncmds++; 609#ifdef DEBUG 610 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], 611 knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], 612 knowncmds[slot+2], ncmds); 613#endif 614} 615 616/* 617 * Do a binary search in knowncmds for mac. 618 * If found, return the index. If not, return -1. 619 */ 620static int 621binsrch(const char *mac) 622{ 623 const char *p; /* pointer to current cmd in list */ 624 int d; /* difference if any */ 625 int mid; /* mid point in binary search */ 626 int top, bot; /* boundaries of bin search, inclusive */ 627 628 top = ncmds-1; 629 bot = 0; 630 while (top >= bot) { 631 mid = (top+bot)/2; 632 p = knowncmds[mid]; 633 d = p[0] - mac[0]; 634 if (d == 0) 635 d = p[1] - mac[1]; 636 if (d == 0) 637 return mid; 638 if (d < 0) 639 bot = mid + 1; 640 else 641 top = mid - 1; 642 } 643 slot = bot; /* place it would have gone */ 644 return -1; 645} 646