/* deroff.c revision 1.12 */
1/* $NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $ */ 2 3/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5/*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33/* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 
65 */ 66 67#include <sys/cdefs.h> 68__RCSID("$NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $"); 69 70#include <err.h> 71#include <limits.h> 72#include <stddef.h> 73#include <stdio.h> 74#include <stdlib.h> 75#include <string.h> 76#include <unistd.h> 77 78/* 79 * Deroff command -- strip troff, eqn, and Tbl sequences from 80 * a file. Has two flags argument, -w, to cause output one word per line 81 * rather than in the original format. 82 * -mm (or -ms) causes the corresponding macro's to be interpreted 83 * so that just sentences are output 84 * -ml also gets rid of lists. 85 * Deroff follows .so and .nx commands, removes contents of macro 86 * definitions, equations (both .EQ ... .EN and $...$), 87 * Tbl command sequences, and Troff backslash constructions. 88 * 89 * All input is through the Cget macro; 90 * the most recently read character is in c. 91 * 92 * Modified by Robert Henry to process -me and -man macros. 93 */ 94 95#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 96#define C1get ( (c=getc(infile)) == EOF ? eof() : c) 97 98#ifdef DEBUG 99# define C _C() 100# define C1 _C1() 101#else /* not DEBUG */ 102# define C Cget 103# define C1 C1get 104#endif /* not DEBUG */ 105 106#define SKIP while (C != '\n') 107#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' 
|| pc != '\n' || C > 'Z')pc=c 108 109#define YES 1 110#define NO 0 111#define MS 0 /* -ms */ 112#define MM 1 /* -mm */ 113#define ME 2 /* -me */ 114#define MA 3 /* -man */ 115 116#ifdef DEBUG 117static char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 118#endif /* DEBUG */ 119 120#define ONE 1 121#define TWO 2 122 123#define NOCHAR -2 124#define SPECIAL 0 125#define APOS 1 126#define PUNCT 2 127#define DIGIT 3 128#define LETTER 4 129 130#define MAXFILES 20 131 132static int iflag; 133static int wordflag; 134static int msflag; /* processing a source written using a mac package */ 135static int mac; /* which package */ 136static int disp; 137static int parag; 138static int inmacro; 139static int intable; 140static int keepblock; /* keep blocks of text; normally false when msflag */ 141 142static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 143 144static char line[LINE_MAX]; 145static char *lp; 146 147static int c; 148static int pc; 149static int ldelim; 150static int rdelim; 151 152static char fname[PATH_MAX]; 153static FILE *files[MAXFILES]; 154static FILE **filesp; 155static FILE *infile; 156 157static int argc; 158static char **argv; 159 160/* 161 * Macro processing 162 * 163 * Macro table definitions 164 */ 165typedef int pacmac; /* compressed macro name */ 166static int argconcat = 0; /* concat arguments together (-me only) */ 167 168#define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 169#define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF), __USE(c1), __USE(c2)) 170 171struct mactab { 172 int condition; 173 pacmac macname; 174 int (*func)(pacmac); 175}; 176 177static const struct mactab troffmactab[]; 178static const struct mactab ppmactab[]; 179static const struct mactab msmactab[]; 180static const struct mactab mmmactab[]; 181static const struct mactab memactab[]; 182static const struct mactab manmactab[]; 183 184/* 185 * Macro table initialization 186 */ 187#define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 
188 189/* 190 * Flags for matching conditions other than 191 * the macro name 192 */ 193#define NONE 0 194#define FNEST 1 /* no nested files */ 195#define NOMAC 2 /* no macro */ 196#define MAC 3 /* macro */ 197#define PARAG 4 /* in a paragraph */ 198#define MSF 5 /* msflag is on */ 199#define NBLK 6 /* set if no blocks to be kept */ 200 201/* 202 * Return codes from macro minions, determine where to jump, 203 * how to repeat/reprocess text 204 */ 205#define COMX 1 /* goto comx */ 206#define COM 2 /* goto com */ 207 208static int skeqn(void); 209static int eof(void); 210#ifdef DEBUG 211static int _C1(void); 212static int _C(void); 213#endif 214static int EQ(pacmac); 215static int domacro(pacmac); 216static int PS(pacmac); 217static int skip(pacmac); 218static int intbl(pacmac); 219static int outtbl(pacmac); 220static int so(pacmac); 221static int nx(pacmac); 222static int skiptocom(pacmac); 223static int PP(pacmac); 224static int AU(pacmac); 225static int SH(pacmac); 226static int UX(pacmac); 227static int MMHU(pacmac); 228static int mesnblock(pacmac); 229static int mssnblock(pacmac); 230static int nf(pacmac); 231static int ce(pacmac); 232static int meip(pacmac); 233static int mepp(pacmac); 234static int mesh(pacmac); 235static int mefont(pacmac); 236static int manfont(pacmac); 237static int manpp(pacmac); 238static int macsort(const void *, const void *); 239static int sizetab(const struct mactab *); 240static void getfname(void); 241static void textline(char *, int); 242static void work(void) __dead; 243static void regline(void (*)(char *, int), int); 244static void macro(void); 245static void tbl(void); 246static void stbl(void); 247static void eqn(void); 248static void backsl(void); 249static void sce(void); 250static void refer(int); 251static void inpic(void); 252static void msputmac(char *, int); 253static void msputwords(int); 254static void meputmac(char *, int); 255static void meputwords(int); 256static void noblock(char, char); 257static void 
defcomline(pacmac); 258static void comline(void); 259static void buildtab(const struct mactab **, int *); 260static FILE *opn(char *); 261static struct mactab *macfill(struct mactab *, const struct mactab *); 262static void usage(void) __dead; 263 264int 265main(int ac, char **av) 266{ 267 int i, ch; 268 int errflg = 0; 269 int kflag = NO; 270 271 iflag = NO; 272 wordflag = NO; 273 msflag = NO; 274 mac = ME; 275 disp = NO; 276 parag = NO; 277 inmacro = NO; 278 intable = NO; 279 ldelim = NOCHAR; 280 rdelim = NOCHAR; 281 keepblock = YES; 282 283 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 284 switch (ch) { 285 case 'i': 286 iflag = YES; 287 break; 288 case 'k': 289 kflag = YES; 290 break; 291 case 'm': 292 msflag = YES; 293 keepblock = NO; 294 switch (optarg[0]) { 295 case 'm': 296 mac = MM; 297 break; 298 case 's': 299 mac = MS; 300 break; 301 case 'e': 302 mac = ME; 303 break; 304 case 'a': 305 mac = MA; 306 break; 307 case 'l': 308 disp = YES; 309 break; 310 default: 311 errflg++; 312 break; 313 } 314 if (errflg == 0 && optarg[1] != '\0') 315 errflg++; 316 break; 317 case 'p': 318 parag = YES; 319 break; 320 case 'w': 321 wordflag = YES; 322 kflag = YES; 323 break; 324 default: 325 errflg++; 326 } 327 } 328 argc = ac - optind; 329 argv = av + optind; 330 331 if (kflag) 332 keepblock = YES; 333 if (errflg) 334 usage(); 335 336#ifdef DEBUG 337 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 338 msflag, mactab[mac], keepblock, disp); 339#endif /* DEBUG */ 340 if (argc == 0) { 341 infile = stdin; 342 } else { 343 infile = opn(argv[0]); 344 --argc; 345 ++argv; 346 } 347 files[0] = infile; 348 filesp = &files[0]; 349 350 for (i = 'a'; i <= 'z' ; ++i) 351 chars[i] = LETTER; 352 for (i = 'A'; i <= 'Z'; ++i) 353 chars[i] = LETTER; 354 for (i = '0'; i <= '9'; ++i) 355 chars[i] = DIGIT; 356 chars['\''] = APOS; 357 chars['&'] = APOS; 358 chars['.'] = PUNCT; 359 chars[','] = PUNCT; 360 chars[';'] = PUNCT; 361 chars['?'] = PUNCT; 362 chars[':'] = PUNCT; 363 
work(); 364 return 0; 365} 366 367static int 368skeqn(void) 369{ 370 371 while ((c = getc(infile)) != rdelim) { 372 if (c == EOF) 373 c = eof(); 374 else if (c == '"') { 375 while ((c = getc(infile)) != '"') { 376 if (c == EOF || 377 (c == '\\' && (c = getc(infile)) == EOF)) 378 c = eof(); 379 } 380 } 381 } 382 if (msflag) 383 return c == 'x'; 384 return c == ' '; 385} 386 387static FILE * 388opn(char *p) 389{ 390 FILE *fd; 391 392 if ((fd = fopen(p, "r")) == NULL) 393 err(1, "fopen %s", p); 394 395 return fd; 396} 397 398static int 399eof(void) 400{ 401 402 if (infile != stdin) 403 fclose(infile); 404 if (filesp > files) 405 infile = *--filesp; 406 else if (argc > 0) { 407 infile = opn(argv[0]); 408 --argc; 409 ++argv; 410 } else 411 exit(0); 412 return C; 413} 414 415static void 416getfname(void) 417{ 418 char *p; 419 struct chain { 420 struct chain *nextp; 421 char *datap; 422 } *q; 423 static struct chain *namechain= NULL; 424 425 while (C == ' ') 426 ; /* nothing */ 427 428 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) && 429 (*p = c) != '\n' && 430 c != ' ' && c != '\t' && c != '\\'; ++p) 431 C; 432 *p = '\0'; 433 while (c != '\n') 434 C; 435 436 /* see if this name has already been used */ 437 for (q = namechain ; q; q = q->nextp) 438 if (strcmp(fname, q->datap) == 0) { 439 fname[0] = '\0'; 440 return; 441 } 442 443 q = (struct chain *) malloc(sizeof(struct chain)); 444 if (q == NULL) 445 err(1, NULL); 446 q->nextp = namechain; 447 q->datap = strdup(fname); 448 if (q->datap == NULL) 449 err(1, NULL); 450 namechain = q; 451} 452 453/*ARGSUSED*/ 454static void 455textline(char *str, int constant) 456{ 457 458 if (wordflag) { 459 msputwords(0); 460 return; 461 } 462 puts(str); 463} 464 465static void 466work(void) 467{ 468 469 for (;;) { 470 C; 471#ifdef FULLDEBUG 472 printf("Starting work with `%c'\n", c); 473#endif /* FULLDEBUG */ 474 if (c == '.' 
|| c == '\'') 475 comline(); 476 else 477 regline(textline, TWO); 478 } 479} 480 481static void 482regline(void (*pfunc)(char *, int), int constant) 483{ 484 485 line[0] = c; 486 lp = line; 487 while (lp - line < (ptrdiff_t)sizeof(line)) { 488 if (c == '\\') { 489 *lp = ' '; 490 backsl(); 491 } 492 if (c == '\n') 493 break; 494 if (intable && c == 'T') { 495 *++lp = C; 496 if (c == '{' || c == '}') { 497 lp[-1] = ' '; 498 *lp = C; 499 } 500 } else { 501 *++lp = C; 502 } 503 } 504 *lp = '\0'; 505 506 if (line[0] != '\0') 507 (*pfunc)(line, constant); 508} 509 510static void 511macro(void) 512{ 513 514 if (msflag) { 515 do { 516 SKIP; 517 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 518 if (c != '\n') 519 SKIP; 520 return; 521 } 522 SKIP; 523 inmacro = YES; 524} 525 526static void 527tbl(void) 528{ 529 530 while (C != '.') 531 ; /* nothing */ 532 SKIP; 533 intable = YES; 534} 535 536static void 537stbl(void) 538{ 539 540 while (C != '.') 541 ; /* nothing */ 542 SKIP_TO_COM; 543 if (c != 'T' || C != 'E') { 544 SKIP; 545 pc = c; 546 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 547 pc = c; 548 } 549} 550 551static void 552eqn(void) 553{ 554 int c1, c2; 555 int dflg; 556 char last; 557 558 last=0; 559 dflg = 1; 560 SKIP; 561 562 for (;;) { 563 if (C1 == '.' 
|| c == '\'') { 564 while (C1 == ' ' || c == '\t') 565 ; 566 if (c == 'E' && C1 == 'N') { 567 SKIP; 568 if (msflag && dflg) { 569 putchar('x'); 570 putchar(' '); 571 if (last) { 572 putchar(last); 573 putchar('\n'); 574 } 575 } 576 return; 577 } 578 } else if (c == 'd') { 579 /* look for delim */ 580 if (C1 == 'e' && C1 == 'l') 581 if (C1 == 'i' && C1 == 'm') { 582 while (C1 == ' ') 583 ; /* nothing */ 584 585 if ((c1 = c) == '\n' || 586 (c2 = C1) == '\n' || 587 (c1 == 'o' && c2 == 'f' && C1=='f')) { 588 ldelim = NOCHAR; 589 rdelim = NOCHAR; 590 } else { 591 ldelim = c1; 592 rdelim = c2; 593 } 594 } 595 dflg = 0; 596 } 597 598 if (c != '\n') 599 while (C1 != '\n') { 600 if (chars[c] == PUNCT) 601 last = c; 602 else if (c != ' ') 603 last = 0; 604 } 605 } 606} 607 608/* skip over a complete backslash construction */ 609static void 610backsl(void) 611{ 612 int bdelim; 613 614sw: 615 switch (C) { 616 case '"': 617 SKIP; 618 return; 619 620 case 's': 621 if (C == '\\') 622 backsl(); 623 else { 624 while (C >= '0' && c <= '9') 625 ; /* nothing */ 626 ungetc(c, infile); 627 c = '0'; 628 } 629 --lp; 630 return; 631 632 case 'f': 633 case 'n': 634 case '*': 635 if (C != '(') 636 return; 637 638 /* FALLTHROUGH */ 639 case '(': 640 if (msflag) { 641 if (C == 'e') { 642 if (C == 'm') { 643 *lp = '-'; 644 return; 645 } 646 } 647 else if (c != '\n') 648 C; 649 return; 650 } 651 if (C != '\n') 652 C; 653 return; 654 655 case '$': 656 C; /* discard argument number */ 657 return; 658 659 case 'b': 660 case 'x': 661 case 'v': 662 case 'h': 663 case 'w': 664 case 'o': 665 case 'l': 666 case 'L': 667 if ((bdelim = C) == '\n') 668 return; 669 while (C != '\n' && c != bdelim) 670 if (c == '\\') 671 backsl(); 672 return; 673 674 case '\\': 675 if (inmacro) 676 goto sw; 677 678 default: 679 return; 680 } 681} 682 683static void 684sce(void) 685{ 686 char *ap; 687 int n, i; 688 char a[10]; 689 690 for (ap = a; C != '\n'; ap++) { 691 *ap = c; 692 if (ap == &a[9]) { 693 SKIP; 694 ap = a; 
695 break; 696 } 697 } 698 if (ap != a) 699 n = atoi(a); 700 else 701 n = 1; 702 for (i = 0; i < n;) { 703 if (C == '.') { 704 if (C == 'c') { 705 if (C == 'e') { 706 while (C == ' ') 707 ; /* nothing */ 708 if (c == '0') { 709 SKIP; 710 break; 711 } else 712 SKIP; 713 } 714 else 715 SKIP; 716 } else if (c == 'P' || C == 'P') { 717 if (c != '\n') 718 SKIP; 719 break; 720 } else if (c != '\n') 721 SKIP; 722 } else { 723 SKIP; 724 i++; 725 } 726 } 727} 728 729static void 730refer(int c1) 731{ 732 int c2; 733 734 if (c1 != '\n') 735 SKIP; 736 737 for (c2 = -1;;) { 738 if (C != '.') 739 SKIP; 740 else { 741 if (C != ']') 742 SKIP; 743 else { 744 while (C != '\n') 745 c2 = c; 746 if (c2 != -1 && chars[c2] == PUNCT) 747 putchar(c2); 748 return; 749 } 750 } 751 } 752} 753 754static void 755inpic(void) 756{ 757 int c1; 758 char *p1; 759 760 SKIP; 761 p1 = line; 762 c = '\n'; 763 for (;;) { 764 c1 = c; 765 if (C == '.' && c1 == '\n') { 766 if (C != 'P') { 767 if (c == '\n') 768 continue; 769 else { 770 SKIP; 771 c = '\n'; 772 continue; 773 } 774 } 775 if (C != 'E') { 776 if (c == '\n') 777 continue; 778 else { 779 SKIP; 780 c = '\n'; 781 continue; 782 } 783 } 784 SKIP; 785 return; 786 } 787 else if (c == '\"') { 788 while (C != '\"') { 789 if (c == '\\') { 790 if (C == '\"') 791 continue; 792 ungetc(c, infile); 793 backsl(); 794 } else 795 *p1++ = c; 796 } 797 *p1++ = ' '; 798 } 799 else if (c == '\n' && p1 != line) { 800 *p1 = '\0'; 801 if (wordflag) 802 msputwords(NO); 803 else { 804 puts(line); 805 putchar('\n'); 806 } 807 p1 = line; 808 } 809 } 810} 811 812#ifdef DEBUG 813static int 814_C1(void) 815{ 816 817 return C1get; 818} 819 820static int 821_C(void) 822{ 823 824 return Cget; 825} 826#endif /* DEBUG */ 827 828/* 829 * Put out a macro line, using ms and mm conventions. 
830 */ 831static void 832msputmac(char *s, int constant) 833{ 834 char *t; 835 int found; 836 int last; 837 838 last = 0; 839 found = 0; 840 if (wordflag) { 841 msputwords(YES); 842 return; 843 } 844 while (*s) { 845 while (*s == ' ' || *s == '\t') 846 putchar(*s++); 847 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 848 ; /* nothing */ 849 if (*s == '\"') 850 s++; 851 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 852 chars[(unsigned char)s[1]] == LETTER) { 853 while (s < t) 854 if (*s == '\"') 855 s++; 856 else 857 putchar(*s++); 858 last = *(t-1); 859 found++; 860 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 861 s[1] == '\0') { 862 putchar(*s++); 863 } else { 864 last = *(t - 1); 865 s = t; 866 } 867 } 868 putchar('\n'); 869 if (msflag && chars[last] == PUNCT) { 870 putchar(last); 871 putchar('\n'); 872 } 873} 874 875/* 876 * put out words (for the -w option) with ms and mm conventions 877 */ 878static void 879msputwords(int macline) 880{ 881 char *p, *p1; 882 int i, nlet; 883 884 for (p1 = line;;) { 885 /* 886 * skip initial specials ampersands and apostrophes 887 */ 888 while (chars[(unsigned char)*p1] < DIGIT) 889 if (*p1++ == '\0') 890 return; 891 nlet = 0; 892 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 893 if (i == LETTER) 894 ++nlet; 895 896 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 897 /* 898 * delete trailing ampersands and apostrophes 899 */ 900 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 901 i == APOS ) 902 --p; 903 while (p1 < p) 904 putchar(*p1++); 905 putchar('\n'); 906 } else { 907 p1 = p; 908 } 909 } 910} 911 912/* 913 * put out a macro using the me conventions 914 */ 915#define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 916#define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 917 918static void 919meputmac(char *cp, int constant) 920{ 921 char *np; 922 int found; 923 int argno; 924 int last; 925 int inquote; 926 927 
last = 0; 928 found = 0; 929 if (wordflag) { 930 meputwords(YES); 931 return; 932 } 933 for (argno = 0; *cp; argno++) { 934 SKIPBLANK(cp); 935 inquote = (*cp == '"'); 936 if (inquote) 937 cp++; 938 for (np = cp; *np; np++) { 939 switch (*np) { 940 case '\n': 941 case '\0': 942 break; 943 944 case '\t': 945 case ' ': 946 if (inquote) 947 continue; 948 else 949 goto endarg; 950 951 case '"': 952 if (inquote && np[1] == '"') { 953 memmove(np, np + 1, strlen(np)); 954 np++; 955 continue; 956 } else { 957 *np = ' '; /* bye bye " */ 958 goto endarg; 959 } 960 961 default: 962 continue; 963 } 964 } 965 endarg: ; 966 /* 967 * cp points at the first char in the arg 968 * np points one beyond the last char in the arg 969 */ 970 if ((argconcat == 0) || (argconcat != argno)) 971 putchar(' '); 972#ifdef FULLDEBUG 973 { 974 char *p; 975 printf("[%d,%d: ", argno, np - cp); 976 for (p = cp; p < np; p++) { 977 putchar(*p); 978 } 979 printf("]"); 980 } 981#endif /* FULLDEBUG */ 982 /* 983 * Determine if the argument merits being printed 984 * 985 * constant is the cut off point below which something 986 * is not a word. 
987 */ 988 if (((np - cp) > constant) && 989 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 990 for (; cp < np; cp++) 991 putchar(*cp); 992 last = np[-1]; 993 found++; 994 } else if (found && (np - cp == 1) && 995 chars[(unsigned char)*cp] == PUNCT) { 996 putchar(*cp); 997 } else { 998 last = np[-1]; 999 } 1000 cp = np; 1001 } 1002 if (msflag && chars[last] == PUNCT) 1003 putchar(last); 1004 putchar('\n'); 1005} 1006 1007/* 1008 * put out words (for the -w option) with ms and mm conventions 1009 */ 1010static void 1011meputwords(int macline) 1012{ 1013 1014 msputwords(macline); 1015} 1016 1017/* 1018 * 1019 * Skip over a nested set of macros 1020 * 1021 * Possible arguments to noblock are: 1022 * 1023 * fi end of unfilled text 1024 * PE pic ending 1025 * DE display ending 1026 * 1027 * for ms and mm only: 1028 * KE keep ending 1029 * 1030 * NE undocumented match to NS (for mm?) 1031 * LE mm only: matches RL or *L (for lists) 1032 * 1033 * for me: 1034 * ([lqbzcdf] 1035 */ 1036static void 1037noblock(char a1, char a2) 1038{ 1039 int c1,c2; 1040 int eqnf; 1041 int lct; 1042 1043 lct = 0; 1044 eqnf = 1; 1045 SKIP; 1046 for (;;) { 1047 while (C != '.') 1048 if (c == '\n') 1049 continue; 1050 else 1051 SKIP; 1052 if ((c1 = C) == '\n') 1053 continue; 1054 if ((c2 = C) == '\n') 1055 continue; 1056 if (c1 == a1 && c2 == a2) { 1057 SKIP; 1058 if (lct != 0) { 1059 lct--; 1060 continue; 1061 } 1062 if (eqnf) 1063 putchar('.'); 1064 putchar('\n'); 1065 return; 1066 } else if (a1 == 'L' && c2 == 'L') { 1067 lct++; 1068 SKIP; 1069 } 1070 /* 1071 * equations (EQ) nested within a display 1072 */ 1073 else if (c1 == 'E' && c2 == 'Q') { 1074 if ((mac == ME && a1 == ')') 1075 || (mac != ME && a1 == 'D')) { 1076 eqn(); 1077 eqnf=0; 1078 } 1079 } 1080 /* 1081 * turning on filling is done by the paragraphing 1082 * macros 1083 */ 1084 else if (a1 == 'f') { /* .fi */ 1085 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1086 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 1087 SKIP; 
1088 return; 1089 } 1090 } else { 1091 SKIP; 1092 } 1093 } 1094} 1095 1096static int 1097/*ARGSUSED*/ 1098EQ(pacmac unused) 1099{ 1100 1101 eqn(); 1102 return 0; 1103} 1104 1105static int 1106/*ARGSUSED*/ 1107domacro(pacmac unused) 1108{ 1109 1110 macro(); 1111 return 0; 1112} 1113 1114static int 1115/*ARGSUSED*/ 1116PS(pacmac unused) 1117{ 1118 1119 for (C; c == ' ' || c == '\t'; C) 1120 ; /* nothing */ 1121 1122 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1123 SKIP; 1124 return 0; 1125 } 1126 if (!msflag) 1127 inpic(); 1128 else 1129 noblock('P', 'E'); 1130 return 0; 1131} 1132 1133static int 1134/*ARGSUSED*/ 1135skip(pacmac unused) 1136{ 1137 1138 SKIP; 1139 return 0; 1140} 1141 1142static int 1143/*ARGSUSED*/ 1144intbl(pacmac unused) 1145{ 1146 1147 if (msflag) 1148 stbl(); 1149 else 1150 tbl(); 1151 return 0; 1152} 1153 1154static int 1155/*ARGSUSED*/ 1156outtbl(pacmac unused) 1157{ 1158 1159 intable = NO; 1160 return 0; 1161} 1162 1163static int 1164/*ARGSUSED*/ 1165so(pacmac unused) 1166{ 1167 1168 if (!iflag) { 1169 getfname(); 1170 if (fname[0]) { 1171 if (++filesp - &files[0] > MAXFILES) 1172 err(1, "too many nested files (max %d)", 1173 MAXFILES); 1174 infile = *filesp = opn(fname); 1175 } 1176 } 1177 return 0; 1178} 1179 1180static int 1181/*ARGSUSED*/ 1182nx(pacmac unused) 1183{ 1184 1185 if (!iflag) { 1186 getfname(); 1187 if (fname[0] == '\0') 1188 exit(0); 1189 if (infile != stdin) 1190 fclose(infile); 1191 infile = *filesp = opn(fname); 1192 } 1193 return 0; 1194} 1195 1196static int 1197/*ARGSUSED*/ 1198skiptocom(pacmac unused) 1199{ 1200 1201 SKIP_TO_COM; 1202 return COMX; 1203} 1204 1205static int 1206PP(pacmac c12) 1207{ 1208 int c1, c2; 1209 1210 frommac(c12, c1, c2); 1211 printf(".%c%c", c1, c2); 1212 while (C != '\n') 1213 putchar(c); 1214 putchar('\n'); 1215 return 0; 1216} 1217 1218static int 1219/*ARGSUSED*/ 1220AU(pacmac unused) 1221{ 1222 1223 if (mac == MM) 1224 return 0; 1225 SKIP_TO_COM; 1226 return COMX; 1227} 1228 
1229static int 1230SH(pacmac c12) 1231{ 1232 int c1, c2; 1233 1234 frommac(c12, c1, c2); 1235 1236 if (parag) { 1237 printf(".%c%c", c1, c2); 1238 while (C != '\n') 1239 putchar(c); 1240 putchar(c); 1241 putchar('!'); 1242 for (;;) { 1243 while (C != '\n') 1244 putchar(c); 1245 putchar('\n'); 1246 if (C == '.') 1247 return COM; 1248 putchar('!'); 1249 putchar(c); 1250 } 1251 /*NOTREACHED*/ 1252 } else { 1253 SKIP_TO_COM; 1254 return COMX; 1255 } 1256} 1257 1258static int 1259/*ARGSUSED*/ 1260UX(pacmac unused) 1261{ 1262 1263 if (wordflag) 1264 printf("UNIX\n"); 1265 else 1266 printf("UNIX "); 1267 return 0; 1268} 1269 1270static int 1271MMHU(pacmac c12) 1272{ 1273 int c1, c2; 1274 1275 frommac(c12, c1, c2); 1276 if (parag) { 1277 printf(".%c%c", c1, c2); 1278 while (C != '\n') 1279 putchar(c); 1280 putchar('\n'); 1281 } else { 1282 SKIP; 1283 } 1284 return 0; 1285} 1286 1287static int 1288mesnblock(pacmac c12) 1289{ 1290 int c1, c2; 1291 1292 frommac(c12, c1, c2); 1293 noblock(')', c2); 1294 return 0; 1295} 1296 1297static int 1298mssnblock(pacmac c12) 1299{ 1300 int c1, c2; 1301 1302 frommac(c12, c1, c2); 1303 noblock(c1, 'E'); 1304 return 0; 1305} 1306 1307static int 1308/*ARGUSED*/ 1309nf(pacmac unused) 1310{ 1311 1312 noblock('f', 'i'); 1313 return 0; 1314} 1315 1316static int 1317/*ARGUSED*/ 1318ce(pacmac unused) 1319{ 1320 1321 sce(); 1322 return 0; 1323} 1324 1325static int 1326meip(pacmac c12) 1327{ 1328 1329 if (parag) 1330 mepp(c12); 1331 else if (wordflag) /* save the tag */ 1332 regline(meputmac, ONE); 1333 else 1334 SKIP; 1335 return 0; 1336} 1337 1338/* 1339 * only called for -me .pp or .sh, when parag is on 1340 */ 1341static int 1342mepp(pacmac c12) 1343{ 1344 1345 PP(c12); /* eats the line */ 1346 return 0; 1347} 1348 1349/* 1350 * Start of a section heading; output the section name if doing words 1351 */ 1352static int 1353mesh(pacmac c12) 1354{ 1355 1356 if (parag) 1357 mepp(c12); 1358 else if (wordflag) 1359 defcomline(c12); 1360 else 1361 SKIP; 
1362 return 0; 1363} 1364 1365/* 1366 * process a font setting 1367 */ 1368static int 1369mefont(pacmac c12) 1370{ 1371 1372 argconcat = 1; 1373 defcomline(c12); 1374 argconcat = 0; 1375 return 0; 1376} 1377 1378static int 1379manfont(pacmac c12) 1380{ 1381 1382 return mefont(c12); 1383} 1384 1385static int 1386manpp(pacmac c12) 1387{ 1388 1389 return mepp(c12); 1390} 1391 1392static void 1393defcomline(pacmac c12) 1394{ 1395 int c1, c2; 1396 1397 frommac(c12, c1, c2); 1398 if (msflag && mac == MM && c2 == 'L') { 1399 if (disp || c1 == 'R') { 1400 noblock('L', 'E'); 1401 } else { 1402 SKIP; 1403 putchar('.'); 1404 } 1405 } 1406 else if (c1 == '.' && c2 == '.') { 1407 if (msflag) { 1408 SKIP; 1409 return; 1410 } 1411 while (C == '.') 1412 /*VOID*/; 1413 } 1414 ++inmacro; 1415 /* 1416 * Process the arguments to the macro 1417 */ 1418 switch (mac) { 1419 default: 1420 case MM: 1421 case MS: 1422 if (c1 <= 'Z' && msflag) 1423 regline(msputmac, ONE); 1424 else 1425 regline(msputmac, TWO); 1426 break; 1427 case ME: 1428 regline(meputmac, ONE); 1429 break; 1430 } 1431 --inmacro; 1432} 1433 1434static void 1435comline(void) 1436{ 1437 int c1; 1438 int c2; 1439 pacmac c12; 1440 int mid; 1441 int lb, ub; 1442 int hit; 1443 static int tabsize = 0; 1444 static const struct mactab *mactab = NULL; 1445 const struct mactab *mp; 1446 1447 if (mactab == 0) 1448 buildtab(&mactab, &tabsize); 1449com: 1450 while (C == ' ' || c == '\t') 1451 ; 1452comx: 1453 if ((c1 = c) == '\n') 1454 return; 1455 c2 = C; 1456 if (c1 == '.' 
&& c2 != '.') 1457 inmacro = NO; 1458 if (msflag && c1 == '[') { 1459 refer(c2); 1460 return; 1461 } 1462 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1463 printf(".P\n"); 1464 return; 1465 } 1466 if (c2 == '\n') 1467 return; 1468 /* 1469 * Single letter macro 1470 */ 1471 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1472 c2 = ' '; 1473 c12 = tomac(c1, c2); 1474 /* 1475 * binary search through the table of macros 1476 */ 1477 lb = 0; 1478 ub = tabsize - 1; 1479 while (lb <= ub) { 1480 mid = (ub + lb) / 2; 1481 mp = &mactab[mid]; 1482 if (mp->macname < c12) 1483 lb = mid + 1; 1484 else if (mp->macname > c12) 1485 ub = mid - 1; 1486 else { 1487 hit = 1; 1488#ifdef FULLDEBUG 1489 printf("preliminary hit macro %c%c ", c1, c2); 1490#endif /* FULLDEBUG */ 1491 switch (mp->condition) { 1492 case NONE: 1493 hit = YES; 1494 break; 1495 case FNEST: 1496 hit = (filesp == files); 1497 break; 1498 case NOMAC: 1499 hit = !inmacro; 1500 break; 1501 case MAC: 1502 hit = inmacro; 1503 break; 1504 case PARAG: 1505 hit = parag; 1506 break; 1507 case NBLK: 1508 hit = !keepblock; 1509 break; 1510 default: 1511 hit = 0; 1512 } 1513 1514 if (hit) { 1515#ifdef FULLDEBUG 1516 printf("MATCH\n"); 1517#endif /* FULLDEBUG */ 1518 switch ((*(mp->func))(c12)) { 1519 default: 1520 return; 1521 case COMX: 1522 goto comx; 1523 case COM: 1524 goto com; 1525 } 1526 } 1527#ifdef FULLDEBUG 1528 printf("FAIL\n"); 1529#endif /* FULLDEBUG */ 1530 break; 1531 } 1532 } 1533 defcomline(c12); 1534} 1535 1536static int 1537macsort(const void *p1, const void *p2) 1538{ 1539 const struct mactab *t1 = p1; 1540 const struct mactab *t2 = p2; 1541 1542 return t1->macname - t2->macname; 1543} 1544 1545static int 1546sizetab(const struct mactab *mp) 1547{ 1548 int i; 1549 1550 i = 0; 1551 if (mp) { 1552 for (; mp->macname; mp++, i++) 1553 /*VOID*/ ; 1554 } 1555 return i; 1556} 1557 1558static struct mactab * 1559macfill(struct mactab *dst, const struct mactab *src) 1560{ 1561 1562 if (src) { 1563 while 
(src->macname) 1564 *dst++ = *src++; 1565 } 1566 return dst; 1567} 1568 1569static void 1570usage(void) 1571{ 1572 extern char *__progname; 1573 1574 fprintf(stderr, "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", __progname); 1575 exit(1); 1576} 1577 1578static void 1579buildtab(const struct mactab **r_back, int *r_size) 1580{ 1581 size_t size; 1582 const struct mactab *p1, *p2; 1583 struct mactab *back, *p; 1584 1585 size = sizetab(troffmactab) + sizetab(ppmactab); 1586 p1 = p2 = NULL; 1587 if (msflag) { 1588 switch (mac) { 1589 case ME: 1590 p1 = memactab; 1591 break; 1592 case MM: 1593 p1 = msmactab; 1594 p2 = mmmactab; 1595 break; 1596 case MS: 1597 p1 = msmactab; 1598 break; 1599 case MA: 1600 p1 = manmactab; 1601 break; 1602 default: 1603 break; 1604 } 1605 } 1606 size += sizetab(p1); 1607 size += sizetab(p2); 1608 back = calloc(size + 2, sizeof(struct mactab)); 1609 if (back == NULL) 1610 err(1, NULL); 1611 1612 p = macfill(back, troffmactab); 1613 p = macfill(p, ppmactab); 1614 p = macfill(p, p1); 1615 p = macfill(p, p2); 1616 1617 qsort(back, size, sizeof(struct mactab), macsort); 1618 *r_size = size; 1619 *r_back = back; 1620} 1621 1622/* 1623 * troff commands 1624 */ 1625static const struct mactab troffmactab[] = { 1626 M(NONE, '\\','"', skip), /* comment */ 1627 M(NOMAC, 'd','e', domacro), /* define */ 1628 M(NOMAC, 'i','g', domacro), /* ignore till .. 
*/ 1629 M(NOMAC, 'a','m', domacro), /* append macro */ 1630 M(NBLK, 'n','f', nf), /* filled */ 1631 M(NBLK, 'c','e', ce), /* centered */ 1632 1633 M(NONE, 's','o', so), /* source a file */ 1634 M(NONE, 'n','x', nx), /* go to next file */ 1635 1636 M(NONE, 't','m', skip), /* print string on tty */ 1637 M(NONE, 'h','w', skip), /* exception hyphen words */ 1638 M(NONE, 0,0, 0) 1639}; 1640 1641/* 1642 * Preprocessor output 1643 */ 1644static const struct mactab ppmactab[] = { 1645 M(FNEST, 'E','Q', EQ), /* equation starting */ 1646 M(FNEST, 'T','S', intbl), /* table starting */ 1647 M(FNEST, 'T','C', intbl), /* alternative table? */ 1648 M(FNEST, 'T','&', intbl), /* table reformatting */ 1649 M(NONE, 'T','E', outtbl),/* table ending */ 1650 M(NONE, 'P','S', PS), /* picture starting */ 1651 M(NONE, 0,0, 0) 1652}; 1653 1654/* 1655 * Particular to ms and mm 1656 */ 1657static const struct mactab msmactab[] = { 1658 M(NONE, 'T','L', skiptocom), /* title follows */ 1659 M(NONE, 'F','S', skiptocom), /* start footnote */ 1660 M(NONE, 'O','K', skiptocom), /* Other kws */ 1661 1662 M(NONE, 'N','R', skip), /* undocumented */ 1663 M(NONE, 'N','D', skip), /* use supplied date */ 1664 1665 M(PARAG, 'P','P', PP), /* begin parag */ 1666 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1667 M(PARAG, 'L','P', PP), /* left blocked parag */ 1668 1669 M(NONE, 'A','U', AU), /* author */ 1670 M(NONE, 'A','I', AU), /* authors institution */ 1671 1672 M(NONE, 'S','H', SH), /* section heading */ 1673 M(NONE, 'S','N', SH), /* undocumented */ 1674 M(NONE, 'U','X', UX), /* unix */ 1675 1676 M(NBLK, 'D','S', mssnblock), /* start display text */ 1677 M(NBLK, 'K','S', mssnblock), /* start keep */ 1678 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1679 M(NONE, 0,0, 0) 1680}; 1681 1682static const struct mactab mmmactab[] = { 1683 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1684 M(NONE, 'H','U', MMHU), /* -mm ? 
*/ 1685 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1686 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1687 M(NONE, 0,0, 0) 1688}; 1689 1690static const struct mactab memactab[] = { 1691 M(PARAG, 'p','p', mepp), 1692 M(PARAG, 'l','p', mepp), 1693 M(PARAG, 'n','p', mepp), 1694 M(NONE, 'i','p', meip), 1695 1696 M(NONE, 's','h', mesh), 1697 M(NONE, 'u','h', mesh), 1698 1699 M(NBLK, '(','l', mesnblock), 1700 M(NBLK, '(','q', mesnblock), 1701 M(NBLK, '(','b', mesnblock), 1702 M(NBLK, '(','z', mesnblock), 1703 M(NBLK, '(','c', mesnblock), 1704 1705 M(NBLK, '(','d', mesnblock), 1706 M(NBLK, '(','f', mesnblock), 1707 M(NBLK, '(','x', mesnblock), 1708 1709 M(NONE, 'r',' ', mefont), 1710 M(NONE, 'i',' ', mefont), 1711 M(NONE, 'b',' ', mefont), 1712 M(NONE, 'u',' ', mefont), 1713 M(NONE, 'q',' ', mefont), 1714 M(NONE, 'r','b', mefont), 1715 M(NONE, 'b','i', mefont), 1716 M(NONE, 'b','x', mefont), 1717 M(NONE, 0,0, 0) 1718}; 1719 1720static const struct mactab manmactab[] = { 1721 M(PARAG, 'B','I', manfont), 1722 M(PARAG, 'B','R', manfont), 1723 M(PARAG, 'I','B', manfont), 1724 M(PARAG, 'I','R', manfont), 1725 M(PARAG, 'R','B', manfont), 1726 M(PARAG, 'R','I', manfont), 1727 1728 M(PARAG, 'P','P', manpp), 1729 M(PARAG, 'L','P', manpp), 1730 M(PARAG, 'H','P', manpp), 1731 M(NONE, 0,0, 0) 1732}; 1733