/* deroff.c revision 1.8 */
1/* $NetBSD: deroff.c,v 1.8 2011/05/24 12:19:11 joerg Exp $ */ 2 3/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */ 4 5/*- 6 * Copyright (c) 1988, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33/* 34 * Copyright (C) Caldera International Inc. 2001-2002. 35 * All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code and documentation must retain the above 41 * copyright notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed or owned by Caldera 48 * International, Inc. 49 * 4. Neither the name of Caldera International, Inc. nor the names of other 50 * contributors may be used to endorse or promote products derived from 51 * this software without specific prior written permission. 52 * 53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 64 * POSSIBILITY OF SUCH DAMAGE. 65 */ 66 67#ifndef lint 68static const char copyright[] = 69"@(#) Copyright (c) 1988, 1993\n\ 70 The Regents of the University of California. 
All rights reserved.\n"; 71#endif /* not lint */ 72 73#ifndef lint 74#if 0 75static const char sccsid[] = "@(#)deroff.c 8.1 (Berkeley) 6/6/93"; 76#else 77static const char rcsid[] = "$NetBSD: deroff.c,v 1.8 2011/05/24 12:19:11 joerg Exp $"; 78#endif 79#endif /* not lint */ 80 81#include <sys/cdefs.h> 82#include <err.h> 83#include <limits.h> 84#include <stddef.h> 85#include <stdio.h> 86#include <stdlib.h> 87#include <string.h> 88#include <unistd.h> 89 90/* 91 * Deroff command -- strip troff, eqn, and Tbl sequences from 92 * a file. Has two flags argument, -w, to cause output one word per line 93 * rather than in the original format. 94 * -mm (or -ms) causes the corresponding macro's to be interpreted 95 * so that just sentences are output 96 * -ml also gets rid of lists. 97 * Deroff follows .so and .nx commands, removes contents of macro 98 * definitions, equations (both .EQ ... .EN and $...$), 99 * Tbl command sequences, and Troff backslash constructions. 100 * 101 * All input is through the Cget macro; 102 * the most recently read character is in c. 103 * 104 * Modified by Robert Henry to process -me and -man macros. 105 */ 106 107#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) ) 108#define C1get ( (c=getc(infile)) == EOF ? eof() : c) 109 110#ifdef DEBUG 111# define C _C() 112# define C1 _C1() 113#else /* not DEBUG */ 114# define C Cget 115# define C1 C1get 116#endif /* not DEBUG */ 117 118#define SKIP while (C != '\n') 119#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' 
|| pc != '\n' || C > 'Z')pc=c 120 121#define YES 1 122#define NO 0 123#define MS 0 /* -ms */ 124#define MM 1 /* -mm */ 125#define ME 2 /* -me */ 126#define MA 3 /* -man */ 127 128#ifdef DEBUG 129char *mactab[] = { "-ms", "-mm", "-me", "-ma" }; 130#endif /* DEBUG */ 131 132#define ONE 1 133#define TWO 2 134 135#define NOCHAR -2 136#define SPECIAL 0 137#define APOS 1 138#define PUNCT 2 139#define DIGIT 3 140#define LETTER 4 141 142#define MAXFILES 20 143 144static int iflag; 145static int wordflag; 146static int msflag; /* processing a source written using a mac package */ 147static int mac; /* which package */ 148static int disp; 149static int parag; 150static int inmacro; 151static int intable; 152static int keepblock; /* keep blocks of text; normally false when msflag */ 153 154static char chars[128]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */ 155 156static char line[LINE_MAX]; 157static char *lp; 158 159static int c; 160static int pc; 161static int ldelim; 162static int rdelim; 163 164static char fname[PATH_MAX]; 165static FILE *files[MAXFILES]; 166static FILE **filesp; 167static FILE *infile; 168 169static int argc; 170static char **argv; 171 172/* 173 * Macro processing 174 * 175 * Macro table definitions 176 */ 177typedef int pacmac; /* compressed macro name */ 178static int argconcat = 0; /* concat arguments together (-me only) */ 179 180#define tomac(c1, c2) ((((c1) & 0xFF) << 8) | ((c2) & 0xFF)) 181#define frommac(src, c1, c2) (((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF)) 182 183struct mactab { 184 int condition; 185 pacmac macname; 186 int (*func)(pacmac); 187}; 188 189static const struct mactab troffmactab[]; 190static const struct mactab ppmactab[]; 191static const struct mactab msmactab[]; 192static const struct mactab mmmactab[]; 193static const struct mactab memactab[]; 194static const struct mactab manmactab[]; 195 196/* 197 * Macro table initialization 198 */ 199#define M(cond, c1, c2, func) {cond, tomac(c1, c2), func} 200 201/* 202 * Flags for 
matching conditions other than 203 * the macro name 204 */ 205#define NONE 0 206#define FNEST 1 /* no nested files */ 207#define NOMAC 2 /* no macro */ 208#define MAC 3 /* macro */ 209#define PARAG 4 /* in a paragraph */ 210#define MSF 5 /* msflag is on */ 211#define NBLK 6 /* set if no blocks to be kept */ 212 213/* 214 * Return codes from macro minions, determine where to jump, 215 * how to repeat/reprocess text 216 */ 217#define COMX 1 /* goto comx */ 218#define COM 2 /* goto com */ 219 220static int skeqn(void); 221static int eof(void); 222#ifdef DEBUG 223static int _C1(void); 224static int _C(void); 225#endif 226static int EQ(pacmac); 227static int domacro(pacmac); 228static int PS(pacmac); 229static int skip(pacmac); 230static int intbl(pacmac); 231static int outtbl(pacmac); 232static int so(pacmac); 233static int nx(pacmac); 234static int skiptocom(pacmac); 235static int PP(pacmac); 236static int AU(pacmac); 237static int SH(pacmac); 238static int UX(pacmac); 239static int MMHU(pacmac); 240static int mesnblock(pacmac); 241static int mssnblock(pacmac); 242static int nf(pacmac); 243static int ce(pacmac); 244static int meip(pacmac); 245static int mepp(pacmac); 246static int mesh(pacmac); 247static int mefont(pacmac); 248static int manfont(pacmac); 249static int manpp(pacmac); 250static int macsort(const void *, const void *); 251static int sizetab(const struct mactab *); 252static void getfname(void); 253static void textline(char *, int); 254static void work(void); 255static void regline(void (*)(char *, int), int); 256static void macro(void); 257static void tbl(void); 258static void stbl(void); 259static void eqn(void); 260static void backsl(void); 261static void sce(void); 262static void refer(int); 263static void inpic(void); 264static void msputmac(char *, int); 265static void msputwords(int); 266static void meputmac(char *, int); 267static void meputwords(int); 268static void noblock(char, char); 269static void defcomline(pacmac); 270static void 
comline(void); 271static void buildtab(const struct mactab **, int *); 272static FILE *opn(char *); 273static struct mactab *macfill(struct mactab *, const struct mactab *); 274static void usage(void) __dead; 275 276int 277main(int ac, char **av) 278{ 279 int i, ch; 280 int errflg = 0; 281 int kflag = NO; 282 283 iflag = NO; 284 wordflag = NO; 285 msflag = NO; 286 mac = ME; 287 disp = NO; 288 parag = NO; 289 inmacro = NO; 290 intable = NO; 291 ldelim = NOCHAR; 292 rdelim = NOCHAR; 293 keepblock = YES; 294 295 while ((ch = getopt(ac, av, "ikpwm:")) != -1) { 296 switch (ch) { 297 case 'i': 298 iflag = YES; 299 break; 300 case 'k': 301 kflag = YES; 302 break; 303 case 'm': 304 msflag = YES; 305 keepblock = NO; 306 switch (optarg[0]) { 307 case 'm': 308 mac = MM; 309 break; 310 case 's': 311 mac = MS; 312 break; 313 case 'e': 314 mac = ME; 315 break; 316 case 'a': 317 mac = MA; 318 break; 319 case 'l': 320 disp = YES; 321 break; 322 default: 323 errflg++; 324 break; 325 } 326 if (errflg == 0 && optarg[1] != '\0') 327 errflg++; 328 break; 329 case 'p': 330 parag = YES; 331 break; 332 case 'w': 333 wordflag = YES; 334 kflag = YES; 335 break; 336 default: 337 errflg++; 338 } 339 } 340 argc = ac - optind; 341 argv = av + optind; 342 343 if (kflag) 344 keepblock = YES; 345 if (errflg) 346 usage(); 347 348#ifdef DEBUG 349 printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n", 350 msflag, mactab[mac], keepblock, disp); 351#endif /* DEBUG */ 352 if (argc == 0) { 353 infile = stdin; 354 } else { 355 infile = opn(argv[0]); 356 --argc; 357 ++argv; 358 } 359 files[0] = infile; 360 filesp = &files[0]; 361 362 for (i = 'a'; i <= 'z' ; ++i) 363 chars[i] = LETTER; 364 for (i = 'A'; i <= 'Z'; ++i) 365 chars[i] = LETTER; 366 for (i = '0'; i <= '9'; ++i) 367 chars[i] = DIGIT; 368 chars['\''] = APOS; 369 chars['&'] = APOS; 370 chars['.'] = PUNCT; 371 chars[','] = PUNCT; 372 chars[';'] = PUNCT; 373 chars['?'] = PUNCT; 374 chars[':'] = PUNCT; 375 work(); 376 return 0; 377} 378 
379static int 380skeqn(void) 381{ 382 383 while ((c = getc(infile)) != rdelim) { 384 if (c == EOF) 385 c = eof(); 386 else if (c == '"') { 387 while ((c = getc(infile)) != '"') { 388 if (c == EOF || 389 (c == '\\' && (c = getc(infile)) == EOF)) 390 c = eof(); 391 } 392 } 393 } 394 if (msflag) 395 return c == 'x'; 396 return c == ' '; 397} 398 399static FILE * 400opn(char *p) 401{ 402 FILE *fd; 403 404 if ((fd = fopen(p, "r")) == NULL) 405 err(1, "fopen %s", p); 406 407 return fd; 408} 409 410static int 411eof(void) 412{ 413 414 if (infile != stdin) 415 fclose(infile); 416 if (filesp > files) 417 infile = *--filesp; 418 else if (argc > 0) { 419 infile = opn(argv[0]); 420 --argc; 421 ++argv; 422 } else 423 exit(0); 424 return C; 425} 426 427static void 428getfname(void) 429{ 430 char *p; 431 struct chain { 432 struct chain *nextp; 433 char *datap; 434 } *q; 435 static struct chain *namechain= NULL; 436 437 while (C == ' ') 438 ; /* nothing */ 439 440 for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) && 441 (*p = c) != '\n' && 442 c != ' ' && c != '\t' && c != '\\'; ++p) 443 C; 444 *p = '\0'; 445 while (c != '\n') 446 C; 447 448 /* see if this name has already been used */ 449 for (q = namechain ; q; q = q->nextp) 450 if (strcmp(fname, q->datap) == 0) { 451 fname[0] = '\0'; 452 return; 453 } 454 455 q = (struct chain *) malloc(sizeof(struct chain)); 456 if (q == NULL) 457 err(1, NULL); 458 q->nextp = namechain; 459 q->datap = strdup(fname); 460 if (q->datap == NULL) 461 err(1, NULL); 462 namechain = q; 463} 464 465/*ARGSUSED*/ 466static void 467textline(char *str, int constant) 468{ 469 470 if (wordflag) { 471 msputwords(0); 472 return; 473 } 474 puts(str); 475} 476 477void 478work(void) 479{ 480 481 for (;;) { 482 C; 483#ifdef FULLDEBUG 484 printf("Starting work with `%c'\n", c); 485#endif /* FULLDEBUG */ 486 if (c == '.' 
|| c == '\'') 487 comline(); 488 else 489 regline(textline, TWO); 490 } 491} 492 493static void 494regline(void (*pfunc)(char *, int), int constant) 495{ 496 497 line[0] = c; 498 lp = line; 499 while (lp - line < (ptrdiff_t)sizeof(line)) { 500 if (c == '\\') { 501 *lp = ' '; 502 backsl(); 503 } 504 if (c == '\n') 505 break; 506 if (intable && c == 'T') { 507 *++lp = C; 508 if (c == '{' || c == '}') { 509 lp[-1] = ' '; 510 *lp = C; 511 } 512 } else { 513 *++lp = C; 514 } 515 } 516 *lp = '\0'; 517 518 if (line[0] != '\0') 519 (*pfunc)(line, constant); 520} 521 522static void 523macro(void) 524{ 525 526 if (msflag) { 527 do { 528 SKIP; 529 } while (C!='.' || C!='.' || C=='.'); /* look for .. */ 530 if (c != '\n') 531 SKIP; 532 return; 533 } 534 SKIP; 535 inmacro = YES; 536} 537 538static void 539tbl(void) 540{ 541 542 while (C != '.') 543 ; /* nothing */ 544 SKIP; 545 intable = YES; 546} 547 548static void 549stbl(void) 550{ 551 552 while (C != '.') 553 ; /* nothing */ 554 SKIP_TO_COM; 555 if (c != 'T' || C != 'E') { 556 SKIP; 557 pc = c; 558 while (C != '.' || pc != '\n' || C != 'T' || C != 'E') 559 pc = c; 560 } 561} 562 563static void 564eqn(void) 565{ 566 int c1, c2; 567 int dflg; 568 char last; 569 570 last=0; 571 dflg = 1; 572 SKIP; 573 574 for (;;) { 575 if (C1 == '.' 
|| c == '\'') { 576 while (C1 == ' ' || c == '\t') 577 ; 578 if (c == 'E' && C1 == 'N') { 579 SKIP; 580 if (msflag && dflg) { 581 putchar('x'); 582 putchar(' '); 583 if (last) { 584 putchar(last); 585 putchar('\n'); 586 } 587 } 588 return; 589 } 590 } else if (c == 'd') { 591 /* look for delim */ 592 if (C1 == 'e' && C1 == 'l') 593 if (C1 == 'i' && C1 == 'm') { 594 while (C1 == ' ') 595 ; /* nothing */ 596 597 if ((c1 = c) == '\n' || 598 (c2 = C1) == '\n' || 599 (c1 == 'o' && c2 == 'f' && C1=='f')) { 600 ldelim = NOCHAR; 601 rdelim = NOCHAR; 602 } else { 603 ldelim = c1; 604 rdelim = c2; 605 } 606 } 607 dflg = 0; 608 } 609 610 if (c != '\n') 611 while (C1 != '\n') { 612 if (chars[c] == PUNCT) 613 last = c; 614 else if (c != ' ') 615 last = 0; 616 } 617 } 618} 619 620/* skip over a complete backslash construction */ 621static void 622backsl(void) 623{ 624 int bdelim; 625 626sw: 627 switch (C) { 628 case '"': 629 SKIP; 630 return; 631 632 case 's': 633 if (C == '\\') 634 backsl(); 635 else { 636 while (C >= '0' && c <= '9') 637 ; /* nothing */ 638 ungetc(c, infile); 639 c = '0'; 640 } 641 --lp; 642 return; 643 644 case 'f': 645 case 'n': 646 case '*': 647 if (C != '(') 648 return; 649 650 case '(': 651 if (msflag) { 652 if (C == 'e') { 653 if (C == 'm') { 654 *lp = '-'; 655 return; 656 } 657 } 658 else if (c != '\n') 659 C; 660 return; 661 } 662 if (C != '\n') 663 C; 664 return; 665 666 case '$': 667 C; /* discard argument number */ 668 return; 669 670 case 'b': 671 case 'x': 672 case 'v': 673 case 'h': 674 case 'w': 675 case 'o': 676 case 'l': 677 case 'L': 678 if ((bdelim = C) == '\n') 679 return; 680 while (C != '\n' && c != bdelim) 681 if (c == '\\') 682 backsl(); 683 return; 684 685 case '\\': 686 if (inmacro) 687 goto sw; 688 689 default: 690 return; 691 } 692} 693 694static void 695sce(void) 696{ 697 char *ap; 698 int n, i; 699 char a[10]; 700 701 for (ap = a; C != '\n'; ap++) { 702 *ap = c; 703 if (ap == &a[9]) { 704 SKIP; 705 ap = a; 706 break; 707 } 708 } 
709 if (ap != a) 710 n = atoi(a); 711 else 712 n = 1; 713 for (i = 0; i < n;) { 714 if (C == '.') { 715 if (C == 'c') { 716 if (C == 'e') { 717 while (C == ' ') 718 ; /* nothing */ 719 if (c == '0') { 720 SKIP; 721 break; 722 } else 723 SKIP; 724 } 725 else 726 SKIP; 727 } else if (c == 'P' || C == 'P') { 728 if (c != '\n') 729 SKIP; 730 break; 731 } else if (c != '\n') 732 SKIP; 733 } else { 734 SKIP; 735 i++; 736 } 737 } 738} 739 740static void 741refer(int c1) 742{ 743 int c2; 744 745 if (c1 != '\n') 746 SKIP; 747 748 for (c2 = -1;;) { 749 if (C != '.') 750 SKIP; 751 else { 752 if (C != ']') 753 SKIP; 754 else { 755 while (C != '\n') 756 c2 = c; 757 if (c2 != -1 && chars[c2] == PUNCT) 758 putchar(c2); 759 return; 760 } 761 } 762 } 763} 764 765static void 766inpic(void) 767{ 768 int c1; 769 char *p1; 770 771 SKIP; 772 p1 = line; 773 c = '\n'; 774 for (;;) { 775 c1 = c; 776 if (C == '.' && c1 == '\n') { 777 if (C != 'P') { 778 if (c == '\n') 779 continue; 780 else { 781 SKIP; 782 c = '\n'; 783 continue; 784 } 785 } 786 if (C != 'E') { 787 if (c == '\n') 788 continue; 789 else { 790 SKIP; 791 c = '\n'; 792 continue; 793 } 794 } 795 SKIP; 796 return; 797 } 798 else if (c == '\"') { 799 while (C != '\"') { 800 if (c == '\\') { 801 if (C == '\"') 802 continue; 803 ungetc(c, infile); 804 backsl(); 805 } else 806 *p1++ = c; 807 } 808 *p1++ = ' '; 809 } 810 else if (c == '\n' && p1 != line) { 811 *p1 = '\0'; 812 if (wordflag) 813 msputwords(NO); 814 else { 815 puts(line); 816 putchar('\n'); 817 } 818 p1 = line; 819 } 820 } 821} 822 823#ifdef DEBUG 824static int 825_C1(void) 826{ 827 828 return C1get; 829} 830 831static int 832_C(void) 833{ 834 835 return Cget; 836} 837#endif /* DEBUG */ 838 839/* 840 * Put out a macro line, using ms and mm conventions. 
841 */ 842static void 843msputmac(char *s, int constant) 844{ 845 char *t; 846 int found; 847 int last; 848 849 last = 0; 850 found = 0; 851 if (wordflag) { 852 msputwords(YES); 853 return; 854 } 855 while (*s) { 856 while (*s == ' ' || *s == '\t') 857 putchar(*s++); 858 for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t) 859 ; /* nothing */ 860 if (*s == '\"') 861 s++; 862 if (t > s + constant && chars[(unsigned char)s[0]] == LETTER && 863 chars[(unsigned char)s[1]] == LETTER) { 864 while (s < t) 865 if (*s == '\"') 866 s++; 867 else 868 putchar(*s++); 869 last = *(t-1); 870 found++; 871 } else if (found && chars[(unsigned char)s[0]] == PUNCT && 872 s[1] == '\0') { 873 putchar(*s++); 874 } else { 875 last = *(t - 1); 876 s = t; 877 } 878 } 879 putchar('\n'); 880 if (msflag && chars[last] == PUNCT) { 881 putchar(last); 882 putchar('\n'); 883 } 884} 885 886/* 887 * put out words (for the -w option) with ms and mm conventions 888 */ 889static void 890msputwords(int macline) 891{ 892 char *p, *p1; 893 int i, nlet; 894 895 for (p1 = line;;) { 896 /* 897 * skip initial specials ampersands and apostrophes 898 */ 899 while (chars[(unsigned char)*p1] < DIGIT) 900 if (*p1++ == '\0') 901 return; 902 nlet = 0; 903 for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p) 904 if (i == LETTER) 905 ++nlet; 906 907 if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) { 908 /* 909 * delete trailing ampersands and apostrophes 910 */ 911 while ((i = chars[(unsigned char)p[-1]]) == PUNCT || 912 i == APOS ) 913 --p; 914 while (p1 < p) 915 putchar(*p1++); 916 putchar('\n'); 917 } else { 918 p1 = p; 919 } 920 } 921} 922 923/* 924 * put out a macro using the me conventions 925 */ 926#define SKIPBLANK(cp) while (*cp == ' ' || *cp == '\t') { cp++; } 927#define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; } 928 929static void 930meputmac(char *cp, int constant) 931{ 932 char *np; 933 int found; 934 int argno; 935 int last; 936 int inquote; 937 938 
last = 0; 939 found = 0; 940 if (wordflag) { 941 meputwords(YES); 942 return; 943 } 944 for (argno = 0; *cp; argno++) { 945 SKIPBLANK(cp); 946 inquote = (*cp == '"'); 947 if (inquote) 948 cp++; 949 for (np = cp; *np; np++) { 950 switch (*np) { 951 case '\n': 952 case '\0': 953 break; 954 955 case '\t': 956 case ' ': 957 if (inquote) 958 continue; 959 else 960 goto endarg; 961 962 case '"': 963 if (inquote && np[1] == '"') { 964 memmove(np, np + 1, strlen(np)); 965 np++; 966 continue; 967 } else { 968 *np = ' '; /* bye bye " */ 969 goto endarg; 970 } 971 972 default: 973 continue; 974 } 975 } 976 endarg: ; 977 /* 978 * cp points at the first char in the arg 979 * np points one beyond the last char in the arg 980 */ 981 if ((argconcat == 0) || (argconcat != argno)) 982 putchar(' '); 983#ifdef FULLDEBUG 984 { 985 char *p; 986 printf("[%d,%d: ", argno, np - cp); 987 for (p = cp; p < np; p++) { 988 putchar(*p); 989 } 990 printf("]"); 991 } 992#endif /* FULLDEBUG */ 993 /* 994 * Determine if the argument merits being printed 995 * 996 * constant is the cut off point below which something 997 * is not a word. 
998 */ 999 if (((np - cp) > constant) && 1000 (inquote || (chars[(unsigned char)cp[0]] == LETTER))) { 1001 for (; cp < np; cp++) 1002 putchar(*cp); 1003 last = np[-1]; 1004 found++; 1005 } else if (found && (np - cp == 1) && 1006 chars[(unsigned char)*cp] == PUNCT) { 1007 putchar(*cp); 1008 } else { 1009 last = np[-1]; 1010 } 1011 cp = np; 1012 } 1013 if (msflag && chars[last] == PUNCT) 1014 putchar(last); 1015 putchar('\n'); 1016} 1017 1018/* 1019 * put out words (for the -w option) with ms and mm conventions 1020 */ 1021static void 1022meputwords(int macline) 1023{ 1024 1025 msputwords(macline); 1026} 1027 1028/* 1029 * 1030 * Skip over a nested set of macros 1031 * 1032 * Possible arguments to noblock are: 1033 * 1034 * fi end of unfilled text 1035 * PE pic ending 1036 * DE display ending 1037 * 1038 * for ms and mm only: 1039 * KE keep ending 1040 * 1041 * NE undocumented match to NS (for mm?) 1042 * LE mm only: matches RL or *L (for lists) 1043 * 1044 * for me: 1045 * ([lqbzcdf] 1046 */ 1047static void 1048noblock(char a1, char a2) 1049{ 1050 int c1,c2; 1051 int eqnf; 1052 int lct; 1053 1054 lct = 0; 1055 eqnf = 1; 1056 SKIP; 1057 for (;;) { 1058 while (C != '.') 1059 if (c == '\n') 1060 continue; 1061 else 1062 SKIP; 1063 if ((c1 = C) == '\n') 1064 continue; 1065 if ((c2 = C) == '\n') 1066 continue; 1067 if (c1 == a1 && c2 == a2) { 1068 SKIP; 1069 if (lct != 0) { 1070 lct--; 1071 continue; 1072 } 1073 if (eqnf) 1074 putchar('.'); 1075 putchar('\n'); 1076 return; 1077 } else if (a1 == 'L' && c2 == 'L') { 1078 lct++; 1079 SKIP; 1080 } 1081 /* 1082 * equations (EQ) nested within a display 1083 */ 1084 else if (c1 == 'E' && c2 == 'Q') { 1085 if ((mac == ME && a1 == ')') 1086 || (mac != ME && a1 == 'D')) { 1087 eqn(); 1088 eqnf=0; 1089 } 1090 } 1091 /* 1092 * turning on filling is done by the paragraphing 1093 * macros 1094 */ 1095 else if (a1 == 'f') { /* .fi */ 1096 if ((mac == ME && (c2 == 'h' || c2 == 'p')) 1097 || (mac != ME && (c1 == 'P' || c2 == 'P'))) { 
1098 SKIP; 1099 return; 1100 } 1101 } else { 1102 SKIP; 1103 } 1104 } 1105} 1106 1107static int 1108/*ARGSUSED*/ 1109EQ(pacmac unused) 1110{ 1111 1112 eqn(); 1113 return 0; 1114} 1115 1116static int 1117/*ARGSUSED*/ 1118domacro(pacmac unused) 1119{ 1120 1121 macro(); 1122 return 0; 1123} 1124 1125static int 1126/*ARGSUSED*/ 1127PS(pacmac unused) 1128{ 1129 1130 for (C; c == ' ' || c == '\t'; C) 1131 ; /* nothing */ 1132 1133 if (c == '<') { /* ".PS < file" -- don't expect a .PE */ 1134 SKIP; 1135 return 0; 1136 } 1137 if (!msflag) 1138 inpic(); 1139 else 1140 noblock('P', 'E'); 1141 return 0; 1142} 1143 1144static int 1145/*ARGSUSED*/ 1146skip(pacmac unused) 1147{ 1148 1149 SKIP; 1150 return 0; 1151} 1152 1153static int 1154/*ARGSUSED*/ 1155intbl(pacmac unused) 1156{ 1157 1158 if (msflag) 1159 stbl(); 1160 else 1161 tbl(); 1162 return 0; 1163} 1164 1165static int 1166/*ARGSUSED*/ 1167outtbl(pacmac unused) 1168{ 1169 1170 intable = NO; 1171 return 0; 1172} 1173 1174int 1175/*ARGSUSED*/ 1176so(pacmac unused) 1177{ 1178 1179 if (!iflag) { 1180 getfname(); 1181 if (fname[0]) { 1182 if (++filesp - &files[0] > MAXFILES) 1183 err(1, "too many nested files (max %d)", 1184 MAXFILES); 1185 infile = *filesp = opn(fname); 1186 } 1187 } 1188 return 0; 1189} 1190 1191static int 1192/*ARGSUSED*/ 1193nx(pacmac unused) 1194{ 1195 1196 if (!iflag) { 1197 getfname(); 1198 if (fname[0] == '\0') 1199 exit(0); 1200 if (infile != stdin) 1201 fclose(infile); 1202 infile = *filesp = opn(fname); 1203 } 1204 return 0; 1205} 1206 1207static int 1208/*ARGSUSED*/ 1209skiptocom(pacmac unused) 1210{ 1211 1212 SKIP_TO_COM; 1213 return COMX; 1214} 1215 1216static int 1217PP(pacmac c12) 1218{ 1219 int c1, c2; 1220 1221 frommac(c12, c1, c2); 1222 printf(".%c%c", c1, c2); 1223 while (C != '\n') 1224 putchar(c); 1225 putchar('\n'); 1226 return 0; 1227} 1228 1229static int 1230/*ARGSUSED*/ 1231AU(pacmac unused) 1232{ 1233 1234 if (mac == MM) 1235 return 0; 1236 SKIP_TO_COM; 1237 return COMX; 1238} 1239 
1240static int 1241SH(pacmac c12) 1242{ 1243 int c1, c2; 1244 1245 frommac(c12, c1, c2); 1246 1247 if (parag) { 1248 printf(".%c%c", c1, c2); 1249 while (C != '\n') 1250 putchar(c); 1251 putchar(c); 1252 putchar('!'); 1253 for (;;) { 1254 while (C != '\n') 1255 putchar(c); 1256 putchar('\n'); 1257 if (C == '.') 1258 return COM; 1259 putchar('!'); 1260 putchar(c); 1261 } 1262 /*NOTREACHED*/ 1263 } else { 1264 SKIP_TO_COM; 1265 return COMX; 1266 } 1267} 1268 1269static int 1270/*ARGSUSED*/ 1271UX(pacmac unused) 1272{ 1273 1274 if (wordflag) 1275 printf("UNIX\n"); 1276 else 1277 printf("UNIX "); 1278 return 0; 1279} 1280 1281static int 1282MMHU(pacmac c12) 1283{ 1284 int c1, c2; 1285 1286 frommac(c12, c1, c2); 1287 if (parag) { 1288 printf(".%c%c", c1, c2); 1289 while (C != '\n') 1290 putchar(c); 1291 putchar('\n'); 1292 } else { 1293 SKIP; 1294 } 1295 return 0; 1296} 1297 1298static int 1299mesnblock(pacmac c12) 1300{ 1301 int c1, c2; 1302 1303 frommac(c12, c1, c2); 1304 noblock(')', c2); 1305 return 0; 1306} 1307 1308static int 1309mssnblock(pacmac c12) 1310{ 1311 int c1, c2; 1312 1313 frommac(c12, c1, c2); 1314 noblock(c1, 'E'); 1315 return 0; 1316} 1317 1318static int 1319/*ARGUSED*/ 1320nf(pacmac unused) 1321{ 1322 1323 noblock('f', 'i'); 1324 return 0; 1325} 1326 1327static int 1328/*ARGUSED*/ 1329ce(pacmac unused) 1330{ 1331 1332 sce(); 1333 return 0; 1334} 1335 1336static int 1337meip(pacmac c12) 1338{ 1339 1340 if (parag) 1341 mepp(c12); 1342 else if (wordflag) /* save the tag */ 1343 regline(meputmac, ONE); 1344 else 1345 SKIP; 1346 return 0; 1347} 1348 1349/* 1350 * only called for -me .pp or .sh, when parag is on 1351 */ 1352static int 1353mepp(pacmac c12) 1354{ 1355 1356 PP(c12); /* eats the line */ 1357 return 0; 1358} 1359 1360/* 1361 * Start of a section heading; output the section name if doing words 1362 */ 1363static int 1364mesh(pacmac c12) 1365{ 1366 1367 if (parag) 1368 mepp(c12); 1369 else if (wordflag) 1370 defcomline(c12); 1371 else 1372 SKIP; 
1373 return 0; 1374} 1375 1376/* 1377 * process a font setting 1378 */ 1379static int 1380mefont(pacmac c12) 1381{ 1382 1383 argconcat = 1; 1384 defcomline(c12); 1385 argconcat = 0; 1386 return 0; 1387} 1388 1389static int 1390manfont(pacmac c12) 1391{ 1392 1393 return mefont(c12); 1394} 1395 1396static int 1397manpp(pacmac c12) 1398{ 1399 1400 return mepp(c12); 1401} 1402 1403static void 1404defcomline(pacmac c12) 1405{ 1406 int c1, c2; 1407 1408 frommac(c12, c1, c2); 1409 if (msflag && mac == MM && c2 == 'L') { 1410 if (disp || c1 == 'R') { 1411 noblock('L', 'E'); 1412 } else { 1413 SKIP; 1414 putchar('.'); 1415 } 1416 } 1417 else if (c1 == '.' && c2 == '.') { 1418 if (msflag) { 1419 SKIP; 1420 return; 1421 } 1422 while (C == '.') 1423 /*VOID*/; 1424 } 1425 ++inmacro; 1426 /* 1427 * Process the arguments to the macro 1428 */ 1429 switch (mac) { 1430 default: 1431 case MM: 1432 case MS: 1433 if (c1 <= 'Z' && msflag) 1434 regline(msputmac, ONE); 1435 else 1436 regline(msputmac, TWO); 1437 break; 1438 case ME: 1439 regline(meputmac, ONE); 1440 break; 1441 } 1442 --inmacro; 1443} 1444 1445static void 1446comline(void) 1447{ 1448 int c1; 1449 int c2; 1450 pacmac c12; 1451 int mid; 1452 int lb, ub; 1453 int hit; 1454 static int tabsize = 0; 1455 static const struct mactab *mactab = NULL; 1456 const struct mactab *mp; 1457 1458 if (mactab == 0) 1459 buildtab(&mactab, &tabsize); 1460com: 1461 while (C == ' ' || c == '\t') 1462 ; 1463comx: 1464 if ((c1 = c) == '\n') 1465 return; 1466 c2 = C; 1467 if (c1 == '.' 
&& c2 != '.') 1468 inmacro = NO; 1469 if (msflag && c1 == '[') { 1470 refer(c2); 1471 return; 1472 } 1473 if (parag && mac==MM && c1 == 'P' && c2 == '\n') { 1474 printf(".P\n"); 1475 return; 1476 } 1477 if (c2 == '\n') 1478 return; 1479 /* 1480 * Single letter macro 1481 */ 1482 if (mac == ME && (c2 == ' ' || c2 == '\t') ) 1483 c2 = ' '; 1484 c12 = tomac(c1, c2); 1485 /* 1486 * binary search through the table of macros 1487 */ 1488 lb = 0; 1489 ub = tabsize - 1; 1490 while (lb <= ub) { 1491 mid = (ub + lb) / 2; 1492 mp = &mactab[mid]; 1493 if (mp->macname < c12) 1494 lb = mid + 1; 1495 else if (mp->macname > c12) 1496 ub = mid - 1; 1497 else { 1498 hit = 1; 1499#ifdef FULLDEBUG 1500 printf("preliminary hit macro %c%c ", c1, c2); 1501#endif /* FULLDEBUG */ 1502 switch (mp->condition) { 1503 case NONE: 1504 hit = YES; 1505 break; 1506 case FNEST: 1507 hit = (filesp == files); 1508 break; 1509 case NOMAC: 1510 hit = !inmacro; 1511 break; 1512 case MAC: 1513 hit = inmacro; 1514 break; 1515 case PARAG: 1516 hit = parag; 1517 break; 1518 case NBLK: 1519 hit = !keepblock; 1520 break; 1521 default: 1522 hit = 0; 1523 } 1524 1525 if (hit) { 1526#ifdef FULLDEBUG 1527 printf("MATCH\n"); 1528#endif /* FULLDEBUG */ 1529 switch ((*(mp->func))(c12)) { 1530 default: 1531 return; 1532 case COMX: 1533 goto comx; 1534 case COM: 1535 goto com; 1536 } 1537 } 1538#ifdef FULLDEBUG 1539 printf("FAIL\n"); 1540#endif /* FULLDEBUG */ 1541 break; 1542 } 1543 } 1544 defcomline(c12); 1545} 1546 1547static int 1548macsort(const void *p1, const void *p2) 1549{ 1550 const struct mactab *t1 = p1; 1551 const struct mactab *t2 = p2; 1552 1553 return t1->macname - t2->macname; 1554} 1555 1556static int 1557sizetab(const struct mactab *mp) 1558{ 1559 int i; 1560 1561 i = 0; 1562 if (mp) { 1563 for (; mp->macname; mp++, i++) 1564 /*VOID*/ ; 1565 } 1566 return i; 1567} 1568 1569static struct mactab * 1570macfill(struct mactab *dst, const struct mactab *src) 1571{ 1572 1573 if (src) { 1574 while 
(src->macname) 1575 *dst++ = *src++; 1576 } 1577 return dst; 1578} 1579 1580static void 1581usage(void) 1582{ 1583 extern char *__progname; 1584 1585 fprintf(stderr, "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", __progname); 1586 exit(1); 1587} 1588 1589static void 1590buildtab(const struct mactab **r_back, int *r_size) 1591{ 1592 size_t size; 1593 const struct mactab *p1, *p2; 1594 struct mactab *back, *p; 1595 1596 size = sizetab(troffmactab) + sizetab(ppmactab); 1597 p1 = p2 = NULL; 1598 if (msflag) { 1599 switch (mac) { 1600 case ME: 1601 p1 = memactab; 1602 break; 1603 case MM: 1604 p1 = msmactab; 1605 p2 = mmmactab; 1606 break; 1607 case MS: 1608 p1 = msmactab; 1609 break; 1610 case MA: 1611 p1 = manmactab; 1612 break; 1613 default: 1614 break; 1615 } 1616 } 1617 size += sizetab(p1); 1618 size += sizetab(p2); 1619 back = calloc(size + 2, sizeof(struct mactab)); 1620 if (back == NULL) 1621 err(1, NULL); 1622 1623 p = macfill(back, troffmactab); 1624 p = macfill(p, ppmactab); 1625 p = macfill(p, p1); 1626 p = macfill(p, p2); 1627 1628 qsort(back, size, sizeof(struct mactab), macsort); 1629 *r_size = size; 1630 *r_back = back; 1631} 1632 1633/* 1634 * troff commands 1635 */ 1636static const struct mactab troffmactab[] = { 1637 M(NONE, '\\','"', skip), /* comment */ 1638 M(NOMAC, 'd','e', domacro), /* define */ 1639 M(NOMAC, 'i','g', domacro), /* ignore till .. 
*/ 1640 M(NOMAC, 'a','m', domacro), /* append macro */ 1641 M(NBLK, 'n','f', nf), /* filled */ 1642 M(NBLK, 'c','e', ce), /* centered */ 1643 1644 M(NONE, 's','o', so), /* source a file */ 1645 M(NONE, 'n','x', nx), /* go to next file */ 1646 1647 M(NONE, 't','m', skip), /* print string on tty */ 1648 M(NONE, 'h','w', skip), /* exception hyphen words */ 1649 M(NONE, 0,0, 0) 1650}; 1651 1652/* 1653 * Preprocessor output 1654 */ 1655static const struct mactab ppmactab[] = { 1656 M(FNEST, 'E','Q', EQ), /* equation starting */ 1657 M(FNEST, 'T','S', intbl), /* table starting */ 1658 M(FNEST, 'T','C', intbl), /* alternative table? */ 1659 M(FNEST, 'T','&', intbl), /* table reformatting */ 1660 M(NONE, 'T','E', outtbl),/* table ending */ 1661 M(NONE, 'P','S', PS), /* picture starting */ 1662 M(NONE, 0,0, 0) 1663}; 1664 1665/* 1666 * Particular to ms and mm 1667 */ 1668static const struct mactab msmactab[] = { 1669 M(NONE, 'T','L', skiptocom), /* title follows */ 1670 M(NONE, 'F','S', skiptocom), /* start footnote */ 1671 M(NONE, 'O','K', skiptocom), /* Other kws */ 1672 1673 M(NONE, 'N','R', skip), /* undocumented */ 1674 M(NONE, 'N','D', skip), /* use supplied date */ 1675 1676 M(PARAG, 'P','P', PP), /* begin parag */ 1677 M(PARAG, 'I','P', PP), /* begin indent parag, tag x */ 1678 M(PARAG, 'L','P', PP), /* left blocked parag */ 1679 1680 M(NONE, 'A','U', AU), /* author */ 1681 M(NONE, 'A','I', AU), /* authors institution */ 1682 1683 M(NONE, 'S','H', SH), /* section heading */ 1684 M(NONE, 'S','N', SH), /* undocumented */ 1685 M(NONE, 'U','X', UX), /* unix */ 1686 1687 M(NBLK, 'D','S', mssnblock), /* start display text */ 1688 M(NBLK, 'K','S', mssnblock), /* start keep */ 1689 M(NBLK, 'K','F', mssnblock), /* start float keep */ 1690 M(NONE, 0,0, 0) 1691}; 1692 1693static const struct mactab mmmactab[] = { 1694 M(NONE, 'H',' ', MMHU), /* -mm ? */ 1695 M(NONE, 'H','U', MMHU), /* -mm ? 
*/ 1696 M(PARAG, 'P',' ', PP), /* paragraph for -mm */ 1697 M(NBLK, 'N','S', mssnblock), /* undocumented */ 1698 M(NONE, 0,0, 0) 1699}; 1700 1701static const struct mactab memactab[] = { 1702 M(PARAG, 'p','p', mepp), 1703 M(PARAG, 'l','p', mepp), 1704 M(PARAG, 'n','p', mepp), 1705 M(NONE, 'i','p', meip), 1706 1707 M(NONE, 's','h', mesh), 1708 M(NONE, 'u','h', mesh), 1709 1710 M(NBLK, '(','l', mesnblock), 1711 M(NBLK, '(','q', mesnblock), 1712 M(NBLK, '(','b', mesnblock), 1713 M(NBLK, '(','z', mesnblock), 1714 M(NBLK, '(','c', mesnblock), 1715 1716 M(NBLK, '(','d', mesnblock), 1717 M(NBLK, '(','f', mesnblock), 1718 M(NBLK, '(','x', mesnblock), 1719 1720 M(NONE, 'r',' ', mefont), 1721 M(NONE, 'i',' ', mefont), 1722 M(NONE, 'b',' ', mefont), 1723 M(NONE, 'u',' ', mefont), 1724 M(NONE, 'q',' ', mefont), 1725 M(NONE, 'r','b', mefont), 1726 M(NONE, 'b','i', mefont), 1727 M(NONE, 'b','x', mefont), 1728 M(NONE, 0,0, 0) 1729}; 1730 1731static const struct mactab manmactab[] = { 1732 M(PARAG, 'B','I', manfont), 1733 M(PARAG, 'B','R', manfont), 1734 M(PARAG, 'I','B', manfont), 1735 M(PARAG, 'I','R', manfont), 1736 M(PARAG, 'R','B', manfont), 1737 M(PARAG, 'R','I', manfont), 1738 1739 M(PARAG, 'P','P', manpp), 1740 M(PARAG, 'L','P', manpp), 1741 M(PARAG, 'H','P', manpp), 1742 M(NONE, 0,0, 0) 1743}; 1744