/* deroff.c revision 1.12 */
1/*	$NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $	*/
2
3/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5/*-
6 * Copyright (c) 1988, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33/*
34 * Copyright (C) Caldera International Inc.  2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 *    copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed or owned by Caldera
48 *	International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 *    contributors may be used to endorse or promote products derived from
51 *    this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67#include <sys/cdefs.h>
68__RCSID("$NetBSD: deroff.c,v 1.12 2019/02/03 03:19:29 mrg Exp $");
69
70#include <err.h>
71#include <limits.h>
72#include <stddef.h>
73#include <stdio.h>
74#include <stdlib.h>
75#include <string.h>
76#include <unistd.h>
77
78/*
79 *	Deroff command -- strip troff, eqn, and Tbl sequences from
80 *	a file.  Has two flags argument, -w, to cause output one word per line
81 *	rather than in the original format.
82 *	-mm (or -ms) causes the corresponding macro's to be interpreted
83 *	so that just sentences are output
84 *	-ml  also gets rid of lists.
85 *	Deroff follows .so and .nx commands, removes contents of macro
86 *	definitions, equations (both .EQ ... .EN and $...$),
87 *	Tbl command sequences, and Troff backslash constructions.
88 *
89 *	All input is through the Cget macro;
90 *	the most recently read character is in c.
91 *
92 *	Modified by Robert Henry to process -me and -man macros.
93 */
94
/*
 * Character input.  Every primitive stores the character just read in the
 * global `c' and has that character (or whatever eof() or skeqn() hands
 * back) as its value.
 *
 *	Cget	calls eof() on EOF; calls skeqn() when the character equals
 *		ldelim and no nested file is open (filesp == files)
 *	C1get	calls eof() on EOF; performs no ldelim check
 */
#define Cget \
	((c = getc(infile)) == EOF ? eof() : \
	    ((c == ldelim) && (filesp == files) ? skeqn() : c))
#define C1get \
	((c = getc(infile)) == EOF ? eof() : c)

#ifdef DEBUG
/* debugging build: route the reads through the _C()/_C1() functions */
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/*
 * SKIP is only a loop header: the statement written after it (normally a
 * bare `;') becomes the loop body and runs until a newline has been read.
 * SKIP_TO_COM expands to several statements without the final `;', so it
 * has to be used as a complete statement of its own.
 */
#define SKIP	while (C != '\n')
#define SKIP_TO_COM \
	SKIP; SKIP; pc = c; \
	while (C != '.' || pc != '\n' || C > 'Z') pc = c
108
/* truth values */
#define	YES	1
#define	NO	0

/* macro package selectors kept in `mac' */
#define	MS	0	/* -ms */
#define	MM	1	/* -mm */
#define	ME	2	/* -me */
#define	MA	3	/* -man */

#ifdef DEBUG
/* printable package names, indexed by `mac' */
static char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* values handed to regline() as its `constant' argument */
#define	ONE	1
#define	TWO	2

/* value held by ldelim/rdelim while no eqn delimiter is set */
#define	NOCHAR	-2

/* character classes stored in chars[] */
#define	SPECIAL	0
#define	APOS	1
#define	PUNCT	2
#define	DIGIT	3
#define	LETTER	4

/* number of slots in files[] */
#define	MAXFILES 20
131
/* Option and parser state; main() assigns every one of these on startup. */
static int	iflag;		/* -i: so() and nx() leave the input alone */
static int	wordflag;	/* -w: one word per output line */
static int	msflag;		/* processing a source written using a mac package */
static int	mac;		/* which package (MS, MM, ME or MA) */
static int	disp;		/* set by -ml */
static int	parag;		/* -p */
static int	inmacro;	/* non-zero while inside a macro definition/call */
static int	intable;	/* set by tbl(), cleared by outtbl() */
static int	keepblock;	/* keep blocks of text; normally false when msflag */
141
/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * It is indexed with `(unsigned char)' values and with raw getc() results,
 * so it needs one slot for every possible byte value; bytes that main()
 * does not classify stay 0 (SPECIAL).
 */
static char chars[UCHAR_MAX + 1];
143
144static char line[LINE_MAX];
145static char *lp;
146
147static int c;
148static int pc;
149static int ldelim;
150static int rdelim;
151
152static char fname[PATH_MAX];
153static FILE *files[MAXFILES];
154static FILE **filesp;
155static FILE *infile;
156
157static int argc;
158static char **argv;
159
160/*
161 *	Macro processing
162 *
163 *	Macro table definitions
164 */
/* a two-character macro name packed into one int */
typedef int	pacmac;

/*
 * When non-zero, meputmac() omits the blank it normally prints in front
 * of the argument with this number (-me only).
 */
static int	argconcat = 0;

/* pack two characters into a pacmac: c1 in bits 8-15, c2 in bits 0-7 */
#define	tomac(c1, c2) \
	((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
/* unpack `src' into the lvalues c1 and c2 */
#define	frommac(src, c1, c2) \
	(((c1) = ((src) >> 8) & 0xFF), ((c2) = (src) & 0xFF), \
	    __USE(c1), __USE(c2))
170
171struct mactab {
172	int	condition;
173	pacmac	macname;
174	int	(*func)(pacmac);
175};
176
177static const struct	mactab	troffmactab[];
178static const struct	mactab	ppmactab[];
179static const struct	mactab	msmactab[];
180static const struct	mactab	mmmactab[];
181static const struct	mactab	memactab[];
182static const struct	mactab	manmactab[];
183
184/*
185 *	Macro table initialization
186 */
187#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}
188
189/*
190 *	Flags for matching conditions other than
191 *	the macro name
192 */
193#define	NONE		0
194#define	FNEST		1		/* no nested files */
195#define	NOMAC		2		/* no macro */
196#define	MAC		3		/* macro */
197#define	PARAG		4		/* in a paragraph */
198#define	MSF		5		/* msflag is on */
199#define	NBLK		6		/* set if no blocks to be kept */
200
201/*
202 *	Return codes from macro minions, determine where to jump,
203 *	how to repeat/reprocess text
204 */
205#define	COMX		1		/* goto comx */
206#define	COM		2		/* goto com */
207
208static int	 skeqn(void);
209static int	 eof(void);
210#ifdef DEBUG
211static int	 _C1(void);
212static int	 _C(void);
213#endif
214static int	 EQ(pacmac);
215static int	 domacro(pacmac);
216static int	 PS(pacmac);
217static int	 skip(pacmac);
218static int	 intbl(pacmac);
219static int	 outtbl(pacmac);
220static int	 so(pacmac);
221static int	 nx(pacmac);
222static int	 skiptocom(pacmac);
223static int	 PP(pacmac);
224static int	 AU(pacmac);
225static int	 SH(pacmac);
226static int	 UX(pacmac);
227static int	 MMHU(pacmac);
228static int	 mesnblock(pacmac);
229static int	 mssnblock(pacmac);
230static int	 nf(pacmac);
231static int	 ce(pacmac);
232static int	 meip(pacmac);
233static int	 mepp(pacmac);
234static int	 mesh(pacmac);
235static int	 mefont(pacmac);
236static int	 manfont(pacmac);
237static int	 manpp(pacmac);
238static int	 macsort(const void *, const void *);
239static int	 sizetab(const struct mactab *);
240static void	 getfname(void);
241static void	 textline(char *, int);
242static void	 work(void) __dead;
243static void	 regline(void (*)(char *, int), int);
244static void	 macro(void);
245static void	 tbl(void);
246static void	 stbl(void);
247static void	 eqn(void);
248static void	 backsl(void);
249static void	 sce(void);
250static void	 refer(int);
251static void	 inpic(void);
252static void	 msputmac(char *, int);
253static void	 msputwords(int);
254static void	 meputmac(char *, int);
255static void	 meputwords(int);
256static void	 noblock(char, char);
257static void	 defcomline(pacmac);
258static void	 comline(void);
259static void	 buildtab(const struct mactab **, int *);
260static FILE	*opn(char *);
261static struct mactab *macfill(struct mactab *, const struct mactab *);
262static void usage(void) __dead;
263
264int
265main(int ac, char **av)
266{
267	int	i, ch;
268	int	errflg = 0;
269	int	kflag = NO;
270
271	iflag = NO;
272	wordflag = NO;
273	msflag = NO;
274	mac = ME;
275	disp = NO;
276	parag = NO;
277	inmacro = NO;
278	intable = NO;
279	ldelim	= NOCHAR;
280	rdelim	= NOCHAR;
281	keepblock = YES;
282
283	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
284		switch (ch) {
285		case 'i':
286			iflag = YES;
287			break;
288		case 'k':
289			kflag = YES;
290			break;
291		case 'm':
292			msflag = YES;
293			keepblock = NO;
294			switch (optarg[0]) {
295			case 'm':
296				mac = MM;
297				break;
298			case 's':
299				mac = MS;
300				break;
301			case 'e':
302				mac = ME;
303				break;
304			case 'a':
305				mac = MA;
306				break;
307			case 'l':
308				disp = YES;
309				break;
310			default:
311				errflg++;
312				break;
313			}
314			if (errflg == 0 && optarg[1] != '\0')
315				errflg++;
316			break;
317		case 'p':
318			parag = YES;
319			break;
320		case 'w':
321			wordflag = YES;
322			kflag = YES;
323			break;
324		default:
325			errflg++;
326		}
327	}
328	argc = ac - optind;
329	argv = av + optind;
330
331	if (kflag)
332		keepblock = YES;
333	if (errflg)
334		usage();
335
336#ifdef DEBUG
337	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
338		msflag, mactab[mac], keepblock, disp);
339#endif /* DEBUG */
340	if (argc == 0) {
341		infile = stdin;
342	} else {
343		infile = opn(argv[0]);
344		--argc;
345		++argv;
346	}
347	files[0] = infile;
348	filesp = &files[0];
349
350	for (i = 'a'; i <= 'z' ; ++i)
351		chars[i] = LETTER;
352	for (i = 'A'; i <= 'Z'; ++i)
353		chars[i] = LETTER;
354	for (i = '0'; i <= '9'; ++i)
355		chars[i] = DIGIT;
356	chars['\''] = APOS;
357	chars['&'] = APOS;
358	chars['.'] = PUNCT;
359	chars[','] = PUNCT;
360	chars[';'] = PUNCT;
361	chars['?'] = PUNCT;
362	chars[':'] = PUNCT;
363	work();
364	return 0;
365}
366
367static int
368skeqn(void)
369{
370
371	while ((c = getc(infile)) != rdelim) {
372		if (c == EOF)
373			c = eof();
374		else if (c == '"') {
375			while ((c = getc(infile)) != '"') {
376				if (c == EOF ||
377				    (c == '\\' && (c = getc(infile)) == EOF))
378					c = eof();
379			}
380		}
381	}
382	if (msflag)
383		return c == 'x';
384	return c == ' ';
385}
386
/*
 * Open the file named `p' for reading and return the stream.
 * Does not return on failure: err() reports the error and exits with 1.
 */
static FILE *
opn(char *p)
{
	FILE *stream = fopen(p, "r");

	if (stream == NULL)
		err(1, "fopen %s", p);
	return stream;
}
397
398static int
399eof(void)
400{
401
402	if (infile != stdin)
403		fclose(infile);
404	if (filesp > files)
405		infile = *--filesp;
406	else if (argc > 0) {
407		infile = opn(argv[0]);
408		--argc;
409		++argv;
410	} else
411		exit(0);
412	return C;
413}
414
415static void
416getfname(void)
417{
418	char *p;
419	struct chain {
420		struct chain *nextp;
421		char *datap;
422	} *q;
423	static struct chain *namechain= NULL;
424
425	while (C == ' ')
426		;	/* nothing */
427
428	for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) &&
429	    (*p = c) != '\n' &&
430	    c != ' ' && c != '\t' && c != '\\'; ++p)
431		C;
432	*p = '\0';
433	while (c != '\n')
434		C;
435
436	/* see if this name has already been used */
437	for (q = namechain ; q; q = q->nextp)
438		if (strcmp(fname, q->datap) == 0) {
439			fname[0] = '\0';
440			return;
441		}
442
443	q = (struct chain *) malloc(sizeof(struct chain));
444	if (q == NULL)
445		err(1, NULL);
446	q->nextp = namechain;
447	q->datap = strdup(fname);
448	if (q->datap == NULL)
449		err(1, NULL);
450	namechain = q;
451}
452
453/*ARGSUSED*/
454static void
455textline(char *str, int constant)
456{
457
458	if (wordflag) {
459		msputwords(0);
460		return;
461	}
462	puts(str);
463}
464
465static void
466work(void)
467{
468
469	for (;;) {
470		C;
471#ifdef FULLDEBUG
472		printf("Starting work with `%c'\n", c);
473#endif /* FULLDEBUG */
474		if (c == '.' || c == '\'')
475			comline();
476		else
477			regline(textline, TWO);
478	}
479}
480
481static void
482regline(void (*pfunc)(char *, int), int constant)
483{
484
485	line[0] = c;
486	lp = line;
487	while (lp - line < (ptrdiff_t)sizeof(line)) {
488		if (c == '\\') {
489			*lp = ' ';
490			backsl();
491		}
492		if (c == '\n')
493			break;
494		if (intable && c == 'T') {
495			*++lp = C;
496			if (c == '{' || c == '}') {
497				lp[-1] = ' ';
498				*lp = C;
499			}
500		} else {
501			*++lp = C;
502		}
503	}
504	*lp = '\0';
505
506	if (line[0] != '\0')
507		(*pfunc)(line, constant);
508}
509
510static void
511macro(void)
512{
513
514	if (msflag) {
515		do {
516			SKIP;
517		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
518		if (c != '\n')
519			SKIP;
520		return;
521	}
522	SKIP;
523	inmacro = YES;
524}
525
526static void
527tbl(void)
528{
529
530	while (C != '.')
531		;	/* nothing */
532	SKIP;
533	intable = YES;
534}
535
536static void
537stbl(void)
538{
539
540	while (C != '.')
541		;	/* nothing */
542	SKIP_TO_COM;
543	if (c != 'T' || C != 'E') {
544		SKIP;
545		pc = c;
546		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
547			pc = c;
548	}
549}
550
551static void
552eqn(void)
553{
554	int c1, c2;
555	int dflg;
556	char last;
557
558	last=0;
559	dflg = 1;
560	SKIP;
561
562	for (;;) {
563		if (C1 == '.'  || c == '\'') {
564			while (C1 == ' ' || c == '\t')
565				;
566			if (c == 'E' && C1 == 'N') {
567				SKIP;
568				if (msflag && dflg) {
569					putchar('x');
570					putchar(' ');
571					if (last) {
572						putchar(last);
573						putchar('\n');
574					}
575				}
576				return;
577			}
578		} else if (c == 'd') {
579			/* look for delim */
580			if (C1 == 'e' && C1 == 'l')
581				if (C1 == 'i' && C1 == 'm') {
582					while (C1 == ' ')
583						;	/* nothing */
584
585					if ((c1 = c) == '\n' ||
586					    (c2 = C1) == '\n' ||
587					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
588						ldelim = NOCHAR;
589						rdelim = NOCHAR;
590					} else {
591						ldelim = c1;
592						rdelim = c2;
593					}
594				}
595			dflg = 0;
596		}
597
598		if (c != '\n')
599			while (C1 != '\n') {
600				if (chars[c] == PUNCT)
601					last = c;
602				else if (c != ' ')
603					last = 0;
604			}
605	}
606}
607
608/* skip over a complete backslash construction */
609static void
610backsl(void)
611{
612	int bdelim;
613
614sw:
615	switch (C) {
616	case '"':
617		SKIP;
618		return;
619
620	case 's':
621		if (C == '\\')
622			backsl();
623		else {
624			while (C >= '0' && c <= '9')
625				;	/* nothing */
626			ungetc(c, infile);
627			c = '0';
628		}
629		--lp;
630		return;
631
632	case 'f':
633	case 'n':
634	case '*':
635		if (C != '(')
636			return;
637
638		/* FALLTHROUGH */
639	case '(':
640		if (msflag) {
641			if (C == 'e') {
642				if (C == 'm') {
643					*lp = '-';
644					return;
645				}
646			}
647			else if (c != '\n')
648				C;
649			return;
650		}
651		if (C != '\n')
652			C;
653		return;
654
655	case '$':
656		C;	/* discard argument number */
657		return;
658
659	case 'b':
660	case 'x':
661	case 'v':
662	case 'h':
663	case 'w':
664	case 'o':
665	case 'l':
666	case 'L':
667		if ((bdelim = C) == '\n')
668			return;
669		while (C != '\n' && c != bdelim)
670			if (c == '\\')
671				backsl();
672		return;
673
674	case '\\':
675		if (inmacro)
676			goto sw;
677
678	default:
679		return;
680	}
681}
682
683static void
684sce(void)
685{
686	char *ap;
687	int n, i;
688	char a[10];
689
690	for (ap = a; C != '\n'; ap++) {
691		*ap = c;
692		if (ap == &a[9]) {
693			SKIP;
694			ap = a;
695			break;
696		}
697	}
698	if (ap != a)
699		n = atoi(a);
700	else
701		n = 1;
702	for (i = 0; i < n;) {
703		if (C == '.') {
704			if (C == 'c') {
705				if (C == 'e') {
706					while (C == ' ')
707						;	/* nothing */
708					if (c == '0') {
709						SKIP;
710						break;
711					} else
712						SKIP;
713				}
714				else
715					SKIP;
716			} else if (c == 'P' || C == 'P') {
717				if (c != '\n')
718					SKIP;
719				break;
720			} else if (c != '\n')
721				SKIP;
722		} else {
723			SKIP;
724			i++;
725		}
726	}
727}
728
729static void
730refer(int c1)
731{
732	int c2;
733
734	if (c1 != '\n')
735		SKIP;
736
737	for (c2 = -1;;) {
738		if (C != '.')
739			SKIP;
740		else {
741			if (C != ']')
742				SKIP;
743			else {
744				while (C != '\n')
745					c2 = c;
746				if (c2 != -1 && chars[c2] == PUNCT)
747					putchar(c2);
748				return;
749			}
750		}
751	}
752}
753
754static void
755inpic(void)
756{
757	int c1;
758	char *p1;
759
760	SKIP;
761	p1 = line;
762	c = '\n';
763	for (;;) {
764		c1 = c;
765		if (C == '.' && c1 == '\n') {
766			if (C != 'P') {
767				if (c == '\n')
768					continue;
769				else {
770					SKIP;
771					c = '\n';
772					continue;
773				}
774			}
775			if (C != 'E') {
776				if (c == '\n')
777					continue;
778				else {
779					SKIP;
780					c = '\n';
781					continue;
782				}
783			}
784			SKIP;
785			return;
786		}
787		else if (c == '\"') {
788			while (C != '\"') {
789				if (c == '\\') {
790					if (C == '\"')
791						continue;
792					ungetc(c, infile);
793					backsl();
794				} else
795					*p1++ = c;
796			}
797			*p1++ = ' ';
798		}
799		else if (c == '\n' && p1 != line) {
800			*p1 = '\0';
801			if (wordflag)
802				msputwords(NO);
803			else {
804				puts(line);
805				putchar('\n');
806			}
807			p1 = line;
808		}
809	}
810}
811
#ifdef DEBUG
/* Function form of C1get; C1 expands to a call of this under DEBUG. */
static int
_C1(void)
{
	int ch = C1get;

	return ch;
}

/* Function form of Cget; C expands to a call of this under DEBUG. */
static int
_C(void)
{
	int ch = Cget;

	return ch;
}
#endif /* DEBUG */
827
828/*
829 *	Put out a macro line, using ms and mm conventions.
830 */
831static void
832msputmac(char *s, int constant)
833{
834	char *t;
835	int found;
836	int last;
837
838	last = 0;
839	found = 0;
840	if (wordflag) {
841		msputwords(YES);
842		return;
843	}
844	while (*s) {
845		while (*s == ' ' || *s == '\t')
846			putchar(*s++);
847		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
848			;	/* nothing */
849		if (*s == '\"')
850			s++;
851		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
852		    chars[(unsigned char)s[1]] == LETTER) {
853			while (s < t)
854				if (*s == '\"')
855					s++;
856				else
857					putchar(*s++);
858			last = *(t-1);
859			found++;
860		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
861		    s[1] == '\0') {
862			putchar(*s++);
863		} else {
864			last = *(t - 1);
865			s = t;
866		}
867	}
868	putchar('\n');
869	if (msflag && chars[last] == PUNCT) {
870		putchar(last);
871		putchar('\n');
872	}
873}
874
875/*
876 *	put out words (for the -w option) with ms and mm conventions
877 */
878static void
879msputwords(int macline)
880{
881	char *p, *p1;
882	int i, nlet;
883
884	for (p1 = line;;) {
885		/*
886		 *	skip initial specials ampersands and apostrophes
887		 */
888		while (chars[(unsigned char)*p1] < DIGIT)
889			if (*p1++ == '\0')
890				return;
891		nlet = 0;
892		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
893			if (i == LETTER)
894				++nlet;
895
896		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
897			/*
898			 *	delete trailing ampersands and apostrophes
899			 */
900			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
901			    i == APOS )
902				--p;
903			while (p1 < p)
904				putchar(*p1++);
905			putchar('\n');
906		} else {
907			p1 = p;
908		}
909	}
910}
911
/*
 *	put out a macro using the me conventions
 *
 *	SKIPBLANK advances the pointer lvalue cp over blanks and tabs;
 *	SKIPNONBLANK advances it up to the next blank, tab or NUL.
 */
#define SKIPBLANK(cp) \
	while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
#define SKIPNONBLANK(cp) \
	while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
917
918static void
919meputmac(char *cp, int constant)
920{
921	char	*np;
922	int	found;
923	int	argno;
924	int	last;
925	int	inquote;
926
927	last = 0;
928	found = 0;
929	if (wordflag) {
930		meputwords(YES);
931		return;
932	}
933	for (argno = 0; *cp; argno++) {
934		SKIPBLANK(cp);
935		inquote = (*cp == '"');
936		if (inquote)
937			cp++;
938		for (np = cp; *np; np++) {
939			switch (*np) {
940			case '\n':
941			case '\0':
942				break;
943
944			case '\t':
945			case ' ':
946				if (inquote)
947					continue;
948				else
949					goto endarg;
950
951			case '"':
952				if (inquote && np[1] == '"') {
953					memmove(np, np + 1, strlen(np));
954					np++;
955					continue;
956				} else {
957					*np = ' '; 	/* bye bye " */
958					goto endarg;
959				}
960
961			default:
962				continue;
963			}
964		}
965		endarg: ;
966		/*
967		 *	cp points at the first char in the arg
968		 *	np points one beyond the last char in the arg
969		 */
970		if ((argconcat == 0) || (argconcat != argno))
971			putchar(' ');
972#ifdef FULLDEBUG
973		{
974			char	*p;
975			printf("[%d,%d: ", argno, np - cp);
976			for (p = cp; p < np; p++) {
977				putchar(*p);
978			}
979			printf("]");
980		}
981#endif /* FULLDEBUG */
982		/*
983		 *	Determine if the argument merits being printed
984		 *
985		 *	constant is the cut off point below which something
986		 *	is not a word.
987		 */
988		if (((np - cp) > constant) &&
989		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
990			for (; cp < np; cp++)
991				putchar(*cp);
992			last = np[-1];
993			found++;
994		} else if (found && (np - cp == 1) &&
995		    chars[(unsigned char)*cp] == PUNCT) {
996			putchar(*cp);
997		} else {
998			last = np[-1];
999		}
1000		cp = np;
1001	}
1002	if (msflag && chars[last] == PUNCT)
1003		putchar(last);
1004	putchar('\n');
1005}
1006
/*
 *	put out words (for the -w option) for the me package; this just
 *	hands `macline' on to msputwords()
 */
static void
meputwords(int macline)
{
	int passed = macline;

	msputwords(passed);
}
1016
1017/*
1018 *
1019 *	Skip over a nested set of macros
1020 *
1021 *	Possible arguments to noblock are:
1022 *
1023 *	fi	end of unfilled text
1024 *	PE	pic ending
1025 *	DE	display ending
1026 *
1027 *	for ms and mm only:
1028 *		KE	keep ending
1029 *
1030 *		NE	undocumented match to NS (for mm?)
1031 *		LE	mm only: matches RL or *L (for lists)
1032 *
1033 *	for me:
1034 *		([lqbzcdf]
1035 */
1036static void
1037noblock(char a1, char a2)
1038{
1039	int c1,c2;
1040	int eqnf;
1041	int lct;
1042
1043	lct = 0;
1044	eqnf = 1;
1045	SKIP;
1046	for (;;) {
1047		while (C != '.')
1048			if (c == '\n')
1049				continue;
1050			else
1051				SKIP;
1052		if ((c1 = C) == '\n')
1053			continue;
1054		if ((c2 = C) == '\n')
1055			continue;
1056		if (c1 == a1 && c2 == a2) {
1057			SKIP;
1058			if (lct != 0) {
1059				lct--;
1060				continue;
1061			}
1062			if (eqnf)
1063				putchar('.');
1064			putchar('\n');
1065			return;
1066		} else if (a1 == 'L' && c2 == 'L') {
1067			lct++;
1068			SKIP;
1069		}
1070		/*
1071		 *	equations (EQ) nested within a display
1072		 */
1073		else if (c1 == 'E' && c2 == 'Q') {
1074			if ((mac == ME && a1 == ')')
1075			    || (mac != ME && a1 == 'D')) {
1076				eqn();
1077				eqnf=0;
1078			}
1079		}
1080		/*
1081		 *	turning on filling is done by the paragraphing
1082		 *	macros
1083		 */
1084		else if (a1 == 'f') {	/* .fi */
1085			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
1086			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1087				SKIP;
1088				return;
1089			}
1090		} else {
1091			SKIP;
1092		}
1093	}
1094}
1095
1096static int
1097/*ARGSUSED*/
1098EQ(pacmac unused)
1099{
1100
1101	eqn();
1102	return 0;
1103}
1104
1105static int
1106/*ARGSUSED*/
1107domacro(pacmac unused)
1108{
1109
1110	macro();
1111	return 0;
1112}
1113
1114static int
1115/*ARGSUSED*/
1116PS(pacmac unused)
1117{
1118
1119	for (C; c == ' ' || c == '\t'; C)
1120		;	/* nothing */
1121
1122	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1123		SKIP;
1124		return 0;
1125	}
1126	if (!msflag)
1127		inpic();
1128	else
1129		noblock('P', 'E');
1130	return 0;
1131}
1132
1133static int
1134/*ARGSUSED*/
1135skip(pacmac unused)
1136{
1137
1138	SKIP;
1139	return 0;
1140}
1141
1142static int
1143/*ARGSUSED*/
1144intbl(pacmac unused)
1145{
1146
1147	if (msflag)
1148		stbl();
1149	else
1150		tbl();
1151	return 0;
1152}
1153
1154static int
1155/*ARGSUSED*/
1156outtbl(pacmac unused)
1157{
1158
1159	intable = NO;
1160	return 0;
1161}
1162
1163static int
1164/*ARGSUSED*/
1165so(pacmac unused)
1166{
1167
1168	if (!iflag) {
1169		getfname();
1170		if (fname[0]) {
1171			if (++filesp - &files[0] > MAXFILES)
1172				err(1, "too many nested files (max %d)",
1173				    MAXFILES);
1174			infile = *filesp = opn(fname);
1175		}
1176	}
1177	return 0;
1178}
1179
1180static int
1181/*ARGSUSED*/
1182nx(pacmac unused)
1183{
1184
1185	if (!iflag) {
1186		getfname();
1187		if (fname[0] == '\0')
1188			exit(0);
1189		if (infile != stdin)
1190			fclose(infile);
1191		infile = *filesp = opn(fname);
1192	}
1193	return 0;
1194}
1195
1196static int
1197/*ARGSUSED*/
1198skiptocom(pacmac unused)
1199{
1200
1201	SKIP_TO_COM;
1202	return COMX;
1203}
1204
1205static int
1206PP(pacmac c12)
1207{
1208	int c1, c2;
1209
1210	frommac(c12, c1, c2);
1211	printf(".%c%c", c1, c2);
1212	while (C != '\n')
1213		putchar(c);
1214	putchar('\n');
1215	return 0;
1216}
1217
1218static int
1219/*ARGSUSED*/
1220AU(pacmac unused)
1221{
1222
1223	if (mac == MM)
1224		return 0;
1225	SKIP_TO_COM;
1226	return COMX;
1227}
1228
1229static int
1230SH(pacmac c12)
1231{
1232	int c1, c2;
1233
1234	frommac(c12, c1, c2);
1235
1236	if (parag) {
1237		printf(".%c%c", c1, c2);
1238		while (C != '\n')
1239			putchar(c);
1240		putchar(c);
1241		putchar('!');
1242		for (;;) {
1243			while (C != '\n')
1244				putchar(c);
1245			putchar('\n');
1246			if (C == '.')
1247				return COM;
1248			putchar('!');
1249			putchar(c);
1250		}
1251		/*NOTREACHED*/
1252	} else {
1253		SKIP_TO_COM;
1254		return COMX;
1255	}
1256}
1257
1258static int
1259/*ARGSUSED*/
1260UX(pacmac unused)
1261{
1262
1263	if (wordflag)
1264		printf("UNIX\n");
1265	else
1266		printf("UNIX ");
1267	return 0;
1268}
1269
1270static int
1271MMHU(pacmac c12)
1272{
1273	int c1, c2;
1274
1275	frommac(c12, c1, c2);
1276	if (parag) {
1277		printf(".%c%c", c1, c2);
1278		while (C != '\n')
1279			putchar(c);
1280		putchar('\n');
1281	} else {
1282		SKIP;
1283	}
1284	return 0;
1285}
1286
1287static int
1288mesnblock(pacmac c12)
1289{
1290	int c1, c2;
1291
1292	frommac(c12, c1, c2);
1293	noblock(')', c2);
1294	return 0;
1295}
1296
1297static int
1298mssnblock(pacmac c12)
1299{
1300	int c1, c2;
1301
1302	frommac(c12, c1, c2);
1303	noblock(c1, 'E');
1304	return 0;
1305}
1306
1307static int
1308/*ARGUSED*/
1309nf(pacmac unused)
1310{
1311
1312	noblock('f', 'i');
1313	return 0;
1314}
1315
1316static int
1317/*ARGUSED*/
1318ce(pacmac unused)
1319{
1320
1321	sce();
1322	return 0;
1323}
1324
1325static int
1326meip(pacmac c12)
1327{
1328
1329	if (parag)
1330		mepp(c12);
1331	else if (wordflag)	/* save the tag */
1332		regline(meputmac, ONE);
1333	else
1334		SKIP;
1335	return 0;
1336}
1337
1338/*
1339 *	only called for -me .pp or .sh, when parag is on
1340 */
1341static int
1342mepp(pacmac c12)
1343{
1344
1345	PP(c12);		/* eats the line */
1346	return 0;
1347}
1348
1349/*
1350 *	Start of a section heading; output the section name if doing words
1351 */
1352static int
1353mesh(pacmac c12)
1354{
1355
1356	if (parag)
1357		mepp(c12);
1358	else if (wordflag)
1359		defcomline(c12);
1360	else
1361		SKIP;
1362	return 0;
1363}
1364
1365/*
1366 *	process a font setting
1367 */
1368static int
1369mefont(pacmac c12)
1370{
1371
1372	argconcat = 1;
1373	defcomline(c12);
1374	argconcat = 0;
1375	return 0;
1376}
1377
1378static int
1379manfont(pacmac c12)
1380{
1381
1382	return mefont(c12);
1383}
1384
1385static int
1386manpp(pacmac c12)
1387{
1388
1389	return mepp(c12);
1390}
1391
1392static void
1393defcomline(pacmac c12)
1394{
1395	int c1, c2;
1396
1397	frommac(c12, c1, c2);
1398	if (msflag && mac == MM && c2 == 'L') {
1399		if (disp || c1 == 'R') {
1400			noblock('L', 'E');
1401		} else {
1402			SKIP;
1403			putchar('.');
1404		}
1405	}
1406	else if (c1 == '.' && c2 == '.') {
1407		if (msflag) {
1408			SKIP;
1409			return;
1410		}
1411		while (C == '.')
1412			/*VOID*/;
1413	}
1414	++inmacro;
1415	/*
1416	 *	Process the arguments to the macro
1417	 */
1418	switch (mac) {
1419	default:
1420	case MM:
1421	case MS:
1422		if (c1 <= 'Z' && msflag)
1423			regline(msputmac, ONE);
1424		else
1425			regline(msputmac, TWO);
1426		break;
1427	case ME:
1428		regline(meputmac, ONE);
1429		break;
1430	}
1431	--inmacro;
1432}
1433
1434static void
1435comline(void)
1436{
1437	int	c1;
1438	int	c2;
1439	pacmac	c12;
1440	int	mid;
1441	int	lb, ub;
1442	int	hit;
1443	static	int	tabsize = 0;
1444	static	const struct mactab	*mactab = NULL;
1445	const struct mactab	*mp;
1446
1447	if (mactab == 0)
1448		 buildtab(&mactab, &tabsize);
1449com:
1450	while (C == ' ' || c == '\t')
1451		;
1452comx:
1453	if ((c1 = c) == '\n')
1454		return;
1455	c2 = C;
1456	if (c1 == '.' && c2 != '.')
1457		inmacro = NO;
1458	if (msflag && c1 == '[') {
1459		refer(c2);
1460		return;
1461	}
1462	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1463		printf(".P\n");
1464		return;
1465	}
1466	if (c2 == '\n')
1467		return;
1468	/*
1469	 *	Single letter macro
1470	 */
1471	if (mac == ME && (c2 == ' ' || c2 == '\t') )
1472		c2 = ' ';
1473	c12 = tomac(c1, c2);
1474	/*
1475	 *	binary search through the table of macros
1476	 */
1477	lb = 0;
1478	ub = tabsize - 1;
1479	while (lb <= ub) {
1480		mid = (ub + lb) / 2;
1481		mp = &mactab[mid];
1482		if (mp->macname < c12)
1483			lb = mid + 1;
1484		else if (mp->macname > c12)
1485			ub = mid - 1;
1486		else {
1487			hit = 1;
1488#ifdef FULLDEBUG
1489			printf("preliminary hit macro %c%c ", c1, c2);
1490#endif /* FULLDEBUG */
1491			switch (mp->condition) {
1492			case NONE:
1493				hit = YES;
1494				break;
1495			case FNEST:
1496				hit = (filesp == files);
1497				break;
1498			case NOMAC:
1499				hit = !inmacro;
1500				break;
1501			case MAC:
1502				hit = inmacro;
1503				break;
1504			case PARAG:
1505				hit = parag;
1506				break;
1507			case NBLK:
1508				hit = !keepblock;
1509				break;
1510			default:
1511				hit = 0;
1512			}
1513
1514			if (hit) {
1515#ifdef FULLDEBUG
1516				printf("MATCH\n");
1517#endif /* FULLDEBUG */
1518				switch ((*(mp->func))(c12)) {
1519				default:
1520					return;
1521				case COMX:
1522					goto comx;
1523				case COM:
1524					goto com;
1525				}
1526			}
1527#ifdef FULLDEBUG
1528			printf("FAIL\n");
1529#endif /* FULLDEBUG */
1530			break;
1531		}
1532	}
1533	defcomline(c12);
1534}
1535
1536static int
1537macsort(const void *p1, const void *p2)
1538{
1539	const struct mactab *t1 = p1;
1540	const struct mactab *t2 = p2;
1541
1542	return t1->macname - t2->macname;
1543}
1544
1545static int
1546sizetab(const struct mactab *mp)
1547{
1548	int i;
1549
1550	i = 0;
1551	if (mp) {
1552		for (; mp->macname; mp++, i++)
1553			/*VOID*/ ;
1554	}
1555	return i;
1556}
1557
1558static struct mactab *
1559macfill(struct mactab *dst, const struct mactab *src)
1560{
1561
1562	if (src) {
1563		while (src->macname)
1564			*dst++ = *src++;
1565	}
1566	return dst;
1567}
1568
/*
 *	Write the command synopsis to stderr and terminate the program
 *	with exit status 1.  Does not return.
 */
static void
usage(void)
{
	/* program name; the variable itself is defined outside this file */
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n",
	    __progname);
	exit(1);
}
1577
1578static void
1579buildtab(const struct mactab **r_back, int *r_size)
1580{
1581	size_t	size;
1582	const struct	mactab	*p1, *p2;
1583	struct	mactab	*back, *p;
1584
1585	size = sizetab(troffmactab) + sizetab(ppmactab);
1586	p1 = p2 = NULL;
1587	if (msflag) {
1588		switch (mac) {
1589		case ME:
1590			p1 = memactab;
1591			break;
1592		case MM:
1593			p1 = msmactab;
1594			p2 = mmmactab;
1595			break;
1596		case MS:
1597			p1 = msmactab;
1598			break;
1599		case MA:
1600			p1 = manmactab;
1601			break;
1602		default:
1603			break;
1604		}
1605	}
1606	size += sizetab(p1);
1607	size += sizetab(p2);
1608	back = calloc(size + 2, sizeof(struct mactab));
1609	if (back == NULL)
1610		err(1, NULL);
1611
1612	p = macfill(back, troffmactab);
1613	p = macfill(p, ppmactab);
1614	p = macfill(p, p1);
1615	p = macfill(p, p2);
1616
1617	qsort(back, size, sizeof(struct mactab), macsort);
1618	*r_size = size;
1619	*r_back = back;
1620}
1621
/*
 *	troff commands
 *
 *	Always merged into the lookup table by buildtab().
 *
 *	Each entry names a condition, the two characters of the request,
 *	and the handler to call (the M() macro is defined earlier in the
 *	file).  The lookup code runs the handler only when the entry's
 *	condition holds:
 *		NONE	always
 *		FNEST	only when filesp == files
 *		NOMAC	only when inmacro is clear
 *		MAC	only when inmacro is set
 *		PARAG	only when parag is set
 *		NBLK	only when keepblock is clear
 *	The all-zero entry ends the table; sizetab() and macfill() stop
 *	at the first entry whose macname is zero.
 */
static const struct mactab	troffmactab[] = {
	M(NONE,		'\\','"',	skip),	/* comment */
	M(NOMAC,	'd','e',	domacro),	/* define */
	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
	M(NOMAC,	'a','m',	domacro),	/* append macro */
	M(NBLK,		'n','f',	nf),	/* no-fill (unfilled) text */
	M(NBLK,		'c','e',	ce),	/* centered */

	M(NONE,		's','o',	so),	/* source a file */
	M(NONE,		'n','x',	nx),	/* go to next file */

	M(NONE,		't','m',	skip),	/* print string on tty */
	M(NONE,		'h','w',	skip),	/* exception hyphen words */
	M(NONE,		0,0,		0)	/* end of table */
};
1640
/*
 *	Preprocessor output
 *
 *	Requests that delimit equation, table and picture input.
 *	Always merged into the lookup table by buildtab().
 *	The all-zero entry ends the table.
 */
static const struct mactab	ppmactab[] = {
	M(FNEST,	'E','Q',	EQ),	/* equation starting */
	M(FNEST,	'T','S',	intbl),	/* table starting */
	M(FNEST,	'T','C',	intbl),	/* alternative table? */
	M(FNEST,	'T','&',	intbl),	/* table reformatting */
	M(NONE,		'T','E',	outtbl),/* table ending */
	M(NONE,		'P','S',	PS),	/* picture starting */
	M(NONE,		0,0,		0)	/* end of table */
};
1653
/*
 *	Particular to ms and mm
 *
 *	buildtab() adds this table when msflag is set and mac is MS or
 *	MM.  The all-zero entry ends the table.
 */
static const struct mactab	msmactab[] = {
	M(NONE,		'T','L',	skiptocom),	/* title follows */
	M(NONE,		'F','S',	skiptocom),	/* start footnote */
	M(NONE,		'O','K',	skiptocom),	/* Other kws */

	M(NONE,		'N','R',	skip),	/* undocumented */
	M(NONE,		'N','D',	skip),	/* use supplied date */

	M(PARAG,	'P','P',	PP),	/* begin parag */
	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
	M(PARAG,	'L','P',	PP),	/* left blocked parag */

	M(NONE,		'A','U',	AU),	/* author */
	M(NONE,		'A','I',	AU),	/* authors institution */

	M(NONE,		'S','H',	SH),	/* section heading */
	M(NONE,		'S','N',	SH),	/* undocumented */
	M(NONE,		'U','X',	UX),	/* unix */

	M(NBLK,		'D','S',	mssnblock),	/* start display text */
	M(NBLK,		'K','S',	mssnblock),	/* start keep */
	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
	M(NONE,		0,0,		0)	/* end of table */
};
1681
/*
 *	Particular to mm
 *
 *	buildtab() adds this table, in addition to msmactab, when msflag
 *	is set and mac is MM.  Single-letter macro names are stored with
 *	a blank as their second character.  The all-zero entry ends the
 *	table.
 */
static const struct mactab	mmmactab[] = {
	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
	M(NONE,		'H','U',	MMHU),	/* -mm ? */
	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
	M(NONE,		0,0,		0)	/* end of table */
};
1689
/*
 *	Particular to me
 *
 *	buildtab() adds this table when msflag is set and mac is ME.
 *	Single-letter macro names are stored with a blank as their
 *	second character; for ME the lookup code turns a blank or tab
 *	following a one-letter name into ' ' before searching.
 *	The all-zero entry ends the table.
 */
static const struct mactab	memactab[] = {
	/* paragraph macros, honoured only when parag is set */
	M(PARAG,	'p','p',	mepp),
	M(PARAG,	'l','p',	mepp),
	M(PARAG,	'n','p',	mepp),
	M(NONE,		'i','p',	meip),

	/* presumably section headings, judging by the handler name -- TODO confirm */
	M(NONE,		's','h',	mesh),
	M(NONE,		'u','h',	mesh),

	/* "(x" block-opening macros, honoured only when keepblock is clear */
	M(NBLK,		'(','l',	mesnblock),
	M(NBLK,		'(','q',	mesnblock),
	M(NBLK,		'(','b',	mesnblock),
	M(NBLK,		'(','z',	mesnblock),
	M(NBLK,		'(','c',	mesnblock),

	M(NBLK,		'(','d',	mesnblock),
	M(NBLK,		'(','f',	mesnblock),
	M(NBLK,		'(','x',	mesnblock),

	/* presumably font-changing macros, judging by the handler name -- TODO confirm */
	M(NONE,		'r',' ',	mefont),
	M(NONE,		'i',' ',	mefont),
	M(NONE,		'b',' ',	mefont),
	M(NONE,		'u',' ',	mefont),
	M(NONE,		'q',' ',	mefont),
	M(NONE,		'r','b',	mefont),
	M(NONE,		'b','i',	mefont),
	M(NONE,		'b','x',	mefont),
	M(NONE,		0,0,		0)	/* end of table */
};
1719
/*
 *	Particular to man
 *
 *	buildtab() adds this table when msflag is set and mac is MA.
 *	Every entry is honoured only when parag is set.
 *	The all-zero entry ends the table.
 */
static const struct mactab	manmactab[] = {
	/* presumably the alternating-font macros, judging by the handler name -- TODO confirm */
	M(PARAG,	'B','I',	manfont),
	M(PARAG,	'B','R',	manfont),
	M(PARAG,	'I','B',	manfont),
	M(PARAG,	'I','R',	manfont),
	M(PARAG,	'R','B',	manfont),
	M(PARAG,	'R','I',	manfont),

	/* presumably paragraph macros, judging by the handler name -- TODO confirm */
	M(PARAG,	'P','P',	manpp),
	M(PARAG,	'L','P',	manpp),
	M(PARAG,	'H','P',	manpp),
	M(NONE,		0,0,		0)	/* end of table */
};
1733