deroff.c revision 1.2
1/*	$NetBSD: deroff.c,v 1.2 2005/06/30 16:23:29 christos Exp $	*/
2
3/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5/*-
6 * Copyright (c) 1988, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33/*
34 * Copyright (C) Caldera International Inc.  2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 *    copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed or owned by Caldera
48 *	International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 *    contributors may be used to endorse or promote products derived from
51 *    this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67#ifndef lint
68static const char copyright[] =
69"@(#) Copyright (c) 1988, 1993\n\
70	The Regents of the University of California.  All rights reserved.\n";
71#endif /* not lint */
72
73#ifndef lint
74#if 0
75static const char sccsid[] = "@(#)deroff.c	8.1 (Berkeley) 6/6/93";
76#else
77static const char rcsid[] = "$NetBSD: deroff.c,v 1.2 2005/06/30 16:23:29 christos Exp $";
78#endif
79#endif /* not lint */
80
81#include <err.h>
82#include <limits.h>
83#include <stdio.h>
84#include <stdlib.h>
85#include <string.h>
86#include <unistd.h>
87
88/*
89 *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  The -w flag causes output of one word per line
 *	rather than in the original format.
 *	-mm (or -ms, -me, -ma) causes the corresponding macros to be
 *	interpreted so that just sentences are output.
 *	-ml  also gets rid of lists.
95 *	Deroff follows .so and .nx commands, removes contents of macro
96 *	definitions, equations (both .EQ ... .EN and $...$),
97 *	Tbl command sequences, and Troff backslash constructions.
98 *
99 *	All input is through the Cget macro;
100 *	the most recently read character is in c.
101 *
102 *	Modified by Robert Henry to process -me and -man macros.
103 */
104
105#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
106#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)
107
108#ifdef DEBUG
109#  define C	_C()
110#  define C1	_C1()
111#else /* not DEBUG */
112#  define C	Cget
113#  define C1	C1get
114#endif /* not DEBUG */
115
116#define SKIP while (C != '\n')
117#define SKIP_TO_COM SKIP; SKIP; pc=c; while (C != '.' || pc != '\n' || C > 'Z')pc=c
118
119#define	YES 1
120#define	NO 0
121#define	MS 0	/* -ms */
122#define	MM 1	/* -mm */
123#define	ME 2	/* -me */
124#define	MA 3	/* -man */
125
126#ifdef DEBUG
127char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
128#endif /* DEBUG */
129
130#define	ONE 1
131#define	TWO 2
132
133#define NOCHAR -2
134#define SPECIAL 0
135#define APOS 1
136#define PUNCT 2
137#define DIGIT 3
138#define LETTER 4
139
140#define MAXFILES 20
141
142static int	iflag;
143static int	wordflag;
144static int	msflag;	 /* processing a source written using a mac package */
145static int	mac;		/* which package */
146static int	disp;
147static int	parag;
148static int	inmacro;
149static int	intable;
150static int	keepblock; /* keep blocks of text; normally false when msflag */
151
/*
 * Character class table: SPECIAL, PUNCT, APOS, DIGIT, or LETTER.
 * It is indexed both with (unsigned char) casts of text bytes and with
 * raw getc() results, so it needs one slot for every possible byte
 * value; entries that main() does not set stay 0 (SPECIAL).
 */
static char chars[UCHAR_MAX + 1];
153
154static char line[LINE_MAX];
155static char *lp;
156
157static int c;
158static int pc;
159static int ldelim;
160static int rdelim;
161
162static char fname[PATH_MAX];
163static FILE *files[MAXFILES];
164static FILE **filesp;
165static FILE *infile;
166
167static int argc;
168static char **argv;
169
170/*
171 *	Macro processing
172 *
173 *	Macro table definitions
174 */
175typedef	int pacmac;		/* compressed macro name */
176static int	argconcat = 0;	/* concat arguments together (-me only) */
177
178#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
179#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))
180
181struct mactab {
182	int	condition;
183	pacmac	macname;
184	int	(*func)(pacmac);
185};
186
187static const struct	mactab	troffmactab[];
188static const struct	mactab	ppmactab[];
189static const struct	mactab	msmactab[];
190static const struct	mactab	mmmactab[];
191static const struct	mactab	memactab[];
192static const struct	mactab	manmactab[];
193
194/*
195 *	Macro table initialization
196 */
197#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}
198
199/*
200 *	Flags for matching conditions other than
201 *	the macro name
202 */
203#define	NONE		0
204#define	FNEST		1		/* no nested files */
205#define	NOMAC		2		/* no macro */
206#define	MAC		3		/* macro */
207#define	PARAG		4		/* in a paragraph */
208#define	MSF		5		/* msflag is on */
209#define	NBLK		6		/* set if no blocks to be kept */
210
211/*
212 *	Return codes from macro minions, determine where to jump,
213 *	how to repeat/reprocess text
214 */
215#define	COMX		1		/* goto comx */
216#define	COM		2		/* goto com */
217
218static int	 skeqn(void);
219static int	 eof(void);
220#ifdef DEBUG
221static int	 _C1(void);
222static int	 _C(void);
223#endif
224static int	 EQ(pacmac);
225static int	 domacro(pacmac);
226static int	 PS(pacmac);
227static int	 skip(pacmac);
228static int	 intbl(pacmac);
229static int	 outtbl(pacmac);
230static int	 so(pacmac);
231static int	 nx(pacmac);
232static int	 skiptocom(pacmac);
233static int	 PP(pacmac);
234static int	 AU(pacmac);
235static int	 SH(pacmac);
236static int	 UX(pacmac);
237static int	 MMHU(pacmac);
238static int	 mesnblock(pacmac);
239static int	 mssnblock(pacmac);
240static int	 nf(pacmac);
241static int	 ce(pacmac);
242static int	 meip(pacmac);
243static int	 mepp(pacmac);
244static int	 mesh(pacmac);
245static int	 mefont(pacmac);
246static int	 manfont(pacmac);
247static int	 manpp(pacmac);
248static int	 macsort(const void *, const void *);
249static int	 sizetab(const struct mactab *);
250static void	 getfname(void);
251static void	 textline(char *, int);
252static void	 work(void);
253static void	 regline(void (*)(char *, int), int);
254static void	 macro(void);
255static void	 tbl(void);
256static void	 stbl(void);
257static void	 eqn(void);
258static void	 backsl(void);
259static void	 sce(void);
260static void	 refer(int);
261static void	 inpic(void);
262static void	 msputmac(char *, int);
263static void	 msputwords(int);
264static void	 meputmac(char *, int);
265static void	 meputwords(int);
266static void	 noblock(char, char);
267static void	 defcomline(pacmac);
268static void	 comline(void);
269static void	 buildtab(const struct mactab **, int *);
270static FILE	*opn(char *);
271static struct mactab *macfill(struct mactab *, const struct mactab *);
272static void usage(void) __attribute__((__noreturn__));
273
274int
275main(int ac, char **av)
276{
277	int	i, ch;
278	int	errflg = 0;
279	int	kflag = NO;
280
281	iflag = NO;
282	wordflag = NO;
283	msflag = NO;
284	mac = ME;
285	disp = NO;
286	parag = NO;
287	inmacro = NO;
288	intable = NO;
289	ldelim	= NOCHAR;
290	rdelim	= NOCHAR;
291	keepblock = YES;
292
293	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
294		switch (ch) {
295		case 'i':
296			iflag = YES;
297			break;
298		case 'k':
299			kflag = YES;
300			break;
301		case 'm':
302			msflag = YES;
303			keepblock = NO;
304			switch (optarg[0]) {
305			case 'm':
306				mac = MM;
307				break;
308			case 's':
309				mac = MS;
310				break;
311			case 'e':
312				mac = ME;
313				break;
314			case 'a':
315				mac = MA;
316				break;
317			case 'l':
318				disp = YES;
319				break;
320			default:
321				errflg++;
322				break;
323			}
324			if (errflg == 0 && optarg[1] != '\0')
325				errflg++;
326			break;
327		case 'p':
328			parag = YES;
329			break;
330		case 'w':
331			wordflag = YES;
332			kflag = YES;
333			break;
334		default:
335			errflg++;
336		}
337	}
338	argc = ac - optind;
339	argv = av + optind;
340
341	if (kflag)
342		keepblock = YES;
343	if (errflg)
344		usage();
345
346#ifdef DEBUG
347	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
348		msflag, mactab[mac], keepblock, disp);
349#endif /* DEBUG */
350	if (argc == 0) {
351		infile = stdin;
352	} else {
353		infile = opn(argv[0]);
354		--argc;
355		++argv;
356	}
357	files[0] = infile;
358	filesp = &files[0];
359
360	for (i = 'a'; i <= 'z' ; ++i)
361		chars[i] = LETTER;
362	for (i = 'A'; i <= 'Z'; ++i)
363		chars[i] = LETTER;
364	for (i = '0'; i <= '9'; ++i)
365		chars[i] = DIGIT;
366	chars['\''] = APOS;
367	chars['&'] = APOS;
368	chars['.'] = PUNCT;
369	chars[','] = PUNCT;
370	chars[';'] = PUNCT;
371	chars['?'] = PUNCT;
372	chars[':'] = PUNCT;
373	work();
374	return 0;
375}
376
377static int
378skeqn(void)
379{
380
381	while ((c = getc(infile)) != rdelim) {
382		if (c == EOF)
383			c = eof();
384		else if (c == '"') {
385			while ((c = getc(infile)) != '"') {
386				if (c == EOF ||
387				    (c == '\\' && (c = getc(infile)) == EOF))
388					c = eof();
389			}
390		}
391	}
392	if (msflag)
393		return c == 'x';
394	return c == ' ';
395}
396
/*
 * Open the file named p for reading and return the stream.
 * On failure the program terminates through err(1, ...).
 */
static FILE *
opn(char *p)
{
	FILE *stream = fopen(p, "r");

	if (stream == NULL)
		err(1, "fopen %s", p);
	return stream;
}
407
408static int
409eof(void)
410{
411
412	if (infile != stdin)
413		fclose(infile);
414	if (filesp > files)
415		infile = *--filesp;
416	else if (argc > 0) {
417		infile = opn(argv[0]);
418		--argc;
419		++argv;
420	} else
421		exit(0);
422	return C;
423}
424
425static void
426getfname(void)
427{
428	char *p;
429	struct chain {
430		struct chain *nextp;
431		char *datap;
432	} *q;
433	static struct chain *namechain= NULL;
434
435	while (C == ' ')
436		;	/* nothing */
437
438	for (p = fname ; p - fname < sizeof(fname) && (*p = c) != '\n' &&
439	    c != ' ' && c != '\t' && c != '\\'; ++p)
440		C;
441	*p = '\0';
442	while (c != '\n')
443		C;
444
445	/* see if this name has already been used */
446	for (q = namechain ; q; q = q->nextp)
447		if (strcmp(fname, q->datap) == 0) {
448			fname[0] = '\0';
449			return;
450		}
451
452	q = (struct chain *) malloc(sizeof(struct chain));
453	if (q == NULL)
454		err(1, NULL);
455	q->nextp = namechain;
456	q->datap = strdup(fname);
457	if (q->datap == NULL)
458		err(1, NULL);
459	namechain = q;
460}
461
462/*ARGSUSED*/
463static void
464textline(char *str, int constant)
465{
466
467	if (wordflag) {
468		msputwords(0);
469		return;
470	}
471	puts(str);
472}
473
474void
475work(void)
476{
477
478	for (;;) {
479		C;
480#ifdef FULLDEBUG
481		printf("Starting work with `%c'\n", c);
482#endif /* FULLDEBUG */
483		if (c == '.' || c == '\'')
484			comline();
485		else
486			regline(textline, TWO);
487	}
488}
489
490static void
491regline(void (*pfunc)(char *, int), int constant)
492{
493
494	line[0] = c;
495	lp = line;
496	while (lp - line < sizeof(line)) {
497		if (c == '\\') {
498			*lp = ' ';
499			backsl();
500		}
501		if (c == '\n')
502			break;
503		if (intable && c == 'T') {
504			*++lp = C;
505			if (c == '{' || c == '}') {
506				lp[-1] = ' ';
507				*lp = C;
508			}
509		} else {
510			*++lp = C;
511		}
512	}
513	*lp = '\0';
514
515	if (line[0] != '\0')
516		(*pfunc)(line, constant);
517}
518
519static void
520macro(void)
521{
522
523	if (msflag) {
524		do {
525			SKIP;
526		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
527		if (c != '\n')
528			SKIP;
529		return;
530	}
531	SKIP;
532	inmacro = YES;
533}
534
535static void
536tbl(void)
537{
538
539	while (C != '.')
540		;	/* nothing */
541	SKIP;
542	intable = YES;
543}
544
545static void
546stbl(void)
547{
548
549	while (C != '.')
550		;	/* nothing */
551	SKIP_TO_COM;
552	if (c != 'T' || C != 'E') {
553		SKIP;
554		pc = c;
555		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
556			pc = c;
557	}
558}
559
560static void
561eqn(void)
562{
563	int c1, c2;
564	int dflg;
565	char last;
566
567	last=0;
568	dflg = 1;
569	SKIP;
570
571	for (;;) {
572		if (C1 == '.'  || c == '\'') {
573			while (C1 == ' ' || c == '\t')
574				;
575			if (c == 'E' && C1 == 'N') {
576				SKIP;
577				if (msflag && dflg) {
578					putchar('x');
579					putchar(' ');
580					if (last) {
581						putchar(last);
582						putchar('\n');
583					}
584				}
585				return;
586			}
587		} else if (c == 'd') {
588			/* look for delim */
589			if (C1 == 'e' && C1 == 'l')
590				if (C1 == 'i' && C1 == 'm') {
591					while (C1 == ' ')
592						;	/* nothing */
593
594					if ((c1 = c) == '\n' ||
595					    (c2 = C1) == '\n' ||
596					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
597						ldelim = NOCHAR;
598						rdelim = NOCHAR;
599					} else {
600						ldelim = c1;
601						rdelim = c2;
602					}
603				}
604			dflg = 0;
605		}
606
607		if (c != '\n')
608			while (C1 != '\n') {
609				if (chars[c] == PUNCT)
610					last = c;
611				else if (c != ' ')
612					last = 0;
613			}
614	}
615}
616
617/* skip over a complete backslash construction */
618static void
619backsl(void)
620{
621	int bdelim;
622
623sw:
624	switch (C) {
625	case '"':
626		SKIP;
627		return;
628
629	case 's':
630		if (C == '\\')
631			backsl();
632		else {
633			while (C >= '0' && c <= '9')
634				;	/* nothing */
635			ungetc(c, infile);
636			c = '0';
637		}
638		--lp;
639		return;
640
641	case 'f':
642	case 'n':
643	case '*':
644		if (C != '(')
645			return;
646
647	case '(':
648		if (msflag) {
649			if (C == 'e') {
650				if (C == 'm') {
651					*lp = '-';
652					return;
653				}
654			}
655			else if (c != '\n')
656				C;
657			return;
658		}
659		if (C != '\n')
660			C;
661		return;
662
663	case '$':
664		C;	/* discard argument number */
665		return;
666
667	case 'b':
668	case 'x':
669	case 'v':
670	case 'h':
671	case 'w':
672	case 'o':
673	case 'l':
674	case 'L':
675		if ((bdelim = C) == '\n')
676			return;
677		while (C != '\n' && c != bdelim)
678			if (c == '\\')
679				backsl();
680		return;
681
682	case '\\':
683		if (inmacro)
684			goto sw;
685
686	default:
687		return;
688	}
689}
690
691static void
692sce(void)
693{
694	char *ap;
695	int n, i;
696	char a[10];
697
698	for (ap = a; C != '\n'; ap++) {
699		*ap = c;
700		if (ap == &a[9]) {
701			SKIP;
702			ap = a;
703			break;
704		}
705	}
706	if (ap != a)
707		n = atoi(a);
708	else
709		n = 1;
710	for (i = 0; i < n;) {
711		if (C == '.') {
712			if (C == 'c') {
713				if (C == 'e') {
714					while (C == ' ')
715						;	/* nothing */
716					if (c == '0') {
717						SKIP;
718						break;
719					} else
720						SKIP;
721				}
722				else
723					SKIP;
724			} else if (c == 'P' || C == 'P') {
725				if (c != '\n')
726					SKIP;
727				break;
728			} else if (c != '\n')
729				SKIP;
730		} else {
731			SKIP;
732			i++;
733		}
734	}
735}
736
737static void
738refer(int c1)
739{
740	int c2;
741
742	if (c1 != '\n')
743		SKIP;
744
745	for (c2 = -1;;) {
746		if (C != '.')
747			SKIP;
748		else {
749			if (C != ']')
750				SKIP;
751			else {
752				while (C != '\n')
753					c2 = c;
754				if (c2 != -1 && chars[c2] == PUNCT)
755					putchar(c2);
756				return;
757			}
758		}
759	}
760}
761
762static void
763inpic(void)
764{
765	int c1;
766	char *p1;
767
768	SKIP;
769	p1 = line;
770	c = '\n';
771	for (;;) {
772		c1 = c;
773		if (C == '.' && c1 == '\n') {
774			if (C != 'P') {
775				if (c == '\n')
776					continue;
777				else {
778					SKIP;
779					c = '\n';
780					continue;
781				}
782			}
783			if (C != 'E') {
784				if (c == '\n')
785					continue;
786				else {
787					SKIP;
788					c = '\n';
789					continue;
790				}
791			}
792			SKIP;
793			return;
794		}
795		else if (c == '\"') {
796			while (C != '\"') {
797				if (c == '\\') {
798					if (C == '\"')
799						continue;
800					ungetc(c, infile);
801					backsl();
802				} else
803					*p1++ = c;
804			}
805			*p1++ = ' ';
806		}
807		else if (c == '\n' && p1 != line) {
808			*p1 = '\0';
809			if (wordflag)
810				msputwords(NO);
811			else {
812				puts(line);
813				putchar('\n');
814			}
815			p1 = line;
816		}
817	}
818}
819
#ifdef DEBUG
/*
 * Function forms of the C1get and Cget macros, used for C1 and C in
 * DEBUG builds.  Each returns the value of the corresponding macro.
 */
static int
_C1(void)
{

	return C1get;
}

static int
_C(void)
{

	return Cget;
}
#endif /* DEBUG */
835
836/*
837 *	Put out a macro line, using ms and mm conventions.
838 */
839static void
840msputmac(char *s, int constant)
841{
842	char *t;
843	int found;
844	int last;
845
846	last = 0;
847	found = 0;
848	if (wordflag) {
849		msputwords(YES);
850		return;
851	}
852	while (*s) {
853		while (*s == ' ' || *s == '\t')
854			putchar(*s++);
855		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
856			;	/* nothing */
857		if (*s == '\"')
858			s++;
859		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
860		    chars[(unsigned char)s[1]] == LETTER) {
861			while (s < t)
862				if (*s == '\"')
863					s++;
864				else
865					putchar(*s++);
866			last = *(t-1);
867			found++;
868		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
869		    s[1] == '\0') {
870			putchar(*s++);
871		} else {
872			last = *(t - 1);
873			s = t;
874		}
875	}
876	putchar('\n');
877	if (msflag && chars[last] == PUNCT) {
878		putchar(last);
879		putchar('\n');
880	}
881}
882
883/*
884 *	put out words (for the -w option) with ms and mm conventions
885 */
886static void
887msputwords(int macline)
888{
889	char *p, *p1;
890	int i, nlet;
891
892	for (p1 = line;;) {
893		/*
894		 *	skip initial specials ampersands and apostrophes
895		 */
896		while (chars[(unsigned char)*p1] < DIGIT)
897			if (*p1++ == '\0')
898				return;
899		nlet = 0;
900		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
901			if (i == LETTER)
902				++nlet;
903
904		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
905			/*
906			 *	delete trailing ampersands and apostrophes
907			 */
908			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
909			    i == APOS )
910				--p;
911			while (p1 < p)
912				putchar(*p1++);
913			putchar('\n');
914		} else {
915			p1 = p;
916		}
917	}
918}
919
920/*
921 *	put out a macro using the me conventions
922 */
/* advance cp over blanks and tabs */
#define SKIPBLANK(cp)	while (*(cp) == ' ' || *(cp) == '\t') { (cp)++; }
/* advance cp to the next blank, tab or end of string */
#define SKIPNONBLANK(cp) while (*(cp) != ' ' && *(cp) != '\t' && *(cp) != '\0') { (cp)++; }
925
926static void
927meputmac(char *cp, int constant)
928{
929	char	*np;
930	int	found;
931	int	argno;
932	int	last;
933	int	inquote;
934
935	last = 0;
936	found = 0;
937	if (wordflag) {
938		meputwords(YES);
939		return;
940	}
941	for (argno = 0; *cp; argno++) {
942		SKIPBLANK(cp);
943		inquote = (*cp == '"');
944		if (inquote)
945			cp++;
946		for (np = cp; *np; np++) {
947			switch (*np) {
948			case '\n':
949			case '\0':
950				break;
951
952			case '\t':
953			case ' ':
954				if (inquote)
955					continue;
956				else
957					goto endarg;
958
959			case '"':
960				if (inquote && np[1] == '"') {
961					memmove(np, np + 1, strlen(np));
962					np++;
963					continue;
964				} else {
965					*np = ' '; 	/* bye bye " */
966					goto endarg;
967				}
968
969			default:
970				continue;
971			}
972		}
973		endarg: ;
974		/*
975		 *	cp points at the first char in the arg
976		 *	np points one beyond the last char in the arg
977		 */
978		if ((argconcat == 0) || (argconcat != argno))
979			putchar(' ');
980#ifdef FULLDEBUG
981		{
982			char	*p;
983			printf("[%d,%d: ", argno, np - cp);
984			for (p = cp; p < np; p++) {
985				putchar(*p);
986			}
987			printf("]");
988		}
989#endif /* FULLDEBUG */
990		/*
991		 *	Determine if the argument merits being printed
992		 *
993		 *	constant is the cut off point below which something
994		 *	is not a word.
995		 */
996		if (((np - cp) > constant) &&
997		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
998			for (cp = cp; cp < np; cp++)
999				putchar(*cp);
1000			last = np[-1];
1001			found++;
1002		} else if (found && (np - cp == 1) &&
1003		    chars[(unsigned char)*cp] == PUNCT) {
1004			putchar(*cp);
1005		} else {
1006			last = np[-1];
1007		}
1008		cp = np;
1009	}
1010	if (msflag && chars[last] == PUNCT)
1011		putchar(last);
1012	putchar('\n');
1013}
1014
/*
 *	put out words (for the -w option) for the me macros;
 *	this simply uses the ms/mm word output routine
 */
static void
meputwords(int macline)
{

	/* no me-specific handling: delegate unchanged */
	msputwords(macline);
}
1024
1025/*
1026 *
1027 *	Skip over a nested set of macros
1028 *
1029 *	Possible arguments to noblock are:
1030 *
1031 *	fi	end of unfilled text
1032 *	PE	pic ending
1033 *	DE	display ending
1034 *
1035 *	for ms and mm only:
1036 *		KE	keep ending
1037 *
1038 *		NE	undocumented match to NS (for mm?)
1039 *		LE	mm only: matches RL or *L (for lists)
1040 *
1041 *	for me:
1042 *		([lqbzcdf]
1043 */
1044static void
1045noblock(char a1, char a2)
1046{
1047	int c1,c2;
1048	int eqnf;
1049	int lct;
1050
1051	lct = 0;
1052	eqnf = 1;
1053	SKIP;
1054	for (;;) {
1055		while (C != '.')
1056			if (c == '\n')
1057				continue;
1058			else
1059				SKIP;
1060		if ((c1 = C) == '\n')
1061			continue;
1062		if ((c2 = C) == '\n')
1063			continue;
1064		if (c1 == a1 && c2 == a2) {
1065			SKIP;
1066			if (lct != 0) {
1067				lct--;
1068				continue;
1069			}
1070			if (eqnf)
1071				putchar('.');
1072			putchar('\n');
1073			return;
1074		} else if (a1 == 'L' && c2 == 'L') {
1075			lct++;
1076			SKIP;
1077		}
1078		/*
1079		 *	equations (EQ) nested within a display
1080		 */
1081		else if (c1 == 'E' && c2 == 'Q') {
1082			if ((mac == ME && a1 == ')')
1083			    || (mac != ME && a1 == 'D')) {
1084				eqn();
1085				eqnf=0;
1086			}
1087		}
1088		/*
1089		 *	turning on filling is done by the paragraphing
1090		 *	macros
1091		 */
1092		else if (a1 == 'f') {	/* .fi */
1093			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
1094			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1095				SKIP;
1096				return;
1097			}
1098		} else {
1099			SKIP;
1100		}
1101	}
1102}
1103
1104static int
1105/*ARGSUSED*/
1106EQ(pacmac unused)
1107{
1108
1109	eqn();
1110	return 0;
1111}
1112
1113static int
1114/*ARGSUSED*/
1115domacro(pacmac unused)
1116{
1117
1118	macro();
1119	return 0;
1120}
1121
1122static int
1123/*ARGSUSED*/
1124PS(pacmac unused)
1125{
1126
1127	for (C; c == ' ' || c == '\t'; C)
1128		;	/* nothing */
1129
1130	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1131		SKIP;
1132		return 0;
1133	}
1134	if (!msflag)
1135		inpic();
1136	else
1137		noblock('P', 'E');
1138	return 0;
1139}
1140
1141static int
1142/*ARGSUSED*/
1143skip(pacmac unused)
1144{
1145
1146	SKIP;
1147	return 0;
1148}
1149
1150static int
1151/*ARGSUSED*/
1152intbl(pacmac unused)
1153{
1154
1155	if (msflag)
1156		stbl();
1157	else
1158		tbl();
1159	return 0;
1160}
1161
1162static int
1163/*ARGSUSED*/
1164outtbl(pacmac unused)
1165{
1166
1167	intable = NO;
1168	return 0;
1169}
1170
1171int
1172/*ARGSUSED*/
1173so(pacmac unused)
1174{
1175
1176	if (!iflag) {
1177		getfname();
1178		if (fname[0]) {
1179			if (++filesp - &files[0] > MAXFILES)
1180				err(1, "too many nested files (max %d)",
1181				    MAXFILES);
1182			infile = *filesp = opn(fname);
1183		}
1184	}
1185	return 0;
1186}
1187
1188static int
1189/*ARGSUSED*/
1190nx(pacmac unused)
1191{
1192
1193	if (!iflag) {
1194		getfname();
1195		if (fname[0] == '\0')
1196			exit(0);
1197		if (infile != stdin)
1198			fclose(infile);
1199		infile = *filesp = opn(fname);
1200	}
1201	return 0;
1202}
1203
1204static int
1205/*ARGSUSED*/
1206skiptocom(pacmac unused)
1207{
1208
1209	SKIP_TO_COM;
1210	return COMX;
1211}
1212
1213static int
1214PP(pacmac c12)
1215{
1216	int c1, c2;
1217
1218	frommac(c12, c1, c2);
1219	printf(".%c%c", c1, c2);
1220	while (C != '\n')
1221		putchar(c);
1222	putchar('\n');
1223	return 0;
1224}
1225
1226static int
1227/*ARGSUSED*/
1228AU(pacmac unused)
1229{
1230
1231	if (mac == MM)
1232		return 0;
1233	SKIP_TO_COM;
1234	return COMX;
1235}
1236
1237static int
1238SH(pacmac c12)
1239{
1240	int c1, c2;
1241
1242	frommac(c12, c1, c2);
1243
1244	if (parag) {
1245		printf(".%c%c", c1, c2);
1246		while (C != '\n')
1247			putchar(c);
1248		putchar(c);
1249		putchar('!');
1250		for (;;) {
1251			while (C != '\n')
1252				putchar(c);
1253			putchar('\n');
1254			if (C == '.')
1255				return COM;
1256			putchar('!');
1257			putchar(c);
1258		}
1259		/*NOTREACHED*/
1260	} else {
1261		SKIP_TO_COM;
1262		return COMX;
1263	}
1264}
1265
1266static int
1267/*ARGSUSED*/
1268UX(pacmac unused)
1269{
1270
1271	if (wordflag)
1272		printf("UNIX\n");
1273	else
1274		printf("UNIX ");
1275	return 0;
1276}
1277
1278static int
1279MMHU(pacmac c12)
1280{
1281	int c1, c2;
1282
1283	frommac(c12, c1, c2);
1284	if (parag) {
1285		printf(".%c%c", c1, c2);
1286		while (C != '\n')
1287			putchar(c);
1288		putchar('\n');
1289	} else {
1290		SKIP;
1291	}
1292	return 0;
1293}
1294
1295static int
1296mesnblock(pacmac c12)
1297{
1298	int c1, c2;
1299
1300	frommac(c12, c1, c2);
1301	noblock(')', c2);
1302	return 0;
1303}
1304
1305static int
1306mssnblock(pacmac c12)
1307{
1308	int c1, c2;
1309
1310	frommac(c12, c1, c2);
1311	noblock(c1, 'E');
1312	return 0;
1313}
1314
1315static int
1316/*ARGUSED*/
1317nf(pacmac unused)
1318{
1319
1320	noblock('f', 'i');
1321	return 0;
1322}
1323
1324static int
1325/*ARGUSED*/
1326ce(pacmac unused)
1327{
1328
1329	sce();
1330	return 0;
1331}
1332
1333static int
1334meip(pacmac c12)
1335{
1336
1337	if (parag)
1338		mepp(c12);
1339	else if (wordflag)	/* save the tag */
1340		regline(meputmac, ONE);
1341	else
1342		SKIP;
1343	return 0;
1344}
1345
1346/*
1347 *	only called for -me .pp or .sh, when parag is on
1348 */
1349static int
1350mepp(pacmac c12)
1351{
1352
1353	PP(c12);		/* eats the line */
1354	return 0;
1355}
1356
1357/*
1358 *	Start of a section heading; output the section name if doing words
1359 */
1360static int
1361mesh(pacmac c12)
1362{
1363
1364	if (parag)
1365		mepp(c12);
1366	else if (wordflag)
1367		defcomline(c12);
1368	else
1369		SKIP;
1370	return 0;
1371}
1372
1373/*
1374 *	process a font setting
1375 */
1376static int
1377mefont(pacmac c12)
1378{
1379
1380	argconcat = 1;
1381	defcomline(c12);
1382	argconcat = 0;
1383	return 0;
1384}
1385
1386static int
1387manfont(pacmac c12)
1388{
1389
1390	return mefont(c12);
1391}
1392
1393static int
1394manpp(pacmac c12)
1395{
1396
1397	return mepp(c12);
1398}
1399
1400static void
1401defcomline(pacmac c12)
1402{
1403	int c1, c2;
1404
1405	frommac(c12, c1, c2);
1406	if (msflag && mac == MM && c2 == 'L') {
1407		if (disp || c1 == 'R') {
1408			noblock('L', 'E');
1409		} else {
1410			SKIP;
1411			putchar('.');
1412		}
1413	}
1414	else if (c1 == '.' && c2 == '.') {
1415		if (msflag) {
1416			SKIP;
1417			return;
1418		}
1419		while (C == '.')
1420			/*VOID*/;
1421	}
1422	++inmacro;
1423	/*
1424	 *	Process the arguments to the macro
1425	 */
1426	switch (mac) {
1427	default:
1428	case MM:
1429	case MS:
1430		if (c1 <= 'Z' && msflag)
1431			regline(msputmac, ONE);
1432		else
1433			regline(msputmac, TWO);
1434		break;
1435	case ME:
1436		regline(meputmac, ONE);
1437		break;
1438	}
1439	--inmacro;
1440}
1441
1442static void
1443comline(void)
1444{
1445	int	c1;
1446	int	c2;
1447	pacmac	c12;
1448	int	mid;
1449	int	lb, ub;
1450	int	hit;
1451	static	int	tabsize = 0;
1452	static	const struct mactab	*mactab = NULL;
1453	const struct mactab	*mp;
1454
1455	if (mactab == 0)
1456		 buildtab(&mactab, &tabsize);
1457com:
1458	while (C == ' ' || c == '\t')
1459		;
1460comx:
1461	if ((c1 = c) == '\n')
1462		return;
1463	c2 = C;
1464	if (c1 == '.' && c2 != '.')
1465		inmacro = NO;
1466	if (msflag && c1 == '[') {
1467		refer(c2);
1468		return;
1469	}
1470	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1471		printf(".P\n");
1472		return;
1473	}
1474	if (c2 == '\n')
1475		return;
1476	/*
1477	 *	Single letter macro
1478	 */
1479	if (mac == ME && (c2 == ' ' || c2 == '\t') )
1480		c2 = ' ';
1481	c12 = tomac(c1, c2);
1482	/*
1483	 *	binary search through the table of macros
1484	 */
1485	lb = 0;
1486	ub = tabsize - 1;
1487	while (lb <= ub) {
1488		mid = (ub + lb) / 2;
1489		mp = &mactab[mid];
1490		if (mp->macname < c12)
1491			lb = mid + 1;
1492		else if (mp->macname > c12)
1493			ub = mid - 1;
1494		else {
1495			hit = 1;
1496#ifdef FULLDEBUG
1497			printf("preliminary hit macro %c%c ", c1, c2);
1498#endif /* FULLDEBUG */
1499			switch (mp->condition) {
1500			case NONE:
1501				hit = YES;
1502				break;
1503			case FNEST:
1504				hit = (filesp == files);
1505				break;
1506			case NOMAC:
1507				hit = !inmacro;
1508				break;
1509			case MAC:
1510				hit = inmacro;
1511				break;
1512			case PARAG:
1513				hit = parag;
1514				break;
1515			case NBLK:
1516				hit = !keepblock;
1517				break;
1518			default:
1519				hit = 0;
1520			}
1521
1522			if (hit) {
1523#ifdef FULLDEBUG
1524				printf("MATCH\n");
1525#endif /* FULLDEBUG */
1526				switch ((*(mp->func))(c12)) {
1527				default:
1528					return;
1529				case COMX:
1530					goto comx;
1531				case COM:
1532					goto com;
1533				}
1534			}
1535#ifdef FULLDEBUG
1536			printf("FAIL\n");
1537#endif /* FULLDEBUG */
1538			break;
1539		}
1540	}
1541	defcomline(c12);
1542}
1543
1544static int
1545macsort(const void *p1, const void *p2)
1546{
1547	const struct mactab *t1 = p1;
1548	const struct mactab *t2 = p2;
1549
1550	return t1->macname - t2->macname;
1551}
1552
1553static int
1554sizetab(const struct mactab *mp)
1555{
1556	int i;
1557
1558	i = 0;
1559	if (mp) {
1560		for (; mp->macname; mp++, i++)
1561			/*VOID*/ ;
1562	}
1563	return i;
1564}
1565
1566static struct mactab *
1567macfill(struct mactab *dst, const struct mactab *src)
1568{
1569
1570	if (src) {
1571		while (src->macname)
1572			*dst++ = *src++;
1573	}
1574	return dst;
1575}
1576
/*
 * Write the command synopsis to stderr and terminate the program with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	extern char *__progname;

	fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m ( a | e | m | s | l ) ] [ filename ] ... \n",
	    __progname);
	exit(1);
}
1585
1586static void
1587buildtab(const struct mactab **r_back, int *r_size)
1588{
1589	size_t	size;
1590	const struct	mactab	*p1, *p2;
1591	struct	mactab	*back, *p;
1592
1593	size = sizetab(troffmactab) + sizetab(ppmactab);
1594	p1 = p2 = NULL;
1595	if (msflag) {
1596		switch (mac) {
1597		case ME:
1598			p1 = memactab;
1599			break;
1600		case MM:
1601			p1 = msmactab;
1602			p2 = mmmactab;
1603			break;
1604		case MS:
1605			p1 = msmactab;
1606			break;
1607		case MA:
1608			p1 = manmactab;
1609			break;
1610		default:
1611			break;
1612		}
1613	}
1614	size += sizetab(p1);
1615	size += sizetab(p2);
1616	back = calloc(size + 2, sizeof(struct mactab));
1617	if (back == NULL)
1618		err(1, NULL);
1619
1620	p = macfill(back, troffmactab);
1621	p = macfill(p, ppmactab);
1622	p = macfill(p, p1);
1623	p = macfill(p, p2);
1624
1625	qsort(back, size, sizeof(struct mactab), macsort);
1626	*r_size = size;
1627	*r_back = back;
1628}
1629
1630/*
1631 *	troff commands
1632 */
1633static const struct mactab	troffmactab[] = {
1634	M(NONE,		'\\','"',	skip),	/* comment */
1635	M(NOMAC,	'd','e',	domacro),	/* define */
1636	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
1637	M(NOMAC,	'a','m',	domacro),	/* append macro */
1638	M(NBLK,		'n','f',	nf),	/* filled */
1639	M(NBLK,		'c','e',	ce),	/* centered */
1640
1641	M(NONE,		's','o',	so),	/* source a file */
1642	M(NONE,		'n','x',	nx),	/* go to next file */
1643
1644	M(NONE,		't','m',	skip),	/* print string on tty */
1645	M(NONE,		'h','w',	skip),	/* exception hyphen words */
1646	M(NONE,		0,0,		0)
1647};
1648
1649/*
1650 *	Preprocessor output
1651 */
1652static const struct mactab	ppmactab[] = {
1653	M(FNEST,	'E','Q',	EQ),	/* equation starting */
1654	M(FNEST,	'T','S',	intbl),	/* table starting */
1655	M(FNEST,	'T','C',	intbl),	/* alternative table? */
1656	M(FNEST,	'T','&',	intbl),	/* table reformatting */
1657	M(NONE,		'T','E',	outtbl),/* table ending */
1658	M(NONE,		'P','S',	PS),	/* picture starting */
1659	M(NONE,		0,0,		0)
1660};
1661
1662/*
1663 *	Particular to ms and mm
1664 */
1665static const struct mactab	msmactab[] = {
1666	M(NONE,		'T','L',	skiptocom),	/* title follows */
1667	M(NONE,		'F','S',	skiptocom),	/* start footnote */
1668	M(NONE,		'O','K',	skiptocom),	/* Other kws */
1669
1670	M(NONE,		'N','R',	skip),	/* undocumented */
1671	M(NONE,		'N','D',	skip),	/* use supplied date */
1672
1673	M(PARAG,	'P','P',	PP),	/* begin parag */
1674	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
1675	M(PARAG,	'L','P',	PP),	/* left blocked parag */
1676
1677	M(NONE,		'A','U',	AU),	/* author */
1678	M(NONE,		'A','I',	AU),	/* authors institution */
1679
1680	M(NONE,		'S','H',	SH),	/* section heading */
1681	M(NONE,		'S','N',	SH),	/* undocumented */
1682	M(NONE,		'U','X',	UX),	/* unix */
1683
1684	M(NBLK,		'D','S',	mssnblock),	/* start display text */
1685	M(NBLK,		'K','S',	mssnblock),	/* start keep */
1686	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
1687	M(NONE,		0,0,		0)
1688};
1689
1690static const struct mactab	mmmactab[] = {
1691	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
1692	M(NONE,		'H','U',	MMHU),	/* -mm ? */
1693	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
1694	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
1695	M(NONE,		0,0,		0)
1696};
1697
1698static const struct mactab	memactab[] = {
1699	M(PARAG,	'p','p',	mepp),
1700	M(PARAG,	'l','p',	mepp),
1701	M(PARAG,	'n','p',	mepp),
1702	M(NONE,		'i','p',	meip),
1703
1704	M(NONE,		's','h',	mesh),
1705	M(NONE,		'u','h',	mesh),
1706
1707	M(NBLK,		'(','l',	mesnblock),
1708	M(NBLK,		'(','q',	mesnblock),
1709	M(NBLK,		'(','b',	mesnblock),
1710	M(NBLK,		'(','z',	mesnblock),
1711	M(NBLK,		'(','c',	mesnblock),
1712
1713	M(NBLK,		'(','d',	mesnblock),
1714	M(NBLK,		'(','f',	mesnblock),
1715	M(NBLK,		'(','x',	mesnblock),
1716
1717	M(NONE,		'r',' ',	mefont),
1718	M(NONE,		'i',' ',	mefont),
1719	M(NONE,		'b',' ',	mefont),
1720	M(NONE,		'u',' ',	mefont),
1721	M(NONE,		'q',' ',	mefont),
1722	M(NONE,		'r','b',	mefont),
1723	M(NONE,		'b','i',	mefont),
1724	M(NONE,		'b','x',	mefont),
1725	M(NONE,		0,0,		0)
1726};
1727
1728static const struct mactab	manmactab[] = {
1729	M(PARAG,	'B','I',	manfont),
1730	M(PARAG,	'B','R',	manfont),
1731	M(PARAG,	'I','B',	manfont),
1732	M(PARAG,	'I','R',	manfont),
1733	M(PARAG,	'R','B',	manfont),
1734	M(PARAG,	'R','I',	manfont),
1735
1736	M(PARAG,	'P','P',	manpp),
1737	M(PARAG,	'L','P',	manpp),
1738	M(PARAG,	'H','P',	manpp),
1739	M(NONE,		0,0,		0)
1740};
1741