deroff.c revision 1.8
1/*	$NetBSD: deroff.c,v 1.8 2011/05/24 12:19:11 joerg Exp $	*/
2
3/* taken from: OpenBSD: deroff.c,v 1.6 2004/06/02 14:58:46 tom Exp */
4
5/*-
6 * Copyright (c) 1988, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33/*
34 * Copyright (C) Caldera International Inc.  2001-2002.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code and documentation must retain the above
41 *    copyright notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 *    must display the following acknowledgement:
47 *	This product includes software developed or owned by Caldera
48 *	International, Inc.
49 * 4. Neither the name of Caldera International, Inc. nor the names of other
50 *    contributors may be used to endorse or promote products derived from
51 *    this software without specific prior written permission.
52 *
53 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
54 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
58 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
62 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
63 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
64 * POSSIBILITY OF SUCH DAMAGE.
65 */
66
67#ifndef lint
68static const char copyright[] =
69"@(#) Copyright (c) 1988, 1993\n\
70	The Regents of the University of California.  All rights reserved.\n";
71#endif /* not lint */
72
73#ifndef lint
74#if 0
75static const char sccsid[] = "@(#)deroff.c	8.1 (Berkeley) 6/6/93";
76#else
77static const char rcsid[] = "$NetBSD: deroff.c,v 1.8 2011/05/24 12:19:11 joerg Exp $";
78#endif
79#endif /* not lint */
80
81#include <sys/cdefs.h>
82#include <err.h>
83#include <limits.h>
84#include <stddef.h>
85#include <stdio.h>
86#include <stdlib.h>
87#include <string.h>
88#include <unistd.h>
89
90/*
91 *	Deroff command -- strip troff, eqn, and Tbl sequences from
 *	a file.  The -w flag causes output of one word per line
 *	rather than in the original format.
 *	-mm (or -ms) causes the corresponding macros to be interpreted
 *	so that just sentences are output.
 *	-ml also gets rid of lists.
97 *	Deroff follows .so and .nx commands, removes contents of macro
98 *	definitions, equations (both .EQ ... .EN and $...$),
99 *	Tbl command sequences, and Troff backslash constructions.
100 *
101 *	All input is through the Cget macro;
102 *	the most recently read character is in c.
103 *
104 *	Modified by Robert Henry to process -me and -man macros.
105 */
106
/*
 *	Character readers.  Each one stores the character it reads in `c'.
 *
 *	Cget:	read from infile; at EOF yield whatever eof() returns; if the
 *		character equals ldelim while the top-level file is being
 *		read (filesp == files) yield whatever skeqn() returns;
 *		otherwise yield the character itself.
 *	C1get:	as Cget, but without the ldelim test.
 */
#define Cget ( (c=getc(infile)) == EOF ? eof() : ((c==ldelim)&&(filesp==files) ? skeqn() : c) )
#define C1get ( (c=getc(infile)) == EOF ? eof() :  c)

/* C and C1 are the names the code uses; with DEBUG they are function calls. */
#ifdef DEBUG
#  define C	_C()
#  define C1	_C1()
#else /* not DEBUG */
#  define C	Cget
#  define C1	C1get
#endif /* not DEBUG */

/* Read up to and including the next newline.  Use as a statement: `SKIP;'. */
#define SKIP while (C != '\n')

/*
 *	Run SKIP twice, then keep reading until a '.' is read while pc holds
 *	a newline and the character after the '.' is not greater than 'Z'.
 *	Wrapped in do/while (0) so the expansion is a single statement and
 *	stays correct as the body of an unbraced if/else.
 */
#define SKIP_TO_COM do { \
	SKIP; \
	SKIP; \
	pc = c; \
	while (C != '.' || pc != '\n' || C > 'Z') \
		pc = c; \
} while (0)
120
#define	YES 1
#define	NO 0

/* macro package selected with -m; stored in `mac' */
#define	MS 0	/* -ms */
#define	MM 1	/* -mm */
#define	ME 2	/* -me */
#define	MA 3	/* -man */

#ifdef DEBUG
/* option spelling of each package, indexed by the values above */
char *mactab[] = { "-ms", "-mm", "-me", "-ma" };
#endif /* DEBUG */

/* values passed as the `constant' (word length cut-off) argument */
#define	ONE 1
#define	TWO 2

/*
 * Value given to ldelim/rdelim when no eqn delimiter is set.
 * Parenthesized so the expansion is a single operand in any expression.
 */
#define NOCHAR (-2)

/* character classes stored in chars[] */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4

/* number of slots in the files[] stack */
#define MAXFILES 20
143
144static int	iflag;
145static int	wordflag;
146static int	msflag;	 /* processing a source written using a mac package */
147static int	mac;		/* which package */
148static int	disp;
149static int	parag;
150static int	inmacro;
151static int	intable;
152static int	keepblock; /* keep blocks of text; normally false when msflag */
153
154static char chars[128];  /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
155
156static char line[LINE_MAX];
157static char *lp;
158
159static int c;
160static int pc;
161static int ldelim;
162static int rdelim;
163
164static char fname[PATH_MAX];
165static FILE *files[MAXFILES];
166static FILE **filesp;
167static FILE *infile;
168
169static int argc;
170static char **argv;
171
/*
 *	Macro processing
 *
 *	Macro table definitions
 */
typedef	int pacmac;		/* compressed macro name */
static int	argconcat = 0;	/* concat arguments together (-me only) */

/*
 *	tomac packs two characters into one pacmac: c1 in bits 8-15,
 *	c2 in bits 0-7.  frommac unpacks a pacmac into c1 and c2.
 */
#define	tomac(c1, c2)		((((c1) & 0xFF) << 8) | ((c2) & 0xFF))
#define	frommac(src, c1, c2)	(((c1)=((src)>>8)&0xFF),((c2) =(src)&0xFF))

/*
 *	One macro table entry: the handler `func' is called with the
 *	packed name when `macname' matches and `condition' (one of the
 *	flag values below) holds.
 */
struct mactab {
	int	condition;
	pacmac	macname;
	int	(*func)(pacmac);
};

/* the tables themselves are defined later in the file */
static const struct	mactab	troffmactab[];
static const struct	mactab	ppmactab[];
static const struct	mactab	msmactab[];
static const struct	mactab	mmmactab[];
static const struct	mactab	memactab[];
static const struct	mactab	manmactab[];

/*
 *	Macro table initialization
 */
#define	M(cond, c1, c2, func) {cond, tomac(c1, c2), func}

/*
 *	Flags for matching conditions other than
 *	the macro name
 */
#define	NONE		0
#define	FNEST		1		/* no nested files */
#define	NOMAC		2		/* no macro */
#define	MAC		3		/* macro */
#define	PARAG		4		/* in a paragraph */
#define	MSF		5		/* msflag is on */
#define	NBLK		6		/* set if no blocks to be kept */

/*
 *	Return codes from macro minions, determine where to jump,
 *	how to repeat/reprocess text
 */
#define	COMX		1		/* goto comx */
#define	COM		2		/* goto com */
219
220static int	 skeqn(void);
221static int	 eof(void);
222#ifdef DEBUG
223static int	 _C1(void);
224static int	 _C(void);
225#endif
226static int	 EQ(pacmac);
227static int	 domacro(pacmac);
228static int	 PS(pacmac);
229static int	 skip(pacmac);
230static int	 intbl(pacmac);
231static int	 outtbl(pacmac);
232static int	 so(pacmac);
233static int	 nx(pacmac);
234static int	 skiptocom(pacmac);
235static int	 PP(pacmac);
236static int	 AU(pacmac);
237static int	 SH(pacmac);
238static int	 UX(pacmac);
239static int	 MMHU(pacmac);
240static int	 mesnblock(pacmac);
241static int	 mssnblock(pacmac);
242static int	 nf(pacmac);
243static int	 ce(pacmac);
244static int	 meip(pacmac);
245static int	 mepp(pacmac);
246static int	 mesh(pacmac);
247static int	 mefont(pacmac);
248static int	 manfont(pacmac);
249static int	 manpp(pacmac);
250static int	 macsort(const void *, const void *);
251static int	 sizetab(const struct mactab *);
252static void	 getfname(void);
253static void	 textline(char *, int);
254static void	 work(void);
255static void	 regline(void (*)(char *, int), int);
256static void	 macro(void);
257static void	 tbl(void);
258static void	 stbl(void);
259static void	 eqn(void);
260static void	 backsl(void);
261static void	 sce(void);
262static void	 refer(int);
263static void	 inpic(void);
264static void	 msputmac(char *, int);
265static void	 msputwords(int);
266static void	 meputmac(char *, int);
267static void	 meputwords(int);
268static void	 noblock(char, char);
269static void	 defcomline(pacmac);
270static void	 comline(void);
271static void	 buildtab(const struct mactab **, int *);
272static FILE	*opn(char *);
273static struct mactab *macfill(struct mactab *, const struct mactab *);
274static void usage(void) __dead;
275
276int
277main(int ac, char **av)
278{
279	int	i, ch;
280	int	errflg = 0;
281	int	kflag = NO;
282
283	iflag = NO;
284	wordflag = NO;
285	msflag = NO;
286	mac = ME;
287	disp = NO;
288	parag = NO;
289	inmacro = NO;
290	intable = NO;
291	ldelim	= NOCHAR;
292	rdelim	= NOCHAR;
293	keepblock = YES;
294
295	while ((ch = getopt(ac, av, "ikpwm:")) != -1) {
296		switch (ch) {
297		case 'i':
298			iflag = YES;
299			break;
300		case 'k':
301			kflag = YES;
302			break;
303		case 'm':
304			msflag = YES;
305			keepblock = NO;
306			switch (optarg[0]) {
307			case 'm':
308				mac = MM;
309				break;
310			case 's':
311				mac = MS;
312				break;
313			case 'e':
314				mac = ME;
315				break;
316			case 'a':
317				mac = MA;
318				break;
319			case 'l':
320				disp = YES;
321				break;
322			default:
323				errflg++;
324				break;
325			}
326			if (errflg == 0 && optarg[1] != '\0')
327				errflg++;
328			break;
329		case 'p':
330			parag = YES;
331			break;
332		case 'w':
333			wordflag = YES;
334			kflag = YES;
335			break;
336		default:
337			errflg++;
338		}
339	}
340	argc = ac - optind;
341	argv = av + optind;
342
343	if (kflag)
344		keepblock = YES;
345	if (errflg)
346		usage();
347
348#ifdef DEBUG
349	printf("msflag = %d, mac = %s, keepblock = %d, disp = %d\n",
350		msflag, mactab[mac], keepblock, disp);
351#endif /* DEBUG */
352	if (argc == 0) {
353		infile = stdin;
354	} else {
355		infile = opn(argv[0]);
356		--argc;
357		++argv;
358	}
359	files[0] = infile;
360	filesp = &files[0];
361
362	for (i = 'a'; i <= 'z' ; ++i)
363		chars[i] = LETTER;
364	for (i = 'A'; i <= 'Z'; ++i)
365		chars[i] = LETTER;
366	for (i = '0'; i <= '9'; ++i)
367		chars[i] = DIGIT;
368	chars['\''] = APOS;
369	chars['&'] = APOS;
370	chars['.'] = PUNCT;
371	chars[','] = PUNCT;
372	chars[';'] = PUNCT;
373	chars['?'] = PUNCT;
374	chars[':'] = PUNCT;
375	work();
376	return 0;
377}
378
379static int
380skeqn(void)
381{
382
383	while ((c = getc(infile)) != rdelim) {
384		if (c == EOF)
385			c = eof();
386		else if (c == '"') {
387			while ((c = getc(infile)) != '"') {
388				if (c == EOF ||
389				    (c == '\\' && (c = getc(infile)) == EOF))
390					c = eof();
391			}
392		}
393	}
394	if (msflag)
395		return c == 'x';
396	return c == ' ';
397}
398
/*
 * Open the file named p for reading and return its stream.
 * Does not return on failure: err() reports the error and exits with
 * status 1.
 */
static FILE *
opn(char *p)
{
	FILE *stream;

	stream = fopen(p, "r");
	if (stream == NULL)
		err(1, "fopen %s", p);
	return stream;
}
409
410static int
411eof(void)
412{
413
414	if (infile != stdin)
415		fclose(infile);
416	if (filesp > files)
417		infile = *--filesp;
418	else if (argc > 0) {
419		infile = opn(argv[0]);
420		--argc;
421		++argv;
422	} else
423		exit(0);
424	return C;
425}
426
427static void
428getfname(void)
429{
430	char *p;
431	struct chain {
432		struct chain *nextp;
433		char *datap;
434	} *q;
435	static struct chain *namechain= NULL;
436
437	while (C == ' ')
438		;	/* nothing */
439
440	for (p = fname ; p - fname < (ptrdiff_t)sizeof(fname) &&
441	    (*p = c) != '\n' &&
442	    c != ' ' && c != '\t' && c != '\\'; ++p)
443		C;
444	*p = '\0';
445	while (c != '\n')
446		C;
447
448	/* see if this name has already been used */
449	for (q = namechain ; q; q = q->nextp)
450		if (strcmp(fname, q->datap) == 0) {
451			fname[0] = '\0';
452			return;
453		}
454
455	q = (struct chain *) malloc(sizeof(struct chain));
456	if (q == NULL)
457		err(1, NULL);
458	q->nextp = namechain;
459	q->datap = strdup(fname);
460	if (q->datap == NULL)
461		err(1, NULL);
462	namechain = q;
463}
464
465/*ARGSUSED*/
466static void
467textline(char *str, int constant)
468{
469
470	if (wordflag) {
471		msputwords(0);
472		return;
473	}
474	puts(str);
475}
476
477void
478work(void)
479{
480
481	for (;;) {
482		C;
483#ifdef FULLDEBUG
484		printf("Starting work with `%c'\n", c);
485#endif /* FULLDEBUG */
486		if (c == '.' || c == '\'')
487			comline();
488		else
489			regline(textline, TWO);
490	}
491}
492
493static void
494regline(void (*pfunc)(char *, int), int constant)
495{
496
497	line[0] = c;
498	lp = line;
499	while (lp - line < (ptrdiff_t)sizeof(line)) {
500		if (c == '\\') {
501			*lp = ' ';
502			backsl();
503		}
504		if (c == '\n')
505			break;
506		if (intable && c == 'T') {
507			*++lp = C;
508			if (c == '{' || c == '}') {
509				lp[-1] = ' ';
510				*lp = C;
511			}
512		} else {
513			*++lp = C;
514		}
515	}
516	*lp = '\0';
517
518	if (line[0] != '\0')
519		(*pfunc)(line, constant);
520}
521
522static void
523macro(void)
524{
525
526	if (msflag) {
527		do {
528			SKIP;
529		} while (C!='.' || C!='.' || C=='.');	/* look for  .. */
530		if (c != '\n')
531			SKIP;
532		return;
533	}
534	SKIP;
535	inmacro = YES;
536}
537
538static void
539tbl(void)
540{
541
542	while (C != '.')
543		;	/* nothing */
544	SKIP;
545	intable = YES;
546}
547
548static void
549stbl(void)
550{
551
552	while (C != '.')
553		;	/* nothing */
554	SKIP_TO_COM;
555	if (c != 'T' || C != 'E') {
556		SKIP;
557		pc = c;
558		while (C != '.' || pc != '\n' || C != 'T' || C != 'E')
559			pc = c;
560	}
561}
562
563static void
564eqn(void)
565{
566	int c1, c2;
567	int dflg;
568	char last;
569
570	last=0;
571	dflg = 1;
572	SKIP;
573
574	for (;;) {
575		if (C1 == '.'  || c == '\'') {
576			while (C1 == ' ' || c == '\t')
577				;
578			if (c == 'E' && C1 == 'N') {
579				SKIP;
580				if (msflag && dflg) {
581					putchar('x');
582					putchar(' ');
583					if (last) {
584						putchar(last);
585						putchar('\n');
586					}
587				}
588				return;
589			}
590		} else if (c == 'd') {
591			/* look for delim */
592			if (C1 == 'e' && C1 == 'l')
593				if (C1 == 'i' && C1 == 'm') {
594					while (C1 == ' ')
595						;	/* nothing */
596
597					if ((c1 = c) == '\n' ||
598					    (c2 = C1) == '\n' ||
599					    (c1 == 'o' && c2 == 'f' && C1=='f')) {
600						ldelim = NOCHAR;
601						rdelim = NOCHAR;
602					} else {
603						ldelim = c1;
604						rdelim = c2;
605					}
606				}
607			dflg = 0;
608		}
609
610		if (c != '\n')
611			while (C1 != '\n') {
612				if (chars[c] == PUNCT)
613					last = c;
614				else if (c != ' ')
615					last = 0;
616			}
617	}
618}
619
620/* skip over a complete backslash construction */
621static void
622backsl(void)
623{
624	int bdelim;
625
626sw:
627	switch (C) {
628	case '"':
629		SKIP;
630		return;
631
632	case 's':
633		if (C == '\\')
634			backsl();
635		else {
636			while (C >= '0' && c <= '9')
637				;	/* nothing */
638			ungetc(c, infile);
639			c = '0';
640		}
641		--lp;
642		return;
643
644	case 'f':
645	case 'n':
646	case '*':
647		if (C != '(')
648			return;
649
650	case '(':
651		if (msflag) {
652			if (C == 'e') {
653				if (C == 'm') {
654					*lp = '-';
655					return;
656				}
657			}
658			else if (c != '\n')
659				C;
660			return;
661		}
662		if (C != '\n')
663			C;
664		return;
665
666	case '$':
667		C;	/* discard argument number */
668		return;
669
670	case 'b':
671	case 'x':
672	case 'v':
673	case 'h':
674	case 'w':
675	case 'o':
676	case 'l':
677	case 'L':
678		if ((bdelim = C) == '\n')
679			return;
680		while (C != '\n' && c != bdelim)
681			if (c == '\\')
682				backsl();
683		return;
684
685	case '\\':
686		if (inmacro)
687			goto sw;
688
689	default:
690		return;
691	}
692}
693
694static void
695sce(void)
696{
697	char *ap;
698	int n, i;
699	char a[10];
700
701	for (ap = a; C != '\n'; ap++) {
702		*ap = c;
703		if (ap == &a[9]) {
704			SKIP;
705			ap = a;
706			break;
707		}
708	}
709	if (ap != a)
710		n = atoi(a);
711	else
712		n = 1;
713	for (i = 0; i < n;) {
714		if (C == '.') {
715			if (C == 'c') {
716				if (C == 'e') {
717					while (C == ' ')
718						;	/* nothing */
719					if (c == '0') {
720						SKIP;
721						break;
722					} else
723						SKIP;
724				}
725				else
726					SKIP;
727			} else if (c == 'P' || C == 'P') {
728				if (c != '\n')
729					SKIP;
730				break;
731			} else if (c != '\n')
732				SKIP;
733		} else {
734			SKIP;
735			i++;
736		}
737	}
738}
739
740static void
741refer(int c1)
742{
743	int c2;
744
745	if (c1 != '\n')
746		SKIP;
747
748	for (c2 = -1;;) {
749		if (C != '.')
750			SKIP;
751		else {
752			if (C != ']')
753				SKIP;
754			else {
755				while (C != '\n')
756					c2 = c;
757				if (c2 != -1 && chars[c2] == PUNCT)
758					putchar(c2);
759				return;
760			}
761		}
762	}
763}
764
765static void
766inpic(void)
767{
768	int c1;
769	char *p1;
770
771	SKIP;
772	p1 = line;
773	c = '\n';
774	for (;;) {
775		c1 = c;
776		if (C == '.' && c1 == '\n') {
777			if (C != 'P') {
778				if (c == '\n')
779					continue;
780				else {
781					SKIP;
782					c = '\n';
783					continue;
784				}
785			}
786			if (C != 'E') {
787				if (c == '\n')
788					continue;
789				else {
790					SKIP;
791					c = '\n';
792					continue;
793				}
794			}
795			SKIP;
796			return;
797		}
798		else if (c == '\"') {
799			while (C != '\"') {
800				if (c == '\\') {
801					if (C == '\"')
802						continue;
803					ungetc(c, infile);
804					backsl();
805				} else
806					*p1++ = c;
807			}
808			*p1++ = ' ';
809		}
810		else if (c == '\n' && p1 != line) {
811			*p1 = '\0';
812			if (wordflag)
813				msputwords(NO);
814			else {
815				puts(line);
816				putchar('\n');
817			}
818			p1 = line;
819		}
820	}
821}
822
#ifdef DEBUG
/* Function form of C1get; C1 expands to a call of this under DEBUG. */
static int
_C1(void)
{
	int ch;

	ch = C1get;
	return ch;
}

/* Function form of Cget; C expands to a call of this under DEBUG. */
static int
_C(void)
{
	int ch;

	ch = Cget;
	return ch;
}
#endif /* DEBUG */
838
839/*
840 *	Put out a macro line, using ms and mm conventions.
841 */
842static void
843msputmac(char *s, int constant)
844{
845	char *t;
846	int found;
847	int last;
848
849	last = 0;
850	found = 0;
851	if (wordflag) {
852		msputwords(YES);
853		return;
854	}
855	while (*s) {
856		while (*s == ' ' || *s == '\t')
857			putchar(*s++);
858		for (t = s ; *t != ' ' && *t != '\t' && *t != '\0' ; ++t)
859			;	/* nothing */
860		if (*s == '\"')
861			s++;
862		if (t > s + constant && chars[(unsigned char)s[0]] == LETTER &&
863		    chars[(unsigned char)s[1]] == LETTER) {
864			while (s < t)
865				if (*s == '\"')
866					s++;
867				else
868					putchar(*s++);
869			last = *(t-1);
870			found++;
871		} else if (found && chars[(unsigned char)s[0]] == PUNCT &&
872		    s[1] == '\0') {
873			putchar(*s++);
874		} else {
875			last = *(t - 1);
876			s = t;
877		}
878	}
879	putchar('\n');
880	if (msflag && chars[last] == PUNCT) {
881		putchar(last);
882		putchar('\n');
883	}
884}
885
886/*
887 *	put out words (for the -w option) with ms and mm conventions
888 */
889static void
890msputwords(int macline)
891{
892	char *p, *p1;
893	int i, nlet;
894
895	for (p1 = line;;) {
896		/*
897		 *	skip initial specials ampersands and apostrophes
898		 */
899		while (chars[(unsigned char)*p1] < DIGIT)
900			if (*p1++ == '\0')
901				return;
902		nlet = 0;
903		for (p = p1 ; (i = chars[(unsigned char)*p]) != SPECIAL ; ++p)
904			if (i == LETTER)
905				++nlet;
906
907		if (nlet > 1 && chars[(unsigned char)p1[0]] == LETTER) {
908			/*
909			 *	delete trailing ampersands and apostrophes
910			 */
911			while ((i = chars[(unsigned char)p[-1]]) == PUNCT ||
912			    i == APOS )
913				--p;
914			while (p1 < p)
915				putchar(*p1++);
916			putchar('\n');
917		} else {
918			p1 = p;
919		}
920	}
921}
922
923/*
924 *	put out a macro using the me conventions
925 */
926#define SKIPBLANK(cp)	while (*cp == ' ' || *cp == '\t') { cp++; }
927#define SKIPNONBLANK(cp) while (*cp !=' ' && *cp !='\cp' && *cp !='\0') { cp++; }
928
929static void
930meputmac(char *cp, int constant)
931{
932	char	*np;
933	int	found;
934	int	argno;
935	int	last;
936	int	inquote;
937
938	last = 0;
939	found = 0;
940	if (wordflag) {
941		meputwords(YES);
942		return;
943	}
944	for (argno = 0; *cp; argno++) {
945		SKIPBLANK(cp);
946		inquote = (*cp == '"');
947		if (inquote)
948			cp++;
949		for (np = cp; *np; np++) {
950			switch (*np) {
951			case '\n':
952			case '\0':
953				break;
954
955			case '\t':
956			case ' ':
957				if (inquote)
958					continue;
959				else
960					goto endarg;
961
962			case '"':
963				if (inquote && np[1] == '"') {
964					memmove(np, np + 1, strlen(np));
965					np++;
966					continue;
967				} else {
968					*np = ' '; 	/* bye bye " */
969					goto endarg;
970				}
971
972			default:
973				continue;
974			}
975		}
976		endarg: ;
977		/*
978		 *	cp points at the first char in the arg
979		 *	np points one beyond the last char in the arg
980		 */
981		if ((argconcat == 0) || (argconcat != argno))
982			putchar(' ');
983#ifdef FULLDEBUG
984		{
985			char	*p;
986			printf("[%d,%d: ", argno, np - cp);
987			for (p = cp; p < np; p++) {
988				putchar(*p);
989			}
990			printf("]");
991		}
992#endif /* FULLDEBUG */
993		/*
994		 *	Determine if the argument merits being printed
995		 *
996		 *	constant is the cut off point below which something
997		 *	is not a word.
998		 */
999		if (((np - cp) > constant) &&
1000		    (inquote || (chars[(unsigned char)cp[0]] == LETTER))) {
1001			for (; cp < np; cp++)
1002				putchar(*cp);
1003			last = np[-1];
1004			found++;
1005		} else if (found && (np - cp == 1) &&
1006		    chars[(unsigned char)*cp] == PUNCT) {
1007			putchar(*cp);
1008		} else {
1009			last = np[-1];
1010		}
1011		cp = np;
1012	}
1013	if (msflag && chars[last] == PUNCT)
1014		putchar(last);
1015	putchar('\n');
1016}
1017
/*
 *	put out words (for the -w option) for the me package;
 *	simply forwards to the ms/mm routine
 */
static void
meputwords(int macline)
{

	msputwords(macline);
	return;
}
1027
1028/*
1029 *
1030 *	Skip over a nested set of macros
1031 *
1032 *	Possible arguments to noblock are:
1033 *
1034 *	fi	end of unfilled text
1035 *	PE	pic ending
1036 *	DE	display ending
1037 *
1038 *	for ms and mm only:
1039 *		KE	keep ending
1040 *
1041 *		NE	undocumented match to NS (for mm?)
1042 *		LE	mm only: matches RL or *L (for lists)
1043 *
1044 *	for me:
1045 *		([lqbzcdf]
1046 */
1047static void
1048noblock(char a1, char a2)
1049{
1050	int c1,c2;
1051	int eqnf;
1052	int lct;
1053
1054	lct = 0;
1055	eqnf = 1;
1056	SKIP;
1057	for (;;) {
1058		while (C != '.')
1059			if (c == '\n')
1060				continue;
1061			else
1062				SKIP;
1063		if ((c1 = C) == '\n')
1064			continue;
1065		if ((c2 = C) == '\n')
1066			continue;
1067		if (c1 == a1 && c2 == a2) {
1068			SKIP;
1069			if (lct != 0) {
1070				lct--;
1071				continue;
1072			}
1073			if (eqnf)
1074				putchar('.');
1075			putchar('\n');
1076			return;
1077		} else if (a1 == 'L' && c2 == 'L') {
1078			lct++;
1079			SKIP;
1080		}
1081		/*
1082		 *	equations (EQ) nested within a display
1083		 */
1084		else if (c1 == 'E' && c2 == 'Q') {
1085			if ((mac == ME && a1 == ')')
1086			    || (mac != ME && a1 == 'D')) {
1087				eqn();
1088				eqnf=0;
1089			}
1090		}
1091		/*
1092		 *	turning on filling is done by the paragraphing
1093		 *	macros
1094		 */
1095		else if (a1 == 'f') {	/* .fi */
1096			if  ((mac == ME && (c2 == 'h' || c2 == 'p'))
1097			    || (mac != ME && (c1 == 'P' || c2 == 'P'))) {
1098				SKIP;
1099				return;
1100			}
1101		} else {
1102			SKIP;
1103		}
1104	}
1105}
1106
1107static int
1108/*ARGSUSED*/
1109EQ(pacmac unused)
1110{
1111
1112	eqn();
1113	return 0;
1114}
1115
1116static int
1117/*ARGSUSED*/
1118domacro(pacmac unused)
1119{
1120
1121	macro();
1122	return 0;
1123}
1124
1125static int
1126/*ARGSUSED*/
1127PS(pacmac unused)
1128{
1129
1130	for (C; c == ' ' || c == '\t'; C)
1131		;	/* nothing */
1132
1133	if (c == '<') {		/* ".PS < file" -- don't expect a .PE */
1134		SKIP;
1135		return 0;
1136	}
1137	if (!msflag)
1138		inpic();
1139	else
1140		noblock('P', 'E');
1141	return 0;
1142}
1143
1144static int
1145/*ARGSUSED*/
1146skip(pacmac unused)
1147{
1148
1149	SKIP;
1150	return 0;
1151}
1152
1153static int
1154/*ARGSUSED*/
1155intbl(pacmac unused)
1156{
1157
1158	if (msflag)
1159		stbl();
1160	else
1161		tbl();
1162	return 0;
1163}
1164
1165static int
1166/*ARGSUSED*/
1167outtbl(pacmac unused)
1168{
1169
1170	intable = NO;
1171	return 0;
1172}
1173
1174int
1175/*ARGSUSED*/
1176so(pacmac unused)
1177{
1178
1179	if (!iflag) {
1180		getfname();
1181		if (fname[0]) {
1182			if (++filesp - &files[0] > MAXFILES)
1183				err(1, "too many nested files (max %d)",
1184				    MAXFILES);
1185			infile = *filesp = opn(fname);
1186		}
1187	}
1188	return 0;
1189}
1190
1191static int
1192/*ARGSUSED*/
1193nx(pacmac unused)
1194{
1195
1196	if (!iflag) {
1197		getfname();
1198		if (fname[0] == '\0')
1199			exit(0);
1200		if (infile != stdin)
1201			fclose(infile);
1202		infile = *filesp = opn(fname);
1203	}
1204	return 0;
1205}
1206
1207static int
1208/*ARGSUSED*/
1209skiptocom(pacmac unused)
1210{
1211
1212	SKIP_TO_COM;
1213	return COMX;
1214}
1215
1216static int
1217PP(pacmac c12)
1218{
1219	int c1, c2;
1220
1221	frommac(c12, c1, c2);
1222	printf(".%c%c", c1, c2);
1223	while (C != '\n')
1224		putchar(c);
1225	putchar('\n');
1226	return 0;
1227}
1228
1229static int
1230/*ARGSUSED*/
1231AU(pacmac unused)
1232{
1233
1234	if (mac == MM)
1235		return 0;
1236	SKIP_TO_COM;
1237	return COMX;
1238}
1239
1240static int
1241SH(pacmac c12)
1242{
1243	int c1, c2;
1244
1245	frommac(c12, c1, c2);
1246
1247	if (parag) {
1248		printf(".%c%c", c1, c2);
1249		while (C != '\n')
1250			putchar(c);
1251		putchar(c);
1252		putchar('!');
1253		for (;;) {
1254			while (C != '\n')
1255				putchar(c);
1256			putchar('\n');
1257			if (C == '.')
1258				return COM;
1259			putchar('!');
1260			putchar(c);
1261		}
1262		/*NOTREACHED*/
1263	} else {
1264		SKIP_TO_COM;
1265		return COMX;
1266	}
1267}
1268
1269static int
1270/*ARGSUSED*/
1271UX(pacmac unused)
1272{
1273
1274	if (wordflag)
1275		printf("UNIX\n");
1276	else
1277		printf("UNIX ");
1278	return 0;
1279}
1280
1281static int
1282MMHU(pacmac c12)
1283{
1284	int c1, c2;
1285
1286	frommac(c12, c1, c2);
1287	if (parag) {
1288		printf(".%c%c", c1, c2);
1289		while (C != '\n')
1290			putchar(c);
1291		putchar('\n');
1292	} else {
1293		SKIP;
1294	}
1295	return 0;
1296}
1297
1298static int
1299mesnblock(pacmac c12)
1300{
1301	int c1, c2;
1302
1303	frommac(c12, c1, c2);
1304	noblock(')', c2);
1305	return 0;
1306}
1307
1308static int
1309mssnblock(pacmac c12)
1310{
1311	int c1, c2;
1312
1313	frommac(c12, c1, c2);
1314	noblock(c1, 'E');
1315	return 0;
1316}
1317
1318static int
1319/*ARGUSED*/
1320nf(pacmac unused)
1321{
1322
1323	noblock('f', 'i');
1324	return 0;
1325}
1326
1327static int
1328/*ARGUSED*/
1329ce(pacmac unused)
1330{
1331
1332	sce();
1333	return 0;
1334}
1335
1336static int
1337meip(pacmac c12)
1338{
1339
1340	if (parag)
1341		mepp(c12);
1342	else if (wordflag)	/* save the tag */
1343		regline(meputmac, ONE);
1344	else
1345		SKIP;
1346	return 0;
1347}
1348
1349/*
1350 *	only called for -me .pp or .sh, when parag is on
1351 */
1352static int
1353mepp(pacmac c12)
1354{
1355
1356	PP(c12);		/* eats the line */
1357	return 0;
1358}
1359
1360/*
1361 *	Start of a section heading; output the section name if doing words
1362 */
1363static int
1364mesh(pacmac c12)
1365{
1366
1367	if (parag)
1368		mepp(c12);
1369	else if (wordflag)
1370		defcomline(c12);
1371	else
1372		SKIP;
1373	return 0;
1374}
1375
1376/*
1377 *	process a font setting
1378 */
1379static int
1380mefont(pacmac c12)
1381{
1382
1383	argconcat = 1;
1384	defcomline(c12);
1385	argconcat = 0;
1386	return 0;
1387}
1388
1389static int
1390manfont(pacmac c12)
1391{
1392
1393	return mefont(c12);
1394}
1395
1396static int
1397manpp(pacmac c12)
1398{
1399
1400	return mepp(c12);
1401}
1402
1403static void
1404defcomline(pacmac c12)
1405{
1406	int c1, c2;
1407
1408	frommac(c12, c1, c2);
1409	if (msflag && mac == MM && c2 == 'L') {
1410		if (disp || c1 == 'R') {
1411			noblock('L', 'E');
1412		} else {
1413			SKIP;
1414			putchar('.');
1415		}
1416	}
1417	else if (c1 == '.' && c2 == '.') {
1418		if (msflag) {
1419			SKIP;
1420			return;
1421		}
1422		while (C == '.')
1423			/*VOID*/;
1424	}
1425	++inmacro;
1426	/*
1427	 *	Process the arguments to the macro
1428	 */
1429	switch (mac) {
1430	default:
1431	case MM:
1432	case MS:
1433		if (c1 <= 'Z' && msflag)
1434			regline(msputmac, ONE);
1435		else
1436			regline(msputmac, TWO);
1437		break;
1438	case ME:
1439		regline(meputmac, ONE);
1440		break;
1441	}
1442	--inmacro;
1443}
1444
1445static void
1446comline(void)
1447{
1448	int	c1;
1449	int	c2;
1450	pacmac	c12;
1451	int	mid;
1452	int	lb, ub;
1453	int	hit;
1454	static	int	tabsize = 0;
1455	static	const struct mactab	*mactab = NULL;
1456	const struct mactab	*mp;
1457
1458	if (mactab == 0)
1459		 buildtab(&mactab, &tabsize);
1460com:
1461	while (C == ' ' || c == '\t')
1462		;
1463comx:
1464	if ((c1 = c) == '\n')
1465		return;
1466	c2 = C;
1467	if (c1 == '.' && c2 != '.')
1468		inmacro = NO;
1469	if (msflag && c1 == '[') {
1470		refer(c2);
1471		return;
1472	}
1473	if (parag && mac==MM && c1 == 'P' && c2 == '\n') {
1474		printf(".P\n");
1475		return;
1476	}
1477	if (c2 == '\n')
1478		return;
1479	/*
1480	 *	Single letter macro
1481	 */
1482	if (mac == ME && (c2 == ' ' || c2 == '\t') )
1483		c2 = ' ';
1484	c12 = tomac(c1, c2);
1485	/*
1486	 *	binary search through the table of macros
1487	 */
1488	lb = 0;
1489	ub = tabsize - 1;
1490	while (lb <= ub) {
1491		mid = (ub + lb) / 2;
1492		mp = &mactab[mid];
1493		if (mp->macname < c12)
1494			lb = mid + 1;
1495		else if (mp->macname > c12)
1496			ub = mid - 1;
1497		else {
1498			hit = 1;
1499#ifdef FULLDEBUG
1500			printf("preliminary hit macro %c%c ", c1, c2);
1501#endif /* FULLDEBUG */
1502			switch (mp->condition) {
1503			case NONE:
1504				hit = YES;
1505				break;
1506			case FNEST:
1507				hit = (filesp == files);
1508				break;
1509			case NOMAC:
1510				hit = !inmacro;
1511				break;
1512			case MAC:
1513				hit = inmacro;
1514				break;
1515			case PARAG:
1516				hit = parag;
1517				break;
1518			case NBLK:
1519				hit = !keepblock;
1520				break;
1521			default:
1522				hit = 0;
1523			}
1524
1525			if (hit) {
1526#ifdef FULLDEBUG
1527				printf("MATCH\n");
1528#endif /* FULLDEBUG */
1529				switch ((*(mp->func))(c12)) {
1530				default:
1531					return;
1532				case COMX:
1533					goto comx;
1534				case COM:
1535					goto com;
1536				}
1537			}
1538#ifdef FULLDEBUG
1539			printf("FAIL\n");
1540#endif /* FULLDEBUG */
1541			break;
1542		}
1543	}
1544	defcomline(c12);
1545}
1546
1547static int
1548macsort(const void *p1, const void *p2)
1549{
1550	const struct mactab *t1 = p1;
1551	const struct mactab *t2 = p2;
1552
1553	return t1->macname - t2->macname;
1554}
1555
1556static int
1557sizetab(const struct mactab *mp)
1558{
1559	int i;
1560
1561	i = 0;
1562	if (mp) {
1563		for (; mp->macname; mp++, i++)
1564			/*VOID*/ ;
1565	}
1566	return i;
1567}
1568
1569static struct mactab *
1570macfill(struct mactab *dst, const struct mactab *src)
1571{
1572
1573	if (src) {
1574		while (src->macname)
1575			*dst++ = *src++;
1576	}
1577	return dst;
1578}
1579
/*
 * Print the command synopsis to stderr and terminate with exit
 * status 1.  Does not return.
 */
static void
usage(void)
{
	extern char *__progname;
	const char *self = __progname;

	(void)fprintf(stderr,
	    "usage: %s [-ikpw ] [ -m a | e | l | m | s] [file ...]\n", self);
	exit(1);
}
1588
1589static void
1590buildtab(const struct mactab **r_back, int *r_size)
1591{
1592	size_t	size;
1593	const struct	mactab	*p1, *p2;
1594	struct	mactab	*back, *p;
1595
1596	size = sizetab(troffmactab) + sizetab(ppmactab);
1597	p1 = p2 = NULL;
1598	if (msflag) {
1599		switch (mac) {
1600		case ME:
1601			p1 = memactab;
1602			break;
1603		case MM:
1604			p1 = msmactab;
1605			p2 = mmmactab;
1606			break;
1607		case MS:
1608			p1 = msmactab;
1609			break;
1610		case MA:
1611			p1 = manmactab;
1612			break;
1613		default:
1614			break;
1615		}
1616	}
1617	size += sizetab(p1);
1618	size += sizetab(p2);
1619	back = calloc(size + 2, sizeof(struct mactab));
1620	if (back == NULL)
1621		err(1, NULL);
1622
1623	p = macfill(back, troffmactab);
1624	p = macfill(p, ppmactab);
1625	p = macfill(p, p1);
1626	p = macfill(p, p2);
1627
1628	qsort(back, size, sizeof(struct mactab), macsort);
1629	*r_size = size;
1630	*r_back = back;
1631}
1632
1633/*
1634 *	troff commands
1635 */
1636static const struct mactab	troffmactab[] = {
1637	M(NONE,		'\\','"',	skip),	/* comment */
1638	M(NOMAC,	'd','e',	domacro),	/* define */
1639	M(NOMAC,	'i','g',	domacro),	/* ignore till .. */
1640	M(NOMAC,	'a','m',	domacro),	/* append macro */
1641	M(NBLK,		'n','f',	nf),	/* filled */
1642	M(NBLK,		'c','e',	ce),	/* centered */
1643
1644	M(NONE,		's','o',	so),	/* source a file */
1645	M(NONE,		'n','x',	nx),	/* go to next file */
1646
1647	M(NONE,		't','m',	skip),	/* print string on tty */
1648	M(NONE,		'h','w',	skip),	/* exception hyphen words */
1649	M(NONE,		0,0,		0)
1650};
1651
1652/*
1653 *	Preprocessor output
1654 */
1655static const struct mactab	ppmactab[] = {
1656	M(FNEST,	'E','Q',	EQ),	/* equation starting */
1657	M(FNEST,	'T','S',	intbl),	/* table starting */
1658	M(FNEST,	'T','C',	intbl),	/* alternative table? */
1659	M(FNEST,	'T','&',	intbl),	/* table reformatting */
1660	M(NONE,		'T','E',	outtbl),/* table ending */
1661	M(NONE,		'P','S',	PS),	/* picture starting */
1662	M(NONE,		0,0,		0)
1663};
1664
1665/*
1666 *	Particular to ms and mm
1667 */
1668static const struct mactab	msmactab[] = {
1669	M(NONE,		'T','L',	skiptocom),	/* title follows */
1670	M(NONE,		'F','S',	skiptocom),	/* start footnote */
1671	M(NONE,		'O','K',	skiptocom),	/* Other kws */
1672
1673	M(NONE,		'N','R',	skip),	/* undocumented */
1674	M(NONE,		'N','D',	skip),	/* use supplied date */
1675
1676	M(PARAG,	'P','P',	PP),	/* begin parag */
1677	M(PARAG,	'I','P',	PP),	/* begin indent parag, tag x */
1678	M(PARAG,	'L','P',	PP),	/* left blocked parag */
1679
1680	M(NONE,		'A','U',	AU),	/* author */
1681	M(NONE,		'A','I',	AU),	/* authors institution */
1682
1683	M(NONE,		'S','H',	SH),	/* section heading */
1684	M(NONE,		'S','N',	SH),	/* undocumented */
1685	M(NONE,		'U','X',	UX),	/* unix */
1686
1687	M(NBLK,		'D','S',	mssnblock),	/* start display text */
1688	M(NBLK,		'K','S',	mssnblock),	/* start keep */
1689	M(NBLK,		'K','F',	mssnblock),	/* start float keep */
1690	M(NONE,		0,0,		0)
1691};
1692
1693static const struct mactab	mmmactab[] = {
1694	M(NONE,		'H',' ',	MMHU),	/* -mm ? */
1695	M(NONE,		'H','U',	MMHU),	/* -mm ? */
1696	M(PARAG,	'P',' ',	PP),	/* paragraph for -mm */
1697	M(NBLK,		'N','S',	mssnblock),	/* undocumented */
1698	M(NONE,		0,0,		0)
1699};
1700
1701static const struct mactab	memactab[] = {
1702	M(PARAG,	'p','p',	mepp),
1703	M(PARAG,	'l','p',	mepp),
1704	M(PARAG,	'n','p',	mepp),
1705	M(NONE,		'i','p',	meip),
1706
1707	M(NONE,		's','h',	mesh),
1708	M(NONE,		'u','h',	mesh),
1709
1710	M(NBLK,		'(','l',	mesnblock),
1711	M(NBLK,		'(','q',	mesnblock),
1712	M(NBLK,		'(','b',	mesnblock),
1713	M(NBLK,		'(','z',	mesnblock),
1714	M(NBLK,		'(','c',	mesnblock),
1715
1716	M(NBLK,		'(','d',	mesnblock),
1717	M(NBLK,		'(','f',	mesnblock),
1718	M(NBLK,		'(','x',	mesnblock),
1719
1720	M(NONE,		'r',' ',	mefont),
1721	M(NONE,		'i',' ',	mefont),
1722	M(NONE,		'b',' ',	mefont),
1723	M(NONE,		'u',' ',	mefont),
1724	M(NONE,		'q',' ',	mefont),
1725	M(NONE,		'r','b',	mefont),
1726	M(NONE,		'b','i',	mefont),
1727	M(NONE,		'b','x',	mefont),
1728	M(NONE,		0,0,		0)
1729};
1730
1731static const struct mactab	manmactab[] = {
1732	M(PARAG,	'B','I',	manfont),
1733	M(PARAG,	'B','R',	manfont),
1734	M(PARAG,	'I','B',	manfont),
1735	M(PARAG,	'I','R',	manfont),
1736	M(PARAG,	'R','B',	manfont),
1737	M(PARAG,	'R','I',	manfont),
1738
1739	M(PARAG,	'P','P',	manpp),
1740	M(PARAG,	'L','P',	manpp),
1741	M(PARAG,	'H','P',	manpp),
1742	M(NONE,		0,0,		0)
1743};
1744