read.c revision 1.11
1/*	$Id: read.c,v 1.11 2015/12/17 22:31:12 christos Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19#include "config.h"
20
21#include <sys/types.h>
22#if HAVE_MMAP
23#include <sys/mman.h>
24#include <sys/stat.h>
25#endif
26#include <sys/wait.h>
27
28#include <assert.h>
29#include <ctype.h>
30#include <errno.h>
31#include <fcntl.h>
32#include <stdarg.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38
39#include "mandoc.h"
40#include "mandoc_aux.h"
41#include "libmandoc.h"
42#include "mdoc.h"
43#include "man.h"
44
/*
 * Upper bound on how often one input line may be handed back for
 * reparsing; once exceeded, mparse_buf_r() reports MANDOCERR_ROFFLOOP
 * instead of recursing again.
 */
#define	REPARSE_LIMIT	1000
46
47struct	mparse {
48	struct man	 *pman; /* persistent man parser */
49	struct mdoc	 *pmdoc; /* persistent mdoc parser */
50	struct man	 *man; /* man parser */
51	struct mdoc	 *mdoc; /* mdoc parser */
52	struct roff	 *roff; /* roff parser (!NULL) */
53	const struct mchars *mchars; /* character table */
54	char		 *sodest; /* filename pointed to by .so */
55	const char	 *file; /* filename of current input file */
56	struct buf	 *primary; /* buffer currently being parsed */
57	struct buf	 *secondary; /* preprocessed copy of input */
58	const char	 *defos; /* default operating system */
59	mandocmsg	  mmsg; /* warning/error message handler */
60	enum mandoclevel  file_status; /* status of current parse */
61	enum mandoclevel  wlevel; /* ignore messages below this */
62	int		  options; /* parser options */
63	int		  filenc; /* encoding of the current file */
64	int		  reparse_count; /* finite interp. stack */
65	int		  line; /* line number in the file */
66	pid_t		  child; /* the gunzip(1) process */
67};
68
/* Buffer handling. */
static	void	  resize_buf(struct buf *buf, size_t initial);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);

/* Parser selection and the parse loop itself. */
static	void	  choose_parser(struct mparse *curp);
static	void	  mparse_buf_r(struct mparse *curp, struct buf blk,
			size_t i, int start);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
static	void	  mparse_end(struct mparse *curp);
77
78static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
79	MANDOCERR_OK,
80	MANDOCERR_WARNING,
81	MANDOCERR_WARNING,
82	MANDOCERR_ERROR,
83	MANDOCERR_UNSUPP,
84	MANDOCERR_MAX,
85	MANDOCERR_MAX
86};
87
88static	const char * const	mandocerrs[MANDOCERR_MAX] = {
89	"ok",
90
91	"generic warning",
92
93	/* related to the prologue */
94	"missing manual title, using UNTITLED",
95	"missing manual title, using \"\"",
96	"lower case character in document title",
97	"missing manual section, using \"\"",
98	"unknown manual section",
99	"missing date, using today's date",
100	"cannot parse date, using it verbatim",
101	"missing Os macro, using \"\"",
102	"duplicate prologue macro",
103	"late prologue macro",
104	"skipping late title macro",
105	"prologue macros out of order",
106
107	/* related to document structure */
108	".so is fragile, better use ln(1)",
109	"no document body",
110	"content before first section header",
111	"first section is not \"NAME\"",
112	"NAME section without name",
113	"NAME section without description",
114	"description not at the end of NAME",
115	"bad NAME section content",
116	"missing description line, using \"\"",
117	"sections out of conventional order",
118	"duplicate section title",
119	"unexpected section",
120	"unusual Xr order",
121	"unusual Xr punctuation",
122	"AUTHORS section without An macro",
123
124	/* related to macros and nesting */
125	"obsolete macro",
126	"macro neither callable nor escaped",
127	"skipping paragraph macro",
128	"moving paragraph macro out of list",
129	"skipping no-space macro",
130	"blocks badly nested",
131	"nested displays are not portable",
132	"moving content out of list",
133	".Vt block has child macro",
134	"fill mode already enabled, skipping",
135	"fill mode already disabled, skipping",
136	"line scope broken",
137
138	/* related to missing macro arguments */
139	"skipping empty request",
140	"conditional request controls empty scope",
141	"skipping empty macro",
142	"empty block",
143	"empty argument, using 0n",
144	"missing display type, using -ragged",
145	"list type is not the first argument",
146	"missing -width in -tag list, using 8n",
147	"missing utility name, using \"\"",
148	"missing function name, using \"\"",
149	"empty head in list item",
150	"empty list item",
151	"missing font type, using \\fR",
152	"unknown font type, using \\fR",
153	"nothing follows prefix",
154	"empty reference block",
155	"missing -std argument, adding it",
156	"missing option string, using \"\"",
157	"missing resource identifier, using \"\"",
158	"missing eqn box, using \"\"",
159
160	/* related to bad macro arguments */
161	"unterminated quoted argument",
162	"duplicate argument",
163	"skipping duplicate argument",
164	"skipping duplicate display type",
165	"skipping duplicate list type",
166	"skipping -width argument",
167	"wrong number of cells",
168	"unknown AT&T UNIX version",
169	"comma in function argument",
170	"parenthesis in function name",
171	"invalid content in Rs block",
172	"invalid Boolean argument",
173	"unknown font, skipping request",
174	"odd number of characters in request",
175
176	/* related to plain text */
177	"blank line in fill mode, using .sp",
178	"tab in filled text",
179	"whitespace at end of input line",
180	"bad comment style",
181	"invalid escape sequence",
182	"undefined string, using \"\"",
183
184	/* related to tables */
185	"tbl line starts with span",
186	"tbl column starts with span",
187	"skipping vertical bar in tbl layout",
188
189	"generic error",
190
191	/* related to tables */
192	"non-alphabetic character in tbl options",
193	"skipping unknown tbl option",
194	"missing tbl option argument",
195	"wrong tbl option argument size",
196	"empty tbl layout",
197	"invalid character in tbl layout",
198	"unmatched parenthesis in tbl layout",
199	"tbl without any data cells",
200	"ignoring data in spanned tbl cell",
201	"ignoring extra tbl data cells",
202	"data block open at end of tbl",
203
204	/* related to document structure and macros */
205	NULL,
206	"input stack limit exceeded, infinite loop?",
207	"skipping bad character",
208	"skipping unknown macro",
209	"skipping insecure request",
210	"skipping item outside list",
211	"skipping column outside column list",
212	"skipping end of block that is not open",
213	"fewer RS blocks open, skipping",
214	"inserting missing end of block",
215	"appending missing end of block",
216
217	/* related to request and macro arguments */
218	"escaped character not allowed in a name",
219	"NOT IMPLEMENTED: Bd -file",
220	"missing list type, using -item",
221	"missing manual name, using \"\"",
222	"uname(3) system call failed, using UNKNOWN",
223	"unknown standard specifier",
224	"skipping request without numeric argument",
225	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
226	".so request failed",
227	"skipping all arguments",
228	"skipping excess arguments",
229	"divide by zero",
230
231	"unsupported feature",
232	"input too large",
233	"unsupported control character",
234	"unsupported roff request",
235	"eqn delim option in tbl",
236	"unsupported tbl layout modifier",
237	"ignoring macro in table",
238};
239
240static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
241	"SUCCESS",
242	"RESERVED",
243	"WARNING",
244	"ERROR",
245	"UNSUPP",
246	"BADARG",
247	"SYSERR"
248};
249
250
251static void
252resize_buf(struct buf *buf, size_t initial)
253{
254
255	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
256	buf->buf = mandoc_realloc(buf->buf, buf->sz);
257}
258
259static void
260choose_parser(struct mparse *curp)
261{
262	char		*cp, *ep;
263	int		 format;
264
265	/*
266	 * If neither command line arguments -mdoc or -man select
267	 * a parser nor the roff parser found a .Dd or .TH macro
268	 * yet, look ahead in the main input buffer.
269	 */
270
271	if ((format = roff_getformat(curp->roff)) == 0) {
272		cp = curp->primary->buf;
273		ep = cp + curp->primary->sz;
274		while (cp < ep) {
275			if (*cp == '.' || *cp == '\'') {
276				cp++;
277				if (cp[0] == 'D' && cp[1] == 'd') {
278					format = MPARSE_MDOC;
279					break;
280				}
281				if (cp[0] == 'T' && cp[1] == 'H') {
282					format = MPARSE_MAN;
283					break;
284				}
285			}
286			cp = memchr(cp, '\n', ep - cp);
287			if (cp == NULL)
288				break;
289			cp++;
290		}
291	}
292
293	if (format == MPARSE_MDOC) {
294		if (NULL == curp->pmdoc)
295			curp->pmdoc = mdoc_alloc(
296			    curp->roff, curp, curp->defos,
297			    MPARSE_QUICK & curp->options ? 1 : 0);
298		assert(curp->pmdoc);
299		curp->mdoc = curp->pmdoc;
300		return;
301	}
302
303	/* Fall back to man(7) as a last resort. */
304
305	if (NULL == curp->pman)
306		curp->pman = man_alloc(
307		    curp->roff, curp, curp->defos,
308		    MPARSE_QUICK & curp->options ? 1 : 0);
309	assert(curp->pman);
310	curp->man = curp->pman;
311}
312
313/*
314 * Main parse routine for a buffer.
315 * It assumes encoding and line numbering are already set up.
316 * It can recurse directly (for invocations of user-defined
317 * macros, inline equations, and input line traps)
318 * and indirectly (for .so file inclusion).
319 */
320static void
321mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
322{
323	const struct tbl_span	*span;
324	struct buf	 ln;
325	const char	*save_file;
326	char		*cp;
327	size_t		 pos; /* byte number in the ln buffer */
328	enum rofferr	 rr;
329	int		 of;
330	int		 lnn; /* line number in the real file */
331	int		 fd;
332	pid_t		 save_child;
333	unsigned char	 c;
334
335	memset(&ln, 0, sizeof(ln));
336
337	lnn = curp->line;
338	pos = 0;
339
340	while (i < blk.sz) {
341		if (0 == pos && '\0' == blk.buf[i])
342			break;
343
344		if (start) {
345			curp->line = lnn;
346			curp->reparse_count = 0;
347
348			if (lnn < 3 &&
349			    curp->filenc & MPARSE_UTF8 &&
350			    curp->filenc & MPARSE_LATIN1)
351				curp->filenc = preconv_cue(&blk, i);
352		}
353
354		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
355
356			/*
357			 * When finding an unescaped newline character,
358			 * leave the character loop to process the line.
359			 * Skip a preceding carriage return, if any.
360			 */
361
362			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
363			    '\n' == blk.buf[i + 1])
364				++i;
365			if ('\n' == blk.buf[i]) {
366				++i;
367				++lnn;
368				break;
369			}
370
371			/*
372			 * Make sure we have space for the worst
373			 * case of 11 bytes: "\\[u10ffff]\0"
374			 */
375
376			if (pos + 11 > ln.sz)
377				resize_buf(&ln, 256);
378
379			/*
380			 * Encode 8-bit input.
381			 */
382
383			c = blk.buf[i];
384			if (c & 0x80) {
385				if ( ! (curp->filenc && preconv_encode(
386				    &blk, &i, &ln, &pos, &curp->filenc))) {
387					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
388					    curp->line, pos, "0x%x", c);
389					ln.buf[pos++] = '?';
390					i++;
391				}
392				continue;
393			}
394
395			/*
396			 * Exclude control characters.
397			 */
398
399			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
400				mandoc_vmsg(c == 0x00 || c == 0x04 ||
401				    c > 0x0a ? MANDOCERR_CHAR_BAD :
402				    MANDOCERR_CHAR_UNSUPP,
403				    curp, curp->line, pos, "0x%x", c);
404				i++;
405				if (c != '\r')
406					ln.buf[pos++] = '?';
407				continue;
408			}
409
410			/* Trailing backslash = a plain char. */
411
412			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
413				ln.buf[pos++] = blk.buf[i++];
414				continue;
415			}
416
417			/*
418			 * Found escape and at least one other character.
419			 * When it's a newline character, skip it.
420			 * When there is a carriage return in between,
421			 * skip that one as well.
422			 */
423
424			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
425			    '\n' == blk.buf[i + 2])
426				++i;
427			if ('\n' == blk.buf[i + 1]) {
428				i += 2;
429				++lnn;
430				continue;
431			}
432
433			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
434				i += 2;
435				/* Comment, skip to end of line */
436				for (; i < blk.sz; ++i) {
437					if ('\n' == blk.buf[i]) {
438						++i;
439						++lnn;
440						break;
441					}
442				}
443
444				/* Backout trailing whitespaces */
445				for (; pos > 0; --pos) {
446					if (ln.buf[pos - 1] != ' ')
447						break;
448					if (pos > 2 && ln.buf[pos - 2] == '\\')
449						break;
450				}
451				break;
452			}
453
454			/* Catch escaped bogus characters. */
455
456			c = (unsigned char) blk.buf[i+1];
457
458			if ( ! (isascii(c) &&
459			    (isgraph(c) || isblank(c)))) {
460				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
461				    curp->line, pos, "0x%x", c);
462				i += 2;
463				ln.buf[pos++] = '?';
464				continue;
465			}
466
467			/* Some other escape sequence, copy & cont. */
468
469			ln.buf[pos++] = blk.buf[i++];
470			ln.buf[pos++] = blk.buf[i++];
471		}
472
473		if (pos >= ln.sz)
474			resize_buf(&ln, 256);
475
476		ln.buf[pos] = '\0';
477
478		/*
479		 * A significant amount of complexity is contained by
480		 * the roff preprocessor.  It's line-oriented but can be
481		 * expressed on one line, so we need at times to
482		 * readjust our starting point and re-run it.  The roff
483		 * preprocessor can also readjust the buffers with new
484		 * data, so we pass them in wholesale.
485		 */
486
487		of = 0;
488
489		/*
490		 * Maintain a lookaside buffer of all parsed lines.  We
491		 * only do this if mparse_keep() has been invoked (the
492		 * buffer may be accessed with mparse_getkeep()).
493		 */
494
495		if (curp->secondary) {
496			curp->secondary->buf = mandoc_realloc(
497			    curp->secondary->buf,
498			    curp->secondary->sz + pos + 2);
499			memcpy(curp->secondary->buf +
500			    curp->secondary->sz,
501			    ln.buf, pos);
502			curp->secondary->sz += pos;
503			curp->secondary->buf
504				[curp->secondary->sz] = '\n';
505			curp->secondary->sz++;
506			curp->secondary->buf
507				[curp->secondary->sz] = '\0';
508		}
509rerun:
510		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
511
512		switch (rr) {
513		case ROFF_REPARSE:
514			if (REPARSE_LIMIT >= ++curp->reparse_count)
515				mparse_buf_r(curp, ln, of, 0);
516			else
517				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
518				    curp->line, pos, NULL);
519			pos = 0;
520			continue;
521		case ROFF_APPEND:
522			pos = strlen(ln.buf);
523			continue;
524		case ROFF_RERUN:
525			goto rerun;
526		case ROFF_IGN:
527			pos = 0;
528			continue;
529		case ROFF_SO:
530			if ( ! (curp->options & MPARSE_SO) &&
531			    (i >= blk.sz || blk.buf[i] == '\0')) {
532				curp->sodest = mandoc_strdup(ln.buf + of);
533				free(ln.buf);
534				return;
535			}
536			/*
537			 * We remove `so' clauses from our lookaside
538			 * buffer because we're going to descend into
539			 * the file recursively.
540			 */
541			if (curp->secondary)
542				curp->secondary->sz -= pos + 1;
543			save_file = curp->file;
544			save_child = curp->child;
545			if (mparse_open(curp, &fd, ln.buf + of) ==
546			    MANDOCLEVEL_OK) {
547				mparse_readfd(curp, fd, ln.buf + of);
548				curp->file = save_file;
549			} else {
550				curp->file = save_file;
551				mandoc_vmsg(MANDOCERR_SO_FAIL,
552				    curp, curp->line, pos,
553				    ".so %s", ln.buf + of);
554				ln.sz = mandoc_asprintf(&cp,
555				    ".sp\nSee the file %s.\n.sp",
556				    ln.buf + of);
557				free(ln.buf);
558				ln.buf = cp;
559				of = 0;
560				mparse_buf_r(curp, ln, of, 0);
561			}
562			curp->child = save_child;
563			pos = 0;
564			continue;
565		default:
566			break;
567		}
568
569		/*
570		 * If input parsers have not been allocated, do so now.
571		 * We keep these instanced between parsers, but set them
572		 * locally per parse routine since we can use different
573		 * parsers with each one.
574		 */
575
576		if ( ! (curp->man || curp->mdoc))
577			choose_parser(curp);
578
579		/*
580		 * Lastly, push down into the parsers themselves.
581		 * If libroff returns ROFF_TBL, then add it to the
582		 * currently open parse.  Since we only get here if
583		 * there does exist data (see tbl_data.c), we're
584		 * guaranteed that something's been allocated.
585		 * Do the same for ROFF_EQN.
586		 */
587
588		if (rr == ROFF_TBL) {
589			while ((span = roff_span(curp->roff)) != NULL)
590				if (curp->man == NULL)
591					mdoc_addspan(curp->mdoc, span);
592				else
593					man_addspan(curp->man, span);
594		} else if (rr == ROFF_EQN) {
595			if (curp->man == NULL)
596				mdoc_addeqn(curp->mdoc, roff_eqn(curp->roff));
597			else
598				man_addeqn(curp->man, roff_eqn(curp->roff));
599		} else if ((curp->man == NULL ?
600		    mdoc_parseln(curp->mdoc, curp->line, ln.buf, of) :
601		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
602				break;
603
604		/* Temporary buffers typically are not full. */
605
606		if (0 == start && '\0' == blk.buf[i])
607			break;
608
609		/* Start the next input line. */
610
611		pos = 0;
612	}
613
614	free(ln.buf);
615}
616
617static int
618read_whole_file(struct mparse *curp, const char *file, int fd,
619		struct buf *fb, int *with_mmap)
620{
621	size_t		 off;
622	ssize_t		 ssz;
623
624#if HAVE_MMAP
625	struct stat	 st;
626	if (-1 == fstat(fd, &st)) {
627		perror(file);
628		exit((int)MANDOCLEVEL_SYSERR);
629	}
630
631	/*
632	 * If we're a regular file, try just reading in the whole entry
633	 * via mmap().  This is faster than reading it into blocks, and
634	 * since each file is only a few bytes to begin with, I'm not
635	 * concerned that this is going to tank any machines.
636	 */
637
638	if (S_ISREG(st.st_mode)) {
639		if (st.st_size > 0x7fffffff) {
640			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
641			return(0);
642		}
643		*with_mmap = 1;
644		fb->sz = (size_t)st.st_size;
645		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
646		if (fb->buf != MAP_FAILED)
647			return(1);
648	}
649#endif
650
651	/*
652	 * If this isn't a regular file (like, say, stdin), then we must
653	 * go the old way and just read things in bit by bit.
654	 */
655
656	*with_mmap = 0;
657	off = 0;
658	fb->sz = 0;
659	fb->buf = NULL;
660	for (;;) {
661		if (off == fb->sz) {
662			if (fb->sz == (1U << 31)) {
663				mandoc_msg(MANDOCERR_TOOLARGE, curp,
664				    0, 0, NULL);
665				break;
666			}
667			resize_buf(fb, 65536);
668		}
669		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
670		if (ssz == 0) {
671			fb->sz = off;
672			return(1);
673		}
674		if (ssz == -1) {
675			perror(file);
676			exit((int)MANDOCLEVEL_SYSERR);
677		}
678		off += (size_t)ssz;
679	}
680
681	free(fb->buf);
682	fb->buf = NULL;
683	return(0);
684}
685
686static void
687mparse_end(struct mparse *curp)
688{
689
690	if (curp->mdoc == NULL &&
691	    curp->man == NULL &&
692	    curp->sodest == NULL) {
693		if (curp->options & MPARSE_MDOC)
694			curp->mdoc = curp->pmdoc;
695		else {
696			if (curp->pman == NULL)
697				curp->pman = man_alloc(
698				    curp->roff, curp, curp->defos,
699				    curp->options & MPARSE_QUICK ? 1 : 0);
700			curp->man = curp->pman;
701		}
702	}
703	if (curp->mdoc)
704		mdoc_endparse(curp->mdoc);
705	if (curp->man)
706		man_endparse(curp->man);
707	roff_endparse(curp->roff);
708}
709
710static void
711mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
712{
713	struct buf	*svprimary;
714	const char	*svfile;
715	size_t		 offset;
716	static int	 recursion_depth;
717
718	if (64 < recursion_depth) {
719		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
720		return;
721	}
722
723	/* Line number is per-file. */
724	svfile = curp->file;
725	curp->file = file;
726	svprimary = curp->primary;
727	curp->primary = &blk;
728	curp->line = 1;
729	recursion_depth++;
730
731	/* Skip an UTF-8 byte order mark. */
732	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
733	    (unsigned char)blk.buf[0] == 0xef &&
734	    (unsigned char)blk.buf[1] == 0xbb &&
735	    (unsigned char)blk.buf[2] == 0xbf) {
736		offset = 3;
737		curp->filenc &= ~MPARSE_LATIN1;
738	} else
739		offset = 0;
740
741	mparse_buf_r(curp, blk, offset, 1);
742
743	if (--recursion_depth == 0)
744		mparse_end(curp);
745
746	curp->primary = svprimary;
747	curp->file = svfile;
748}
749
750enum mandoclevel
751mparse_readmem(struct mparse *curp, void *buf, size_t len,
752		const char *file)
753{
754	struct buf blk;
755
756	blk.buf = buf;
757	blk.sz = len;
758
759	mparse_parse_buffer(curp, blk, file);
760	return(curp->file_status);
761}
762
763/*
764 * Read the whole file into memory and call the parsers.
765 * Called recursively when an .so request is encountered.
766 */
767enum mandoclevel
768mparse_readfd(struct mparse *curp, int fd, const char *file)
769{
770	struct buf	 blk;
771	int		 with_mmap;
772	int		 save_filenc;
773
774	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
775		save_filenc = curp->filenc;
776		curp->filenc = curp->options &
777		    (MPARSE_UTF8 | MPARSE_LATIN1);
778		mparse_parse_buffer(curp, blk, file);
779		curp->filenc = save_filenc;
780#if HAVE_MMAP
781		if (with_mmap)
782			munmap(blk.buf, blk.sz);
783		else
784#endif
785			free(blk.buf);
786	}
787
788	if (fd != STDIN_FILENO && close(fd) == -1)
789		perror(file);
790
791	mparse_wait(curp);
792	return(curp->file_status);
793}
794
795enum mandoclevel
796mparse_open(struct mparse *curp, int *fd, const char *file)
797{
798	int		  pfd[2];
799	int		  save_errno;
800	char		 *cp;
801
802	curp->file = file;
803
804	/* Unless zipped, try to just open the file. */
805
806	if ((cp = strrchr(file, '.')) == NULL ||
807	    strcmp(cp + 1, "gz")) {
808		curp->child = 0;
809		if ((*fd = open(file, O_RDONLY)) != -1)
810			return(MANDOCLEVEL_OK);
811
812		/* Open failed; try to append ".gz". */
813
814		mandoc_asprintf(&cp, "%s.gz", file);
815		file = cp;
816	} else
817		cp = NULL;
818
819	/* Before forking, make sure the file can be read. */
820
821	save_errno = errno;
822	if (access(file, R_OK) == -1) {
823		if (cp != NULL)
824			errno = save_errno;
825		free(cp);
826		*fd = -1;
827		curp->child = 0;
828		mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
829		return(MANDOCLEVEL_ERROR);
830	}
831
832	/* Run gunzip(1). */
833
834	if (pipe(pfd) == -1) {
835		perror("pipe");
836		exit((int)MANDOCLEVEL_SYSERR);
837	}
838
839	switch (curp->child = fork()) {
840	case -1:
841		perror("fork");
842		exit((int)MANDOCLEVEL_SYSERR);
843	case 0:
844		close(pfd[0]);
845		if (dup2(pfd[1], STDOUT_FILENO) == -1) {
846			perror("dup");
847			exit((int)MANDOCLEVEL_SYSERR);
848		}
849		execlp("gunzip", "gunzip", "-c", file, NULL);
850		perror("exec");
851		exit((int)MANDOCLEVEL_SYSERR);
852	default:
853		close(pfd[1]);
854		*fd = pfd[0];
855		return(MANDOCLEVEL_OK);
856	}
857}
858
859enum mandoclevel
860mparse_wait(struct mparse *curp)
861{
862	int	  status;
863
864	if (curp->child == 0)
865		return(MANDOCLEVEL_OK);
866
867	if (waitpid(curp->child, &status, 0) == -1) {
868		perror("wait");
869		exit((int)MANDOCLEVEL_SYSERR);
870	}
871	curp->child = 0;
872	if (WIFSIGNALED(status)) {
873		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
874		    "gunzip died from signal %d", WTERMSIG(status));
875		return(MANDOCLEVEL_ERROR);
876	}
877	if (WEXITSTATUS(status)) {
878		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
879		    "gunzip failed with code %d", WEXITSTATUS(status));
880		return(MANDOCLEVEL_ERROR);
881	}
882	return(MANDOCLEVEL_OK);
883}
884
885struct mparse *
886mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
887    const struct mchars *mchars, const char *defos)
888{
889	struct mparse	*curp;
890
891	curp = mandoc_calloc(1, sizeof(struct mparse));
892
893	curp->options = options;
894	curp->wlevel = wlevel;
895	curp->mmsg = mmsg;
896	curp->defos = defos;
897
898	curp->mchars = mchars;
899	curp->roff = roff_alloc(curp, curp->mchars, options);
900	if (curp->options & MPARSE_MDOC)
901		curp->pmdoc = mdoc_alloc(
902		    curp->roff, curp, curp->defos,
903		    curp->options & MPARSE_QUICK ? 1 : 0);
904	if (curp->options & MPARSE_MAN)
905		curp->pman = man_alloc(
906		    curp->roff, curp, curp->defos,
907		    curp->options & MPARSE_QUICK ? 1 : 0);
908
909	return(curp);
910}
911
912void
913mparse_reset(struct mparse *curp)
914{
915
916	roff_reset(curp->roff);
917
918	if (curp->mdoc)
919		mdoc_reset(curp->mdoc);
920	if (curp->man)
921		man_reset(curp->man);
922	if (curp->secondary)
923		curp->secondary->sz = 0;
924
925	curp->file_status = MANDOCLEVEL_OK;
926	curp->mdoc = NULL;
927	curp->man = NULL;
928
929	free(curp->sodest);
930	curp->sodest = NULL;
931}
932
933void
934mparse_free(struct mparse *curp)
935{
936
937	if (curp->pmdoc)
938		mdoc_free(curp->pmdoc);
939	if (curp->pman)
940		man_free(curp->pman);
941	if (curp->roff)
942		roff_free(curp->roff);
943	if (curp->secondary)
944		free(curp->secondary->buf);
945
946	free(curp->secondary);
947	free(curp->sodest);
948	free(curp);
949}
950
951void
952mparse_result(struct mparse *curp,
953	struct mdoc **mdoc, struct man **man, char **sodest)
954{
955
956	if (sodest && NULL != (*sodest = curp->sodest)) {
957		*mdoc = NULL;
958		*man = NULL;
959		return;
960	}
961	if (mdoc)
962		*mdoc = curp->mdoc;
963	if (man)
964		*man = curp->man;
965}
966
967void
968mandoc_vmsg(enum mandocerr t, struct mparse *m,
969		int ln, int pos, const char *fmt, ...)
970{
971	char		 buf[256];
972	va_list		 ap;
973
974	va_start(ap, fmt);
975	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
976	va_end(ap);
977
978	mandoc_msg(t, m, ln, pos, buf);
979}
980
981void
982mandoc_msg(enum mandocerr er, struct mparse *m,
983		int ln, int col, const char *msg)
984{
985	enum mandoclevel level;
986
987	level = MANDOCLEVEL_UNSUPP;
988	while (er < mandoclimits[level])
989		level--;
990
991	if (level < m->wlevel && er != MANDOCERR_FILE)
992		return;
993
994	if (m->mmsg)
995		(*m->mmsg)(er, level, m->file, ln, col, msg);
996
997	if (m->file_status < level)
998		m->file_status = level;
999}
1000
1001const char *
1002mparse_strerror(enum mandocerr er)
1003{
1004
1005	return(mandocerrs[er]);
1006}
1007
1008const char *
1009mparse_strlevel(enum mandoclevel lvl)
1010{
1011	return(mandoclevels[lvl]);
1012}
1013
1014void
1015mparse_keep(struct mparse *p)
1016{
1017
1018	assert(NULL == p->secondary);
1019	p->secondary = mandoc_calloc(1, sizeof(struct buf));
1020}
1021
1022const char *
1023mparse_getkeep(const struct mparse *p)
1024{
1025
1026	assert(p->secondary);
1027	return(p->secondary->sz ? p->secondary->buf : NULL);
1028}
1029