read.c revision 1.17
1/*	Id: read.c,v 1.149 2016/07/10 13:34:30 schwarze Exp  */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2016 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19#include "config.h"
20
21#include <sys/types.h>
22#if HAVE_MMAP
23#include <sys/mman.h>
24#include <sys/stat.h>
25#endif
26
27#include <assert.h>
28#include <ctype.h>
29#if HAVE_ERR
30#include <err.h>
31#endif
32#include <errno.h>
33#include <fcntl.h>
34#include <stdarg.h>
35#include <stdint.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40#include <zlib.h>
41
42#include "mandoc_aux.h"
43#include "mandoc.h"
44#include "roff.h"
45#include "mdoc.h"
46#include "man.h"
47#include "libmandoc.h"
48#include "roff_int.h"
49
/*
 * Upper bound for mparse::reparse_count: once exceeded, a
 * ROFF_REPARSE request is refused with MANDOCERR_ROFFLOOP.
 */
#define	REPARSE_LIMIT	1000
51
52struct	mparse {
53	struct roff_man	 *man; /* man parser */
54	struct roff	 *roff; /* roff parser (!NULL) */
55	char		 *sodest; /* filename pointed to by .so */
56	const char	 *file; /* filename of current input file */
57	struct buf	 *primary; /* buffer currently being parsed */
58	struct buf	 *secondary; /* preprocessed copy of input */
59	const char	 *defos; /* default operating system */
60	mandocmsg	  mmsg; /* warning/error message handler */
61	enum mandoclevel  file_status; /* status of current parse */
62	enum mandoclevel  wlevel; /* ignore messages below this */
63	int		  options; /* parser options */
64	int		  gzip; /* current input file is gzipped */
65	int		  filenc; /* encoding of the current file */
66	int		  reparse_count; /* finite interp. stack */
67	int		  line; /* line number in the file */
68};
69
/* Helpers private to this file. */
static	void	  choose_parser(struct mparse *curp);
static	void	  resize_buf(struct buf *buf, size_t initial);
static	void	  mparse_buf_r(struct mparse *curp, const struct buf blk,
			size_t i, int start);
static	int	  read_whole_file(struct mparse *curp, const char *file,
			int fd, struct buf *fb, int *with_mmap);
static	void	  mparse_end(struct mparse *curp);
static	void	  mparse_parse_buffer(struct mparse *curp, struct buf blk,
			const char *file);
78
79static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
80	MANDOCERR_OK,
81	MANDOCERR_WARNING,
82	MANDOCERR_WARNING,
83	MANDOCERR_ERROR,
84	MANDOCERR_UNSUPP,
85	MANDOCERR_MAX,
86	MANDOCERR_MAX
87};
88
89static	const char * const	mandocerrs[MANDOCERR_MAX] = {
90	"ok",
91
92	"generic warning",
93
94	/* related to the prologue */
95	"missing manual title, using UNTITLED",
96	"missing manual title, using \"\"",
97	"lower case character in document title",
98	"missing manual section, using \"\"",
99	"unknown manual section",
100	"missing date, using today's date",
101	"cannot parse date, using it verbatim",
102	"missing Os macro, using \"\"",
103	"duplicate prologue macro",
104	"late prologue macro",
105	"skipping late title macro",
106	"prologue macros out of order",
107
108	/* related to document structure */
109	".so is fragile, better use ln(1)",
110	"no document body",
111	"content before first section header",
112	"first section is not \"NAME\"",
113	"NAME section without name",
114	"NAME section without description",
115	"description not at the end of NAME",
116	"bad NAME section content",
117	"missing description line, using \"\"",
118	"sections out of conventional order",
119	"duplicate section title",
120	"unexpected section",
121	"unusual Xr order",
122	"unusual Xr punctuation",
123	"AUTHORS section without An macro",
124
125	/* related to macros and nesting */
126	"obsolete macro",
127	"macro neither callable nor escaped",
128	"skipping paragraph macro",
129	"moving paragraph macro out of list",
130	"skipping no-space macro",
131	"blocks badly nested",
132	"nested displays are not portable",
133	"moving content out of list",
134	"fill mode already enabled, skipping",
135	"fill mode already disabled, skipping",
136	"line scope broken",
137
138	/* related to missing macro arguments */
139	"skipping empty request",
140	"conditional request controls empty scope",
141	"skipping empty macro",
142	"empty block",
143	"empty argument, using 0n",
144	"missing display type, using -ragged",
145	"list type is not the first argument",
146	"missing -width in -tag list, using 8n",
147	"missing utility name, using \"\"",
148	"missing function name, using \"\"",
149	"empty head in list item",
150	"empty list item",
151	"missing font type, using \\fR",
152	"unknown font type, using \\fR",
153	"nothing follows prefix",
154	"empty reference block",
155	"missing -std argument, adding it",
156	"missing option string, using \"\"",
157	"missing resource identifier, using \"\"",
158	"missing eqn box, using \"\"",
159
160	/* related to bad macro arguments */
161	"unterminated quoted argument",
162	"duplicate argument",
163	"skipping duplicate argument",
164	"skipping duplicate display type",
165	"skipping duplicate list type",
166	"skipping -width argument",
167	"wrong number of cells",
168	"unknown AT&T UNIX version",
169	"comma in function argument",
170	"parenthesis in function name",
171	"invalid content in Rs block",
172	"invalid Boolean argument",
173	"unknown font, skipping request",
174	"odd number of characters in request",
175
176	/* related to plain text */
177	"blank line in fill mode, using .sp",
178	"tab in filled text",
179	"whitespace at end of input line",
180	"bad comment style",
181	"invalid escape sequence",
182	"undefined string, using \"\"",
183
184	/* related to tables */
185	"tbl line starts with span",
186	"tbl column starts with span",
187	"skipping vertical bar in tbl layout",
188
189	"generic error",
190
191	/* related to tables */
192	"non-alphabetic character in tbl options",
193	"skipping unknown tbl option",
194	"missing tbl option argument",
195	"wrong tbl option argument size",
196	"empty tbl layout",
197	"invalid character in tbl layout",
198	"unmatched parenthesis in tbl layout",
199	"tbl without any data cells",
200	"ignoring data in spanned tbl cell",
201	"ignoring extra tbl data cells",
202	"data block open at end of tbl",
203
204	/* related to document structure and macros */
205	NULL,
206	"input stack limit exceeded, infinite loop?",
207	"skipping bad character",
208	"skipping unknown macro",
209	"skipping insecure request",
210	"skipping item outside list",
211	"skipping column outside column list",
212	"skipping end of block that is not open",
213	"fewer RS blocks open, skipping",
214	"inserting missing end of block",
215	"appending missing end of block",
216
217	/* related to request and macro arguments */
218	"escaped character not allowed in a name",
219	"NOT IMPLEMENTED: Bd -file",
220	"skipping display without arguments",
221	"missing list type, using -item",
222	"missing manual name, using \"\"",
223	"uname(3) system call failed, using UNKNOWN",
224	"unknown standard specifier",
225	"skipping request without numeric argument",
226	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
227	".so request failed",
228	"skipping all arguments",
229	"skipping excess arguments",
230	"divide by zero",
231
232	"unsupported feature",
233	"input too large",
234	"unsupported control character",
235	"unsupported roff request",
236	"eqn delim option in tbl",
237	"unsupported tbl layout modifier",
238	"ignoring macro in table",
239};
240
241static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
242	"SUCCESS",
243	"RESERVED",
244	"WARNING",
245	"ERROR",
246	"UNSUPP",
247	"BADARG",
248	"SYSERR"
249};
250
251
252static void
253resize_buf(struct buf *buf, size_t initial)
254{
255
256	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
257	buf->buf = mandoc_realloc(buf->buf, buf->sz);
258}
259
260static void
261choose_parser(struct mparse *curp)
262{
263	char		*cp, *ep;
264	int		 format;
265
266	/*
267	 * If neither command line arguments -mdoc or -man select
268	 * a parser nor the roff parser found a .Dd or .TH macro
269	 * yet, look ahead in the main input buffer.
270	 */
271
272	if ((format = roff_getformat(curp->roff)) == 0) {
273		cp = curp->primary->buf;
274		ep = cp + curp->primary->sz;
275		while (cp < ep) {
276			if (*cp == '.' || *cp == '\'') {
277				cp++;
278				if (cp[0] == 'D' && cp[1] == 'd') {
279					format = MPARSE_MDOC;
280					break;
281				}
282				if (cp[0] == 'T' && cp[1] == 'H') {
283					format = MPARSE_MAN;
284					break;
285				}
286			}
287			cp = memchr(cp, '\n', ep - cp);
288			if (cp == NULL)
289				break;
290			cp++;
291		}
292	}
293
294	if (curp->man == NULL) {
295		curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
296		    curp->options & MPARSE_QUICK ? 1 : 0);
297		curp->man->macroset = MACROSET_MAN;
298		curp->man->first->tok = TOKEN_NONE;
299	}
300
301	if (format == MPARSE_MDOC) {
302		mdoc_hash_init();
303		curp->man->macroset = MACROSET_MDOC;
304		curp->man->first->tok = TOKEN_NONE;
305	} else {
306		man_hash_init();
307		curp->man->macroset = MACROSET_MAN;
308		curp->man->first->tok = TOKEN_NONE;
309	}
310}
311
312/*
313 * Main parse routine for a buffer.
314 * It assumes encoding and line numbering are already set up.
315 * It can recurse directly (for invocations of user-defined
316 * macros, inline equations, and input line traps)
317 * and indirectly (for .so file inclusion).
318 */
319static void
320mparse_buf_r(struct mparse *curp, const struct buf blk, size_t i, int start)
321{
322	const struct tbl_span	*span;
323	struct buf	 ln;
324	const char	*save_file;
325	char		*cp;
326	size_t		 pos; /* byte number in the ln buffer */
327	enum rofferr	 rr;
328	int		 of;
329	int		 lnn; /* line number in the real file */
330	int		 fd;
331	unsigned char	 c;
332
333	memset(&ln, 0, sizeof(ln));
334
335	lnn = curp->line;
336	pos = 0;
337	fd = -1;
338
339	while (i < blk.sz) {
340		if (0 == pos && '\0' == blk.buf[i])
341			break;
342
343		if (start) {
344			curp->line = lnn;
345			curp->reparse_count = 0;
346
347			if (lnn < 3 &&
348			    curp->filenc & MPARSE_UTF8 &&
349			    curp->filenc & MPARSE_LATIN1)
350				curp->filenc = preconv_cue(&blk, i);
351		}
352
353		while (i < blk.sz && (start || blk.buf[i] != '\0')) {
354
355			/*
356			 * When finding an unescaped newline character,
357			 * leave the character loop to process the line.
358			 * Skip a preceding carriage return, if any.
359			 */
360
361			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
362			    '\n' == blk.buf[i + 1])
363				++i;
364			if ('\n' == blk.buf[i]) {
365				++i;
366				++lnn;
367				break;
368			}
369
370			/*
371			 * Make sure we have space for the worst
372			 * case of 11 bytes: "\\[u10ffff]\0"
373			 */
374
375			if (pos + 11 > ln.sz)
376				resize_buf(&ln, 256);
377
378			/*
379			 * Encode 8-bit input.
380			 */
381
382			c = blk.buf[i];
383			if (c & 0x80) {
384				if ( ! (curp->filenc && preconv_encode(
385				    &blk, &i, &ln, &pos, &curp->filenc))) {
386					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
387					    curp->line, pos, "0x%x", c);
388					ln.buf[pos++] = '?';
389					i++;
390				}
391				continue;
392			}
393
394			/*
395			 * Exclude control characters.
396			 */
397
398			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
399				mandoc_vmsg(c == 0x00 || c == 0x04 ||
400				    c > 0x0a ? MANDOCERR_CHAR_BAD :
401				    MANDOCERR_CHAR_UNSUPP,
402				    curp, curp->line, pos, "0x%x", c);
403				i++;
404				if (c != '\r')
405					ln.buf[pos++] = '?';
406				continue;
407			}
408
409			/* Trailing backslash = a plain char. */
410
411			if (blk.buf[i] != '\\' || i + 1 == blk.sz) {
412				ln.buf[pos++] = blk.buf[i++];
413				continue;
414			}
415
416			/*
417			 * Found escape and at least one other character.
418			 * When it's a newline character, skip it.
419			 * When there is a carriage return in between,
420			 * skip that one as well.
421			 */
422
423			if ('\r' == blk.buf[i + 1] && i + 2 < blk.sz &&
424			    '\n' == blk.buf[i + 2])
425				++i;
426			if ('\n' == blk.buf[i + 1]) {
427				i += 2;
428				++lnn;
429				continue;
430			}
431
432			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
433				i += 2;
434				/* Comment, skip to end of line */
435				for (; i < blk.sz; ++i) {
436					if ('\n' == blk.buf[i]) {
437						++i;
438						++lnn;
439						break;
440					}
441				}
442
443				/* Backout trailing whitespaces */
444				for (; pos > 0; --pos) {
445					if (ln.buf[pos - 1] != ' ')
446						break;
447					if (pos > 2 && ln.buf[pos - 2] == '\\')
448						break;
449				}
450				break;
451			}
452
453			/* Catch escaped bogus characters. */
454
455			c = (unsigned char) blk.buf[i+1];
456
457			if ( ! (isascii(c) &&
458			    (isgraph(c) || isblank(c)))) {
459				mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
460				    curp->line, pos, "0x%x", c);
461				i += 2;
462				ln.buf[pos++] = '?';
463				continue;
464			}
465
466			/* Some other escape sequence, copy & cont. */
467
468			ln.buf[pos++] = blk.buf[i++];
469			ln.buf[pos++] = blk.buf[i++];
470		}
471
472		if (pos >= ln.sz)
473			resize_buf(&ln, 256);
474
475		ln.buf[pos] = '\0';
476
477		/*
478		 * A significant amount of complexity is contained by
479		 * the roff preprocessor.  It's line-oriented but can be
480		 * expressed on one line, so we need at times to
481		 * readjust our starting point and re-run it.  The roff
482		 * preprocessor can also readjust the buffers with new
483		 * data, so we pass them in wholesale.
484		 */
485
486		of = 0;
487
488		/*
489		 * Maintain a lookaside buffer of all parsed lines.  We
490		 * only do this if mparse_keep() has been invoked (the
491		 * buffer may be accessed with mparse_getkeep()).
492		 */
493
494		if (curp->secondary) {
495			curp->secondary->buf = mandoc_realloc(
496			    curp->secondary->buf,
497			    curp->secondary->sz + pos + 2);
498			memcpy(curp->secondary->buf +
499			    curp->secondary->sz,
500			    ln.buf, pos);
501			curp->secondary->sz += pos;
502			curp->secondary->buf
503				[curp->secondary->sz] = '\n';
504			curp->secondary->sz++;
505			curp->secondary->buf
506				[curp->secondary->sz] = '\0';
507		}
508rerun:
509		rr = roff_parseln(curp->roff, curp->line, &ln, &of);
510
511		switch (rr) {
512		case ROFF_REPARSE:
513			if (REPARSE_LIMIT >= ++curp->reparse_count)
514				mparse_buf_r(curp, ln, of, 0);
515			else
516				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
517				    curp->line, pos, NULL);
518			pos = 0;
519			continue;
520		case ROFF_APPEND:
521			pos = strlen(ln.buf);
522			continue;
523		case ROFF_RERUN:
524			goto rerun;
525		case ROFF_IGN:
526			pos = 0;
527			continue;
528		case ROFF_SO:
529			if ( ! (curp->options & MPARSE_SO) &&
530			    (i >= blk.sz || blk.buf[i] == '\0')) {
531				curp->sodest = mandoc_strdup(ln.buf + of);
532				goto out;
533			}
534			/*
535			 * We remove `so' clauses from our lookaside
536			 * buffer because we're going to descend into
537			 * the file recursively.
538			 */
539			if (curp->secondary)
540				curp->secondary->sz -= pos + 1;
541			save_file = curp->file;
542			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
543				mparse_readfd(curp, fd, ln.buf + of);
544				close(fd);
545				curp->file = save_file;
546			} else {
547				curp->file = save_file;
548				mandoc_vmsg(MANDOCERR_SO_FAIL,
549				    curp, curp->line, pos,
550				    ".so %s", ln.buf + of);
551				ln.sz = mandoc_asprintf(&cp,
552				    ".sp\nSee the file %s.\n.sp",
553				    ln.buf + of);
554				free(ln.buf);
555				ln.buf = cp;
556				of = 0;
557				mparse_buf_r(curp, ln, of, 0);
558			}
559			pos = 0;
560			continue;
561		default:
562			break;
563		}
564
565		/*
566		 * If input parsers have not been allocated, do so now.
567		 * We keep these instanced between parsers, but set them
568		 * locally per parse routine since we can use different
569		 * parsers with each one.
570		 */
571
572		if (curp->man == NULL ||
573		    curp->man->macroset == MACROSET_NONE)
574			choose_parser(curp);
575
576		/*
577		 * Lastly, push down into the parsers themselves.
578		 * If libroff returns ROFF_TBL, then add it to the
579		 * currently open parse.  Since we only get here if
580		 * there does exist data (see tbl_data.c), we're
581		 * guaranteed that something's been allocated.
582		 * Do the same for ROFF_EQN.
583		 */
584
585		if (rr == ROFF_TBL)
586			while ((span = roff_span(curp->roff)) != NULL)
587				roff_addtbl(curp->man, span);
588		else if (rr == ROFF_EQN)
589			roff_addeqn(curp->man, roff_eqn(curp->roff));
590		else if ((curp->man->macroset == MACROSET_MDOC ?
591		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
592		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
593				break;
594
595		/* Temporary buffers typically are not full. */
596
597		if (0 == start && '\0' == blk.buf[i])
598			break;
599
600		/* Start the next input line. */
601
602		pos = 0;
603	}
604
605out:
606	free(ln.buf);
607}
608
609static int
610read_whole_file(struct mparse *curp, const char *file, int fd,
611		struct buf *fb, int *with_mmap)
612{
613	gzFile		 gz;
614	size_t		 off;
615	ssize_t		 ssz;
616
617#if HAVE_MMAP
618	struct stat	 st;
619
620	if (fstat(fd, &st) == -1)
621		err((int)MANDOCLEVEL_SYSERR, "%s", file);
622
623	/*
624	 * If we're a regular file, try just reading in the whole entry
625	 * via mmap().  This is faster than reading it into blocks, and
626	 * since each file is only a few bytes to begin with, I'm not
627	 * concerned that this is going to tank any machines.
628	 */
629
630	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
631		if (st.st_size > 0x7fffffff) {
632			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
633			return 0;
634		}
635		*with_mmap = 1;
636		fb->sz = (size_t)st.st_size;
637		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
638		if (fb->buf != MAP_FAILED)
639			return 1;
640	}
641#endif
642
643	if (curp->gzip) {
644		if ((gz = gzdopen(fd, "rb")) == NULL)
645			err((int)MANDOCLEVEL_SYSERR, "%s", file);
646	} else
647		gz = NULL;
648
649	/*
650	 * If this isn't a regular file (like, say, stdin), then we must
651	 * go the old way and just read things in bit by bit.
652	 */
653
654	*with_mmap = 0;
655	off = 0;
656	fb->sz = 0;
657	fb->buf = NULL;
658	for (;;) {
659		if (off == fb->sz) {
660			if (fb->sz == (1U << 31)) {
661				mandoc_msg(MANDOCERR_TOOLARGE, curp,
662				    0, 0, NULL);
663				break;
664			}
665			resize_buf(fb, 65536);
666		}
667		ssz = curp->gzip ?
668		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
669		    read(fd, fb->buf + (int)off, fb->sz - off);
670		if (ssz == 0) {
671			fb->sz = off;
672			return 1;
673		}
674		if (ssz == -1)
675			err((int)MANDOCLEVEL_SYSERR, "%s", file);
676		off += (size_t)ssz;
677	}
678
679	free(fb->buf);
680	fb->buf = NULL;
681	return 0;
682}
683
684static void
685mparse_end(struct mparse *curp)
686{
687
688	if (curp->man == NULL && curp->sodest == NULL)
689		curp->man = roff_man_alloc(curp->roff, curp, curp->defos,
690		    curp->options & MPARSE_QUICK ? 1 : 0);
691	if (curp->man != NULL) {
692		if (curp->man->macroset == MACROSET_NONE)
693			curp->man->macroset = MACROSET_MAN;
694		if (curp->man->macroset == MACROSET_MDOC)
695			mdoc_endparse(curp->man);
696		else
697			man_endparse(curp->man);
698	}
699	roff_endparse(curp->roff);
700}
701
702static void
703mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
704{
705	struct buf	*svprimary;
706	const char	*svfile;
707	size_t		 offset;
708	static int	 recursion_depth;
709
710	if (64 < recursion_depth) {
711		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
712		return;
713	}
714
715	/* Line number is per-file. */
716	svfile = curp->file;
717	curp->file = file;
718	svprimary = curp->primary;
719	curp->primary = &blk;
720	curp->line = 1;
721	recursion_depth++;
722
723	/* Skip an UTF-8 byte order mark. */
724	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
725	    (unsigned char)blk.buf[0] == 0xef &&
726	    (unsigned char)blk.buf[1] == 0xbb &&
727	    (unsigned char)blk.buf[2] == 0xbf) {
728		offset = 3;
729		curp->filenc &= ~MPARSE_LATIN1;
730	} else
731		offset = 0;
732
733	mparse_buf_r(curp, blk, offset, 1);
734
735	if (--recursion_depth == 0)
736		mparse_end(curp);
737
738	curp->primary = svprimary;
739	curp->file = svfile;
740}
741
742enum mandoclevel
743mparse_readmem(struct mparse *curp, const void *buf, size_t len,
744		const char *file)
745{
746	struct buf blk;
747
748	blk.buf = __UNCONST(buf);
749	blk.sz = len;
750
751	mparse_parse_buffer(curp, blk, file);
752	return curp->file_status;
753}
754
755/*
756 * Read the whole file into memory and call the parsers.
757 * Called recursively when an .so request is encountered.
758 */
759enum mandoclevel
760mparse_readfd(struct mparse *curp, int fd, const char *file)
761{
762	struct buf	 blk;
763	int		 with_mmap;
764	int		 save_filenc;
765
766	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
767		save_filenc = curp->filenc;
768		curp->filenc = curp->options &
769		    (MPARSE_UTF8 | MPARSE_LATIN1);
770		mparse_parse_buffer(curp, blk, file);
771		curp->filenc = save_filenc;
772#if HAVE_MMAP
773		if (with_mmap)
774			munmap(blk.buf, blk.sz);
775		else
776#endif
777			free(blk.buf);
778	}
779	return curp->file_status;
780}
781
782int
783mparse_open(struct mparse *curp, const char *file)
784{
785	char		 *cp;
786	int		  fd;
787
788	curp->file = file;
789	cp = strrchr(file, '.');
790	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
791
792	/* First try to use the filename as it is. */
793
794	if ((fd = open(file, O_RDONLY)) != -1)
795		return fd;
796
797	/*
798	 * If that doesn't work and the filename doesn't
799	 * already  end in .gz, try appending .gz.
800	 */
801
802	if ( ! curp->gzip) {
803		mandoc_asprintf(&cp, "%s.gz", file);
804		fd = open(cp, O_RDONLY);
805		free(cp);
806		if (fd != -1) {
807			curp->gzip = 1;
808			return fd;
809		}
810	}
811
812	/* Neither worked, give up. */
813
814	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
815	return -1;
816}
817
818struct mparse *
819mparse_alloc(int options, enum mandoclevel wlevel, mandocmsg mmsg,
820    const char *defos)
821{
822	struct mparse	*curp;
823
824	curp = mandoc_calloc(1, sizeof(struct mparse));
825
826	curp->options = options;
827	curp->wlevel = wlevel;
828	curp->mmsg = mmsg;
829	curp->defos = defos;
830
831	curp->roff = roff_alloc(curp, options);
832	curp->man = roff_man_alloc( curp->roff, curp, curp->defos,
833		curp->options & MPARSE_QUICK ? 1 : 0);
834	if (curp->options & MPARSE_MDOC) {
835		mdoc_hash_init();
836		curp->man->macroset = MACROSET_MDOC;
837	} else if (curp->options & MPARSE_MAN) {
838		man_hash_init();
839		curp->man->macroset = MACROSET_MAN;
840	}
841	curp->man->first->tok = TOKEN_NONE;
842	return curp;
843}
844
845void
846mparse_reset(struct mparse *curp)
847{
848
849	roff_reset(curp->roff);
850
851	if (curp->man != NULL)
852		roff_man_reset(curp->man);
853	if (curp->secondary)
854		curp->secondary->sz = 0;
855
856	curp->file_status = MANDOCLEVEL_OK;
857
858	free(curp->sodest);
859	curp->sodest = NULL;
860}
861
862void
863mparse_free(struct mparse *curp)
864{
865
866	roff_man_free(curp->man);
867	if (curp->roff)
868		roff_free(curp->roff);
869	if (curp->secondary)
870		free(curp->secondary->buf);
871
872	free(curp->secondary);
873	free(curp->sodest);
874	free(curp);
875}
876
877void
878mparse_result(struct mparse *curp, struct roff_man **man,
879	char **sodest)
880{
881
882	if (sodest && NULL != (*sodest = curp->sodest)) {
883		*man = NULL;
884		return;
885	}
886	if (man)
887		*man = curp->man;
888}
889
890void
891mandoc_vmsg(enum mandocerr t, struct mparse *m,
892		int ln, int pos, const char *fmt, ...)
893{
894	char		 buf[256];
895	va_list		 ap;
896
897	va_start(ap, fmt);
898	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
899	va_end(ap);
900
901	mandoc_msg(t, m, ln, pos, buf);
902}
903
904void
905mandoc_msg(enum mandocerr er, struct mparse *m,
906		int ln, int col, const char *msg)
907{
908	enum mandoclevel level;
909
910	level = MANDOCLEVEL_UNSUPP;
911	while (er < mandoclimits[level])
912		level--;
913
914	if (level < m->wlevel && er != MANDOCERR_FILE)
915		return;
916
917	if (m->mmsg)
918		(*m->mmsg)(er, level, m->file, ln, col, msg);
919
920	if (m->file_status < level)
921		m->file_status = level;
922}
923
924const char *
925mparse_strerror(enum mandocerr er)
926{
927
928	return mandocerrs[er];
929}
930
931const char *
932mparse_strlevel(enum mandoclevel lvl)
933{
934	return mandoclevels[lvl];
935}
936
937void
938mparse_keep(struct mparse *p)
939{
940
941	assert(NULL == p->secondary);
942	p->secondary = mandoc_calloc(1, sizeof(struct buf));
943}
944
945const char *
946mparse_getkeep(const struct mparse *p)
947{
948
949	assert(p->secondary);
950	return p->secondary->sz ? p->secondary->buf : NULL;
951}
952