1/*	$Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19#include "config.h"
20
21#include <sys/types.h>
22#include <sys/mman.h>
23#include <sys/stat.h>
24
25#include <assert.h>
26#include <ctype.h>
27#include <errno.h>
28#include <fcntl.h>
29#include <stdarg.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34#include <zlib.h>
35
36#include "mandoc_aux.h"
37#include "mandoc.h"
38#include "roff.h"
39#include "mdoc.h"
40#include "man.h"
41#include "libmandoc.h"
42
/*
 * Upper bound on curp->reparse_count: once a ROFF_REPARSE result
 * pushes the counter past this value, mparse_buf_r() reports
 * MANDOCERR_ROFFLOOP instead of reparsing again.
 */
#define	REPARSE_LIMIT	1000
44
/*
 * State of one parser instance.  Allocated by mparse_alloc(),
 * cleared between files by mparse_reset(), and released by
 * mparse_free().  The sodest string and the secondary buffer are
 * owned by this structure (both are freed in mparse_free()).
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	char		 *sodest; /* filename pointed to by .so */
	const char	 *file; /* filename of current input file */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* preprocessed copy of input */
	const char	 *os_s; /* default operating system */
	mandocmsg	  mmsg; /* warning/error message handler */
	enum mandoclevel  file_status; /* status of current parse */
	enum mandocerr	  mmin; /* ignore messages below this */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};
62
/* Forward declarations of the file-local helpers defined below. */
static	void	  choose_parser(struct mparse *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, const char *, int,
				struct buf *, int *);
static	void	  mparse_end(struct mparse *);
static	void	  mparse_parse_buffer(struct mparse *, struct buf,
			const char *);
71
/*
 * Lowest error code belonging to each message level, indexed by
 * enum mandoclevel.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and
 * steps downward until the error code is no longer below the limit,
 * which yields the level of that error.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_UNSUPP,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
81
/*
 * Human-readable message text for each error code, indexed by
 * enum mandocerr and returned by mparse_strerror().
 * NOTE(review): the order of entries presumably has to match the
 * declaration order of enum mandocerr, which is not visible in
 * this file -- confirm before inserting or moving entries.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"base system convention",

	"Mdocdate found",
	"Mdocdate missing",
	"unknown architecture",
	"operating system explicitly specified",
	"RCS id missing",
	"referenced manual not found",

	"generic style suggestion",

	"legacy man(7) date format",
	"lower case character in document title",
	"duplicate RCS id",
	"typo in section name",
	"unterminated quoted argument",
	"useless macro",
	"consider using OS macro",
	"errnos out of order",
	"duplicate errno",
	"trailing delimiter",
	"no blank before trailing delimiter",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"function name without markup",
	"whitespace at end of input line",
	"bad comment style",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"missing manual section, using \"\"",
	"unknown manual section",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"date in the future, using it anyway",
	"missing Os macro, using \"\"",
	"late prologue macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"NAME section without Nm before Nd",
	"NAME section without description",
	"description not at the end of NAME",
	"bad NAME section content",
	"missing comma before name",
	"missing description line, using \"\"",
	"description line outside NAME section",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",
	"cross reference to self",
	"unusual Xr order",
	"unusual Xr punctuation",
	"AUTHORS section without An macro",

	/* related to macros and nesting */
	"obsolete macro",
	"macro neither callable nor escaped",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	"first macro on line",
	"line scope broken",
	"skipping blank line in line scope",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty block",
	"empty argument, using 0n",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 6n",
	"missing utility name, using \"\"",
	"missing function name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing argument, using next line",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"nothing follows prefix",
	"empty reference block",
	"missing section argument",
	"missing -std argument, adding it",
	"missing option string, using \"\"",
	"missing resource identifier, using \"\"",
	"missing eqn box, using \"\"",

	/* related to bad macro arguments */
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"wrong number of cells",
	"unknown AT&T UNIX version",
	"comma in function argument",
	"parenthesis in function name",
	"unknown library name",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",
	"odd number of characters in request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"new sentence, new line",
	"invalid escape sequence",
	"undefined string, using \"\"",

	/* related to tables */
	"tbl line starts with span",
	"tbl column starts with span",
	"skipping vertical bar in tbl layout",

	"generic error",

	/* related to tables */
	"non-alphabetic character in tbl options",
	"skipping unknown tbl option",
	"missing tbl option argument",
	"wrong tbl option argument size",
	"empty tbl layout",
	"invalid character in tbl layout",
	"unmatched parenthesis in tbl layout",
	"tbl without any data cells",
	"ignoring data in spanned tbl cell",
	"ignoring extra tbl data cells",
	"data block open at end of tbl",

	/* related to document structure and macros */
	NULL,
	"duplicate prologue macro",
	"skipping late title macro",
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping insecure request",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"fewer RS blocks open, skipping",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"NOT IMPLEMENTED: Bd -file",
	"skipping display without arguments",
	"missing list type, using -item",
	"argument is not numeric, using 1",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",
	"skipping all arguments",
	"skipping excess arguments",
	"divide by zero",

	"unsupported feature",
	"input too large",
	"unsupported control character",
	"unsupported roff request",
	"eqn delim option in tbl",
	"unsupported tbl layout modifier",
	"ignoring macro in table",
};
266
/*
 * Names of the message levels, indexed by enum mandoclevel and
 * returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"STYLE",
	"WARNING",
	"ERROR",
	"UNSUPP",
	"BADARG",
	"SYSERR"
};
276
277
278static void
279resize_buf(struct buf *buf, size_t initial)
280{
281
282	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
283	buf->buf = mandoc_realloc(buf->buf, buf->sz);
284}
285
286static void
287choose_parser(struct mparse *curp)
288{
289	char		*cp, *ep;
290	int		 format;
291
292	/*
293	 * If neither command line arguments -mdoc or -man select
294	 * a parser nor the roff parser found a .Dd or .TH macro
295	 * yet, look ahead in the main input buffer.
296	 */
297
298	if ((format = roff_getformat(curp->roff)) == 0) {
299		cp = curp->primary->buf;
300		ep = cp + curp->primary->sz;
301		while (cp < ep) {
302			if (*cp == '.' || *cp == '\'') {
303				cp++;
304				if (cp[0] == 'D' && cp[1] == 'd') {
305					format = MPARSE_MDOC;
306					break;
307				}
308				if (cp[0] == 'T' && cp[1] == 'H') {
309					format = MPARSE_MAN;
310					break;
311				}
312			}
313			cp = memchr(cp, '\n', ep - cp);
314			if (cp == NULL)
315				break;
316			cp++;
317		}
318	}
319
320	if (format == MPARSE_MDOC) {
321		curp->man->macroset = MACROSET_MDOC;
322		if (curp->man->mdocmac == NULL)
323			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
324	} else {
325		curp->man->macroset = MACROSET_MAN;
326		if (curp->man->manmac == NULL)
327			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
328	}
329	curp->man->first->tok = TOKEN_NONE;
330}
331
332/*
333 * Main parse routine for a buffer.
334 * It assumes encoding and line numbering are already set up.
335 * It can recurse directly (for invocations of user-defined
336 * macros, inline equations, and input line traps)
337 * and indirectly (for .so file inclusion).
338 */
/*
 * Parse the buffer "blk" starting at byte offset "i".
 *
 * A non-zero "start" marks the call made for a whole input buffer
 * (mparse_parse_buffer() passes 1): line numbers are then tracked
 * in curp->line, the reparse counter is reset for each line, and
 * NUL bytes do not end the input.  Recursive calls on temporary
 * line buffers pass 0 and stop at the first NUL byte.
 *
 * Each input line is copied into a private line buffer with
 * non-ASCII and control bytes translated or replaced, handed to
 * roff_parseln(), and then, depending on the result, to
 * mdoc_parseln() or man_parseln().
 *
 * Returns 0 if the reparse limit was exceeded or if a nested
 * reparse returned 0 while "start" is not 1; returns 1 otherwise.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
	struct buf	 ln;
	const char	*save_file;
	char		*cp;
	size_t		 pos; /* byte number in the ln buffer */
	enum rofferr	 rr;
	int		 of;
	int		 lnn; /* line number in the real file */
	int		 fd;
	unsigned char	 c;

	memset(&ln, 0, sizeof(ln));

	lnn = curp->line;
	pos = 0;

	while (i < blk.sz) {
		/* A NUL byte at the start of a line ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;

			/*
			 * On the first two lines, while both encodings
			 * are still possible, let preconv_cue() decide.
			 */
			if (lnn < 3 &&
			    curp->filenc & MPARSE_UTF8 &&
			    curp->filenc & MPARSE_LATIN1)
				curp->filenc = preconv_cue(&blk, i);
		}

		/* Copy one input line from blk into ln. */
		while (i < blk.sz && (start || blk.buf[i] != '\0')) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for the worst
			 * case of 11 bytes: "\\[u10ffff]\0"
			 */

			if (pos + 11 > ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Encode 8-bit input.
			 */

			c = blk.buf[i];
			if (c & 0x80) {
				/*
				 * Without a known encoding, or if the
				 * encoder fails, replace the byte by '?'.
				 */
				if ( ! (curp->filenc && preconv_encode(
				    &blk, &i, &ln, &pos, &curp->filenc))) {
					mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
					    curp->line, pos, "0x%x", c);
					ln.buf[pos++] = '?';
					i++;
				}
				continue;
			}

			/*
			 * Exclude control characters.
			 */

			if (c == 0x7f || (c < 0x20 && c != 0x09)) {
				mandoc_vmsg(c == 0x00 || c == 0x04 ||
				    c > 0x0a ? MANDOCERR_CHAR_BAD :
				    MANDOCERR_CHAR_UNSUPP,
				    curp, curp->line, pos, "0x%x", c);
				i++;
				/* A lone carriage return is dropped. */
				if (c != '\r')
					ln.buf[pos++] = '?';
				continue;
			}

			ln.buf[pos++] = blk.buf[i++];
		}

		if (pos + 1 >= ln.sz)
			resize_buf(&ln, 256);

		/* At the end of the input, end the line with a newline. */
		if (i == blk.sz || blk.buf[i] == '\0')
			ln.buf[pos++] = '\n';
		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line, &ln, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Parse the rewritten line recursively, unless
			 * the reparse limit has been exceeded.
			 */
			if (++curp->reparse_count > REPARSE_LIMIT)
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
			    start == 1) {
				pos = 0;
				continue;
			}
			free(ln.buf);
			return 0;
		case ROFF_APPEND:
			/* Keep the line and append the next one to it. */
			pos = strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			goto rerun;
		case ROFF_IGN:
			pos = 0;
			continue;
		case ROFF_SO:
			/*
			 * If .so inclusion is not enabled and this was
			 * the last line of the buffer, only record the
			 * destination and stop parsing.
			 */
			if ( ! (curp->options & MPARSE_SO) &&
			    (i >= blk.sz || blk.buf[i] == '\0')) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return 1;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			save_file = curp->file;
			if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
				mparse_readfd(curp, fd, ln.buf + of);
				close(fd);
				curp->file = save_file;
			} else {
				/*
				 * The file cannot be opened: report it and
				 * parse a replacement text naming the file.
				 */
				curp->file = save_file;
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				ln.sz = mandoc_asprintf(&cp,
				    ".sp\nSee the file %s.\n.sp",
				    ln.buf + of);
				free(ln.buf);
				ln.buf = cp;
				of = 0;
				mparse_buf_r(curp, ln, of, 0);
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/* No macro set selected yet: decide between mdoc and man. */
		if (curp->man->macroset == MACROSET_NONE)
			choose_parser(curp);

		/* A return value of 2 from the line parser ends the loop. */
		if ((curp->man->macroset == MACROSET_MDOC ?
		    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
		    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
				break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
	return 1;
}
550
/*
 * Read the complete content of the file descriptor "fd" into "fb".
 *
 * Regular files that are not gzipped are mapped with mmap();
 * everything else is read (through zlib if curp->gzip is set)
 * into a buffer obtained from resize_buf().
 *
 * curp       parser state; supplies the gzip flag and receives
 *            error messages
 * file       name of the file (not used by this function)
 * fd         open descriptor to read from; never closed here
 * fb         receives the buffer pointer and its size
 * with_mmap  set to 1 if fb->buf was mapped and has to be released
 *            with munmap(), or to 0 if it has to be released
 *            with free()
 *
 * Returns 1 on success and 0 on failure.  On failure, the caller
 * must not use or release fb->buf.
 */
static int
read_whole_file(struct mparse *curp, const char *file, int fd,
		struct buf *fb, int *with_mmap)
{
	struct stat	 st;
	gzFile		 gz;
	size_t		 off;
	ssize_t		 ssz;
	int		 gzfd;
	int		 ok;

	if (fstat(fd, &st) == -1) {
		mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
		    "fstat: %s", strerror(errno));
		return 0;
	}

	/*
	 * If we're a regular file, try just reading in the whole entry
	 * via mmap().  This is faster than reading it into blocks, and
	 * since each file is only a few bytes to begin with, I'm not
	 * concerned that this is going to tank any machines.
	 */

	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
		if (st.st_size > 0x7fffffff) {
			mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
			return 0;
		}
		*with_mmap = 1;
		fb->sz = (size_t)st.st_size;
		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
		if (fb->buf != MAP_FAILED)
			return 1;
		/* If mapping failed, fall back to reading below. */
	}

	gz = NULL;
	if (curp->gzip) {
		/*
		 * gzclose() has to be called to release the memory
		 * zlib allocates for the stream, but it also closes
		 * the descriptor it was given.  The caller still owns
		 * fd, so hand a duplicate to zlib.
		 */
		if ((gzfd = dup(fd)) == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "dup: %s", strerror(errno));
			return 0;
		}
		if ((gz = gzdopen(gzfd, "rb")) == NULL) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "gzdopen: %s", strerror(errno));
			close(gzfd);
			return 0;
		}
	}

	/*
	 * If this isn't a regular file (like, say, stdin), then we must
	 * go the old way and just read things in bit by bit.
	 */

	*with_mmap = 0;
	off = 0;
	fb->sz = 0;
	fb->buf = NULL;
	ok = 0;
	for (;;) {
		if (off == fb->sz) {
			/* Refuse to grow the buffer beyond 2 GiB. */
			if (fb->sz == (1U << 31)) {
				mandoc_msg(MANDOCERR_TOOLARGE, curp,
				    0, 0, NULL);
				break;
			}
			resize_buf(fb, 65536);
		}
		ssz = gz != NULL ?
		    gzread(gz, fb->buf + off, fb->sz - off) :
		    read(fd, fb->buf + off, fb->sz - off);
		if (ssz == 0) {
			/* End of input: trim the size to what was read. */
			fb->sz = off;
			ok = 1;
			break;
		}
		if (ssz == -1) {
			mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
			    "read: %s", strerror(errno));
			break;
		}
		off += (size_t)ssz;
	}

	/*
	 * Release the zlib stream and the duplicated descriptor.
	 * The stream was only read from, so the result of gzclose()
	 * does not affect the data already collected.
	 */

	if (gz != NULL)
		(void)gzclose(gz);

	if (ok)
		return 1;

	free(fb->buf);
	fb->buf = NULL;
	return 0;
}
631
632static void
633mparse_end(struct mparse *curp)
634{
635	if (curp->man->macroset == MACROSET_NONE)
636		curp->man->macroset = MACROSET_MAN;
637	if (curp->man->macroset == MACROSET_MDOC)
638		mdoc_endparse(curp->man);
639	else
640		man_endparse(curp->man);
641	roff_endparse(curp->roff);
642}
643
644static void
645mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
646{
647	struct buf	*svprimary;
648	const char	*svfile;
649	size_t		 offset;
650	static int	 recursion_depth;
651
652	if (64 < recursion_depth) {
653		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
654		return;
655	}
656
657	/* Line number is per-file. */
658	svfile = curp->file;
659	curp->file = file;
660	svprimary = curp->primary;
661	curp->primary = &blk;
662	curp->line = 1;
663	recursion_depth++;
664
665	/* Skip an UTF-8 byte order mark. */
666	if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
667	    (unsigned char)blk.buf[0] == 0xef &&
668	    (unsigned char)blk.buf[1] == 0xbb &&
669	    (unsigned char)blk.buf[2] == 0xbf) {
670		offset = 3;
671		curp->filenc &= ~MPARSE_LATIN1;
672	} else
673		offset = 0;
674
675	mparse_buf_r(curp, blk, offset, 1);
676
677	if (--recursion_depth == 0)
678		mparse_end(curp);
679
680	curp->primary = svprimary;
681	curp->file = svfile;
682}
683
684enum mandoclevel
685mparse_readmem(struct mparse *curp, void *buf, size_t len,
686		const char *file)
687{
688	struct buf blk;
689
690	blk.buf = buf;
691	blk.sz = len;
692
693	mparse_parse_buffer(curp, blk, file);
694	return curp->file_status;
695}
696
697/*
698 * Read the whole file into memory and call the parsers.
699 * Called recursively when an .so request is encountered.
700 */
701enum mandoclevel
702mparse_readfd(struct mparse *curp, int fd, const char *file)
703{
704	struct buf	 blk;
705	int		 with_mmap;
706	int		 save_filenc;
707
708	if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
709		save_filenc = curp->filenc;
710		curp->filenc = curp->options &
711		    (MPARSE_UTF8 | MPARSE_LATIN1);
712		mparse_parse_buffer(curp, blk, file);
713		curp->filenc = save_filenc;
714		if (with_mmap)
715			munmap(blk.buf, blk.sz);
716		else
717			free(blk.buf);
718	}
719	return curp->file_status;
720}
721
722int
723mparse_open(struct mparse *curp, const char *file)
724{
725	char		 *cp;
726	int		  fd;
727
728	curp->file = file;
729	cp = strrchr(file, '.');
730	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
731
732	/* First try to use the filename as it is. */
733
734	if ((fd = open(file, O_RDONLY)) != -1)
735		return fd;
736
737	/*
738	 * If that doesn't work and the filename doesn't
739	 * already  end in .gz, try appending .gz.
740	 */
741
742	if ( ! curp->gzip) {
743		mandoc_asprintf(&cp, "%s.gz", file);
744		fd = open(cp, O_RDONLY);
745		free(cp);
746		if (fd != -1) {
747			curp->gzip = 1;
748			return fd;
749		}
750	}
751
752	/* Neither worked, give up. */
753
754	mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
755	return -1;
756}
757
758struct mparse *
759mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
760    enum mandoc_os os_e, const char *os_s)
761{
762	struct mparse	*curp;
763
764	curp = mandoc_calloc(1, sizeof(struct mparse));
765
766	curp->options = options;
767	curp->mmin = mmin;
768	curp->mmsg = mmsg;
769	curp->os_s = os_s;
770
771	curp->roff = roff_alloc(curp, options);
772	curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
773		curp->options & MPARSE_QUICK ? 1 : 0);
774	if (curp->options & MPARSE_MDOC) {
775		curp->man->macroset = MACROSET_MDOC;
776		if (curp->man->mdocmac == NULL)
777			curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
778	} else if (curp->options & MPARSE_MAN) {
779		curp->man->macroset = MACROSET_MAN;
780		if (curp->man->manmac == NULL)
781			curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
782	}
783	curp->man->first->tok = TOKEN_NONE;
784	curp->man->meta.os_e = os_e;
785	return curp;
786}
787
788void
789mparse_reset(struct mparse *curp)
790{
791	roff_reset(curp->roff);
792	roff_man_reset(curp->man);
793
794	free(curp->sodest);
795	curp->sodest = NULL;
796
797	if (curp->secondary)
798		curp->secondary->sz = 0;
799
800	curp->file_status = MANDOCLEVEL_OK;
801	curp->gzip = 0;
802}
803
804void
805mparse_free(struct mparse *curp)
806{
807
808	roffhash_free(curp->man->mdocmac);
809	roffhash_free(curp->man->manmac);
810	roff_man_free(curp->man);
811	roff_free(curp->roff);
812	if (curp->secondary)
813		free(curp->secondary->buf);
814
815	free(curp->secondary);
816	free(curp->sodest);
817	free(curp);
818}
819
820void
821mparse_result(struct mparse *curp, struct roff_man **man,
822	char **sodest)
823{
824
825	if (sodest && NULL != (*sodest = curp->sodest)) {
826		*man = NULL;
827		return;
828	}
829	if (man)
830		*man = curp->man;
831}
832
833void
834mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
835{
836	if (curp->file_status > *rc)
837		*rc = curp->file_status;
838}
839
840void
841mandoc_vmsg(enum mandocerr t, struct mparse *m,
842		int ln, int pos, const char *fmt, ...)
843{
844	char		 buf[256];
845	va_list		 ap;
846
847	va_start(ap, fmt);
848	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
849	va_end(ap);
850
851	mandoc_msg(t, m, ln, pos, buf);
852}
853
854void
855mandoc_msg(enum mandocerr er, struct mparse *m,
856		int ln, int col, const char *msg)
857{
858	enum mandoclevel level;
859
860	if (er < m->mmin && er != MANDOCERR_FILE)
861		return;
862
863	level = MANDOCLEVEL_UNSUPP;
864	while (er < mandoclimits[level])
865		level--;
866
867	if (m->mmsg)
868		(*m->mmsg)(er, level, m->file, ln, col, msg);
869
870	if (m->file_status < level)
871		m->file_status = level;
872}
873
874const char *
875mparse_strerror(enum mandocerr er)
876{
877
878	return mandocerrs[er];
879}
880
881const char *
882mparse_strlevel(enum mandoclevel lvl)
883{
884	return mandoclevels[lvl];
885}
886
887void
888mparse_keep(struct mparse *p)
889{
890
891	assert(NULL == p->secondary);
892	p->secondary = mandoc_calloc(1, sizeof(struct buf));
893}
894
895const char *
896mparse_getkeep(const struct mparse *p)
897{
898
899	assert(p->secondary);
900	return p->secondary->sz ? p->secondary->buf : NULL;
901}
902