mdoc.c revision 1.1.1.5
1/*	$Vendor-Id: mdoc.c,v 1.118 2010/03/31 07:42:04 kristaps Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#ifdef HAVE_CONFIG_H
18#include "config.h"
19#endif
20
21#include <sys/types.h>
22
23#include <assert.h>
24#include <ctype.h>
25#include <stdarg.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "libmdoc.h"
31#include "libmandoc.h"
32
33const	char *const __mdoc_merrnames[MERRMAX] = {
34	"trailing whitespace", /* ETAILWS */
35	"unexpected quoted parameter", /* EQUOTPARM */
36	"unterminated quoted parameter", /* EQUOTTERM */
37	"argument parameter suggested", /* EARGVAL */
38	"macro disallowed in prologue", /* EBODYPROL */
39	"macro disallowed in body", /* EPROLBODY */
40	"text disallowed in prologue", /* ETEXTPROL */
41	"blank line disallowed", /* ENOBLANK */
42	"text parameter too long", /* ETOOLONG */
43	"invalid escape sequence", /* EESCAPE */
44	"invalid character", /* EPRINT */
45	"document has no body", /* ENODAT */
46	"document has no prologue", /* ENOPROLOGUE */
47	"expected line arguments", /* ELINE */
48	"invalid AT&T argument", /* EATT */
49	"default name not yet set", /* ENAME */
50	"missing list type", /* ELISTTYPE */
51	"missing display type", /* EDISPTYPE */
52	"too many display types", /* EMULTIDISP */
53	"too many list types", /* EMULTILIST */
54	"NAME section must be first", /* ESECNAME */
55	"badly-formed NAME section", /* ENAMESECINC */
56	"argument repeated", /* EARGREP */
57	"expected boolean parameter", /* EBOOL */
58	"inconsistent column syntax", /* ECOLMIS */
59	"nested display invalid", /* ENESTDISP */
60	"width argument missing", /* EMISSWIDTH */
61	"invalid section for this manual section", /* EWRONGMSEC */
62	"section out of conventional order", /* ESECOOO */
63	"section repeated", /* ESECREP */
64	"invalid standard argument", /* EBADSTAND */
65	"multi-line arguments discouraged", /* ENOMULTILINE */
66	"multi-line arguments suggested", /* EMULTILINE */
67	"line arguments discouraged", /* ENOLINE */
68	"prologue macro out of conventional order", /* EPROLOOO */
69	"prologue macro repeated", /* EPROLREP */
70	"invalid manual section", /* EBADMSEC */
71	"invalid section", /* EBADSEC */
72	"invalid font mode", /* EFONT */
73	"invalid date syntax", /* EBADDATE */
74	"invalid number format", /* ENUMFMT */
75	"superfluous width argument", /* ENOWIDTH */
76	"system: utsname error", /* EUTSNAME */
77	"obsolete macro", /* EOBS */
78	"end-of-line scope violation", /* EIMPBRK */
79	"empty macro ignored", /* EIGNE */
80	"unclosed explicit scope", /* EOPEN */
81	"unterminated quoted phrase", /* EQUOTPHR */
82	"closure macro without prior context", /* ENOCTX */
83	"no description found for library", /* ELIB */
84	"bad child for parent context", /* EBADCHILD */
85	"list arguments preceding type", /* ENOTYPE */
86};
87
88const	char *const __mdoc_macronames[MDOC_MAX] = {
89	"Ap",		"Dd",		"Dt",		"Os",
90	"Sh",		"Ss",		"Pp",		"D1",
91	"Dl",		"Bd",		"Ed",		"Bl",
92	"El",		"It",		"Ad",		"An",
93	"Ar",		"Cd",		"Cm",		"Dv",
94	"Er",		"Ev",		"Ex",		"Fa",
95	"Fd",		"Fl",		"Fn",		"Ft",
96	"Ic",		"In",		"Li",		"Nd",
97	"Nm",		"Op",		"Ot",		"Pa",
98	"Rv",		"St",		"Va",		"Vt",
99	/* LINTED */
100	"Xr",		"%A",		"%B",		"%D",
101	/* LINTED */
102	"%I",		"%J",		"%N",		"%O",
103	/* LINTED */
104	"%P",		"%R",		"%T",		"%V",
105	"Ac",		"Ao",		"Aq",		"At",
106	"Bc",		"Bf",		"Bo",		"Bq",
107	"Bsx",		"Bx",		"Db",		"Dc",
108	"Do",		"Dq",		"Ec",		"Ef",
109	"Em",		"Eo",		"Fx",		"Ms",
110	"No",		"Ns",		"Nx",		"Ox",
111	"Pc",		"Pf",		"Po",		"Pq",
112	"Qc",		"Ql",		"Qo",		"Qq",
113	"Re",		"Rs",		"Sc",		"So",
114	"Sq",		"Sm",		"Sx",		"Sy",
115	"Tn",		"Ux",		"Xc",		"Xo",
116	"Fo",		"Fc",		"Oo",		"Oc",
117	"Bk",		"Ek",		"Bt",		"Hf",
118	"Fr",		"Ud",		"Lb",		"Lp",
119	"Lk",		"Mt",		"Brq",		"Bro",
120	/* LINTED */
121	"Brc",		"%C",		"Es",		"En",
122	/* LINTED */
123	"Dx",		"%Q",		"br",		"sp",
124	/* LINTED */
125	"%U"
126	};
127
128const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
129	"split",		"nosplit",		"ragged",
130	"unfilled",		"literal",		"file",
131	"offset",		"bullet",		"dash",
132	"hyphen",		"item",			"enum",
133	"tag",			"diag",			"hang",
134	"ohang",		"inset",		"column",
135	"width",		"compact",		"std",
136	"filled",		"words",		"emphasis",
137	"symbolic",		"nested",		"centered"
138	};
139
140const	char * const *mdoc_macronames = __mdoc_macronames;
141const	char * const *mdoc_argnames = __mdoc_argnames;
142
/* Forward declarations of the file-local helpers defined below. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);
154
155const struct mdoc_node *
156mdoc_node(const struct mdoc *m)
157{
158
159	return(MDOC_HALT & m->flags ? NULL : m->first);
160}
161
162
163const struct mdoc_meta *
164mdoc_meta(const struct mdoc *m)
165{
166
167	return(MDOC_HALT & m->flags ? NULL : &m->meta);
168}
169
170
171/*
172 * Frees volatile resources (parse tree, meta-data, fields).
173 */
174static void
175mdoc_free1(struct mdoc *mdoc)
176{
177
178	if (mdoc->first)
179		mdoc_node_freelist(mdoc->first);
180	if (mdoc->meta.title)
181		free(mdoc->meta.title);
182	if (mdoc->meta.os)
183		free(mdoc->meta.os);
184	if (mdoc->meta.name)
185		free(mdoc->meta.name);
186	if (mdoc->meta.arch)
187		free(mdoc->meta.arch);
188	if (mdoc->meta.vol)
189		free(mdoc->meta.vol);
190}
191
192
193/*
194 * Allocate all volatile resources (parse tree, meta-data, fields).
195 */
196static void
197mdoc_alloc1(struct mdoc *mdoc)
198{
199
200	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
201	mdoc->flags = 0;
202	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
203	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
204	mdoc->first = mdoc->last;
205	mdoc->last->type = MDOC_ROOT;
206	mdoc->next = MDOC_NEXT_CHILD;
207}
208
209
/*
 * Reset a parser so it can be run on a new document: the volatile
 * resources are released with mdoc_free1() and then re-created with
 * mdoc_alloc1().  Fields that neither helper touches (callbacks, user
 * data, parse flags) survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* drop the old tree and meta-data */
	mdoc_alloc1(mdoc);	/* start over with an empty root */
}
223
224
/*
 * Destroy a parser: release its volatile resources with mdoc_free1()
 * and then the struct mdoc itself.  The pointer must not be used after
 * this call.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* parse tree and meta-data strings */
	free(mdoc);		/* the parser object itself */
}
236
237
238/*
239 * Allocate volatile and non-volatile parse resources.
240 */
241struct mdoc *
242mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
243{
244	struct mdoc	*p;
245
246	p = mandoc_calloc(1, sizeof(struct mdoc));
247
248	if (cb)
249		memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
250
251	p->data = data;
252	p->pflags = pflags;
253
254	mdoc_hash_init();
255	mdoc_alloc1(p);
256	return(p);
257}
258
259
260/*
261 * Climb back up the parse tree, validating open scopes.  Mostly calls
262 * through to macro_end() in macro.c.
263 */
264int
265mdoc_endparse(struct mdoc *m)
266{
267
268	if (MDOC_HALT & m->flags)
269		return(0);
270	else if (mdoc_macroend(m))
271		return(1);
272	m->flags |= MDOC_HALT;
273	return(0);
274}
275
276
277/*
278 * Main parse routine.  Parses a single line -- really just hands off to
279 * the macro (parsemacro()) or text parser (parsetext()).
280 */
281int
282mdoc_parseln(struct mdoc *m, int ln, char *buf)
283{
284
285	if (MDOC_HALT & m->flags)
286		return(0);
287
288	return('.' == *buf ? parsemacro(m, ln, buf) :
289			parsetext(m, ln, buf));
290}
291
292
293int
294mdoc_verr(struct mdoc *mdoc, int ln, int pos,
295		const char *fmt, ...)
296{
297	char		 buf[256];
298	va_list		 ap;
299
300	if (NULL == mdoc->cb.mdoc_err)
301		return(0);
302
303	va_start(ap, fmt);
304	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
305	va_end(ap);
306
307	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
308}
309
310
311int
312mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
313{
314	char		 buf[256];
315	va_list		 ap;
316
317	if (NULL == mdoc->cb.mdoc_warn)
318		return(0);
319
320	va_start(ap, fmt);
321	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
322	va_end(ap);
323
324	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
325}
326
327
328int
329mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
330{
331	const char	*p;
332
333	p = __mdoc_merrnames[(int)type];
334	assert(p);
335
336	if (iserr)
337		return(mdoc_verr(m, line, pos, p));
338
339	return(mdoc_vwarn(m, line, pos, p));
340}
341
342
343int
344mdoc_macro(struct mdoc *m, enum mdoct tok,
345		int ln, int pp, int *pos, char *buf)
346{
347
348	assert(tok < MDOC_MAX);
349	/*
350	 * If we're in the prologue, deny "body" macros.  Similarly, if
351	 * we're in the body, deny prologue calls.
352	 */
353	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
354			MDOC_PBODY & m->flags)
355		return(mdoc_perr(m, ln, pp, EPROLBODY));
356	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
357			! (MDOC_PBODY & m->flags))
358		return(mdoc_perr(m, ln, pp, EBODYPROL));
359
360	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
361}
362
363
364static int
365node_append(struct mdoc *mdoc, struct mdoc_node *p)
366{
367
368	assert(mdoc->last);
369	assert(mdoc->first);
370	assert(MDOC_ROOT != p->type);
371
372	switch (mdoc->next) {
373	case (MDOC_NEXT_SIBLING):
374		mdoc->last->next = p;
375		p->prev = mdoc->last;
376		p->parent = mdoc->last->parent;
377		break;
378	case (MDOC_NEXT_CHILD):
379		mdoc->last->child = p;
380		p->parent = mdoc->last;
381		break;
382	default:
383		abort();
384		/* NOTREACHED */
385	}
386
387	p->parent->nchild++;
388
389	if ( ! mdoc_valid_pre(mdoc, p))
390		return(0);
391	if ( ! mdoc_action_pre(mdoc, p))
392		return(0);
393
394	switch (p->type) {
395	case (MDOC_HEAD):
396		assert(MDOC_BLOCK == p->parent->type);
397		p->parent->head = p;
398		break;
399	case (MDOC_TAIL):
400		assert(MDOC_BLOCK == p->parent->type);
401		p->parent->tail = p;
402		break;
403	case (MDOC_BODY):
404		assert(MDOC_BLOCK == p->parent->type);
405		p->parent->body = p;
406		break;
407	default:
408		break;
409	}
410
411	mdoc->last = p;
412
413	switch (p->type) {
414	case (MDOC_TEXT):
415		if ( ! mdoc_valid_post(mdoc))
416			return(0);
417		if ( ! mdoc_action_post(mdoc))
418			return(0);
419		break;
420	default:
421		break;
422	}
423
424	return(1);
425}
426
427
428static struct mdoc_node *
429node_alloc(struct mdoc *m, int line, int pos,
430		enum mdoct tok, enum mdoc_type type)
431{
432	struct mdoc_node *p;
433
434	p = mandoc_calloc(1, sizeof(struct mdoc_node));
435	p->sec = m->lastsec;
436	p->line = line;
437	p->pos = pos;
438	p->tok = tok;
439	p->type = type;
440
441	return(p);
442}
443
444
445int
446mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
447{
448	struct mdoc_node *p;
449
450	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
451	if ( ! node_append(m, p))
452		return(0);
453	m->next = MDOC_NEXT_CHILD;
454	return(1);
455}
456
457
458int
459mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
460{
461	struct mdoc_node *p;
462
463	assert(m->first);
464	assert(m->last);
465
466	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
467	if ( ! node_append(m, p))
468		return(0);
469	m->next = MDOC_NEXT_CHILD;
470	return(1);
471}
472
473
474int
475mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
476{
477	struct mdoc_node *p;
478
479	p = node_alloc(m, line, pos, tok, MDOC_BODY);
480	if ( ! node_append(m, p))
481		return(0);
482	m->next = MDOC_NEXT_CHILD;
483	return(1);
484}
485
486
487int
488mdoc_block_alloc(struct mdoc *m, int line, int pos,
489		enum mdoct tok, struct mdoc_arg *args)
490{
491	struct mdoc_node *p;
492
493	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
494	p->args = args;
495	if (p->args)
496		(args->refcnt)++;
497	if ( ! node_append(m, p))
498		return(0);
499	m->next = MDOC_NEXT_CHILD;
500	return(1);
501}
502
503
504int
505mdoc_elem_alloc(struct mdoc *m, int line, int pos,
506		enum mdoct tok, struct mdoc_arg *args)
507{
508	struct mdoc_node *p;
509
510	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
511	p->args = args;
512	if (p->args)
513		(args->refcnt)++;
514	if ( ! node_append(m, p))
515		return(0);
516	m->next = MDOC_NEXT_CHILD;
517	return(1);
518}
519
520
521static int
522pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
523{
524	struct mdoc_node *n;
525	size_t		  sv;
526
527	n = node_alloc(m, line, pos, -1, MDOC_TEXT);
528	n->string = mandoc_malloc(len + 1);
529	sv = strlcpy(n->string, p, len + 1);
530
531	/* Prohibit truncation. */
532	assert(sv < len + 1);
533
534	if ( ! node_append(m, n))
535		return(0);
536	m->next = MDOC_NEXT_SIBLING;
537	return(1);
538}
539
540
/*
 * Append the whole NUL-terminated string p to the tree as a single
 * text node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
547
548
549void
550mdoc_node_free(struct mdoc_node *p)
551{
552
553	if (p->parent)
554		p->parent->nchild--;
555	if (p->string)
556		free(p->string);
557	if (p->args)
558		mdoc_argv_free(p->args);
559	free(p);
560}
561
562
563void
564mdoc_node_freelist(struct mdoc_node *p)
565{
566
567	if (p->child)
568		mdoc_node_freelist(p->child);
569	if (p->next)
570		mdoc_node_freelist(p->next);
571
572	assert(0 == p->nchild);
573	mdoc_node_free(p);
574}
575
576
577/*
578 * Parse free-form text, that is, a line that does not begin with the
579 * control character.
580 */
581static int
582parsetext(struct mdoc *m, int line, char *buf)
583{
584	int		 i, j;
585	char		 sv;
586
587	if (SEC_NONE == m->lastnamed)
588		return(mdoc_perr(m, line, 0, ETEXTPROL));
589
590	/*
591	 * If in literal mode, then pass the buffer directly to the
592	 * back-end, as it should be preserved as a single term.
593	 */
594
595	if (MDOC_LITERAL & m->flags)
596		return(mdoc_word_alloc(m, line, 0, buf));
597
598	/* Disallow blank/white-space lines in non-literal mode. */
599
600	for (i = 0; ' ' == buf[i]; i++)
601		/* Skip leading whitespace. */ ;
602
603	if ('\0' == buf[i])
604		return(mdoc_perr(m, line, 0, ENOBLANK));
605
606	/*
607	 * Break apart a free-form line into tokens.  Spaces are
608	 * stripped out of the input.
609	 */
610
611	for (j = i; buf[i]; i++) {
612		if (' ' != buf[i])
613			continue;
614
615		/* Escaped whitespace. */
616		if (i && ' ' == buf[i] && '\\' == buf[i - 1])
617			continue;
618
619		sv = buf[i];
620		buf[i++] = '\0';
621
622		if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
623			return(0);
624
625		/* Trailing whitespace?  Check at overwritten byte. */
626
627		if (' ' == sv && '\0' == buf[i])
628			if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
629				return(0);
630
631		for ( ; ' ' == buf[i]; i++)
632			/* Skip trailing whitespace. */ ;
633
634		j = i;
635
636		/* Trailing whitespace? */
637
638		if (' ' == buf[i - 1] && '\0' == buf[i])
639			if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
640				return(0);
641
642		if ('\0' == buf[i])
643			break;
644	}
645
646	if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
647		return(0);
648
649	m->next = MDOC_NEXT_SIBLING;
650	return(1);
651}
652
653
654
655static int
656macrowarn(struct mdoc *m, int ln, const char *buf)
657{
658	if ( ! (MDOC_IGN_MACRO & m->pflags))
659		return(mdoc_verr(m, ln, 0,
660				"unknown macro: %s%s",
661				buf, strlen(buf) > 3 ? "..." : ""));
662	return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
663				buf, strlen(buf) > 3 ? "..." : ""));
664}
665
666
667/*
668 * Parse a macro line, that is, a line beginning with the control
669 * character.
670 */
671int
672parsemacro(struct mdoc *m, int ln, char *buf)
673{
674	int		  i, j, c;
675	char		  mac[5];
676
677	/* Empty lines are ignored. */
678
679	if ('\0' == buf[1])
680		return(1);
681
682	i = 1;
683
684	/* Accept whitespace after the initial control char. */
685
686	if (' ' == buf[i]) {
687		i++;
688		while (buf[i] && ' ' == buf[i])
689			i++;
690		if ('\0' == buf[i])
691			return(1);
692	}
693
694	/* Copy the first word into a nil-terminated buffer. */
695
696	for (j = 0; j < 4; j++, i++) {
697		if ('\0' == (mac[j] = buf[i]))
698			break;
699		else if (' ' == buf[i])
700			break;
701
702		/* Check for invalid characters. */
703
704		if (isgraph((u_char)buf[i]))
705			continue;
706		return(mdoc_perr(m, ln, i, EPRINT));
707	}
708
709	mac[j] = 0;
710
711	if (j == 4 || j < 2) {
712		if ( ! macrowarn(m, ln, mac))
713			goto err;
714		return(1);
715	}
716
717	if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
718		if ( ! macrowarn(m, ln, mac))
719			goto err;
720		return(1);
721	}
722
723	/* The macro is sane.  Jump to the next word. */
724
725	while (buf[i] && ' ' == buf[i])
726		i++;
727
728	/* Trailing whitespace? */
729
730	if ('\0' == buf[i] && ' ' == buf[i - 1])
731		if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS))
732			goto err;
733
734	/*
735	 * Begin recursive parse sequence.  Since we're at the start of
736	 * the line, we don't need to do callable/parseable checks.
737	 */
738	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
739		goto err;
740
741	return(1);
742
743err:	/* Error out. */
744
745	m->flags |= MDOC_HALT;
746	return(0);
747}
748
749
750