mdoc.c revision 1.21
1/*	$Id: mdoc.c,v 1.21 2009/07/26 02:17:29 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <assert.h>
18#include <ctype.h>
19#include <stdarg.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23
24#include "libmdoc.h"
25
26const	char *const __mdoc_merrnames[MERRMAX] = {
27	"trailing whitespace", /* ETAILWS */
28	"empty last list column", /* ECOLEMPTY */
29	"unexpected quoted parameter", /* EQUOTPARM */
30	"unterminated quoted parameter", /* EQUOTTERM */
31	"system: malloc error", /* EMALLOC */
32	"argument parameter suggested", /* EARGVAL */
33	"macro not callable", /* ENOCALL */
34	"macro disallowed in prologue", /* EBODYPROL */
35	"macro disallowed in body", /* EPROLBODY */
36	"text disallowed in prologue", /* ETEXTPROL */
37	"blank line disallowed", /* ENOBLANK */
38	"text parameter too long", /* ETOOLONG */
39	"invalid escape sequence", /* EESCAPE */
40	"invalid character", /* EPRINT */
41	"document has no body", /* ENODAT */
42	"document has no prologue", /* ENOPROLOGUE */
43	"expected line arguments", /* ELINE */
44	"invalid AT&T argument", /* EATT */
45	"default name not yet set", /* ENAME */
46	"missing list type", /* ELISTTYPE */
47	"missing display type", /* EDISPTYPE */
48	"too many display types", /* EMULTIDISP */
49	"too many list types", /* EMULTILIST */
50	"NAME section must be first", /* ESECNAME */
51	"badly-formed NAME section", /* ENAMESECINC */
52	"argument repeated", /* EARGREP */
53	"expected boolean parameter", /* EBOOL */
54	"inconsistent column syntax", /* ECOLMIS */
55	"nested display invalid", /* ENESTDISP */
56	"width argument missing", /* EMISSWIDTH */
57	"invalid section for this manual section", /* EWRONGMSEC */
58	"section out of conventional order", /* ESECOOO */
59	"section repeated", /* ESECREP */
60	"invalid standard argument", /* EBADSTAND */
61	"multi-line arguments discouraged", /* ENOMULTILINE */
62	"multi-line arguments suggested", /* EMULTILINE */
63	"line arguments discouraged", /* ENOLINE */
64	"prologue macro out of conventional order", /* EPROLOOO */
65	"prologue macro repeated", /* EPROLREP */
66	"invalid manual section", /* EBADMSEC */
67	"invalid section", /* EBADSEC */
68	"invalid font mode", /* EFONT */
69	"invalid date syntax", /* EBADDATE */
70	"invalid number format", /* ENUMFMT */
71	"superfluous width argument", /* ENOWIDTH */
72	"system: utsname error", /* EUTSNAME */
73	"obsolete macro", /* EOBS */
74	"macro-like parameter", /* EMACPARM */
75	"end-of-line scope violation", /* EIMPBRK */
76	"empty macro ignored", /* EIGNE */
77	"unclosed explicit scope", /* EOPEN */
78	"unterminated quoted phrase", /* EQUOTPHR */
79	"closure macro without prior context", /* ENOCTX */
80	"invalid whitespace after control character", /* ESPACE */
81	"no description found for library" /* ELIB */
82};
83
84const	char *const __mdoc_macronames[MDOC_MAX] = {
85	"Ap",		"Dd",		"Dt",		"Os",
86	"Sh",		"Ss",		"Pp",		"D1",
87	"Dl",		"Bd",		"Ed",		"Bl",
88	"El",		"It",		"Ad",		"An",
89	"Ar",		"Cd",		"Cm",		"Dv",
90	"Er",		"Ev",		"Ex",		"Fa",
91	"Fd",		"Fl",		"Fn",		"Ft",
92	"Ic",		"In",		"Li",		"Nd",
93	"Nm",		"Op",		"Ot",		"Pa",
94	"Rv",		"St",		"Va",		"Vt",
95	/* LINTED */
96	"Xr",		"\%A",		"\%B",		"\%D",
97	/* LINTED */
98	"\%I",		"\%J",		"\%N",		"\%O",
99	/* LINTED */
100	"\%P",		"\%R",		"\%T",		"\%V",
101	"Ac",		"Ao",		"Aq",		"At",
102	"Bc",		"Bf",		"Bo",		"Bq",
103	"Bsx",		"Bx",		"Db",		"Dc",
104	"Do",		"Dq",		"Ec",		"Ef",
105	"Em",		"Eo",		"Fx",		"Ms",
106	"No",		"Ns",		"Nx",		"Ox",
107	"Pc",		"Pf",		"Po",		"Pq",
108	"Qc",		"Ql",		"Qo",		"Qq",
109	"Re",		"Rs",		"Sc",		"So",
110	"Sq",		"Sm",		"Sx",		"Sy",
111	"Tn",		"Ux",		"Xc",		"Xo",
112	"Fo",		"Fc",		"Oo",		"Oc",
113	"Bk",		"Ek",		"Bt",		"Hf",
114	"Fr",		"Ud",		"Lb",		"Lp",
115	"Lk",		"Mt",		"Brq",		"Bro",
116	/* LINTED */
117	"Brc",		"\%C",		"Es",		"En",
118	/* LINTED */
119	"Dx",		"\%Q",		"br",		"sp"
120	};
121
122const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
123	"split",		"nosplit",		"ragged",
124	"unfilled",		"literal",		"file",
125	"offset",		"bullet",		"dash",
126	"hyphen",		"item",			"enum",
127	"tag",			"diag",			"hang",
128	"ohang",		"inset",		"column",
129	"width",		"compact",		"std",
130	"filled",		"words",		"emphasis",
131	"symbolic",		"nested"
132	};
133
134const	char * const *mdoc_macronames = __mdoc_macronames;
135const	char * const *mdoc_argnames = __mdoc_argnames;
136
/* Volatile parse-state management. */
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	void		  mdoc_free1(struct mdoc *mdoc);

/* Node construction. */
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  pstring(struct mdoc *m, int line, int pos,
				const char *p, size_t len);

/* Per-line parsers. */
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
148
149
150const struct mdoc_node *
151mdoc_node(const struct mdoc *m)
152{
153
154	return(MDOC_HALT & m->flags ? NULL : m->first);
155}
156
157
158const struct mdoc_meta *
159mdoc_meta(const struct mdoc *m)
160{
161
162	return(MDOC_HALT & m->flags ? NULL : &m->meta);
163}
164
165
166/*
167 * Frees volatile resources (parse tree, meta-data, fields).
168 */
169static void
170mdoc_free1(struct mdoc *mdoc)
171{
172
173	if (mdoc->first)
174		mdoc_node_freelist(mdoc->first);
175	if (mdoc->meta.title)
176		free(mdoc->meta.title);
177	if (mdoc->meta.os)
178		free(mdoc->meta.os);
179	if (mdoc->meta.name)
180		free(mdoc->meta.name);
181	if (mdoc->meta.arch)
182		free(mdoc->meta.arch);
183	if (mdoc->meta.vol)
184		free(mdoc->meta.vol);
185}
186
187
188/*
189 * Allocate all volatile resources (parse tree, meta-data, fields).
190 */
191static int
192mdoc_alloc1(struct mdoc *mdoc)
193{
194
195	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
196	mdoc->flags = 0;
197	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
198	mdoc->last = calloc(1, sizeof(struct mdoc_node));
199	if (NULL == mdoc->last)
200		return(0);
201
202	mdoc->first = mdoc->last;
203	mdoc->last->type = MDOC_ROOT;
204	mdoc->next = MDOC_NEXT_CHILD;
205	return(1);
206}
207
208
/*
 * Prepare the parser for another document: drop the per-parse
 * resources with mdoc_free1(), then set them up afresh with
 * mdoc_alloc1().  Fields that neither helper touches are carried over
 * unchanged.  Returns the result of mdoc_alloc1().
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
222
223
224/*
225 * Completely free up all volatile and non-volatile parse resources.
226 * After invocation, the pointer is no longer usable.
227 */
228void
229mdoc_free(struct mdoc *mdoc)
230{
231
232	mdoc_free1(mdoc);
233	if (mdoc->htab)
234		mdoc_hash_free(mdoc->htab);
235	free(mdoc);
236}
237
238
239/*
240 * Allocate volatile and non-volatile parse resources.
241 */
242struct mdoc *
243mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
244{
245	struct mdoc	*p;
246
247	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
248		return(NULL);
249	if (cb)
250		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
251
252	p->data = data;
253	p->pflags = pflags;
254
255	if (NULL == (p->htab = mdoc_hash_alloc())) {
256		free(p);
257		return(NULL);
258	} else if (mdoc_alloc1(p))
259		return(p);
260
261	free(p);
262	return(NULL);
263}
264
265
266/*
267 * Climb back up the parse tree, validating open scopes.  Mostly calls
268 * through to macro_end() in macro.c.
269 */
270int
271mdoc_endparse(struct mdoc *m)
272{
273
274	if (MDOC_HALT & m->flags)
275		return(0);
276	else if (mdoc_macroend(m))
277		return(1);
278	m->flags |= MDOC_HALT;
279	return(0);
280}
281
282
283/*
284 * Main parse routine.  Parses a single line -- really just hands off to
285 * the macro (parsemacro()) or text parser (parsetext()).
286 */
287int
288mdoc_parseln(struct mdoc *m, int ln, char *buf)
289{
290
291	if (MDOC_HALT & m->flags)
292		return(0);
293
294	return('.' == *buf ? parsemacro(m, ln, buf) :
295			parsetext(m, ln, buf));
296}
297
298
299int
300mdoc_verr(struct mdoc *mdoc, int ln, int pos,
301		const char *fmt, ...)
302{
303	char		 buf[256];
304	va_list		 ap;
305
306	if (NULL == mdoc->cb.mdoc_err)
307		return(0);
308
309	va_start(ap, fmt);
310	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
311	va_end(ap);
312
313	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
314}
315
316
317int
318mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
319{
320	char		 buf[256];
321	va_list		 ap;
322
323	if (NULL == mdoc->cb.mdoc_warn)
324		return(0);
325
326	va_start(ap, fmt);
327	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
328	va_end(ap);
329
330	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
331}
332
333
334int
335mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
336{
337	const char	*p;
338
339	p = __mdoc_merrnames[(int)type];
340	assert(p);
341
342	if (iserr)
343		return(mdoc_verr(m, line, pos, p));
344
345	return(mdoc_vwarn(m, line, pos, p));
346}
347
348
349int
350mdoc_macro(struct mdoc *m, int tok,
351		int ln, int pp, int *pos, char *buf)
352{
353
354	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
355			MDOC_PBODY & m->flags)
356		return(mdoc_perr(m, ln, pp, EPROLBODY));
357	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
358			! (MDOC_PBODY & m->flags))
359		return(mdoc_perr(m, ln, pp, EBODYPROL));
360
361	if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
362		return(mdoc_perr(m, ln, pp, ENOCALL));
363
364	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
365}
366
367
368static int
369node_append(struct mdoc *mdoc, struct mdoc_node *p)
370{
371
372	assert(mdoc->last);
373	assert(mdoc->first);
374	assert(MDOC_ROOT != p->type);
375
376	switch (mdoc->next) {
377	case (MDOC_NEXT_SIBLING):
378		mdoc->last->next = p;
379		p->prev = mdoc->last;
380		p->parent = mdoc->last->parent;
381		break;
382	case (MDOC_NEXT_CHILD):
383		mdoc->last->child = p;
384		p->parent = mdoc->last;
385		break;
386	default:
387		abort();
388		/* NOTREACHED */
389	}
390
391	p->parent->nchild++;
392
393	if ( ! mdoc_valid_pre(mdoc, p))
394		return(0);
395	if ( ! mdoc_action_pre(mdoc, p))
396		return(0);
397
398	switch (p->type) {
399	case (MDOC_HEAD):
400		assert(MDOC_BLOCK == p->parent->type);
401		p->parent->head = p;
402		break;
403	case (MDOC_TAIL):
404		assert(MDOC_BLOCK == p->parent->type);
405		p->parent->tail = p;
406		break;
407	case (MDOC_BODY):
408		assert(MDOC_BLOCK == p->parent->type);
409		p->parent->body = p;
410		break;
411	default:
412		break;
413	}
414
415	mdoc->last = p;
416
417	switch (p->type) {
418	case (MDOC_TEXT):
419		if ( ! mdoc_valid_post(mdoc))
420			return(0);
421		if ( ! mdoc_action_post(mdoc))
422			return(0);
423		break;
424	default:
425		break;
426	}
427
428	return(1);
429}
430
431
432static struct mdoc_node *
433node_alloc(struct mdoc *m, int line,
434		int pos, int tok, enum mdoc_type type)
435{
436	struct mdoc_node *p;
437
438	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
439		(void)mdoc_nerr(m, m->last, EMALLOC);
440		return(NULL);
441	}
442
443	p->sec = m->lastsec;
444	p->line = line;
445	p->pos = pos;
446	p->tok = tok;
447	if (MDOC_TEXT != (p->type = type))
448		assert(p->tok >= 0);
449
450	return(p);
451}
452
453
454int
455mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
456{
457	struct mdoc_node *p;
458
459	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
460	if (NULL == p)
461		return(0);
462	return(node_append(m, p));
463}
464
465
466int
467mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
468{
469	struct mdoc_node *p;
470
471	assert(m->first);
472	assert(m->last);
473
474	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
475	if (NULL == p)
476		return(0);
477	return(node_append(m, p));
478}
479
480
481int
482mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
483{
484	struct mdoc_node *p;
485
486	p = node_alloc(m, line, pos, tok, MDOC_BODY);
487	if (NULL == p)
488		return(0);
489	return(node_append(m, p));
490}
491
492
493int
494mdoc_block_alloc(struct mdoc *m, int line, int pos,
495		int tok, struct mdoc_arg *args)
496{
497	struct mdoc_node *p;
498
499	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
500	if (NULL == p)
501		return(0);
502	p->args = args;
503	if (p->args)
504		(args->refcnt)++;
505	return(node_append(m, p));
506}
507
508
509int
510mdoc_elem_alloc(struct mdoc *m, int line, int pos,
511		int tok, struct mdoc_arg *args)
512{
513	struct mdoc_node *p;
514
515	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
516	if (NULL == p)
517		return(0);
518	p->args = args;
519	if (p->args)
520		(args->refcnt)++;
521	return(node_append(m, p));
522}
523
524
525static int
526pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
527{
528	struct mdoc_node *n;
529	size_t		  sv;
530
531	n = node_alloc(m, line, pos, -1, MDOC_TEXT);
532	if (NULL == n)
533		return(mdoc_nerr(m, m->last, EMALLOC));
534
535	n->string = malloc(len + 1);
536	if (NULL == n->string) {
537		free(n);
538		return(mdoc_nerr(m, m->last, EMALLOC));
539	}
540
541	sv = strlcpy(n->string, p, len + 1);
542
543	/* Prohibit truncation. */
544	assert(sv < len + 1);
545
546	return(node_append(m, n));
547}
548
549
/*
 * Append the whole NUL-terminated string `p' to the tree as one text
 * node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	size_t		 len;

	len = strlen(p);
	return(pstring(m, line, pos, p, len));
}
556
557
558void
559mdoc_node_free(struct mdoc_node *p)
560{
561
562	if (p->parent)
563		p->parent->nchild--;
564	if (p->string)
565		free(p->string);
566	if (p->args)
567		mdoc_argv_free(p->args);
568	free(p);
569}
570
571
572void
573mdoc_node_freelist(struct mdoc_node *p)
574{
575
576	if (p->child)
577		mdoc_node_freelist(p->child);
578	if (p->next)
579		mdoc_node_freelist(p->next);
580
581	assert(0 == p->nchild);
582	mdoc_node_free(p);
583}
584
585
586/*
587 * Parse free-form text, that is, a line that does not begin with the
588 * control character.
589 */
590static int
591parsetext(struct mdoc *m, int line, char *buf)
592{
593	int		 i, j;
594
595	if (SEC_NONE == m->lastnamed)
596		return(mdoc_perr(m, line, 0, ETEXTPROL));
597
598	/*
599	 * If in literal mode, then pass the buffer directly to the
600	 * back-end, as it should be preserved as a single term.
601	 */
602
603	if (MDOC_LITERAL & m->flags) {
604		if ( ! mdoc_word_alloc(m, line, 0, buf))
605			return(0);
606		m->next = MDOC_NEXT_SIBLING;
607		return(1);
608	}
609
610	/* Disallow blank/white-space lines in non-literal mode. */
611
612	for (i = 0; ' ' == buf[i]; i++)
613		/* Skip leading whitespace. */ ;
614	if (0 == buf[i])
615		return(mdoc_perr(m, line, 0, ENOBLANK));
616
617	/*
618	 * Break apart a free-form line into tokens.  Spaces are
619	 * stripped out of the input.
620	 */
621
622	for (j = i; buf[i]; i++) {
623		if (' ' != buf[i])
624			continue;
625
626		/* Escaped whitespace. */
627		if (i && ' ' == buf[i] && '\\' == buf[i - 1])
628			continue;
629
630		buf[i++] = 0;
631		if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
632			return(0);
633		m->next = MDOC_NEXT_SIBLING;
634
635		for ( ; ' ' == buf[i]; i++)
636			/* Skip trailing whitespace. */ ;
637
638		j = i;
639		if (0 == buf[i])
640			break;
641	}
642
643	if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
644		return(0);
645
646	m->next = MDOC_NEXT_SIBLING;
647	return(1);
648}
649
650
651
652
653static int
654macrowarn(struct mdoc *m, int ln, const char *buf)
655{
656	if ( ! (MDOC_IGN_MACRO & m->pflags))
657		return(mdoc_verr(m, ln, 1,
658				"unknown macro: %s%s",
659				buf, strlen(buf) > 3 ? "..." : ""));
660	return(mdoc_vwarn(m, ln, 1, "unknown macro: %s%s",
661				buf, strlen(buf) > 3 ? "..." : ""));
662}
663
664
665/*
666 * Parse a macro line, that is, a line beginning with the control
667 * character.
668 */
669int
670parsemacro(struct mdoc *m, int ln, char *buf)
671{
672	int		  i, c;
673	char		  mac[5];
674
675	/* Empty lines are ignored. */
676
677	if (0 == buf[1])
678		return(1);
679
680	if (' ' == buf[1]) {
681		i = 2;
682		while (buf[i] && ' ' == buf[i])
683			i++;
684		if (0 == buf[i])
685			return(1);
686		return(mdoc_perr(m, ln, 1, ESPACE));
687	}
688
689	/* Copy the first word into a nil-terminated buffer. */
690
691	for (i = 1; i < 5; i++) {
692		if (0 == (mac[i - 1] = buf[i]))
693			break;
694		else if (' ' == buf[i])
695			break;
696	}
697
698	mac[i - 1] = 0;
699
700	if (i == 5 || i <= 2) {
701		if ( ! macrowarn(m, ln, mac))
702			goto err;
703		return(1);
704	}
705
706	if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
707		if ( ! macrowarn(m, ln, mac))
708			goto err;
709		return(1);
710	}
711
712	/* The macro is sane.  Jump to the next word. */
713
714	while (buf[i] && ' ' == buf[i])
715		i++;
716
717	/* Begin recursive parse sequence. */
718
719	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
720		goto err;
721
722	return(1);
723
724err:	/* Error out. */
725
726	m->flags |= MDOC_HALT;
727	return(0);
728}
729