mdoc.c revision 1.4
1/*	$Id: mdoc.c,v 1.4 2009/06/15 02:19:32 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <assert.h>
18#include <ctype.h>
19#include <stdarg.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23
24#include "libmdoc.h"
25
/*
 * Error codes understood by perr(), which maps each one onto a fixed
 * diagnostic string.
 */
enum	merr {
	ENOCALL = 0,	/* macro is not callable */
	EBODYPROL,	/* body macro used in the prologue */
	EPROLBODY,	/* prologue macro used in the body */
	ESPACE,		/* whitespace after the control character */
	ETEXTPROL,	/* free-form text in the prologue */
	ENOBLANK,	/* blank line outside of a literal context */
	EMALLOC		/* memory exhausted */
};
35
36const	char *const __mdoc_macronames[MDOC_MAX] = {
37	"\\\"",		"Dd",		"Dt",		"Os",
38	"Sh",		"Ss",		"Pp",		"D1",
39	"Dl",		"Bd",		"Ed",		"Bl",
40	"El",		"It",		"Ad",		"An",
41	"Ar",		"Cd",		"Cm",		"Dv",
42	"Er",		"Ev",		"Ex",		"Fa",
43	"Fd",		"Fl",		"Fn",		"Ft",
44	"Ic",		"In",		"Li",		"Nd",
45	"Nm",		"Op",		"Ot",		"Pa",
46	"Rv",		"St",		"Va",		"Vt",
47	/* LINTED */
48	"Xr",		"\%A",		"\%B",		"\%D",
49	/* LINTED */
50	"\%I",		"\%J",		"\%N",		"\%O",
51	/* LINTED */
52	"\%P",		"\%R",		"\%T",		"\%V",
53	"Ac",		"Ao",		"Aq",		"At",
54	"Bc",		"Bf",		"Bo",		"Bq",
55	"Bsx",		"Bx",		"Db",		"Dc",
56	"Do",		"Dq",		"Ec",		"Ef",
57	"Em",		"Eo",		"Fx",		"Ms",
58	"No",		"Ns",		"Nx",		"Ox",
59	"Pc",		"Pf",		"Po",		"Pq",
60	"Qc",		"Ql",		"Qo",		"Qq",
61	"Re",		"Rs",		"Sc",		"So",
62	"Sq",		"Sm",		"Sx",		"Sy",
63	"Tn",		"Ux",		"Xc",		"Xo",
64	"Fo",		"Fc",		"Oo",		"Oc",
65	"Bk",		"Ek",		"Bt",		"Hf",
66	"Fr",		"Ud",		"Lb",		"Ap",
67	"Lp",		"Lk",		"Mt",		"Brq",
68	/* LINTED */
69	"Bro",		"Brc",		"\%C",		"Es",
70	/* LINTED */
71	"En",		"Dx",		"\%Q"
72	};
73
74const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
75	"split",		"nosplit",		"ragged",
76	"unfilled",		"literal",		"file",
77	"offset",		"bullet",		"dash",
78	"hyphen",		"item",			"enum",
79	"tag",			"diag",			"hang",
80	"ohang",		"inset",		"column",
81	"width",		"compact",		"std",
82	"filled",		"words",		"emphasis",
83	"symbolic",		"nested"
84	};
85
86const	char * const *mdoc_macronames = __mdoc_macronames;
87const	char * const *mdoc_argnames = __mdoc_argnames;
88
/* Forward declarations of the file-local helpers. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);

/*
 * Report error `t' at the line and column recorded in the parser's
 * last node.  Note that `m' is evaluated more than once.
 */
#define verr(m, t) \
	perr((m), (m)->last->line, (m)->last->pos, (t))
101
102/*
103 * Get the first (root) node of the parse tree.
104 */
105const struct mdoc_node *
106mdoc_node(const struct mdoc *m)
107{
108
109	return(MDOC_HALT & m->flags ? NULL : m->first);
110}
111
112
113const struct mdoc_meta *
114mdoc_meta(const struct mdoc *m)
115{
116
117	return(MDOC_HALT & m->flags ? NULL : &m->meta);
118}
119
120
121static void
122mdoc_free1(struct mdoc *mdoc)
123{
124
125	if (mdoc->first)
126		mdoc_node_freelist(mdoc->first);
127	if (mdoc->meta.title)
128		free(mdoc->meta.title);
129	if (mdoc->meta.os)
130		free(mdoc->meta.os);
131	if (mdoc->meta.name)
132		free(mdoc->meta.name);
133	if (mdoc->meta.arch)
134		free(mdoc->meta.arch);
135	if (mdoc->meta.vol)
136		free(mdoc->meta.vol);
137}
138
139
140static int
141mdoc_alloc1(struct mdoc *mdoc)
142{
143
144	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145	mdoc->flags = 0;
146	mdoc->lastnamed = mdoc->lastsec = 0;
147	mdoc->last = calloc(1, sizeof(struct mdoc_node));
148	if (NULL == mdoc->last)
149		return(0);
150
151	mdoc->first = mdoc->last;
152	mdoc->last->type = MDOC_ROOT;
153	mdoc->next = MDOC_NEXT_CHILD;
154	return(1);
155}
156
157
/*
 * Drop everything the previous parse produced (node tree, meta-data)
 * and allocate a new root node so the parser can be reused.  Returns
 * the result of that reallocation.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 rc;

	mdoc_free1(mdoc);
	rc = mdoc_alloc1(mdoc);
	return(rc);
}
170
171
172/*
173 * Completely free up all resources.
174 */
175void
176mdoc_free(struct mdoc *mdoc)
177{
178
179	mdoc_free1(mdoc);
180	if (mdoc->htab)
181		mdoc_hash_free(mdoc->htab);
182	free(mdoc);
183}
184
185
186struct mdoc *
187mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
188{
189	struct mdoc	*p;
190
191	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
192		return(NULL);
193	if (cb)
194		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
195
196	p->data = data;
197	p->pflags = pflags;
198
199	if (NULL == (p->htab = mdoc_hash_alloc())) {
200		free(p);
201		return(NULL);
202	} else if (mdoc_alloc1(p))
203		return(p);
204
205	free(p);
206	return(NULL);
207}
208
209
210/*
211 * Climb back up the parse tree, validating open scopes.  Mostly calls
212 * through to macro_end in macro.c.
213 */
214int
215mdoc_endparse(struct mdoc *m)
216{
217
218	if (MDOC_HALT & m->flags)
219		return(0);
220	else if (mdoc_macroend(m))
221		return(1);
222	m->flags |= MDOC_HALT;
223	return(0);
224}
225
226
227/*
228 * Main parse routine.  Parses a single line -- really just hands off to
229 * the macro or text parser.
230 */
231int
232mdoc_parseln(struct mdoc *m, int ln, char *buf)
233{
234
235	/* If in error-mode, then we parse no more. */
236
237	if (MDOC_HALT & m->flags)
238		return(0);
239
240	return('.' == *buf ? parsemacro(m, ln, buf) :
241			parsetext(m, ln, buf));
242}
243
244
245void
246mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
247{
248	char		  buf[256];
249	va_list		  ap;
250
251	if (NULL == mdoc->cb.mdoc_msg)
252		return;
253
254	va_start(ap, fmt);
255	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
256	va_end(ap);
257	(*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
258}
259
260
261int
262mdoc_verr(struct mdoc *mdoc, int ln, int pos,
263		const char *fmt, ...)
264{
265	char		 buf[256];
266	va_list		 ap;
267
268	if (NULL == mdoc->cb.mdoc_err)
269		return(0);
270
271	va_start(ap, fmt);
272	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
273	va_end(ap);
274	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
275}
276
277
278int
279mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
280		enum mdoc_warn type, const char *fmt, ...)
281{
282	char		 buf[256];
283	va_list		 ap;
284
285	if (NULL == mdoc->cb.mdoc_warn)
286		return(0);
287
288	va_start(ap, fmt);
289	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
290	va_end(ap);
291	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
292}
293
294
295int
296mdoc_macro(struct mdoc *m, int tok,
297		int ln, int pp, int *pos, char *buf)
298{
299
300	/* FIXME - these should happen during validation. */
301
302	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
303			SEC_PROLOGUE != m->lastnamed)
304		return(perr(m, ln, pp, EPROLBODY));
305
306	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
307			SEC_PROLOGUE == m->lastnamed)
308		return(perr(m, ln, pp, EBODYPROL));
309
310	if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
311		return(perr(m, ln, pp, ENOCALL));
312
313	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
314}
315
316
317static int
318perr(struct mdoc *m, int line, int pos, enum merr type)
319{
320	char		*p;
321
322	p = NULL;
323	switch (type) {
324	case (ENOCALL):
325		p = "not callable";
326		break;
327	case (EPROLBODY):
328		p = "macro disallowed in document body";
329		break;
330	case (EBODYPROL):
331		p = "macro disallowed in document prologue";
332		break;
333	case (EMALLOC):
334		p = "memory exhausted";
335		break;
336	case (ETEXTPROL):
337		p = "text disallowed in document prologue";
338		break;
339	case (ENOBLANK):
340		p = "blank lines disallowed in non-literal contexts";
341		break;
342	case (ESPACE):
343		p = "whitespace disallowed after delimiter";
344		break;
345	}
346	assert(p);
347	return(mdoc_perr(m, line, pos, p));
348}
349
350
351static int
352node_append(struct mdoc *mdoc, struct mdoc_node *p)
353{
354
355	assert(mdoc->last);
356	assert(mdoc->first);
357	assert(MDOC_ROOT != p->type);
358
359	switch (mdoc->next) {
360	case (MDOC_NEXT_SIBLING):
361		mdoc->last->next = p;
362		p->prev = mdoc->last;
363		p->parent = mdoc->last->parent;
364		break;
365	case (MDOC_NEXT_CHILD):
366		mdoc->last->child = p;
367		p->parent = mdoc->last;
368		break;
369	default:
370		abort();
371		/* NOTREACHED */
372	}
373
374	if ( ! mdoc_valid_pre(mdoc, p))
375		return(0);
376	if ( ! mdoc_action_pre(mdoc, p))
377		return(0);
378
379	switch (p->type) {
380	case (MDOC_HEAD):
381		assert(MDOC_BLOCK == p->parent->type);
382		p->parent->head = p;
383		break;
384	case (MDOC_TAIL):
385		assert(MDOC_BLOCK == p->parent->type);
386		p->parent->tail = p;
387		break;
388	case (MDOC_BODY):
389		assert(MDOC_BLOCK == p->parent->type);
390		p->parent->body = p;
391		break;
392	default:
393		break;
394	}
395
396	mdoc->last = p;
397
398	switch (p->type) {
399	case (MDOC_TEXT):
400		if ( ! mdoc_valid_post(mdoc))
401			return(0);
402		if ( ! mdoc_action_post(mdoc))
403			return(0);
404		break;
405	default:
406		break;
407	}
408
409	return(1);
410}
411
412
413static struct mdoc_node *
414node_alloc(struct mdoc *mdoc, int line,
415		int pos, int tok, enum mdoc_type type)
416{
417	struct mdoc_node *p;
418
419	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
420		(void)verr(mdoc, EMALLOC);
421		return(NULL);
422	}
423
424	p->sec = mdoc->lastsec;
425	p->line = line;
426	p->pos = pos;
427	p->tok = tok;
428	if (MDOC_TEXT != (p->type = type))
429		assert(p->tok >= 0);
430
431	return(p);
432}
433
434
435int
436mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
437{
438	struct mdoc_node *p;
439
440	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
441	if (NULL == p)
442		return(0);
443	return(node_append(mdoc, p));
444}
445
446
447int
448mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
449{
450	struct mdoc_node *p;
451
452	assert(mdoc->first);
453	assert(mdoc->last);
454
455	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
456	if (NULL == p)
457		return(0);
458	return(node_append(mdoc, p));
459}
460
461
462int
463mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
464{
465	struct mdoc_node *p;
466
467	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
468	if (NULL == p)
469		return(0);
470	return(node_append(mdoc, p));
471}
472
473
474int
475mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
476		int tok, struct mdoc_arg *args)
477{
478	struct mdoc_node *p;
479
480	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
481	if (NULL == p)
482		return(0);
483	p->args = args;
484	if (p->args)
485		(args->refcnt)++;
486	return(node_append(mdoc, p));
487}
488
489
490int
491mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
492		int tok, struct mdoc_arg *args)
493{
494	struct mdoc_node *p;
495
496	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
497	if (NULL == p)
498		return(0);
499	p->args = args;
500	if (p->args)
501		(args->refcnt)++;
502	return(node_append(mdoc, p));
503}
504
505
506int
507mdoc_word_alloc(struct mdoc *mdoc,
508		int line, int pos, const char *word)
509{
510	struct mdoc_node *p;
511
512	p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
513	if (NULL == p)
514		return(0);
515	if (NULL == (p->string = strdup(word))) {
516		(void)verr(mdoc, EMALLOC);
517		return(0);
518	}
519	return(node_append(mdoc, p));
520}
521
522
523void
524mdoc_node_free(struct mdoc_node *p)
525{
526
527	if (p->string)
528		free(p->string);
529	if (p->args)
530		mdoc_argv_free(p->args);
531	free(p);
532}
533
534
535void
536mdoc_node_freelist(struct mdoc_node *p)
537{
538
539	if (p->child)
540		mdoc_node_freelist(p->child);
541	if (p->next)
542		mdoc_node_freelist(p->next);
543
544	mdoc_node_free(p);
545}
546
547
548/*
549 * Parse free-form text, that is, a line that does not begin with the
550 * control character.
551 */
552static int
553parsetext(struct mdoc *m, int line, char *buf)
554{
555
556	if (SEC_PROLOGUE == m->lastnamed)
557		return(perr(m, line, 0, ETEXTPROL));
558
559	if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
560		return(perr(m, line, 0, ENOBLANK));
561
562	if ( ! mdoc_word_alloc(m, line, 0, buf))
563		return(0);
564
565	m->next = MDOC_NEXT_SIBLING;
566	return(1);
567}
568
569
570static int
571macrowarn(struct mdoc *m, int ln, const char *buf)
572{
573	if ( ! (MDOC_IGN_MACRO & m->pflags))
574		return(mdoc_perr(m, ln, 1,
575				"unknown macro: %s%s",
576				buf, strlen(buf) > 3 ? "..." : ""));
577	return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
578				"unknown macro: %s%s",
579				buf, strlen(buf) > 3 ? "..." : ""));
580}
581
582
583
584/*
585 * Parse a macro line, that is, a line beginning with the control
586 * character.
587 */
588int
589parsemacro(struct mdoc *m, int ln, char *buf)
590{
591	int		  i, c;
592	char		  mac[5];
593
594	/* Comments and empties are quickly ignored. */
595
596	if (0 == buf[1])
597		return(1);
598
599	if (' ' == buf[1]) {
600		i = 2;
601		while (buf[i] && ' ' == buf[i])
602			i++;
603		if (0 == buf[i])
604			return(1);
605		return(perr(m, ln, 1, ESPACE));
606	}
607
608	if (buf[1] && '\\' == buf[1])
609		if (buf[2] && '\"' == buf[2])
610			return(1);
611
612	/* Copy the first word into a nil-terminated buffer. */
613
614	for (i = 1; i < 5; i++) {
615		if (0 == (mac[i - 1] = buf[i]))
616			break;
617		else if (' ' == buf[i])
618			break;
619	}
620
621	mac[i - 1] = 0;
622
623	if (i == 5 || i <= 2) {
624		if ( ! macrowarn(m, ln, mac))
625			goto err;
626		return(1);
627	}
628
629	if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
630		if ( ! macrowarn(m, ln, mac))
631			goto err;
632		return(1);
633	}
634
635	/* The macro is sane.  Jump to the next word. */
636
637	while (buf[i] && ' ' == buf[i])
638		i++;
639
640	/* Begin recursive parse sequence. */
641
642	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
643		goto err;
644
645	return(1);
646
647err:	/* Error out. */
648
649	m->flags |= MDOC_HALT;
650	return(0);
651}
652