mdoc.c revision 1.3
1/*	$Id: mdoc.c,v 1.3 2009/06/14 23:00:57 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <assert.h>
18#include <ctype.h>
19#include <stdarg.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23
24#include "libmdoc.h"
25
/*
 * Parser-level error codes.  perr() maps each code to the message
 * quoted beside it.
 */
enum	merr {
	ENOCALL,	/* "not callable" */
	EBODYPROL,	/* "macro disallowed in document prologue" */
	EPROLBODY,	/* "macro disallowed in document body" */
	ESPACE,		/* "whitespace disallowed after delimiter" */
	ETEXTPROL,	/* "text disallowed in document prologue" */
	ENOBLANK,	/* "blank lines disallowed in non-literal contexts" */
	EMALLOC		/* "memory exhausted" */
};
35
36const	char *const __mdoc_macronames[MDOC_MAX] = {
37	"\\\"",		"Dd",		"Dt",		"Os",
38	"Sh",		"Ss",		"Pp",		"D1",
39	"Dl",		"Bd",		"Ed",		"Bl",
40	"El",		"It",		"Ad",		"An",
41	"Ar",		"Cd",		"Cm",		"Dv",
42	"Er",		"Ev",		"Ex",		"Fa",
43	"Fd",		"Fl",		"Fn",		"Ft",
44	"Ic",		"In",		"Li",		"Nd",
45	"Nm",		"Op",		"Ot",		"Pa",
46	"Rv",		"St",		"Va",		"Vt",
47	/* LINTED */
48	"Xr",		"\%A",		"\%B",		"\%D",
49	/* LINTED */
50	"\%I",		"\%J",		"\%N",		"\%O",
51	/* LINTED */
52	"\%P",		"\%R",		"\%T",		"\%V",
53	"Ac",		"Ao",		"Aq",		"At",
54	"Bc",		"Bf",		"Bo",		"Bq",
55	"Bsx",		"Bx",		"Db",		"Dc",
56	"Do",		"Dq",		"Ec",		"Ef",
57	"Em",		"Eo",		"Fx",		"Ms",
58	"No",		"Ns",		"Nx",		"Ox",
59	"Pc",		"Pf",		"Po",		"Pq",
60	"Qc",		"Ql",		"Qo",		"Qq",
61	"Re",		"Rs",		"Sc",		"So",
62	"Sq",		"Sm",		"Sx",		"Sy",
63	"Tn",		"Ux",		"Xc",		"Xo",
64	"Fo",		"Fc",		"Oo",		"Oc",
65	"Bk",		"Ek",		"Bt",		"Hf",
66	"Fr",		"Ud",		"Lb",		"Ap",
67	"Lp",		"Lk",		"Mt",		"Brq",
68	/* LINTED */
69	"Bro",		"Brc",		"\%C",		"Es",
70	/* LINTED */
71	"En",		"Dx",		"\%Q"
72	};
73
74const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
75	"split",		"nosplit",		"ragged",
76	"unfilled",		"literal",		"file",
77	"offset",		"bullet",		"dash",
78	"hyphen",		"item",			"enum",
79	"tag",			"diag",			"hang",
80	"ohang",		"inset",		"column",
81	"width",		"compact",		"std",
82	"filled",		"words",		"emphasis",
83	"symbolic",		"nested"
84	};
85
86const	char * const *mdoc_macronames = __mdoc_macronames;
87const	char * const *mdoc_argnames = __mdoc_argnames;
88
/* File-local helpers; definitions follow further down. */
static	void		  mdoc_free1(struct mdoc *mdoc);
static	int		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line,
				int pos, int tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  parsetext(struct mdoc *m, int line, char *buf);
static	int		  parsemacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln,
				const char *buf);
static	int		  perr(struct mdoc *m, int line, int pos,
				enum merr type);

/*
 * Report error `t' at the line/position of the parser's last node.
 * Note that `m' is evaluated more than once.
 */
#define	verr(m, t) \
	perr((m), (m)->last->line, (m)->last->pos, (t))
101
102/*
103 * Get the first (root) node of the parse tree.
104 */
105const struct mdoc_node *
106mdoc_node(const struct mdoc *m)
107{
108
109	return(MDOC_HALT & m->flags ? NULL : m->first);
110}
111
112
113const struct mdoc_meta *
114mdoc_meta(const struct mdoc *m)
115{
116
117	return(MDOC_HALT & m->flags ? NULL : &m->meta);
118}
119
120
121static void
122mdoc_free1(struct mdoc *mdoc)
123{
124
125	if (mdoc->first)
126		mdoc_node_freelist(mdoc->first);
127	if (mdoc->meta.title)
128		free(mdoc->meta.title);
129	if (mdoc->meta.os)
130		free(mdoc->meta.os);
131	if (mdoc->meta.name)
132		free(mdoc->meta.name);
133	if (mdoc->meta.arch)
134		free(mdoc->meta.arch);
135	if (mdoc->meta.vol)
136		free(mdoc->meta.vol);
137}
138
139
140static int
141mdoc_alloc1(struct mdoc *mdoc)
142{
143
144	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
145	mdoc->flags = 0;
146	mdoc->lastnamed = mdoc->lastsec = 0;
147	mdoc->last = calloc(1, sizeof(struct mdoc_node));
148	if (NULL == mdoc->last)
149		return(0);
150
151	mdoc->first = mdoc->last;
152	mdoc->last->type = MDOC_ROOT;
153	mdoc->next = MDOC_NEXT_CHILD;
154	return(1);
155}
156
157
/*
 * Discard the results of a parse (node tree and meta-data), then set
 * the parser up with a new root node so it can be used again.
 * Returns the result of mdoc_alloc1(): 1 on success, 0 on failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return(ok);
}
170
171
172/*
173 * Completely free up all resources.
174 */
175void
176mdoc_free(struct mdoc *mdoc)
177{
178
179	mdoc_free1(mdoc);
180	if (mdoc->htab)
181		mdoc_hash_free(mdoc->htab);
182	free(mdoc);
183}
184
185
186struct mdoc *
187mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
188{
189	struct mdoc	*p;
190
191	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
192		return(NULL);
193	if (cb)
194		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
195
196	p->data = data;
197	p->pflags = pflags;
198
199	if (NULL == (p->htab = mdoc_hash_alloc())) {
200		free(p);
201		return(NULL);
202	} else if (mdoc_alloc1(p))
203		return(p);
204
205	free(p);
206	return(NULL);
207}
208
209
210/*
211 * Climb back up the parse tree, validating open scopes.  Mostly calls
212 * through to macro_end in macro.c.
213 */
214int
215mdoc_endparse(struct mdoc *m)
216{
217
218	if (MDOC_HALT & m->flags)
219		return(0);
220	else if (mdoc_macroend(m))
221		return(1);
222	m->flags |= MDOC_HALT;
223	return(0);
224}
225
226
227/*
228 * Main parse routine.  Parses a single line -- really just hands off to
229 * the macro or text parser.
230 */
231int
232mdoc_parseln(struct mdoc *m, int ln, char *buf)
233{
234
235	/* If in error-mode, then we parse no more. */
236
237	if (MDOC_HALT & m->flags)
238		return(0);
239
240	return('.' == *buf ? parsemacro(m, ln, buf) :
241			parsetext(m, ln, buf));
242}
243
244
245void
246mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
247{
248	char		  buf[256];
249	va_list		  ap;
250
251	if (NULL == mdoc->cb.mdoc_msg)
252		return;
253
254	va_start(ap, fmt);
255	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
256	va_end(ap);
257	(*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
258}
259
260
261int
262mdoc_verr(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
263{
264	char		 buf[256];
265	va_list		 ap;
266
267	if (NULL == mdoc->cb.mdoc_err)
268		return(0);
269
270	va_start(ap, fmt);
271	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
272	va_end(ap);
273	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
274}
275
276
277int
278mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
279		enum mdoc_warn type, const char *fmt, ...)
280{
281	char		 buf[256];
282	va_list		 ap;
283
284	if (NULL == mdoc->cb.mdoc_warn)
285		return(0);
286
287	va_start(ap, fmt);
288	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
289	va_end(ap);
290	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
291}
292
293
294int
295mdoc_nwarn(struct mdoc *mdoc, const struct mdoc_node *node, enum mdoc_warn type,
296		const char *fmt, ...)
297{
298	char		 buf[256];
299	va_list		 ap;
300
301	if (NULL == mdoc->cb.mdoc_warn)
302		return(0);
303
304	va_start(ap, fmt);
305	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306	va_end(ap);
307	return((*mdoc->cb.mdoc_warn)(mdoc->data, node->line, node->pos, type,
308	    buf));
309}
310
311int
312mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...)
313{
314	char		 buf[256];
315	va_list		 ap;
316
317	if (NULL == mdoc->cb.mdoc_err)
318		return(0);
319
320	va_start(ap, fmt);
321	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
322	va_end(ap);
323	return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf));
324}
325
326
327int
328mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...)
329{
330	char		 buf[256];
331	va_list		 ap;
332
333	if (NULL == mdoc->cb.mdoc_warn)
334		return(0);
335
336	va_start(ap, fmt);
337	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
338	va_end(ap);
339	return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line,
340	    mdoc->last->pos, type, buf));
341}
342
343
344int
345mdoc_err(struct mdoc *mdoc, const char *fmt, ...)
346{
347	char		 buf[256];
348	va_list		 ap;
349
350	if (NULL == mdoc->cb.mdoc_err)
351		return(0);
352
353	va_start(ap, fmt);
354	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
355	va_end(ap);
356	return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line,
357	    mdoc->last->pos, buf));
358}
359
360
361void
362mdoc_msg(struct mdoc *mdoc, const char *fmt, ...)
363{
364	char		  buf[256];
365	va_list		  ap;
366
367	if (NULL == mdoc->cb.mdoc_msg)
368		return;
369
370	va_start(ap, fmt);
371	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
372	va_end(ap);
373	(*mdoc->cb.mdoc_msg)(mdoc->data, mdoc->last->line, mdoc->last->pos,
374	    buf);
375}
376
377
378void
379mdoc_pmsg(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
380{
381	char		  buf[256];
382	va_list		  ap;
383
384	if (NULL == mdoc->cb.mdoc_msg)
385		return;
386
387	va_start(ap, fmt);
388	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
389	va_end(ap);
390	(*mdoc->cb.mdoc_msg)(mdoc->data, line, pos, buf);
391}
392
393
394int
395mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type,
396		const char *fmt, ...)
397{
398	char		 buf[256];
399	va_list		 ap;
400
401	if (NULL == mdoc->cb.mdoc_warn)
402		return(0);
403
404	va_start(ap, fmt);
405	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
406	va_end(ap);
407	return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf));
408}
409
410int
411mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
412{
413	char		 buf[256];
414	va_list		 ap;
415
416	if (NULL == mdoc->cb.mdoc_err)
417		return(0);
418
419	va_start(ap, fmt);
420	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
421	va_end(ap);
422	return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf));
423}
424
425
426int
427mdoc_macro(struct mdoc *m, int tok,
428		int ln, int pp, int *pos, char *buf)
429{
430
431	/* FIXME - these should happen during validation. */
432
433	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
434			SEC_PROLOGUE != m->lastnamed)
435		return(perr(m, ln, pp, EPROLBODY));
436
437	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
438			SEC_PROLOGUE == m->lastnamed)
439		return(perr(m, ln, pp, EBODYPROL));
440
441	if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
442		return(perr(m, ln, pp, ENOCALL));
443
444	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
445}
446
447
448static int
449perr(struct mdoc *m, int line, int pos, enum merr type)
450{
451	char		*p;
452
453	p = NULL;
454	switch (type) {
455	case (ENOCALL):
456		p = "not callable";
457		break;
458	case (EPROLBODY):
459		p = "macro disallowed in document body";
460		break;
461	case (EBODYPROL):
462		p = "macro disallowed in document prologue";
463		break;
464	case (EMALLOC):
465		p = "memory exhausted";
466		break;
467	case (ETEXTPROL):
468		p = "text disallowed in document prologue";
469		break;
470	case (ENOBLANK):
471		p = "blank lines disallowed in non-literal contexts";
472		break;
473	case (ESPACE):
474		p = "whitespace disallowed after delimiter";
475		break;
476	}
477	assert(p);
478	return(mdoc_perr(m, line, pos, p));
479}
480
481
482static int
483node_append(struct mdoc *mdoc, struct mdoc_node *p)
484{
485
486	assert(mdoc->last);
487	assert(mdoc->first);
488	assert(MDOC_ROOT != p->type);
489
490	switch (mdoc->next) {
491	case (MDOC_NEXT_SIBLING):
492		mdoc->last->next = p;
493		p->prev = mdoc->last;
494		p->parent = mdoc->last->parent;
495		break;
496	case (MDOC_NEXT_CHILD):
497		mdoc->last->child = p;
498		p->parent = mdoc->last;
499		break;
500	default:
501		abort();
502		/* NOTREACHED */
503	}
504
505	if ( ! mdoc_valid_pre(mdoc, p))
506		return(0);
507	if ( ! mdoc_action_pre(mdoc, p))
508		return(0);
509
510	switch (p->type) {
511	case (MDOC_HEAD):
512		assert(MDOC_BLOCK == p->parent->type);
513		p->parent->head = p;
514		break;
515	case (MDOC_TAIL):
516		assert(MDOC_BLOCK == p->parent->type);
517		p->parent->tail = p;
518		break;
519	case (MDOC_BODY):
520		assert(MDOC_BLOCK == p->parent->type);
521		p->parent->body = p;
522		break;
523	default:
524		break;
525	}
526
527	mdoc->last = p;
528
529	switch (p->type) {
530	case (MDOC_TEXT):
531		if ( ! mdoc_valid_post(mdoc))
532			return(0);
533		if ( ! mdoc_action_post(mdoc))
534			return(0);
535		break;
536	default:
537		break;
538	}
539
540	return(1);
541}
542
543
544static struct mdoc_node *
545node_alloc(struct mdoc *mdoc, int line,
546		int pos, int tok, enum mdoc_type type)
547{
548	struct mdoc_node *p;
549
550	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
551		(void)verr(mdoc, EMALLOC);
552		return(NULL);
553	}
554
555	p->sec = mdoc->lastsec;
556	p->line = line;
557	p->pos = pos;
558	p->tok = tok;
559	if (MDOC_TEXT != (p->type = type))
560		assert(p->tok >= 0);
561
562	return(p);
563}
564
565
566int
567mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
568{
569	struct mdoc_node *p;
570
571	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
572	if (NULL == p)
573		return(0);
574	return(node_append(mdoc, p));
575}
576
577
578int
579mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
580{
581	struct mdoc_node *p;
582
583	assert(mdoc->first);
584	assert(mdoc->last);
585
586	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
587	if (NULL == p)
588		return(0);
589	return(node_append(mdoc, p));
590}
591
592
593int
594mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
595{
596	struct mdoc_node *p;
597
598	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
599	if (NULL == p)
600		return(0);
601	return(node_append(mdoc, p));
602}
603
604
605int
606mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
607		int tok, struct mdoc_arg *args)
608{
609	struct mdoc_node *p;
610
611	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
612	if (NULL == p)
613		return(0);
614	if ((p->args = args))
615		(args->refcnt)++;
616	return(node_append(mdoc, p));
617}
618
619
620int
621mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
622		int tok, struct mdoc_arg *args)
623{
624	struct mdoc_node *p;
625
626	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
627	if (NULL == p)
628		return(0);
629	if ((p->args = args))
630		(args->refcnt)++;
631	return(node_append(mdoc, p));
632}
633
634
635int
636mdoc_word_alloc(struct mdoc *mdoc,
637		int line, int pos, const char *word)
638{
639	struct mdoc_node *p;
640
641	p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
642	if (NULL == p)
643		return(0);
644	if (NULL == (p->string = strdup(word))) {
645		(void)verr(mdoc, EMALLOC);
646		return(0);
647	}
648	return(node_append(mdoc, p));
649}
650
651
652void
653mdoc_node_free(struct mdoc_node *p)
654{
655
656	if (p->string)
657		free(p->string);
658	if (p->args)
659		mdoc_argv_free(p->args);
660	free(p);
661}
662
663
664void
665mdoc_node_freelist(struct mdoc_node *p)
666{
667
668	if (p->child)
669		mdoc_node_freelist(p->child);
670	if (p->next)
671		mdoc_node_freelist(p->next);
672
673	mdoc_node_free(p);
674}
675
676
677/*
678 * Parse free-form text, that is, a line that does not begin with the
679 * control character.
680 */
681static int
682parsetext(struct mdoc *m, int line, char *buf)
683{
684
685	if (SEC_PROLOGUE == m->lastnamed)
686		return(perr(m, line, 0, ETEXTPROL));
687
688	if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
689		return(perr(m, line, 0, ENOBLANK));
690
691	if ( ! mdoc_word_alloc(m, line, 0, buf))
692		return(0);
693
694	m->next = MDOC_NEXT_SIBLING;
695	return(1);
696}
697
698
699static int
700macrowarn(struct mdoc *m, int ln, const char *buf)
701{
702	if ( ! (MDOC_IGN_MACRO & m->pflags))
703		return(mdoc_perr(m, ln, 1,
704				"unknown macro: %s%s",
705				buf, strlen(buf) > 3 ? "..." : ""));
706	return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
707				"unknown macro: %s%s",
708				buf, strlen(buf) > 3 ? "..." : ""));
709}
710
711
712
713/*
714 * Parse a macro line, that is, a line beginning with the control
715 * character.
716 */
717int
718parsemacro(struct mdoc *m, int ln, char *buf)
719{
720	int		  i, c;
721	char		  mac[5];
722
723	/* Comments and empties are quickly ignored. */
724
725	if (0 == buf[1])
726		return(1);
727
728	if (' ' == buf[1]) {
729		i = 2;
730		while (buf[i] && ' ' == buf[i])
731			i++;
732		if (0 == buf[i])
733			return(1);
734		return(perr(m, ln, 1, ESPACE));
735	}
736
737	if (buf[1] && '\\' == buf[1])
738		if (buf[2] && '\"' == buf[2])
739			return(1);
740
741	/* Copy the first word into a nil-terminated buffer. */
742
743	for (i = 1; i < 5; i++) {
744		if (0 == (mac[i - 1] = buf[i]))
745			break;
746		else if (' ' == buf[i])
747			break;
748	}
749
750	mac[i - 1] = 0;
751
752	if (i == 5 || i <= 2) {
753		if ( ! macrowarn(m, ln, mac))
754			goto err;
755		return(1);
756	}
757
758	if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
759		if ( ! macrowarn(m, ln, mac))
760			goto err;
761		return(1);
762	}
763
764	/* The macro is sane.  Jump to the next word. */
765
766	while (buf[i] && ' ' == buf[i])
767		i++;
768
769	/* Begin recursive parse sequence. */
770
771	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
772		goto err;
773
774	return(1);
775
776err:	/* Error out. */
777
778	m->flags |= MDOC_HALT;
779	return(0);
780}
781