mdoc.c revision 1.2
1/* $Id: mdoc.c,v 1.2 2009/04/15 20:10:20 miod Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the
7 * above copyright notice and this permission notice appear in all
8 * copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
14 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
15 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
16 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
17 * PERFORMANCE OF THIS SOFTWARE.
18 */
19#include <assert.h>
20#include <ctype.h>
21#include <stdarg.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include "libmdoc.h"
27
/*
 * Error codes private to this file.  perr() translates each code into
 * the message quoted beside it and forwards that to mdoc_perr().
 */
enum	merr {
	ENOCALL,	/* "not callable" */
	EBODYPROL,	/* "macro disallowed in document prologue" */
	EPROLBODY,	/* "macro disallowed in document body" */
	ESPACE,		/* "whitespace disallowed after delimiter" */
	ETEXTPROL,	/* "text disallowed in document prologue" */
	ENOBLANK,	/* "blank lines disallowed in non-literal contexts" */
	EMALLOC		/* "memory exhausted" */
};
37
/*
 * Printable names of the macros, MDOC_MAX slots in total.
 * NOTE(review): presumably indexed by macro token, in the same order
 * as the token constants in the header -- confirm before reordering.
 * The LINTED markers sit above rows containing the non-standard "\%"
 * escape sequence.
 */
const	char *const __mdoc_macronames[MDOC_MAX] = {
	"\\\"",		"Dd",		"Dt",		"Os",
	"Sh",		"Ss",		"Pp",		"D1",
	"Dl",		"Bd",		"Ed",		"Bl",
	"El",		"It",		"Ad",		"An",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	/* LINTED */
	"Xr",		"\%A",		"\%B",		"\%D",
	/* LINTED */
	"\%I",		"\%J",		"\%N",		"\%O",
	/* LINTED */
	"\%P",		"\%R",		"\%T",		"\%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Ap",
	"Lp",		"Lk",		"Mt",		"Brq",
	/* LINTED */
	"Bro",		"Brc",		"\%C",		"Es",
	/* LINTED */
	"En",		"Dx",		"\%Q"
	};
75
/*
 * Printable names of the macro arguments, MDOC_ARG_MAX slots in total.
 * NOTE(review): presumably indexed by argument identifier, in the same
 * order as the constants in the header -- confirm before reordering.
 */
const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
	"split",		"nosplit",		"ragged",
	"unfilled",		"literal",		"file",
	"offset",		"bullet",		"dash",
	"hyphen",		"item",			"enum",
	"tag",			"diag",			"hang",
	"ohang",		"inset",		"column",
	"width",		"compact",		"std",
	"filled",		"words",		"emphasis",
	"symbolic",		"nested"
	};
87
/*
 * Pointer aliases for the two name tables defined in this file; each
 * points at the first element of its array.
 */
const	char * const *mdoc_macronames = __mdoc_macronames;
const	char * const *mdoc_argnames = __mdoc_argnames;
90
/* Forward declarations of the helpers private to this file. */
static	void		  mdoc_free1(struct mdoc *);
static	int		  mdoc_alloc1(struct mdoc *);
static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
				int, enum mdoc_type);
static	int		  node_append(struct mdoc *,
				struct mdoc_node *);
static	int		  parsetext(struct mdoc *, int, char *);
static	int		  parsemacro(struct mdoc *, int, char *);
static	int		  macrowarn(struct mdoc *, int, const char *);
static	int		  perr(struct mdoc *, int, int, enum merr);

/*
 * Report error code (t) at the line and position of the most recently
 * appended node, (m)->last.  Note that (m) is evaluated three times.
 */
#define verr(m, t) perr((m), (m)->last->line, (m)->last->pos, (t))
103
104/*
105 * Get the first (root) node of the parse tree.
106 */
107const struct mdoc_node *
108mdoc_node(const struct mdoc *m)
109{
110
111	return(MDOC_HALT & m->flags ? NULL : m->first);
112}
113
114
115const struct mdoc_meta *
116mdoc_meta(const struct mdoc *m)
117{
118
119	return(MDOC_HALT & m->flags ? NULL : &m->meta);
120}
121
122
123static void
124mdoc_free1(struct mdoc *mdoc)
125{
126
127	if (mdoc->first)
128		mdoc_node_freelist(mdoc->first);
129	if (mdoc->meta.title)
130		free(mdoc->meta.title);
131	if (mdoc->meta.os)
132		free(mdoc->meta.os);
133	if (mdoc->meta.name)
134		free(mdoc->meta.name);
135	if (mdoc->meta.arch)
136		free(mdoc->meta.arch);
137	if (mdoc->meta.vol)
138		free(mdoc->meta.vol);
139}
140
141
142static int
143mdoc_alloc1(struct mdoc *mdoc)
144{
145
146	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
147	mdoc->flags = 0;
148	mdoc->lastnamed = mdoc->lastsec = 0;
149	mdoc->last = calloc(1, sizeof(struct mdoc_node));
150	if (NULL == mdoc->last)
151		return(0);
152
153	mdoc->first = mdoc->last;
154	mdoc->last->type = MDOC_ROOT;
155	mdoc->next = MDOC_NEXT_CHILD;
156	return(1);
157}
158
159
/*
 * Prepare the parser for another document: drop the node tree and
 * meta-data of the previous parse via mdoc_free1(), then rebuild the
 * initial state via mdoc_alloc1().  Returns whatever mdoc_alloc1()
 * returns (1 on success, 0 on allocation failure).
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return(ok);
}
172
173
174/*
175 * Completely free up all resources.
176 */
177void
178mdoc_free(struct mdoc *mdoc)
179{
180
181	mdoc_free1(mdoc);
182	if (mdoc->htab)
183		mdoc_hash_free(mdoc->htab);
184	free(mdoc);
185}
186
187
188struct mdoc *
189mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
190{
191	struct mdoc	*p;
192
193	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
194		return(NULL);
195	if (cb)
196		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
197
198	p->data = data;
199	p->pflags = pflags;
200
201	if (NULL == (p->htab = mdoc_hash_alloc())) {
202		free(p);
203		return(NULL);
204	} else if (mdoc_alloc1(p))
205		return(p);
206
207	free(p);
208	return(NULL);
209}
210
211
212/*
213 * Climb back up the parse tree, validating open scopes.  Mostly calls
214 * through to macro_end in macro.c.
215 */
216int
217mdoc_endparse(struct mdoc *m)
218{
219
220	if (MDOC_HALT & m->flags)
221		return(0);
222	else if (mdoc_macroend(m))
223		return(1);
224	m->flags |= MDOC_HALT;
225	return(0);
226}
227
228
229/*
230 * Main parse routine.  Parses a single line -- really just hands off to
231 * the macro or text parser.
232 */
233int
234mdoc_parseln(struct mdoc *m, int ln, char *buf)
235{
236
237	/* If in error-mode, then we parse no more. */
238
239	if (MDOC_HALT & m->flags)
240		return(0);
241
242	return('.' == *buf ? parsemacro(m, ln, buf) :
243			parsetext(m, ln, buf));
244}
245
246
247void
248mdoc_vmsg(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
249{
250	char		  buf[256];
251	va_list		  ap;
252
253	if (NULL == mdoc->cb.mdoc_msg)
254		return;
255
256	va_start(ap, fmt);
257	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
258	va_end(ap);
259	(*mdoc->cb.mdoc_msg)(mdoc->data, ln, pos, buf);
260}
261
262
263int
264mdoc_verr(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
265{
266	char		 buf[256];
267	va_list		 ap;
268
269	if (NULL == mdoc->cb.mdoc_err)
270		return(0);
271
272	va_start(ap, fmt);
273	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
274	va_end(ap);
275	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
276}
277
278
279int
280mdoc_vwarn(struct mdoc *mdoc, int ln, int pos,
281		enum mdoc_warn type, const char *fmt, ...)
282{
283	char		 buf[256];
284	va_list		 ap;
285
286	if (NULL == mdoc->cb.mdoc_warn)
287		return(0);
288
289	va_start(ap, fmt);
290	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
291	va_end(ap);
292	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, type, buf));
293}
294
295
296int
297mdoc_nwarn(struct mdoc *mdoc, const struct mdoc_node *node, enum mdoc_warn type,
298		const char *fmt, ...)
299{
300	char		 buf[256];
301	va_list		 ap;
302
303	if (NULL == mdoc->cb.mdoc_warn)
304		return(0);
305
306	va_start(ap, fmt);
307	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
308	va_end(ap);
309	return((*mdoc->cb.mdoc_warn)(mdoc->data, node->line, node->pos, type,
310	    buf));
311}
312
313int
314mdoc_nerr(struct mdoc *mdoc, const struct mdoc_node *node, const char *fmt, ...)
315{
316	char		 buf[256];
317	va_list		 ap;
318
319	if (NULL == mdoc->cb.mdoc_err)
320		return(0);
321
322	va_start(ap, fmt);
323	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
324	va_end(ap);
325	return((*mdoc->cb.mdoc_err)(mdoc->data, node->line, node->pos, buf));
326}
327
328
329int
330mdoc_warn(struct mdoc *mdoc, enum mdoc_warn type, const char *fmt, ...)
331{
332	char		 buf[256];
333	va_list		 ap;
334
335	if (NULL == mdoc->cb.mdoc_warn)
336		return(0);
337
338	va_start(ap, fmt);
339	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
340	va_end(ap);
341	return((*mdoc->cb.mdoc_warn)(mdoc->data, mdoc->last->line,
342	    mdoc->last->pos, type, buf));
343}
344
345
346int
347mdoc_err(struct mdoc *mdoc, const char *fmt, ...)
348{
349	char		 buf[256];
350	va_list		 ap;
351
352	if (NULL == mdoc->cb.mdoc_err)
353		return(0);
354
355	va_start(ap, fmt);
356	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
357	va_end(ap);
358	return((*mdoc->cb.mdoc_err)(mdoc->data, mdoc->last->line,
359	    mdoc->last->pos, buf));
360}
361
362
363void
364mdoc_msg(struct mdoc *mdoc, const char *fmt, ...)
365{
366	char		  buf[256];
367	va_list		  ap;
368
369	if (NULL == mdoc->cb.mdoc_msg)
370		return;
371
372	va_start(ap, fmt);
373	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
374	va_end(ap);
375	(*mdoc->cb.mdoc_msg)(mdoc->data, mdoc->last->line, mdoc->last->pos,
376	    buf);
377}
378
379
380void
381mdoc_pmsg(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
382{
383	char		  buf[256];
384	va_list		  ap;
385
386	if (NULL == mdoc->cb.mdoc_msg)
387		return;
388
389	va_start(ap, fmt);
390	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
391	va_end(ap);
392	(*mdoc->cb.mdoc_msg)(mdoc->data, line, pos, buf);
393}
394
395
396int
397mdoc_pwarn(struct mdoc *mdoc, int line, int pos, enum mdoc_warn type,
398		const char *fmt, ...)
399{
400	char		 buf[256];
401	va_list		 ap;
402
403	if (NULL == mdoc->cb.mdoc_warn)
404		return(0);
405
406	va_start(ap, fmt);
407	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
408	va_end(ap);
409	return((*mdoc->cb.mdoc_warn)(mdoc->data, line, pos, type, buf));
410}
411
412int
413mdoc_perr(struct mdoc *mdoc, int line, int pos, const char *fmt, ...)
414{
415	char		 buf[256];
416	va_list		 ap;
417
418	if (NULL == mdoc->cb.mdoc_err)
419		return(0);
420
421	va_start(ap, fmt);
422	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
423	va_end(ap);
424	return((*mdoc->cb.mdoc_err)(mdoc->data, line, pos, buf));
425}
426
427
428int
429mdoc_macro(struct mdoc *m, int tok,
430		int ln, int pp, int *pos, char *buf)
431{
432
433	/* FIXME - these should happen during validation. */
434
435	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
436			SEC_PROLOGUE != m->lastnamed)
437		return(perr(m, ln, pp, EPROLBODY));
438
439	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
440			SEC_PROLOGUE == m->lastnamed)
441		return(perr(m, ln, pp, EBODYPROL));
442
443	if (1 != pp && ! (MDOC_CALLABLE & mdoc_macros[tok].flags))
444		return(perr(m, ln, pp, ENOCALL));
445
446	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
447}
448
449
450static int
451perr(struct mdoc *m, int line, int pos, enum merr type)
452{
453	char		*p;
454
455	p = NULL;
456	switch (type) {
457	case (ENOCALL):
458		p = "not callable";
459		break;
460	case (EPROLBODY):
461		p = "macro disallowed in document body";
462		break;
463	case (EBODYPROL):
464		p = "macro disallowed in document prologue";
465		break;
466	case (EMALLOC):
467		p = "memory exhausted";
468		break;
469	case (ETEXTPROL):
470		p = "text disallowed in document prologue";
471		break;
472	case (ENOBLANK):
473		p = "blank lines disallowed in non-literal contexts";
474		break;
475	case (ESPACE):
476		p = "whitespace disallowed after delimiter";
477		break;
478	}
479	assert(p);
480	return(mdoc_perr(m, line, pos, p));
481}
482
483
484static int
485node_append(struct mdoc *mdoc, struct mdoc_node *p)
486{
487
488	assert(mdoc->last);
489	assert(mdoc->first);
490	assert(MDOC_ROOT != p->type);
491
492	switch (mdoc->next) {
493	case (MDOC_NEXT_SIBLING):
494		mdoc->last->next = p;
495		p->prev = mdoc->last;
496		p->parent = mdoc->last->parent;
497		break;
498	case (MDOC_NEXT_CHILD):
499		mdoc->last->child = p;
500		p->parent = mdoc->last;
501		break;
502	default:
503		abort();
504		/* NOTREACHED */
505	}
506
507	if ( ! mdoc_valid_pre(mdoc, p))
508		return(0);
509	if ( ! mdoc_action_pre(mdoc, p))
510		return(0);
511
512	switch (p->type) {
513	case (MDOC_HEAD):
514		assert(MDOC_BLOCK == p->parent->type);
515		p->parent->head = p;
516		break;
517	case (MDOC_TAIL):
518		assert(MDOC_BLOCK == p->parent->type);
519		p->parent->tail = p;
520		break;
521	case (MDOC_BODY):
522		assert(MDOC_BLOCK == p->parent->type);
523		p->parent->body = p;
524		break;
525	default:
526		break;
527	}
528
529	mdoc->last = p;
530
531	switch (p->type) {
532	case (MDOC_TEXT):
533		if ( ! mdoc_valid_post(mdoc))
534			return(0);
535		if ( ! mdoc_action_post(mdoc))
536			return(0);
537		break;
538	default:
539		break;
540	}
541
542	return(1);
543}
544
545
546static struct mdoc_node *
547node_alloc(struct mdoc *mdoc, int line,
548		int pos, int tok, enum mdoc_type type)
549{
550	struct mdoc_node *p;
551
552	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
553		(void)verr(mdoc, EMALLOC);
554		return(NULL);
555	}
556
557	p->sec = mdoc->lastsec;
558	p->line = line;
559	p->pos = pos;
560	p->tok = tok;
561	if (MDOC_TEXT != (p->type = type))
562		assert(p->tok >= 0);
563
564	return(p);
565}
566
567
568int
569mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, int tok)
570{
571	struct mdoc_node *p;
572
573	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
574	if (NULL == p)
575		return(0);
576	return(node_append(mdoc, p));
577}
578
579
580int
581mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, int tok)
582{
583	struct mdoc_node *p;
584
585	assert(mdoc->first);
586	assert(mdoc->last);
587
588	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
589	if (NULL == p)
590		return(0);
591	return(node_append(mdoc, p));
592}
593
594
595int
596mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, int tok)
597{
598	struct mdoc_node *p;
599
600	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
601	if (NULL == p)
602		return(0);
603	return(node_append(mdoc, p));
604}
605
606
607int
608mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
609		int tok, struct mdoc_arg *args)
610{
611	struct mdoc_node *p;
612
613	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
614	if (NULL == p)
615		return(0);
616	if ((p->args = args))
617		(args->refcnt)++;
618	return(node_append(mdoc, p));
619}
620
621
622int
623mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
624		int tok, struct mdoc_arg *args)
625{
626	struct mdoc_node *p;
627
628	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
629	if (NULL == p)
630		return(0);
631	if ((p->args = args))
632		(args->refcnt)++;
633	return(node_append(mdoc, p));
634}
635
636
637int
638mdoc_word_alloc(struct mdoc *mdoc,
639		int line, int pos, const char *word)
640{
641	struct mdoc_node *p;
642
643	p = node_alloc(mdoc, line, pos, -1, MDOC_TEXT);
644	if (NULL == p)
645		return(0);
646	if (NULL == (p->string = strdup(word))) {
647		(void)verr(mdoc, EMALLOC);
648		return(0);
649	}
650	return(node_append(mdoc, p));
651}
652
653
654void
655mdoc_node_free(struct mdoc_node *p)
656{
657
658	if (p->string)
659		free(p->string);
660	if (p->args)
661		mdoc_argv_free(p->args);
662	free(p);
663}
664
665
666void
667mdoc_node_freelist(struct mdoc_node *p)
668{
669
670	if (p->child)
671		mdoc_node_freelist(p->child);
672	if (p->next)
673		mdoc_node_freelist(p->next);
674
675	mdoc_node_free(p);
676}
677
678
679/*
680 * Parse free-form text, that is, a line that does not begin with the
681 * control character.
682 */
683static int
684parsetext(struct mdoc *m, int line, char *buf)
685{
686
687	if (SEC_PROLOGUE == m->lastnamed)
688		return(perr(m, line, 0, ETEXTPROL));
689
690	if (0 == buf[0] && ! (MDOC_LITERAL & m->flags))
691		return(perr(m, line, 0, ENOBLANK));
692
693	if ( ! mdoc_word_alloc(m, line, 0, buf))
694		return(0);
695
696	m->next = MDOC_NEXT_SIBLING;
697	return(1);
698}
699
700
701static int
702macrowarn(struct mdoc *m, int ln, const char *buf)
703{
704	if ( ! (MDOC_IGN_MACRO & m->pflags))
705		return(mdoc_perr(m, ln, 1,
706				"unknown macro: %s%s",
707				buf, strlen(buf) > 3 ? "..." : ""));
708	return(mdoc_pwarn(m, ln, 1, WARN_SYNTAX,
709				"unknown macro: %s%s",
710				buf, strlen(buf) > 3 ? "..." : ""));
711}
712
713
714
715/*
716 * Parse a macro line, that is, a line beginning with the control
717 * character.
718 */
719int
720parsemacro(struct mdoc *m, int ln, char *buf)
721{
722	int		  i, c;
723	char		  mac[5];
724
725	/* Comments and empties are quickly ignored. */
726
727	if (0 == buf[1])
728		return(1);
729
730	if (' ' == buf[1]) {
731		i = 2;
732		while (buf[i] && ' ' == buf[i])
733			i++;
734		if (0 == buf[i])
735			return(1);
736		return(perr(m, ln, 1, ESPACE));
737	}
738
739	if (buf[1] && '\\' == buf[1])
740		if (buf[2] && '\"' == buf[2])
741			return(1);
742
743	/* Copy the first word into a nil-terminated buffer. */
744
745	for (i = 1; i < 5; i++) {
746		if (0 == (mac[i - 1] = buf[i]))
747			break;
748		else if (' ' == buf[i])
749			break;
750	}
751
752	mac[i - 1] = 0;
753
754	if (i == 5 || i <= 2) {
755		if ( ! macrowarn(m, ln, mac))
756			goto err;
757		return(1);
758	}
759
760	if (MDOC_MAX == (c = mdoc_hash_find(m->htab, mac))) {
761		if ( ! macrowarn(m, ln, mac))
762			goto err;
763		return(1);
764	}
765
766	/* The macro is sane.  Jump to the next word. */
767
768	while (buf[i] && ' ' == buf[i])
769		i++;
770
771	/* Begin recursive parse sequence. */
772
773	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
774		goto err;
775
776	return(1);
777
778err:	/* Error out. */
779
780	m->flags |= MDOC_HALT;
781	return(0);
782}
783