mdoc.c revision 1.1.1.2
1/*	$Vendor-Id: mdoc.c,v 1.111 2009/10/26 07:11:07 kristaps Exp $ */
2/*
3 * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <sys/types.h>
18
19#include <assert.h>
20#include <ctype.h>
21#include <stdarg.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include "libmdoc.h"
27
28const	char *const __mdoc_merrnames[MERRMAX] = {
29	"trailing whitespace", /* ETAILWS */
30	"unexpected quoted parameter", /* EQUOTPARM */
31	"unterminated quoted parameter", /* EQUOTTERM */
32	"system: malloc error", /* EMALLOC */
33	"argument parameter suggested", /* EARGVAL */
34	"macro disallowed in prologue", /* EBODYPROL */
35	"macro disallowed in body", /* EPROLBODY */
36	"text disallowed in prologue", /* ETEXTPROL */
37	"blank line disallowed", /* ENOBLANK */
38	"text parameter too long", /* ETOOLONG */
39	"invalid escape sequence", /* EESCAPE */
40	"invalid character", /* EPRINT */
41	"document has no body", /* ENODAT */
42	"document has no prologue", /* ENOPROLOGUE */
43	"expected line arguments", /* ELINE */
44	"invalid AT&T argument", /* EATT */
45	"default name not yet set", /* ENAME */
46	"missing list type", /* ELISTTYPE */
47	"missing display type", /* EDISPTYPE */
48	"too many display types", /* EMULTIDISP */
49	"too many list types", /* EMULTILIST */
50	"NAME section must be first", /* ESECNAME */
51	"badly-formed NAME section", /* ENAMESECINC */
52	"argument repeated", /* EARGREP */
53	"expected boolean parameter", /* EBOOL */
54	"inconsistent column syntax", /* ECOLMIS */
55	"nested display invalid", /* ENESTDISP */
56	"width argument missing", /* EMISSWIDTH */
57	"invalid section for this manual section", /* EWRONGMSEC */
58	"section out of conventional order", /* ESECOOO */
59	"section repeated", /* ESECREP */
60	"invalid standard argument", /* EBADSTAND */
61	"multi-line arguments discouraged", /* ENOMULTILINE */
62	"multi-line arguments suggested", /* EMULTILINE */
63	"line arguments discouraged", /* ENOLINE */
64	"prologue macro out of conventional order", /* EPROLOOO */
65	"prologue macro repeated", /* EPROLREP */
66	"invalid manual section", /* EBADMSEC */
67	"invalid section", /* EBADSEC */
68	"invalid font mode", /* EFONT */
69	"invalid date syntax", /* EBADDATE */
70	"invalid number format", /* ENUMFMT */
71	"superfluous width argument", /* ENOWIDTH */
72	"system: utsname error", /* EUTSNAME */
73	"obsolete macro", /* EOBS */
74	"end-of-line scope violation", /* EIMPBRK */
75	"empty macro ignored", /* EIGNE */
76	"unclosed explicit scope", /* EOPEN */
77	"unterminated quoted phrase", /* EQUOTPHR */
78	"closure macro without prior context", /* ENOCTX */
79	"no description found for library", /* ELIB */
80	"bad child for parent context", /* EBADCHILD */
81	"list arguments preceding type", /* ENOTYPE */
82};
83
84const	char *const __mdoc_macronames[MDOC_MAX] = {
85	"Ap",		"Dd",		"Dt",		"Os",
86	"Sh",		"Ss",		"Pp",		"D1",
87	"Dl",		"Bd",		"Ed",		"Bl",
88	"El",		"It",		"Ad",		"An",
89	"Ar",		"Cd",		"Cm",		"Dv",
90	"Er",		"Ev",		"Ex",		"Fa",
91	"Fd",		"Fl",		"Fn",		"Ft",
92	"Ic",		"In",		"Li",		"Nd",
93	"Nm",		"Op",		"Ot",		"Pa",
94	"Rv",		"St",		"Va",		"Vt",
95	/* LINTED */
96	"Xr",		"\%A",		"\%B",		"\%D",
97	/* LINTED */
98	"\%I",		"\%J",		"\%N",		"\%O",
99	/* LINTED */
100	"\%P",		"\%R",		"\%T",		"\%V",
101	"Ac",		"Ao",		"Aq",		"At",
102	"Bc",		"Bf",		"Bo",		"Bq",
103	"Bsx",		"Bx",		"Db",		"Dc",
104	"Do",		"Dq",		"Ec",		"Ef",
105	"Em",		"Eo",		"Fx",		"Ms",
106	"No",		"Ns",		"Nx",		"Ox",
107	"Pc",		"Pf",		"Po",		"Pq",
108	"Qc",		"Ql",		"Qo",		"Qq",
109	"Re",		"Rs",		"Sc",		"So",
110	"Sq",		"Sm",		"Sx",		"Sy",
111	"Tn",		"Ux",		"Xc",		"Xo",
112	"Fo",		"Fc",		"Oo",		"Oc",
113	"Bk",		"Ek",		"Bt",		"Hf",
114	"Fr",		"Ud",		"Lb",		"Lp",
115	"Lk",		"Mt",		"Brq",		"Bro",
116	/* LINTED */
117	"Brc",		"\%C",		"Es",		"En",
118	/* LINTED */
119	"Dx",		"\%Q",		"br",		"sp",
120	/* LINTED */
121	"\%U"
122	};
123
124const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
125	"split",		"nosplit",		"ragged",
126	"unfilled",		"literal",		"file",
127	"offset",		"bullet",		"dash",
128	"hyphen",		"item",			"enum",
129	"tag",			"diag",			"hang",
130	"ohang",		"inset",		"column",
131	"width",		"compact",		"std",
132	"filled",		"words",		"emphasis",
133	"symbolic",		"nested",		"centered"
134	};
135
136const	char * const *mdoc_macronames = __mdoc_macronames;
137const	char * const *mdoc_argnames = __mdoc_argnames;
138
/* Forward declarations of the file-local helpers defined below. */
static void mdoc_free1(struct mdoc *mdoc);
static int mdoc_alloc1(struct mdoc *mdoc);
static struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
		int tok, enum mdoc_type type);
static int node_append(struct mdoc *mdoc, struct mdoc_node *p);
static int parsetext(struct mdoc *m, int line, char *buf);
static int parsemacro(struct mdoc *m, int ln, char *buf);
static int macrowarn(struct mdoc *m, int ln, const char *buf);
static int pstring(struct mdoc *m, int line, int pos,
		const char *p, size_t len);

/* strlcpy() is declared by hand when building on Linux. */
#ifdef __linux__
extern size_t strlcpy(char *, const char *, size_t);
#endif
154
155
156const struct mdoc_node *
157mdoc_node(const struct mdoc *m)
158{
159
160	return(MDOC_HALT & m->flags ? NULL : m->first);
161}
162
163
164const struct mdoc_meta *
165mdoc_meta(const struct mdoc *m)
166{
167
168	return(MDOC_HALT & m->flags ? NULL : &m->meta);
169}
170
171
172/*
173 * Frees volatile resources (parse tree, meta-data, fields).
174 */
175static void
176mdoc_free1(struct mdoc *mdoc)
177{
178
179	if (mdoc->first)
180		mdoc_node_freelist(mdoc->first);
181	if (mdoc->meta.title)
182		free(mdoc->meta.title);
183	if (mdoc->meta.os)
184		free(mdoc->meta.os);
185	if (mdoc->meta.name)
186		free(mdoc->meta.name);
187	if (mdoc->meta.arch)
188		free(mdoc->meta.arch);
189	if (mdoc->meta.vol)
190		free(mdoc->meta.vol);
191}
192
193
194/*
195 * Allocate all volatile resources (parse tree, meta-data, fields).
196 */
197static int
198mdoc_alloc1(struct mdoc *mdoc)
199{
200
201	bzero(&mdoc->meta, sizeof(struct mdoc_meta));
202	mdoc->flags = 0;
203	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
204	mdoc->last = calloc(1, sizeof(struct mdoc_node));
205	if (NULL == mdoc->last)
206		return(0);
207
208	mdoc->first = mdoc->last;
209	mdoc->last->type = MDOC_ROOT;
210	mdoc->next = MDOC_NEXT_CHILD;
211	return(1);
212}
213
214
/*
 * Discard the current parse (mdoc_free1()) and set up a fresh, empty
 * one (mdoc_alloc1()), so the same parser object can be fed a new
 * document.  Fields that neither helper touches are carried over.
 * Returns whatever mdoc_alloc1() returns: 1 on success, 0 on failure.
 */
int
mdoc_reset(struct mdoc *mdoc)
{
	int		 ok;

	mdoc_free1(mdoc);
	ok = mdoc_alloc1(mdoc);
	return ok;
}
228
229
/*
 * Tear the parser down for good: release the per-parse state through
 * mdoc_free1() and then the parser object itself.  The pointer must
 * not be used after this call.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	/* Per-parse resources first, while the object is still valid. */
	mdoc_free1(mdoc);

	free(mdoc);
}
241
242
243/*
244 * Allocate volatile and non-volatile parse resources.
245 */
246struct mdoc *
247mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
248{
249	struct mdoc	*p;
250
251	if (NULL == (p = calloc(1, sizeof(struct mdoc))))
252		return(NULL);
253	if (cb)
254		(void)memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
255
256	mdoc_hash_init();
257
258	p->data = data;
259	p->pflags = pflags;
260
261	if (mdoc_alloc1(p))
262		return(p);
263
264	free(p);
265	return(NULL);
266}
267
268
269/*
270 * Climb back up the parse tree, validating open scopes.  Mostly calls
271 * through to macro_end() in macro.c.
272 */
273int
274mdoc_endparse(struct mdoc *m)
275{
276
277	if (MDOC_HALT & m->flags)
278		return(0);
279	else if (mdoc_macroend(m))
280		return(1);
281	m->flags |= MDOC_HALT;
282	return(0);
283}
284
285
286/*
287 * Main parse routine.  Parses a single line -- really just hands off to
288 * the macro (parsemacro()) or text parser (parsetext()).
289 */
290int
291mdoc_parseln(struct mdoc *m, int ln, char *buf)
292{
293
294	if (MDOC_HALT & m->flags)
295		return(0);
296
297	return('.' == *buf ? parsemacro(m, ln, buf) :
298			parsetext(m, ln, buf));
299}
300
301
302int
303mdoc_verr(struct mdoc *mdoc, int ln, int pos,
304		const char *fmt, ...)
305{
306	char		 buf[256];
307	va_list		 ap;
308
309	if (NULL == mdoc->cb.mdoc_err)
310		return(0);
311
312	va_start(ap, fmt);
313	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
314	va_end(ap);
315
316	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
317}
318
319
320int
321mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
322{
323	char		 buf[256];
324	va_list		 ap;
325
326	if (NULL == mdoc->cb.mdoc_warn)
327		return(0);
328
329	va_start(ap, fmt);
330	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
331	va_end(ap);
332
333	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
334}
335
336
337int
338mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
339{
340	const char	*p;
341
342	p = __mdoc_merrnames[(int)type];
343	assert(p);
344
345	if (iserr)
346		return(mdoc_verr(m, line, pos, p));
347
348	return(mdoc_vwarn(m, line, pos, p));
349}
350
351
352int
353mdoc_macro(struct mdoc *m, int tok,
354		int ln, int pp, int *pos, char *buf)
355{
356	/*
357	 * If we're in the prologue, deny "body" macros.  Similarly, if
358	 * we're in the body, deny prologue calls.
359	 */
360	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
361			MDOC_PBODY & m->flags)
362		return(mdoc_perr(m, ln, pp, EPROLBODY));
363	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
364			! (MDOC_PBODY & m->flags))
365		return(mdoc_perr(m, ln, pp, EBODYPROL));
366
367	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
368}
369
370
371static int
372node_append(struct mdoc *mdoc, struct mdoc_node *p)
373{
374
375	assert(mdoc->last);
376	assert(mdoc->first);
377	assert(MDOC_ROOT != p->type);
378
379	switch (mdoc->next) {
380	case (MDOC_NEXT_SIBLING):
381		mdoc->last->next = p;
382		p->prev = mdoc->last;
383		p->parent = mdoc->last->parent;
384		break;
385	case (MDOC_NEXT_CHILD):
386		mdoc->last->child = p;
387		p->parent = mdoc->last;
388		break;
389	default:
390		abort();
391		/* NOTREACHED */
392	}
393
394	p->parent->nchild++;
395
396	if ( ! mdoc_valid_pre(mdoc, p))
397		return(0);
398	if ( ! mdoc_action_pre(mdoc, p))
399		return(0);
400
401	switch (p->type) {
402	case (MDOC_HEAD):
403		assert(MDOC_BLOCK == p->parent->type);
404		p->parent->head = p;
405		break;
406	case (MDOC_TAIL):
407		assert(MDOC_BLOCK == p->parent->type);
408		p->parent->tail = p;
409		break;
410	case (MDOC_BODY):
411		assert(MDOC_BLOCK == p->parent->type);
412		p->parent->body = p;
413		break;
414	default:
415		break;
416	}
417
418	mdoc->last = p;
419
420	switch (p->type) {
421	case (MDOC_TEXT):
422		if ( ! mdoc_valid_post(mdoc))
423			return(0);
424		if ( ! mdoc_action_post(mdoc))
425			return(0);
426		break;
427	default:
428		break;
429	}
430
431	return(1);
432}
433
434
435static struct mdoc_node *
436node_alloc(struct mdoc *m, int line,
437		int pos, int tok, enum mdoc_type type)
438{
439	struct mdoc_node *p;
440
441	if (NULL == (p = calloc(1, sizeof(struct mdoc_node)))) {
442		(void)mdoc_nerr(m, m->last, EMALLOC);
443		return(NULL);
444	}
445
446	p->sec = m->lastsec;
447	p->line = line;
448	p->pos = pos;
449	p->tok = tok;
450	if (MDOC_TEXT != (p->type = type))
451		assert(p->tok >= 0);
452
453	return(p);
454}
455
456
457int
458mdoc_tail_alloc(struct mdoc *m, int line, int pos, int tok)
459{
460	struct mdoc_node *p;
461
462	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
463	if (NULL == p)
464		return(0);
465	if ( ! node_append(m, p))
466		return(0);
467	m->next = MDOC_NEXT_CHILD;
468	return(1);
469}
470
471
472int
473mdoc_head_alloc(struct mdoc *m, int line, int pos, int tok)
474{
475	struct mdoc_node *p;
476
477	assert(m->first);
478	assert(m->last);
479
480	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
481	if (NULL == p)
482		return(0);
483	if ( ! node_append(m, p))
484		return(0);
485	m->next = MDOC_NEXT_CHILD;
486	return(1);
487}
488
489
490int
491mdoc_body_alloc(struct mdoc *m, int line, int pos, int tok)
492{
493	struct mdoc_node *p;
494
495	p = node_alloc(m, line, pos, tok, MDOC_BODY);
496	if (NULL == p)
497		return(0);
498	if ( ! node_append(m, p))
499		return(0);
500	m->next = MDOC_NEXT_CHILD;
501	return(1);
502}
503
504
505int
506mdoc_block_alloc(struct mdoc *m, int line, int pos,
507		int tok, struct mdoc_arg *args)
508{
509	struct mdoc_node *p;
510
511	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
512	if (NULL == p)
513		return(0);
514	p->args = args;
515	if (p->args)
516		(args->refcnt)++;
517	if ( ! node_append(m, p))
518		return(0);
519	m->next = MDOC_NEXT_CHILD;
520	return(1);
521}
522
523
524int
525mdoc_elem_alloc(struct mdoc *m, int line, int pos,
526		int tok, struct mdoc_arg *args)
527{
528	struct mdoc_node *p;
529
530	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
531	if (NULL == p)
532		return(0);
533	p->args = args;
534	if (p->args)
535		(args->refcnt)++;
536	if ( ! node_append(m, p))
537		return(0);
538	m->next = MDOC_NEXT_CHILD;
539	return(1);
540}
541
542
543static int
544pstring(struct mdoc *m, int line, int pos, const char *p, size_t len)
545{
546	struct mdoc_node *n;
547	size_t		  sv;
548
549	n = node_alloc(m, line, pos, -1, MDOC_TEXT);
550	if (NULL == n)
551		return(mdoc_nerr(m, m->last, EMALLOC));
552
553	n->string = malloc(len + 1);
554	if (NULL == n->string) {
555		free(n);
556		return(mdoc_nerr(m, m->last, EMALLOC));
557	}
558
559	sv = strlcpy(n->string, p, len + 1);
560
561	/* Prohibit truncation. */
562	assert(sv < len + 1);
563
564	if ( ! node_append(m, n))
565		return(0);
566	m->next = MDOC_NEXT_SIBLING;
567	return(1);
568}
569
570
/*
 * Append the whole NUL-terminated string "p" to the tree as a text
 * node.  Returns the result of pstring().
 */
int
mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
{
	const size_t	 len = strlen(p);

	return pstring(m, line, pos, p, len);
}
577
578
579void
580mdoc_node_free(struct mdoc_node *p)
581{
582
583	if (p->parent)
584		p->parent->nchild--;
585	if (p->string)
586		free(p->string);
587	if (p->args)
588		mdoc_argv_free(p->args);
589	free(p);
590}
591
592
593void
594mdoc_node_freelist(struct mdoc_node *p)
595{
596
597	if (p->child)
598		mdoc_node_freelist(p->child);
599	if (p->next)
600		mdoc_node_freelist(p->next);
601
602	assert(0 == p->nchild);
603	mdoc_node_free(p);
604}
605
606
607/*
608 * Parse free-form text, that is, a line that does not begin with the
609 * control character.
610 */
611static int
612parsetext(struct mdoc *m, int line, char *buf)
613{
614	int		 i, j;
615
616	if (SEC_NONE == m->lastnamed)
617		return(mdoc_perr(m, line, 0, ETEXTPROL));
618
619	/*
620	 * If in literal mode, then pass the buffer directly to the
621	 * back-end, as it should be preserved as a single term.
622	 */
623
624	if (MDOC_LITERAL & m->flags)
625		return(mdoc_word_alloc(m, line, 0, buf));
626
627	/* Disallow blank/white-space lines in non-literal mode. */
628
629	for (i = 0; ' ' == buf[i]; i++)
630		/* Skip leading whitespace. */ ;
631	if (0 == buf[i])
632		return(mdoc_perr(m, line, 0, ENOBLANK));
633
634	/*
635	 * Break apart a free-form line into tokens.  Spaces are
636	 * stripped out of the input.
637	 */
638
639	for (j = i; buf[i]; i++) {
640		if (' ' != buf[i])
641			continue;
642
643		/* Escaped whitespace. */
644		if (i && ' ' == buf[i] && '\\' == buf[i - 1])
645			continue;
646
647		buf[i++] = 0;
648		if ( ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
649			return(0);
650
651		for ( ; ' ' == buf[i]; i++)
652			/* Skip trailing whitespace. */ ;
653
654		j = i;
655		if (0 == buf[i])
656			break;
657	}
658
659	if (j != i && ! pstring(m, line, j, &buf[j], (size_t)(i - j)))
660		return(0);
661
662	m->next = MDOC_NEXT_SIBLING;
663	return(1);
664}
665
666
667
668static int
669macrowarn(struct mdoc *m, int ln, const char *buf)
670{
671	if ( ! (MDOC_IGN_MACRO & m->pflags))
672		return(mdoc_verr(m, ln, 0,
673				"unknown macro: %s%s",
674				buf, strlen(buf) > 3 ? "..." : ""));
675	return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
676				buf, strlen(buf) > 3 ? "..." : ""));
677}
678
679
680/*
681 * Parse a macro line, that is, a line beginning with the control
682 * character.
683 */
684int
685parsemacro(struct mdoc *m, int ln, char *buf)
686{
687	int		  i, j, c;
688	char		  mac[5];
689
690	/* Empty lines are ignored. */
691
692	if (0 == buf[1])
693		return(1);
694
695	i = 1;
696
697	/* Accept whitespace after the initial control char. */
698
699	if (' ' == buf[i]) {
700		i++;
701		while (buf[i] && ' ' == buf[i])
702			i++;
703		if (0 == buf[i])
704			return(1);
705	}
706
707	/* Copy the first word into a nil-terminated buffer. */
708
709	for (j = 0; j < 4; j++, i++) {
710		if (0 == (mac[j] = buf[i]))
711			break;
712		else if (' ' == buf[i])
713			break;
714
715		/* Check for invalid characters. */
716
717		if (isgraph((u_char)buf[i]))
718			continue;
719		return(mdoc_perr(m, ln, i, EPRINT));
720	}
721
722	mac[j] = 0;
723
724	if (j == 4 || j < 2) {
725		if ( ! macrowarn(m, ln, mac))
726			goto err;
727		return(1);
728	}
729
730	if (MDOC_MAX == (c = mdoc_hash_find(mac))) {
731		if ( ! macrowarn(m, ln, mac))
732			goto err;
733		return(1);
734	}
735
736	/* The macro is sane.  Jump to the next word. */
737
738	while (buf[i] && ' ' == buf[i])
739		i++;
740
741	/*
742	 * Begin recursive parse sequence.  Since we're at the start of
743	 * the line, we don't need to do callable/parseable checks.
744	 */
745	if ( ! mdoc_macro(m, c, ln, 1, &i, buf))
746		goto err;
747
748	return(1);
749
750err:	/* Error out. */
751
752	m->flags |= MDOC_HALT;
753	return(0);
754}
755
756
757