1241675Suqs/*	$Id: mdoc.c,v 1.196 2011/09/30 00:13:28 schwarze Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4241675Suqs * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#ifdef HAVE_CONFIG_H
19241675Suqs#include "config.h"
20241675Suqs#endif
21241675Suqs
22241675Suqs#include <sys/types.h>
23241675Suqs
24241675Suqs#include <assert.h>
25241675Suqs#include <stdarg.h>
26241675Suqs#include <stdio.h>
27241675Suqs#include <stdlib.h>
28241675Suqs#include <string.h>
29241675Suqs#include <time.h>
30241675Suqs
31241675Suqs#include "mdoc.h"
32241675Suqs#include "mandoc.h"
33241675Suqs#include "libmdoc.h"
34241675Suqs#include "libmandoc.h"
35241675Suqs
36241675Suqsconst	char *const __mdoc_macronames[MDOC_MAX] = {
37241675Suqs	"Ap",		"Dd",		"Dt",		"Os",
38241675Suqs	"Sh",		"Ss",		"Pp",		"D1",
39241675Suqs	"Dl",		"Bd",		"Ed",		"Bl",
40241675Suqs	"El",		"It",		"Ad",		"An",
41241675Suqs	"Ar",		"Cd",		"Cm",		"Dv",
42241675Suqs	"Er",		"Ev",		"Ex",		"Fa",
43241675Suqs	"Fd",		"Fl",		"Fn",		"Ft",
44241675Suqs	"Ic",		"In",		"Li",		"Nd",
45241675Suqs	"Nm",		"Op",		"Ot",		"Pa",
46241675Suqs	"Rv",		"St",		"Va",		"Vt",
47241675Suqs	/* LINTED */
48241675Suqs	"Xr",		"%A",		"%B",		"%D",
49241675Suqs	/* LINTED */
50241675Suqs	"%I",		"%J",		"%N",		"%O",
51241675Suqs	/* LINTED */
52241675Suqs	"%P",		"%R",		"%T",		"%V",
53241675Suqs	"Ac",		"Ao",		"Aq",		"At",
54241675Suqs	"Bc",		"Bf",		"Bo",		"Bq",
55241675Suqs	"Bsx",		"Bx",		"Db",		"Dc",
56241675Suqs	"Do",		"Dq",		"Ec",		"Ef",
57241675Suqs	"Em",		"Eo",		"Fx",		"Ms",
58241675Suqs	"No",		"Ns",		"Nx",		"Ox",
59241675Suqs	"Pc",		"Pf",		"Po",		"Pq",
60241675Suqs	"Qc",		"Ql",		"Qo",		"Qq",
61241675Suqs	"Re",		"Rs",		"Sc",		"So",
62241675Suqs	"Sq",		"Sm",		"Sx",		"Sy",
63241675Suqs	"Tn",		"Ux",		"Xc",		"Xo",
64241675Suqs	"Fo",		"Fc",		"Oo",		"Oc",
65241675Suqs	"Bk",		"Ek",		"Bt",		"Hf",
66241675Suqs	"Fr",		"Ud",		"Lb",		"Lp",
67241675Suqs	"Lk",		"Mt",		"Brq",		"Bro",
68241675Suqs	/* LINTED */
69241675Suqs	"Brc",		"%C",		"Es",		"En",
70241675Suqs	/* LINTED */
71241675Suqs	"Dx",		"%Q",		"br",		"sp",
72241675Suqs	/* LINTED */
73241675Suqs	"%U",		"Ta"
74241675Suqs	};
75241675Suqs
76241675Suqsconst	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
77241675Suqs	"split",		"nosplit",		"ragged",
78241675Suqs	"unfilled",		"literal",		"file",
79241675Suqs	"offset",		"bullet",		"dash",
80241675Suqs	"hyphen",		"item",			"enum",
81241675Suqs	"tag",			"diag",			"hang",
82241675Suqs	"ohang",		"inset",		"column",
83241675Suqs	"width",		"compact",		"std",
84241675Suqs	"filled",		"words",		"emphasis",
85241675Suqs	"symbolic",		"nested",		"centered"
86241675Suqs	};
87241675Suqs
/*
 * Externally visible pointers to the first element of the macro-name
 * and argument-name tables defined in this file.
 */
const	char * const *mdoc_macronames = __mdoc_macronames;
const	char * const *mdoc_argnames = __mdoc_argnames;
90241675Suqs
/*
 * Forward declarations of the file-local helpers: node memory
 * management, parser state set-up and tear-down, node allocation and
 * linking, and the per-line text and macro parsers.
 */
static	void		  mdoc_node_free(struct mdoc_node *);
static	void		  mdoc_node_unlink(struct mdoc *,
				struct mdoc_node *);
static	void		  mdoc_free1(struct mdoc *);
static	void		  mdoc_alloc1(struct mdoc *);
static	struct mdoc_node *node_alloc(struct mdoc *, int, int,
				enum mdoct, enum mdoc_type);
static	int		  node_append(struct mdoc *,
				struct mdoc_node *);
/* mdoc_preptext() is compiled out, together with its definition. */
#if 0
static	int		  mdoc_preptext(struct mdoc *, int, char *, int);
#endif
static	int		  mdoc_ptext(struct mdoc *, int, char *, int);
static	int		  mdoc_pmacro(struct mdoc *, int, char *, int);
106241675Suqsconst struct mdoc_node *
107241675Suqsmdoc_node(const struct mdoc *m)
108241675Suqs{
109241675Suqs
110241675Suqs	assert( ! (MDOC_HALT & m->flags));
111241675Suqs	return(m->first);
112241675Suqs}
113241675Suqs
114241675Suqs
115241675Suqsconst struct mdoc_meta *
116241675Suqsmdoc_meta(const struct mdoc *m)
117241675Suqs{
118241675Suqs
119241675Suqs	assert( ! (MDOC_HALT & m->flags));
120241675Suqs	return(&m->meta);
121241675Suqs}
122241675Suqs
123241675Suqs
124241675Suqs/*
125241675Suqs * Frees volatile resources (parse tree, meta-data, fields).
126241675Suqs */
127241675Suqsstatic void
128241675Suqsmdoc_free1(struct mdoc *mdoc)
129241675Suqs{
130241675Suqs
131241675Suqs	if (mdoc->first)
132241675Suqs		mdoc_node_delete(mdoc, mdoc->first);
133241675Suqs	if (mdoc->meta.title)
134241675Suqs		free(mdoc->meta.title);
135241675Suqs	if (mdoc->meta.os)
136241675Suqs		free(mdoc->meta.os);
137241675Suqs	if (mdoc->meta.name)
138241675Suqs		free(mdoc->meta.name);
139241675Suqs	if (mdoc->meta.arch)
140241675Suqs		free(mdoc->meta.arch);
141241675Suqs	if (mdoc->meta.vol)
142241675Suqs		free(mdoc->meta.vol);
143241675Suqs	if (mdoc->meta.msec)
144241675Suqs		free(mdoc->meta.msec);
145241675Suqs	if (mdoc->meta.date)
146241675Suqs		free(mdoc->meta.date);
147241675Suqs}
148241675Suqs
149241675Suqs
150241675Suqs/*
151241675Suqs * Allocate all volatile resources (parse tree, meta-data, fields).
152241675Suqs */
153241675Suqsstatic void
154241675Suqsmdoc_alloc1(struct mdoc *mdoc)
155241675Suqs{
156241675Suqs
157241675Suqs	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
158241675Suqs	mdoc->flags = 0;
159241675Suqs	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
160241675Suqs	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
161241675Suqs	mdoc->first = mdoc->last;
162241675Suqs	mdoc->last->type = MDOC_ROOT;
163241675Suqs	mdoc->last->tok = MDOC_MAX;
164241675Suqs	mdoc->next = MDOC_NEXT_CHILD;
165241675Suqs}
166241675Suqs
167241675Suqs
/*
 * Reset the parser for a new document: release the volatile resources
 * with mdoc_free1(), then re-initialise them with mdoc_alloc1().
 * Neither helper touches the parse and roff pointers stored by
 * mdoc_alloc(), so those survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);
	mdoc_alloc1(mdoc);
}
181241675Suqs
182241675Suqs
/*
 * Completely free the parser: first the volatile resources (via
 * mdoc_free1()), then the struct mdoc itself.
 * After invocation, the pointer is no longer usable.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);
	free(mdoc);
}
194241675Suqs
195241675Suqs
196241675Suqs/*
197241675Suqs * Allocate volatile and non-volatile parse resources.
198241675Suqs */
199241675Suqsstruct mdoc *
200241675Suqsmdoc_alloc(struct roff *roff, struct mparse *parse)
201241675Suqs{
202241675Suqs	struct mdoc	*p;
203241675Suqs
204241675Suqs	p = mandoc_calloc(1, sizeof(struct mdoc));
205241675Suqs
206241675Suqs	p->parse = parse;
207241675Suqs	p->roff = roff;
208241675Suqs
209241675Suqs	mdoc_hash_init();
210241675Suqs	mdoc_alloc1(p);
211241675Suqs	return(p);
212241675Suqs}
213241675Suqs
214241675Suqs
215241675Suqs/*
216241675Suqs * Climb back up the parse tree, validating open scopes.  Mostly calls
217241675Suqs * through to macro_end() in macro.c.
218241675Suqs */
219241675Suqsint
220241675Suqsmdoc_endparse(struct mdoc *m)
221241675Suqs{
222241675Suqs
223241675Suqs	assert( ! (MDOC_HALT & m->flags));
224241675Suqs	if (mdoc_macroend(m))
225241675Suqs		return(1);
226241675Suqs	m->flags |= MDOC_HALT;
227241675Suqs	return(0);
228241675Suqs}
229241675Suqs
230241675Suqsint
231241675Suqsmdoc_addeqn(struct mdoc *m, const struct eqn *ep)
232241675Suqs{
233241675Suqs	struct mdoc_node *n;
234241675Suqs
235241675Suqs	assert( ! (MDOC_HALT & m->flags));
236241675Suqs
237241675Suqs	/* No text before an initial macro. */
238241675Suqs
239241675Suqs	if (SEC_NONE == m->lastnamed) {
240241675Suqs		mdoc_pmsg(m, ep->ln, ep->pos, MANDOCERR_NOTEXT);
241241675Suqs		return(1);
242241675Suqs	}
243241675Suqs
244241675Suqs	n = node_alloc(m, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
245241675Suqs	n->eqn = ep;
246241675Suqs
247241675Suqs	if ( ! node_append(m, n))
248241675Suqs		return(0);
249241675Suqs
250241675Suqs	m->next = MDOC_NEXT_SIBLING;
251241675Suqs	return(1);
252241675Suqs}
253241675Suqs
254241675Suqsint
255241675Suqsmdoc_addspan(struct mdoc *m, const struct tbl_span *sp)
256241675Suqs{
257241675Suqs	struct mdoc_node *n;
258241675Suqs
259241675Suqs	assert( ! (MDOC_HALT & m->flags));
260241675Suqs
261241675Suqs	/* No text before an initial macro. */
262241675Suqs
263241675Suqs	if (SEC_NONE == m->lastnamed) {
264241675Suqs		mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT);
265241675Suqs		return(1);
266241675Suqs	}
267241675Suqs
268241675Suqs	n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL);
269241675Suqs	n->span = sp;
270241675Suqs
271241675Suqs	if ( ! node_append(m, n))
272241675Suqs		return(0);
273241675Suqs
274241675Suqs	m->next = MDOC_NEXT_SIBLING;
275241675Suqs	return(1);
276241675Suqs}
277241675Suqs
278241675Suqs
279241675Suqs/*
280241675Suqs * Main parse routine.  Parses a single line -- really just hands off to
281241675Suqs * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
282241675Suqs */
283241675Suqsint
284241675Suqsmdoc_parseln(struct mdoc *m, int ln, char *buf, int offs)
285241675Suqs{
286241675Suqs
287241675Suqs	assert( ! (MDOC_HALT & m->flags));
288241675Suqs
289241675Suqs	m->flags |= MDOC_NEWLINE;
290241675Suqs
291241675Suqs	/*
292241675Suqs	 * Let the roff nS register switch SYNOPSIS mode early,
293241675Suqs	 * such that the parser knows at all times
294241675Suqs	 * whether this mode is on or off.
295241675Suqs	 * Note that this mode is also switched by the Sh macro.
296241675Suqs	 */
297241675Suqs	if (roff_regisset(m->roff, REG_nS)) {
298241675Suqs		if (roff_regget(m->roff, REG_nS))
299241675Suqs			m->flags |= MDOC_SYNOPSIS;
300241675Suqs		else
301241675Suqs			m->flags &= ~MDOC_SYNOPSIS;
302241675Suqs	}
303241675Suqs
304241675Suqs	return(mandoc_getcontrol(buf, &offs) ?
305241675Suqs			mdoc_pmacro(m, ln, buf, offs) :
306241675Suqs			mdoc_ptext(m, ln, buf, offs));
307241675Suqs}
308241675Suqs
309241675Suqsint
310241675Suqsmdoc_macro(MACRO_PROT_ARGS)
311241675Suqs{
312241675Suqs	assert(tok < MDOC_MAX);
313241675Suqs
314241675Suqs	/* If we're in the body, deny prologue calls. */
315241675Suqs
316241675Suqs	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
317241675Suqs			MDOC_PBODY & m->flags) {
318241675Suqs		mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY);
319241675Suqs		return(1);
320241675Suqs	}
321241675Suqs
322241675Suqs	/* If we're in the prologue, deny "body" macros.  */
323241675Suqs
324241675Suqs	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
325241675Suqs			! (MDOC_PBODY & m->flags)) {
326241675Suqs		mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG);
327241675Suqs		if (NULL == m->meta.msec)
328241675Suqs			m->meta.msec = mandoc_strdup("1");
329241675Suqs		if (NULL == m->meta.title)
330241675Suqs			m->meta.title = mandoc_strdup("UNKNOWN");
331241675Suqs		if (NULL == m->meta.vol)
332241675Suqs			m->meta.vol = mandoc_strdup("LOCAL");
333241675Suqs		if (NULL == m->meta.os)
334241675Suqs			m->meta.os = mandoc_strdup("LOCAL");
335241675Suqs		if (NULL == m->meta.date)
336241675Suqs			m->meta.date = mandoc_normdate
337241675Suqs				(m->parse, NULL, line, ppos);
338241675Suqs		m->flags |= MDOC_PBODY;
339241675Suqs	}
340241675Suqs
341241675Suqs	return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf));
342241675Suqs}
343241675Suqs
344241675Suqs
345241675Suqsstatic int
346241675Suqsnode_append(struct mdoc *mdoc, struct mdoc_node *p)
347241675Suqs{
348241675Suqs
349241675Suqs	assert(mdoc->last);
350241675Suqs	assert(mdoc->first);
351241675Suqs	assert(MDOC_ROOT != p->type);
352241675Suqs
353241675Suqs	switch (mdoc->next) {
354241675Suqs	case (MDOC_NEXT_SIBLING):
355241675Suqs		mdoc->last->next = p;
356241675Suqs		p->prev = mdoc->last;
357241675Suqs		p->parent = mdoc->last->parent;
358241675Suqs		break;
359241675Suqs	case (MDOC_NEXT_CHILD):
360241675Suqs		mdoc->last->child = p;
361241675Suqs		p->parent = mdoc->last;
362241675Suqs		break;
363241675Suqs	default:
364241675Suqs		abort();
365241675Suqs		/* NOTREACHED */
366241675Suqs	}
367241675Suqs
368241675Suqs	p->parent->nchild++;
369241675Suqs
370241675Suqs	/*
371241675Suqs	 * Copy over the normalised-data pointer of our parent.  Not
372241675Suqs	 * everybody has one, but copying a null pointer is fine.
373241675Suqs	 */
374241675Suqs
375241675Suqs	switch (p->type) {
376241675Suqs	case (MDOC_BODY):
377241675Suqs		/* FALLTHROUGH */
378241675Suqs	case (MDOC_TAIL):
379241675Suqs		/* FALLTHROUGH */
380241675Suqs	case (MDOC_HEAD):
381241675Suqs		p->norm = p->parent->norm;
382241675Suqs		break;
383241675Suqs	default:
384241675Suqs		break;
385241675Suqs	}
386241675Suqs
387241675Suqs	if ( ! mdoc_valid_pre(mdoc, p))
388241675Suqs		return(0);
389241675Suqs
390241675Suqs	switch (p->type) {
391241675Suqs	case (MDOC_HEAD):
392241675Suqs		assert(MDOC_BLOCK == p->parent->type);
393241675Suqs		p->parent->head = p;
394241675Suqs		break;
395241675Suqs	case (MDOC_TAIL):
396241675Suqs		assert(MDOC_BLOCK == p->parent->type);
397241675Suqs		p->parent->tail = p;
398241675Suqs		break;
399241675Suqs	case (MDOC_BODY):
400241675Suqs		if (p->end)
401241675Suqs			break;
402241675Suqs		assert(MDOC_BLOCK == p->parent->type);
403241675Suqs		p->parent->body = p;
404241675Suqs		break;
405241675Suqs	default:
406241675Suqs		break;
407241675Suqs	}
408241675Suqs
409241675Suqs	mdoc->last = p;
410241675Suqs
411241675Suqs	switch (p->type) {
412241675Suqs	case (MDOC_TBL):
413241675Suqs		/* FALLTHROUGH */
414241675Suqs	case (MDOC_TEXT):
415241675Suqs		if ( ! mdoc_valid_post(mdoc))
416241675Suqs			return(0);
417241675Suqs		break;
418241675Suqs	default:
419241675Suqs		break;
420241675Suqs	}
421241675Suqs
422241675Suqs	return(1);
423241675Suqs}
424241675Suqs
425241675Suqs
426241675Suqsstatic struct mdoc_node *
427241675Suqsnode_alloc(struct mdoc *m, int line, int pos,
428241675Suqs		enum mdoct tok, enum mdoc_type type)
429241675Suqs{
430241675Suqs	struct mdoc_node *p;
431241675Suqs
432241675Suqs	p = mandoc_calloc(1, sizeof(struct mdoc_node));
433241675Suqs	p->sec = m->lastsec;
434241675Suqs	p->line = line;
435241675Suqs	p->pos = pos;
436241675Suqs	p->tok = tok;
437241675Suqs	p->type = type;
438241675Suqs
439241675Suqs	/* Flag analysis. */
440241675Suqs
441241675Suqs	if (MDOC_SYNOPSIS & m->flags)
442241675Suqs		p->flags |= MDOC_SYNPRETTY;
443241675Suqs	else
444241675Suqs		p->flags &= ~MDOC_SYNPRETTY;
445241675Suqs	if (MDOC_NEWLINE & m->flags)
446241675Suqs		p->flags |= MDOC_LINE;
447241675Suqs	m->flags &= ~MDOC_NEWLINE;
448241675Suqs
449241675Suqs	return(p);
450241675Suqs}
451241675Suqs
452241675Suqs
453241675Suqsint
454241675Suqsmdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
455241675Suqs{
456241675Suqs	struct mdoc_node *p;
457241675Suqs
458241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
459241675Suqs	if ( ! node_append(m, p))
460241675Suqs		return(0);
461241675Suqs	m->next = MDOC_NEXT_CHILD;
462241675Suqs	return(1);
463241675Suqs}
464241675Suqs
465241675Suqs
466241675Suqsint
467241675Suqsmdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
468241675Suqs{
469241675Suqs	struct mdoc_node *p;
470241675Suqs
471241675Suqs	assert(m->first);
472241675Suqs	assert(m->last);
473241675Suqs
474241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
475241675Suqs	if ( ! node_append(m, p))
476241675Suqs		return(0);
477241675Suqs	m->next = MDOC_NEXT_CHILD;
478241675Suqs	return(1);
479241675Suqs}
480241675Suqs
481241675Suqs
482241675Suqsint
483241675Suqsmdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
484241675Suqs{
485241675Suqs	struct mdoc_node *p;
486241675Suqs
487241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_BODY);
488241675Suqs	if ( ! node_append(m, p))
489241675Suqs		return(0);
490241675Suqs	m->next = MDOC_NEXT_CHILD;
491241675Suqs	return(1);
492241675Suqs}
493241675Suqs
494241675Suqs
495241675Suqsint
496241675Suqsmdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok,
497241675Suqs		struct mdoc_node *body, enum mdoc_endbody end)
498241675Suqs{
499241675Suqs	struct mdoc_node *p;
500241675Suqs
501241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_BODY);
502241675Suqs	p->pending = body;
503241675Suqs	p->end = end;
504241675Suqs	if ( ! node_append(m, p))
505241675Suqs		return(0);
506241675Suqs	m->next = MDOC_NEXT_SIBLING;
507241675Suqs	return(1);
508241675Suqs}
509241675Suqs
510241675Suqs
511241675Suqsint
512241675Suqsmdoc_block_alloc(struct mdoc *m, int line, int pos,
513241675Suqs		enum mdoct tok, struct mdoc_arg *args)
514241675Suqs{
515241675Suqs	struct mdoc_node *p;
516241675Suqs
517241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
518241675Suqs	p->args = args;
519241675Suqs	if (p->args)
520241675Suqs		(args->refcnt)++;
521241675Suqs
522241675Suqs	switch (tok) {
523241675Suqs	case (MDOC_Bd):
524241675Suqs		/* FALLTHROUGH */
525241675Suqs	case (MDOC_Bf):
526241675Suqs		/* FALLTHROUGH */
527241675Suqs	case (MDOC_Bl):
528241675Suqs		/* FALLTHROUGH */
529241675Suqs	case (MDOC_Rs):
530241675Suqs		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
531241675Suqs		break;
532241675Suqs	default:
533241675Suqs		break;
534241675Suqs	}
535241675Suqs
536241675Suqs	if ( ! node_append(m, p))
537241675Suqs		return(0);
538241675Suqs	m->next = MDOC_NEXT_CHILD;
539241675Suqs	return(1);
540241675Suqs}
541241675Suqs
542241675Suqs
543241675Suqsint
544241675Suqsmdoc_elem_alloc(struct mdoc *m, int line, int pos,
545241675Suqs		enum mdoct tok, struct mdoc_arg *args)
546241675Suqs{
547241675Suqs	struct mdoc_node *p;
548241675Suqs
549241675Suqs	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
550241675Suqs	p->args = args;
551241675Suqs	if (p->args)
552241675Suqs		(args->refcnt)++;
553241675Suqs
554241675Suqs	switch (tok) {
555241675Suqs	case (MDOC_An):
556241675Suqs		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
557241675Suqs		break;
558241675Suqs	default:
559241675Suqs		break;
560241675Suqs	}
561241675Suqs
562241675Suqs	if ( ! node_append(m, p))
563241675Suqs		return(0);
564241675Suqs	m->next = MDOC_NEXT_CHILD;
565241675Suqs	return(1);
566241675Suqs}
567241675Suqs
568241675Suqsint
569241675Suqsmdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
570241675Suqs{
571241675Suqs	struct mdoc_node *n;
572241675Suqs
573241675Suqs	n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
574241675Suqs	n->string = roff_strdup(m->roff, p);
575241675Suqs
576241675Suqs	if ( ! node_append(m, n))
577241675Suqs		return(0);
578241675Suqs
579241675Suqs	m->next = MDOC_NEXT_SIBLING;
580241675Suqs	return(1);
581241675Suqs}
582241675Suqs
583241675Suqs
584241675Suqsstatic void
585241675Suqsmdoc_node_free(struct mdoc_node *p)
586241675Suqs{
587241675Suqs
588241675Suqs	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
589241675Suqs		free(p->norm);
590241675Suqs	if (p->string)
591241675Suqs		free(p->string);
592241675Suqs	if (p->args)
593241675Suqs		mdoc_argv_free(p->args);
594241675Suqs	free(p);
595241675Suqs}
596241675Suqs
597241675Suqs
598241675Suqsstatic void
599241675Suqsmdoc_node_unlink(struct mdoc *m, struct mdoc_node *n)
600241675Suqs{
601241675Suqs
602241675Suqs	/* Adjust siblings. */
603241675Suqs
604241675Suqs	if (n->prev)
605241675Suqs		n->prev->next = n->next;
606241675Suqs	if (n->next)
607241675Suqs		n->next->prev = n->prev;
608241675Suqs
609241675Suqs	/* Adjust parent. */
610241675Suqs
611241675Suqs	if (n->parent) {
612241675Suqs		n->parent->nchild--;
613241675Suqs		if (n->parent->child == n)
614241675Suqs			n->parent->child = n->prev ? n->prev : n->next;
615241675Suqs		if (n->parent->last == n)
616241675Suqs			n->parent->last = n->prev ? n->prev : NULL;
617241675Suqs	}
618241675Suqs
619241675Suqs	/* Adjust parse point, if applicable. */
620241675Suqs
621241675Suqs	if (m && m->last == n) {
622241675Suqs		if (n->prev) {
623241675Suqs			m->last = n->prev;
624241675Suqs			m->next = MDOC_NEXT_SIBLING;
625241675Suqs		} else {
626241675Suqs			m->last = n->parent;
627241675Suqs			m->next = MDOC_NEXT_CHILD;
628241675Suqs		}
629241675Suqs	}
630241675Suqs
631241675Suqs	if (m && m->first == n)
632241675Suqs		m->first = NULL;
633241675Suqs}
634241675Suqs
635241675Suqs
636241675Suqsvoid
637241675Suqsmdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
638241675Suqs{
639241675Suqs
640241675Suqs	while (p->child) {
641241675Suqs		assert(p->nchild);
642241675Suqs		mdoc_node_delete(m, p->child);
643241675Suqs	}
644241675Suqs	assert(0 == p->nchild);
645241675Suqs
646241675Suqs	mdoc_node_unlink(m, p);
647241675Suqs	mdoc_node_free(p);
648241675Suqs}
649241675Suqs
#if 0
/*
 * Pre-treat a text line (currently compiled out).
 * Text lines can contain equations, which must be handled apart from
 * the regular text.  This function steps through the line: text up to
 * the next equation delimiter goes to mdoc_ptext(), the span between a
 * pair of delimiters goes to the roff equation parser.
 * It must cope with several equations on one line AND with an equation
 * whose closing delimiter is not on this line.
 * Returns 0 if mdoc_ptext() or mdoc_addeqn() fails, 1 otherwise.
 */
static int
mdoc_preptext(struct mdoc *m, int line, char *buf, int offs)
{
	char		*start, *end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		start = NULL;
		if ('\0' != (delim = roff_eqndelim(m->roff)))
			if (NULL != (start = strchr(buf + offs, delim)))
				*start++ = '\0';

		/* Parse text as normal. */
		if ( ! mdoc_ptext(m, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == start)
			break;

		/* Read past the end of the equation. */
		offs += start - (buf + offs);
		assert(start == &buf[offs]);
		if (NULL != (end = strchr(buf + offs, delim))) {
			*end++ = '\0';
			while (' ' == *end)
				end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(m->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(m->roff))
			if ( ! mdoc_addeqn(m, roff_eqn(m->roff)))
				return(0);

		/*
		 * Without a closing delimiter the equation runs to the
		 * end of the line, so nothing is left to scan.  Stop
		 * here instead of doing arithmetic on a null pointer.
		 */
		if (NULL == end)
			break;

		offs += (end - (buf + offs));
	}

	return(1);
}
#endif
703241675Suqs
704241675Suqs/*
705241675Suqs * Parse free-form text, that is, a line that does not begin with the
706241675Suqs * control character.
707241675Suqs */
708241675Suqsstatic int
709241675Suqsmdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
710241675Suqs{
711241675Suqs	char		 *c, *ws, *end;
712241675Suqs	struct mdoc_node *n;
713241675Suqs
714241675Suqs	/* No text before an initial macro. */
715241675Suqs
716241675Suqs	if (SEC_NONE == m->lastnamed) {
717241675Suqs		mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT);
718241675Suqs		return(1);
719241675Suqs	}
720241675Suqs
721241675Suqs	assert(m->last);
722241675Suqs	n = m->last;
723241675Suqs
724241675Suqs	/*
725241675Suqs	 * Divert directly to list processing if we're encountering a
726241675Suqs	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
727241675Suqs	 * (a MDOC_BODY means it's already open, in which case we should
728241675Suqs	 * process within its context in the normal way).
729241675Suqs	 */
730241675Suqs
731241675Suqs	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
732241675Suqs			LIST_column == n->norm->Bl.type) {
733241675Suqs		/* `Bl' is open without any children. */
734241675Suqs		m->flags |= MDOC_FREECOL;
735241675Suqs		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
736241675Suqs	}
737241675Suqs
738241675Suqs	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
739241675Suqs			NULL != n->parent &&
740241675Suqs			MDOC_Bl == n->parent->tok &&
741241675Suqs			LIST_column == n->parent->norm->Bl.type) {
742241675Suqs		/* `Bl' has block-level `It' children. */
743241675Suqs		m->flags |= MDOC_FREECOL;
744241675Suqs		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
745241675Suqs	}
746241675Suqs
747241675Suqs	/*
748241675Suqs	 * Search for the beginning of unescaped trailing whitespace (ws)
749241675Suqs	 * and for the first character not to be output (end).
750241675Suqs	 */
751241675Suqs
752241675Suqs	/* FIXME: replace with strcspn(). */
753241675Suqs	ws = NULL;
754241675Suqs	for (c = end = buf + offs; *c; c++) {
755241675Suqs		switch (*c) {
756241675Suqs		case ' ':
757241675Suqs			if (NULL == ws)
758241675Suqs				ws = c;
759241675Suqs			continue;
760241675Suqs		case '\t':
761241675Suqs			/*
762241675Suqs			 * Always warn about trailing tabs,
763241675Suqs			 * even outside literal context,
764241675Suqs			 * where they should be put on the next line.
765241675Suqs			 */
766241675Suqs			if (NULL == ws)
767241675Suqs				ws = c;
768241675Suqs			/*
769241675Suqs			 * Strip trailing tabs in literal context only;
770241675Suqs			 * outside, they affect the next line.
771241675Suqs			 */
772241675Suqs			if (MDOC_LITERAL & m->flags)
773241675Suqs				continue;
774241675Suqs			break;
775241675Suqs		case '\\':
776241675Suqs			/* Skip the escaped character, too, if any. */
777241675Suqs			if (c[1])
778241675Suqs				c++;
779241675Suqs			/* FALLTHROUGH */
780241675Suqs		default:
781241675Suqs			ws = NULL;
782241675Suqs			break;
783241675Suqs		}
784241675Suqs		end = c + 1;
785241675Suqs	}
786241675Suqs	*end = '\0';
787241675Suqs
788241675Suqs	if (ws)
789241675Suqs		mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
790241675Suqs
791241675Suqs	if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) {
792241675Suqs		mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
793241675Suqs
794241675Suqs		/*
795241675Suqs		 * Insert a `sp' in the case of a blank line.  Technically,
796241675Suqs		 * blank lines aren't allowed, but enough manuals assume this
797241675Suqs		 * behaviour that we want to work around it.
798241675Suqs		 */
799241675Suqs		if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL))
800241675Suqs			return(0);
801241675Suqs
802241675Suqs		m->next = MDOC_NEXT_SIBLING;
803241675Suqs		return(1);
804241675Suqs	}
805241675Suqs
806241675Suqs	if ( ! mdoc_word_alloc(m, line, offs, buf+offs))
807241675Suqs		return(0);
808241675Suqs
809241675Suqs	if (MDOC_LITERAL & m->flags)
810241675Suqs		return(1);
811241675Suqs
812241675Suqs	/*
813241675Suqs	 * End-of-sentence check.  If the last character is an unescaped
814241675Suqs	 * EOS character, then flag the node as being the end of a
815241675Suqs	 * sentence.  The front-end will know how to interpret this.
816241675Suqs	 */
817241675Suqs
818241675Suqs	assert(buf < end);
819241675Suqs
820241675Suqs	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0))
821241675Suqs		m->last->flags |= MDOC_EOS;
822241675Suqs
823241675Suqs	return(1);
824241675Suqs}
825241675Suqs
826241675Suqs
827241675Suqs/*
828241675Suqs * Parse a macro line, that is, a line beginning with the control
829241675Suqs * character.
830241675Suqs */
831241675Suqsstatic int
832241675Suqsmdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs)
833241675Suqs{
834241675Suqs	enum mdoct	  tok;
835241675Suqs	int		  i, sv;
836241675Suqs	char		  mac[5];
837241675Suqs	struct mdoc_node *n;
838241675Suqs
839241675Suqs	/* Empty post-control lines are ignored. */
840241675Suqs
841241675Suqs	if ('"' == buf[offs]) {
842241675Suqs		mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
843241675Suqs		return(1);
844241675Suqs	} else if ('\0' == buf[offs])
845241675Suqs		return(1);
846241675Suqs
847241675Suqs	sv = offs;
848241675Suqs
849241675Suqs	/*
850241675Suqs	 * Copy the first word into a nil-terminated buffer.
851241675Suqs	 * Stop copying when a tab, space, or eoln is encountered.
852241675Suqs	 */
853241675Suqs
854241675Suqs	i = 0;
855241675Suqs	while (i < 4 && '\0' != buf[offs] &&
856241675Suqs			' ' != buf[offs] && '\t' != buf[offs])
857241675Suqs		mac[i++] = buf[offs++];
858241675Suqs
859241675Suqs	mac[i] = '\0';
860241675Suqs
861241675Suqs	tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
862241675Suqs
863241675Suqs	if (MDOC_MAX == tok) {
864241675Suqs		mandoc_vmsg(MANDOCERR_MACRO, m->parse,
865241675Suqs				ln, sv, "%s", buf + sv - 1);
866241675Suqs		return(1);
867241675Suqs	}
868241675Suqs
869241675Suqs	/* Disregard the first trailing tab, if applicable. */
870241675Suqs
871241675Suqs	if ('\t' == buf[offs])
872241675Suqs		offs++;
873241675Suqs
874241675Suqs	/* Jump to the next non-whitespace word. */
875241675Suqs
876241675Suqs	while (buf[offs] && ' ' == buf[offs])
877241675Suqs		offs++;
878241675Suqs
879241675Suqs	/*
880241675Suqs	 * Trailing whitespace.  Note that tabs are allowed to be passed
881241675Suqs	 * into the parser as "text", so we only warn about spaces here.
882241675Suqs	 */
883241675Suqs
884241675Suqs	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
885241675Suqs		mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
886241675Suqs
887241675Suqs	/*
888241675Suqs	 * If an initial macro or a list invocation, divert directly
889241675Suqs	 * into macro processing.
890241675Suqs	 */
891241675Suqs
892241675Suqs	if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) {
893241675Suqs		if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
894241675Suqs			goto err;
895241675Suqs		return(1);
896241675Suqs	}
897241675Suqs
898241675Suqs	n = m->last;
899241675Suqs	assert(m->last);
900241675Suqs
901241675Suqs	/*
902241675Suqs	 * If the first macro of a `Bl -column', open an `It' block
903241675Suqs	 * context around the parsed macro.
904241675Suqs	 */
905241675Suqs
906241675Suqs	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
907241675Suqs			LIST_column == n->norm->Bl.type) {
908241675Suqs		m->flags |= MDOC_FREECOL;
909241675Suqs		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
910241675Suqs			goto err;
911241675Suqs		return(1);
912241675Suqs	}
913241675Suqs
914241675Suqs	/*
915241675Suqs	 * If we're following a block-level `It' within a `Bl -column'
916241675Suqs	 * context (perhaps opened in the above block or in ptext()),
917241675Suqs	 * then open an `It' block context around the parsed macro.
918241675Suqs	 */
919241675Suqs
920241675Suqs	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
921241675Suqs			NULL != n->parent &&
922241675Suqs			MDOC_Bl == n->parent->tok &&
923241675Suqs			LIST_column == n->parent->norm->Bl.type) {
924241675Suqs		m->flags |= MDOC_FREECOL;
925241675Suqs		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
926241675Suqs			goto err;
927241675Suqs		return(1);
928241675Suqs	}
929241675Suqs
930241675Suqs	/* Normal processing of a macro. */
931241675Suqs
932241675Suqs	if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
933241675Suqs		goto err;
934241675Suqs
935241675Suqs	return(1);
936241675Suqs
937241675Suqserr:	/* Error out. */
938241675Suqs
939241675Suqs	m->flags |= MDOC_HALT;
940241675Suqs	return(0);
941241675Suqs}
942241675Suqs
943241675Suqsenum mdelim
944241675Suqsmdoc_isdelim(const char *p)
945241675Suqs{
946241675Suqs
947241675Suqs	if ('\0' == p[0])
948241675Suqs		return(DELIM_NONE);
949241675Suqs
950241675Suqs	if ('\0' == p[1])
951241675Suqs		switch (p[0]) {
952241675Suqs		case('('):
953241675Suqs			/* FALLTHROUGH */
954241675Suqs		case('['):
955241675Suqs			return(DELIM_OPEN);
956241675Suqs		case('|'):
957241675Suqs			return(DELIM_MIDDLE);
958241675Suqs		case('.'):
959241675Suqs			/* FALLTHROUGH */
960241675Suqs		case(','):
961241675Suqs			/* FALLTHROUGH */
962241675Suqs		case(';'):
963241675Suqs			/* FALLTHROUGH */
964241675Suqs		case(':'):
965241675Suqs			/* FALLTHROUGH */
966241675Suqs		case('?'):
967241675Suqs			/* FALLTHROUGH */
968241675Suqs		case('!'):
969241675Suqs			/* FALLTHROUGH */
970241675Suqs		case(')'):
971241675Suqs			/* FALLTHROUGH */
972241675Suqs		case(']'):
973241675Suqs			return(DELIM_CLOSE);
974241675Suqs		default:
975241675Suqs			return(DELIM_NONE);
976241675Suqs		}
977241675Suqs
978241675Suqs	if ('\\' != p[0])
979241675Suqs		return(DELIM_NONE);
980241675Suqs
981241675Suqs	if (0 == strcmp(p + 1, "."))
982241675Suqs		return(DELIM_CLOSE);
983241675Suqs	if (0 == strcmp(p + 1, "*(Ba"))
984241675Suqs		return(DELIM_MIDDLE);
985241675Suqs
986241675Suqs	return(DELIM_NONE);
987241675Suqs}
988