mdoc.c revision 261344
1261344Suqs/*	$Id: mdoc.c,v 1.206 2013/12/24 19:11:46 schwarze Exp $ */
2241675Suqs/*
3241675Suqs * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4261344Suqs * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5241675Suqs *
6241675Suqs * Permission to use, copy, modify, and distribute this software for any
7241675Suqs * purpose with or without fee is hereby granted, provided that the above
8241675Suqs * copyright notice and this permission notice appear in all copies.
9241675Suqs *
10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17241675Suqs */
18241675Suqs#ifdef HAVE_CONFIG_H
19241675Suqs#include "config.h"
20241675Suqs#endif
21241675Suqs
22241675Suqs#include <sys/types.h>
23241675Suqs
24241675Suqs#include <assert.h>
25241675Suqs#include <stdarg.h>
26241675Suqs#include <stdio.h>
27241675Suqs#include <stdlib.h>
28241675Suqs#include <string.h>
29241675Suqs#include <time.h>
30241675Suqs
31241675Suqs#include "mdoc.h"
32241675Suqs#include "mandoc.h"
33241675Suqs#include "libmdoc.h"
34241675Suqs#include "libmandoc.h"
35241675Suqs
36241675Suqsconst	char *const __mdoc_macronames[MDOC_MAX] = {
37241675Suqs	"Ap",		"Dd",		"Dt",		"Os",
38241675Suqs	"Sh",		"Ss",		"Pp",		"D1",
39241675Suqs	"Dl",		"Bd",		"Ed",		"Bl",
40241675Suqs	"El",		"It",		"Ad",		"An",
41241675Suqs	"Ar",		"Cd",		"Cm",		"Dv",
42241675Suqs	"Er",		"Ev",		"Ex",		"Fa",
43241675Suqs	"Fd",		"Fl",		"Fn",		"Ft",
44241675Suqs	"Ic",		"In",		"Li",		"Nd",
45241675Suqs	"Nm",		"Op",		"Ot",		"Pa",
46241675Suqs	"Rv",		"St",		"Va",		"Vt",
47241675Suqs	/* LINTED */
48241675Suqs	"Xr",		"%A",		"%B",		"%D",
49241675Suqs	/* LINTED */
50241675Suqs	"%I",		"%J",		"%N",		"%O",
51241675Suqs	/* LINTED */
52241675Suqs	"%P",		"%R",		"%T",		"%V",
53241675Suqs	"Ac",		"Ao",		"Aq",		"At",
54241675Suqs	"Bc",		"Bf",		"Bo",		"Bq",
55241675Suqs	"Bsx",		"Bx",		"Db",		"Dc",
56241675Suqs	"Do",		"Dq",		"Ec",		"Ef",
57241675Suqs	"Em",		"Eo",		"Fx",		"Ms",
58241675Suqs	"No",		"Ns",		"Nx",		"Ox",
59241675Suqs	"Pc",		"Pf",		"Po",		"Pq",
60241675Suqs	"Qc",		"Ql",		"Qo",		"Qq",
61241675Suqs	"Re",		"Rs",		"Sc",		"So",
62241675Suqs	"Sq",		"Sm",		"Sx",		"Sy",
63241675Suqs	"Tn",		"Ux",		"Xc",		"Xo",
64241675Suqs	"Fo",		"Fc",		"Oo",		"Oc",
65241675Suqs	"Bk",		"Ek",		"Bt",		"Hf",
66241675Suqs	"Fr",		"Ud",		"Lb",		"Lp",
67241675Suqs	"Lk",		"Mt",		"Brq",		"Bro",
68241675Suqs	/* LINTED */
69241675Suqs	"Brc",		"%C",		"Es",		"En",
70241675Suqs	/* LINTED */
71241675Suqs	"Dx",		"%Q",		"br",		"sp",
72241675Suqs	/* LINTED */
73241675Suqs	"%U",		"Ta"
74241675Suqs	};
75241675Suqs
76241675Suqsconst	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
77241675Suqs	"split",		"nosplit",		"ragged",
78241675Suqs	"unfilled",		"literal",		"file",
79241675Suqs	"offset",		"bullet",		"dash",
80241675Suqs	"hyphen",		"item",			"enum",
81241675Suqs	"tag",			"diag",			"hang",
82241675Suqs	"ohang",		"inset",		"column",
83241675Suqs	"width",		"compact",		"std",
84241675Suqs	"filled",		"words",		"emphasis",
85241675Suqs	"symbolic",		"nested",		"centered"
86241675Suqs	};
87241675Suqs
88241675Suqsconst	char * const *mdoc_macronames = __mdoc_macronames;
89241675Suqsconst	char * const *mdoc_argnames = __mdoc_argnames;
90241675Suqs
/* Node construction and tree maintenance (file-local). */
static struct mdoc_node *node_alloc(struct mdoc *, int, int,
			enum mdoct, enum mdoc_type);
static int	 node_append(struct mdoc *, struct mdoc_node *);
static void	 mdoc_node_free(struct mdoc_node *);
static void	 mdoc_node_unlink(struct mdoc *, struct mdoc_node *);

/* Set-up and tear-down of the per-parse ("volatile") state. */
static void	 mdoc_alloc1(struct mdoc *);
static void	 mdoc_free1(struct mdoc *);

/* Line parsers. */
#if 0
static int	 mdoc_preptext(struct mdoc *, int, char *, int);
#endif
static int	 mdoc_ptext(struct mdoc *, int, char *, int);
static int	 mdoc_pmacro(struct mdoc *, int, char *, int);
105241675Suqs
106241675Suqsconst struct mdoc_node *
107261344Suqsmdoc_node(const struct mdoc *mdoc)
108241675Suqs{
109241675Suqs
110261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
111261344Suqs	return(mdoc->first);
112241675Suqs}
113241675Suqs
114241675Suqs
115241675Suqsconst struct mdoc_meta *
116261344Suqsmdoc_meta(const struct mdoc *mdoc)
117241675Suqs{
118241675Suqs
119261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
120261344Suqs	return(&mdoc->meta);
121241675Suqs}
122241675Suqs
123241675Suqs
124241675Suqs/*
125241675Suqs * Frees volatile resources (parse tree, meta-data, fields).
126241675Suqs */
127241675Suqsstatic void
128241675Suqsmdoc_free1(struct mdoc *mdoc)
129241675Suqs{
130241675Suqs
131241675Suqs	if (mdoc->first)
132241675Suqs		mdoc_node_delete(mdoc, mdoc->first);
133241675Suqs	if (mdoc->meta.title)
134241675Suqs		free(mdoc->meta.title);
135241675Suqs	if (mdoc->meta.os)
136241675Suqs		free(mdoc->meta.os);
137241675Suqs	if (mdoc->meta.name)
138241675Suqs		free(mdoc->meta.name);
139241675Suqs	if (mdoc->meta.arch)
140241675Suqs		free(mdoc->meta.arch);
141241675Suqs	if (mdoc->meta.vol)
142241675Suqs		free(mdoc->meta.vol);
143241675Suqs	if (mdoc->meta.msec)
144241675Suqs		free(mdoc->meta.msec);
145241675Suqs	if (mdoc->meta.date)
146241675Suqs		free(mdoc->meta.date);
147241675Suqs}
148241675Suqs
149241675Suqs
150241675Suqs/*
151241675Suqs * Allocate all volatile resources (parse tree, meta-data, fields).
152241675Suqs */
153241675Suqsstatic void
154241675Suqsmdoc_alloc1(struct mdoc *mdoc)
155241675Suqs{
156241675Suqs
157241675Suqs	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
158241675Suqs	mdoc->flags = 0;
159241675Suqs	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
160241675Suqs	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
161241675Suqs	mdoc->first = mdoc->last;
162241675Suqs	mdoc->last->type = MDOC_ROOT;
163241675Suqs	mdoc->last->tok = MDOC_MAX;
164241675Suqs	mdoc->next = MDOC_NEXT_CHILD;
165241675Suqs}
166241675Suqs
167241675Suqs
/*
 * Prepare the parser for another document: drop the per-parse data via
 * mdoc_free1() and build it anew via mdoc_alloc1().  Members that
 * neither of the two touches (parse, roff, defos) survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);
	mdoc_alloc1(mdoc);
}
181241675Suqs
182241675Suqs
/*
 * Destroy the parser: release the per-parse data, then the structure
 * itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);
	free(mdoc);
}
194241675Suqs
195241675Suqs
196241675Suqs/*
197241675Suqs * Allocate volatile and non-volatile parse resources.
198241675Suqs */
199241675Suqsstruct mdoc *
200261344Suqsmdoc_alloc(struct roff *roff, struct mparse *parse, char *defos)
201241675Suqs{
202241675Suqs	struct mdoc	*p;
203241675Suqs
204241675Suqs	p = mandoc_calloc(1, sizeof(struct mdoc));
205241675Suqs
206241675Suqs	p->parse = parse;
207261344Suqs	p->defos = defos;
208241675Suqs	p->roff = roff;
209241675Suqs
210241675Suqs	mdoc_hash_init();
211241675Suqs	mdoc_alloc1(p);
212241675Suqs	return(p);
213241675Suqs}
214241675Suqs
215241675Suqs
216241675Suqs/*
217241675Suqs * Climb back up the parse tree, validating open scopes.  Mostly calls
218241675Suqs * through to macro_end() in macro.c.
219241675Suqs */
220241675Suqsint
221261344Suqsmdoc_endparse(struct mdoc *mdoc)
222241675Suqs{
223241675Suqs
224261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
225261344Suqs	if (mdoc_macroend(mdoc))
226241675Suqs		return(1);
227261344Suqs	mdoc->flags |= MDOC_HALT;
228241675Suqs	return(0);
229241675Suqs}
230241675Suqs
231241675Suqsint
232261344Suqsmdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep)
233241675Suqs{
234241675Suqs	struct mdoc_node *n;
235241675Suqs
236261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
237241675Suqs
238241675Suqs	/* No text before an initial macro. */
239241675Suqs
240261344Suqs	if (SEC_NONE == mdoc->lastnamed) {
241261344Suqs		mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT);
242241675Suqs		return(1);
243241675Suqs	}
244241675Suqs
245261344Suqs	n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
246241675Suqs	n->eqn = ep;
247241675Suqs
248261344Suqs	if ( ! node_append(mdoc, n))
249241675Suqs		return(0);
250241675Suqs
251261344Suqs	mdoc->next = MDOC_NEXT_SIBLING;
252241675Suqs	return(1);
253241675Suqs}
254241675Suqs
255241675Suqsint
256261344Suqsmdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp)
257241675Suqs{
258241675Suqs	struct mdoc_node *n;
259241675Suqs
260261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
261241675Suqs
262241675Suqs	/* No text before an initial macro. */
263241675Suqs
264261344Suqs	if (SEC_NONE == mdoc->lastnamed) {
265261344Suqs		mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT);
266241675Suqs		return(1);
267241675Suqs	}
268241675Suqs
269261344Suqs	n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL);
270241675Suqs	n->span = sp;
271241675Suqs
272261344Suqs	if ( ! node_append(mdoc, n))
273241675Suqs		return(0);
274241675Suqs
275261344Suqs	mdoc->next = MDOC_NEXT_SIBLING;
276241675Suqs	return(1);
277241675Suqs}
278241675Suqs
279241675Suqs
280241675Suqs/*
281241675Suqs * Main parse routine.  Parses a single line -- really just hands off to
282241675Suqs * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
283241675Suqs */
284241675Suqsint
285261344Suqsmdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs)
286241675Suqs{
287241675Suqs
288261344Suqs	assert( ! (MDOC_HALT & mdoc->flags));
289241675Suqs
290261344Suqs	mdoc->flags |= MDOC_NEWLINE;
291241675Suqs
292241675Suqs	/*
293241675Suqs	 * Let the roff nS register switch SYNOPSIS mode early,
294241675Suqs	 * such that the parser knows at all times
295241675Suqs	 * whether this mode is on or off.
296241675Suqs	 * Note that this mode is also switched by the Sh macro.
297241675Suqs	 */
298261344Suqs	if (roff_getreg(mdoc->roff, "nS"))
299261344Suqs		mdoc->flags |= MDOC_SYNOPSIS;
300261344Suqs	else
301261344Suqs		mdoc->flags &= ~MDOC_SYNOPSIS;
302241675Suqs
303261344Suqs	return(roff_getcontrol(mdoc->roff, buf, &offs) ?
304261344Suqs			mdoc_pmacro(mdoc, ln, buf, offs) :
305261344Suqs			mdoc_ptext(mdoc, ln, buf, offs));
306241675Suqs}
307241675Suqs
308241675Suqsint
309241675Suqsmdoc_macro(MACRO_PROT_ARGS)
310241675Suqs{
311241675Suqs	assert(tok < MDOC_MAX);
312241675Suqs
313241675Suqs	/* If we're in the body, deny prologue calls. */
314241675Suqs
315241675Suqs	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
316261344Suqs			MDOC_PBODY & mdoc->flags) {
317261344Suqs		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY);
318241675Suqs		return(1);
319241675Suqs	}
320241675Suqs
321241675Suqs	/* If we're in the prologue, deny "body" macros.  */
322241675Suqs
323241675Suqs	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
324261344Suqs			! (MDOC_PBODY & mdoc->flags)) {
325261344Suqs		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG);
326261344Suqs		if (NULL == mdoc->meta.msec)
327261344Suqs			mdoc->meta.msec = mandoc_strdup("1");
328261344Suqs		if (NULL == mdoc->meta.title)
329261344Suqs			mdoc->meta.title = mandoc_strdup("UNKNOWN");
330261344Suqs		if (NULL == mdoc->meta.vol)
331261344Suqs			mdoc->meta.vol = mandoc_strdup("LOCAL");
332261344Suqs		if (NULL == mdoc->meta.os)
333261344Suqs			mdoc->meta.os = mandoc_strdup("LOCAL");
334261344Suqs		if (NULL == mdoc->meta.date)
335261344Suqs			mdoc->meta.date = mandoc_normdate
336261344Suqs				(mdoc->parse, NULL, line, ppos);
337261344Suqs		mdoc->flags |= MDOC_PBODY;
338241675Suqs	}
339241675Suqs
340261344Suqs	return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf));
341241675Suqs}
342241675Suqs
343241675Suqs
344241675Suqsstatic int
345241675Suqsnode_append(struct mdoc *mdoc, struct mdoc_node *p)
346241675Suqs{
347241675Suqs
348241675Suqs	assert(mdoc->last);
349241675Suqs	assert(mdoc->first);
350241675Suqs	assert(MDOC_ROOT != p->type);
351241675Suqs
352241675Suqs	switch (mdoc->next) {
353241675Suqs	case (MDOC_NEXT_SIBLING):
354241675Suqs		mdoc->last->next = p;
355241675Suqs		p->prev = mdoc->last;
356241675Suqs		p->parent = mdoc->last->parent;
357241675Suqs		break;
358241675Suqs	case (MDOC_NEXT_CHILD):
359241675Suqs		mdoc->last->child = p;
360241675Suqs		p->parent = mdoc->last;
361241675Suqs		break;
362241675Suqs	default:
363241675Suqs		abort();
364241675Suqs		/* NOTREACHED */
365241675Suqs	}
366241675Suqs
367241675Suqs	p->parent->nchild++;
368241675Suqs
369241675Suqs	/*
370241675Suqs	 * Copy over the normalised-data pointer of our parent.  Not
371241675Suqs	 * everybody has one, but copying a null pointer is fine.
372241675Suqs	 */
373241675Suqs
374241675Suqs	switch (p->type) {
375241675Suqs	case (MDOC_BODY):
376261344Suqs		if (ENDBODY_NOT != p->end)
377261344Suqs			break;
378241675Suqs		/* FALLTHROUGH */
379241675Suqs	case (MDOC_TAIL):
380241675Suqs		/* FALLTHROUGH */
381241675Suqs	case (MDOC_HEAD):
382241675Suqs		p->norm = p->parent->norm;
383241675Suqs		break;
384241675Suqs	default:
385241675Suqs		break;
386241675Suqs	}
387241675Suqs
388241675Suqs	if ( ! mdoc_valid_pre(mdoc, p))
389241675Suqs		return(0);
390241675Suqs
391241675Suqs	switch (p->type) {
392241675Suqs	case (MDOC_HEAD):
393241675Suqs		assert(MDOC_BLOCK == p->parent->type);
394241675Suqs		p->parent->head = p;
395241675Suqs		break;
396241675Suqs	case (MDOC_TAIL):
397241675Suqs		assert(MDOC_BLOCK == p->parent->type);
398241675Suqs		p->parent->tail = p;
399241675Suqs		break;
400241675Suqs	case (MDOC_BODY):
401241675Suqs		if (p->end)
402241675Suqs			break;
403241675Suqs		assert(MDOC_BLOCK == p->parent->type);
404241675Suqs		p->parent->body = p;
405241675Suqs		break;
406241675Suqs	default:
407241675Suqs		break;
408241675Suqs	}
409241675Suqs
410241675Suqs	mdoc->last = p;
411241675Suqs
412241675Suqs	switch (p->type) {
413241675Suqs	case (MDOC_TBL):
414241675Suqs		/* FALLTHROUGH */
415241675Suqs	case (MDOC_TEXT):
416241675Suqs		if ( ! mdoc_valid_post(mdoc))
417241675Suqs			return(0);
418241675Suqs		break;
419241675Suqs	default:
420241675Suqs		break;
421241675Suqs	}
422241675Suqs
423241675Suqs	return(1);
424241675Suqs}
425241675Suqs
426241675Suqs
427241675Suqsstatic struct mdoc_node *
428261344Suqsnode_alloc(struct mdoc *mdoc, int line, int pos,
429241675Suqs		enum mdoct tok, enum mdoc_type type)
430241675Suqs{
431241675Suqs	struct mdoc_node *p;
432241675Suqs
433241675Suqs	p = mandoc_calloc(1, sizeof(struct mdoc_node));
434261344Suqs	p->sec = mdoc->lastsec;
435241675Suqs	p->line = line;
436241675Suqs	p->pos = pos;
437261344Suqs	p->lastline = line;
438241675Suqs	p->tok = tok;
439241675Suqs	p->type = type;
440241675Suqs
441241675Suqs	/* Flag analysis. */
442241675Suqs
443261344Suqs	if (MDOC_SYNOPSIS & mdoc->flags)
444241675Suqs		p->flags |= MDOC_SYNPRETTY;
445241675Suqs	else
446241675Suqs		p->flags &= ~MDOC_SYNPRETTY;
447261344Suqs	if (MDOC_NEWLINE & mdoc->flags)
448241675Suqs		p->flags |= MDOC_LINE;
449261344Suqs	mdoc->flags &= ~MDOC_NEWLINE;
450241675Suqs
451241675Suqs	return(p);
452241675Suqs}
453241675Suqs
454241675Suqs
455241675Suqsint
456261344Suqsmdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
457241675Suqs{
458241675Suqs	struct mdoc_node *p;
459241675Suqs
460261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
461261344Suqs	if ( ! node_append(mdoc, p))
462241675Suqs		return(0);
463261344Suqs	mdoc->next = MDOC_NEXT_CHILD;
464241675Suqs	return(1);
465241675Suqs}
466241675Suqs
467241675Suqs
468241675Suqsint
469261344Suqsmdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
470241675Suqs{
471241675Suqs	struct mdoc_node *p;
472241675Suqs
473261344Suqs	assert(mdoc->first);
474261344Suqs	assert(mdoc->last);
475241675Suqs
476261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
477261344Suqs	if ( ! node_append(mdoc, p))
478241675Suqs		return(0);
479261344Suqs	mdoc->next = MDOC_NEXT_CHILD;
480241675Suqs	return(1);
481241675Suqs}
482241675Suqs
483241675Suqs
484241675Suqsint
485261344Suqsmdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
486241675Suqs{
487241675Suqs	struct mdoc_node *p;
488241675Suqs
489261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
490261344Suqs	if ( ! node_append(mdoc, p))
491241675Suqs		return(0);
492261344Suqs	mdoc->next = MDOC_NEXT_CHILD;
493241675Suqs	return(1);
494241675Suqs}
495241675Suqs
496241675Suqs
497241675Suqsint
498261344Suqsmdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok,
499241675Suqs		struct mdoc_node *body, enum mdoc_endbody end)
500241675Suqs{
501241675Suqs	struct mdoc_node *p;
502241675Suqs
503261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
504241675Suqs	p->pending = body;
505261344Suqs	p->norm = body->norm;
506241675Suqs	p->end = end;
507261344Suqs	if ( ! node_append(mdoc, p))
508241675Suqs		return(0);
509261344Suqs	mdoc->next = MDOC_NEXT_SIBLING;
510241675Suqs	return(1);
511241675Suqs}
512241675Suqs
513241675Suqs
514241675Suqsint
515261344Suqsmdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
516241675Suqs		enum mdoct tok, struct mdoc_arg *args)
517241675Suqs{
518241675Suqs	struct mdoc_node *p;
519241675Suqs
520261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
521241675Suqs	p->args = args;
522241675Suqs	if (p->args)
523241675Suqs		(args->refcnt)++;
524241675Suqs
525241675Suqs	switch (tok) {
526241675Suqs	case (MDOC_Bd):
527241675Suqs		/* FALLTHROUGH */
528241675Suqs	case (MDOC_Bf):
529241675Suqs		/* FALLTHROUGH */
530241675Suqs	case (MDOC_Bl):
531241675Suqs		/* FALLTHROUGH */
532241675Suqs	case (MDOC_Rs):
533241675Suqs		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
534241675Suqs		break;
535241675Suqs	default:
536241675Suqs		break;
537241675Suqs	}
538241675Suqs
539261344Suqs	if ( ! node_append(mdoc, p))
540241675Suqs		return(0);
541261344Suqs	mdoc->next = MDOC_NEXT_CHILD;
542241675Suqs	return(1);
543241675Suqs}
544241675Suqs
545241675Suqs
546241675Suqsint
547261344Suqsmdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
548241675Suqs		enum mdoct tok, struct mdoc_arg *args)
549241675Suqs{
550241675Suqs	struct mdoc_node *p;
551241675Suqs
552261344Suqs	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
553241675Suqs	p->args = args;
554241675Suqs	if (p->args)
555241675Suqs		(args->refcnt)++;
556241675Suqs
557241675Suqs	switch (tok) {
558241675Suqs	case (MDOC_An):
559241675Suqs		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
560241675Suqs		break;
561241675Suqs	default:
562241675Suqs		break;
563241675Suqs	}
564241675Suqs
565261344Suqs	if ( ! node_append(mdoc, p))
566241675Suqs		return(0);
567261344Suqs	mdoc->next = MDOC_NEXT_CHILD;
568241675Suqs	return(1);
569241675Suqs}
570241675Suqs
571241675Suqsint
572261344Suqsmdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p)
573241675Suqs{
574241675Suqs	struct mdoc_node *n;
575241675Suqs
576261344Suqs	n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT);
577261344Suqs	n->string = roff_strdup(mdoc->roff, p);
578241675Suqs
579261344Suqs	if ( ! node_append(mdoc, n))
580241675Suqs		return(0);
581241675Suqs
582261344Suqs	mdoc->next = MDOC_NEXT_SIBLING;
583241675Suqs	return(1);
584241675Suqs}
585241675Suqs
586261344Suqsvoid
587261344Suqsmdoc_word_append(struct mdoc *mdoc, const char *p)
588261344Suqs{
589261344Suqs	struct mdoc_node	*n;
590261344Suqs	char			*addstr, *newstr;
591241675Suqs
592261344Suqs	n = mdoc->last;
593261344Suqs	addstr = roff_strdup(mdoc->roff, p);
594261344Suqs	if (-1 == asprintf(&newstr, "%s %s", n->string, addstr)) {
595261344Suqs		perror(NULL);
596261344Suqs		exit((int)MANDOCLEVEL_SYSERR);
597261344Suqs	}
598261344Suqs	free(addstr);
599261344Suqs	free(n->string);
600261344Suqs	n->string = newstr;
601261344Suqs	mdoc->next = MDOC_NEXT_SIBLING;
602261344Suqs}
603261344Suqs
604241675Suqsstatic void
605241675Suqsmdoc_node_free(struct mdoc_node *p)
606241675Suqs{
607241675Suqs
608241675Suqs	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
609241675Suqs		free(p->norm);
610241675Suqs	if (p->string)
611241675Suqs		free(p->string);
612241675Suqs	if (p->args)
613241675Suqs		mdoc_argv_free(p->args);
614241675Suqs	free(p);
615241675Suqs}
616241675Suqs
617241675Suqs
618241675Suqsstatic void
619261344Suqsmdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n)
620241675Suqs{
621241675Suqs
622241675Suqs	/* Adjust siblings. */
623241675Suqs
624241675Suqs	if (n->prev)
625241675Suqs		n->prev->next = n->next;
626241675Suqs	if (n->next)
627241675Suqs		n->next->prev = n->prev;
628241675Suqs
629241675Suqs	/* Adjust parent. */
630241675Suqs
631241675Suqs	if (n->parent) {
632241675Suqs		n->parent->nchild--;
633241675Suqs		if (n->parent->child == n)
634241675Suqs			n->parent->child = n->prev ? n->prev : n->next;
635241675Suqs		if (n->parent->last == n)
636241675Suqs			n->parent->last = n->prev ? n->prev : NULL;
637241675Suqs	}
638241675Suqs
639241675Suqs	/* Adjust parse point, if applicable. */
640241675Suqs
641261344Suqs	if (mdoc && mdoc->last == n) {
642241675Suqs		if (n->prev) {
643261344Suqs			mdoc->last = n->prev;
644261344Suqs			mdoc->next = MDOC_NEXT_SIBLING;
645241675Suqs		} else {
646261344Suqs			mdoc->last = n->parent;
647261344Suqs			mdoc->next = MDOC_NEXT_CHILD;
648241675Suqs		}
649241675Suqs	}
650241675Suqs
651261344Suqs	if (mdoc && mdoc->first == n)
652261344Suqs		mdoc->first = NULL;
653241675Suqs}
654241675Suqs
655241675Suqs
656241675Suqsvoid
657261344Suqsmdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p)
658241675Suqs{
659241675Suqs
660241675Suqs	while (p->child) {
661241675Suqs		assert(p->nchild);
662261344Suqs		mdoc_node_delete(mdoc, p->child);
663241675Suqs	}
664241675Suqs	assert(0 == p->nchild);
665241675Suqs
666261344Suqs	mdoc_node_unlink(mdoc, p);
667241675Suqs	mdoc_node_free(p);
668241675Suqs}
669241675Suqs
670261344Suqsint
671261344Suqsmdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p)
672261344Suqs{
673261344Suqs
674261344Suqs	mdoc_node_unlink(mdoc, p);
675261344Suqs	return(node_append(mdoc, p));
676261344Suqs}
677261344Suqs
#if 0
/*
 * Pre-treat a text line (currently compiled out).
 * Text lines can contain equations, which must be handled apart from
 * the regular text.  This function steps through the line: text in
 * front of an equation delimiter goes to mdoc_ptext(), the equation
 * itself goes to the roff equation parser, and scanning resumes after
 * the closing delimiter.  Several equations per line are supported.
 * An equation lacking its closing delimiter takes up the rest of the
 * line; scanning stops there.
 * Returns 0 if text parsing or adding an equation fails, else 1.
 */
static int
mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs)
{
	char		*start, *end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		start = NULL;
		if ('\0' != (delim = roff_eqndelim(mdoc->roff)))
			if (NULL != (start = strchr(buf + offs, delim)))
				*start++ = '\0';

		/* Parse text as normal. */
		if ( ! mdoc_ptext(mdoc, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == start)
			break;

		/* Read past the end of the equation. */
		offs += start - (buf + offs);
		assert(start == &buf[offs]);
		if (NULL != (end = strchr(buf + offs, delim))) {
			*end++ = '\0';
			while (' ' == *end)
				end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(mdoc->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(mdoc->roff))
			if ( ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff)))
				return(0);

		/*
		 * Without a closing delimiter, end is a null pointer
		 * and must not enter pointer arithmetic; the equation
		 * has consumed the remainder of the line anyway.
		 */
		if (NULL == end)
			break;

		offs += (end - (buf + offs));
	}

	return(1);
}
#endif
731241675Suqs
732241675Suqs/*
733241675Suqs * Parse free-form text, that is, a line that does not begin with the
734241675Suqs * control character.
735241675Suqs */
736241675Suqsstatic int
737261344Suqsmdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs)
738241675Suqs{
739241675Suqs	char		 *c, *ws, *end;
740241675Suqs	struct mdoc_node *n;
741241675Suqs
742241675Suqs	/* No text before an initial macro. */
743241675Suqs
744261344Suqs	if (SEC_NONE == mdoc->lastnamed) {
745261344Suqs		mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT);
746241675Suqs		return(1);
747241675Suqs	}
748241675Suqs
749261344Suqs	assert(mdoc->last);
750261344Suqs	n = mdoc->last;
751241675Suqs
752241675Suqs	/*
753241675Suqs	 * Divert directly to list processing if we're encountering a
754241675Suqs	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
755241675Suqs	 * (a MDOC_BODY means it's already open, in which case we should
756241675Suqs	 * process within its context in the normal way).
757241675Suqs	 */
758241675Suqs
759241675Suqs	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
760241675Suqs			LIST_column == n->norm->Bl.type) {
761241675Suqs		/* `Bl' is open without any children. */
762261344Suqs		mdoc->flags |= MDOC_FREECOL;
763261344Suqs		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
764241675Suqs	}
765241675Suqs
766241675Suqs	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
767241675Suqs			NULL != n->parent &&
768241675Suqs			MDOC_Bl == n->parent->tok &&
769241675Suqs			LIST_column == n->parent->norm->Bl.type) {
770241675Suqs		/* `Bl' has block-level `It' children. */
771261344Suqs		mdoc->flags |= MDOC_FREECOL;
772261344Suqs		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
773241675Suqs	}
774241675Suqs
775241675Suqs	/*
776241675Suqs	 * Search for the beginning of unescaped trailing whitespace (ws)
777241675Suqs	 * and for the first character not to be output (end).
778241675Suqs	 */
779241675Suqs
780241675Suqs	/* FIXME: replace with strcspn(). */
781241675Suqs	ws = NULL;
782241675Suqs	for (c = end = buf + offs; *c; c++) {
783241675Suqs		switch (*c) {
784241675Suqs		case ' ':
785241675Suqs			if (NULL == ws)
786241675Suqs				ws = c;
787241675Suqs			continue;
788241675Suqs		case '\t':
789241675Suqs			/*
790241675Suqs			 * Always warn about trailing tabs,
791241675Suqs			 * even outside literal context,
792241675Suqs			 * where they should be put on the next line.
793241675Suqs			 */
794241675Suqs			if (NULL == ws)
795241675Suqs				ws = c;
796241675Suqs			/*
797241675Suqs			 * Strip trailing tabs in literal context only;
798241675Suqs			 * outside, they affect the next line.
799241675Suqs			 */
800261344Suqs			if (MDOC_LITERAL & mdoc->flags)
801241675Suqs				continue;
802241675Suqs			break;
803241675Suqs		case '\\':
804241675Suqs			/* Skip the escaped character, too, if any. */
805241675Suqs			if (c[1])
806241675Suqs				c++;
807241675Suqs			/* FALLTHROUGH */
808241675Suqs		default:
809241675Suqs			ws = NULL;
810241675Suqs			break;
811241675Suqs		}
812241675Suqs		end = c + 1;
813241675Suqs	}
814241675Suqs	*end = '\0';
815241675Suqs
816241675Suqs	if (ws)
817261344Suqs		mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
818241675Suqs
819261344Suqs	if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) {
820261344Suqs		mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
821241675Suqs
822241675Suqs		/*
823241675Suqs		 * Insert a `sp' in the case of a blank line.  Technically,
824241675Suqs		 * blank lines aren't allowed, but enough manuals assume this
825241675Suqs		 * behaviour that we want to work around it.
826241675Suqs		 */
827261344Suqs		if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL))
828241675Suqs			return(0);
829241675Suqs
830261344Suqs		mdoc->next = MDOC_NEXT_SIBLING;
831261344Suqs
832261344Suqs		return(mdoc_valid_post(mdoc));
833241675Suqs	}
834241675Suqs
835261344Suqs	if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs))
836241675Suqs		return(0);
837241675Suqs
838261344Suqs	if (MDOC_LITERAL & mdoc->flags)
839241675Suqs		return(1);
840241675Suqs
841241675Suqs	/*
842241675Suqs	 * End-of-sentence check.  If the last character is an unescaped
843241675Suqs	 * EOS character, then flag the node as being the end of a
844241675Suqs	 * sentence.  The front-end will know how to interpret this.
845241675Suqs	 */
846241675Suqs
847241675Suqs	assert(buf < end);
848241675Suqs
849241675Suqs	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0))
850261344Suqs		mdoc->last->flags |= MDOC_EOS;
851241675Suqs
852241675Suqs	return(1);
853241675Suqs}
854241675Suqs
855241675Suqs
856241675Suqs/*
857241675Suqs * Parse a macro line, that is, a line beginning with the control
858241675Suqs * character.
859241675Suqs */
860241675Suqsstatic int
861261344Suqsmdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
862241675Suqs{
863241675Suqs	enum mdoct	  tok;
864241675Suqs	int		  i, sv;
865241675Suqs	char		  mac[5];
866241675Suqs	struct mdoc_node *n;
867241675Suqs
868241675Suqs	/* Empty post-control lines are ignored. */
869241675Suqs
870241675Suqs	if ('"' == buf[offs]) {
871261344Suqs		mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT);
872241675Suqs		return(1);
873241675Suqs	} else if ('\0' == buf[offs])
874241675Suqs		return(1);
875241675Suqs
876241675Suqs	sv = offs;
877241675Suqs
878241675Suqs	/*
879241675Suqs	 * Copy the first word into a nil-terminated buffer.
880241675Suqs	 * Stop copying when a tab, space, or eoln is encountered.
881241675Suqs	 */
882241675Suqs
883241675Suqs	i = 0;
884241675Suqs	while (i < 4 && '\0' != buf[offs] &&
885241675Suqs			' ' != buf[offs] && '\t' != buf[offs])
886241675Suqs		mac[i++] = buf[offs++];
887241675Suqs
888241675Suqs	mac[i] = '\0';
889241675Suqs
890241675Suqs	tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
891241675Suqs
892241675Suqs	if (MDOC_MAX == tok) {
893261344Suqs		mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse,
894241675Suqs				ln, sv, "%s", buf + sv - 1);
895241675Suqs		return(1);
896241675Suqs	}
897241675Suqs
898241675Suqs	/* Disregard the first trailing tab, if applicable. */
899241675Suqs
900241675Suqs	if ('\t' == buf[offs])
901241675Suqs		offs++;
902241675Suqs
903241675Suqs	/* Jump to the next non-whitespace word. */
904241675Suqs
905241675Suqs	while (buf[offs] && ' ' == buf[offs])
906241675Suqs		offs++;
907241675Suqs
908241675Suqs	/*
909241675Suqs	 * Trailing whitespace.  Note that tabs are allowed to be passed
910241675Suqs	 * into the parser as "text", so we only warn about spaces here.
911241675Suqs	 */
912241675Suqs
913241675Suqs	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
914261344Suqs		mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE);
915241675Suqs
916241675Suqs	/*
917241675Suqs	 * If an initial macro or a list invocation, divert directly
918241675Suqs	 * into macro processing.
919241675Suqs	 */
920241675Suqs
921261344Suqs	if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
922261344Suqs		if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
923241675Suqs			goto err;
924241675Suqs		return(1);
925241675Suqs	}
926241675Suqs
927261344Suqs	n = mdoc->last;
928261344Suqs	assert(mdoc->last);
929241675Suqs
930241675Suqs	/*
931241675Suqs	 * If the first macro of a `Bl -column', open an `It' block
932241675Suqs	 * context around the parsed macro.
933241675Suqs	 */
934241675Suqs
935241675Suqs	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
936241675Suqs			LIST_column == n->norm->Bl.type) {
937261344Suqs		mdoc->flags |= MDOC_FREECOL;
938261344Suqs		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
939241675Suqs			goto err;
940241675Suqs		return(1);
941241675Suqs	}
942241675Suqs
943241675Suqs	/*
944241675Suqs	 * If we're following a block-level `It' within a `Bl -column'
945241675Suqs	 * context (perhaps opened in the above block or in ptext()),
946241675Suqs	 * then open an `It' block context around the parsed macro.
947241675Suqs	 */
948241675Suqs
949241675Suqs	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
950241675Suqs			NULL != n->parent &&
951241675Suqs			MDOC_Bl == n->parent->tok &&
952241675Suqs			LIST_column == n->parent->norm->Bl.type) {
953261344Suqs		mdoc->flags |= MDOC_FREECOL;
954261344Suqs		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
955241675Suqs			goto err;
956241675Suqs		return(1);
957241675Suqs	}
958241675Suqs
959241675Suqs	/* Normal processing of a macro. */
960241675Suqs
961261344Suqs	if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
962241675Suqs		goto err;
963241675Suqs
964241675Suqs	return(1);
965241675Suqs
966241675Suqserr:	/* Error out. */
967241675Suqs
968261344Suqs	mdoc->flags |= MDOC_HALT;
969241675Suqs	return(0);
970241675Suqs}
971241675Suqs
972241675Suqsenum mdelim
973241675Suqsmdoc_isdelim(const char *p)
974241675Suqs{
975241675Suqs
976241675Suqs	if ('\0' == p[0])
977241675Suqs		return(DELIM_NONE);
978241675Suqs
979241675Suqs	if ('\0' == p[1])
980241675Suqs		switch (p[0]) {
981241675Suqs		case('('):
982241675Suqs			/* FALLTHROUGH */
983241675Suqs		case('['):
984241675Suqs			return(DELIM_OPEN);
985241675Suqs		case('|'):
986241675Suqs			return(DELIM_MIDDLE);
987241675Suqs		case('.'):
988241675Suqs			/* FALLTHROUGH */
989241675Suqs		case(','):
990241675Suqs			/* FALLTHROUGH */
991241675Suqs		case(';'):
992241675Suqs			/* FALLTHROUGH */
993241675Suqs		case(':'):
994241675Suqs			/* FALLTHROUGH */
995241675Suqs		case('?'):
996241675Suqs			/* FALLTHROUGH */
997241675Suqs		case('!'):
998241675Suqs			/* FALLTHROUGH */
999241675Suqs		case(')'):
1000241675Suqs			/* FALLTHROUGH */
1001241675Suqs		case(']'):
1002241675Suqs			return(DELIM_CLOSE);
1003241675Suqs		default:
1004241675Suqs			return(DELIM_NONE);
1005241675Suqs		}
1006241675Suqs
1007241675Suqs	if ('\\' != p[0])
1008241675Suqs		return(DELIM_NONE);
1009241675Suqs
1010241675Suqs	if (0 == strcmp(p + 1, "."))
1011241675Suqs		return(DELIM_CLOSE);
1012261344Suqs	if (0 == strcmp(p + 1, "fR|\\fP"))
1013241675Suqs		return(DELIM_MIDDLE);
1014241675Suqs
1015241675Suqs	return(DELIM_NONE);
1016241675Suqs}
1017