man.c revision 241675
/*	$Id: man.c,v 1.115 2012/01/03 15:16:24 kristaps Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17219820Sjeff#ifdef HAVE_CONFIG_H
18219820Sjeff#include "config.h"
19219820Sjeff#endif
20219820Sjeff
21219820Sjeff#include <sys/types.h>
22219820Sjeff
23219820Sjeff#include <assert.h>
24219820Sjeff#include <stdarg.h>
25219820Sjeff#include <stdlib.h>
26219820Sjeff#include <stdio.h>
27219820Sjeff#include <string.h>
28219820Sjeff
29219820Sjeff#include "man.h"
30219820Sjeff#include "mandoc.h"
31219820Sjeff#include "libman.h"
32219820Sjeff#include "libmandoc.h"
33219820Sjeff
34219820Sjeffconst	char *const __man_macronames[MAN_MAX] = {
35219820Sjeff	"br",		"TH",		"SH",		"SS",
36219820Sjeff	"TP", 		"LP",		"PP",		"P",
37219820Sjeff	"IP",		"HP",		"SM",		"SB",
38219820Sjeff	"BI",		"IB",		"BR",		"RB",
39219820Sjeff	"R",		"B",		"I",		"IR",
40219820Sjeff	"RI",		"na",		"sp",		"nf",
41219820Sjeff	"fi",		"RE",		"RS",		"DT",
42219820Sjeff	"UC",		"PD",		"AT",		"in",
43219820Sjeff	"ft",		"OP"
44219820Sjeff	};
45219820Sjeff
46219820Sjeffconst	char * const *man_macronames = __man_macronames;
47219820Sjeff
/* Forward declarations of the file-local helpers defined below. */
static	struct man_node	*man_node_alloc(struct man *m, int line, int pos,
				enum man_type type, enum mant tok);
static	int		 man_node_append(struct man *man,
				struct man_node *p);
static	void		 man_node_free(struct man_node *p);
static	void		 man_node_unlink(struct man *m,
				struct man_node *n);
static	int		 man_ptext(struct man *m, int line,
				char *buf, int offs);
static	int		 man_pmacro(struct man *m, int ln,
				char *buf, int offs);
static	void		 man_free1(struct man *man);
static	void		 man_alloc1(struct man *m);
static	int		 man_descope(struct man *m, int line, int offs);
60219820Sjeff
61219820Sjeff
62219820Sjeffconst struct man_node *
63219820Sjeffman_node(const struct man *m)
64219820Sjeff{
65219820Sjeff
66219820Sjeff	assert( ! (MAN_HALT & m->flags));
67219820Sjeff	return(m->first);
68219820Sjeff}
69219820Sjeff
70219820Sjeff
71219820Sjeffconst struct man_meta *
72219820Sjeffman_meta(const struct man *m)
73219820Sjeff{
74219820Sjeff
75219820Sjeff	assert( ! (MAN_HALT & m->flags));
76219820Sjeff	return(&m->meta);
77219820Sjeff}
78219820Sjeff
79219820Sjeff
/*
 * Reset the parser for reuse: release everything man_free1() frees,
 * then re-create the initial state with man_alloc1().  The "man"
 * structure itself is kept.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* drop the old tree and metadata */
	man_alloc1(man);	/* start over with a fresh root node */
}
87219820Sjeff
88219820Sjeff
/*
 * Destroy a parser: release its contents via man_free1(), then free
 * the "man" structure itself.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* contents first ... */
	free(man);		/* ... then the container */
}
96219820Sjeff
97219820Sjeff
98219820Sjeffstruct man *
99219820Sjeffman_alloc(struct roff *roff, struct mparse *parse)
100219820Sjeff{
101219820Sjeff	struct man	*p;
102219820Sjeff
103219820Sjeff	p = mandoc_calloc(1, sizeof(struct man));
104219820Sjeff
105219820Sjeff	man_hash_init();
106219820Sjeff	p->parse = parse;
107219820Sjeff	p->roff = roff;
108219820Sjeff
109219820Sjeff	man_alloc1(p);
110219820Sjeff	return(p);
111219820Sjeff}
112219820Sjeff
113219820Sjeff
114219820Sjeffint
115219820Sjeffman_endparse(struct man *m)
116219820Sjeff{
117219820Sjeff
118219820Sjeff	assert( ! (MAN_HALT & m->flags));
119219820Sjeff	if (man_macroend(m))
120219820Sjeff		return(1);
121219820Sjeff	m->flags |= MAN_HALT;
122	return(0);
123}
124
125
126int
127man_parseln(struct man *m, int ln, char *buf, int offs)
128{
129
130	m->flags |= MAN_NEWLINE;
131
132	assert( ! (MAN_HALT & m->flags));
133
134	return (mandoc_getcontrol(buf, &offs) ?
135			man_pmacro(m, ln, buf, offs) :
136			man_ptext(m, ln, buf, offs));
137}
138
139
140static void
141man_free1(struct man *man)
142{
143
144	if (man->first)
145		man_node_delete(man, man->first);
146	if (man->meta.title)
147		free(man->meta.title);
148	if (man->meta.source)
149		free(man->meta.source);
150	if (man->meta.date)
151		free(man->meta.date);
152	if (man->meta.vol)
153		free(man->meta.vol);
154	if (man->meta.msec)
155		free(man->meta.msec);
156}
157
158
159static void
160man_alloc1(struct man *m)
161{
162
163	memset(&m->meta, 0, sizeof(struct man_meta));
164	m->flags = 0;
165	m->last = mandoc_calloc(1, sizeof(struct man_node));
166	m->first = m->last;
167	m->last->type = MAN_ROOT;
168	m->last->tok = MAN_MAX;
169	m->next = MAN_NEXT_CHILD;
170}
171
172
173static int
174man_node_append(struct man *man, struct man_node *p)
175{
176
177	assert(man->last);
178	assert(man->first);
179	assert(MAN_ROOT != p->type);
180
181	switch (man->next) {
182	case (MAN_NEXT_SIBLING):
183		man->last->next = p;
184		p->prev = man->last;
185		p->parent = man->last->parent;
186		break;
187	case (MAN_NEXT_CHILD):
188		man->last->child = p;
189		p->parent = man->last;
190		break;
191	default:
192		abort();
193		/* NOTREACHED */
194	}
195
196	assert(p->parent);
197	p->parent->nchild++;
198
199	if ( ! man_valid_pre(man, p))
200		return(0);
201
202	switch (p->type) {
203	case (MAN_HEAD):
204		assert(MAN_BLOCK == p->parent->type);
205		p->parent->head = p;
206		break;
207	case (MAN_TAIL):
208		assert(MAN_BLOCK == p->parent->type);
209		p->parent->tail = p;
210		break;
211	case (MAN_BODY):
212		assert(MAN_BLOCK == p->parent->type);
213		p->parent->body = p;
214		break;
215	default:
216		break;
217	}
218
219	man->last = p;
220
221	switch (p->type) {
222	case (MAN_TBL):
223		/* FALLTHROUGH */
224	case (MAN_TEXT):
225		if ( ! man_valid_post(man))
226			return(0);
227		break;
228	default:
229		break;
230	}
231
232	return(1);
233}
234
235
236static struct man_node *
237man_node_alloc(struct man *m, int line, int pos,
238		enum man_type type, enum mant tok)
239{
240	struct man_node *p;
241
242	p = mandoc_calloc(1, sizeof(struct man_node));
243	p->line = line;
244	p->pos = pos;
245	p->type = type;
246	p->tok = tok;
247
248	if (MAN_NEWLINE & m->flags)
249		p->flags |= MAN_LINE;
250	m->flags &= ~MAN_NEWLINE;
251	return(p);
252}
253
254
255int
256man_elem_alloc(struct man *m, int line, int pos, enum mant tok)
257{
258	struct man_node *p;
259
260	p = man_node_alloc(m, line, pos, MAN_ELEM, tok);
261	if ( ! man_node_append(m, p))
262		return(0);
263	m->next = MAN_NEXT_CHILD;
264	return(1);
265}
266
267
268int
269man_tail_alloc(struct man *m, int line, int pos, enum mant tok)
270{
271	struct man_node *p;
272
273	p = man_node_alloc(m, line, pos, MAN_TAIL, tok);
274	if ( ! man_node_append(m, p))
275		return(0);
276	m->next = MAN_NEXT_CHILD;
277	return(1);
278}
279
280
281int
282man_head_alloc(struct man *m, int line, int pos, enum mant tok)
283{
284	struct man_node *p;
285
286	p = man_node_alloc(m, line, pos, MAN_HEAD, tok);
287	if ( ! man_node_append(m, p))
288		return(0);
289	m->next = MAN_NEXT_CHILD;
290	return(1);
291}
292
293
294int
295man_body_alloc(struct man *m, int line, int pos, enum mant tok)
296{
297	struct man_node *p;
298
299	p = man_node_alloc(m, line, pos, MAN_BODY, tok);
300	if ( ! man_node_append(m, p))
301		return(0);
302	m->next = MAN_NEXT_CHILD;
303	return(1);
304}
305
306
307int
308man_block_alloc(struct man *m, int line, int pos, enum mant tok)
309{
310	struct man_node *p;
311
312	p = man_node_alloc(m, line, pos, MAN_BLOCK, tok);
313	if ( ! man_node_append(m, p))
314		return(0);
315	m->next = MAN_NEXT_CHILD;
316	return(1);
317}
318
319int
320man_word_alloc(struct man *m, int line, int pos, const char *word)
321{
322	struct man_node	*n;
323
324	n = man_node_alloc(m, line, pos, MAN_TEXT, MAN_MAX);
325	n->string = roff_strdup(m->roff, word);
326
327	if ( ! man_node_append(m, n))
328		return(0);
329
330	m->next = MAN_NEXT_SIBLING;
331	return(1);
332}
333
334
335/*
336 * Free all of the resources held by a node.  This does NOT unlink a
337 * node from its context; for that, see man_node_unlink().
338 */
339static void
340man_node_free(struct man_node *p)
341{
342
343	if (p->string)
344		free(p->string);
345	free(p);
346}
347
348
349void
350man_node_delete(struct man *m, struct man_node *p)
351{
352
353	while (p->child)
354		man_node_delete(m, p->child);
355
356	man_node_unlink(m, p);
357	man_node_free(p);
358}
359
360int
361man_addeqn(struct man *m, const struct eqn *ep)
362{
363	struct man_node	*n;
364
365	assert( ! (MAN_HALT & m->flags));
366
367	n = man_node_alloc(m, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
368	n->eqn = ep;
369
370	if ( ! man_node_append(m, n))
371		return(0);
372
373	m->next = MAN_NEXT_SIBLING;
374	return(man_descope(m, ep->ln, ep->pos));
375}
376
377int
378man_addspan(struct man *m, const struct tbl_span *sp)
379{
380	struct man_node	*n;
381
382	assert( ! (MAN_HALT & m->flags));
383
384	n = man_node_alloc(m, sp->line, 0, MAN_TBL, MAN_MAX);
385	n->span = sp;
386
387	if ( ! man_node_append(m, n))
388		return(0);
389
390	m->next = MAN_NEXT_SIBLING;
391	return(man_descope(m, sp->line, 0));
392}
393
394static int
395man_descope(struct man *m, int line, int offs)
396{
397	/*
398	 * Co-ordinate what happens with having a next-line scope open:
399	 * first close out the element scope (if applicable), then close
400	 * out the block scope (also if applicable).
401	 */
402
403	if (MAN_ELINE & m->flags) {
404		m->flags &= ~MAN_ELINE;
405		if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
406			return(0);
407	}
408
409	if ( ! (MAN_BLINE & m->flags))
410		return(1);
411	m->flags &= ~MAN_BLINE;
412
413	if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
414		return(0);
415	return(man_body_alloc(m, line, offs, m->last->tok));
416}
417
418static int
419man_ptext(struct man *m, int line, char *buf, int offs)
420{
421	int		 i;
422
423	/* Literal free-form text whitespace is preserved. */
424
425	if (MAN_LITERAL & m->flags) {
426		if ( ! man_word_alloc(m, line, offs, buf + offs))
427			return(0);
428		return(man_descope(m, line, offs));
429	}
430
431	/* Pump blank lines directly into the backend. */
432
433	for (i = offs; ' ' == buf[i]; i++)
434		/* Skip leading whitespace. */ ;
435
436	if ('\0' == buf[i]) {
437		/* Allocate a blank entry. */
438		if ( ! man_word_alloc(m, line, offs, ""))
439			return(0);
440		return(man_descope(m, line, offs));
441	}
442
443	/*
444	 * Warn if the last un-escaped character is whitespace. Then
445	 * strip away the remaining spaces (tabs stay!).
446	 */
447
448	i = (int)strlen(buf);
449	assert(i);
450
451	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
452		if (i > 1 && '\\' != buf[i - 2])
453			man_pmsg(m, line, i - 1, MANDOCERR_EOLNSPACE);
454
455		for (--i; i && ' ' == buf[i]; i--)
456			/* Spin back to non-space. */ ;
457
458		/* Jump ahead of escaped whitespace. */
459		i += '\\' == buf[i] ? 2 : 1;
460
461		buf[i] = '\0';
462	}
463
464	if ( ! man_word_alloc(m, line, offs, buf + offs))
465		return(0);
466
467	/*
468	 * End-of-sentence check.  If the last character is an unescaped
469	 * EOS character, then flag the node as being the end of a
470	 * sentence.  The front-end will know how to interpret this.
471	 */
472
473	assert(i);
474	if (mandoc_eos(buf, (size_t)i, 0))
475		m->last->flags |= MAN_EOS;
476
477	return(man_descope(m, line, offs));
478}
479
480static int
481man_pmacro(struct man *m, int ln, char *buf, int offs)
482{
483	int		 i, ppos;
484	enum mant	 tok;
485	char		 mac[5];
486	struct man_node	*n;
487
488	if ('"' == buf[offs]) {
489		man_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
490		return(1);
491	} else if ('\0' == buf[offs])
492		return(1);
493
494	ppos = offs;
495
496	/*
497	 * Copy the first word into a nil-terminated buffer.
498	 * Stop copying when a tab, space, or eoln is encountered.
499	 */
500
501	i = 0;
502	while (i < 4 && '\0' != buf[offs] &&
503			' ' != buf[offs] && '\t' != buf[offs])
504		mac[i++] = buf[offs++];
505
506	mac[i] = '\0';
507
508	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
509
510	if (MAN_MAX == tok) {
511		mandoc_vmsg(MANDOCERR_MACRO, m->parse, ln,
512				ppos, "%s", buf + ppos - 1);
513		return(1);
514	}
515
516	/* The macro is sane.  Jump to the next word. */
517
518	while (buf[offs] && ' ' == buf[offs])
519		offs++;
520
521	/*
522	 * Trailing whitespace.  Note that tabs are allowed to be passed
523	 * into the parser as "text", so we only warn about spaces here.
524	 */
525
526	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
527		man_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
528
529	/*
530	 * Remove prior ELINE macro, as it's being clobbered by a new
531	 * macro.  Note that NSCOPED macros do not close out ELINE
532	 * macros---they don't print text---so we let those slip by.
533	 */
534
535	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
536			m->flags & MAN_ELINE) {
537		n = m->last;
538		assert(MAN_TEXT != n->type);
539
540		/* Remove repeated NSCOPED macros causing ELINE. */
541
542		if (MAN_NSCOPED & man_macros[n->tok].flags)
543			n = n->parent;
544
545		mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
546		    n->pos, "%s breaks %s", man_macronames[tok],
547		    man_macronames[n->tok]);
548
549		man_node_delete(m, n);
550		m->flags &= ~MAN_ELINE;
551	}
552
553	/*
554	 * Remove prior BLINE macro that is being clobbered.
555	 */
556	if ((m->flags & MAN_BLINE) &&
557	    (MAN_BSCOPE & man_macros[tok].flags)) {
558		n = m->last;
559
560		/* Might be a text node like 8 in
561		 * .TP 8
562		 * .SH foo
563		 */
564		if (MAN_TEXT == n->type)
565			n = n->parent;
566
567		/* Remove element that didn't end BLINE, if any. */
568		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
569			n = n->parent;
570
571		assert(MAN_HEAD == n->type);
572		n = n->parent;
573		assert(MAN_BLOCK == n->type);
574		assert(MAN_SCOPED & man_macros[n->tok].flags);
575
576		mandoc_vmsg(MANDOCERR_LINESCOPE, m->parse, n->line,
577		    n->pos, "%s breaks %s", man_macronames[tok],
578		    man_macronames[n->tok]);
579
580		man_node_delete(m, n);
581		m->flags &= ~MAN_BLINE;
582	}
583
584	/*
585	 * Save the fact that we're in the next-line for a block.  In
586	 * this way, embedded roff instructions can "remember" state
587	 * when they exit.
588	 */
589
590	if (MAN_BLINE & m->flags)
591		m->flags |= MAN_BPLINE;
592
593	/* Call to handler... */
594
595	assert(man_macros[tok].fp);
596	if ( ! (*man_macros[tok].fp)(m, tok, ln, ppos, &offs, buf))
597		goto err;
598
599	/*
600	 * We weren't in a block-line scope when entering the
601	 * above-parsed macro, so return.
602	 */
603
604	if ( ! (MAN_BPLINE & m->flags)) {
605		m->flags &= ~MAN_ILINE;
606		return(1);
607	}
608	m->flags &= ~MAN_BPLINE;
609
610	/*
611	 * If we're in a block scope, then allow this macro to slip by
612	 * without closing scope around it.
613	 */
614
615	if (MAN_ILINE & m->flags) {
616		m->flags &= ~MAN_ILINE;
617		return(1);
618	}
619
620	/*
621	 * If we've opened a new next-line element scope, then return
622	 * now, as the next line will close out the block scope.
623	 */
624
625	if (MAN_ELINE & m->flags)
626		return(1);
627
628	/* Close out the block scope opened in the prior line.  */
629
630	assert(MAN_BLINE & m->flags);
631	m->flags &= ~MAN_BLINE;
632
633	if ( ! man_unscope(m, m->last->parent, MANDOCERR_MAX))
634		return(0);
635	return(man_body_alloc(m, ln, ppos, m->last->tok));
636
637err:	/* Error out. */
638
639	m->flags |= MAN_HALT;
640	return(0);
641}
642
643/*
644 * Unlink a node from its context.  If "m" is provided, the last parse
645 * point will also be adjusted accordingly.
646 */
647static void
648man_node_unlink(struct man *m, struct man_node *n)
649{
650
651	/* Adjust siblings. */
652
653	if (n->prev)
654		n->prev->next = n->next;
655	if (n->next)
656		n->next->prev = n->prev;
657
658	/* Adjust parent. */
659
660	if (n->parent) {
661		n->parent->nchild--;
662		if (n->parent->child == n)
663			n->parent->child = n->prev ? n->prev : n->next;
664	}
665
666	/* Adjust parse point, if applicable. */
667
668	if (m && m->last == n) {
669		/*XXX: this can occur when bailing from validation. */
670		/*assert(NULL == n->next);*/
671		if (n->prev) {
672			m->last = n->prev;
673			m->next = MAN_NEXT_SIBLING;
674		} else {
675			m->last = n->parent;
676			m->next = MAN_NEXT_CHILD;
677		}
678	}
679
680	if (m && m->first == n)
681		m->first = NULL;
682}
683
684const struct mparse *
685man_mparse(const struct man *m)
686{
687
688	assert(m && m->parse);
689	return(m->parse);
690}
691