mdoc.c revision 1.49
150476Speter/*	$Id: mdoc.c,v 1.49 2010/05/15 16:48:12 schwarze Exp $ */
215903Swosch/*
315903Swosch * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
415903Swosch *
515903Swosch * Permission to use, copy, modify, and distribute this software for any
615903Swosch * purpose with or without fee is hereby granted, provided that the above
715903Swosch * copyright notice and this permission notice appear in all copies.
8105327Sru *
9105327Sru * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10105327Sru * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11105327Sru * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1215903Swosch * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1315903Swosch * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14105327Sru * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15105327Sru * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16105327Sru */
17105327Sru#include <sys/types.h>
1815903Swosch
1915903Swosch#include <assert.h>
2015903Swosch#include <ctype.h>
21139761Skrion#include <stdarg.h>
2215903Swosch#include <stdio.h>
2315903Swosch#include <stdlib.h>
24124490Sru#include <string.h>
25124490Sru#include <time.h>
2615903Swosch
2715903Swosch#include "libmdoc.h"
2815903Swosch#include "libmandoc.h"
2915903Swosch
3015903Swoschconst	char *const __mdoc_merrnames[MERRMAX] = {
3115903Swosch	"trailing whitespace", /* ETAILWS */
3215903Swosch	"unexpected quoted parameter", /* EQUOTPARM */
3315903Swosch	"unterminated quoted parameter", /* EQUOTTERM */
3415903Swosch	"argument parameter suggested", /* EARGVAL */
3515903Swosch	"macro disallowed in prologue", /* EBODYPROL */
3615903Swosch	"macro disallowed in body", /* EPROLBODY */
37105327Sru	"text disallowed in prologue", /* ETEXTPROL */
38105327Sru	"blank line disallowed", /* ENOBLANK */
39105327Sru	"text parameter too long", /* ETOOLONG */
40105327Sru	"invalid escape sequence", /* EESCAPE */
411845Swollman	"invalid character", /* EPRINT */
4295255Sru	"document has no body", /* ENODAT */
4395255Sru	"document has no prologue", /* ENOPROLOGUE */
4495255Sru	"expected line arguments", /* ELINE */
4515903Swosch	"invalid AT&T argument", /* EATT */
46105327Sru	"default name not yet set", /* ENAME */
47105327Sru	"missing list type", /* ELISTTYPE */
48105327Sru	"missing display type", /* EDISPTYPE */
49105327Sru	"too many display types", /* EMULTIDISP */
50105327Sru	"too many list types", /* EMULTILIST */
5194768Sru	"NAME section must be first", /* ESECNAME */
5296132Sbde	"badly-formed NAME section", /* ENAMESECINC */
5394768Sru	"argument repeated", /* EARGREP */
5494768Sru	"expected boolean parameter", /* EBOOL */
5594768Sru	"inconsistent column syntax", /* ECOLMIS */
5614986Swosch	"nested display invalid", /* ENESTDISP */
5714986Swosch	"width argument missing", /* EMISSWIDTH */
58105327Sru	"invalid section for this manual section", /* EWRONGMSEC */
59139108Sru	"section out of conventional order", /* ESECOOO */
60105327Sru	"section repeated", /* ESECREP */
61202579Sru	"invalid standard argument", /* EBADSTAND */
62105327Sru	"multi-line arguments discouraged", /* ENOMULTILINE */
63105327Sru	"multi-line arguments suggested", /* EMULTILINE */
64105327Sru	"line arguments discouraged", /* ENOLINE */
65105327Sru	"prologue macro out of conventional order", /* EPROLOOO */
66202579Sru	"prologue macro repeated", /* EPROLREP */
67202579Sru	"invalid section", /* EBADSEC */
68202579Sru	"invalid font mode", /* EFONT */
69105327Sru	"invalid date syntax", /* EBADDATE */
70105327Sru	"invalid number format", /* ENUMFMT */
71105327Sru	"superfluous width argument", /* ENOWIDTH */
7235951Sbde	"system: utsname error", /* EUTSNAME */
7335951Sbde	"obsolete macro", /* EOBS */
7435951Sbde	"end-of-line scope violation", /* EIMPBRK */
75124637Sru	"empty macro ignored", /* EIGNE */
76124637Sru	"unclosed explicit scope", /* EOPEN */
77124637Sru	"unterminated quoted phrase", /* EQUOTPHR */
78124637Sru	"closure macro without prior context", /* ENOCTX */
79124637Sru	"no description found for library", /* ELIB */
80124637Sru	"bad child for parent context", /* EBADCHILD */
8135951Sbde	"list arguments preceding type", /* ENOTYPE */
82124435Sru	"deprecated comment style", /* EBADCOMMENT */
8335951Sbde};
8435951Sbde
85124637Sruconst	char *const __mdoc_macronames[MDOC_MAX] = {
86124637Sru	"Ap",		"Dd",		"Dt",		"Os",
87124637Sru	"Sh",		"Ss",		"Pp",		"D1",
8835951Sbde	"Dl",		"Bd",		"Ed",		"Bl",
89124435Sru	"El",		"It",		"Ad",		"An",
9035951Sbde	"Ar",		"Cd",		"Cm",		"Dv",
9135951Sbde	"Er",		"Ev",		"Ex",		"Fa",
9235951Sbde	"Fd",		"Fl",		"Fn",		"Ft",
9335951Sbde	"Ic",		"In",		"Li",		"Nd",
94124435Sru	"Nm",		"Op",		"Ot",		"Pa",
9535951Sbde	"Rv",		"St",		"Va",		"Vt",
96124435Sru	/* LINTED */
97125119Sru	"Xr",		"%A",		"%B",		"%D",
9835951Sbde	/* LINTED */
9935951Sbde	"%I",		"%J",		"%N",		"%O",
10035951Sbde	/* LINTED */
10135951Sbde	"%P",		"%R",		"%T",		"%V",
102124435Sru	"Ac",		"Ao",		"Aq",		"At",
103125119Sru	"Bc",		"Bf",		"Bo",		"Bq",
104124435Sru	"Bsx",		"Bx",		"Db",		"Dc",
105241298Smarcel	"Do",		"Dq",		"Ec",		"Ef",
106241298Smarcel	"Em",		"Eo",		"Fx",		"Ms",
10735951Sbde	"No",		"Ns",		"Nx",		"Ox",
108124435Sru	"Pc",		"Pf",		"Po",		"Pq",
109124435Sru	"Qc",		"Ql",		"Qo",		"Qq",
11035951Sbde	"Re",		"Rs",		"Sc",		"So",
11135951Sbde	"Sq",		"Sm",		"Sx",		"Sy",
11235951Sbde	"Tn",		"Ux",		"Xc",		"Xo",
11335951Sbde	"Fo",		"Fc",		"Oo",		"Oc",
11435951Sbde	"Bk",		"Ek",		"Bt",		"Hf",
115124637Sru	"Fr",		"Ud",		"Lb",		"Lp",
116124637Sru	"Lk",		"Mt",		"Brq",		"Bro",
117124637Sru	/* LINTED */
11835951Sbde	"Brc",		"%C",		"Es",		"En",
11935951Sbde	/* LINTED */
12035951Sbde	"Dx",		"%Q",		"br",		"sp",
12135951Sbde	/* LINTED */
1221845Swollman	"%U"
12330113Sjkh	};
12495306Sru
12514986Swoschconst	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
126241298Smarcel	"split",		"nosplit",		"ragged",
127241298Smarcel	"unfilled",		"literal",		"file",
128241298Smarcel	"offset",		"bullet",		"dash",
12934181Sbde	"hyphen",		"item",			"enum",
13034181Sbde	"tag",			"diag",			"hang",
131239613Sdim	"ohang",		"inset",		"column",
132239613Sdim	"width",		"compact",		"std",
133239613Sdim	"filled",		"words",		"emphasis",
134239613Sdim	"symbolic",		"nested",		"centered"
135126890Strhodes	};
136124490Sru
137124490Sruconst	char * const *mdoc_macronames = __mdoc_macronames;
13814986Swoschconst	char * const *mdoc_argnames = __mdoc_argnames;
139125119Sru
/* Forward declarations of the helpers that are private to this file. */
static	void		  mdoc_node_free(struct mdoc_node *p);
static	void		  mdoc_node_unlink(struct mdoc *m,
				struct mdoc_node *n);
static	void		  mdoc_free1(struct mdoc *mdoc);
static	void		  mdoc_alloc1(struct mdoc *mdoc);
static	struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
				enum mdoct tok, enum mdoc_type type);
static	int		  node_append(struct mdoc *mdoc,
				struct mdoc_node *p);
static	int		  mdoc_ptext(struct mdoc *m, int line, char *buf);
static	int		  mdoc_pmacro(struct mdoc *m, int ln, char *buf);
static	int		  macrowarn(struct mdoc *m, int ln, const char *buf);
15224750Sbde
15314986Swosch
15430113Sjkhconst struct mdoc_node *
1551845Swollmanmdoc_node(const struct mdoc *m)
15695306Sru{
1571845Swollman
1581845Swollman	return(MDOC_HALT & m->flags ? NULL : m->first);
1591845Swollman}
16030113Sjkh
16130113Sjkh
16230113Sjkhconst struct mdoc_meta *
1631845Swollmanmdoc_meta(const struct mdoc *m)
1641845Swollman{
1651845Swollman
1661845Swollman	return(MDOC_HALT & m->flags ? NULL : &m->meta);
1671845Swollman}
1681845Swollman
16916663Sjkh
17095306Sru/*
17128806Sbde * Frees volatile resources (parse tree, meta-data, fields).
172202579Sru */
173105327Srustatic void
17424861Sjkhmdoc_free1(struct mdoc *mdoc)
175105327Sru{
17616663Sjkh
177202578Sru	if (mdoc->first)
178202579Sru		mdoc_node_delete(mdoc, mdoc->first);
17916663Sjkh	if (mdoc->meta.title)
18024861Sjkh		free(mdoc->meta.title);
181105327Sru	if (mdoc->meta.os)
18299344Sru		free(mdoc->meta.os);
18399344Sru	if (mdoc->meta.name)
184117195Sbde		free(mdoc->meta.name);
185117195Sbde	if (mdoc->meta.arch)
18699344Sru		free(mdoc->meta.arch);
187117195Sbde	if (mdoc->meta.vol)
188117195Sbde		free(mdoc->meta.vol);
189117195Sbde}
190117195Sbde
19199344Sru
192117195Sbde/*
193 * Allocate all volatile resources (parse tree, meta-data, fields).
194 */
195static void
196mdoc_alloc1(struct mdoc *mdoc)
197{
198
199	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
200	mdoc->flags = 0;
201	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
202	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
203	mdoc->first = mdoc->last;
204	mdoc->last->type = MDOC_ROOT;
205	mdoc->next = MDOC_NEXT_CHILD;
206}
207
208
/*
 * Prepare the parser for another document: drop the per-parse data via
 * mdoc_free1() and rebuild it via mdoc_alloc1().  Fields those two
 * helpers do not touch (callbacks, user data, parse flags) survive.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* tear down tree and meta-data */
	mdoc_alloc1(mdoc);	/* start over with an empty root */
}
222
223
/*
 * Destroy a parser: release the per-parse data, then the structure
 * itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* tree and meta-data first */
	free(mdoc);		/* then the container */
}
235
236
237/*
238 * Allocate volatile and non-volatile parse resources.
239 */
240struct mdoc *
241mdoc_alloc(void *data, int pflags, const struct mdoc_cb *cb)
242{
243	struct mdoc	*p;
244
245	p = mandoc_calloc(1, sizeof(struct mdoc));
246
247	if (cb)
248		memcpy(&p->cb, cb, sizeof(struct mdoc_cb));
249
250	p->data = data;
251	p->pflags = pflags;
252
253	mdoc_hash_init();
254	mdoc_alloc1(p);
255	return(p);
256}
257
258
259/*
260 * Climb back up the parse tree, validating open scopes.  Mostly calls
261 * through to macro_end() in macro.c.
262 */
263int
264mdoc_endparse(struct mdoc *m)
265{
266
267	if (MDOC_HALT & m->flags)
268		return(0);
269	else if (mdoc_macroend(m))
270		return(1);
271	m->flags |= MDOC_HALT;
272	return(0);
273}
274
275
276/*
277 * Main parse routine.  Parses a single line -- really just hands off to
278 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
279 */
280int
281mdoc_parseln(struct mdoc *m, int ln, char *buf)
282{
283
284	if (MDOC_HALT & m->flags)
285		return(0);
286
287	m->flags |= MDOC_NEWLINE;
288	return('.' == *buf ?
289			mdoc_pmacro(m, ln, buf) :
290			mdoc_ptext(m, ln, buf));
291}
292
293
294int
295mdoc_verr(struct mdoc *mdoc, int ln, int pos,
296		const char *fmt, ...)
297{
298	char		 buf[256];
299	va_list		 ap;
300
301	if (NULL == mdoc->cb.mdoc_err)
302		return(0);
303
304	va_start(ap, fmt);
305	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
306	va_end(ap);
307
308	return((*mdoc->cb.mdoc_err)(mdoc->data, ln, pos, buf));
309}
310
311
312int
313mdoc_vwarn(struct mdoc *mdoc, int ln, int pos, const char *fmt, ...)
314{
315	char		 buf[256];
316	va_list		 ap;
317
318	if (NULL == mdoc->cb.mdoc_warn)
319		return(0);
320
321	va_start(ap, fmt);
322	(void)vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
323	va_end(ap);
324
325	return((*mdoc->cb.mdoc_warn)(mdoc->data, ln, pos, buf));
326}
327
328
329int
330mdoc_err(struct mdoc *m, int line, int pos, int iserr, enum merr type)
331{
332	const char	*p;
333
334	p = __mdoc_merrnames[(int)type];
335	assert(p);
336
337	if (iserr)
338		return(mdoc_verr(m, line, pos, p));
339
340	return(mdoc_vwarn(m, line, pos, p));
341}
342
343
344int
345mdoc_macro(struct mdoc *m, enum mdoct tok,
346		int ln, int pp, int *pos, char *buf)
347{
348	assert(tok < MDOC_MAX);
349
350	/* If we're in the body, deny prologue calls. */
351
352	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
353			MDOC_PBODY & m->flags)
354		return(mdoc_perr(m, ln, pp, EPROLBODY));
355
356	/* If we're in the prologue, deny "body" macros.  */
357
358	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
359			! (MDOC_PBODY & m->flags)) {
360		if ( ! mdoc_pwarn(m, ln, pp, EBODYPROL))
361			return(0);
362		if (NULL == m->meta.title)
363			m->meta.title = mandoc_strdup("unknown");
364		if (NULL == m->meta.vol)
365			m->meta.vol = mandoc_strdup("local");
366		if (NULL == m->meta.os)
367			m->meta.os = mandoc_strdup("local");
368		if (0 == m->meta.date)
369			m->meta.date = time(NULL);
370		m->flags |= MDOC_PBODY;
371	}
372
373	return((*mdoc_macros[tok].fp)(m, tok, ln, pp, pos, buf));
374}
375
376
377static int
378node_append(struct mdoc *mdoc, struct mdoc_node *p)
379{
380
381	assert(mdoc->last);
382	assert(mdoc->first);
383	assert(MDOC_ROOT != p->type);
384
385	switch (mdoc->next) {
386	case (MDOC_NEXT_SIBLING):
387		mdoc->last->next = p;
388		p->prev = mdoc->last;
389		p->parent = mdoc->last->parent;
390		break;
391	case (MDOC_NEXT_CHILD):
392		mdoc->last->child = p;
393		p->parent = mdoc->last;
394		break;
395	default:
396		abort();
397		/* NOTREACHED */
398	}
399
400	p->parent->nchild++;
401
402	if ( ! mdoc_valid_pre(mdoc, p))
403		return(0);
404	if ( ! mdoc_action_pre(mdoc, p))
405		return(0);
406
407	switch (p->type) {
408	case (MDOC_HEAD):
409		assert(MDOC_BLOCK == p->parent->type);
410		p->parent->head = p;
411		break;
412	case (MDOC_TAIL):
413		assert(MDOC_BLOCK == p->parent->type);
414		p->parent->tail = p;
415		break;
416	case (MDOC_BODY):
417		assert(MDOC_BLOCK == p->parent->type);
418		p->parent->body = p;
419		break;
420	default:
421		break;
422	}
423
424	mdoc->last = p;
425
426	switch (p->type) {
427	case (MDOC_TEXT):
428		if ( ! mdoc_valid_post(mdoc))
429			return(0);
430		if ( ! mdoc_action_post(mdoc))
431			return(0);
432		break;
433	default:
434		break;
435	}
436
437	return(1);
438}
439
440
441static struct mdoc_node *
442node_alloc(struct mdoc *m, int line, int pos,
443		enum mdoct tok, enum mdoc_type type)
444{
445	struct mdoc_node *p;
446
447	p = mandoc_calloc(1, sizeof(struct mdoc_node));
448	p->sec = m->lastsec;
449	p->line = line;
450	p->pos = pos;
451	p->tok = tok;
452	p->type = type;
453	if (MDOC_NEWLINE & m->flags)
454		p->flags |= MDOC_LINE;
455	m->flags &= ~MDOC_NEWLINE;
456	return(p);
457}
458
459
460int
461mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
462{
463	struct mdoc_node *p;
464
465	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
466	if ( ! node_append(m, p))
467		return(0);
468	m->next = MDOC_NEXT_CHILD;
469	return(1);
470}
471
472
473int
474mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
475{
476	struct mdoc_node *p;
477
478	assert(m->first);
479	assert(m->last);
480
481	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
482	if ( ! node_append(m, p))
483		return(0);
484	m->next = MDOC_NEXT_CHILD;
485	return(1);
486}
487
488
489int
490mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
491{
492	struct mdoc_node *p;
493
494	p = node_alloc(m, line, pos, tok, MDOC_BODY);
495	if ( ! node_append(m, p))
496		return(0);
497	m->next = MDOC_NEXT_CHILD;
498	return(1);
499}
500
501
502int
503mdoc_block_alloc(struct mdoc *m, int line, int pos,
504		enum mdoct tok, struct mdoc_arg *args)
505{
506	struct mdoc_node *p;
507
508	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
509	p->args = args;
510	if (p->args)
511		(args->refcnt)++;
512	if ( ! node_append(m, p))
513		return(0);
514	m->next = MDOC_NEXT_CHILD;
515	return(1);
516}
517
518
519int
520mdoc_elem_alloc(struct mdoc *m, int line, int pos,
521		enum mdoct tok, struct mdoc_arg *args)
522{
523	struct mdoc_node *p;
524
525	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
526	p->args = args;
527	if (p->args)
528		(args->refcnt)++;
529	if ( ! node_append(m, p))
530		return(0);
531	m->next = MDOC_NEXT_CHILD;
532	return(1);
533}
534
535
536int
537mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
538{
539	struct mdoc_node *n;
540	size_t		  sv, len;
541
542	len = strlen(p);
543
544	n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
545	n->string = mandoc_malloc(len + 1);
546	sv = strlcpy(n->string, p, len + 1);
547
548	/* Prohibit truncation. */
549	assert(sv < len + 1);
550
551	if ( ! node_append(m, n))
552		return(0);
553
554	m->next = MDOC_NEXT_SIBLING;
555	return(1);
556}
557
558
559void
560mdoc_node_free(struct mdoc_node *p)
561{
562
563	if (p->string)
564		free(p->string);
565	if (p->args)
566		mdoc_argv_free(p->args);
567	free(p);
568}
569
570
571static void
572mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n)
573{
574
575	/* Adjust siblings. */
576
577	if (n->prev)
578		n->prev->next = n->next;
579	if (n->next)
580		n->next->prev = n->prev;
581
582	/* Adjust parent. */
583
584	if (n->parent) {
585		n->parent->nchild--;
586		if (n->parent->child == n)
587			n->parent->child = n->prev ? n->prev : n->next;
588	}
589
590	/* Adjust parse point, if applicable. */
591
592	if (m && m->last == n) {
593		if (n->prev) {
594			m->last = n->prev;
595			m->next = MDOC_NEXT_SIBLING;
596		} else {
597			m->last = n->parent;
598			m->next = MDOC_NEXT_CHILD;
599		}
600	}
601
602	if (m && m->first == n)
603		m->first = NULL;
604}
605
606
607void
608mdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
609{
610
611	while (p->child) {
612		assert(p->nchild);
613		mdoc_node_delete(m, p->child);
614	}
615	assert(0 == p->nchild);
616
617	mdoc_node_unlink(m, p);
618	mdoc_node_free(p);
619}
620
621
622/*
623 * Parse free-form text, that is, a line that does not begin with the
624 * control character.
625 */
626static int
627mdoc_ptext(struct mdoc *m, int line, char *buf)
628{
629	int		 i;
630
631	/* Ignore bogus comments. */
632
633	if ('\\' == buf[0] && '.' == buf[1] && '\"' == buf[2])
634		return(mdoc_pwarn(m, line, 0, EBADCOMMENT));
635
636	/* No text before an initial macro. */
637
638	if (SEC_NONE == m->lastnamed)
639		return(mdoc_perr(m, line, 0, ETEXTPROL));
640
641	/* Literal just gets pulled in as-is. */
642
643	if (MDOC_LITERAL & m->flags)
644		return(mdoc_word_alloc(m, line, 0, buf));
645
646	/* Check for a blank line, which may also consist of spaces. */
647
648	for (i = 0; ' ' == buf[i]; i++)
649		/* Skip to first non-space. */ ;
650
651	if ('\0' == buf[i]) {
652		if ( ! mdoc_pwarn(m, line, 0, ENOBLANK))
653			return(0);
654
655		/*
656		 * Insert a `Pp' in the case of a blank line.  Technically,
657		 * blank lines aren't allowed, but enough manuals assume this
658		 * behaviour that we want to work around it.
659		 */
660		if ( ! mdoc_elem_alloc(m, line, 0, MDOC_Pp, NULL))
661			return(0);
662
663		m->next = MDOC_NEXT_SIBLING;
664		return(1);
665	}
666
667	/*
668	 * Warn if the last un-escaped character is whitespace. Then
669	 * strip away the remaining spaces (tabs stay!).
670	 */
671
672	i = (int)strlen(buf);
673	assert(i);
674
675	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
676		if (i > 1 && '\\' != buf[i - 2])
677			if ( ! mdoc_pwarn(m, line, i - 1, ETAILWS))
678				return(0);
679
680		for (--i; i && ' ' == buf[i]; i--)
681			/* Spin back to non-space. */ ;
682
683		/* Jump ahead of escaped whitespace. */
684		i += '\\' == buf[i] ? 2 : 1;
685
686		buf[i] = '\0';
687	}
688
689	/* Allocate the whole word. */
690
691	if ( ! mdoc_word_alloc(m, line, 0, buf))
692		return(0);
693
694	/*
695	 * End-of-sentence check.  If the last character is an unescaped
696	 * EOS character, then flag the node as being the end of a
697	 * sentence.  The front-end will know how to interpret this.
698	 */
699
700	assert(i);
701
702	if (mandoc_eos(buf, (size_t)i))
703		m->last->flags |= MDOC_EOS;
704
705	return(1);
706}
707
708
709static int
710macrowarn(struct mdoc *m, int ln, const char *buf)
711{
712	if ( ! (MDOC_IGN_MACRO & m->pflags))
713		return(mdoc_verr(m, ln, 0, "unknown macro: %s%s",
714				buf, strlen(buf) > 3 ? "..." : ""));
715	return(mdoc_vwarn(m, ln, 0, "unknown macro: %s%s",
716				buf, strlen(buf) > 3 ? "..." : ""));
717}
718
719
720/*
721 * Parse a macro line, that is, a line beginning with the control
722 * character.
723 */
724int
725mdoc_pmacro(struct mdoc *m, int ln, char *buf)
726{
727	enum mdoct	tok;
728	int		i, j, sv;
729	char		mac[5];
730
731	/* Empty lines are ignored. */
732
733	if ('\0' == buf[1])
734		return(1);
735
736	i = 1;
737
738	/* Accept whitespace after the initial control char. */
739
740	if (' ' == buf[i]) {
741		i++;
742		while (buf[i] && ' ' == buf[i])
743			i++;
744		if ('\0' == buf[i])
745			return(1);
746	}
747
748	sv = i;
749
750	/* Copy the first word into a nil-terminated buffer. */
751
752	for (j = 0; j < 4; j++, i++) {
753		if ('\0' == (mac[j] = buf[i]))
754			break;
755		else if (' ' == buf[i])
756			break;
757
758		/* Check for invalid characters. */
759
760		if (isgraph((u_char)buf[i]))
761			continue;
762		return(mdoc_perr(m, ln, i, EPRINT));
763	}
764
765	mac[j] = 0;
766
767	if (j == 4 || j < 2) {
768		if ( ! macrowarn(m, ln, mac))
769			goto err;
770		return(1);
771	}
772
773	if (MDOC_MAX == (tok = mdoc_hash_find(mac))) {
774		if ( ! macrowarn(m, ln, mac))
775			goto err;
776		return(1);
777	}
778
779	/* The macro is sane.  Jump to the next word. */
780
781	while (buf[i] && ' ' == buf[i])
782		i++;
783
784	/*
785	 * Trailing whitespace.  Note that tabs are allowed to be passed
786	 * into the parser as "text", so we only warn about spaces here.
787	 */
788
789	if ('\0' == buf[i] && ' ' == buf[i - 1])
790		if ( ! mdoc_pwarn(m, ln, i - 1, ETAILWS))
791			goto err;
792
793	/*
794	 * Begin recursive parse sequence.  Since we're at the start of
795	 * the line, we don't need to do callable/parseable checks.
796	 */
797	if ( ! mdoc_macro(m, tok, ln, sv, &i, buf))
798		goto err;
799
800	return(1);
801
802err:	/* Error out. */
803
804	m->flags |= MDOC_HALT;
805	return(0);
806}
807
808
809