man_validate.c revision 274880
1/*	$Id: man_validate.c,v 1.105 2014/08/06 15:09:05 schwarze Exp $ */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <sys/types.h>
23
24#include <assert.h>
25#include <ctype.h>
26#include <errno.h>
27#include <limits.h>
28#include <stdarg.h>
29#include <stdlib.h>
30#include <string.h>
31#include <time.h>
32
33#include "man.h"
34#include "mandoc.h"
35#include "mandoc_aux.h"
36#include "libman.h"
37#include "libmandoc.h"
38
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	  struct man *man, struct man_node *n

/* Signature of a validation handler. */
typedef	int	(*v_check)(CHKARGS);

/* Generic checks: argument counts, paragraphs, parts, root and text. */
static	int	  check_eq0(CHKARGS);
static	int	  check_eq2(CHKARGS);
static	int	  check_le1(CHKARGS);
static	int	  check_le5(CHKARGS);
static	int	  check_par(CHKARGS);
static	int	  check_part(CHKARGS);
static	int	  check_root(CHKARGS);
static	int	  check_text(CHKARGS);

/* Macro-specific handlers. */
static	int	  post_AT(CHKARGS);
static	int	  post_IP(CHKARGS);
static	int	  post_TH(CHKARGS);
static	int	  post_UC(CHKARGS);
static	int	  post_UR(CHKARGS);
static	int	  post_fi(CHKARGS);
static	int	  post_ft(CHKARGS);
static	int	  post_nf(CHKARGS);
static	int	  post_vs(CHKARGS);
61
62static	v_check man_valids[MAN_MAX] = {
63	post_vs,    /* br */
64	post_TH,    /* TH */
65	NULL,       /* SH */
66	NULL,       /* SS */
67	NULL,       /* TP */
68	check_par,  /* LP */
69	check_par,  /* PP */
70	check_par,  /* P */
71	post_IP,    /* IP */
72	NULL,       /* HP */
73	NULL,       /* SM */
74	NULL,       /* SB */
75	NULL,       /* BI */
76	NULL,       /* IB */
77	NULL,       /* BR */
78	NULL,       /* RB */
79	NULL,       /* R */
80	NULL,       /* B */
81	NULL,       /* I */
82	NULL,       /* IR */
83	NULL,       /* RI */
84	check_eq0,  /* na */
85	post_vs,    /* sp */
86	post_nf,    /* nf */
87	post_fi,    /* fi */
88	NULL,       /* RE */
89	check_part, /* RS */
90	NULL,       /* DT */
91	post_UC,    /* UC */
92	check_le1,  /* PD */
93	post_AT,    /* AT */
94	NULL,       /* in */
95	post_ft,    /* ft */
96	check_eq2,  /* OP */
97	post_nf,    /* EX */
98	post_fi,    /* EE */
99	post_UR,    /* UR */
100	NULL,       /* UE */
101	NULL,       /* ll */
102};
103
104
105int
106man_valid_post(struct man *man)
107{
108	struct man_node	*n;
109	v_check		*cp;
110
111	n = man->last;
112	if (n->flags & MAN_VALID)
113		return(1);
114	n->flags |= MAN_VALID;
115
116	switch (n->type) {
117	case MAN_TEXT:
118		return(check_text(man, n));
119	case MAN_ROOT:
120		return(check_root(man, n));
121	case MAN_EQN:
122		/* FALLTHROUGH */
123	case MAN_TBL:
124		return(1);
125	default:
126		cp = man_valids + n->tok;
127		return(*cp ? (*cp)(man, n) : 1);
128	}
129}
130
131static int
132check_root(CHKARGS)
133{
134
135	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
136
137	if (NULL == man->first->child)
138		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
139		    n->line, n->pos, NULL);
140	else
141		man->meta.hasbody = 1;
142
143	if (NULL == man->meta.title) {
144		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
145		    n->line, n->pos, NULL);
146
147		/*
148		 * If a title hasn't been set, do so now (by
149		 * implication, date and section also aren't set).
150		 */
151
152		man->meta.title = mandoc_strdup("");
153		man->meta.msec = mandoc_strdup("");
154		man->meta.date = man->quick ? mandoc_strdup("") :
155		    mandoc_normdate(man->parse, NULL, n->line, n->pos);
156	}
157
158	return(1);
159}
160
161static int
162check_text(CHKARGS)
163{
164	char		*cp, *p;
165
166	if (MAN_LITERAL & man->flags)
167		return(1);
168
169	cp = n->string;
170	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
171		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
172		    n->line, n->pos + (p - cp), NULL);
173	return(1);
174}
175
176#define	INEQ_DEFINE(x, ineq, name) \
177static int \
178check_##name(CHKARGS) \
179{ \
180	if (n->nchild ineq (x)) \
181		return(1); \
182	mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \
183	    "line arguments %s %d (have %d)", \
184	    #ineq, (x), n->nchild); \
185	return(1); \
186}
187
188INEQ_DEFINE(0, ==, eq0)
189INEQ_DEFINE(2, ==, eq2)
190INEQ_DEFINE(1, <=, le1)
191INEQ_DEFINE(5, <=, le5)
192
193static int
194post_UR(CHKARGS)
195{
196
197	if (MAN_HEAD == n->type && 1 != n->nchild)
198		mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
199		    n->pos, "line arguments eq 1 (have %d)", n->nchild);
200
201	return(check_part(man, n));
202}
203
204static int
205post_ft(CHKARGS)
206{
207	char	*cp;
208	int	 ok;
209
210	if (0 == n->nchild)
211		return(1);
212
213	ok = 0;
214	cp = n->child->string;
215	switch (*cp) {
216	case '1':
217		/* FALLTHROUGH */
218	case '2':
219		/* FALLTHROUGH */
220	case '3':
221		/* FALLTHROUGH */
222	case '4':
223		/* FALLTHROUGH */
224	case 'I':
225		/* FALLTHROUGH */
226	case 'P':
227		/* FALLTHROUGH */
228	case 'R':
229		if ('\0' == cp[1])
230			ok = 1;
231		break;
232	case 'B':
233		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
234			ok = 1;
235		break;
236	case 'C':
237		if ('W' == cp[1] && '\0' == cp[2])
238			ok = 1;
239		break;
240	default:
241		break;
242	}
243
244	if (0 == ok) {
245		mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
246		    n->line, n->pos, "ft %s", cp);
247		*cp = '\0';
248	}
249
250	if (1 < n->nchild)
251		mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
252		    n->pos, "want one child (have %d)", n->nchild);
253
254	return(1);
255}
256
257static int
258check_part(CHKARGS)
259{
260
261	if (MAN_BODY == n->type && 0 == n->nchild)
262		mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line,
263		    n->pos, "want children (have none)");
264
265	return(1);
266}
267
268static int
269check_par(CHKARGS)
270{
271
272	switch (n->type) {
273	case MAN_BLOCK:
274		if (0 == n->body->nchild)
275			man_node_delete(man, n);
276		break;
277	case MAN_BODY:
278		if (0 == n->nchild)
279			mandoc_vmsg(MANDOCERR_PAR_SKIP,
280			    man->parse, n->line, n->pos,
281			    "%s empty", man_macronames[n->tok]);
282		break;
283	case MAN_HEAD:
284		if (n->nchild)
285			mandoc_vmsg(MANDOCERR_ARG_SKIP,
286			    man->parse, n->line, n->pos,
287			    "%s %s%s", man_macronames[n->tok],
288			    n->child->string,
289			    n->nchild > 1 ? " ..." : "");
290		break;
291	default:
292		break;
293	}
294
295	return(1);
296}
297
298static int
299post_IP(CHKARGS)
300{
301
302	switch (n->type) {
303	case MAN_BLOCK:
304		if (0 == n->head->nchild && 0 == n->body->nchild)
305			man_node_delete(man, n);
306		break;
307	case MAN_BODY:
308		if (0 == n->parent->head->nchild && 0 == n->nchild)
309			mandoc_vmsg(MANDOCERR_PAR_SKIP,
310			    man->parse, n->line, n->pos,
311			    "%s empty", man_macronames[n->tok]);
312		break;
313	default:
314		break;
315	}
316	return(1);
317}
318
319static int
320post_TH(CHKARGS)
321{
322	struct man_node	*nb;
323	const char	*p;
324
325	check_le5(man, n);
326
327	free(man->meta.title);
328	free(man->meta.vol);
329	free(man->meta.source);
330	free(man->meta.msec);
331	free(man->meta.date);
332
333	man->meta.title = man->meta.vol = man->meta.date =
334	    man->meta.msec = man->meta.source = NULL;
335
336	nb = n;
337
338	/* ->TITLE<- MSEC DATE SOURCE VOL */
339
340	n = n->child;
341	if (n && n->string) {
342		for (p = n->string; '\0' != *p; p++) {
343			/* Only warn about this once... */
344			if (isalpha((unsigned char)*p) &&
345			    ! isupper((unsigned char)*p)) {
346				mandoc_vmsg(MANDOCERR_TITLE_CASE,
347				    man->parse, n->line,
348				    n->pos + (p - n->string),
349				    "TH %s", n->string);
350				break;
351			}
352		}
353		man->meta.title = mandoc_strdup(n->string);
354	} else {
355		man->meta.title = mandoc_strdup("");
356		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
357		    nb->line, nb->pos, "TH");
358	}
359
360	/* TITLE ->MSEC<- DATE SOURCE VOL */
361
362	if (n)
363		n = n->next;
364	if (n && n->string)
365		man->meta.msec = mandoc_strdup(n->string);
366	else {
367		man->meta.msec = mandoc_strdup("");
368		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
369		    nb->line, nb->pos, "TH %s", man->meta.title);
370	}
371
372	/* TITLE MSEC ->DATE<- SOURCE VOL */
373
374	if (n)
375		n = n->next;
376	if (n && n->string && '\0' != n->string[0]) {
377		man->meta.date = man->quick ?
378		    mandoc_strdup(n->string) :
379		    mandoc_normdate(man->parse, n->string,
380			n->line, n->pos);
381	} else {
382		man->meta.date = mandoc_strdup("");
383		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
384		    n ? n->line : nb->line,
385		    n ? n->pos : nb->pos, "TH");
386	}
387
388	/* TITLE MSEC DATE ->SOURCE<- VOL */
389
390	if (n && (n = n->next))
391		man->meta.source = mandoc_strdup(n->string);
392
393	/* TITLE MSEC DATE SOURCE ->VOL<- */
394	/* If missing, use the default VOL name for MSEC. */
395
396	if (n && (n = n->next))
397		man->meta.vol = mandoc_strdup(n->string);
398	else if ('\0' != man->meta.msec[0] &&
399	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
400		man->meta.vol = mandoc_strdup(p);
401
402	/*
403	 * Remove the `TH' node after we've processed it for our
404	 * meta-data.
405	 */
406	man_node_delete(man, man->last);
407	return(1);
408}
409
410static int
411post_nf(CHKARGS)
412{
413
414	check_eq0(man, n);
415
416	if (MAN_LITERAL & man->flags)
417		mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
418		    n->line, n->pos, "nf");
419
420	man->flags |= MAN_LITERAL;
421	return(1);
422}
423
424static int
425post_fi(CHKARGS)
426{
427
428	check_eq0(man, n);
429
430	if ( ! (MAN_LITERAL & man->flags))
431		mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
432		    n->line, n->pos, "fi");
433
434	man->flags &= ~MAN_LITERAL;
435	return(1);
436}
437
438static int
439post_UC(CHKARGS)
440{
441	static const char * const bsd_versions[] = {
442	    "3rd Berkeley Distribution",
443	    "4th Berkeley Distribution",
444	    "4.2 Berkeley Distribution",
445	    "4.3 Berkeley Distribution",
446	    "4.4 Berkeley Distribution",
447	};
448
449	const char	*p, *s;
450
451	n = n->child;
452
453	if (NULL == n || MAN_TEXT != n->type)
454		p = bsd_versions[0];
455	else {
456		s = n->string;
457		if (0 == strcmp(s, "3"))
458			p = bsd_versions[0];
459		else if (0 == strcmp(s, "4"))
460			p = bsd_versions[1];
461		else if (0 == strcmp(s, "5"))
462			p = bsd_versions[2];
463		else if (0 == strcmp(s, "6"))
464			p = bsd_versions[3];
465		else if (0 == strcmp(s, "7"))
466			p = bsd_versions[4];
467		else
468			p = bsd_versions[0];
469	}
470
471	free(man->meta.source);
472	man->meta.source = mandoc_strdup(p);
473	return(1);
474}
475
476static int
477post_AT(CHKARGS)
478{
479	static const char * const unix_versions[] = {
480	    "7th Edition",
481	    "System III",
482	    "System V",
483	    "System V Release 2",
484	};
485
486	const char	*p, *s;
487	struct man_node	*nn;
488
489	n = n->child;
490
491	if (NULL == n || MAN_TEXT != n->type)
492		p = unix_versions[0];
493	else {
494		s = n->string;
495		if (0 == strcmp(s, "3"))
496			p = unix_versions[0];
497		else if (0 == strcmp(s, "4"))
498			p = unix_versions[1];
499		else if (0 == strcmp(s, "5")) {
500			nn = n->next;
501			if (nn && MAN_TEXT == nn->type && nn->string[0])
502				p = unix_versions[3];
503			else
504				p = unix_versions[2];
505		} else
506			p = unix_versions[0];
507	}
508
509	free(man->meta.source);
510	man->meta.source = mandoc_strdup(p);
511	return(1);
512}
513
514static int
515post_vs(CHKARGS)
516{
517
518	if (n->tok == MAN_br)
519		check_eq0(man, n);
520	else
521		check_le1(man, n);
522
523	if (NULL != n->prev)
524		return(1);
525
526	switch (n->parent->tok) {
527	case MAN_SH:
528		/* FALLTHROUGH */
529	case MAN_SS:
530		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
531		    "%s after %s", man_macronames[n->tok],
532		    man_macronames[n->parent->tok]);
533		/* FALLTHROUGH */
534	case MAN_MAX:
535		/*
536		 * Don't warn about this because it occurs in pod2man
537		 * and would cause considerable (unfixable) warnage.
538		 */
539		man_node_delete(man, n);
540		break;
541	default:
542		break;
543	}
544
545	return(1);
546}
547