man_validate.c revision 303975
/*	$OpenBSD$ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <errno.h>
25#include <limits.h>
26#include <stdarg.h>
27#include <stdlib.h>
28#include <string.h>
29#include <time.h>
30
31#include "mandoc_aux.h"
32#include "mandoc.h"
33#include "roff.h"
34#include "man.h"
35#include "libmandoc.h"
36#include "roff_int.h"
37#include "libman.h"
38
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Pointer to a validation handler taking the CHKARGS parameters. */
typedef	void	(*v_check)(CHKARGS);

/* Handlers named check_*. */
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Handlers named post_*, one per macro that needs its own checks. */
static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_ft(CHKARGS);
static	void	  post_vs(CHKARGS);
56
57static	v_check man_valids[MAN_MAX] = {
58	post_vs,    /* br */
59	post_TH,    /* TH */
60	NULL,       /* SH */
61	NULL,       /* SS */
62	NULL,       /* TP */
63	check_par,  /* LP */
64	check_par,  /* PP */
65	check_par,  /* P */
66	post_IP,    /* IP */
67	NULL,       /* HP */
68	NULL,       /* SM */
69	NULL,       /* SB */
70	NULL,       /* BI */
71	NULL,       /* IB */
72	NULL,       /* BR */
73	NULL,       /* RB */
74	NULL,       /* R */
75	NULL,       /* B */
76	NULL,       /* I */
77	NULL,       /* IR */
78	NULL,       /* RI */
79	post_vs,    /* sp */
80	NULL,       /* nf */
81	NULL,       /* fi */
82	NULL,       /* RE */
83	check_part, /* RS */
84	NULL,       /* DT */
85	post_UC,    /* UC */
86	NULL,       /* PD */
87	post_AT,    /* AT */
88	NULL,       /* in */
89	post_ft,    /* ft */
90	post_OP,    /* OP */
91	NULL,       /* EX */
92	NULL,       /* EE */
93	post_UR,    /* UR */
94	NULL,       /* UE */
95	NULL,       /* ll */
96};
97
98
99void
100man_node_validate(struct roff_man *man)
101{
102	struct roff_node *n;
103	v_check		*cp;
104
105	n = man->last;
106	man->last = man->last->child;
107	while (man->last != NULL) {
108		man_node_validate(man);
109		if (man->last == n)
110			man->last = man->last->child;
111		else
112			man->last = man->last->next;
113	}
114
115	man->last = n;
116	man->next = ROFF_NEXT_SIBLING;
117	switch (n->type) {
118	case ROFFT_TEXT:
119		check_text(man, n);
120		break;
121	case ROFFT_ROOT:
122		check_root(man, n);
123		break;
124	case ROFFT_EQN:
125	case ROFFT_TBL:
126		break;
127	default:
128		cp = man_valids + n->tok;
129		if (*cp)
130			(*cp)(man, n);
131		if (man->last == n)
132			man_state(man, n);
133		break;
134	}
135}
136
137static void
138check_root(CHKARGS)
139{
140
141	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
142
143	if (NULL == man->first->child)
144		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
145		    n->line, n->pos, NULL);
146	else
147		man->meta.hasbody = 1;
148
149	if (NULL == man->meta.title) {
150		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
151		    n->line, n->pos, NULL);
152
153		/*
154		 * If a title hasn't been set, do so now (by
155		 * implication, date and section also aren't set).
156		 */
157
158		man->meta.title = mandoc_strdup("");
159		man->meta.msec = mandoc_strdup("");
160		man->meta.date = man->quick ? mandoc_strdup("") :
161		    mandoc_normdate(man->parse, NULL, n->line, n->pos);
162	}
163}
164
165static void
166check_text(CHKARGS)
167{
168	char		*cp, *p;
169
170	if (MAN_LITERAL & man->flags)
171		return;
172
173	cp = n->string;
174	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
175		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
176		    n->line, n->pos + (p - cp), NULL);
177}
178
179static void
180post_OP(CHKARGS)
181{
182
183	if (n->child == NULL)
184		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
185		    n->line, n->pos, "OP");
186	else if (n->child->next != NULL && n->child->next->next != NULL) {
187		n = n->child->next->next;
188		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
189		    n->line, n->pos, "OP ... %s", n->string);
190	}
191}
192
193static void
194post_UR(CHKARGS)
195{
196
197	if (n->type == ROFFT_HEAD && n->child == NULL)
198		mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse,
199		    n->line, n->pos, "UR");
200	check_part(man, n);
201}
202
203static void
204post_ft(CHKARGS)
205{
206	char	*cp;
207	int	 ok;
208
209	if (n->child == NULL)
210		return;
211
212	ok = 0;
213	cp = n->child->string;
214	switch (*cp) {
215	case '1':
216	case '2':
217	case '3':
218	case '4':
219	case 'I':
220	case 'P':
221	case 'R':
222		if ('\0' == cp[1])
223			ok = 1;
224		break;
225	case 'B':
226		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
227			ok = 1;
228		break;
229	case 'C':
230		if ('W' == cp[1] && '\0' == cp[2])
231			ok = 1;
232		break;
233	default:
234		break;
235	}
236
237	if (0 == ok) {
238		mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
239		    n->line, n->pos, "ft %s", cp);
240		*cp = '\0';
241	}
242}
243
244static void
245check_part(CHKARGS)
246{
247
248	if (n->type == ROFFT_BODY && n->child == NULL)
249		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
250		    n->line, n->pos, man_macronames[n->tok]);
251}
252
253static void
254check_par(CHKARGS)
255{
256
257	switch (n->type) {
258	case ROFFT_BLOCK:
259		if (n->body->child == NULL)
260			roff_node_delete(man, n);
261		break;
262	case ROFFT_BODY:
263		if (n->child == NULL)
264			mandoc_vmsg(MANDOCERR_PAR_SKIP,
265			    man->parse, n->line, n->pos,
266			    "%s empty", man_macronames[n->tok]);
267		break;
268	case ROFFT_HEAD:
269		if (n->child != NULL)
270			mandoc_vmsg(MANDOCERR_ARG_SKIP,
271			    man->parse, n->line, n->pos,
272			    "%s %s%s", man_macronames[n->tok],
273			    n->child->string,
274			    n->child->next != NULL ? " ..." : "");
275		break;
276	default:
277		break;
278	}
279}
280
281static void
282post_IP(CHKARGS)
283{
284
285	switch (n->type) {
286	case ROFFT_BLOCK:
287		if (n->head->child == NULL && n->body->child == NULL)
288			roff_node_delete(man, n);
289		break;
290	case ROFFT_BODY:
291		if (n->parent->head->child == NULL && n->child == NULL)
292			mandoc_vmsg(MANDOCERR_PAR_SKIP,
293			    man->parse, n->line, n->pos,
294			    "%s empty", man_macronames[n->tok]);
295		break;
296	default:
297		break;
298	}
299}
300
301static void
302post_TH(CHKARGS)
303{
304	struct roff_node *nb;
305	const char	*p;
306
307	free(man->meta.title);
308	free(man->meta.vol);
309	free(man->meta.os);
310	free(man->meta.msec);
311	free(man->meta.date);
312
313	man->meta.title = man->meta.vol = man->meta.date =
314	    man->meta.msec = man->meta.os = NULL;
315
316	nb = n;
317
318	/* ->TITLE<- MSEC DATE OS VOL */
319
320	n = n->child;
321	if (n && n->string) {
322		for (p = n->string; '\0' != *p; p++) {
323			/* Only warn about this once... */
324			if (isalpha((unsigned char)*p) &&
325			    ! isupper((unsigned char)*p)) {
326				mandoc_vmsg(MANDOCERR_TITLE_CASE,
327				    man->parse, n->line,
328				    n->pos + (p - n->string),
329				    "TH %s", n->string);
330				break;
331			}
332		}
333		man->meta.title = mandoc_strdup(n->string);
334	} else {
335		man->meta.title = mandoc_strdup("");
336		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
337		    nb->line, nb->pos, "TH");
338	}
339
340	/* TITLE ->MSEC<- DATE OS VOL */
341
342	if (n)
343		n = n->next;
344	if (n && n->string)
345		man->meta.msec = mandoc_strdup(n->string);
346	else {
347		man->meta.msec = mandoc_strdup("");
348		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
349		    nb->line, nb->pos, "TH %s", man->meta.title);
350	}
351
352	/* TITLE MSEC ->DATE<- OS VOL */
353
354	if (n)
355		n = n->next;
356	if (n && n->string && '\0' != n->string[0]) {
357		man->meta.date = man->quick ?
358		    mandoc_strdup(n->string) :
359		    mandoc_normdate(man->parse, n->string,
360			n->line, n->pos);
361	} else {
362		man->meta.date = mandoc_strdup("");
363		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
364		    n ? n->line : nb->line,
365		    n ? n->pos : nb->pos, "TH");
366	}
367
368	/* TITLE MSEC DATE ->OS<- VOL */
369
370	if (n && (n = n->next))
371		man->meta.os = mandoc_strdup(n->string);
372	else if (man->defos != NULL)
373		man->meta.os = mandoc_strdup(man->defos);
374
375	/* TITLE MSEC DATE OS ->VOL<- */
376	/* If missing, use the default VOL name for MSEC. */
377
378	if (n && (n = n->next))
379		man->meta.vol = mandoc_strdup(n->string);
380	else if ('\0' != man->meta.msec[0] &&
381	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
382		man->meta.vol = mandoc_strdup(p);
383
384	if (n != NULL && (n = n->next) != NULL)
385		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
386		    n->line, n->pos, "TH ... %s", n->string);
387
388	/*
389	 * Remove the `TH' node after we've processed it for our
390	 * meta-data.
391	 */
392	roff_node_delete(man, man->last);
393}
394
395static void
396post_UC(CHKARGS)
397{
398	static const char * const bsd_versions[] = {
399	    "3rd Berkeley Distribution",
400	    "4th Berkeley Distribution",
401	    "4.2 Berkeley Distribution",
402	    "4.3 Berkeley Distribution",
403	    "4.4 Berkeley Distribution",
404	};
405
406	const char	*p, *s;
407
408	n = n->child;
409
410	if (n == NULL || n->type != ROFFT_TEXT)
411		p = bsd_versions[0];
412	else {
413		s = n->string;
414		if (0 == strcmp(s, "3"))
415			p = bsd_versions[0];
416		else if (0 == strcmp(s, "4"))
417			p = bsd_versions[1];
418		else if (0 == strcmp(s, "5"))
419			p = bsd_versions[2];
420		else if (0 == strcmp(s, "6"))
421			p = bsd_versions[3];
422		else if (0 == strcmp(s, "7"))
423			p = bsd_versions[4];
424		else
425			p = bsd_versions[0];
426	}
427
428	free(man->meta.os);
429	man->meta.os = mandoc_strdup(p);
430}
431
432static void
433post_AT(CHKARGS)
434{
435	static const char * const unix_versions[] = {
436	    "7th Edition",
437	    "System III",
438	    "System V",
439	    "System V Release 2",
440	};
441
442	struct roff_node *nn;
443	const char	*p, *s;
444
445	n = n->child;
446
447	if (n == NULL || n->type != ROFFT_TEXT)
448		p = unix_versions[0];
449	else {
450		s = n->string;
451		if (0 == strcmp(s, "3"))
452			p = unix_versions[0];
453		else if (0 == strcmp(s, "4"))
454			p = unix_versions[1];
455		else if (0 == strcmp(s, "5")) {
456			nn = n->next;
457			if (nn != NULL &&
458			    nn->type == ROFFT_TEXT &&
459			    nn->string[0] != '\0')
460				p = unix_versions[3];
461			else
462				p = unix_versions[2];
463		} else
464			p = unix_versions[0];
465	}
466
467	free(man->meta.os);
468	man->meta.os = mandoc_strdup(p);
469}
470
471static void
472post_vs(CHKARGS)
473{
474
475	if (NULL != n->prev)
476		return;
477
478	switch (n->parent->tok) {
479	case MAN_SH:
480	case MAN_SS:
481		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
482		    "%s after %s", man_macronames[n->tok],
483		    man_macronames[n->parent->tok]);
484		/* FALLTHROUGH */
485	case TOKEN_NONE:
486		/*
487		 * Don't warn about this because it occurs in pod2man
488		 * and would cause considerable (unfixable) warnage.
489		 */
490		roff_node_delete(man, n);
491		break;
492	default:
493		break;
494	}
495}
496