man_validate.c revision 279527
11539Srgrimes/*	$OpenBSD$ */
21539Srgrimes/*
31539Srgrimes * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
41539Srgrimes * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org>
51539Srgrimes *
61539Srgrimes * Permission to use, copy, modify, and distribute this software for any
71539Srgrimes * purpose with or without fee is hereby granted, provided that the above
81539Srgrimes * copyright notice and this permission notice appear in all copies.
91539Srgrimes *
101539Srgrimes * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
111539Srgrimes * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
121539Srgrimes * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
131539Srgrimes * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
141539Srgrimes * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
151539Srgrimes * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
161539Srgrimes * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
171539Srgrimes */
181539Srgrimes#include "config.h"
191539Srgrimes
201539Srgrimes#include <sys/types.h>
211539Srgrimes
221539Srgrimes#include <assert.h>
231539Srgrimes#include <ctype.h>
241539Srgrimes#include <errno.h>
251539Srgrimes#include <limits.h>
261539Srgrimes#include <stdarg.h>
271539Srgrimes#include <stdlib.h>
281539Srgrimes#include <string.h>
291539Srgrimes#include <time.h>
3050473Speter
311539Srgrimes#include "man.h"
321539Srgrimes#include "mandoc.h"
331539Srgrimes#include "mandoc_aux.h"
341539Srgrimes#include "libman.h"
351539Srgrimes#include "libmandoc.h"
3645513Sbde
/* Parameter list shared by every validation handler in this file. */
#define	CHKARGS	  struct man *man, struct man_node *n

/* Pointer to a validation handler taking the CHKARGS parameters. */
typedef	void	(*v_check)(CHKARGS);

/* Structural checks. */
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Per-macro handlers referenced from the man_valids[] table. */
static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_fi(CHKARGS);
static	void	  post_ft(CHKARGS);
static	void	  post_nf(CHKARGS);
static	void	  post_vs(CHKARGS);
561539Srgrimes
5741882Sbdestatic	v_check man_valids[MAN_MAX] = {
5841882Sbde	post_vs,    /* br */
5943282Sbde	post_TH,    /* TH */
6043085Sdillon	NULL,       /* SH */
6143085Sdillon	NULL,       /* SS */
6243085Sdillon	NULL,       /* TP */
6343085Sdillon	check_par,  /* LP */
6443085Sdillon	check_par,  /* PP */
6543085Sdillon	check_par,  /* P */
6643282Sbde	post_IP,    /* IP */
6743085Sdillon	NULL,       /* HP */
6843085Sdillon	NULL,       /* SM */
6943085Sdillon	NULL,       /* SB */
7041882Sbde	NULL,       /* BI */
7192917Sobrien	NULL,       /* IB */
7292917Sobrien	NULL,       /* BR */
73181880Sjhb	NULL,       /* RB */
7492917Sobrien	NULL,       /* R */
7592917Sobrien	NULL,       /* B */
7692917Sobrien	NULL,       /* I */
7792917Sobrien	NULL,       /* IR */
78181876Sjhb	NULL,       /* RI */
79181876Sjhb	post_vs,    /* sp */
801539Srgrimes	post_nf,    /* nf */
8192917Sobrien	post_fi,    /* fi */
8292917Sobrien	NULL,       /* RE */
8392917Sobrien	check_part, /* RS */
841539Srgrimes	NULL,       /* DT */
85121060Sbde	post_UC,    /* UC */
861539Srgrimes	NULL,       /* PD */
87121060Sbde	post_AT,    /* AT */
8892917Sobrien	NULL,       /* in */
8941882Sbde	post_ft,    /* ft */
90121060Sbde	post_OP,    /* OP */
9192917Sobrien	post_nf,    /* EX */
921539Srgrimes	post_fi,    /* EE */
931539Srgrimes	post_UR,    /* UR */
941539Srgrimes	NULL,       /* UE */
95	NULL,       /* ll */
96};
97
98
99void
100man_valid_post(struct man *man)
101{
102	struct man_node	*n;
103	v_check		*cp;
104
105	n = man->last;
106	if (n->flags & MAN_VALID)
107		return;
108	n->flags |= MAN_VALID;
109
110	switch (n->type) {
111	case MAN_TEXT:
112		check_text(man, n);
113		break;
114	case MAN_ROOT:
115		check_root(man, n);
116		break;
117	case MAN_EQN:
118		/* FALLTHROUGH */
119	case MAN_TBL:
120		break;
121	default:
122		cp = man_valids + n->tok;
123		if (*cp)
124			(*cp)(man, n);
125		break;
126	}
127}
128
129static void
130check_root(CHKARGS)
131{
132
133	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
134
135	if (NULL == man->first->child)
136		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
137		    n->line, n->pos, NULL);
138	else
139		man->meta.hasbody = 1;
140
141	if (NULL == man->meta.title) {
142		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
143		    n->line, n->pos, NULL);
144
145		/*
146		 * If a title hasn't been set, do so now (by
147		 * implication, date and section also aren't set).
148		 */
149
150		man->meta.title = mandoc_strdup("");
151		man->meta.msec = mandoc_strdup("");
152		man->meta.date = man->quick ? mandoc_strdup("") :
153		    mandoc_normdate(man->parse, NULL, n->line, n->pos);
154	}
155}
156
157static void
158check_text(CHKARGS)
159{
160	char		*cp, *p;
161
162	if (MAN_LITERAL & man->flags)
163		return;
164
165	cp = n->string;
166	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
167		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
168		    n->line, n->pos + (p - cp), NULL);
169}
170
171static void
172post_OP(CHKARGS)
173{
174
175	if (n->nchild == 0)
176		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
177		    n->line, n->pos, "OP");
178	else if (n->nchild > 2) {
179		n = n->child->next->next;
180		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
181		    n->line, n->pos, "OP ... %s", n->string);
182	}
183}
184
185static void
186post_UR(CHKARGS)
187{
188
189	if (n->type == MAN_HEAD && n->child == NULL)
190		mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse,
191		    n->line, n->pos, "UR");
192	check_part(man, n);
193}
194
195static void
196post_ft(CHKARGS)
197{
198	char	*cp;
199	int	 ok;
200
201	if (0 == n->nchild)
202		return;
203
204	ok = 0;
205	cp = n->child->string;
206	switch (*cp) {
207	case '1':
208		/* FALLTHROUGH */
209	case '2':
210		/* FALLTHROUGH */
211	case '3':
212		/* FALLTHROUGH */
213	case '4':
214		/* FALLTHROUGH */
215	case 'I':
216		/* FALLTHROUGH */
217	case 'P':
218		/* FALLTHROUGH */
219	case 'R':
220		if ('\0' == cp[1])
221			ok = 1;
222		break;
223	case 'B':
224		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
225			ok = 1;
226		break;
227	case 'C':
228		if ('W' == cp[1] && '\0' == cp[2])
229			ok = 1;
230		break;
231	default:
232		break;
233	}
234
235	if (0 == ok) {
236		mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
237		    n->line, n->pos, "ft %s", cp);
238		*cp = '\0';
239	}
240}
241
242static void
243check_part(CHKARGS)
244{
245
246	if (n->type == MAN_BODY && n->child == NULL)
247		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
248		    n->line, n->pos, man_macronames[n->tok]);
249}
250
251static void
252check_par(CHKARGS)
253{
254
255	switch (n->type) {
256	case MAN_BLOCK:
257		if (0 == n->body->nchild)
258			man_node_delete(man, n);
259		break;
260	case MAN_BODY:
261		if (0 == n->nchild)
262			mandoc_vmsg(MANDOCERR_PAR_SKIP,
263			    man->parse, n->line, n->pos,
264			    "%s empty", man_macronames[n->tok]);
265		break;
266	case MAN_HEAD:
267		if (n->nchild)
268			mandoc_vmsg(MANDOCERR_ARG_SKIP,
269			    man->parse, n->line, n->pos,
270			    "%s %s%s", man_macronames[n->tok],
271			    n->child->string,
272			    n->nchild > 1 ? " ..." : "");
273		break;
274	default:
275		break;
276	}
277}
278
279static void
280post_IP(CHKARGS)
281{
282
283	switch (n->type) {
284	case MAN_BLOCK:
285		if (0 == n->head->nchild && 0 == n->body->nchild)
286			man_node_delete(man, n);
287		break;
288	case MAN_BODY:
289		if (0 == n->parent->head->nchild && 0 == n->nchild)
290			mandoc_vmsg(MANDOCERR_PAR_SKIP,
291			    man->parse, n->line, n->pos,
292			    "%s empty", man_macronames[n->tok]);
293		break;
294	default:
295		break;
296	}
297}
298
299static void
300post_TH(CHKARGS)
301{
302	struct man_node	*nb;
303	const char	*p;
304
305	free(man->meta.title);
306	free(man->meta.vol);
307	free(man->meta.source);
308	free(man->meta.msec);
309	free(man->meta.date);
310
311	man->meta.title = man->meta.vol = man->meta.date =
312	    man->meta.msec = man->meta.source = NULL;
313
314	nb = n;
315
316	/* ->TITLE<- MSEC DATE SOURCE VOL */
317
318	n = n->child;
319	if (n && n->string) {
320		for (p = n->string; '\0' != *p; p++) {
321			/* Only warn about this once... */
322			if (isalpha((unsigned char)*p) &&
323			    ! isupper((unsigned char)*p)) {
324				mandoc_vmsg(MANDOCERR_TITLE_CASE,
325				    man->parse, n->line,
326				    n->pos + (p - n->string),
327				    "TH %s", n->string);
328				break;
329			}
330		}
331		man->meta.title = mandoc_strdup(n->string);
332	} else {
333		man->meta.title = mandoc_strdup("");
334		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
335		    nb->line, nb->pos, "TH");
336	}
337
338	/* TITLE ->MSEC<- DATE SOURCE VOL */
339
340	if (n)
341		n = n->next;
342	if (n && n->string)
343		man->meta.msec = mandoc_strdup(n->string);
344	else {
345		man->meta.msec = mandoc_strdup("");
346		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
347		    nb->line, nb->pos, "TH %s", man->meta.title);
348	}
349
350	/* TITLE MSEC ->DATE<- SOURCE VOL */
351
352	if (n)
353		n = n->next;
354	if (n && n->string && '\0' != n->string[0]) {
355		man->meta.date = man->quick ?
356		    mandoc_strdup(n->string) :
357		    mandoc_normdate(man->parse, n->string,
358			n->line, n->pos);
359	} else {
360		man->meta.date = mandoc_strdup("");
361		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
362		    n ? n->line : nb->line,
363		    n ? n->pos : nb->pos, "TH");
364	}
365
366	/* TITLE MSEC DATE ->SOURCE<- VOL */
367
368	if (n && (n = n->next))
369		man->meta.source = mandoc_strdup(n->string);
370	else if (man->defos != NULL)
371		man->meta.source = mandoc_strdup(man->defos);
372
373	/* TITLE MSEC DATE SOURCE ->VOL<- */
374	/* If missing, use the default VOL name for MSEC. */
375
376	if (n && (n = n->next))
377		man->meta.vol = mandoc_strdup(n->string);
378	else if ('\0' != man->meta.msec[0] &&
379	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
380		man->meta.vol = mandoc_strdup(p);
381
382	if (n != NULL && (n = n->next) != NULL)
383		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
384		    n->line, n->pos, "TH ... %s", n->string);
385
386	/*
387	 * Remove the `TH' node after we've processed it for our
388	 * meta-data.
389	 */
390	man_node_delete(man, man->last);
391}
392
393static void
394post_nf(CHKARGS)
395{
396
397	if (man->flags & MAN_LITERAL)
398		mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
399		    n->line, n->pos, "nf");
400
401	man->flags |= MAN_LITERAL;
402}
403
404static void
405post_fi(CHKARGS)
406{
407
408	if ( ! (MAN_LITERAL & man->flags))
409		mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
410		    n->line, n->pos, "fi");
411
412	man->flags &= ~MAN_LITERAL;
413}
414
415static void
416post_UC(CHKARGS)
417{
418	static const char * const bsd_versions[] = {
419	    "3rd Berkeley Distribution",
420	    "4th Berkeley Distribution",
421	    "4.2 Berkeley Distribution",
422	    "4.3 Berkeley Distribution",
423	    "4.4 Berkeley Distribution",
424	};
425
426	const char	*p, *s;
427
428	n = n->child;
429
430	if (NULL == n || MAN_TEXT != n->type)
431		p = bsd_versions[0];
432	else {
433		s = n->string;
434		if (0 == strcmp(s, "3"))
435			p = bsd_versions[0];
436		else if (0 == strcmp(s, "4"))
437			p = bsd_versions[1];
438		else if (0 == strcmp(s, "5"))
439			p = bsd_versions[2];
440		else if (0 == strcmp(s, "6"))
441			p = bsd_versions[3];
442		else if (0 == strcmp(s, "7"))
443			p = bsd_versions[4];
444		else
445			p = bsd_versions[0];
446	}
447
448	free(man->meta.source);
449	man->meta.source = mandoc_strdup(p);
450}
451
452static void
453post_AT(CHKARGS)
454{
455	static const char * const unix_versions[] = {
456	    "7th Edition",
457	    "System III",
458	    "System V",
459	    "System V Release 2",
460	};
461
462	const char	*p, *s;
463	struct man_node	*nn;
464
465	n = n->child;
466
467	if (NULL == n || MAN_TEXT != n->type)
468		p = unix_versions[0];
469	else {
470		s = n->string;
471		if (0 == strcmp(s, "3"))
472			p = unix_versions[0];
473		else if (0 == strcmp(s, "4"))
474			p = unix_versions[1];
475		else if (0 == strcmp(s, "5")) {
476			nn = n->next;
477			if (nn && MAN_TEXT == nn->type && nn->string[0])
478				p = unix_versions[3];
479			else
480				p = unix_versions[2];
481		} else
482			p = unix_versions[0];
483	}
484
485	free(man->meta.source);
486	man->meta.source = mandoc_strdup(p);
487}
488
489static void
490post_vs(CHKARGS)
491{
492
493	if (NULL != n->prev)
494		return;
495
496	switch (n->parent->tok) {
497	case MAN_SH:
498		/* FALLTHROUGH */
499	case MAN_SS:
500		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
501		    "%s after %s", man_macronames[n->tok],
502		    man_macronames[n->parent->tok]);
503		/* FALLTHROUGH */
504	case MAN_MAX:
505		/*
506		 * Don't warn about this because it occurs in pod2man
507		 * and would cause considerable (unfixable) warnage.
508		 */
509		man_node_delete(man, n);
510		break;
511	default:
512		break;
513	}
514}
515