cgi.c revision 294113
1/*	$Id: cgi.c,v 1.116 2016/01/04 12:36:26 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/time.h>
22
23#include <ctype.h>
24#include <errno.h>
25#include <fcntl.h>
26#include <limits.h>
27#include <stdint.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include "mandoc_aux.h"
34#include "mandoc.h"
35#include "roff.h"
36#include "mdoc.h"
37#include "man.h"
38#include "main.h"
39#include "manconf.h"
40#include "mansearch.h"
41#include "cgi.h"
42
/*
 * The search parameters of one request, as extracted from the
 * query string and handed on to the search and display code.
 */
struct	query {
	/* Manual directory to search in. */
	char		*manpath;
	/* Architecture to restrict the search to; NULL if unset. */
	char		*arch;
	/* Manual section to restrict the search to; NULL if unset. */
	char		*sec;
	/* The query expression, still unparsed. */
	char		*query;
	/* Non-zero: match whole names rather than substrings. */
	int		 equal;
};
53
54struct	req {
55	struct query	  q;
56	char		**p; /* array of available manpaths */
57	size_t		  psz; /* number of available manpaths */
58};
59
/* Request parsing and validation. */
static	int		 http_decode(char *p);
static	void		 http_parse(struct req *req, const char *qs);
static	void		 pathgen(struct req *req);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);

/* Low-level output helpers. */
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *req);
static	void		 resp_show(const struct req *req, const char *file);

/* Manual page rendering. */
static	void		 catman(const struct req *req, const char *file);
static	void		 format(const struct req *req, const char *file);

/* Complete pages. */
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req, const char *msg);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
85
/* Name of this CGI script; main() sets it from SCRIPT_NAME. */
static	const char	 *scriptname;
87
/*
 * Priority of the section digits '1' to '9', indexed by digit - '1'.
 * pg_searchres() prefers the page with the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; "0" stands for all sections. */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* Labels of the section selector, parallel to sec_numbers. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in sec_names. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
106
/* Architectures offered in the architecture selector, in display order. */
static	const char *const arch_names[] = {
    "amd64",   "alpha",     "armish",   "armv7",   "aviion",
    "hppa",    "hppa64",    "i386",     "ia64",    "landisk",
    "loongson", "luna88k",  "macppc",   "mips64",  "octeon",
    "sgi",     "socppc",    "solbourne", "sparc",  "sparc64",
    "vax",     "zaurus",
    "amiga",   "arc",       "arm32",    "atari",   "beagle",
    "cats",    "hp300",     "mac68k",   "mvme68k", "mvme88k",
    "mvmeppc", "palm",      "pc532",    "pegasos", "pmax",
    "powerpc", "sun3",      "wgrisc",   "x68k"
};

/* Number of entries in arch_names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
121
/*
 * Write one character to standard output, replacing the characters
 * '"', '&', '<' and '>' by HTML character references.
 * Every other byte is written unchanged, so non-ASCII input
 * is passed straight to the output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is called "quot"; "&quote;" is not defined. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
148
/*
 * Write a string to standard output one character at a time
 * through html_putchar().  A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
162
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
179
180/*
181 * Parse the QUERY_STRING for key-value pairs
182 * and store the values into the query structure.
183 */
184static void
185http_parse(struct req *req, const char *qs)
186{
187	char		*key, *val;
188	size_t		 keysz, valsz;
189
190	req->q.manpath	= NULL;
191	req->q.arch	= NULL;
192	req->q.sec	= NULL;
193	req->q.query	= NULL;
194	req->q.equal	= 1;
195
196	key = val = NULL;
197	while (*qs != '\0') {
198
199		/* Parse one key. */
200
201		keysz = strcspn(qs, "=;&");
202		key = mandoc_strndup(qs, keysz);
203		qs += keysz;
204		if (*qs != '=')
205			goto next;
206
207		/* Parse one value. */
208
209		valsz = strcspn(++qs, ";&");
210		val = mandoc_strndup(qs, valsz);
211		qs += valsz;
212
213		/* Decode and catch encoding errors. */
214
215		if ( ! (http_decode(key) && http_decode(val)))
216			goto next;
217
218		/* Handle key-value pairs. */
219
220		if ( ! strcmp(key, "query"))
221			set_query_attr(&req->q.query, &val);
222
223		else if ( ! strcmp(key, "apropos"))
224			req->q.equal = !strcmp(val, "0");
225
226		else if ( ! strcmp(key, "manpath")) {
227#ifdef COMPAT_OLDURI
228			if ( ! strncmp(val, "OpenBSD ", 8)) {
229				val[7] = '-';
230				if ('C' == val[8])
231					val[8] = 'c';
232			}
233#endif
234			set_query_attr(&req->q.manpath, &val);
235		}
236
237		else if ( ! (strcmp(key, "sec")
238#ifdef COMPAT_OLDURI
239		    && strcmp(key, "sektion")
240#endif
241		    )) {
242			if ( ! strcmp(val, "0"))
243				*val = '\0';
244			set_query_attr(&req->q.sec, &val);
245		}
246
247		else if ( ! strcmp(key, "arch")) {
248			if ( ! strcmp(val, "default"))
249				*val = '\0';
250			set_query_attr(&req->q.arch, &val);
251		}
252
253		/*
254		 * The key must be freed in any case.
255		 * The val may have been handed over to the query
256		 * structure, in which case it is now NULL.
257		 */
258next:
259		free(key);
260		key = NULL;
261		free(val);
262		val = NULL;
263
264		if (*qs != '\0')
265			qs++;
266	}
267}
268
269/*
270 * HTTP-decode a string.  The standard explanation is that this turns
271 * "%4e+foo" into "n foo" in the regular way.  This is done in-place
272 * over the allocated string.
273 */
274static int
275http_decode(char *p)
276{
277	char             hex[3];
278	char		*q;
279	int              c;
280
281	hex[2] = '\0';
282
283	q = p;
284	for ( ; '\0' != *p; p++, q++) {
285		if ('%' == *p) {
286			if ('\0' == (hex[0] = *(p + 1)))
287				return 0;
288			if ('\0' == (hex[1] = *(p + 2)))
289				return 0;
290			if (1 != sscanf(hex, "%x", &c))
291				return 0;
292			if ('\0' == c)
293				return 0;
294
295			*q = (char)c;
296			p += 2;
297		} else
298			*q = '+' == *p ? ' ' : *p;
299	}
300
301	*q = '\0';
302	return 1;
303}
304
/*
 * Write the HTTP response header block and flush it.
 * A Status line is only written for codes other than 200;
 * msg is not used when code is 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);

	/* An empty line ends the header block. */
	fputs("\r\n", stdout);

	fflush(stdout);
}
319
/*
 * Copy the content of a file to standard output.
 * A file that cannot be opened is silently ignored.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data is written with write(2), bypassing stdio, so
	 * anything still buffered in stdout has to go out first.
	 */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may accept less than asked for: loop. */
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0)
				off += nw;
			else if (nw == -1 && errno == EINTR)
				continue;
			else
				goto out;
		}
	}
out:
	close(fd);
}
333
334static void
335resp_begin_html(int code, const char *msg)
336{
337
338	resp_begin_http(code, msg);
339
340	printf("<!DOCTYPE html>\n"
341	       "<HTML>\n"
342	       "<HEAD>\n"
343	       "<META CHARSET=\"UTF-8\" />\n"
344	       "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\""
345	       " TYPE=\"text/css\" media=\"all\">\n"
346	       "<TITLE>%s</TITLE>\n"
347	       "</HEAD>\n"
348	       "<BODY>\n"
349	       "<!-- Begin page content. //-->\n",
350	       CSS_DIR, CUSTOMIZE_TITLE);
351
352	resp_copy(MAN_DIR "/header.html");
353}
354
355static void
356resp_end_html(void)
357{
358
359	resp_copy(MAN_DIR "/footer.html");
360
361	puts("</BODY>\n"
362	     "</HTML>");
363}
364
365static void
366resp_searchform(const struct req *req)
367{
368	int		 i;
369
370	puts("<!-- Begin search form. //-->");
371	printf("<DIV ID=\"mancgi\">\n"
372	       "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
373	       "<FIELDSET>\n"
374	       "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
375	       scriptname);
376
377	/* Write query input box. */
378
379	printf(	"<TABLE><TR><TD>\n"
380		"<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
381	if (NULL != req->q.query)
382		html_print(req->q.query);
383	puts("\" SIZE=\"40\">");
384
385	/* Write submission and reset buttons. */
386
387	printf(	"<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
388		"<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
389
390	/* Write show radio button */
391
392	printf(	"</TD><TD>\n"
393		"<INPUT TYPE=\"radio\" ");
394	if (req->q.equal)
395		printf("CHECKED=\"checked\" ");
396	printf(	"NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
397		"<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
398
399	/* Write section selector. */
400
401	puts(	"</TD></TR><TR><TD>\n"
402		"<SELECT NAME=\"sec\">");
403	for (i = 0; i < sec_MAX; i++) {
404		printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
405		if (NULL != req->q.sec &&
406		    0 == strcmp(sec_numbers[i], req->q.sec))
407			printf(" SELECTED=\"selected\"");
408		printf(">%s</OPTION>\n", sec_names[i]);
409	}
410	puts("</SELECT>");
411
412	/* Write architecture selector. */
413
414	printf(	"<SELECT NAME=\"arch\">\n"
415		"<OPTION VALUE=\"default\"");
416	if (NULL == req->q.arch)
417		printf(" SELECTED=\"selected\"");
418	puts(">All Architectures</OPTION>");
419	for (i = 0; i < arch_MAX; i++) {
420		printf("<OPTION VALUE=\"%s\"", arch_names[i]);
421		if (NULL != req->q.arch &&
422		    0 == strcmp(arch_names[i], req->q.arch))
423			printf(" SELECTED=\"selected\"");
424		printf(">%s</OPTION>\n", arch_names[i]);
425	}
426	puts("</SELECT>");
427
428	/* Write manpath selector. */
429
430	if (req->psz > 1) {
431		puts("<SELECT NAME=\"manpath\">");
432		for (i = 0; i < (int)req->psz; i++) {
433			printf("<OPTION ");
434			if (strcmp(req->q.manpath, req->p[i]) == 0)
435				printf("SELECTED=\"selected\" ");
436			printf("VALUE=\"");
437			html_print(req->p[i]);
438			printf("\">");
439			html_print(req->p[i]);
440			puts("</OPTION>");
441		}
442		puts("</SELECT>");
443	}
444
445	/* Write search radio button */
446
447	printf(	"</TD><TD>\n"
448		"<INPUT TYPE=\"radio\" ");
449	if (0 == req->q.equal)
450		printf("CHECKED=\"checked\" ");
451	printf(	"NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
452		"<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
453
454	puts("</TD></TR></TABLE>\n"
455	     "</FIELDSET>\n"
456	     "</FORM>\n"
457	     "</DIV>");
458	puts("<!-- End search form. //-->");
459}
460
/*
 * Check that a string consists only of ASCII letters and digits
 * as classified by isalnum(3) and of the characters '-', '.',
 * '/' and '_'.  Returns 1 if so (also for the empty string)
 * and 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
474
475static int
476validate_manpath(const struct req *req, const char* manpath)
477{
478	size_t	 i;
479
480	if ( ! strcmp(manpath, "mandoc"))
481		return 1;
482
483	for (i = 0; i < req->psz; i++)
484		if ( ! strcmp(manpath, req->p[i]))
485			return 1;
486
487	return 0;
488}
489
/*
 * Check a file name relative to a manpath.  After skipping an
 * optional leading "./", the name must not contain "../" or "/.."
 * anywhere and must start with "man" or "cat".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
500
501static void
502pg_index(const struct req *req)
503{
504
505	resp_begin_html(200, NULL);
506	resp_searchform(req);
507	printf("<P>\n"
508	       "This web interface is documented in the\n"
509	       "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
510	       "manual, and the\n"
511	       "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
512	       "manual explains the query syntax.\n"
513	       "</P>\n",
514	       scriptname, scriptname);
515	resp_end_html();
516}
517
/*
 * Page shown when a search produced nothing to display:
 * the search form followed by msg in a paragraph of its own.
 * The message is written as is, without HTML escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
528
529static void
530pg_error_badrequest(const char *msg)
531{
532
533	resp_begin_html(400, "Bad Request");
534	puts("<H1>Bad Request</H1>\n"
535	     "<P>\n");
536	puts(msg);
537	printf("Try again from the\n"
538	       "<A HREF=\"%s\">main page</A>.\n"
539	       "</P>", scriptname);
540	resp_end_html();
541}
542
/*
 * Error page with status 500.  No details are given to the
 * client; callers report them on standard error instead.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<P>Internal Server Error</P>\n", stdout);
	resp_end_html();
}
550
551static void
552pg_searchres(const struct req *req, struct manpage *r, size_t sz)
553{
554	char		*arch, *archend;
555	size_t		 i, iuse, isec;
556	int		 archprio, archpriouse;
557	int		 prio, priouse;
558	char		 sec;
559
560	for (i = 0; i < sz; i++) {
561		if (validate_filename(r[i].file))
562			continue;
563		fprintf(stderr, "invalid filename %s in %s database\n",
564		    r[i].file, req->q.manpath);
565		pg_error_internal();
566		return;
567	}
568
569	if (1 == sz) {
570		/*
571		 * If we have just one result, then jump there now
572		 * without any delay.
573		 */
574		printf("Status: 303 See Other\r\n");
575		printf("Location: http://%s%s/%s/%s",
576		    HTTP_HOST, scriptname, req->q.manpath, r[0].file);
577		printf("\r\n"
578		     "Content-Type: text/html; charset=utf-8\r\n"
579		     "\r\n");
580		return;
581	}
582
583	resp_begin_html(200, NULL);
584	resp_searchform(req);
585	puts("<DIV CLASS=\"results\">");
586	puts("<TABLE>");
587
588	for (i = 0; i < sz; i++) {
589		printf("<TR>\n"
590		       "<TD CLASS=\"title\">\n"
591		       "<A HREF=\"%s/%s/%s",
592		    scriptname, req->q.manpath, r[i].file);
593		printf("\">");
594		html_print(r[i].names);
595		printf("</A>\n"
596		       "</TD>\n"
597		       "<TD CLASS=\"desc\">");
598		html_print(r[i].output);
599		puts("</TD>\n"
600		     "</TR>");
601	}
602
603	puts("</TABLE>\n"
604	     "</DIV>");
605
606	/*
607	 * In man(1) mode, show one of the pages
608	 * even if more than one is found.
609	 */
610
611	if (req->q.equal) {
612		puts("<HR>");
613		iuse = 0;
614		priouse = 10;
615		archpriouse = 3;
616		for (i = 0; i < sz; i++) {
617			isec = strcspn(r[i].file, "123456789");
618			sec = r[i].file[isec];
619			if ('\0' == sec)
620				continue;
621			prio = sec_prios[sec - '1'];
622			if (NULL == req->q.arch) {
623				archprio =
624				    (NULL == (arch = strchr(
625					r[i].file + isec, '/'))) ? 3 :
626				    (NULL == (archend = strchr(
627					arch + 1, '/'))) ? 0 :
628				    strncmp(arch, "amd64/",
629					archend - arch) ? 2 : 1;
630				if (archprio < archpriouse) {
631					archpriouse = archprio;
632					priouse = prio;
633					iuse = i;
634					continue;
635				}
636				if (archprio > archpriouse)
637					continue;
638			}
639			if (prio >= priouse)
640				continue;
641			priouse = prio;
642			iuse = i;
643		}
644		resp_show(req, r[iuse].file);
645	}
646
647	resp_end_html();
648}
649
/*
 * Write a preformatted manual page (cat page) as an HTML PRE block.
 *
 * The input uses backspace overstriking: "_\bX" is shown as
 * italic X, some overstruck pairs of symbols are shown as '*'
 * or '+', and any other "Y\bX" is shown as bold X.  Characters
 * are escaped with html_putchar().  If the file cannot be opened,
 * an error paragraph is written instead.  The req argument is
 * not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if (p[i + 1] != '\b') {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed over p[i].
			 */

			/* Italic mode. */

			if (p[i] == '_') {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The second group has to look at p[i + 2]
			 * just like the first one: p[i + 1] is always
			 * the backspace here and can never match.
			 */

			if ((p[i] == '+' && p[i + 2] == 'o') ||
			    (p[i] == 'o' && p[i + 2] == '+') ||
			    (p[i] == '|' && p[i + 2] == '=') ||
			    (p[i] == '=' && p[i + 2] == '|') ||
			    (p[i] == '*' && p[i + 2] == '=') ||
			    (p[i] == '=' && p[i + 2] == '*') ||
			    (p[i] == '*' && p[i + 2] == '|') ||
			    (p[i] == '|' && p[i + 2] == '*')) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if ((p[i] == '|' && p[i + 2] == '-') ||
			    (p[i] == '-' && p[i + 2] == '|') ||
			    (p[i] == '+' && p[i + 2] == '-') ||
			    (p[i] == '-' && p[i + 2] == '+') ||
			    (p[i] == '+' && p[i + 2] == '|') ||
			    (p[i] == '|' && p[i + 2] == '+')) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
786
787static void
788format(const struct req *req, const char *file)
789{
790	struct manoutput conf;
791	struct mparse	*mp;
792	struct roff_man	*man;
793	void		*vp;
794	int		 fd;
795	int		 usepath;
796
797	if (-1 == (fd = open(file, O_RDONLY, 0))) {
798		puts("<P>You specified an invalid manual file.</P>");
799		return;
800	}
801
802	mchars_alloc();
803	mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
804	mparse_readfd(mp, fd, file);
805	close(fd);
806
807	memset(&conf, 0, sizeof(conf));
808	conf.fragment = 1;
809	usepath = strcmp(req->q.manpath, req->p[0]);
810	mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s",
811	    scriptname,
812	    req->q.arch	? "&arch="       : "",
813	    req->q.arch	? req->q.arch    : "",
814	    usepath	? "&manpath="    : "",
815	    usepath	? req->q.manpath : "");
816
817	mparse_result(mp, &man, NULL);
818	if (man == NULL) {
819		fprintf(stderr, "fatal mandoc error: %s/%s\n",
820		    req->q.manpath, file);
821		pg_error_internal();
822		mparse_free(mp);
823		mchars_free();
824		return;
825	}
826
827	vp = html_alloc(&conf);
828
829	if (man->macroset == MACROSET_MDOC) {
830		mdoc_validate(man);
831		html_mdoc(vp, man);
832	} else {
833		man_validate(man);
834		html_man(vp, man);
835	}
836
837	html_free(vp);
838	mparse_free(mp);
839	mchars_free();
840	free(conf.man);
841}
842
/*
 * Show one manual page.  An optional leading "./" is skipped.
 * Names then starting with 'c' are handed to catman(),
 * all others to format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = (file[0] == '.' && file[1] == '/') ? file + 2 : file;

	if (*rel == 'c')
		catman(req, rel);
	else
		format(req, rel);
}
855
856static void
857pg_show(struct req *req, const char *fullpath)
858{
859	char		*manpath;
860	const char	*file;
861
862	if ((file = strchr(fullpath, '/')) == NULL) {
863		pg_error_badrequest(
864		    "You did not specify a page to show.");
865		return;
866	}
867	manpath = mandoc_strndup(fullpath, file - fullpath);
868	file++;
869
870	if ( ! validate_manpath(req, manpath)) {
871		pg_error_badrequest(
872		    "You specified an invalid manpath.");
873		free(manpath);
874		return;
875	}
876
877	/*
878	 * Begin by chdir()ing into the manpath.
879	 * This way we can pick up the database files, which are
880	 * relative to the manpath root.
881	 */
882
883	if (chdir(manpath) == -1) {
884		fprintf(stderr, "chdir %s: %s\n",
885		    manpath, strerror(errno));
886		pg_error_internal();
887		free(manpath);
888		return;
889	}
890
891	if (strcmp(manpath, "mandoc")) {
892		free(req->q.manpath);
893		req->q.manpath = manpath;
894	} else
895		free(manpath);
896
897	if ( ! validate_filename(file)) {
898		pg_error_badrequest(
899		    "You specified an invalid manual file.");
900		return;
901	}
902
903	resp_begin_html(200, NULL);
904	resp_searchform(req);
905	resp_show(req, file);
906	resp_end_html();
907}
908
909static void
910pg_search(const struct req *req)
911{
912	struct mansearch	  search;
913	struct manpaths		  paths;
914	struct manpage		 *res;
915	char			**argv;
916	char			 *query, *rp, *wp;
917	size_t			  ressz;
918	int			  argc;
919
920	/*
921	 * Begin by chdir()ing into the root of the manpath.
922	 * This way we can pick up the database files, which are
923	 * relative to the manpath root.
924	 */
925
926	if (-1 == (chdir(req->q.manpath))) {
927		fprintf(stderr, "chdir %s: %s\n",
928		    req->q.manpath, strerror(errno));
929		pg_error_internal();
930		return;
931	}
932
933	search.arch = req->q.arch;
934	search.sec = req->q.sec;
935	search.outkey = "Nd";
936	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
937	search.firstmatch = 1;
938
939	paths.sz = 1;
940	paths.paths = mandoc_malloc(sizeof(char *));
941	paths.paths[0] = mandoc_strdup(".");
942
943	/*
944	 * Break apart at spaces with backslash-escaping.
945	 */
946
947	argc = 0;
948	argv = NULL;
949	rp = query = mandoc_strdup(req->q.query);
950	for (;;) {
951		while (isspace((unsigned char)*rp))
952			rp++;
953		if (*rp == '\0')
954			break;
955		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
956		argv[argc++] = wp = rp;
957		for (;;) {
958			if (isspace((unsigned char)*rp)) {
959				*wp = '\0';
960				rp++;
961				break;
962			}
963			if (rp[0] == '\\' && rp[1] != '\0')
964				rp++;
965			if (wp != rp)
966				*wp = *rp;
967			if (*rp == '\0')
968				break;
969			wp++;
970			rp++;
971		}
972	}
973
974	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
975		pg_noresult(req, "You entered an invalid query.");
976	else if (0 == ressz)
977		pg_noresult(req, "No results found.");
978	else
979		pg_searchres(req, res, ressz);
980
981	free(query);
982	mansearch_free(res, ressz);
983	free(paths.paths[0]);
984	free(paths.paths);
985}
986
987int
988main(void)
989{
990	struct req	 req;
991	struct itimerval itimer;
992	const char	*path;
993	const char	*querystring;
994	int		 i;
995
996	/* Poor man's ReDoS mitigation. */
997
998	itimer.it_value.tv_sec = 2;
999	itimer.it_value.tv_usec = 0;
1000	itimer.it_interval.tv_sec = 2;
1001	itimer.it_interval.tv_usec = 0;
1002	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1003		fprintf(stderr, "setitimer: %s\n", strerror(errno));
1004		pg_error_internal();
1005		return EXIT_FAILURE;
1006	}
1007
1008	/* Scan our run-time environment. */
1009
1010	if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1011		scriptname = "";
1012
1013	if ( ! validate_urifrag(scriptname)) {
1014		fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1015		    scriptname);
1016		pg_error_internal();
1017		return EXIT_FAILURE;
1018	}
1019
1020	/*
1021	 * First we change directory into the MAN_DIR so that
1022	 * subsequent scanning for manpath directories is rooted
1023	 * relative to the same position.
1024	 */
1025
1026	if (-1 == chdir(MAN_DIR)) {
1027		fprintf(stderr, "MAN_DIR: %s: %s\n",
1028		    MAN_DIR, strerror(errno));
1029		pg_error_internal();
1030		return EXIT_FAILURE;
1031	}
1032
1033	memset(&req, 0, sizeof(struct req));
1034	pathgen(&req);
1035
1036	/* Next parse out the query string. */
1037
1038	if (NULL != (querystring = getenv("QUERY_STRING")))
1039		http_parse(&req, querystring);
1040
1041	if (req.q.manpath == NULL)
1042		req.q.manpath = mandoc_strdup(req.p[0]);
1043	else if ( ! validate_manpath(&req, req.q.manpath)) {
1044		pg_error_badrequest(
1045		    "You specified an invalid manpath.");
1046		return EXIT_FAILURE;
1047	}
1048
1049	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1050		pg_error_badrequest(
1051		    "You specified an invalid architecture.");
1052		return EXIT_FAILURE;
1053	}
1054
1055	/* Dispatch to the three different pages. */
1056
1057	path = getenv("PATH_INFO");
1058	if (NULL == path)
1059		path = "";
1060	else if ('/' == *path)
1061		path++;
1062
1063	if ('\0' != *path)
1064		pg_show(&req, path);
1065	else if (NULL != req.q.query)
1066		pg_search(&req);
1067	else
1068		pg_index(&req);
1069
1070	free(req.q.manpath);
1071	free(req.q.arch);
1072	free(req.q.sec);
1073	free(req.q.query);
1074	for (i = 0; i < (int)req.psz; i++)
1075		free(req.p[i]);
1076	free(req.p);
1077	return EXIT_SUCCESS;
1078}
1079
1080/*
1081 * Scan for indexable paths.
1082 */
1083static void
1084pathgen(struct req *req)
1085{
1086	FILE	*fp;
1087	char	*dp;
1088	size_t	 dpsz;
1089	ssize_t	 len;
1090
1091	if (NULL == (fp = fopen("manpath.conf", "r"))) {
1092		fprintf(stderr, "%s/manpath.conf: %s\n",
1093			MAN_DIR, strerror(errno));
1094		pg_error_internal();
1095		exit(EXIT_FAILURE);
1096	}
1097
1098	dp = NULL;
1099	dpsz = 0;
1100
1101	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1102		if (dp[len - 1] == '\n')
1103			dp[--len] = '\0';
1104		req->p = mandoc_realloc(req->p,
1105		    (req->psz + 1) * sizeof(char *));
1106		if ( ! validate_urifrag(dp)) {
1107			fprintf(stderr, "%s/manpath.conf contains "
1108			    "unsafe path \"%s\"\n", MAN_DIR, dp);
1109			pg_error_internal();
1110			exit(EXIT_FAILURE);
1111		}
1112		if (NULL != strchr(dp, '/')) {
1113			fprintf(stderr, "%s/manpath.conf contains "
1114			    "path with slash \"%s\"\n", MAN_DIR, dp);
1115			pg_error_internal();
1116			exit(EXIT_FAILURE);
1117		}
1118		req->p[req->psz++] = dp;
1119		dp = NULL;
1120		dpsz = 0;
1121	}
1122	free(dp);
1123
1124	if ( req->p == NULL ) {
1125		fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1126		pg_error_internal();
1127		exit(EXIT_FAILURE);
1128	}
1129}
1130