1/*	$Id: cgi.c,v 1.156 2017/06/24 14:38:32 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/time.h>
22
23#include <ctype.h>
24#if HAVE_ERR
25#include <err.h>
26#endif
27#include <errno.h>
28#include <fcntl.h>
29#include <limits.h>
30#include <stdint.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35
36#include "mandoc_aux.h"
37#include "mandoc.h"
38#include "roff.h"
39#include "mdoc.h"
40#include "man.h"
41#include "main.h"
42#include "manconf.h"
43#include "mansearch.h"
44#include "cgi.h"
45
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct	query {
	char		*manpath; /* manual tree to look in */
	char		*arch; /* machine architecture */
	char		*sec; /* section of the manual */
	char		*query; /* query expression, still unparsed */
	int		 equal; /* names must match completely */
};
56
57struct	req {
58	struct query	  q;
59	char		**p; /* array of available manpaths */
60	size_t		  psz; /* number of available manpaths */
61	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
62};
63
/*
 * Which element of the search form, if any, gets the input focus.
 */
enum	focus {
	FOCUS_NONE = 0, /* no autofocus attribute is written */
	FOCUS_QUERY /* the query input box is autofocused */
};
68
69static	void		 html_print(const char *);
70static	void		 html_putchar(char);
71static	int		 http_decode(char *);
72static	void		 parse_manpath_conf(struct req *);
73static	void		 parse_path_info(struct req *req, const char *path);
74static	void		 parse_query_string(struct req *, const char *);
75static	void		 pg_error_badrequest(const char *);
76static	void		 pg_error_internal(void);
77static	void		 pg_index(const struct req *);
78static	void		 pg_noresult(const struct req *, const char *);
79static	void		 pg_redirect(const struct req *, const char *);
80static	void		 pg_search(const struct req *);
81static	void		 pg_searchres(const struct req *,
82				struct manpage *, size_t);
83static	void		 pg_show(struct req *, const char *);
84static	void		 resp_begin_html(int, const char *, const char *);
85static	void		 resp_begin_http(int, const char *);
86static	void		 resp_catman(const struct req *, const char *);
87static	void		 resp_copy(const char *);
88static	void		 resp_end_html(void);
89static	void		 resp_format(const struct req *, const char *);
90static	void		 resp_searchform(const struct req *, enum focus);
91static	void		 resp_show(const struct req *, const char *);
92static	void		 set_query_attr(char **, char **);
93static	int		 validate_filename(const char *);
94static	int		 validate_manpath(const struct req *, const char *);
95static	int		 validate_urifrag(const char *);
96
97static	const char	 *scriptname = SCRIPT_NAME;
98
/*
 * Preference of sections 1 to 9 when a single page has to be picked
 * from several matches; the entry with the lowest value wins.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; "0" stands for all sections. */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};

/* Labels of the section selector, parallel to sec_numbers. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117
/* Architectures offered by the architecture selector of the search form. */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",	"arm64",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc64",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133
/*
 * Write one character to standard output, replacing the characters
 * '"', '&', '>' and '<' by their HTML entities.
 * Every other byte, including non-ASCII, is passed through unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
160
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*str;

	free(*attr);

	str = *val;
	*val = NULL;

	if (*str != '\0') {
		*attr = str;
		return;
	}
	free(str);
	*attr = NULL;
}
191
192/*
193 * Parse the QUERY_STRING for key-value pairs
194 * and store the values into the query structure.
195 */
196static void
197parse_query_string(struct req *req, const char *qs)
198{
199	char		*key, *val;
200	size_t		 keysz, valsz;
201
202	req->isquery	= 1;
203	req->q.manpath	= NULL;
204	req->q.arch	= NULL;
205	req->q.sec	= NULL;
206	req->q.query	= NULL;
207	req->q.equal	= 1;
208
209	key = val = NULL;
210	while (*qs != '\0') {
211
212		/* Parse one key. */
213
214		keysz = strcspn(qs, "=;&");
215		key = mandoc_strndup(qs, keysz);
216		qs += keysz;
217		if (*qs != '=')
218			goto next;
219
220		/* Parse one value. */
221
222		valsz = strcspn(++qs, ";&");
223		val = mandoc_strndup(qs, valsz);
224		qs += valsz;
225
226		/* Decode and catch encoding errors. */
227
228		if ( ! (http_decode(key) && http_decode(val)))
229			goto next;
230
231		/* Handle key-value pairs. */
232
233		if ( ! strcmp(key, "query"))
234			set_query_attr(&req->q.query, &val);
235
236		else if ( ! strcmp(key, "apropos"))
237			req->q.equal = !strcmp(val, "0");
238
239		else if ( ! strcmp(key, "manpath")) {
240#ifdef COMPAT_OLDURI
241			if ( ! strncmp(val, "OpenBSD ", 8)) {
242				val[7] = '-';
243				if ('C' == val[8])
244					val[8] = 'c';
245			}
246#endif
247			set_query_attr(&req->q.manpath, &val);
248		}
249
250		else if ( ! (strcmp(key, "sec")
251#ifdef COMPAT_OLDURI
252		    && strcmp(key, "sektion")
253#endif
254		    )) {
255			if ( ! strcmp(val, "0"))
256				*val = '\0';
257			set_query_attr(&req->q.sec, &val);
258		}
259
260		else if ( ! strcmp(key, "arch")) {
261			if ( ! strcmp(val, "default"))
262				*val = '\0';
263			set_query_attr(&req->q.arch, &val);
264		}
265
266		/*
267		 * The key must be freed in any case.
268		 * The val may have been handed over to the query
269		 * structure, in which case it is now NULL.
270		 */
271next:
272		free(key);
273		key = NULL;
274		free(val);
275		val = NULL;
276
277		if (*qs != '\0')
278			qs++;
279	}
280}
281
/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A '+' turns into a blank, and a '%' must be followed by exactly
 * two hexadecimal digits encoding a byte other than NUL.
 * Returns 1 on success or 0 if the string is malformed; in the
 * latter case, the content of the buffer is unspecified.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	long		 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hex digits.  This also catches
		 * a string ending right after the '%' or after the
		 * first digit, without reading beyond the NUL, and
		 * rejects signs, blanks and "0x" prefixes that a
		 * scanf-style conversion would let through.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];

		/* An embedded NUL would truncate the string. */

		if ((c = strtol(hex, NULL, 16)) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
317
/*
 * Write the HTTP response headers and flush standard output.
 * A Status line is written only if the code differs from 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
332
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read error or unrecoverable write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/* Keep the order of buffered stdio output and raw writes. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may be short; push out the whole chunk. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw != -1)
				continue;
			if (errno != EINTR) {
				close(fd);
				return;
			}
			nw = 0;  /* Interrupted: retry the same range. */
		}
	}
	close(fd);
}
347
348static void
349resp_begin_html(int code, const char *msg, const char *file)
350{
351	char	*cp;
352
353	resp_begin_http(code, msg);
354
355	printf("<!DOCTYPE html>\n"
356	       "<html>\n"
357	       "<head>\n"
358	       "  <meta charset=\"UTF-8\"/>\n"
359	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
360	       " type=\"text/css\" media=\"all\">\n"
361	       "  <title>",
362	       CSS_DIR);
363	if (file != NULL) {
364		if ((cp = strrchr(file, '/')) != NULL)
365			file = cp + 1;
366		if ((cp = strrchr(file, '.')) != NULL) {
367			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
368		} else
369			printf("%s - ", file);
370	}
371	printf("%s</title>\n"
372	       "</head>\n"
373	       "<body>\n",
374	       CUSTOMIZE_TITLE);
375
376	resp_copy(MAN_DIR "/header.html");
377}
378
379static void
380resp_end_html(void)
381{
382
383	resp_copy(MAN_DIR "/footer.html");
384
385	puts("</body>\n"
386	     "</html>");
387}
388
389static void
390resp_searchform(const struct req *req, enum focus focus)
391{
392	int		 i;
393
394	printf("<form action=\"/%s\" method=\"get\">\n"
395	       "  <fieldset>\n"
396	       "    <legend>Manual Page Search Parameters</legend>\n",
397	       scriptname);
398
399	/* Write query input box. */
400
401	printf("    <input type=\"text\" name=\"query\" value=\"");
402	if (req->q.query != NULL)
403		html_print(req->q.query);
404	printf( "\" size=\"40\"");
405	if (focus == FOCUS_QUERY)
406		printf(" autofocus");
407	puts(">");
408
409	/* Write submission buttons. */
410
411	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
412		"man</button>\n"
413		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
414		"apropos</button>\n"
415		"    <br/>\n");
416
417	/* Write section selector. */
418
419	puts("    <select name=\"sec\">");
420	for (i = 0; i < sec_MAX; i++) {
421		printf("      <option value=\"%s\"", sec_numbers[i]);
422		if (NULL != req->q.sec &&
423		    0 == strcmp(sec_numbers[i], req->q.sec))
424			printf(" selected=\"selected\"");
425		printf(">%s</option>\n", sec_names[i]);
426	}
427	puts("    </select>");
428
429	/* Write architecture selector. */
430
431	printf(	"    <select name=\"arch\">\n"
432		"      <option value=\"default\"");
433	if (NULL == req->q.arch)
434		printf(" selected=\"selected\"");
435	puts(">All Architectures</option>");
436	for (i = 0; i < arch_MAX; i++) {
437		printf("      <option value=\"%s\"", arch_names[i]);
438		if (NULL != req->q.arch &&
439		    0 == strcmp(arch_names[i], req->q.arch))
440			printf(" selected=\"selected\"");
441		printf(">%s</option>\n", arch_names[i]);
442	}
443	puts("    </select>");
444
445	/* Write manpath selector. */
446
447	if (req->psz > 1) {
448		puts("    <select name=\"manpath\">");
449		for (i = 0; i < (int)req->psz; i++) {
450			printf("      <option ");
451			if (strcmp(req->q.manpath, req->p[i]) == 0)
452				printf("selected=\"selected\" ");
453			printf("value=\"");
454			html_print(req->p[i]);
455			printf("\">");
456			html_print(req->p[i]);
457			puts("</option>");
458		}
459		puts("    </select>");
460	}
461
462	puts("  </fieldset>\n"
463	     "</form>");
464}
465
/*
 * Check that a string consists of nothing but ASCII letters,
 * digits, and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
479
480static int
481validate_manpath(const struct req *req, const char* manpath)
482{
483	size_t	 i;
484
485	for (i = 0; i < req->psz; i++)
486		if ( ! strcmp(manpath, req->p[i]))
487			return 1;
488
489	return 0;
490}
491
/*
 * Check a manual page file name relative to the manpath root.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that might climb out of the tree. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
502
503static void
504pg_index(const struct req *req)
505{
506
507	resp_begin_html(200, NULL, NULL);
508	resp_searchform(req, FOCUS_QUERY);
509	printf("<p>\n"
510	       "This web interface is documented in the\n"
511	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
512	       "manual, and the\n"
513	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
514	       "manual explains the query syntax.\n"
515	       "</p>\n",
516	       scriptname, *scriptname == '\0' ? "" : "/",
517	       scriptname, *scriptname == '\0' ? "" : "/");
518	resp_end_html();
519}
520
521static void
522pg_noresult(const struct req *req, const char *msg)
523{
524	resp_begin_html(200, NULL, NULL);
525	resp_searchform(req, FOCUS_QUERY);
526	puts("<p>");
527	puts(msg);
528	puts("</p>");
529	resp_end_html();
530}
531
532static void
533pg_error_badrequest(const char *msg)
534{
535
536	resp_begin_html(400, "Bad Request", NULL);
537	puts("<h1>Bad Request</h1>\n"
538	     "<p>\n");
539	puts(msg);
540	printf("Try again from the\n"
541	       "<a href=\"/%s\">main page</a>.\n"
542	       "</p>", scriptname);
543	resp_end_html();
544}
545
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
553
554static void
555pg_redirect(const struct req *req, const char *name)
556{
557	printf("Status: 303 See Other\r\n"
558	    "Location: /");
559	if (*scriptname != '\0')
560		printf("%s/", scriptname);
561	if (strcmp(req->q.manpath, req->p[0]))
562		printf("%s/", req->q.manpath);
563	if (req->q.arch != NULL)
564		printf("%s/", req->q.arch);
565	printf("%s", name);
566	if (req->q.sec != NULL)
567		printf(".%s", req->q.sec);
568	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
569}
570
571static void
572pg_searchres(const struct req *req, struct manpage *r, size_t sz)
573{
574	char		*arch, *archend;
575	const char	*sec;
576	size_t		 i, iuse;
577	int		 archprio, archpriouse;
578	int		 prio, priouse;
579
580	for (i = 0; i < sz; i++) {
581		if (validate_filename(r[i].file))
582			continue;
583		warnx("invalid filename %s in %s database",
584		    r[i].file, req->q.manpath);
585		pg_error_internal();
586		return;
587	}
588
589	if (req->isquery && sz == 1) {
590		/*
591		 * If we have just one result, then jump there now
592		 * without any delay.
593		 */
594		printf("Status: 303 See Other\r\n"
595		    "Location: /");
596		if (*scriptname != '\0')
597			printf("%s/", scriptname);
598		if (strcmp(req->q.manpath, req->p[0]))
599			printf("%s/", req->q.manpath);
600		printf("%s\r\n"
601		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
602		    r[0].file);
603		return;
604	}
605
606	/*
607	 * In man(1) mode, show one of the pages
608	 * even if more than one is found.
609	 */
610
611	iuse = 0;
612	if (req->q.equal || sz == 1) {
613		priouse = 20;
614		archpriouse = 3;
615		for (i = 0; i < sz; i++) {
616			sec = r[i].file;
617			sec += strcspn(sec, "123456789");
618			if (sec[0] == '\0')
619				continue;
620			prio = sec_prios[sec[0] - '1'];
621			if (sec[1] != '/')
622				prio += 10;
623			if (req->q.arch == NULL) {
624				archprio =
625				    ((arch = strchr(sec + 1, '/'))
626					== NULL) ? 3 :
627				    ((archend = strchr(arch + 1, '/'))
628					== NULL) ? 0 :
629				    strncmp(arch, "amd64/",
630					archend - arch) ? 2 : 1;
631				if (archprio < archpriouse) {
632					archpriouse = archprio;
633					priouse = prio;
634					iuse = i;
635					continue;
636				}
637				if (archprio > archpriouse)
638					continue;
639			}
640			if (prio >= priouse)
641				continue;
642			priouse = prio;
643			iuse = i;
644		}
645		resp_begin_html(200, NULL, r[iuse].file);
646	} else
647		resp_begin_html(200, NULL, NULL);
648
649	resp_searchform(req,
650	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
651
652	if (sz > 1) {
653		puts("<table class=\"results\">");
654		for (i = 0; i < sz; i++) {
655			printf("  <tr>\n"
656			       "    <td>"
657			       "<a class=\"Xr\" href=\"/");
658			if (*scriptname != '\0')
659				printf("%s/", scriptname);
660			if (strcmp(req->q.manpath, req->p[0]))
661				printf("%s/", req->q.manpath);
662			printf("%s\">", r[i].file);
663			html_print(r[i].names);
664			printf("</a></td>\n"
665			       "    <td><span class=\"Nd\">");
666			html_print(r[i].output);
667			puts("</span></td>\n"
668			     "  </tr>");
669		}
670		puts("</table>");
671	}
672
673	if (req->q.equal || sz == 1) {
674		puts("<hr>");
675		resp_show(req, r[iuse].file);
676	}
677
678	resp_end_html();
679}
680
/*
 * Write a preformatted manual page as HTML inside <pre>.
 * Backspace overstrike sequences are translated: an underscore
 * followed by backspace and a character makes that character
 * italic, a few special pairs are rendered as '*' or '+', and
 * any other character followed by backspace and a character
 * makes the latter bold.  If the file cannot be opened, an error
 * paragraph is written instead.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * At this point, p[i + 1] is known to be the
			 * backspace, so the character struck over
			 * p[i] is always p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
817
818static void
819resp_format(const struct req *req, const char *file)
820{
821	struct manoutput conf;
822	struct mparse	*mp;
823	struct roff_man	*man;
824	void		*vp;
825	int		 fd;
826	int		 usepath;
827
828	if (-1 == (fd = open(file, O_RDONLY, 0))) {
829		puts("<p>You specified an invalid manual file.</p>");
830		return;
831	}
832
833	mchars_alloc();
834	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
835	    MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
836	mparse_readfd(mp, fd, file);
837	close(fd);
838
839	memset(&conf, 0, sizeof(conf));
840	conf.fragment = 1;
841	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
842	usepath = strcmp(req->q.manpath, req->p[0]);
843	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
844	    scriptname, *scriptname == '\0' ? "" : "/",
845	    usepath ? req->q.manpath : "", usepath ? "/" : "");
846
847	mparse_result(mp, &man, NULL);
848	if (man == NULL) {
849		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
850		pg_error_internal();
851		mparse_free(mp);
852		mchars_free();
853		return;
854	}
855
856	vp = html_alloc(&conf);
857
858	if (man->macroset == MACROSET_MDOC) {
859		mdoc_validate(man);
860		html_mdoc(vp, man);
861	} else {
862		man_validate(man);
863		html_man(vp, man);
864	}
865
866	html_free(vp);
867	mparse_free(mp);
868	mchars_free();
869	free(conf.man);
870	free(conf.style);
871}
872
/*
 * Write the content of one manual page.  After skipping an
 * optional leading "./", file names starting with 'c' are handled
 * as preformatted pages, all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
885
886static void
887pg_show(struct req *req, const char *fullpath)
888{
889	char		*manpath;
890	const char	*file;
891
892	if ((file = strchr(fullpath, '/')) == NULL) {
893		pg_error_badrequest(
894		    "You did not specify a page to show.");
895		return;
896	}
897	manpath = mandoc_strndup(fullpath, file - fullpath);
898	file++;
899
900	if ( ! validate_manpath(req, manpath)) {
901		pg_error_badrequest(
902		    "You specified an invalid manpath.");
903		free(manpath);
904		return;
905	}
906
907	/*
908	 * Begin by chdir()ing into the manpath.
909	 * This way we can pick up the database files, which are
910	 * relative to the manpath root.
911	 */
912
913	if (chdir(manpath) == -1) {
914		warn("chdir %s", manpath);
915		pg_error_internal();
916		free(manpath);
917		return;
918	}
919	free(manpath);
920
921	if ( ! validate_filename(file)) {
922		pg_error_badrequest(
923		    "You specified an invalid manual file.");
924		return;
925	}
926
927	resp_begin_html(200, NULL, file);
928	resp_searchform(req, FOCUS_NONE);
929	resp_show(req, file);
930	resp_end_html();
931}
932
933static void
934pg_search(const struct req *req)
935{
936	struct mansearch	  search;
937	struct manpaths		  paths;
938	struct manpage		 *res;
939	char			**argv;
940	char			 *query, *rp, *wp;
941	size_t			  ressz;
942	int			  argc;
943
944	/*
945	 * Begin by chdir()ing into the root of the manpath.
946	 * This way we can pick up the database files, which are
947	 * relative to the manpath root.
948	 */
949
950	if (chdir(req->q.manpath) == -1) {
951		warn("chdir %s", req->q.manpath);
952		pg_error_internal();
953		return;
954	}
955
956	search.arch = req->q.arch;
957	search.sec = req->q.sec;
958	search.outkey = "Nd";
959	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
960	search.firstmatch = 1;
961
962	paths.sz = 1;
963	paths.paths = mandoc_malloc(sizeof(char *));
964	paths.paths[0] = mandoc_strdup(".");
965
966	/*
967	 * Break apart at spaces with backslash-escaping.
968	 */
969
970	argc = 0;
971	argv = NULL;
972	rp = query = mandoc_strdup(req->q.query);
973	for (;;) {
974		while (isspace((unsigned char)*rp))
975			rp++;
976		if (*rp == '\0')
977			break;
978		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
979		argv[argc++] = wp = rp;
980		for (;;) {
981			if (isspace((unsigned char)*rp)) {
982				*wp = '\0';
983				rp++;
984				break;
985			}
986			if (rp[0] == '\\' && rp[1] != '\0')
987				rp++;
988			if (wp != rp)
989				*wp = *rp;
990			if (*rp == '\0')
991				break;
992			wp++;
993			rp++;
994		}
995	}
996
997	res = NULL;
998	ressz = 0;
999	if (req->isquery && req->q.equal && argc == 1)
1000		pg_redirect(req, argv[0]);
1001	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1002		pg_noresult(req, "You entered an invalid query.");
1003	else if (ressz == 0)
1004		pg_noresult(req, "No results found.");
1005	else
1006		pg_searchres(req, res, ressz);
1007
1008	free(query);
1009	mansearch_free(res, ressz);
1010	free(paths.paths[0]);
1011	free(paths.paths);
1012}
1013
1014int
1015main(void)
1016{
1017	struct req	 req;
1018	struct itimerval itimer;
1019	const char	*path;
1020	const char	*querystring;
1021	int		 i;
1022
1023#if HAVE_PLEDGE
1024	/*
1025	 * The "rpath" pledge could be revoked after mparse_readfd()
1026	 * if the file desciptor to "/footer.html" would be opened
1027	 * up front, but it's probably not worth the complication
1028	 * of the code it would cause: it would require scattering
1029	 * pledge() calls in multiple low-level resp_*() functions.
1030	 */
1031
1032	if (pledge("stdio rpath", NULL) == -1) {
1033		warn("pledge");
1034		pg_error_internal();
1035		return EXIT_FAILURE;
1036	}
1037#endif
1038
1039	/* Poor man's ReDoS mitigation. */
1040
1041	itimer.it_value.tv_sec = 2;
1042	itimer.it_value.tv_usec = 0;
1043	itimer.it_interval.tv_sec = 2;
1044	itimer.it_interval.tv_usec = 0;
1045	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1046		warn("setitimer");
1047		pg_error_internal();
1048		return EXIT_FAILURE;
1049	}
1050
1051	/*
1052	 * First we change directory into the MAN_DIR so that
1053	 * subsequent scanning for manpath directories is rooted
1054	 * relative to the same position.
1055	 */
1056
1057	if (chdir(MAN_DIR) == -1) {
1058		warn("MAN_DIR: %s", MAN_DIR);
1059		pg_error_internal();
1060		return EXIT_FAILURE;
1061	}
1062
1063	memset(&req, 0, sizeof(struct req));
1064	req.q.equal = 1;
1065	parse_manpath_conf(&req);
1066
1067	/* Parse the path info and the query string. */
1068
1069	if ((path = getenv("PATH_INFO")) == NULL)
1070		path = "";
1071	else if (*path == '/')
1072		path++;
1073
1074	if (*path != '\0') {
1075		parse_path_info(&req, path);
1076		if (req.q.manpath == NULL || req.q.sec == NULL ||
1077		    *req.q.query == '\0' || access(path, F_OK) == -1)
1078			path = "";
1079	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1080		parse_query_string(&req, querystring);
1081
1082	/* Validate parsed data and add defaults. */
1083
1084	if (req.q.manpath == NULL)
1085		req.q.manpath = mandoc_strdup(req.p[0]);
1086	else if ( ! validate_manpath(&req, req.q.manpath)) {
1087		pg_error_badrequest(
1088		    "You specified an invalid manpath.");
1089		return EXIT_FAILURE;
1090	}
1091
1092	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1093		pg_error_badrequest(
1094		    "You specified an invalid architecture.");
1095		return EXIT_FAILURE;
1096	}
1097
1098	/* Dispatch to the three different pages. */
1099
1100	if ('\0' != *path)
1101		pg_show(&req, path);
1102	else if (NULL != req.q.query)
1103		pg_search(&req);
1104	else
1105		pg_index(&req);
1106
1107	free(req.q.manpath);
1108	free(req.q.arch);
1109	free(req.q.sec);
1110	free(req.q.query);
1111	for (i = 0; i < (int)req.psz; i++)
1112		free(req.p[i]);
1113	free(req.p);
1114	return EXIT_SUCCESS;
1115}
1116
1117/*
1118 * If PATH_INFO is not a file name, translate it to a query.
1119 */
1120static void
1121parse_path_info(struct req *req, const char *path)
1122{
1123	char	*dir[4];
1124	int	 i;
1125
1126	req->isquery = 0;
1127	req->q.equal = 1;
1128	req->q.manpath = mandoc_strdup(path);
1129	req->q.arch = NULL;
1130
1131	/* Mandatory manual page name. */
1132	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1133		req->q.query = req->q.manpath;
1134		req->q.manpath = NULL;
1135	} else
1136		*req->q.query++ = '\0';
1137
1138	/* Optional trailing section. */
1139	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1140		if(isdigit((unsigned char)req->q.sec[1])) {
1141			*req->q.sec++ = '\0';
1142			req->q.sec = mandoc_strdup(req->q.sec);
1143		} else
1144			req->q.sec = NULL;
1145	}
1146
1147	/* Handle the case of name[.section] only. */
1148	if (req->q.manpath == NULL)
1149		return;
1150	req->q.query = mandoc_strdup(req->q.query);
1151
1152	/* Split directory components. */
1153	dir[i = 0] = req->q.manpath;
1154	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1155		if (++i == 3) {
1156			pg_error_badrequest(
1157			    "You specified too many directory components.");
1158			exit(EXIT_FAILURE);
1159		}
1160		*dir[i]++ = '\0';
1161	}
1162
1163	/* Optional manpath. */
1164	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1165		req->q.manpath = NULL;
1166	else if (dir[1] == NULL)
1167		return;
1168
1169	/* Optional section. */
1170	if (strncmp(dir[i], "man", 3) == 0) {
1171		free(req->q.sec);
1172		req->q.sec = mandoc_strdup(dir[i++] + 3);
1173	}
1174	if (dir[i] == NULL) {
1175		if (req->q.manpath == NULL)
1176			free(dir[0]);
1177		return;
1178	}
1179	if (dir[i + 1] != NULL) {
1180		pg_error_badrequest(
1181		    "You specified an invalid directory component.");
1182		exit(EXIT_FAILURE);
1183	}
1184
1185	/* Optional architecture. */
1186	if (i) {
1187		req->q.arch = mandoc_strdup(dir[i]);
1188		if (req->q.manpath == NULL)
1189			free(dir[0]);
1190	} else
1191		req->q.arch = dir[0];
1192}
1193
1194/*
1195 * Scan for indexable paths.
1196 */
1197static void
1198parse_manpath_conf(struct req *req)
1199{
1200	FILE	*fp;
1201	char	*dp;
1202	size_t	 dpsz;
1203	ssize_t	 len;
1204
1205	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1206		warn("%s/manpath.conf", MAN_DIR);
1207		pg_error_internal();
1208		exit(EXIT_FAILURE);
1209	}
1210
1211	dp = NULL;
1212	dpsz = 0;
1213
1214	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1215		if (dp[len - 1] == '\n')
1216			dp[--len] = '\0';
1217		req->p = mandoc_realloc(req->p,
1218		    (req->psz + 1) * sizeof(char *));
1219		if ( ! validate_urifrag(dp)) {
1220			warnx("%s/manpath.conf contains "
1221			    "unsafe path \"%s\"", MAN_DIR, dp);
1222			pg_error_internal();
1223			exit(EXIT_FAILURE);
1224		}
1225		if (strchr(dp, '/') != NULL) {
1226			warnx("%s/manpath.conf contains "
1227			    "path with slash \"%s\"", MAN_DIR, dp);
1228			pg_error_internal();
1229			exit(EXIT_FAILURE);
1230		}
1231		req->p[req->psz++] = dp;
1232		dp = NULL;
1233		dpsz = 0;
1234	}
1235	free(dp);
1236
1237	if (req->p == NULL) {
1238		warnx("%s/manpath.conf is empty", MAN_DIR);
1239		pg_error_internal();
1240		exit(EXIT_FAILURE);
1241	}
1242}
1243