1/*	$Id: cgi.c,v 1.167 2019/07/10 12:49:20 schwarze Exp $ */
2/*
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/time.h>
22
23#include <ctype.h>
24#if HAVE_ERR
25#include <err.h>
26#endif
27#include <errno.h>
28#include <fcntl.h>
29#include <limits.h>
30#include <stdint.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35
36#include "mandoc_aux.h"
37#include "mandoc.h"
38#include "roff.h"
39#include "mdoc.h"
40#include "man.h"
41#include "mandoc_parse.h"
42#include "main.h"
43#include "manconf.h"
44#include "mansearch.h"
45#include "cgi.h"
46
/*
 * Search parameters of one request,
 * in the form handed to the search function.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* unparsed query expression */
	char	*query;
	/* match whole names, not substrings */
	int	 equal;
};
57
58struct	req {
59	struct query	  q;
60	char		**p; /* array of available manpaths */
61	size_t		  psz; /* number of available manpaths */
62	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
63};
64
/*
 * Tells resp_searchform() whether the query input box
 * is to be written with the autofocus attribute.
 */
enum focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY = 1
};
69
/*
 * Forward declarations of all file-local functions,
 * in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *p);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);

/*
 * Path component printed right after the leading slash in generated
 * links, redirects and the form action.  It may be empty, in which
 * case the callers omit the slash that would otherwise follow it.
 */
static	const char	 *scriptname = SCRIPT_NAME;
101
/*
 * Preference of the sections 1 to 9, indexed by the section digit
 * minus one.  A lower value means a more preferred section.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values submitted by the section selector of the search form. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown for the entries of sec_numbers[], in the same order. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120
/*
 * Architecture names accepted by validate_arch()
 * and offered by the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",
	"arm64",	"hppa",		"i386",
	"landisk",	"loongson",	"luna88k",
	"macppc",	"mips64",	"octeon",
	"sgi",		"socppc",	"sparc64",
	"amiga",	"arc",		"armish",
	"arm32",	"atari",	"aviion",
	"beagle",	"cats",		"hppa64",
	"hp300",	"ia64",		"mac68k",
	"mvme68k",	"mvme88k",	"mvmeppc",
	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"solbourne",
	"sparc",	"sun3",		"vax",
	"wgrisc",	"x68k",		"zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
136
/*
 * Write one character to standard output, replacing the double
 * quote, the ampersand and the angle brackets by HTML character
 * references.  All other bytes, including non-ASCII ones, are
 * written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
163
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
177
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
194
195/*
196 * Parse the QUERY_STRING for key-value pairs
197 * and store the values into the query structure.
198 */
199static void
200parse_query_string(struct req *req, const char *qs)
201{
202	char		*key, *val;
203	size_t		 keysz, valsz;
204
205	req->isquery	= 1;
206	req->q.manpath	= NULL;
207	req->q.arch	= NULL;
208	req->q.sec	= NULL;
209	req->q.query	= NULL;
210	req->q.equal	= 1;
211
212	key = val = NULL;
213	while (*qs != '\0') {
214
215		/* Parse one key. */
216
217		keysz = strcspn(qs, "=;&");
218		key = mandoc_strndup(qs, keysz);
219		qs += keysz;
220		if (*qs != '=')
221			goto next;
222
223		/* Parse one value. */
224
225		valsz = strcspn(++qs, ";&");
226		val = mandoc_strndup(qs, valsz);
227		qs += valsz;
228
229		/* Decode and catch encoding errors. */
230
231		if ( ! (http_decode(key) && http_decode(val)))
232			goto next;
233
234		/* Handle key-value pairs. */
235
236		if ( ! strcmp(key, "query"))
237			set_query_attr(&req->q.query, &val);
238
239		else if ( ! strcmp(key, "apropos"))
240			req->q.equal = !strcmp(val, "0");
241
242		else if ( ! strcmp(key, "manpath")) {
243#ifdef COMPAT_OLDURI
244			if ( ! strncmp(val, "OpenBSD ", 8)) {
245				val[7] = '-';
246				if ('C' == val[8])
247					val[8] = 'c';
248			}
249#endif
250			set_query_attr(&req->q.manpath, &val);
251		}
252
253		else if ( ! (strcmp(key, "sec")
254#ifdef COMPAT_OLDURI
255		    && strcmp(key, "sektion")
256#endif
257		    )) {
258			if ( ! strcmp(val, "0"))
259				*val = '\0';
260			set_query_attr(&req->q.sec, &val);
261		}
262
263		else if ( ! strcmp(key, "arch")) {
264			if ( ! strcmp(val, "default"))
265				*val = '\0';
266			set_query_attr(&req->q.arch, &val);
267		}
268
269		/*
270		 * The key must be freed in any case.
271		 * The val may have been handed over to the query
272		 * structure, in which case it is now NULL.
273		 */
274next:
275		free(key);
276		key = NULL;
277		free(val);
278		val = NULL;
279
280		if (*qs != '\0')
281			qs++;
282	}
283}
284
/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A plus sign turns into a space, and a percent sign has to be
 * followed by exactly two hexadecimal digits encoding a non-NUL byte.
 *
 * Returns 1 on success.  Returns 0 on a truncated or malformed escape
 * sequence or on an encoded NUL byte; in that case, the content of
 * the buffer is unspecified (partially decoded) and the caller is
 * expected to discard it.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	long		 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p == '+') {
			*q = ' ';
			continue;
		}
		if (*p != '%') {
			*q = *p;
			continue;
		}

		/*
		 * Check the digits one by one, such that p[2] is never
		 * inspected when the string already ends at p[1].
		 * Insisting on hex digits rejects signs and whitespace
		 * that a scanf-style conversion would silently accept.
		 */

		if ( ! isxdigit((unsigned char)p[1]))
			return 0;
		if ( ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the decoded string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
320
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and the characters "-._~" are written
 * as they are; every other byte becomes %XX in uppercase hex.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
332
/*
 * Write the HTTP response header block and flush it.
 * A Status line with the given code and message is only
 * written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	     "Cache-Control: no-cache\r\n"
	     "Pragma: no-cache\r\n"
	     "\r\n", stdout);
	fflush(stdout);
}
347
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped, because
 * the header and footer files copied this way are optional.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data bypasses stdio, so anything still buffered
	 * has to go out first to keep the output in order.
	 */

	fflush(stdout);

	for (;;) {
		nr = read(fd, buf, sizeof(buf));
		if (nr == -1 && errno == EINTR)
			continue;
		if (nr <= 0)
			break;

		/* write(2) may accept less than requested. */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;
			goto out;
		}
	}
out:
	close(fd);
}
362
363static void
364resp_begin_html(int code, const char *msg, const char *file)
365{
366	char	*cp;
367
368	resp_begin_http(code, msg);
369
370	printf("<!DOCTYPE html>\n"
371	       "<html>\n"
372	       "<head>\n"
373	       "  <meta charset=\"UTF-8\"/>\n"
374	       "  <meta name=\"viewport\""
375		      " content=\"width=device-width, initial-scale=1.0\">\n"
376	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
377	       " type=\"text/css\" media=\"all\">\n"
378	       "  <title>",
379	       CSS_DIR);
380	if (file != NULL) {
381		if ((cp = strrchr(file, '/')) != NULL)
382			file = cp + 1;
383		if ((cp = strrchr(file, '.')) != NULL) {
384			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
385		} else
386			printf("%s - ", file);
387	}
388	printf("%s</title>\n"
389	       "</head>\n"
390	       "<body>\n",
391	       CUSTOMIZE_TITLE);
392
393	resp_copy(MAN_DIR "/header.html");
394}
395
396static void
397resp_end_html(void)
398{
399
400	resp_copy(MAN_DIR "/footer.html");
401
402	puts("</body>\n"
403	     "</html>");
404}
405
406static void
407resp_searchform(const struct req *req, enum focus focus)
408{
409	int		 i;
410
411	printf("<form action=\"/%s\" method=\"get\">\n"
412	       "  <fieldset>\n"
413	       "    <legend>Manual Page Search Parameters</legend>\n",
414	       scriptname);
415
416	/* Write query input box. */
417
418	printf("    <input type=\"search\" name=\"query\" value=\"");
419	if (req->q.query != NULL)
420		html_print(req->q.query);
421	printf( "\" size=\"40\"");
422	if (focus == FOCUS_QUERY)
423		printf(" autofocus");
424	puts(">");
425
426	/* Write submission buttons. */
427
428	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
429		"man</button>\n"
430		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
431		"apropos</button>\n"
432		"    <br/>\n");
433
434	/* Write section selector. */
435
436	puts("    <select name=\"sec\">");
437	for (i = 0; i < sec_MAX; i++) {
438		printf("      <option value=\"%s\"", sec_numbers[i]);
439		if (NULL != req->q.sec &&
440		    0 == strcmp(sec_numbers[i], req->q.sec))
441			printf(" selected=\"selected\"");
442		printf(">%s</option>\n", sec_names[i]);
443	}
444	puts("    </select>");
445
446	/* Write architecture selector. */
447
448	printf(	"    <select name=\"arch\">\n"
449		"      <option value=\"default\"");
450	if (NULL == req->q.arch)
451		printf(" selected=\"selected\"");
452	puts(">All Architectures</option>");
453	for (i = 0; i < arch_MAX; i++) {
454		printf("      <option");
455		if (NULL != req->q.arch &&
456		    0 == strcmp(arch_names[i], req->q.arch))
457			printf(" selected=\"selected\"");
458		printf(">%s</option>\n", arch_names[i]);
459	}
460	puts("    </select>");
461
462	/* Write manpath selector. */
463
464	if (req->psz > 1) {
465		puts("    <select name=\"manpath\">");
466		for (i = 0; i < (int)req->psz; i++) {
467			printf("      <option");
468			if (strcmp(req->q.manpath, req->p[i]) == 0)
469				printf(" selected=\"selected\"");
470			printf(">");
471			html_print(req->p[i]);
472			puts("</option>");
473		}
474		puts("    </select>");
475	}
476
477	puts("  </fieldset>\n"
478	     "</form>");
479}
480
/*
 * Check that a string consists exclusively of alphanumeric
 * characters, hyphens, dots, slashes, and underscores.
 * Returns 1 if so (also for the empty string) and 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			if ( ! isalnum((unsigned char)*cp))
				return 0;
			break;
		}
	}
	return 1;
}
494
495static int
496validate_manpath(const struct req *req, const char* manpath)
497{
498	size_t	 i;
499
500	for (i = 0; i < req->psz; i++)
501		if ( ! strcmp(manpath, req->p[i]))
502			return 1;
503
504	return 0;
505}
506
507static int
508validate_arch(const char *arch)
509{
510	int	 i;
511
512	for (i = 0; i < arch_MAX; i++)
513		if (strcmp(arch, arch_names[i]) == 0)
514			return 1;
515
516	return 0;
517}
518
/*
 * Check that a file name, after skipping an optional leading "./",
 * starts with "man" or "cat" and neither contains "../" nor "/..".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the directory tree. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
529
530static void
531pg_index(const struct req *req)
532{
533
534	resp_begin_html(200, NULL, NULL);
535	resp_searchform(req, FOCUS_QUERY);
536	printf("<p>\n"
537	       "This web interface is documented in the\n"
538	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539	       "manual, and the\n"
540	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541	       "manual explains the query syntax.\n"
542	       "</p>\n",
543	       scriptname, *scriptname == '\0' ? "" : "/",
544	       scriptname, *scriptname == '\0' ? "" : "/");
545	resp_end_html();
546}
547
548static void
549pg_noresult(const struct req *req, const char *msg)
550{
551	resp_begin_html(200, NULL, NULL);
552	resp_searchform(req, FOCUS_QUERY);
553	puts("<p>");
554	puts(msg);
555	puts("</p>");
556	resp_end_html();
557}
558
559static void
560pg_error_badrequest(const char *msg)
561{
562
563	resp_begin_html(400, "Bad Request", NULL);
564	puts("<h1>Bad Request</h1>\n"
565	     "<p>\n");
566	puts(msg);
567	printf("Try again from the\n"
568	       "<a href=\"/%s\">main page</a>.\n"
569	       "</p>", scriptname);
570	resp_end_html();
571}
572
/*
 * Write a "500 Internal Server Error" page without any details.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	printf("%s\n", "<p>Internal Server Error</p>");
	resp_end_html();
}
580
581static void
582pg_redirect(const struct req *req, const char *name)
583{
584	printf("Status: 303 See Other\r\n"
585	    "Location: /");
586	if (*scriptname != '\0')
587		printf("%s/", scriptname);
588	if (strcmp(req->q.manpath, req->p[0]))
589		printf("%s/", req->q.manpath);
590	if (req->q.arch != NULL)
591		printf("%s/", req->q.arch);
592	http_encode(name);
593	if (req->q.sec != NULL) {
594		putchar('.');
595		http_encode(req->q.sec);
596	}
597	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598}
599
600static void
601pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602{
603	char		*arch, *archend;
604	const char	*sec;
605	size_t		 i, iuse;
606	int		 archprio, archpriouse;
607	int		 prio, priouse;
608
609	for (i = 0; i < sz; i++) {
610		if (validate_filename(r[i].file))
611			continue;
612		warnx("invalid filename %s in %s database",
613		    r[i].file, req->q.manpath);
614		pg_error_internal();
615		return;
616	}
617
618	if (req->isquery && sz == 1) {
619		/*
620		 * If we have just one result, then jump there now
621		 * without any delay.
622		 */
623		printf("Status: 303 See Other\r\n"
624		    "Location: /");
625		if (*scriptname != '\0')
626			printf("%s/", scriptname);
627		if (strcmp(req->q.manpath, req->p[0]))
628			printf("%s/", req->q.manpath);
629		printf("%s\r\n"
630		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
631		    r[0].file);
632		return;
633	}
634
635	/*
636	 * In man(1) mode, show one of the pages
637	 * even if more than one is found.
638	 */
639
640	iuse = 0;
641	if (req->q.equal || sz == 1) {
642		priouse = 20;
643		archpriouse = 3;
644		for (i = 0; i < sz; i++) {
645			sec = r[i].file;
646			sec += strcspn(sec, "123456789");
647			if (sec[0] == '\0')
648				continue;
649			prio = sec_prios[sec[0] - '1'];
650			if (sec[1] != '/')
651				prio += 10;
652			if (req->q.arch == NULL) {
653				archprio =
654				    ((arch = strchr(sec + 1, '/'))
655					== NULL) ? 3 :
656				    ((archend = strchr(arch + 1, '/'))
657					== NULL) ? 0 :
658				    strncmp(arch, "amd64/",
659					archend - arch) ? 2 : 1;
660				if (archprio < archpriouse) {
661					archpriouse = archprio;
662					priouse = prio;
663					iuse = i;
664					continue;
665				}
666				if (archprio > archpriouse)
667					continue;
668			}
669			if (prio >= priouse)
670				continue;
671			priouse = prio;
672			iuse = i;
673		}
674		resp_begin_html(200, NULL, r[iuse].file);
675	} else
676		resp_begin_html(200, NULL, NULL);
677
678	resp_searchform(req,
679	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680
681	if (sz > 1) {
682		puts("<table class=\"results\">");
683		for (i = 0; i < sz; i++) {
684			printf("  <tr>\n"
685			       "    <td>"
686			       "<a class=\"Xr\" href=\"/");
687			if (*scriptname != '\0')
688				printf("%s/", scriptname);
689			if (strcmp(req->q.manpath, req->p[0]))
690				printf("%s/", req->q.manpath);
691			printf("%s\">", r[i].file);
692			html_print(r[i].names);
693			printf("</a></td>\n"
694			       "    <td><span class=\"Nd\">");
695			html_print(r[i].output);
696			puts("</span></td>\n"
697			     "  </tr>");
698		}
699		puts("</table>");
700	}
701
702	if (req->q.equal || sz == 1) {
703		puts("<hr>");
704		resp_show(req, r[iuse].file);
705	}
706
707	resp_end_html();
708}
709
/*
 * Close whichever of the italic and bold elements are open
 * and mark both as closed.
 */
static void
catman_close_fonts(int *italic, int *bold)
{
	if (*italic)
		printf("</i>");
	if (*bold)
		printf("</b>");
	*italic = *bold = 0;
}

/*
 * Write a preformatted manual page (cat page) as HTML.
 * Backspace overstriking is translated as follows:
 * an underscore overstruck by a character shows that character in
 * italics, some pairs of symbols overstriking each other show as
 * a single '*' or '+', and any other overstrike shows the second
 * character in bold.  All text is HTML-escaped.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	(void)req;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				catman_close_fonts(&italic, &bold);
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2]))  {
				catman_close_fonts(&italic, &bold);
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2]))  {
				catman_close_fonts(&italic, &bold);
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		catman_close_fonts(&italic, &bold);

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
846
847static void
848resp_format(const struct req *req, const char *file)
849{
850	struct manoutput conf;
851	struct mparse	*mp;
852	struct roff_meta *meta;
853	void		*vp;
854	int		 fd;
855	int		 usepath;
856
857	if (-1 == (fd = open(file, O_RDONLY, 0))) {
858		puts("<p>You specified an invalid manual file.</p>");
859		return;
860	}
861
862	mchars_alloc();
863	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
864	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
865	mparse_readfd(mp, fd, file);
866	close(fd);
867	meta = mparse_result(mp);
868
869	memset(&conf, 0, sizeof(conf));
870	conf.fragment = 1;
871	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
872	usepath = strcmp(req->q.manpath, req->p[0]);
873	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
874	    scriptname, *scriptname == '\0' ? "" : "/",
875	    usepath ? req->q.manpath : "", usepath ? "/" : "");
876
877	vp = html_alloc(&conf);
878	if (meta->macroset == MACROSET_MDOC)
879		html_mdoc(vp, meta);
880	else
881		html_man(vp, meta);
882
883	html_free(vp);
884	mparse_free(mp);
885	mchars_free();
886	free(conf.man);
887	free(conf.style);
888}
889
/*
 * Write the content of one manual page.  After skipping an
 * optional leading "./", file names starting with 'c' are handled
 * as preformatted pages and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
902
903static void
904pg_show(struct req *req, const char *fullpath)
905{
906	char		*manpath;
907	const char	*file;
908
909	if ((file = strchr(fullpath, '/')) == NULL) {
910		pg_error_badrequest(
911		    "You did not specify a page to show.");
912		return;
913	}
914	manpath = mandoc_strndup(fullpath, file - fullpath);
915	file++;
916
917	if ( ! validate_manpath(req, manpath)) {
918		pg_error_badrequest(
919		    "You specified an invalid manpath.");
920		free(manpath);
921		return;
922	}
923
924	/*
925	 * Begin by chdir()ing into the manpath.
926	 * This way we can pick up the database files, which are
927	 * relative to the manpath root.
928	 */
929
930	if (chdir(manpath) == -1) {
931		warn("chdir %s", manpath);
932		pg_error_internal();
933		free(manpath);
934		return;
935	}
936	free(manpath);
937
938	if ( ! validate_filename(file)) {
939		pg_error_badrequest(
940		    "You specified an invalid manual file.");
941		return;
942	}
943
944	resp_begin_html(200, NULL, file);
945	resp_searchform(req, FOCUS_NONE);
946	resp_show(req, file);
947	resp_end_html();
948}
949
950static void
951pg_search(const struct req *req)
952{
953	struct mansearch	  search;
954	struct manpaths		  paths;
955	struct manpage		 *res;
956	char			**argv;
957	char			 *query, *rp, *wp;
958	size_t			  ressz;
959	int			  argc;
960
961	/*
962	 * Begin by chdir()ing into the root of the manpath.
963	 * This way we can pick up the database files, which are
964	 * relative to the manpath root.
965	 */
966
967	if (chdir(req->q.manpath) == -1) {
968		warn("chdir %s", req->q.manpath);
969		pg_error_internal();
970		return;
971	}
972
973	search.arch = req->q.arch;
974	search.sec = req->q.sec;
975	search.outkey = "Nd";
976	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
977	search.firstmatch = 1;
978
979	paths.sz = 1;
980	paths.paths = mandoc_malloc(sizeof(char *));
981	paths.paths[0] = mandoc_strdup(".");
982
983	/*
984	 * Break apart at spaces with backslash-escaping.
985	 */
986
987	argc = 0;
988	argv = NULL;
989	rp = query = mandoc_strdup(req->q.query);
990	for (;;) {
991		while (isspace((unsigned char)*rp))
992			rp++;
993		if (*rp == '\0')
994			break;
995		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
996		argv[argc++] = wp = rp;
997		for (;;) {
998			if (isspace((unsigned char)*rp)) {
999				*wp = '\0';
1000				rp++;
1001				break;
1002			}
1003			if (rp[0] == '\\' && rp[1] != '\0')
1004				rp++;
1005			if (wp != rp)
1006				*wp = *rp;
1007			if (*rp == '\0')
1008				break;
1009			wp++;
1010			rp++;
1011		}
1012	}
1013
1014	res = NULL;
1015	ressz = 0;
1016	if (req->isquery && req->q.equal && argc == 1)
1017		pg_redirect(req, argv[0]);
1018	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1019		pg_noresult(req, "You entered an invalid query.");
1020	else if (ressz == 0)
1021		pg_noresult(req, "No results found.");
1022	else
1023		pg_searchres(req, res, ressz);
1024
1025	free(query);
1026	mansearch_free(res, ressz);
1027	free(paths.paths[0]);
1028	free(paths.paths);
1029}
1030
1031int
1032main(void)
1033{
1034	struct req	 req;
1035	struct itimerval itimer;
1036	const char	*path;
1037	const char	*querystring;
1038	int		 i;
1039
1040#if HAVE_PLEDGE
1041	/*
1042	 * The "rpath" pledge could be revoked after mparse_readfd()
1043	 * if the file desciptor to "/footer.html" would be opened
1044	 * up front, but it's probably not worth the complication
1045	 * of the code it would cause: it would require scattering
1046	 * pledge() calls in multiple low-level resp_*() functions.
1047	 */
1048
1049	if (pledge("stdio rpath", NULL) == -1) {
1050		warn("pledge");
1051		pg_error_internal();
1052		return EXIT_FAILURE;
1053	}
1054#endif
1055
1056	/* Poor man's ReDoS mitigation. */
1057
1058	itimer.it_value.tv_sec = 2;
1059	itimer.it_value.tv_usec = 0;
1060	itimer.it_interval.tv_sec = 2;
1061	itimer.it_interval.tv_usec = 0;
1062	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1063		warn("setitimer");
1064		pg_error_internal();
1065		return EXIT_FAILURE;
1066	}
1067
1068	/*
1069	 * First we change directory into the MAN_DIR so that
1070	 * subsequent scanning for manpath directories is rooted
1071	 * relative to the same position.
1072	 */
1073
1074	if (chdir(MAN_DIR) == -1) {
1075		warn("MAN_DIR: %s", MAN_DIR);
1076		pg_error_internal();
1077		return EXIT_FAILURE;
1078	}
1079
1080	memset(&req, 0, sizeof(struct req));
1081	req.q.equal = 1;
1082	parse_manpath_conf(&req);
1083
1084	/* Parse the path info and the query string. */
1085
1086	if ((path = getenv("PATH_INFO")) == NULL)
1087		path = "";
1088	else if (*path == '/')
1089		path++;
1090
1091	if (*path != '\0') {
1092		parse_path_info(&req, path);
1093		if (req.q.manpath == NULL || req.q.sec == NULL ||
1094		    *req.q.query == '\0' || access(path, F_OK) == -1)
1095			path = "";
1096	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1097		parse_query_string(&req, querystring);
1098
1099	/* Validate parsed data and add defaults. */
1100
1101	if (req.q.manpath == NULL)
1102		req.q.manpath = mandoc_strdup(req.p[0]);
1103	else if ( ! validate_manpath(&req, req.q.manpath)) {
1104		pg_error_badrequest(
1105		    "You specified an invalid manpath.");
1106		return EXIT_FAILURE;
1107	}
1108
1109	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1110		pg_error_badrequest(
1111		    "You specified an invalid architecture.");
1112		return EXIT_FAILURE;
1113	}
1114
1115	/* Dispatch to the three different pages. */
1116
1117	if ('\0' != *path)
1118		pg_show(&req, path);
1119	else if (NULL != req.q.query)
1120		pg_search(&req);
1121	else
1122		pg_index(&req);
1123
1124	free(req.q.manpath);
1125	free(req.q.arch);
1126	free(req.q.sec);
1127	free(req.q.query);
1128	for (i = 0; i < (int)req.psz; i++)
1129		free(req.p[i]);
1130	free(req.p);
1131	return EXIT_SUCCESS;
1132}
1133
1134/*
1135 * Translate PATH_INFO to a query.
1136 */
1137static void
1138parse_path_info(struct req *req, const char *path)
1139{
1140	const char	*name, *sec, *end;
1141
1142	req->isquery = 0;
1143	req->q.equal = 1;
1144	req->q.manpath = NULL;
1145	req->q.arch = NULL;
1146
1147	/* Mandatory manual page name. */
1148	if ((name = strrchr(path, '/')) == NULL)
1149		name = path;
1150	else
1151		name++;
1152
1153	/* Optional trailing section. */
1154	sec = strrchr(name, '.');
1155	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1156		req->q.query = mandoc_strndup(name, sec - name - 1);
1157		req->q.sec = mandoc_strdup(sec);
1158	} else {
1159		req->q.query = mandoc_strdup(name);
1160		req->q.sec = NULL;
1161	}
1162
1163	/* Handle the case of name[.section] only. */
1164	if (name == path)
1165		return;
1166
1167	/* Optional manpath. */
1168	end = strchr(path, '/');
1169	req->q.manpath = mandoc_strndup(path, end - path);
1170	if (validate_manpath(req, req->q.manpath)) {
1171		path = end + 1;
1172		if (name == path)
1173			return;
1174	} else {
1175		free(req->q.manpath);
1176		req->q.manpath = NULL;
1177	}
1178
1179	/* Optional section. */
1180	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1181		path += 3;
1182		end = strchr(path, '/');
1183		free(req->q.sec);
1184		req->q.sec = mandoc_strndup(path, end - path);
1185		path = end + 1;
1186		if (name == path)
1187			return;
1188	}
1189
1190	/* Optional architecture. */
1191	end = strchr(path, '/');
1192	if (end + 1 != name) {
1193		pg_error_badrequest(
1194		    "You specified too many directory components.");
1195		exit(EXIT_FAILURE);
1196	}
1197	req->q.arch = mandoc_strndup(path, end - path);
1198	if (validate_arch(req->q.arch) == 0) {
1199		pg_error_badrequest(
1200		    "You specified an invalid directory component.");
1201		exit(EXIT_FAILURE);
1202	}
1203}
1204
1205/*
1206 * Scan for indexable paths.
1207 */
1208static void
1209parse_manpath_conf(struct req *req)
1210{
1211	FILE	*fp;
1212	char	*dp;
1213	size_t	 dpsz;
1214	ssize_t	 len;
1215
1216	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1217		warn("%s/manpath.conf", MAN_DIR);
1218		pg_error_internal();
1219		exit(EXIT_FAILURE);
1220	}
1221
1222	dp = NULL;
1223	dpsz = 0;
1224
1225	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1226		if (dp[len - 1] == '\n')
1227			dp[--len] = '\0';
1228		req->p = mandoc_realloc(req->p,
1229		    (req->psz + 1) * sizeof(char *));
1230		if ( ! validate_urifrag(dp)) {
1231			warnx("%s/manpath.conf contains "
1232			    "unsafe path \"%s\"", MAN_DIR, dp);
1233			pg_error_internal();
1234			exit(EXIT_FAILURE);
1235		}
1236		if (strchr(dp, '/') != NULL) {
1237			warnx("%s/manpath.conf contains "
1238			    "path with slash \"%s\"", MAN_DIR, dp);
1239			pg_error_internal();
1240			exit(EXIT_FAILURE);
1241		}
1242		req->p[req->psz++] = dp;
1243		dp = NULL;
1244		dpsz = 0;
1245	}
1246	free(dp);
1247
1248	if (req->p == NULL) {
1249		warnx("%s/manpath.conf is empty", MAN_DIR);
1250		pg_error_internal();
1251		exit(EXIT_FAILURE);
1252	}
1253}
1254