1/*	$Id: http.c,v 1.17 2023/04/19 12:58:16 jsg Exp $ */
2/*
3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
#include <sys/types.h>
#include <sys/socket.h>

#include <arpa/inet.h>
#include <netinet/in.h>

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <tls.h>
#include <unistd.h>

#include "http.h"
36
/*
 * State of a single HTTP/S response as it is read and parsed.
 * The header and body stages each carry a tri-state flag: 0 means not
 * yet attempted, 1 means completed, -1 means the attempt failed.
 * Every buffer in here is released by http_close().
 */
struct	httpxfer {
	char		*hbuf;    /* raw bytes read by http_head_read() */
	size_t		 hbufsz;  /* bytes stored in hbuf */
	int		 headok;  /* header read state (0, 1 or -1) */
	char		*bbuf;    /* body bytes collected so far */
	size_t		 bbufsz;  /* bytes stored in bbuf */
	int		 bodyok;  /* body read state (0, 1 or -1) */
	char		*headbuf; /* copy of hbuf that "head" points into */
	struct httphead	*head;    /* parsed headers */
	size_t		 headsz;  /* number of parsed headers */
};
51
/*
 * An HTTP/S connection object, created by http_alloc() and released
 * by http_free().
 */
struct	http {
	int		   fd;     /* connected socket, -1 once disconnected */
	short		   port;   /* port number */
	struct source	   src;    /* endpoint (raw) host */
	char		  *path;   /* path to request */
	char		  *host;   /* name of endpoint host */
	struct tls	  *ctx;    /* TLS context, NULL if not TLS */
	writefp		   writer; /* write function (plain or TLS) */
	readfp		   reader; /* read function (plain or TLS) */
};
65
/*
 * TLS client configuration shared by all connections.
 * Created by http_init() and applied to each TLS context in
 * http_alloc(); NULL until http_init() has succeeded.
 */
struct tls_config *tlscfg;
67
68static ssize_t
69dosysread(char *buf, size_t sz, const struct http *http)
70{
71	ssize_t	 rc;
72
73	rc = read(http->fd, buf, sz);
74	if (rc == -1)
75		warn("%s: read", http->src.ip);
76	return rc;
77}
78
79static ssize_t
80dosyswrite(const void *buf, size_t sz, const struct http *http)
81{
82	ssize_t	 rc;
83
84	rc = write(http->fd, buf, sz);
85	if (rc == -1)
86		warn("%s: write", http->src.ip);
87	return rc;
88}
89
90static ssize_t
91dotlsread(char *buf, size_t sz, const struct http *http)
92{
93	ssize_t	 rc;
94
95	do {
96		rc = tls_read(http->ctx, buf, sz);
97	} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
98
99	if (rc == -1)
100		warnx("%s: tls_read: %s", http->src.ip,
101		    tls_error(http->ctx));
102	return rc;
103}
104
105static ssize_t
106dotlswrite(const void *buf, size_t sz, const struct http *http)
107{
108	ssize_t	 rc;
109
110	do {
111		rc = tls_write(http->ctx, buf, sz);
112	} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
113
114	if (rc == -1)
115		warnx("%s: tls_write: %s", http->src.ip,
116		    tls_error(http->ctx));
117	return rc;
118}
119
120int
121http_init(void)
122{
123	if (tlscfg != NULL)
124		return 0;
125
126	tlscfg = tls_config_new();
127	if (tlscfg == NULL) {
128		warn("tls_config_new");
129		goto err;
130	}
131
132	if (tls_config_set_ca_file(tlscfg, tls_default_ca_cert_file()) == -1) {
133		warn("tls_config_set_ca_file: %s", tls_config_error(tlscfg));
134		goto err;
135	}
136
137	return 0;
138
139 err:
140	tls_config_free(tlscfg);
141	tlscfg = NULL;
142
143	return -1;
144}
145
146static ssize_t
147http_read(char *buf, size_t sz, const struct http *http)
148{
149	ssize_t	 ssz, xfer;
150
151	xfer = 0;
152	do {
153		if ((ssz = http->reader(buf, sz, http)) < 0)
154			return -1;
155		if (ssz == 0)
156			break;
157		xfer += ssz;
158		sz -= ssz;
159		buf += ssz;
160	} while (ssz > 0 && sz > 0);
161
162	return xfer;
163}
164
165static int
166http_write(const char *buf, size_t sz, const struct http *http)
167{
168	ssize_t	 ssz, xfer;
169
170	xfer = sz;
171	while (sz > 0) {
172		if ((ssz = http->writer(buf, sz, http)) < 0)
173			return -1;
174		sz -= ssz;
175		buf += (size_t)ssz;
176	}
177	return xfer;
178}
179
180void
181http_disconnect(struct http *http)
182{
183	int rc;
184
185	if (http->ctx != NULL) {
186		/* TLS connection. */
187		do {
188			rc = tls_close(http->ctx);
189		} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
190
191		if (rc < 0)
192			warnx("%s: tls_close: %s", http->src.ip,
193			    tls_error(http->ctx));
194
195		tls_free(http->ctx);
196	}
197	if (http->fd != -1) {
198		if (close(http->fd) == -1)
199			warn("%s: close", http->src.ip);
200	}
201
202	http->fd = -1;
203	http->ctx = NULL;
204}
205
206void
207http_free(struct http *http)
208{
209
210	if (http == NULL)
211		return;
212	http_disconnect(http);
213	free(http->host);
214	free(http->path);
215	free(http->src.ip);
216	free(http);
217}
218
219struct http *
220http_alloc(const struct source *addrs, size_t addrsz,
221    const char *host, short port, const char *path)
222{
223	struct sockaddr_storage ss;
224	int		 family, fd, c;
225	socklen_t	 len;
226	size_t		 cur, i = 0;
227	struct http	*http;
228
229	/* Do this while we still have addresses to connect. */
230again:
231	if (i == addrsz)
232		return NULL;
233	cur = i++;
234
235	/* Convert to PF_INET or PF_INET6 address from string. */
236
237	memset(&ss, 0, sizeof(struct sockaddr_storage));
238
239	if (addrs[cur].family == 4) {
240		family = PF_INET;
241		((struct sockaddr_in *)&ss)->sin_family = AF_INET;
242		((struct sockaddr_in *)&ss)->sin_port = htons(port);
243		c = inet_pton(AF_INET, addrs[cur].ip,
244		    &((struct sockaddr_in *)&ss)->sin_addr);
245		len = sizeof(struct sockaddr_in);
246	} else if (addrs[cur].family == 6) {
247		family = PF_INET6;
248		((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6;
249		((struct sockaddr_in6 *)&ss)->sin6_port = htons(port);
250		c = inet_pton(AF_INET6, addrs[cur].ip,
251		    &((struct sockaddr_in6 *)&ss)->sin6_addr);
252		len = sizeof(struct sockaddr_in6);
253	} else {
254		warnx("%s: unknown family", addrs[cur].ip);
255		goto again;
256	}
257
258	if (c < 0) {
259		warn("%s: inet_ntop", addrs[cur].ip);
260		goto again;
261	} else if (c == 0) {
262		warnx("%s: inet_ntop", addrs[cur].ip);
263		goto again;
264	}
265
266	/* Create socket and connect. */
267
268	fd = socket(family, SOCK_STREAM, 0);
269	if (fd == -1) {
270		warn("%s: socket", addrs[cur].ip);
271		goto again;
272	} else if (connect(fd, (struct sockaddr *)&ss, len) == -1) {
273		warn("%s: connect", addrs[cur].ip);
274		close(fd);
275		goto again;
276	}
277
278	/* Allocate the communicator. */
279
280	http = calloc(1, sizeof(struct http));
281	if (http == NULL) {
282		warn("calloc");
283		close(fd);
284		return NULL;
285	}
286	http->fd = fd;
287	http->port = port;
288	http->src.family = addrs[cur].family;
289	http->src.ip = strdup(addrs[cur].ip);
290	http->host = strdup(host);
291	http->path = strdup(path);
292	if (http->src.ip == NULL || http->host == NULL || http->path == NULL) {
293		warn("strdup");
294		goto err;
295	}
296
297	/* If necessary, do our TLS setup. */
298
299	if (port != 443) {
300		http->writer = dosyswrite;
301		http->reader = dosysread;
302		return http;
303	}
304
305	http->writer = dotlswrite;
306	http->reader = dotlsread;
307
308	if ((http->ctx = tls_client()) == NULL) {
309		warn("tls_client");
310		goto err;
311	} else if (tls_configure(http->ctx, tlscfg) == -1) {
312		warnx("%s: tls_configure: %s",
313			http->src.ip, tls_error(http->ctx));
314		goto err;
315	}
316
317	if (tls_connect_socket(http->ctx, http->fd, http->host) != 0) {
318		warnx("%s: tls_connect_socket: %s, %s", http->src.ip,
319		    http->host, tls_error(http->ctx));
320		goto err;
321	}
322
323	return http;
324err:
325	http_free(http);
326	return NULL;
327}
328
329struct httpxfer *
330http_open(const struct http *http, const void *p, size_t psz)
331{
332	char		*req;
333	int		 c;
334	struct httpxfer	*trans;
335
336	if (p == NULL) {
337		c = asprintf(&req,
338		    "GET %s HTTP/1.0\r\n"
339		    "Host: %s\r\n"
340		    "\r\n",
341		    http->path, http->host);
342	} else {
343		c = asprintf(&req,
344		    "POST %s HTTP/1.0\r\n"
345		    "Host: %s\r\n"
346		    "Content-Type: application/ocsp-request\r\n"
347		    "Content-Length: %zu\r\n"
348		    "\r\n",
349		    http->path, http->host, psz);
350	}
351	if (c == -1) {
352		warn("asprintf");
353		return NULL;
354	} else if (!http_write(req, c, http)) {
355		free(req);
356		return NULL;
357	} else if (p != NULL && !http_write(p, psz, http)) {
358		free(req);
359		return NULL;
360	}
361
362	free(req);
363
364	trans = calloc(1, sizeof(struct httpxfer));
365	if (trans == NULL)
366		warn("calloc");
367	return trans;
368}
369
370void
371http_close(struct httpxfer *x)
372{
373
374	if (x == NULL)
375		return;
376	free(x->hbuf);
377	free(x->bbuf);
378	free(x->headbuf);
379	free(x->head);
380	free(x);
381}
382
383/*
384 * Read the HTTP body from the wire.
385 * If invoked multiple times, this will return the same pointer with the
386 * same data (or NULL, if the original invocation returned NULL).
387 * Returns NULL if read or allocation errors occur.
388 * You must not free the returned pointer.
389 */
390char *
391http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz)
392{
393	char		 buf[BUFSIZ];
394	ssize_t		 ssz;
395	void		*pp;
396	size_t		 szp;
397
398	if (sz == NULL)
399		sz = &szp;
400
401	/* Have we already parsed this? */
402
403	if (trans->bodyok > 0) {
404		*sz = trans->bbufsz;
405		return trans->bbuf;
406	} else if (trans->bodyok < 0)
407		return NULL;
408
409	*sz = 0;
410	trans->bodyok = -1;
411
412	do {
413		/* If less than sizeof(buf), at EOF. */
414		if ((ssz = http_read(buf, sizeof(buf), http)) < 0)
415			return NULL;
416		else if (ssz == 0)
417			break;
418
419		pp = recallocarray(trans->bbuf,
420		    trans->bbufsz, trans->bbufsz + ssz, 1);
421		if (pp == NULL) {
422			warn("recallocarray");
423			return NULL;
424		}
425		trans->bbuf = pp;
426		memcpy(trans->bbuf + trans->bbufsz, buf, ssz);
427		trans->bbufsz += ssz;
428	} while (ssz == sizeof(buf));
429
430	trans->bodyok = 1;
431	*sz = trans->bbufsz;
432	return trans->bbuf;
433}
434
435struct httphead *
436http_head_get(const char *v, struct httphead *h, size_t hsz)
437{
438	size_t	 i;
439
440	for (i = 0; i < hsz; i++) {
441		if (strcmp(h[i].key, v))
442			continue;
443		return &h[i];
444	}
445	return NULL;
446}
447
448/*
449 * Look through the headers and determine our HTTP code.
450 * This will return -1 on failure, otherwise the code.
451 */
452int
453http_head_status(const struct http *http, struct httphead *h, size_t sz)
454{
455	int		 rc;
456	unsigned int	 code;
457	struct httphead *st;
458
459	if ((st = http_head_get("Status", h, sz)) == NULL) {
460		warnx("%s: no status header", http->src.ip);
461		return -1;
462	}
463
464	rc = sscanf(st->val, "%*s %u %*s", &code);
465	if (rc < 0) {
466		warn("sscanf");
467		return -1;
468	} else if (rc != 1) {
469		warnx("%s: cannot convert status header", http->src.ip);
470		return -1;
471	}
472	return code;
473}
474
475/*
476 * Parse headers from the transfer.
477 * Malformed headers are skipped.
478 * A special "Status" header is added for the HTTP status line.
479 * This can only happen once http_head_read has been called with
480 * success.
481 * This can be invoked multiple times: it will only parse the headers
482 * once and after that it will just return the cache.
483 * You must not free the returned pointer.
484 * If the original header parse failed, or if memory allocation fails
485 * internally, this returns NULL.
486 */
487struct httphead *
488http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz)
489{
490	size_t		 hsz, szp;
491	struct httphead	*h;
492	char		*cp, *ep, *ccp, *buf;
493
494	if (sz == NULL)
495		sz = &szp;
496
497	/*
498	 * If we've already parsed the headers, return the
499	 * previously-parsed buffer now.
500	 * If we have errors on the stream, return NULL now.
501	 */
502
503	if (trans->head != NULL) {
504		*sz = trans->headsz;
505		return trans->head;
506	} else if (trans->headok <= 0)
507		return NULL;
508
509	if ((buf = strdup(trans->hbuf)) == NULL) {
510		warn("strdup");
511		return NULL;
512	}
513	hsz = 0;
514	cp = buf;
515
516	do {
517		if ((cp = strstr(cp, "\r\n")) != NULL)
518			cp += 2;
519		hsz++;
520	} while (cp != NULL);
521
522	/*
523	 * Allocate headers, then step through the data buffer, parsing
524	 * out headers as we have them.
525	 * We know at this point that the buffer is NUL-terminated in
526	 * the usual way.
527	 */
528
529	h = calloc(hsz, sizeof(struct httphead));
530	if (h == NULL) {
531		warn("calloc");
532		free(buf);
533		return NULL;
534	}
535
536	*sz = hsz;
537	hsz = 0;
538	cp = buf;
539
540	do {
541		if ((ep = strstr(cp, "\r\n")) != NULL) {
542			*ep = '\0';
543			ep += 2;
544		}
545		if (hsz == 0) {
546			h[hsz].key = "Status";
547			h[hsz++].val = cp;
548			continue;
549		}
550
551		/* Skip bad headers. */
552		if ((ccp = strchr(cp, ':')) == NULL) {
553			warnx("%s: header without separator", http->src.ip);
554			continue;
555		}
556
557		*ccp++ = '\0';
558		while (isspace((unsigned char)*ccp))
559			ccp++;
560		h[hsz].key = cp;
561		h[hsz++].val = ccp;
562	} while ((cp = ep) != NULL);
563
564	trans->headbuf = buf;
565	trans->head = h;
566	trans->headsz = hsz;
567	return h;
568}
569
570/*
571 * Read the HTTP headers from the wire.
572 * If invoked multiple times, this will return the same pointer with the
573 * same data (or NULL, if the original invocation returned NULL).
574 * Returns NULL if read or allocation errors occur.
575 * You must not free the returned pointer.
576 */
577char *
578http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz)
579{
580	char		 buf[BUFSIZ];
581	ssize_t		 ssz;
582	char		*ep;
583	void		*pp;
584	size_t		 szp;
585
586	if (sz == NULL)
587		sz = &szp;
588
589	/* Have we already parsed this? */
590
591	if (trans->headok > 0) {
592		*sz = trans->hbufsz;
593		return trans->hbuf;
594	} else if (trans->headok < 0)
595		return NULL;
596
597	*sz = 0;
598	ep = NULL;
599	trans->headok = -1;
600
601	/*
602	 * Begin by reading by BUFSIZ blocks until we reach the header
603	 * termination marker (two CRLFs).
604	 * We might read into our body, but that's ok: we'll copy out
605	 * the body parts into our body buffer afterward.
606	 */
607
608	do {
609		/* If less than sizeof(buf), at EOF. */
610		if ((ssz = http_read(buf, sizeof(buf), http)) < 0)
611			return NULL;
612		else if (ssz == 0)
613			break;
614		pp = realloc(trans->hbuf, trans->hbufsz + ssz);
615		if (pp == NULL) {
616			warn("realloc");
617			return NULL;
618		}
619		trans->hbuf = pp;
620		memcpy(trans->hbuf + trans->hbufsz, buf, ssz);
621		trans->hbufsz += ssz;
622		/* Search for end of headers marker. */
623		ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4);
624	} while (ep == NULL && ssz == sizeof(buf));
625
626	if (ep == NULL) {
627		warnx("%s: partial transfer", http->src.ip);
628		return NULL;
629	}
630	*ep = '\0';
631
632	/*
633	 * The header data is invalid if it has any binary characters in
634	 * it: check that now.
635	 * This is important because we want to guarantee that all
636	 * header keys and pairs are properly NUL-terminated.
637	 */
638
639	if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) {
640		warnx("%s: binary data in header", http->src.ip);
641		return NULL;
642	}
643
644	/*
645	 * Copy remaining buffer into body buffer.
646	 */
647
648	ep += 4;
649	trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep;
650	trans->bbuf = malloc(trans->bbufsz);
651	if (trans->bbuf == NULL) {
652		warn("malloc");
653		return NULL;
654	}
655	memcpy(trans->bbuf, ep, trans->bbufsz);
656
657	trans->headok = 1;
658	*sz = trans->hbufsz;
659	return trans->hbuf;
660}
661
662void
663http_get_free(struct httpget *g)
664{
665
666	if (g == NULL)
667		return;
668	http_close(g->xfer);
669	http_free(g->http);
670	free(g);
671}
672
673struct httpget *
674http_get(const struct source *addrs, size_t addrsz, const char *domain,
675    short port, const char *path, const void *post, size_t postsz)
676{
677	struct http	*h;
678	struct httpxfer	*x;
679	struct httpget	*g;
680	struct httphead	*head;
681	size_t		 headsz, bodsz, headrsz;
682	int		 code;
683	char		*bod, *headr;
684
685	h = http_alloc(addrs, addrsz, domain, port, path);
686	if (h == NULL)
687		return NULL;
688
689	if ((x = http_open(h, post, postsz)) == NULL) {
690		http_free(h);
691		return NULL;
692	} else if ((headr = http_head_read(h, x, &headrsz)) == NULL) {
693		http_close(x);
694		http_free(h);
695		return NULL;
696	} else if ((bod = http_body_read(h, x, &bodsz)) == NULL) {
697		http_close(x);
698		http_free(h);
699		return NULL;
700	}
701
702	http_disconnect(h);
703
704	if ((head = http_head_parse(h, x, &headsz)) == NULL) {
705		http_close(x);
706		http_free(h);
707		return NULL;
708	} else if ((code = http_head_status(h, head, headsz)) < 0) {
709		http_close(x);
710		http_free(h);
711		return NULL;
712	}
713
714	if ((g = calloc(1, sizeof(struct httpget))) == NULL) {
715		warn("calloc");
716		http_close(x);
717		http_free(h);
718		return NULL;
719	}
720
721	g->headpart = headr;
722	g->headpartsz = headrsz;
723	g->bodypart = bod;
724	g->bodypartsz = bodsz;
725	g->head = head;
726	g->headsz = headsz;
727	g->code = code;
728	g->xfer = x;
729	g->http = h;
730	return g;
731}
732
/*
 * Stand-alone test driver, compiled out by the "#if 0" below.
 * It fetches /index.html over plain HTTP (port 80) and prints the
 * status code and every parsed header with warnx().
 * NOTE(review): the inner disabled "localhost" call passes five
 * arguments while http_get() takes seven; it needs the post/postsz
 * arguments before it can be enabled.
 */
#if 0
int
main(void)
{
	struct httpget	*g;
	struct httphead	*httph;
	size_t		 i, httphsz;
	struct source	 addrs[2];
	size_t		 addrsz;

	/* Candidate endpoints: loopback (disabled) or two fixed hosts. */
#if 0
	addrs[0].ip = "127.0.0.1";
	addrs[0].family = 4;
	addrsz = 1;
#else
	addrs[0].ip = "2a00:1450:400a:806::2004";
	addrs[0].family = 6;
	addrs[1].ip = "193.135.3.123";
	addrs[1].family = 4;
	addrsz = 2;
#endif

	if (http_init() == -1)
		errx(EXIT_FAILURE, "http_init");

#if 0
	g = http_get(addrs, addrsz, "localhost", 80, "/index.html");
#else
	g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html",
	    NULL, 0);
#endif

	if (g == NULL)
		errx(EXIT_FAILURE, "http_get");

	/* Already parsed inside http_get(): this returns the cache. */
	httph = http_head_parse(g->http, g->xfer, &httphsz);
	warnx("code: %d", g->code);

	for (i = 0; i < httphsz; i++)
		warnx("head: [%s]=[%s]", httph[i].key, httph[i].val);

	http_get_free(g);
	return (EXIT_SUCCESS);
}
#endif
778