/*	$Id: http.c,v 1.18 2017/01/24 13:32:55 jsing Exp $ */
/*
 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/socket.h>
#include <arpa/inet.h>

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <netdb.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <tls.h>
#include <unistd.h>

#include "http.h"
#include "extern.h"

/* CA bundle path handed to tls_config_set_ca_file() by http_init(). */
#define DEFAULT_CA_FILE "/etc/ssl/cert.pem"

/*
 * State of a single HTTP/S exchange: the raw header and body bytes as
 * they were accumulated from the wire, plus the parsed header table.
 * The "ok" flags are tri-state: 0 means not yet attempted, greater than
 * zero means done, less than zero means a previous attempt failed.
 */
struct	httpxfer {
	char		*hbuf;    /* raw bytes gathered while reading headers */
	size_t		 hbufsz;  /* number of bytes held in hbuf */
	int		 headok;  /* header read state (see above) */
	char		*bbuf;    /* raw body bytes */
	size_t		 bbufsz;  /* number of bytes held in bbuf */
	int		 bodyok;  /* body read state (see above) */
	char		*headbuf; /* private copy of hbuf that "head" points into */
	struct httphead	*head;    /* parsed header table */
	size_t		 headsz;  /* number of entries in head */
};

53/*
54 * An HTTP/S connection object.
55 */
56struct	http {
57	int		   fd;     /* connected socket */
58	short		   port;   /* port number */
59	struct source	   src;    /* endpoint (raw) host */
60	char		  *path;   /* path to request */
61	char		  *host;   /* name of endpoint host */
62	struct tls	  *ctx;    /* if TLS */
63	writefp		   writer; /* write function */
64	readfp		   reader; /* read function */
65};
66
/* TLS client configuration; NULL until http_init() has succeeded. */
struct tls_config *tlscfg;

69static ssize_t
70dosysread(char *buf, size_t sz, const struct http *http)
71{
72	ssize_t	 rc;
73
74	rc = read(http->fd, buf, sz);
75	if (rc < 0)
76		warn("%s: read", http->src.ip);
77	return rc;
78}
79
80static ssize_t
81dosyswrite(const void *buf, size_t sz, const struct http *http)
82{
83	ssize_t	 rc;
84
85	rc = write(http->fd, buf, sz);
86	if (rc < 0)
87		warn("%s: write", http->src.ip);
88	return rc;
89}
90
91static ssize_t
92dotlsread(char *buf, size_t sz, const struct http *http)
93{
94	ssize_t	 rc;
95
96	do {
97		rc = tls_read(http->ctx, buf, sz);
98	} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
99
100	if (rc < 0)
101		warnx("%s: tls_read: %s", http->src.ip,
102		    tls_error(http->ctx));
103	return rc;
104}
105
106static ssize_t
107dotlswrite(const void *buf, size_t sz, const struct http *http)
108{
109	ssize_t	 rc;
110
111	do {
112		rc = tls_write(http->ctx, buf, sz);
113	} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
114
115	if (rc < 0)
116		warnx("%s: tls_write: %s", http->src.ip,
117		    tls_error(http->ctx));
118	return rc;
119}
120
121int
122http_init()
123{
124	if (tlscfg != NULL)
125		return 0;
126
127	if (tls_init() == -1) {
128		warn("tls_init");
129		goto err;
130	}
131
132	tlscfg = tls_config_new();
133	if (tlscfg == NULL) {
134		warn("tls_config_new");
135		goto err;
136	}
137
138	if (tls_config_set_ca_file(tlscfg, DEFAULT_CA_FILE) == -1) {
139		warn("tls_config_set_ca_file: %s", tls_config_error(tlscfg));
140		goto err;
141	}
142
143	return 0;
144
145 err:
146	tls_config_free(tlscfg);
147	tlscfg = NULL;
148
149	return -1;
150}
151
152static ssize_t
153http_read(char *buf, size_t sz, const struct http *http)
154{
155	ssize_t	 ssz, xfer;
156
157	xfer = 0;
158	do {
159		if ((ssz = http->reader(buf, sz, http)) < 0)
160			return -1;
161		if (ssz == 0)
162			break;
163		xfer += ssz;
164		sz -= ssz;
165		buf += ssz;
166	} while (ssz > 0 && sz > 0);
167
168	return xfer;
169}
170
171static int
172http_write(const char *buf, size_t sz, const struct http *http)
173{
174	ssize_t	 ssz, xfer;
175
176	xfer = sz;
177	while (sz > 0) {
178		if ((ssz = http->writer(buf, sz, http)) < 0)
179			return -1;
180		sz -= ssz;
181		buf += (size_t)ssz;
182	}
183	return xfer;
184}
185
186void
187http_disconnect(struct http *http)
188{
189	int rc;
190
191	if (http->ctx != NULL) {
192		/* TLS connection. */
193		do {
194			rc = tls_close(http->ctx);
195		} while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT);
196
197		if (rc < 0)
198			warnx("%s: tls_close: %s", http->src.ip,
199			    tls_error(http->ctx));
200
201		tls_free(http->ctx);
202	}
203	if (http->fd != -1) {
204		if (close(http->fd) == -1)
205			warn("%s: close", http->src.ip);
206	}
207
208	http->fd = -1;
209	http->ctx = NULL;
210}
211
212void
213http_free(struct http *http)
214{
215
216	if (http == NULL)
217		return;
218	http_disconnect(http);
219	free(http->host);
220	free(http->path);
221	free(http->src.ip);
222	free(http);
223}
224
225struct http *
226http_alloc(const struct source *addrs, size_t addrsz,
227    const char *host, short port, const char *path)
228{
229	struct sockaddr_storage ss;
230	int		 family, fd, c;
231	socklen_t	 len;
232	size_t		 cur, i = 0;
233	struct http	*http;
234
235	/* Do this while we still have addresses to connect. */
236again:
237	if (i == addrsz)
238		return NULL;
239	cur = i++;
240
241	/* Convert to PF_INET or PF_INET6 address from string. */
242
243	memset(&ss, 0, sizeof(struct sockaddr_storage));
244
245	if (addrs[cur].family == 4) {
246		family = PF_INET;
247		((struct sockaddr_in *)&ss)->sin_family = AF_INET;
248		((struct sockaddr_in *)&ss)->sin_port = htons(port);
249		c = inet_pton(AF_INET, addrs[cur].ip,
250		    &((struct sockaddr_in *)&ss)->sin_addr);
251		len = sizeof(struct sockaddr_in);
252	} else if (addrs[cur].family == 6) {
253		family = PF_INET6;
254		((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6;
255		((struct sockaddr_in6 *)&ss)->sin6_port = htons(port);
256		c = inet_pton(AF_INET6, addrs[cur].ip,
257		    &((struct sockaddr_in6 *)&ss)->sin6_addr);
258		len = sizeof(struct sockaddr_in6);
259	} else {
260		warnx("%s: unknown family", addrs[cur].ip);
261		goto again;
262	}
263
264	if (c < 0) {
265		warn("%s: inet_ntop", addrs[cur].ip);
266		goto again;
267	} else if (c == 0) {
268		warnx("%s: inet_ntop", addrs[cur].ip);
269		goto again;
270	}
271
272	/* Create socket and connect. */
273
274	fd = socket(family, SOCK_STREAM, 0);
275	if (fd == -1) {
276		warn("%s: socket", addrs[cur].ip);
277		goto again;
278	} else if (connect(fd, (struct sockaddr *)&ss, len) == -1) {
279		warn("%s: connect", addrs[cur].ip);
280		close(fd);
281		goto again;
282	}
283
284	/* Allocate the communicator. */
285
286	http = calloc(1, sizeof(struct http));
287	if (http == NULL) {
288		warn("calloc");
289		close(fd);
290		return NULL;
291	}
292	http->fd = fd;
293	http->port = port;
294	http->src.family = addrs[cur].family;
295	http->src.ip = strdup(addrs[cur].ip);
296	http->host = strdup(host);
297	http->path = strdup(path);
298	if (http->src.ip == NULL || http->host == NULL || http->path == NULL) {
299		warn("strdup");
300		goto err;
301	}
302
303	/* If necessary, do our TLS setup. */
304
305	if (port != 443) {
306		http->writer = dosyswrite;
307		http->reader = dosysread;
308		return http;
309	}
310
311	http->writer = dotlswrite;
312	http->reader = dotlsread;
313
314	if ((http->ctx = tls_client()) == NULL) {
315		warn("tls_client");
316		goto err;
317	} else if (tls_configure(http->ctx, tlscfg) == -1) {
318		warnx("%s: tls_configure: %s",
319			http->src.ip, tls_error(http->ctx));
320		goto err;
321	}
322
323	if (tls_connect_socket(http->ctx, http->fd, http->host) != 0) {
324		warnx("%s: tls_connect_socket: %s, %s", http->src.ip,
325		    http->host, tls_error(http->ctx));
326		goto err;
327	}
328
329	return http;
330err:
331	http_free(http);
332	return NULL;
333}
334
335struct httpxfer *
336http_open(const struct http *http, const void *p, size_t psz)
337{
338	char		*req;
339	int		 c;
340	struct httpxfer	*trans;
341
342	if (p == NULL) {
343		c = asprintf(&req,
344		    "GET %s HTTP/1.0\r\n"
345		    "Host: %s\r\n"
346		    "\r\n",
347		    http->path, http->host);
348	} else {
349		c = asprintf(&req,
350		    "POST %s HTTP/1.0\r\n"
351		    "Host: %s\r\n"
352		    "Content-Length: %zu\r\n"
353		    "\r\n",
354		    http->path, http->host, psz);
355	}
356	if (c == -1) {
357		warn("asprintf");
358		return NULL;
359	} else if (!http_write(req, c, http)) {
360		free(req);
361		return NULL;
362	} else if (p != NULL && !http_write(p, psz, http)) {
363		free(req);
364		return NULL;
365	}
366
367	free(req);
368
369	trans = calloc(1, sizeof(struct httpxfer));
370	if (trans == NULL)
371		warn("calloc");
372	return trans;
373}
374
375void
376http_close(struct httpxfer *x)
377{
378
379	if (x == NULL)
380		return;
381	free(x->hbuf);
382	free(x->bbuf);
383	free(x->headbuf);
384	free(x->head);
385	free(x);
386}
387
388/*
389 * Read the HTTP body from the wire.
390 * If invoked multiple times, this will return the same pointer with the
391 * same data (or NULL, if the original invocation returned NULL).
392 * Returns NULL if read or allocation errors occur.
393 * You must not free the returned pointer.
394 */
395char *
396http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz)
397{
398	char		 buf[BUFSIZ];
399	ssize_t		 ssz;
400	void		*pp;
401	size_t		 szp;
402
403	if (sz == NULL)
404		sz = &szp;
405
406	/* Have we already parsed this? */
407
408	if (trans->bodyok > 0) {
409		*sz = trans->bbufsz;
410		return trans->bbuf;
411	} else if (trans->bodyok < 0)
412		return NULL;
413
414	*sz = 0;
415	trans->bodyok = -1;
416
417	do {
418		/* If less than sizeof(buf), at EOF. */
419		if ((ssz = http_read(buf, sizeof(buf), http)) < 0)
420			return NULL;
421		else if (ssz == 0)
422			break;
423		pp = realloc(trans->bbuf, trans->bbufsz + ssz);
424		if (pp == NULL) {
425			warn("realloc");
426			return NULL;
427		}
428		trans->bbuf = pp;
429		memcpy(trans->bbuf + trans->bbufsz, buf, ssz);
430		trans->bbufsz += ssz;
431	} while (ssz == sizeof(buf));
432
433	trans->bodyok = 1;
434	*sz = trans->bbufsz;
435	return trans->bbuf;
436}
437
438struct httphead *
439http_head_get(const char *v, struct httphead *h, size_t hsz)
440{
441	size_t	 i;
442
443	for (i = 0; i < hsz; i++) {
444		if (strcmp(h[i].key, v))
445			continue;
446		return &h[i];
447	}
448	return NULL;
449}
450
451/*
452 * Look through the headers and determine our HTTP code.
453 * This will return -1 on failure, otherwise the code.
454 */
455int
456http_head_status(const struct http *http, struct httphead *h, size_t sz)
457{
458	int		 rc;
459	unsigned int	 code;
460	struct httphead *st;
461
462	if ((st = http_head_get("Status", h, sz)) == NULL) {
463		warnx("%s: no status header", http->src.ip);
464		return -1;
465	}
466
467	rc = sscanf(st->val, "%*s %u %*s", &code);
468	if (rc < 0) {
469		warn("sscanf");
470		return -1;
471	} else if (rc != 1) {
472		warnx("%s: cannot convert status header", http->src.ip);
473		return -1;
474	}
475	return code;
476}
477
478/*
479 * Parse headers from the transfer.
480 * Malformed headers are skipped.
481 * A special "Status" header is added for the HTTP status line.
482 * This can only happen once http_head_read has been called with
483 * success.
484 * This can be invoked multiple times: it will only parse the headers
485 * once and after that it will just return the cache.
486 * You must not free the returned pointer.
487 * If the original header parse failed, or if memory allocation fails
488 * internally, this returns NULL.
489 */
490struct httphead *
491http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz)
492{
493	size_t		 hsz, szp;
494	struct httphead	*h;
495	char		*cp, *ep, *ccp, *buf;
496
497	if (sz == NULL)
498		sz = &szp;
499
500	/*
501	 * If we've already parsed the headers, return the
502	 * previously-parsed buffer now.
503	 * If we have errors on the stream, return NULL now.
504	 */
505
506	if (trans->head != NULL) {
507		*sz = trans->headsz;
508		return trans->head;
509	} else if (trans->headok <= 0)
510		return NULL;
511
512	if ((buf = strdup(trans->hbuf)) == NULL) {
513		warn("strdup");
514		return NULL;
515	}
516	hsz = 0;
517	cp = buf;
518
519	do {
520		if ((cp = strstr(cp, "\r\n")) != NULL)
521			cp += 2;
522		hsz++;
523	} while (cp != NULL);
524
525	/*
526	 * Allocate headers, then step through the data buffer, parsing
527	 * out headers as we have them.
528	 * We know at this point that the buffer is NUL-terminated in
529	 * the usual way.
530	 */
531
532	h = calloc(hsz, sizeof(struct httphead));
533	if (h == NULL) {
534		warn("calloc");
535		free(buf);
536		return NULL;
537	}
538
539	*sz = hsz;
540	hsz = 0;
541	cp = buf;
542
543	do {
544		if ((ep = strstr(cp, "\r\n")) != NULL) {
545			*ep = '\0';
546			ep += 2;
547		}
548		if (hsz == 0) {
549			h[hsz].key = "Status";
550			h[hsz++].val = cp;
551			continue;
552		}
553
554		/* Skip bad headers. */
555		if ((ccp = strchr(cp, ':')) == NULL) {
556			warnx("%s: header without separator", http->src.ip);
557			continue;
558		}
559
560		*ccp++ = '\0';
561		while (isspace((int)*ccp))
562			ccp++;
563		h[hsz].key = cp;
564		h[hsz++].val = ccp;
565	} while ((cp = ep) != NULL);
566
567	trans->headbuf = buf;
568	trans->head = h;
569	trans->headsz = hsz;
570	return h;
571}
572
573/*
574 * Read the HTTP headers from the wire.
575 * If invoked multiple times, this will return the same pointer with the
576 * same data (or NULL, if the original invocation returned NULL).
577 * Returns NULL if read or allocation errors occur.
578 * You must not free the returned pointer.
579 */
580char *
581http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz)
582{
583	char		 buf[BUFSIZ];
584	ssize_t		 ssz;
585	char		*ep;
586	void		*pp;
587	size_t		 szp;
588
589	if (sz == NULL)
590		sz = &szp;
591
592	/* Have we already parsed this? */
593
594	if (trans->headok > 0) {
595		*sz = trans->hbufsz;
596		return trans->hbuf;
597	} else if (trans->headok < 0)
598		return NULL;
599
600	*sz = 0;
601	ep = NULL;
602	trans->headok = -1;
603
604	/*
605	 * Begin by reading by BUFSIZ blocks until we reach the header
606	 * termination marker (two CRLFs).
607	 * We might read into our body, but that's ok: we'll copy out
608	 * the body parts into our body buffer afterward.
609	 */
610
611	do {
612		/* If less than sizeof(buf), at EOF. */
613		if ((ssz = http_read(buf, sizeof(buf), http)) < 0)
614			return NULL;
615		else if (ssz == 0)
616			break;
617		pp = realloc(trans->hbuf, trans->hbufsz + ssz);
618		if (pp == NULL) {
619			warn("realloc");
620			return NULL;
621		}
622		trans->hbuf = pp;
623		memcpy(trans->hbuf + trans->hbufsz, buf, ssz);
624		trans->hbufsz += ssz;
625		/* Search for end of headers marker. */
626		ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4);
627	} while (ep == NULL && ssz == sizeof(buf));
628
629	if (ep == NULL) {
630		warnx("%s: partial transfer", http->src.ip);
631		return NULL;
632	}
633	*ep = '\0';
634
635	/*
636	 * The header data is invalid if it has any binary characters in
637	 * it: check that now.
638	 * This is important because we want to guarantee that all
639	 * header keys and pairs are properly NUL-terminated.
640	 */
641
642	if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) {
643		warnx("%s: binary data in header", http->src.ip);
644		return NULL;
645	}
646
647	/*
648	 * Copy remaining buffer into body buffer.
649	 */
650
651	ep += 4;
652	trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep;
653	trans->bbuf = malloc(trans->bbufsz);
654	if (trans->bbuf == NULL) {
655		warn("malloc");
656		return NULL;
657	}
658	memcpy(trans->bbuf, ep, trans->bbufsz);
659
660	trans->headok = 1;
661	*sz = trans->hbufsz;
662	return trans->hbuf;
663}
664
665void
666http_get_free(struct httpget *g)
667{
668
669	if (g == NULL)
670		return;
671	http_close(g->xfer);
672	http_free(g->http);
673	free(g);
674}
675
676struct httpget *
677http_get(const struct source *addrs, size_t addrsz, const char *domain,
678    short port, const char *path, const void *post, size_t postsz)
679{
680	struct http	*h;
681	struct httpxfer	*x;
682	struct httpget	*g;
683	struct httphead	*head;
684	size_t		 headsz, bodsz, headrsz;
685	int		 code;
686	char		*bod, *headr;
687
688	h = http_alloc(addrs, addrsz, domain, port, path);
689	if (h == NULL)
690		return NULL;
691
692	if ((x = http_open(h, post, postsz)) == NULL) {
693		http_free(h);
694		return NULL;
695	} else if ((headr = http_head_read(h, x, &headrsz)) == NULL) {
696		http_close(x);
697		http_free(h);
698		return NULL;
699	} else if ((bod = http_body_read(h, x, &bodsz)) == NULL) {
700		http_close(x);
701		http_free(h);
702		return NULL;
703	}
704
705	http_disconnect(h);
706
707	if ((head = http_head_parse(h, x, &headsz)) == NULL) {
708		http_close(x);
709		http_free(h);
710		return NULL;
711	} else if ((code = http_head_status(h, head, headsz)) < 0) {
712		http_close(x);
713		http_free(h);
714		return NULL;
715	}
716
717	if ((g = calloc(1, sizeof(struct httpget))) == NULL) {
718		warn("calloc");
719		http_close(x);
720		http_free(h);
721		return NULL;
722	}
723
724	g->headpart = headr;
725	g->headpartsz = headrsz;
726	g->bodypart = bod;
727	g->bodypartsz = bodsz;
728	g->head = head;
729	g->headsz = headsz;
730	g->code = code;
731	g->xfer = x;
732	g->http = h;
733	return g;
734}
735
#if 0
/*
 * Disabled manual test driver: fetch /index.html over plain HTTP from
 * a hard-coded set of addresses and print the status code and every
 * parsed header with warnx(3).
 */
int
main(void)
{
	struct httpget	*g;
	struct httphead	*httph;
	size_t		 i, httphsz;
	struct source	 addrs[2];
	size_t		 addrsz;

#if 0
	addrs[0].ip = "127.0.0.1";
	addrs[0].family = 4;
	addrsz = 1;
#else
	addrs[0].ip = "2a00:1450:400a:806::2004";
	addrs[0].family = 6;
	addrs[1].ip = "193.135.3.123";
	addrs[1].family = 4;
	addrsz = 2;
#endif

	if (http_init() == -1)
		errx(EXIT_FAILURE, "http_init");

#if 0
	/* No POST payload: pass NULL and 0, as http_get() requires. */
	g = http_get(addrs, addrsz, "localhost", 80, "/index.html",
	    NULL, 0);
#else
	g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html",
	    NULL, 0);
#endif

	if (g == NULL)
		errx(EXIT_FAILURE, "http_get");

	httph = http_head_parse(g->http, g->xfer, &httphsz);
	warnx("code: %d", g->code);

	for (i = 0; i < httphsz; i++)
		warnx("head: [%s]=[%s]", httph[i].key, httph[i].val);

	http_get_free(g);
	return (EXIT_SUCCESS);
}
#endif