1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD$");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sysexits.h>
45#include <unistd.h>
46
/* Proxy host from HTTP_PROXY / http_proxy, set by readenv(); NULL if no proxy */
static const char *	env_HTTP_PROXY;
/* Value of HTTP_PROXY_AUTH from the environment, consumed by readenv() */
static char *		env_HTTP_PROXY_AUTH;
/* User-Agent header value; readenv() defaults it to "phttpget/0.1" */
static const char *	env_HTTP_USER_AGENT;
/* Raw HTTP_TIMEOUT environment value, parsed by readenv() */
static char *		env_HTTP_TIMEOUT;
/* Proxy port as a string; readenv() uses "3128" when none is given */
static const char *	proxyport;
/* Complete "Proxy-Authorization: ..." header line, or NULL if unused */
static char *		proxyauth;

/* Socket send/receive timeout: 15 seconds unless HTTP_TIMEOUT overrides it */
static struct timeval	timo = { 15, 0};
55
/*
 * Print the command-line synopsis on stderr and terminate the program
 * with the EX_USAGE exit status.  Does not return.
 */
static void
usage(void)
{
	fputs("usage: phttpget server [file ...]\n", stderr);
	exit(EX_USAGE);
}
63
/*
 * Base64 encode a NUL-terminated string using the standard alphabet
 * with '=' padding.
 *
 * Returns a NUL-terminated string allocated with malloc() which the
 * caller must free, or NULL if the allocation fails or the encoded
 * length would not fit in a size_t.  An empty input yields an empty
 * string.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen, nbytes;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return NULL;	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return NULL;
	ctext[ctlen - 1] = 0;

	/*
	 * Read the input as unsigned bytes: with a signed plain char,
	 * bytes >= 0x80 would sign-extend and corrupt the upper bytes
	 * of the 24-bit group.
	 */
	pt = (const unsigned char *)ptext;
	pc = ctext;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.
	 */
	while (ptlen > 0) {
		nbytes = (ptlen < 3) ? ptlen : 3;

		/* Pack up to 3 bytes into a 24-bit group, zero-filled */
		t = 0;
		for (j = 0; j < 3; j++) {
			t <<= 8;
			if (j < nbytes)
				t |= *pt++;
		}

		/*
		 * n input bytes produce n + 1 symbols; the remaining
		 * positions of the 4-character group are '=' padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= nbytes)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		pc += 4;
		ptlen -= nbytes;
	}

	return (ctext);
}
121
122static void
123readenv(void)
124{
125	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
126	char *proxy_auth_user = NULL;
127	char *proxy_auth_pass = NULL;
128	long http_timeout;
129
130	env_HTTP_PROXY = getenv("HTTP_PROXY");
131	if (env_HTTP_PROXY == NULL)
132		env_HTTP_PROXY = getenv("http_proxy");
133	if (env_HTTP_PROXY != NULL) {
134		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
135			env_HTTP_PROXY += 7;
136		p = strchr(env_HTTP_PROXY, '/');
137		if (p != NULL)
138			*p = 0;
139		p = strchr(env_HTTP_PROXY, ':');
140		if (p != NULL) {
141			*p = 0;
142			proxyport = p + 1;
143		} else
144			proxyport = "3128";
145	}
146
147	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
148	if ((env_HTTP_PROXY != NULL) &&
149	    (env_HTTP_PROXY_AUTH != NULL) &&
150	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
151		/* Ignore authentication scheme */
152		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
153
154		/* Ignore realm */
155		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
156
157		/* Obtain username and password */
158		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
159		proxy_auth_pass = env_HTTP_PROXY_AUTH;
160	}
161
162	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
163		asprintf(&proxy_auth_userpass, "%s:%s",
164		    proxy_auth_user, proxy_auth_pass);
165		if (proxy_auth_userpass == NULL)
166			err(1, "asprintf");
167
168		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
169		if (proxy_auth_userpass64 == NULL)
170			err(1, "malloc");
171
172		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
173		    proxy_auth_userpass64);
174		if (proxyauth == NULL)
175			err(1, "asprintf");
176
177		free(proxy_auth_userpass);
178		free(proxy_auth_userpass64);
179	} else
180		proxyauth = NULL;
181
182	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
183	if (env_HTTP_USER_AGENT == NULL)
184		env_HTTP_USER_AGENT = "phttpget/0.1";
185
186	env_HTTP_TIMEOUT = getenv("HTTP_TIMEOUT");
187	if (env_HTTP_TIMEOUT != NULL) {
188		http_timeout = strtol(env_HTTP_TIMEOUT, &p, 10);
189		if ((*env_HTTP_TIMEOUT == '\0') || (*p != '\0') ||
190		    (http_timeout < 0))
191			warnx("HTTP_TIMEOUT (%s) is not a positive integer",
192			    env_HTTP_TIMEOUT);
193		else
194			timo.tv_sec = http_timeout;
195	}
196}
197
198static int
199makerequest(char ** buf, char * path, char * server, int connclose)
200{
201	int buflen;
202
203	buflen = asprintf(buf,
204	    "GET %s%s/%s HTTP/1.1\r\n"
205	    "Host: %s\r\n"
206	    "User-Agent: %s\r\n"
207	    "%s"
208	    "%s"
209	    "\r\n",
210	    env_HTTP_PROXY ? "http://" : "",
211	    env_HTTP_PROXY ? server : "",
212	    path, server, env_HTTP_USER_AGENT,
213	    proxyauth ? proxyauth : "",
214	    connclose ? "Connection: Close\r\n" : "Connection: Keep-Alive\r\n");
215	if (buflen == -1)
216		err(1, "asprintf");
217	return(buflen);
218}
219
/*
 * Make sure the unread part of resbuf (the bytes from *resbufpos up to
 * *resbuflen) contains a "\r\n", receiving more data from socket sd as
 * needed.  Unread data is shifted to the start of the buffer before
 * any read, so *resbufpos and *resbuflen may both change.
 *
 * Returns 0 once a line terminator is available, or -1 if the buffer
 * (BUFSIZ bytes) fills up without one, the peer closes the connection,
 * or recv() fails with anything other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t nread;

	for (;;) {
		/* Done as soon as the pending data holds a CRLF */
		if (strnstr(resbuf + *resbufpos, "\r\n",
		    *resbuflen - *resbufpos) != NULL)
			return 0;

		/* Slide the pending data down to make room at the end */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* A line longer than the whole buffer is an error */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Append whatever the socket has for us */
		nread = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
		if (nread == 0)
			return -1;
		if (nread == -1) {
			if (errno != EINTR)
				return -1;
			/* Interrupted; simply try again */
			continue;
		}
		*resbuflen += nread;
	}
}
251
/*
 * Transfer copylen bytes of the response to descriptor fd, or just
 * discard them when fd is -1.  Data already buffered in resbuf (from
 * *resbufpos up to *resbuflen) is used first; after that the buffer
 * is refilled from socket sd, up to BUFSIZ bytes at a time.
 *
 * Returns 0 when copylen bytes have been consumed, -1 if recv() fails
 * (other than with EINTR), and -2 if the peer closes the connection
 * first.  Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t n;

	while (copylen != 0) {
		/* How much buffered data may we use for this request? */
		n = *resbuflen - *resbufpos;
		if (copylen < n)
			n = copylen;

		if (n <= 0) {
			/* Buffer exhausted: refill it from the socket */
			n = recv(sd, resbuf, BUFSIZ, 0);
			if (n == 0)
				return -2;
			if (n == -1) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			*resbuflen = n;
			*resbufpos = 0;
			continue;
		}

		/* Hand the data to fd; a short write is retried next pass */
		if (fd != -1)
			n = write(fd, resbuf + *resbufpos, n);
		if (n == -1)
			err(1, "write");
		*resbufpos += n;
		copylen -= n;
	}

	return 0;
}
289
/*
 * phttpget server [file ...]
 *
 * Fetch each named file from the HTTP server (or through the proxy
 * configured in the environment; see readenv()) and, for responses
 * with status 200, store the body in the current directory under the
 * last path component of the requested name.  Bodies of other
 * responses are read and discarded.  A one-line status report per
 * file is written to stderr.
 *
 * Requests are pipelined once the server has shown itself to speak
 * HTTP/1.1 (or later minor version) without "Connection: close".  If
 * a connection fails after at least one response has been received on
 * it, a new connection is opened and the outstanding requests are
 * reissued; a failure on the first request of a connection is fatal.
 *
 * Returns 0 on success; all fatal errors exit with status 1 via
 * err()/errx(), and bad usage exits via usage().
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int keepalive;		/* != 0 if HTTP/1.0 keep-alive rcvd. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */
	int val;		/* Value used for setsockopt call */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set send/receive timeouts (best effort) ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... disable SIGPIPE generation ... */
			val = 1;
			setsockopt(sd, SOL_SOCKET, SO_NOSIGPIPE,
			    (void *)&val, sizeof(int));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1 ||
			    fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		keepalive = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, giving up as soon
				 * as it is out of range so that a long run
				 * of digits cannot overflow the int.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Check for "Connection: close" or
			 * "Connection: Keep-Alive" header
			 */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;
				if (strcasestr(hln, "Keep-Alive") != NULL)
					keepalive = 1;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			/* Use err() so the reason open() failed is shown */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Get the chunk-size line */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* A chunk size is one or more hex digits */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-empty chunk is
				 * followed by a CRLF of its own.  It must
				 * be consumed here; otherwise it would be
				 * taken for the next chunk-size line and
				 * end the body after the first chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strnstr(hln, "\r\n",
					    resbuflen - resbufpos);
					if (eolp != hln)
						goto conndied;
					resbufpos = (eolp - resbuf) + 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0 && keepalive == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* Release whatever connection is still open */
	if (sd != -1)
		close(sd);

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
731