phttpget.c revision 164057
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 164057 2006-11-07 09:18:09Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sysexits.h>
45#include <unistd.h>
46
/*
 * Proxy host name taken from HTTP_PROXY (or http_proxy) by readenv(), with
 * any leading "http://" skipped and cut at the first '/' and ':'.  NULL
 * when neither variable is set, in which case no proxy is used.
 */
static const char *	env_HTTP_PROXY;
/*
 * Value of HTTP_PROXY_AUTH as returned by getenv(); readenv() advances
 * this pointer with strsep() while splitting the value into fields.
 */
static char *		env_HTTP_PROXY_AUTH;
/* Value of HTTP_USER_AGENT, or "phttpget/0.1" when the variable is unset */
static const char *	env_HTTP_USER_AGENT;
/* Raw value of HTTP_TIMEOUT, or NULL when the variable is unset */
static char *		env_HTTP_TIMEOUT;
/* Proxy port: the text after ':' in the proxy setting, else "3128" */
static const char *	proxyport;
/*
 * Complete "Proxy-Authorization: Basic ...\r\n" header line built by
 * readenv(), or NULL when no proxy credentials are available.
 */
static char *		proxyauth;

/*
 * Timeout applied to the socket with SO_SNDTIMEO and SO_RCVTIMEO; 15
 * seconds unless readenv() replaces tv_sec with the HTTP_TIMEOUT value.
 */
static struct timeval	timo = { 15, 0};
55
/*
 * Print the command synopsis on stderr and terminate the process with
 * the EX_USAGE exit status.  Never returns.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: phttpget server [file ...]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
63
/*
 * Base64 encode a NUL-terminated string using the standard alphabet with
 * '=' padding.
 *
 * Returns a NUL-terminated string allocated with malloc(), which the
 * caller must free, or NULL if the encoded length would overflow a
 * size_t or the allocation fails.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = 0;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.  The
	 * input is read as unsigned bytes so that values >= 0x80 are not
	 * sign-extended into the neighbouring bytes of the 24-bit group.
	 */
	for (pt = (const unsigned char *)ptext, pc = ctext; ptlen;
	    ptlen -= 3, pc += 4) {
		/* Read 3 bytes; missing trailing bytes count as zero */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t += *pt++;
		}

		/*
		 * Write 4 bytes: n input bytes (n = 1, 2 or 3) produce
		 * n + 1 alphabet symbols, and the remainder is padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/*
		 * If we're done, exit the loop; this also keeps the
		 * unsigned ptlen from wrapping in the "ptlen -= 3" step.
		 */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}
121
122static void
123readenv(void)
124{
125	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
126	char *proxy_auth_user = NULL;
127	char *proxy_auth_pass = NULL;
128	long http_timeout;
129
130	env_HTTP_PROXY = getenv("HTTP_PROXY");
131	if (env_HTTP_PROXY == NULL)
132		env_HTTP_PROXY = getenv("http_proxy");
133	if (env_HTTP_PROXY != NULL) {
134		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
135			env_HTTP_PROXY += 7;
136		p = strchr(env_HTTP_PROXY, '/');
137		if (p != NULL)
138			*p = 0;
139		p = strchr(env_HTTP_PROXY, ':');
140		if (p != NULL) {
141			*p = 0;
142			proxyport = p + 1;
143		} else
144			proxyport = "3128";
145	}
146
147	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
148	if ((env_HTTP_PROXY != NULL) &&
149	    (env_HTTP_PROXY_AUTH != NULL) &&
150	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
151		/* Ignore authentication scheme */
152		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
153
154		/* Ignore realm */
155		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
156
157		/* Obtain username and password */
158		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
159		proxy_auth_pass = env_HTTP_PROXY_AUTH;
160	}
161
162	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
163		asprintf(&proxy_auth_userpass, "%s:%s",
164		    proxy_auth_user, proxy_auth_pass);
165		if (proxy_auth_userpass == NULL)
166			err(1, "asprintf");
167
168		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
169		if (proxy_auth_userpass64 == NULL)
170			err(1, "malloc");
171
172		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
173		    proxy_auth_userpass64);
174		if (proxyauth == NULL)
175			err(1, "asprintf");
176
177		free(proxy_auth_userpass);
178		free(proxy_auth_userpass64);
179	} else
180		proxyauth = NULL;
181
182	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
183	if (env_HTTP_USER_AGENT == NULL)
184		env_HTTP_USER_AGENT = "phttpget/0.1";
185
186	env_HTTP_TIMEOUT = getenv("HTTP_TIMEOUT");
187	if (env_HTTP_TIMEOUT != NULL) {
188		http_timeout = strtol(env_HTTP_TIMEOUT, &p, 10);
189		if ((*env_HTTP_TIMEOUT == '\0') || (*p != '\0') ||
190		    (http_timeout < 0))
191			warnx("HTTP_TIMEOUT (%s) is not a positive integer",
192			    env_HTTP_TIMEOUT);
193		else
194			timo.tv_sec = http_timeout;
195	}
196}
197
198static int
199makerequest(char ** buf, char * path, char * server, int connclose)
200{
201	int buflen;
202
203	buflen = asprintf(buf,
204	    "GET %s%s/%s HTTP/1.1\r\n"
205	    "Host: %s\r\n"
206	    "User-Agent: %s\r\n"
207	    "%s"
208	    "%s"
209	    "\r\n",
210	    env_HTTP_PROXY ? "http://" : "",
211	    env_HTTP_PROXY ? server : "",
212	    path, server, env_HTTP_USER_AGENT,
213	    proxyauth ? proxyauth : "",
214	    connclose ? "Connection: Close\r\n" : "");
215	if (buflen == -1)
216		err(1, "asprintf");
217	return(buflen);
218}
219
220static int
221readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
222{
223	ssize_t len;
224
225	while (strnstr(resbuf + *resbufpos, "\r\n",
226	    *resbuflen - *resbufpos) == NULL) {
227		/* Move buffered data to the start of the buffer */
228		if (*resbufpos != 0) {
229			memmove(resbuf, resbuf + *resbufpos,
230			    *resbuflen - *resbufpos);
231			*resbuflen -= *resbufpos;
232			*resbufpos = 0;
233		}
234
235		/* If the buffer is full, complain */
236		if (*resbuflen == BUFSIZ)
237			return -1;
238
239		/* Read more data into the buffer */
240		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
241		if ((len == 0) ||
242		    ((len == -1) && (errno != EINTR)))
243			return -1;
244
245		if (len != -1)
246			*resbuflen += len;
247	}
248
249	return 0;
250}
251
/*
 * Transfer copylen bytes of the response to the descriptor fd, taking
 * them first from the unread part of resbuf and then from the socket sd.
 * If fd is -1 the bytes are consumed but not written anywhere.
 *
 * *resbuflen and *resbufpos are kept up to date, so that data received
 * beyond the requested length stays available to the caller.
 *
 * Returns 0 on success, -1 if recv() fails with anything other than
 * EINTR, and -2 if the peer closes the connection before copylen bytes
 * have been seen.  Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t chunk;

	while (copylen != 0) {
		/* How much buffered data may we hand over right now? */
		chunk = *resbuflen - *resbufpos;
		if (copylen < chunk)
			chunk = copylen;

		if (chunk <= 0) {
			/* Nothing usable is buffered; refill from the socket */
			chunk = recv(sd, resbuf, BUFSIZ, 0);
			if (chunk == 0)
				return (-2);
			if (chunk == -1) {
				if (errno != EINTR)
					return (-1);
				continue;
			}
			*resbuflen = chunk;
			*resbufpos = 0;
			continue;
		}

		/* Write data from resbuf to fd; a short write just loops */
		if (fd != -1) {
			chunk = write(fd, resbuf + *resbufpos, chunk);
			if (chunk == -1)
				err(1, "write");
		}
		*resbufpos += chunk;
		copylen -= chunk;
	}

	return (0);
}
289
/*
 * phttpget server [file ...]
 *
 * Fetch each named file from the given HTTP server (directly, or via the
 * proxy configured in the environment) and store the body of every
 * "200 OK" response in the current directory under the last component
 * of the requested path.  Bodies of other responses are read and
 * discarded.  A status line for each file is printed on stderr, except
 * for 204 and 304 responses.
 *
 * Requests are sent one at a time until a response shows that the
 * server speaks HTTP/1.1 (or a later minor version) without asking to
 * close the connection; after that, requests are pipelined by sending
 * as many as the socket accepts without blocking.
 *
 * If a connection fails after at least one response has been received
 * on it, a new connection is opened and the outstanding requests are
 * issued again.  A failure on the first request of a connection is
 * fatal.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server (or the proxy, if one is configured) */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set send and receive timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if ((sdflags == -1) ||
			    (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1))
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/*
					 * EAGAIN means the socket is full;
					 * stop sending and go read replies.
					 */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/* Read the status code */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names and the "close" and "chunked"
			 * tokens are case-insensitive in HTTP, so compare
			 * them without regard to case.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			/* Use err() so that the reason for failure is shown */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;

				/* Parse the hexadecimal chunk length */
				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) -
						    'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going.  Requests which
		 * were sent but not yet answered are sent again, starting
		 * from the first address of the server.
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
718