phttpget.c revision 158301
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 158301 2006-05-05 04:47:00Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sysexits.h>
45#include <unistd.h>
46
/*
 * Settings taken from the environment by readenv().
 */

/* Proxy host name, or NULL when no proxy is configured. */
static const char *	env_HTTP_PROXY;
/* Proxy port (service string); only meaningful when a proxy is set. */
static const char *	proxyport;

/* Raw HTTP_PROXY_AUTH value; readenv() splits it in place. */
static char *		env_HTTP_PROXY_AUTH;
/* Ready-to-send "Proxy-Authorization" header line, or NULL. */
static char *		proxyauth;

/* Value sent in the User-Agent request header. */
static const char *	env_HTTP_USER_AGENT;

/* Send/receive timeout applied to every socket: 15 seconds. */
static struct timeval	timo = { 15, 0 };
54
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with the EX_USAGE exit status.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: phttpget server [file ...]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
62
/*
 * Base64 encode the NUL-terminated string ptext.
 *
 * Returns a newly malloc()ed, NUL-terminated string which the caller must
 * free(), or NULL if the encoded length would overflow a size_t or the
 * allocation fails.  An empty input yields an empty string.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen, nin;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL.
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = '\0';

	/*
	 * Read the input as unsigned bytes: with a signed plain char, a
	 * byte >= 0x80 would be sign-extended and corrupt the other bytes
	 * packed into t.
	 */
	pt = (const unsigned char *)ptext;

	/*
	 * Consume up to 3 input bytes and emit exactly 4 output
	 * characters per iteration until the input is exhausted.
	 */
	for (pc = ctext; ptlen > 0; ptlen -= nin, pc += 4) {
		nin = (ptlen < 3) ? ptlen : 3;

		/* Pack the bytes into the low 24 bits, zero-filling */
		t = 0;
		for (j = 0; j < 3; j++) {
			t <<= 8;
			if (j < nin)
				t |= *pt++;
		}

		/*
		 * nin input bytes carry nin + 1 significant sextets;
		 * the remaining output positions are '=' padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= nin)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}
	}

	return (ctext);
}
120
121static void
122readenv(void)
123{
124	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
125	char *proxy_auth_user = NULL;
126	char *proxy_auth_pass = NULL;
127
128	env_HTTP_PROXY = getenv("HTTP_PROXY");
129	if (env_HTTP_PROXY == NULL)
130		env_HTTP_PROXY = getenv("http_proxy");
131	if (env_HTTP_PROXY != NULL) {
132		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
133			env_HTTP_PROXY += 7;
134		p = strchr(env_HTTP_PROXY, '/');
135		if (p != NULL)
136			*p = 0;
137		p = strchr(env_HTTP_PROXY, ':');
138		if (p != NULL) {
139			*p = 0;
140			proxyport = p + 1;
141		} else
142			proxyport = "3128";
143	}
144
145	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
146	if ((env_HTTP_PROXY != NULL) &&
147	    (env_HTTP_PROXY_AUTH != NULL) &&
148	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
149		/* Ignore authentication scheme */
150		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
151
152		/* Ignore realm */
153		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
154
155		/* Obtain username and password */
156		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
157		proxy_auth_pass = env_HTTP_PROXY_AUTH;
158	}
159
160	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
161		asprintf(&proxy_auth_userpass, "%s:%s",
162		    proxy_auth_user, proxy_auth_pass);
163		if (proxy_auth_userpass == NULL)
164			err(1, "asprintf");
165
166		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
167		if (proxy_auth_userpass64 == NULL)
168			err(1, "malloc");
169
170		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
171		    proxy_auth_userpass64);
172		if (proxyauth == NULL)
173			err(1, "asprintf");
174
175		free(proxy_auth_userpass);
176		free(proxy_auth_userpass64);
177	} else
178		proxyauth = NULL;
179
180	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
181	if (env_HTTP_USER_AGENT == NULL)
182		env_HTTP_USER_AGENT = "phttpget/0.1";
183}
184
185static int
186makerequest(char ** buf, char * path, char * server, int connclose)
187{
188	int buflen;
189
190	buflen = asprintf(buf,
191	    "GET %s%s/%s HTTP/1.1\r\n"
192	    "Host: %s\r\n"
193	    "User-Agent: %s\r\n"
194	    "%s"
195	    "%s"
196	    "\r\n",
197	    env_HTTP_PROXY ? "http://" : "",
198	    env_HTTP_PROXY ? server : "",
199	    path, server, env_HTTP_USER_AGENT,
200	    proxyauth ? proxyauth : "",
201	    connclose ? "Connection: Close\r\n" : "");
202	if (buflen == -1)
203		err(1, "asprintf");
204	return(buflen);
205}
206
/*
 * Make sure a complete CRLF-terminated line is buffered.
 *
 * resbuf is a BUFSIZ-byte buffer holding *resbuflen valid bytes, of
 * which those before *resbufpos have already been consumed.  Data is
 * received from sd until the unconsumed part contains "\r\n"; pending
 * bytes are first moved to the front of the buffer to make room.
 *
 * Returns 0 once a line is available starting at resbuf + *resbufpos,
 * or -1 if the buffer fills up without a line terminator, the peer
 * closes the connection, or recv() fails for a reason other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t nread;
	int pending;

	for (;;) {
		/* Done as soon as the unread data holds a CRLF */
		pending = *resbuflen - *resbufpos;
		if (strnstr(resbuf + *resbufpos, "\r\n", pending) != NULL)
			return (0);

		/* Slide the unread data down to offset zero */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos, pending);
			*resbuflen = pending;
			*resbufpos = 0;
		}

		/* A line that does not fit in the buffer is an error */
		if (*resbuflen == BUFSIZ)
			return (-1);

		/* Append whatever the socket has for us */
		nread = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
		if (nread == 0)
			return (-1);
		if (nread == -1) {
			if (errno != EINTR)
				return (-1);
			/* Interrupted: simply try again */
			continue;
		}
		*resbuflen += nread;
	}
}
238
/*
 * Transfer copylen bytes of the response to fd.
 *
 * Bytes already buffered in resbuf (between *resbufpos and *resbuflen)
 * are used first; when they run out the buffer is refilled from sd.
 * If fd is -1 the bytes are consumed but not written anywhere.
 *
 * Returns 0 when copylen bytes have been consumed, -1 if recv() fails
 * for a reason other than EINTR, and -2 if the peer closes the
 * connection first.  A write() failure terminates the process via err().
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t chunk;

	while (copylen != 0) {
		/* How much of the buffered data belongs to this copy? */
		chunk = *resbuflen - *resbufpos;
		if (copylen < chunk)
			chunk = copylen;

		if (chunk <= 0) {
			/* Nothing usable buffered: refill from the socket */
			chunk = recv(sd, resbuf, BUFSIZ, 0);
			if (chunk == 0)
				return (-2);
			if (chunk == -1) {
				if (errno == EINTR)
					continue;
				return (-1);
			}
			*resbuflen = chunk;
			*resbufpos = 0;
			continue;
		}

		/*
		 * Hand the data to fd, if there is one.  A short write
		 * just means the remainder goes out on a later pass.
		 */
		if (fd != -1)
			chunk = write(fd, resbuf + *resbufpos, chunk);
		if (chunk == -1)
			err(1, "write");
		*resbufpos += chunk;
		copylen -= chunk;
	}

	return (0);
}
276
/*
 * phttpget server [file ...]
 *
 * Fetch each named file from server over HTTP (through the proxy from the
 * environment, if any) and store every 200 response in the current
 * directory under the last path component of its name.  Other status
 * codes are reported on stderr and their bodies discarded.
 *
 * Once the server has answered with HTTP/1.1 (or later minor version) and
 * has not asked for the connection to be closed, further requests are
 * pipelined on the same connection.  If a connection fails after at least
 * one response was received on it, a new connection is opened and the
 * outstanding requests are reissued; a failure on the first request of a
 * connection is fatal.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server (or the proxy, when one is configured) */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts (best effort) ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if (connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			/* Remember which request opens this connection */
			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1 ||
			    fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/*
					 * EAGAIN only means the socket
					 * buffer is full; the rest of the
					 * request is sent on a later pass.
					 */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/*
		 * Do we need to blocking-send a request?  That is the case
		 * when the request we are about to read the reply for has
		 * not been (completely) sent yet.
		 */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    !isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/* Read the status code */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names and the tokens we look for
			 * are case-insensitive in HTTP, so match them that
			 * way.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			/* Use err() so the reason open() failed is shown */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Chunk header: hex size, optional extras */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* The size must have at least one digit */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln)
						    - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-empty chunk is
				 * followed by a CRLF.  It has to be consumed
				 * here; otherwise it would be read as the
				 * next chunk header, parse as size zero and
				 * end the body after the first chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					if (hln[0] != '\r' || hln[1] != '\n')
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket (copybytes() reports that as -2, which
			 * is the expected outcome here).
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going.  Requests that
		 * were sent but not answered are reissued (nreq = nres) and
		 * the address list is tried again from the start.
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* Release whatever is still held */
	if (sd != -1)
		close(sd);
	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
705