phttpget.c revision 154909
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 154909 2006-01-27 14:42:15Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sysexits.h>
45#include <unistd.h>
46
/* Proxy host name derived from $HTTP_PROXY by readenv(); NULL if unset. */
static const char *	env_HTTP_PROXY;

/* Raw value of $HTTP_PROXY_AUTH; readenv() tokenizes it in place. */
static char *		env_HTTP_PROXY_AUTH;

/* Value sent in the User-Agent header (see readenv()). */
static const char *	env_HTTP_USER_AGENT;

/* Proxy port (service string handed to getaddrinfo() in main()). */
static const char *	proxyport;

/* Complete "Proxy-Authorization: ..." header line, or NULL for none. */
static char *		proxyauth;

/* Send/receive timeout applied to every socket: 15 seconds. */
static struct timeval	timo = { .tv_sec = 15, .tv_usec = 0 };
54
/*
 * Print the command-line synopsis to stderr and terminate with the
 * EX_USAGE exit status.  Never returns.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: phttpget server [file ...]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
62
/*
 * Base64 encode a NUL-terminated string using the standard alphabet
 * with '=' padding; the string returned, if non-NULL, is allocated
 * using malloc() and must be freed by the caller.
 *
 * Returns NULL if the encoded length cannot be represented in a size_t
 * or if malloc() fails.  An empty input yields an empty string.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = 0;

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.  The
	 * input is read as unsigned bytes so that values >= 0x80 are
	 * not sign-extended into the 24-bit accumulator.
	 */
	pt = (const unsigned char *)ptext;
	for (pc = ctext; ptlen; ptlen -= 3, pc += 4) {
		/* Read 3 bytes (missing bytes of a partial block are 0) */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t += *pt++;
		}

		/*
		 * Write 4 bytes.  A block holding k input bytes (k = 1,
		 * 2 or 3) carries k + 1 significant output characters,
		 * i.e. indices 0..k; the remainder is '=' padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/* If we're done, exit the loop (avoids ptlen underflow) */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}
120
121static void
122readenv(void)
123{
124	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
125	char *proxy_auth_user = NULL;
126	char *proxy_auth_pass = NULL;
127
128	env_HTTP_PROXY = getenv("HTTP_PROXY");
129	if (env_HTTP_PROXY != NULL) {
130		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
131			env_HTTP_PROXY += 7;
132		p = strchr(env_HTTP_PROXY, '/');
133		if (p != NULL)
134			*p = 0;
135		p = strchr(env_HTTP_PROXY, ':');
136		if (p != NULL) {
137			*p = 0;
138			proxyport = p + 1;
139		} else
140			proxyport = "3128";
141	}
142
143	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
144	if ((env_HTTP_PROXY != NULL) &&
145	    (env_HTTP_PROXY_AUTH != NULL) &&
146	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
147		/* Ignore authentication scheme */
148		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
149
150		/* Ignore realm */
151		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
152
153		/* Obtain username and password */
154		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
155		proxy_auth_pass = strsep(&env_HTTP_PROXY_AUTH, ":");
156	}
157
158	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
159		asprintf(&proxy_auth_userpass, "%s:%s",
160		    proxy_auth_user, proxy_auth_pass);
161		if (proxy_auth_userpass == NULL)
162			err(1, "asprintf");
163
164		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
165		if (proxy_auth_userpass64 == NULL)
166			err(1, "malloc");
167
168		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
169		    proxy_auth_userpass64);
170		if (proxyauth == NULL)
171			err(1, "asprintf");
172
173		free(proxy_auth_userpass);
174		free(proxy_auth_userpass64);
175	} else
176		proxyauth = NULL;
177
178	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
179	if (env_HTTP_USER_AGENT == NULL)
180		env_HTTP_USER_AGENT = "phttpget/0.1";
181}
182
183static int
184makerequest(char ** buf, char * path, char * server, int connclose)
185{
186	int buflen;
187
188	buflen = asprintf(buf,
189	    "GET %s%s/%s HTTP/1.1\r\n"
190	    "Host: %s\r\n"
191	    "User-Agent: %s\r\n"
192	    "%s"
193	    "%s"
194	    "\r\n",
195	    env_HTTP_PROXY ? "http://" : "",
196	    env_HTTP_PROXY ? server : "",
197	    path, server, env_HTTP_USER_AGENT,
198	    proxyauth ? proxyauth : "",
199	    connclose ? "Connection: Close\r\n" : "");
200	if (buflen == -1)
201		err(1, "asprintf");
202	return(buflen);
203}
204
/*
 * Make sure that the unread part of the response buffer -- the bytes
 * resbuf[*resbufpos .. *resbuflen - 1] -- contains a "\r\n", receiving
 * more data from the socket sd as needed.  Unread data is shifted to
 * the start of the buffer before more is received, so *resbufpos and
 * *resbuflen may both change.  The buffer holds BUFSIZ bytes.
 *
 * Returns 0 once a line terminator is available.  Returns -1 if the
 * buffer fills up without one, if the peer closes the connection, or
 * if recv() fails for a reason other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t nread;
	int unread;

	for (;;) {
		/* Do we already have a complete line? */
		unread = *resbuflen - *resbufpos;
		if (strnstr(resbuf + *resbufpos, "\r\n", unread) != NULL)
			return 0;

		/* Slide the unread bytes down to the front */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos, unread);
			*resbuflen = unread;
			*resbufpos = 0;
		}

		/* No room left for the rest of the line */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Append whatever the socket has for us */
		nread = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
		if (nread == 0)
			return -1;
		if (nread == -1) {
			if (errno != EINTR)
				return -1;
			/* Interrupted; just try again */
			continue;
		}
		*resbuflen += nread;
	}
}
236
/*
 * Transfer copylen bytes of the response to the descriptor fd, taking
 * them first from the unread part of resbuf and then from the socket
 * sd.  If fd is -1 the bytes are consumed but not written anywhere.
 * *resbuflen and *resbufpos are updated to describe what remains
 * buffered.
 *
 * Returns 0 when copylen bytes have been handled, -1 if recv() fails
 * (other than with EINTR), and -2 if the peer closes the connection
 * first.  Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t n;

	while (copylen != 0) {
		/* How much buffered data may we use? */
		n = *resbuflen - *resbufpos;
		if (n > copylen)
			n = copylen;

		if (n <= 0) {
			/* Nothing buffered; refill from the socket */
			n = recv(sd, resbuf, BUFSIZ, 0);
			if (n == 0)
				return -2;
			if (n == -1) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			*resbuflen = n;
			*resbufpos = 0;
			continue;
		}

		/* Pass the data on; a short write just means we loop */
		if (fd != -1) {
			n = write(fd, resbuf + *resbufpos, n);
			if (n == -1)
				err(1, "write");
		}
		*resbufpos += n;
		copylen -= n;
	}

	return 0;
}
274
/*
 * phttpget server [file ...]
 *
 * Fetch each named file from the HTTP server (directly, or through the
 * proxy configured via the environment; see readenv()).  Once the
 * server has answered with an HTTP/1.x version other than 1.0,
 * requests are pipelined over the connection.  The body of every
 * response with status 200 is stored in the current directory under
 * the last path component of the requested name; bodies of other
 * responses are read and discarded.  One status line per completed
 * request is printed to stderr.
 *
 * If a connection fails after at least one response has been received
 * over it, a new connection is opened and the outstanding requests are
 * reissued; a failure on the first request of a connection is fatal.
 *
 * Returns 0 when all requests have been answered; exits with status 1
 * via err()/errx() on fatal errors.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/*
					 * EAGAIN just means the socket
					 * buffer is full; go and read some
					 * responses before sending more.
					 */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, bailing out as
				 * soon as it is out of range so that an
				 * absurdly long digit string cannot
				 * overflow statuscode.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names are case-insensitive, so
			 * match them with strncasecmp().
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strstr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strstr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s\n",
				    argv[nres]);

			/* Use err() so that the reason is reported too */
			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Chunk-size line: hex length [; extension] */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln)
						    - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every chunk is followed by a
				 * CRLF which is not counted in the chunk
				 * size.  Consume it, so that the next line
				 * we read really is the next chunk-size
				 * line rather than an empty line which
				 * would be mistaken for the last chunk.
				 * The last chunk (size zero) carries no
				 * data and thus no such CRLF.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					eolp = strnstr(hln, "\r\n",
					    resbuflen - resbufpos);
					if (eolp != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket (copybytes returning -2 is therefore the
			 * expected outcome, not an error).
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Requests already sent but unanswered must be reissued */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
703