phttpget.c revision 150461
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 150461 2005-09-22 07:11:27Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdint.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <sysexits.h>
45#include <unistd.h>
46
/* Proxy host name derived from $HTTP_PROXY by readenv(); NULL if unset. */
static const char	*env_HTTP_PROXY;
/* Working copy of the $HTTP_PROXY_AUTH pointer; consumed by strsep(). */
static char		*env_HTTP_PROXY_AUTH;
/* Value sent in the User-Agent header. */
static const char	*env_HTTP_USER_AGENT;
/* Port (service string) used when connecting to the proxy. */
static const char	*proxyport;
/* Complete "Proxy-Authorization" header line, or NULL if none. */
static char		*proxyauth;

/* Timeout applied to both socket sends and receives: 15 seconds. */
static struct timeval	timo = { 15, 0 };
54
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with the EX_USAGE exit status.  Never returns.
 */
static void
usage(void)
{

	fputs("usage: phttpget server [file ...]\n", stderr);
	exit(EX_USAGE);
}
62
/*
 * Base64 encode a NUL-terminated string.
 *
 * Returns a newly malloc()ed, NUL-terminated string which the caller
 * must free(), or NULL if the encoded length would overflow size_t or
 * if memory allocation fails.  An empty input yields an empty string.
 */
static char *
b64enc(const char *ptext)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	const unsigned char *pt;
	char *ctext, *pc;
	size_t ptlen, ctlen;
	uint32_t t;
	unsigned int j;

	/*
	 * Encoded length is 4 characters per 3-byte block or partial
	 * block of plaintext, plus one byte for the terminating NUL
	 */
	ptlen = strlen(ptext);
	if (ptlen > ((SIZE_MAX - 1) / 4) * 3 - 2)
		return (NULL);	/* Possible integer overflow */
	ctlen = 4 * ((ptlen + 2) / 3) + 1;
	if ((ctext = malloc(ctlen)) == NULL)
		return (NULL);
	ctext[ctlen - 1] = '\0';

	/*
	 * Scan through ptext, reading up to 3 bytes from ptext and
	 * writing 4 bytes to ctext, until we run out of input.  The
	 * input is read as unsigned bytes so that values >= 0x80 are not
	 * sign-extended into the accumulator.  Within the loop, ptlen is
	 * the number of input bytes not yet consumed at the start of the
	 * current block.
	 */
	for (pt = (const unsigned char *)ptext, pc = ctext; ptlen != 0;
	    ptlen -= 3, pc += 4) {
		/* Read 3 bytes, zero-filling past the end of the input */
		for (t = j = 0; j < 3; j++) {
			t <<= 8;
			if (j < ptlen)
				t |= *pt++;
		}

		/*
		 * Write 4 bytes: k input bytes (k = 1, 2 or 3) produce
		 * k + 1 symbols; the rest of the quantum is '=' padding.
		 */
		for (j = 0; j < 4; j++) {
			if (j <= ptlen)
				pc[j] = base64[(t >> 18) & 0x3f];
			else
				pc[j] = '=';
			t <<= 6;
		}

		/* If we're done, exit before ptlen -= 3 can wrap around */
		if (ptlen <= 3)
			break;
	}

	return (ctext);
}
120
121static void
122readenv(void)
123{
124	char *proxy_auth_userpass, *proxy_auth_userpass64, *p;
125	char *proxy_auth_user = NULL;
126	char *proxy_auth_pass = NULL;
127
128	env_HTTP_PROXY = getenv("HTTP_PROXY");
129	if (env_HTTP_PROXY != NULL) {
130		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
131			env_HTTP_PROXY += 7;
132		p = strchr(env_HTTP_PROXY, '/');
133		if (p != NULL)
134			*p = 0;
135		p = strchr(env_HTTP_PROXY, ':');
136		if (p != NULL) {
137			*p = 0;
138			proxyport = p + 1;
139		} else
140			proxyport = "3128";
141	}
142
143	env_HTTP_PROXY_AUTH = getenv("HTTP_PROXY_AUTH");
144	if ((env_HTTP_PROXY != NULL) &&
145	    (env_HTTP_PROXY_AUTH != NULL) &&
146	    (strncasecmp(env_HTTP_PROXY_AUTH, "basic:" , 6) == 0)) {
147		/* Ignore authentication scheme */
148		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
149
150		/* Ignore realm */
151		(void) strsep(&env_HTTP_PROXY_AUTH, ":");
152
153		/* Obtain username and password */
154		proxy_auth_user = strsep(&env_HTTP_PROXY_AUTH, ":");
155		proxy_auth_pass = strsep(&env_HTTP_PROXY_AUTH, ":");
156	}
157
158	if ((proxy_auth_user != NULL) && (proxy_auth_pass != NULL)) {
159		asprintf(&proxy_auth_userpass, "%s:%s",
160		    proxy_auth_user, proxy_auth_pass);
161		if (proxy_auth_userpass == NULL)
162			err(1, "asprintf");
163
164		proxy_auth_userpass64 = b64enc(proxy_auth_userpass);
165		if (proxy_auth_userpass64 == NULL)
166			err(1, "malloc");
167
168		asprintf(&proxyauth, "Proxy-Authorization: Basic %s\r\n",
169		    proxy_auth_userpass64);
170		if (proxyauth == NULL)
171			err(1, "asprintf");
172
173		free(proxy_auth_userpass);
174		free(proxy_auth_userpass64);
175	} else
176		proxyauth = NULL;
177
178	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
179	if (env_HTTP_USER_AGENT == NULL)
180		env_HTTP_USER_AGENT = "phttpget/0.1";
181}
182
183static int
184makerequest(char ** buf, char * path, char * server, int connclose)
185{
186	int buflen;
187
188	buflen = asprintf(buf,
189	    "GET %s%s/%s HTTP/1.1\r\n"
190	    "Host: %s\r\n"
191	    "User-Agent: %s\r\n"
192	    "%s"
193	    "%s"
194	    "\r\n",
195	    env_HTTP_PROXY ? "http://" : "",
196	    env_HTTP_PROXY ? server : "",
197	    path, server, env_HTTP_USER_AGENT,
198	    proxyauth ? proxyauth : "",
199	    connclose ? "Connection: Close\r\n" : "");
200	if (buflen == -1)
201		err(1, "asprintf");
202	return(buflen);
203}
204
205static int
206readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
207{
208	ssize_t len;
209
210	while (strnstr(resbuf + *resbufpos, "\r\n",
211	    *resbuflen - *resbufpos) == NULL) {
212		/* Move buffered data to the start of the buffer */
213		if (*resbufpos != 0) {
214			memmove(resbuf, resbuf + *resbufpos,
215			    *resbuflen - *resbufpos);
216			*resbuflen -= *resbufpos;
217			*resbufpos = 0;
218		}
219
220		/* If the buffer is full, complain */
221		if (*resbuflen == BUFSIZ)
222			return -1;
223
224		/* Read more data into the buffer */
225		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);
226		if ((len == -1) && (errno != EINTR))
227			return -1;
228
229		if (len != -1)
230			*resbuflen += len;
231	}
232
233	return 0;
234}
235
/*
 * Transfer copylen bytes of the response to the descriptor fd, taking
 * them first from the data already held in resbuf (offsets *resbufpos
 * up to *resbuflen) and then from the socket sd.  If fd is -1 the
 * bytes are consumed but not written anywhere.
 *
 * Returns 0 when copylen bytes have been handled, -1 if recv() fails
 * with an error other than EINTR, and -2 if the peer closes the
 * connection first.  A failed write() terminates the program via err().
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t n;

	for (; copylen != 0; ) {
		/* How much buffered data may we use for this request? */
		n = *resbuflen - *resbufpos;
		if (n > copylen)
			n = copylen;

		if (n <= 0) {
			/* Buffer exhausted: refill it from the socket */
			n = recv(sd, resbuf, BUFSIZ, 0);
			if (n == 0)
				return -2;
			if (n == -1) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			*resbuflen = n;
			*resbufpos = 0;
			continue;
		}

		/* Hand the data to fd; a short write just loops again */
		if (fd != -1) {
			n = write(fd, resbuf + *resbufpos, n);
			if (n == -1)
				err(1, "write");
		}
		*resbufpos += n;
		copylen -= n;
	}

	return 0;
}
273
/*
 * Usage: phttpget server [file ...]
 *
 * Fetch each named file from the given HTTP server (optionally via the
 * proxy named by $HTTP_PROXY) and store it in the current directory
 * under the last component of its path.  Requests are pipelined once
 * the server has shown that it speaks HTTP/1.1 and keeps the
 * connection open.  A line reporting the status of each request is
 * written to stderr; only 200 responses are saved to disk.
 *
 * If a connection fails after at least one response has been received
 * on it, a new connection is opened and the outstanding requests are
 * reissued; a failure on the first request of a connection is fatal.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server (or the proxy, if we are using one) */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if (connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/* EAGAIN: socket full, try later */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    !isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, giving up as soon
				 * as it is out of range so that a long run
				 * of digits cannot overflow the integer.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/*
			 * Header field names and the tokens we look for
			 * are case-insensitive, so compare accordingly.
			 */

			/* Check for "Connection: close" header */
			if (strncasecmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strcasestr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncasecmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength >= OFF_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncasecmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strcasestr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;

			/*
			 * If the server isn't keeping the connection open,
			 * drop it now rather than sending the next request
			 * into a dead socket.
			 */
			if (pipelined == 0)
				goto cleanupconn;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Get the chunk-size line */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* A chunk size must start with a hex digit */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen >= OFF_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln)
						    - 'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-final chunk is
				 * followed by a CRLF which must be consumed
				 * before the next chunk-size line; otherwise
				 * it would be mistaken for a zero-length
				 * (final) chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					if (strnstr(hln, "\r\n",
					    resbuflen - resbufpos) != hln)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.  (A return of -2 from copybytes means
			 * end-of-file, which is what we expect here.)
			 */
			error = copybytes(sd, fd, OFF_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going.  Requests
		 * which were sent but not yet answered will be reissued,
		 * starting from the first address again.
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
702