phttpget.c revision 148880
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 148880 2005-08-09 03:32:29Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <sysexits.h>
44#include <unistd.h>
45
/*
 * Settings filled in by readenv().
 */

/* Proxy host name taken from $HTTP_PROXY; NULL when that is not set. */
static const char	*env_HTTP_PROXY;

/* Value sent in the "User-Agent:" header of every request. */
static const char	*env_HTTP_USER_AGENT;

/* Service (port) string used when connecting via the proxy. */
static const char	*proxyport;

/* Timeout applied to both sending and receiving on the server socket. */
static struct timeval	timo = { .tv_sec = 15, .tv_usec = 0 };
51
/*
 * Print the command-line synopsis on stderr and terminate the program
 * with the EX_USAGE exit status.  Never returns.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: phttpget server [file ...]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
59
60static void
61readenv(void)
62{
63	char * p;
64
65	env_HTTP_PROXY = getenv("HTTP_PROXY");
66	if (env_HTTP_PROXY) {
67		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
68			env_HTTP_PROXY += 7;
69		p = strchr(env_HTTP_PROXY, '/');
70		if (p != NULL)
71			*p = 0;
72		p = strchr(env_HTTP_PROXY, ':');
73		if (p != NULL) {
74			*p = 0;
75			proxyport = p + 1;
76		} else
77			proxyport = "3128";
78	}
79
80	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
81	if (env_HTTP_USER_AGENT == NULL)
82		env_HTTP_USER_AGENT = "phttpget/0.1";
83}
84
85static int
86makerequest(char ** buf, char * path, char * server, int connclose)
87{
88	int buflen;
89
90	buflen = asprintf(buf,
91	    "GET %s%s/%s HTTP/1.1\r\n"
92	    "Host: %s\r\n"
93	    "User-Agent: %s\r\n"
94	    "%s"
95	    "\r\n",
96	    env_HTTP_PROXY ? "http://" : "",
97	    env_HTTP_PROXY ? server : "",
98	    path, server, env_HTTP_USER_AGENT,
99	    connclose ? "Connection: Close\r\n" : "");
100	if (buflen == -1)
101		err(1, "asprintf");
102	return(buflen);
103}
104
/*
 * Make sure the response buffer holds at least one complete line, i.e. a
 * "\r\n" somewhere in resbuf[*resbufpos .. *resbuflen), receiving more
 * data from the socket sd as needed.
 *
 * resbuf must have room for BUFSIZ bytes.  Unconsumed data is moved to the
 * front of the buffer before more is received, so *resbufpos and
 * *resbuflen may both change.  The line itself is not consumed; the caller
 * locates the "\r\n" and advances *resbufpos.
 *
 * Returns 0 when a line is available, or -1 if the buffer filled up
 * without a line terminator, the peer closed the connection, or recv()
 * failed with anything other than EINTR.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (strnstr(resbuf + *resbufpos, "\r\n",
	    *resbuflen - *resbufpos) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);

		/*
		 * A return of 0 means the peer closed the connection; no
		 * terminator will ever arrive, so fail rather than spin.
		 */
		if (len == 0)
			return -1;

		if (len == -1) {
			if (errno != EINTR)
				return -1;
			/* Interrupted before any data arrived; try again */
			continue;
		}

		*resbuflen += len;
	}

	return 0;
}
135
/*
 * Transfer copylen bytes of the response to the file descriptor fd,
 * starting with whatever is already buffered in
 * resbuf[*resbufpos .. *resbuflen) and receiving more from the socket sd
 * once that is used up.  When fd is -1 the bytes are consumed but not
 * written anywhere.
 *
 * resbuf must have room for BUFSIZ bytes.  *resbufpos and *resbuflen are
 * updated so that any buffered data beyond the copied range stays
 * available to the caller.  A copylen of zero or less copies nothing.
 *
 * Returns 0 once copylen bytes have been handled, -1 if recv() fails with
 * anything other than EINTR, and -2 if the peer closes the connection
 * first.  Exits via err() if write() fails; a write() interrupted by a
 * signal is retried, just like an interrupted recv().
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t len;

	while (copylen > 0) {
		/* Write data from resbuf to fd */
		len = *resbuflen - *resbufpos;
		if (copylen < len)
			len = copylen;
		if (len > 0) {
			if (fd != -1) {
				len = write(fd, resbuf + *resbufpos, len);
				if (len == -1) {
					if (errno == EINTR)
						continue;
					err(1, "write");
				}
			}
			/* A short write is fine: the rest goes next time */
			*resbufpos += len;
			copylen -= len;
			continue;
		}

		/* Read more data into buffer */
		len = recv(sd, resbuf, BUFSIZ, 0);
		if (len == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		} else if (len == 0) {
			return -2;
		} else {
			*resbuflen = len;
			*resbufpos = 0;
		}
	}

	return 0;
}
173
/*
 * phttpget server [file ...]
 *
 * Fetch each named file from "server" over HTTP (directly, or through the
 * proxy configured by readenv()) and store it in the current directory
 * under the last path component of its name.
 *
 * Requests are sent one at a time until the server has answered with an
 * HTTP/1.x status line whose minor version is not 0; from then on
 * outstanding requests are pipelined with non-blocking sends, until a
 * "Connection: close" header or a response of unknown length turns
 * pipelining off again.
 *
 * Only the body of a 200 response is written to a file; bodies of other
 * responses are read and discarded.  One status line per completed
 * response (other than 204/304) is printed on stderr.
 *
 * If the connection fails or the server sends something unparseable, the
 * connection is closed and the outstanding requests are reissued on a new
 * connection -- unless the failure hit the first request sent on that
 * connection, in which case the program gives up.
 *
 * Returns 0 when every file has been processed; fatal errors terminate the
 * program with status 1 through err()/errx().
 *
 * NOTE: header field names are matched case-sensitively
 * ("Connection:", "Content-Length:", "Transfer-Encoding:").
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server (or the proxy, if one is configured) */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "host = %s, port = %s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    env_HTTP_PROXY ? proxyport : "http",
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts (best effort) ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if (connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			/* Remember which request opens this connection */
			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/*
					 * EAGAIN just means the socket is
					 * full; keep the partially sent
					 * request for later.
					 */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    !isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/*
				 * Read the status code, bailing out as soon
				 * as it leaves the valid range so that a
				 * long run of digits cannot overflow it.
				 */
				while (isdigit((unsigned char)*hln)) {
					statuscode = statuscode * 10 +
					    *hln - '0';
					if (statuscode > 599)
						goto conndied;
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/* Check for "Connection: close" header */
			if (strncmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strstr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength > INT_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strstr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				/* Get the chunk-size line */
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;

				/* The line must start with a hex number */
				if (!isxdigit((unsigned char)*hln))
					goto conndied;

				/* Anything after the number is ignored */
				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen > INT_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) -
						    'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every non-empty chunk is
				 * followed by a CRLF which is not part of
				 * the data.  Consume it here; otherwise it
				 * would be read as an empty chunk-size line
				 * and end the body after the first chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					if (memcmp(resbuf + resbufpos, "\r\n",
					    2) != 0)
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strnstr(hln, "\r\n",
				    resbuflen - resbufpos);
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket (copybytes then returns -2, which is the
			 * expected outcome here).
			 */
			error = copybytes(sd, fd, INT_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		free(reqbuf);
		reqbuf = NULL;

		/* Requests already sent but unanswered must be resent */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
603