phttpget.c revision 148871
1/*-
2 * Copyright 2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/usr.sbin/portsnap/phttpget/phttpget.c 148871 2005-08-08 20:10:06Z cperciva $");
29
30#include <sys/types.h>
31#include <sys/time.h>
32#include <sys/socket.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <errno.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <netdb.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <sysexits.h>
44#include <unistd.h>
45
/* Proxy host parsed from $HTTP_PROXY by readenv(); NULL means no proxy */
static const char *env_HTTP_PROXY;
/* Value sent in the User-Agent header of every request */
static const char *env_HTTP_USER_AGENT;
/* Service name or port number used when connecting to the proxy */
static const char *proxyport;

/* Timeout applied to socket sends and receives: 15 seconds */
static struct timeval timo = { 15, 0 };
51
/*
 * Print the command-line synopsis on stderr and terminate the process
 * with the EX_USAGE exit status.  This function does not return.
 */
static void
usage(void)
{
	static const char synopsis[] = "usage: phttpget server [file ...]\n";

	fputs(synopsis, stderr);
	exit(EX_USAGE);
}
59
60static void
61readenv(void)
62{
63	char * p;
64
65	env_HTTP_PROXY = getenv("HTTP_PROXY");
66	if (env_HTTP_PROXY) {
67		if (strncmp(env_HTTP_PROXY, "http://", 7) == 0)
68			env_HTTP_PROXY += 7;
69		p = strchr(env_HTTP_PROXY, ':');
70		if (p != NULL) {
71			*p = 0;
72			proxyport = p + 1;
73		} else
74			proxyport = "3128";
75	}
76
77	env_HTTP_USER_AGENT = getenv("HTTP_USER_AGENT");
78	if (env_HTTP_USER_AGENT == NULL)
79		env_HTTP_USER_AGENT = "phttpget/0.1";
80}
81
82static int
83makerequest(char ** buf, char * path, char * server, int connclose)
84{
85	int buflen;
86
87	buflen = asprintf(buf,
88	    "GET %s%s/%s HTTP/1.1\r\n"
89	    "Host: %s\r\n"
90	    "User-Agent: %s\r\n"
91	    "%s"
92	    "\r\n",
93	    env_HTTP_PROXY ? "http://" : "",
94	    env_HTTP_PROXY ? server : "",
95	    path, server, env_HTTP_USER_AGENT,
96	    connclose ? "Connection: Close\r\n" : "");
97	if (buflen == -1)
98		err(1, "asprintf");
99	return(buflen);
100}
101
/*
 * Find the first "\r\n" that lies entirely within the first len bytes of
 * buf.  The search stops at a NUL byte, so a line containing NUL is never
 * reported (the same behaviour as strnstr(buf, "\r\n", len)).  Returns a
 * pointer to the '\r', or NULL if there is no such sequence.
 */
static char *
findcrlf(char * buf, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len && buf[i] != '\0'; i++) {
		if (buf[i] == '\r' && buf[i + 1] == '\n')
			return (buf + i);
	}

	return (NULL);
}

/*
 * Make sure the response buffer holds a complete CRLF-terminated line
 * starting at resbuf + *resbufpos, reading more data from the socket sd
 * as needed.
 *
 * resbuf is a buffer of BUFSIZ bytes; *resbuflen is the number of valid
 * bytes in it and *resbufpos the offset of the first unconsumed byte.
 * Unconsumed data may be moved to the front of the buffer, in which case
 * both values are updated.
 *
 * Returns 0 once a complete line is buffered.  Returns -1 if the buffer
 * fills up without a line terminator, if recv() fails with anything other
 * than EINTR, or if the peer closes the connection before a full line has
 * arrived.
 */
static int
readln(int sd, char * resbuf, int * resbuflen, int * resbufpos)
{
	ssize_t len;

	while (findcrlf(resbuf + *resbufpos,
	    (size_t)(*resbuflen - *resbufpos)) == NULL) {
		/* Move buffered data to the start of the buffer */
		if (*resbufpos != 0) {
			memmove(resbuf, resbuf + *resbufpos,
			    *resbuflen - *resbufpos);
			*resbuflen -= *resbufpos;
			*resbufpos = 0;
		}

		/* If the buffer is full, complain */
		if (*resbuflen == BUFSIZ)
			return -1;

		/* Read more data into the buffer */
		len = recv(sd, resbuf + *resbuflen, BUFSIZ - *resbuflen, 0);

		/*
		 * A return of zero means the peer closed the connection;
		 * no more data will ever arrive, so looping again would
		 * spin forever.
		 */
		if (len == 0)
			return -1;

		if (len == -1) {
			if (errno != EINTR)
				return -1;
			/* Interrupted by a signal; just try again */
			continue;
		}

		*resbuflen += len;
	}

	return 0;
}
132
/*
 * Transfer copylen bytes of response data to the descriptor fd, or simply
 * consume them when fd is -1.
 *
 * Data already buffered in resbuf (the bytes between *resbufpos and
 * *resbuflen) is used first; once that runs out the buffer is refilled
 * from the socket sd, up to BUFSIZ bytes at a time.
 *
 * Returns 0 after copylen bytes have been handled, -1 if recv() fails with
 * an error other than EINTR, and -2 if the peer closes the connection
 * first.  Exits via err() if write() fails.
 */
static int
copybytes(int sd, int fd, off_t copylen, char * resbuf, int * resbuflen,
    int * resbufpos)
{
	ssize_t n;

	while (copylen != 0) {
		/* How much buffered data may be used for this transfer? */
		n = *resbuflen - *resbufpos;
		if (copylen < n)
			n = copylen;

		if (n <= 0) {
			/* Nothing usable is buffered; refill from the socket */
			n = recv(sd, resbuf, BUFSIZ, 0);
			if (n == 0)
				return -2;
			if (n == -1) {
				if (errno == EINTR)
					continue;
				return -1;
			}
			*resbuflen = n;
			*resbufpos = 0;
			continue;
		}

		/* Hand the buffered data to fd; a short write is retried */
		if (fd != -1) {
			n = write(fd, resbuf + *resbufpos, n);
			if (n == -1)
				err(1, "write");
		}
		*resbufpos += n;
		copylen -= n;
	}

	return 0;
}
170
/*
 * Fetch files from one HTTP server, pipelining requests over a single
 * connection when the server speaks HTTP/1.1 and keeps the connection
 * open.
 *
 * argv[1] names the server; every further argument is a path on that
 * server.  If a proxy was configured through the environment, the
 * connection is made to the proxy instead.  The body of each 200 response
 * is written to a file in the current directory named after the last
 * component of the path; bodies of other responses are read and thrown
 * away.  A one-line report is printed on stderr for every response except
 * 204 and 304, which carry no body.
 *
 * If a connection fails after at least one response has been received on
 * it, a new connection is opened and the outstanding requests are sent
 * again.  A failure on the first request of a connection is fatal.
 *
 * Returns 0 when every request has been answered; exits with a non-zero
 * status on fatal errors.
 */
int
main(int argc, char *argv[])
{
	struct addrinfo hints;	/* Hints to getaddrinfo */
	struct addrinfo *res;	/* Pointer to server address being used */
	struct addrinfo *res0;	/* Pointer to server addresses */
	char * resbuf = NULL;	/* Response buffer */
	int resbufpos = 0;	/* Response buffer position */
	int resbuflen = 0;	/* Response buffer length */
	char * eolp;		/* Pointer to "\r\n" within resbuf */
	char * hln0;		/* Pointer to start of header line */
	char * hln;		/* Pointer within header line */
	char * servername;	/* Name of server */
	char * fname = NULL;	/* Name of downloaded file */
	char * reqbuf = NULL;	/* Request buffer */
	int reqbufpos = 0;	/* Request buffer position */
	int reqbuflen = 0;	/* Request buffer length */
	ssize_t len;		/* Length sent or received */
	int nreq = 0;		/* Number of next request to send */
	int nres = 0;		/* Number of next reply to receive */
	int pipelined = 0;	/* != 0 if connection in pipelined mode. */
	int sd = -1;		/* Socket descriptor */
	int sdflags = 0;	/* Flags on the socket sd */
	int fd = -1;		/* Descriptor for download target file */
	int error;		/* Error code */
	int statuscode;		/* HTTP Status code */
	off_t contentlength;	/* Value from Content-Length header */
	int chunked;		/* != 0 if transfer-encoding is chunked */
	off_t clen;		/* Chunk length */
	int firstreq = 0;	/* # of first request for this connection */

	/* Check that the arguments are sensible */
	if (argc < 2)
		usage();

	/* Read important environment variables */
	readenv();

	/* Get server name and adjust arg[cv] to point at file names */
	servername = argv[1];
	argv += 2;
	argc -= 2;

	/* Allocate response buffer */
	resbuf = malloc(BUFSIZ);
	if (resbuf == NULL)
		err(1, "malloc");

	/* Look up server */
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = PF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	error = getaddrinfo(env_HTTP_PROXY ? env_HTTP_PROXY : servername,
	    env_HTTP_PROXY ? proxyport : "http", &hints, &res0);
	if (error)
		errx(1, "%s: %s",
		    env_HTTP_PROXY ? env_HTTP_PROXY : servername,
		    gai_strerror(error));
	if (res0 == NULL)
		errx(1, "could not look up %s", servername);
	res = res0;

	/* Do the fetching */
	while (nres < argc) {
		/* Make sure we have a connected socket */
		for (; sd == -1; res = res->ai_next) {
			/* No addresses left to try :-( */
			if (res == NULL)
				errx(1, "Could not connect to %s", servername);

			/* Create a socket... */
			sd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol);
			if (sd == -1)
				continue;

			/* ... set 15-second timeouts ... */
			setsockopt(sd, SOL_SOCKET, SO_SNDTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));
			setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO,
			    (void *)&timo, (socklen_t)sizeof(timo));

			/* ... and connect to the server. */
			if(connect(sd, res->ai_addr, res->ai_addrlen)) {
				close(sd);
				sd = -1;
				continue;
			}

			firstreq = nres;
		}

		/*
		 * If in pipelined HTTP mode, put socket into non-blocking
		 * mode, since we're probably going to want to try to send
		 * several HTTP requests.
		 */
		if (pipelined) {
			sdflags = fcntl(sd, F_GETFL);
			if (sdflags == -1)
				err(1, "fcntl");
			if (fcntl(sd, F_SETFL, sdflags | O_NONBLOCK) == -1)
				err(1, "fcntl");
		}

		/* Construct requests and/or send them without blocking */
		while ((nreq < argc) && ((reqbuf == NULL) || pipelined)) {
			/* If not in the middle of a request, make one */
			if (reqbuf == NULL) {
				reqbuflen = makerequest(&reqbuf, argv[nreq],
				    servername, (nreq == argc - 1));
				reqbufpos = 0;
			}

			/* If in pipelined mode, try to send the request */
			if (pipelined) {
				while (reqbufpos < reqbuflen) {
					len = send(sd, reqbuf + reqbufpos,
					    reqbuflen - reqbufpos, 0);
					if (len == -1)
						break;
					reqbufpos += len;
				}
				if (reqbufpos < reqbuflen) {
					/*
					 * EAGAIN just means the socket
					 * buffer is full; the rest of the
					 * request is sent later.
					 */
					if (errno != EAGAIN)
						goto conndied;
					break;
				} else {
					free(reqbuf);
					reqbuf = NULL;
					nreq++;
				}
			}
		}

		/* Put connection back into blocking mode */
		if (pipelined) {
			if (fcntl(sd, F_SETFL, sdflags) == -1)
				err(1, "fcntl");
		}

		/* Do we need to blocking-send a request? */
		if (nres == nreq) {
			while (reqbufpos < reqbuflen) {
				len = send(sd, reqbuf + reqbufpos,
				    reqbuflen - reqbufpos, 0);
				if (len == -1)
					goto conndied;
				reqbufpos += len;
			}
			free(reqbuf);
			reqbuf = NULL;
			nreq++;
		}

		/* Scan through the response processing headers. */
		statuscode = 0;
		contentlength = -1;
		chunked = 0;
		do {
			/* Get a header line */
			error = readln(sd, resbuf, &resbuflen, &resbufpos);
			if (error)
				goto conndied;
			hln0 = hln = resbuf + resbufpos;
			eolp = strnstr(hln, "\r\n", resbuflen - resbufpos);
			resbufpos = (eolp - resbuf) + 2;
			*eolp = '\0';

			/* Make sure it doesn't contain a NUL character */
			if (strchr(hln, '\0') != eolp)
				goto conndied;

			if (statuscode == 0) {
				/* The first line MUST be HTTP/1.x xxx ... */
				if ((strncmp(hln, "HTTP/1.", 7) != 0) ||
				    ! isdigit((unsigned char)hln[7]))
					goto conndied;

				/*
				 * If the minor version number isn't zero,
				 * then we can assume that pipelining our
				 * requests is OK -- as long as we don't
				 * see a "Connection: close" line later
				 * and we either have a Content-Length or
				 * Transfer-Encoding: chunked header to
				 * tell us the length.
				 */
				if (hln[7] != '0')
					pipelined = 1;

				/* Skip over the minor version number */
				hln = strchr(hln + 7, ' ');
				if (hln == NULL)
					goto conndied;
				else
					hln++;

				/* Read the status code */
				while (isdigit((unsigned char)*hln)) {
					/*
					 * Already out of range; stop before
					 * a long run of digits overflows.
					 */
					if (statuscode > 599)
						goto conndied;
					statuscode = statuscode * 10 +
					    *hln - '0';
					hln++;
				}

				if (statuscode < 100 || statuscode > 599)
					goto conndied;

				/* Ignore the rest of the line */
				continue;
			}

			/* Check for "Connection: close" header */
			if (strncmp(hln, "Connection:", 11) == 0) {
				hln += 11;
				if (strstr(hln, "close") != NULL)
					pipelined = 0;

				/* Next header... */
				continue;
			}

			/* Check for "Content-Length:" header */
			if (strncmp(hln, "Content-Length:", 15) == 0) {
				hln += 15;
				contentlength = 0;

				/* Find the start of the length */
				while (!isdigit((unsigned char)*hln) &&
				    (*hln != '\0'))
					hln++;

				/* Compute the length */
				while (isdigit((unsigned char)*hln)) {
					if (contentlength > INT_MAX / 10) {
						/* Nasty people... */
						goto conndied;
					}
					contentlength = contentlength * 10 +
					    *hln - '0';
					hln++;
				}

				/* Next header... */
				continue;
			}

			/* Check for "Transfer-Encoding: chunked" header */
			if (strncmp(hln, "Transfer-Encoding:", 18) == 0) {
				hln += 18;
				if (strstr(hln, "chunked") != NULL)
					chunked = 1;

				/* Next header... */
				continue;
			}

			/* We blithely ignore any other header lines */

			/* No more header lines */
			if (strlen(hln) == 0) {
				/*
				 * If the status code was 1xx, then there will
				 * be a real header later.  Servers may emit
				 * 1xx header blocks at will, but since we
				 * don't expect one, we should just ignore it.
				 */
				if (100 <= statuscode && statuscode <= 199) {
					statuscode = 0;
					continue;
				}

				/* End of header; message body follows */
				break;
			}
		} while (1);

		/* No message body for 204 or 304 */
		if (statuscode == 204 || statuscode == 304) {
			nres++;
			continue;
		}

		/*
		 * There should be a message body coming, but we only want
		 * to send it to a file if the status code is 200
		 */
		if (statuscode == 200) {
			/* Generate a file name for the download */
			fname = strrchr(argv[nres], '/');
			if (fname == NULL)
				fname = argv[nres];
			else
				fname++;
			if (strlen(fname) == 0)
				errx(1, "Cannot obtain file name from %s",
				    argv[nres]);

			fd = open(fname, O_CREAT | O_TRUNC | O_WRONLY, 0644);
			if (fd == -1)
				err(1, "open(%s)", fname);
		}

		/* Read the message and send data to fd if appropriate */
		if (chunked) {
			/* Handle a chunked-encoded entity */

			/* Read chunks */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;

				/* Parse the hexadecimal chunk size */
				clen = 0;
				while (isxdigit((unsigned char)*hln)) {
					if (clen > INT_MAX / 16) {
						/* Nasty people... */
						goto conndied;
					}
					if (isdigit((unsigned char)*hln))
						clen = clen * 16 + *hln - '0';
					else
						clen = clen * 16 + 10 +
						    tolower((unsigned char)*hln) -
						    'a';
					hln++;
				}

				error = copybytes(sd, fd, clen, resbuf,
				    &resbuflen, &resbufpos);
				if (error) {
					goto conndied;
				}

				/*
				 * The data of every chunk is followed by a
				 * CRLF which is not part of the data.  It
				 * has to be consumed here; otherwise it
				 * would be read as an empty chunk-size line
				 * and end the body after the first chunk.
				 */
				if (clen != 0) {
					error = readln(sd, resbuf, &resbuflen,
					    &resbufpos);
					if (error)
						goto conndied;
					hln = resbuf + resbufpos;
					if (hln[0] != '\r' || hln[1] != '\n')
						goto conndied;
					resbufpos += 2;
				}
			} while (clen != 0);

			/* Read trailer and final CRLF */
			do {
				error = readln(sd, resbuf, &resbuflen,
				    &resbufpos);
				if (error)
					goto conndied;
				hln = resbuf + resbufpos;
				eolp = strstr(hln, "\r\n");
				resbufpos = (eolp - resbuf) + 2;
			} while (hln != eolp);
		} else if (contentlength != -1) {
			error = copybytes(sd, fd, contentlength, resbuf,
			    &resbuflen, &resbufpos);
			if (error)
				goto conndied;
		} else {
			/*
			 * Not chunked, and no content length header.
			 * Read everything until the server closes the
			 * socket.
			 */
			error = copybytes(sd, fd, INT_MAX, resbuf,
			    &resbuflen, &resbufpos);
			if (error == -1)
				goto conndied;
			pipelined = 0;
		}

		if (fd != -1) {
			close(fd);
			fd = -1;
		}

		fprintf(stderr, "http://%s/%s: %d ", servername, argv[nres],
		    statuscode);
		if (statuscode == 200)
			fprintf(stderr, "OK\n");
		else if (statuscode < 300)
			fprintf(stderr, "Successful (ignored)\n");
		else if (statuscode < 400)
			fprintf(stderr, "Redirection (ignored)\n");
		else
			fprintf(stderr, "Error (ignored)\n");

		/* We've finished this file! */
		nres++;

		/*
		 * If necessary, clean up this connection so that we
		 * can start a new one.
		 */
		if (pipelined == 0)
			goto cleanupconn;
		continue;

conndied:
		/*
		 * Something went wrong -- our connection died, the server
		 * sent us garbage, etc.  If this happened on the first
		 * request we sent over this connection, give up.  Otherwise,
		 * close this connection, open a new one, and reissue the
		 * request.
		 */
		if (nres == firstreq)
			errx(1, "Connection failure");

cleanupconn:
		/*
		 * Clean up our connection and keep on going
		 */
		shutdown(sd, SHUT_RDWR);
		close(sd);
		sd = -1;
		if (fd != -1) {
			close(fd);
			fd = -1;
		}
		if (reqbuf != NULL) {
			free(reqbuf);
			reqbuf = NULL;
		}
		/* Requests sent but not yet answered have to be sent again */
		nreq = nres;
		res = res0;
		pipelined = 0;
		resbufpos = resbuflen = 0;
		continue;
	}

	/* A connection that stayed in pipelined mode is still open */
	if (sd != -1)
		close(sd);

	free(resbuf);
	freeaddrinfo(res0);

	return 0;
}
599