1/*	$NetBSD: load_http.c,v 1.7 2012/02/15 17:55:06 riz Exp $	*/
2
3/*
4 * Copyright (C) 2006 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 *
8 * Id: load_http.c,v 1.1.2.2 2009/07/23 20:01:12 darrenr Exp
9 */
10
11#include "ipf.h"
12
/*
 * Because the URL can be included twice in the buffer, once as the
 * full path for the "GET" and once as the "Host:", the buffer it is
 * put in needs to be larger than 512*2 to make room for the supporting
 * text. Why not just use snprintf and truncate? The warning about the
 * URL being too long tells you something is wrong and does not fetch
 * any data - just truncating the URL (with snprintf, etc) and sending
 * that to the server would allow an unknown and unintended action
 * to happen.
 */
/* Longest URL, in bytes, that load_http() is willing to process. */
#define	MAX_URL_LEN	512
/* Size of the request/response buffer: the URL twice plus fixed text. */
#define	LOAD_BUFSIZE	(MAX_URL_LEN * 2 + 128)
25
26/*
27 * Format expected is one addres per line, at the start of each line.
28 */
29alist_t *
30load_http(char *url)
31{
32	char *s, *t, *u, buffer[LOAD_BUFSIZE], *myurl;
33	int fd, len, left, port, endhdr, removed;
34	alist_t *a, *rtop, *rbot;
35	struct sockaddr_in sin;
36	struct hostent *host;
37	size_t rem;
38
39	/*
40	 * More than this would just be absurd.
41	 */
42	if (strlen(url) > MAX_URL_LEN) {
43		fprintf(stderr, "load_http has a URL > %d bytes?!\n",
44			MAX_URL_LEN);
45		return NULL;
46	}
47
48	fd = -1;
49	rtop = NULL;
50	rbot = NULL;
51
52	myurl = strdup(url);
53	if (myurl == NULL)
54		goto done;
55
56	rem = sizeof(buffer);
57	left = snprintf(buffer, rem, "GET %s HTTP/1.0\r\n", url);
58	if (left < 0 || left > rem)
59		goto done;
60	rem -= left;
61
62	s = myurl + 7;			/* http:// */
63	t = strchr(s, '/');
64	if (t == NULL) {
65		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
66		goto done;
67	}
68	*t++ = '\0';
69
70	/*
71	 * 10 is the length of 'Host: \r\n\r\n' below.
72	 */
73	if (strlen(s) + strlen(buffer) + 10 > sizeof(buffer)) {
74		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
75		free(myurl);
76		return NULL;
77	}
78
79	u = strchr(s, '@');
80	if (u != NULL)
81		s = u + 1;		/* AUTH */
82
83	left = snprintf(buffer + left, rem, "Host: %s\r\n\r\n", s);
84	if (left < 0 || left > rem)
85		goto done;
86	rem -= left;
87
88	u = strchr(s, ':');
89	if (u != NULL) {
90		*u++ = '\0';
91		port = atoi(u);
92		if (port < 0 || port > 65535)
93			goto done;
94	} else {
95		port = 80;
96	}
97
98	memset(&sin, 0, sizeof(sin));
99	sin.sin_family = AF_INET;
100	sin.sin_port = htons(port);
101
102	if (isdigit((unsigned char)*s)) {
103		if (inet_aton(s, &sin.sin_addr) == -1) {
104			goto done;
105		}
106	} else {
107		host = gethostbyname(s);
108		if (host == NULL)
109			goto done;
110		memcpy(&sin.sin_addr, host->h_addr_list[0],
111		       sizeof(sin.sin_addr));
112	}
113
114	fd = socket(AF_INET, SOCK_STREAM, 0);
115	if (fd == -1)
116		goto done;
117
118	if (connect(fd, (struct sockaddr *)&sin, sizeof(sin)) == -1)
119		goto done;
120
121	len = strlen(buffer);
122	if (write(fd, buffer, len) != len)
123		goto done;
124
125	s = buffer;
126	endhdr = 0;
127	left = sizeof(buffer) - 1;
128
129	while ((len = read(fd, s, left)) > 0) {
130		s[len] = '\0';
131		left -= len;
132		s += len;
133
134		if (endhdr >= 0) {
135			if (endhdr == 0) {
136				t = strchr(buffer, ' ');
137				if (t == NULL)
138					continue;
139				t++;
140				if (*t != '2')
141					break;
142			}
143
144			u = buffer;
145			while ((t = strchr(u, '\r')) != NULL) {
146				if (t == u) {
147					if (*(t + 1) == '\n') {
148						u = t + 2;
149						endhdr = -1;
150						break;
151					} else
152						t++;
153				} else if (*(t + 1) == '\n') {
154					endhdr++;
155					u = t + 2;
156				} else
157					u = t + 1;
158			}
159			if (endhdr >= 0)
160				continue;
161			removed = (u - buffer) + 1;
162			memmove(buffer, u, (sizeof(buffer) - left) - removed);
163			s -= removed;
164			left += removed;
165		}
166
167		do {
168			t = strchr(buffer, '\n');
169			if (t == NULL)
170				break;
171
172			*t++ = '\0';
173			for (u = buffer; isdigit((unsigned char)*u) ||
174			    (*u == '.'); u++)
175				continue;
176			if (*u == '/') {
177				char *slash;
178
179				slash = u;
180				u++;
181				while (isdigit((unsigned char)*u))
182					u++;
183				if (!isspace((unsigned char)*u) && *u)
184					u = slash;
185			}
186			*u = '\0';
187
188			a = alist_new(4, buffer);
189			if (a != NULL) {
190				if (rbot != NULL)
191					rbot->al_next = a;
192				else
193					rtop = a;
194				rbot = a;
195			}
196
197			removed = t - buffer;
198			memmove(buffer, t, sizeof(buffer) - left - removed);
199			s -= removed;
200			left += removed;
201
202		} while (1);
203	}
204
205done:
206	if (myurl != NULL)
207		free(myurl);
208	if (fd != -1)
209		close(fd);
210	return rtop;
211}
212