1/* Kernel module to match a string into a packet.
2 *
3 * Copyright (C) 2000 Emmanuel Roger  <winfield@freegates.be>
4 *
5 * ChangeLog
6 *	19.02.2002: Gianni Tedesco <gianni@ecsc.co.uk>
7 *		Fixed SMP re-entrancy problem using per-cpu data areas
8 *		for the skip/shift tables.
9 *	02.05.2001: Gianni Tedesco <gianni@ecsc.co.uk>
10 *		Fixed kernel panic, due to overrunning boyer moore string
11 *		tables. Also slightly tweaked heuristic for deciding what
12 * 		search algo to use.
13 * 	27.01.2001: Gianni Tedesco <gianni@ecsc.co.uk>
14 * 		Implemented Boyer Moore Sublinear search algorithm
15 * 		alongside the existing linear search based on memcmp().
16 * 		Also a quick check to decide which method to use on a per
17 * 		packet basis.
18 */
19
/* Kernel module to match a string against the HTTP request header of a packet.
 * Description:
 *   This is a kernel module for web content inspection. It is derived from
 *   the 'string' match module credited above.
 *
 *   The module plugs into the Netfilter framework as an extended packet
 *   matching module.
 */
28
29#include <linux/module.h>
30#include <linux/skbuff.h>
31#include <net/sock.h>
32
33#include <linux/netfilter_ipv4/ip_tables.h>
34#include <linux/netfilter_ipv4/ipt_webstr.h>
35
/*
 * Minimal ASCII-only character classification and case conversion helpers.
 *
 * These are function-like macros: the argument is evaluated more than once,
 * so do not pass expressions with side effects (e.g. toupper(*p++)).
 * Characters outside the tested ranges are returned unchanged by
 * toupper()/tolower().
 */
#define	isdigit(x) ((x) >= '0' && (x) <= '9')
#define	isupper(x) (((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z'))
#define	islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
#define	isalpha(x) (isupper(x) || islower(x))
/* Convert only lowercase letters; digits, punctuation etc. pass through. */
#define	toupper(x) (islower(x) ? ((x) - 'a' + 'A') : (x))
#define tolower(x) (isupper(x) ? ((x) - 'A' + 'a') : (x))
42
/*
 * split(word, wordlist, next, delim) { body }
 *
 * Loop header that walks 'wordlist', a string of items separated by 'delim',
 * and runs the following statement once per item with the current item
 * copied into 'word'.
 *
 *   word     - destination buffer; must be a real char array, because
 *              sizeof(word) is used as its capacity.
 *   wordlist - NUL-terminated list to iterate over (not modified).
 *   next     - char * cursor owned by the macro; after each item it points
 *              just past the delimiter that ended the item, or is NULL when
 *              no further delimiter exists.
 *   delim    - separator; must be a char array or string literal, because
 *              sizeof(delim) - 1 is used as its length.
 *
 * Each step copies up to sizeof(word) bytes from the cursor into 'word' and
 * then cuts 'word' at the first delimiter (or at its last byte when the rest
 * of the list contains no delimiter).  The loop stops as soon as the
 * extracted item is empty (strlen(word) == 0); when the cursor is NULL the
 * step substitutes "" so that the next item is empty.  Consequently an empty
 * element (two adjacent delimiters) also ends the iteration early.
 *
 * Relies on the GNU "a ? : b" conditional extension.
 *
 * NOTE(review): if an item is longer than sizeof(word) - 1 and is followed by
 * a delimiter, strncpy() leaves 'word' unterminated and strstr(word, delim)
 * may not find the delimiter inside the copied bytes; the resulting index is
 * then not a valid position in 'word'.  Callers appear to rely on items being
 * shorter than the buffer -- confirm.
 */
#define split(word, wordlist, next, delim) \
    for (next = wordlist, \
	strncpy(word, next, sizeof(word)), \
	word[(next=strstr(next, delim)) ? strstr(word, delim) - word : sizeof(word) - 1] = '\0', \
	next = next ? next + sizeof(delim) - 1 : NULL ; \
	strlen(word); \
	next = next ? : "", \
	strncpy(word, next, sizeof(word)), \
	word[(next=strstr(next, delim)) ? strstr(word, delim) - word : sizeof(word) - 1] = '\0', \
	next = next ? next + sizeof(delim) - 1 : NULL)
53
/* Maximum number of characters (excluding the terminating NUL) kept for the
 * extracted host and URL strings; also the size of the keyword buffer used
 * when splitting the configured word list. */
#define BUFSIZE 	1024

/* Flags for get_http_info() */
#define HTTP_HOST	0x01	/* extract the "Host: " header value */
#define HTTP_URL	0x02	/* additionally extract the request path */
/* Flags for mangle_http_header() */
#define HTTP_COOKIE	0x04	/* scramble the "Cookie: " header name */

/* Debug logging.  Change "#if 0" to "#if 1" to route SPARQ_LOG() to printk();
 * otherwise the macro expands to nothing and its arguments are not
 * evaluated. */
#if 0
#define SPARQ_LOG       printk
#else
#define SPARQ_LOG(format, args...)
#endif
67
/* HTTP request details extracted from a single packet by get_http_info().
 * The url/urllen pair is only written when the request line could be parsed;
 * host/hostlen only when a "Host: " header with a non-empty value was found. */
typedef struct httpinfo {
    char host[BUFSIZE + 1];	/* "Host: " header value, NUL-terminated */
    int hostlen;		/* number of characters stored in host (<= BUFSIZE) */
    char url[BUFSIZE + 1];	/* host immediately followed by the request path, NUL-terminated */
    int urllen;			/* hostlen plus the number of path characters stored */
} httpinfo_t;
74
/*
 * Locate 'pattern' (plen bytes, compared with memcmp) inside 'data' (dlen
 * bytes) and measure the value that follows it, up to but not including the
 * first 'term' byte.
 *
 * While scanning, a small state machine tracks the "\r\n\r\n" sequence that
 * ends an HTTP header; once it has been seen the scan stops and 0 is
 * returned, so the message body is never searched.
 *
 * Outputs (both are reset to 0 on entry):
 *   *numoff - offset in 'data' of the first byte after the pattern
 *   *numlen - number of bytes between *numoff and the terminator
 *
 * Return 1 for match (pattern and terminator found, outputs valid),
 *        0 for no match,
 *       -1 for partial: either the packet is no longer than the pattern and
 *          is a case-insensitive prefix of it (outputs stay 0), or the
 *          pattern was found but no terminator follows it inside the packet
 *          (*numoff valid, *numlen 0).
 */
static int find_pattern2(const char *data, size_t dlen,
	const char *pattern, size_t plen,
	char term,
	unsigned int *numoff,
	unsigned int *numlen)
{
    size_t i, j;
    int state = 0;

    *numoff = *numlen = 0;

    SPARQ_LOG("%s: pattern = '%s', dlen = %u\n",__FUNCTION__, pattern, dlen);
    if (dlen == 0)
	return 0;

    if (dlen <= plen) {	/* Short packet: try for partial? */
	if (strnicmp(data, pattern, dlen) == 0)
	    return -1;
	else
	    return 0;
    }
    for (i = 0; i <= (dlen - plen); i++) {
	/* DFA : \r\n\r\n :: 1234 */
	if (*(data + i) == '\r') {
	    if (!(state % 2)) state++;	/* forwarding move */
	    else state = 0;		/* reset */
	}
	else if (*(data + i) == '\n') {
	    if (state % 2) state++;
	    else state = 0;
	}
	else state = 0;

	if (state >= 4)		/* end of the HTTP header reached */
	    break;

	/* pattern compare */
	if (memcmp(data + i, pattern, plen) != 0)
	    continue;

	/* Pattern matched: look for the terminator, never reading at or
	 * beyond data[dlen]. */
	*numoff = i + plen;
	for (j = *numoff; j < dlen; j++) {
	    if (data[j] == term) {
		*numlen = j - *numoff;
		return 1;
	    }
	}
	return -1;	/* no terminal char inside the packet */
    }
    return 0;
}
125
126static int mangle_http_header(const struct sk_buff *skb, int flags)
127{
128    struct iphdr *iph = (skb)->nh.iph;
129    struct tcphdr *tcph = (void *)iph + iph->ihl*4;
130    unsigned char *data = (void *)tcph + tcph->doff*4;
131    unsigned int datalen = (skb)->len - (iph->ihl*4) - (tcph->doff*4);
132
133    int found, offset, len;
134    int ret = 0;
135
136    SPARQ_LOG("%s: seq=%u\n", __FUNCTION__, ntohl(tcph->seq));
137
138    /* Basic checking, is it HTTP packet? */
139    if (datalen < 10)
140	return ret;	/* Not enough length, ignore it */
141    if (memcmp(data, "GET ", sizeof("GET ") - 1) != 0 &&
142        memcmp(data, "POST ", sizeof("POST ") - 1) != 0)
143	return ret;	/* Pass it */
144
145    /* COOKIE modification */
146    if (flags & HTTP_COOKIE) {
147	found = find_pattern2(data, datalen, "Cookie: ",
148		sizeof("Cookie: ")-1, '\r', &offset, &len);
149	if (found) {
150	    char c;
151	    offset -= (sizeof("Cookie: ") - 1);
152	    /* Swap the 2rd and 4th bit */
153	    c = *(data + offset + 2) ;
154	    *(data + offset + 2) = *(data + offset + 4) ;
155	    *(data + offset + 4) = c ;
156	    ret++;
157	}
158    }
159
160    return ret;
161}
162
163static int get_http_info(const struct sk_buff *skb, int flags, httpinfo_t *info)
164{
165    struct iphdr *iph = (skb)->nh.iph;
166    struct tcphdr *tcph = (void *)iph + iph->ihl*4;
167    unsigned char *data = (void *)tcph + tcph->doff*4;
168    unsigned int datalen = (skb)->len - (iph->ihl*4) - (tcph->doff*4);
169
170    int found, offset;
171    int hostlen, pathlen;
172    int ret = 0;
173
174    SPARQ_LOG("%s: seq=%u\n", __FUNCTION__, ntohl(tcph->seq));
175
176    /* Basic checking, is it HTTP packet? */
177    if (datalen < 10)
178	return ret;	/* Not enough length, ignore it */
179    if (memcmp(data, "GET ", sizeof("GET ") - 1) != 0 &&
180        memcmp(data, "POST ", sizeof("POST ") - 1) != 0)
181	return ret;	/* Pass it */
182
183    if (!(flags & (HTTP_HOST | HTTP_URL)))
184	return ret;
185
186    /* find the 'Host: ' value */
187    found = find_pattern2(data, datalen, "Host: ",
188	    sizeof("Host: ") - 1, '\r', &offset, &hostlen);
189    SPARQ_LOG("Host found=%d\n", found);
190
191    if (!found || !hostlen)
192	return ret;
193
194    ret++;	/* Host found, increase the return value */
195    hostlen = (hostlen < BUFSIZE) ? hostlen : BUFSIZE;
196    strncpy(info->host, data + offset, hostlen);
197    *(info->host + hostlen) = 0;		/* null-terminated */
198    info->hostlen = hostlen;
199    SPARQ_LOG("HOST=%s, hostlen=%d\n", info->host, info->hostlen);
200
201    if (!(flags & HTTP_URL))
202	return ret;
203
204    /* find the 'GET ' or 'POST ' value */
205    found = find_pattern2(data, datalen, "GET ",
206	    sizeof("GET ") - 1, '\r', &offset, &pathlen);
207    if (!found)
208	found = find_pattern2(data, datalen, "POST ",
209		sizeof("POST ") - 1, '\r', &offset, &pathlen);
210    SPARQ_LOG("GET/POST found=%d\n", found);
211
212    if (!found || (pathlen -= (sizeof(" HTTP/x.x") - 1)) <= 0)/* ignor this field */
213	return ret;
214
215    ret++;	/* GET/POST found, increase the return value */
216    pathlen = ((pathlen + hostlen) < BUFSIZE) ? pathlen : BUFSIZE - hostlen;
217    strncpy(info->url, info->host, hostlen);
218    strncpy(info->url + hostlen, data + offset, pathlen);
219    *(info->url + hostlen + pathlen) = 0;	/* null-terminated */
220    info->urllen = hostlen + pathlen;
221    SPARQ_LOG("URL=%s, urllen=%d\n", info->url, info->urllen);
222
223    return ret;
224}
225
/*
 * Case-insensitive linear search: try every start position in 'haystack'
 * (haystack_len bytes) at which 'needle' (needle_len bytes) still fits and
 * compare with strnicmp().
 *
 * Returns a pointer to the first occurrence inside 'haystack', or NULL when
 * there is none (including when the needle is longer than the haystack).
 */
static char *search_linear (char *needle, char *haystack, int needle_len, int haystack_len)
{
	int last_start = haystack_len - needle_len;
	int pos;

	SPARQ_LOG("%s: haystack=%s, needle=%s\n", __FUNCTION__, haystack, needle);

	for (pos = 0; pos <= last_start; pos++) {
		char *candidate = haystack + pos;

		if (strnicmp(candidate, needle, needle_len) == 0)
			return candidate;
	}

	return NULL;
}
241
242static int
243match(const struct sk_buff *skb,
244      const struct net_device *in,
245      const struct net_device *out,
246      const void *matchinfo,
247      int offset,
248      const void *hdr,
249      u_int16_t datalen,
250      int *hotdrop)
251{
252	const struct ipt_webstr_info *info = matchinfo;
253	struct iphdr *ip = skb->nh.iph;
254	proc_ipt_search search=search_linear;
255
256	char token[] = "<&nbsp;>";
257	char *wordlist = (char *)&info->string;
258	httpinfo_t htinfo;
259	int flags = 0;
260	int found = 0;
261	long int opt = 0;
262
263	if (!ip || info->len < 1)
264	    return 0;
265
266	SPARQ_LOG("\n************************************************\n"
267		"%s: type=%s\n", __FUNCTION__, (info->type == IPT_WEBSTR_URL)
268		? "IPT_WEBSTR_URL"  : (info->type == IPT_WEBSTR_HOST)
269		? "IPT_WEBSTR_HOST" : "IPT_WEBSTR_CONTENT" );
270
271	/* Determine the flags value for get_http_info(), and mangle packet
272	 * if needed. */
273	switch(info->type)
274	{
275	    case IPT_WEBSTR_URL:	/* fall through */
276		flags |= HTTP_URL;
277
278	    case IPT_WEBSTR_HOST:
279		flags |= HTTP_HOST;
280		break;
281
282	    case IPT_WEBSTR_CONTENT:
283		opt = simple_strtol(wordlist, (char **)NULL, 10);
284		SPARQ_LOG("%s: string=%s, opt=%#lx\n", __FUNCTION__, wordlist, opt);
285
286		if (opt & (BLK_JAVA | BLK_ACTIVE | BLK_PROXY))
287		    flags |= HTTP_URL;
288		if (opt & BLK_PROXY)
289		    flags |= HTTP_HOST;
290		if (opt & BLK_COOKIE)
291		    mangle_http_header(skb, HTTP_COOKIE);
292		break;
293
294	    default:
295		printk("%s: Sorry! Cannot find this match option.\n", __FILE__);
296		return 0;
297	}
298
299	/* Get the http header info */
300	if (get_http_info(skb, flags, &htinfo) < 1)
301	    return 0;
302
303	/* Check if the http header content contains the forbidden keyword */
304	if (info->type == IPT_WEBSTR_HOST || info->type == IPT_WEBSTR_URL) {
305	    int nlen = 0, hlen = 0;
306	    char needle[BUFSIZE], *haystack = NULL;
307	    char *next;
308
309	    if (info->type == IPT_WEBSTR_HOST) {
310		haystack = htinfo.host;
311		hlen = htinfo.hostlen;
312	    }
313	    else {
314		haystack = htinfo.url;
315		hlen = htinfo.urllen;
316	    }
317	    split(needle, wordlist, next, token) {
318		nlen = strlen(needle);
319		SPARQ_LOG("keyword=%s, nlen=%d, hlen=%d\n", needle, nlen, hlen);
320		if (!nlen || !hlen || nlen > hlen) continue;
321		if (search(needle, haystack, nlen, hlen) != NULL) {
322		    found = 1;
323		    break;
324		}
325	    }
326	}
327	else {		/* IPT_WEBSTR_CONTENT */
328	    int vicelen;
329
330	    if (opt & BLK_JAVA) {
331		vicelen = sizeof(".js") - 1;
332		if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".js", vicelen) == 0) {
333		    SPARQ_LOG("%s: MATCH....java\n", __FUNCTION__);
334		    found = 1;
335		    goto match_ret;
336		}
337		vicelen = sizeof(".class") - 1;
338		if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".class", vicelen) == 0) {
339		    SPARQ_LOG("%s: MATCH....java\n", __FUNCTION__);
340		    found = 1;
341		    goto match_ret;
342		}
343	    }
344	    if (opt & BLK_ACTIVE){
345		vicelen = sizeof(".ocx") - 1;
346		if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".ocx", vicelen) == 0) {
347		    SPARQ_LOG("%s: MATCH....activex\n", __FUNCTION__);
348		    found = 1;
349		    goto match_ret;
350		}
351		vicelen = sizeof(".cab") - 1;
352		if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".cab", vicelen) == 0) {
353		    SPARQ_LOG("%s: MATCH....activex\n", __FUNCTION__);
354		    found = 1;
355		    goto match_ret;
356		}
357	    }
358	    if (opt & BLK_PROXY){
359		if (strnicmp(htinfo.url + htinfo.hostlen, "http://", sizeof("http://") - 1) == 0) {
360		    SPARQ_LOG("%s: MATCH....proxy\n", __FUNCTION__);
361		    found = 1;
362		    goto match_ret;
363		}
364	    }
365	}
366
367match_ret:
368	SPARQ_LOG("%s: Verdict =======> %s \n",__FUNCTION__
369		, found ? "DROP" : "ACCEPT");
370
371	return (found ^ info->invert);
372}
373
374static int
375checkentry(const char *tablename,
376           const struct ipt_ip *ip,
377           void *matchinfo,
378           unsigned int matchsize,
379           unsigned int hook_mask)
380{
381
382       if (matchsize != IPT_ALIGN(sizeof(struct ipt_webstr_info)))
383               return 0;
384
385       return 1;
386}
387
/*
 * Descriptor for the "webstr" match, passed to ipt_register_match() and
 * ipt_unregister_match() by the module init/exit functions.
 *
 * The initializer is positional, so it depends on the member order of
 * struct ipt_match as declared in ip_tables.h (not visible in this file).
 * NOTE(review): the members are presumably list linkage, match name, match
 * callback, checkentry callback, destroy callback (none) and owning module --
 * confirm against the header before reordering or adding fields.
 */
static struct ipt_match webstr_match
= { { NULL, NULL }, "webstr", &match, &checkentry, NULL, THIS_MODULE };
390
391static int __init init(void)
392{
393	return ipt_register_match(&webstr_match);
394}
395
396static void __exit fini(void)
397{
398	ipt_unregister_match(&webstr_match);
399}
400
/* Declare init() and fini() as the module's entry and exit points. */
module_init(init);
module_exit(fini);
403