• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/netfilter/
1/*  webmon --	A netfilter module to match URLs in HTTP requests
2 *  		This module can match using string match or regular expressions
3 *  		Originally designed for use with Gargoyle router firmware (gargoyle-router.com)
4 *
5 *
6 *  Copyright © 2008-2010 by Eric Bishop <eric@gargoyle-router.com>
7 *
8 *  This file is free software: you may copy, redistribute and/or modify it
9 *  under the terms of the GNU General Public License as published by the
10 *  Free Software Foundation, either version 2 of the License, or (at your
11 *  option) any later version.
12 *
13 *  This file is distributed in the hope that it will be useful, but
14 *  WITHOUT ANY WARRANTY; without even the implied warranty of
15 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 *  General Public License for more details.
17 *
18 *  You should have received a copy of the GNU General Public License
19 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 */
21
22#include <linux/kernel.h>
23#include <linux/version.h>
24#include <linux/module.h>
25#include <linux/skbuff.h>
26#include <linux/if_ether.h>
27#include <linux/string.h>
28#include <linux/ctype.h>
29#include <net/sock.h>
30#include <net/ip.h>
31#include <net/tcp.h>
32#include <linux/time.h>
33#include <linux/spinlock.h>
34#include <linux/proc_fs.h>
35#include <linux/netfilter_ipv4/ip_tables.h>
36#include <linux/netfilter_ipv4/ipt_webmon.h>
37#include <linux/ktime.h>
38#include <linux/ip.h>
39#include <linux/netfilter/x_tables.h>
40
41#include "tree_map.h"
42
43#define STRIP "%pI4"
44
45MODULE_LICENSE("GPL");
46MODULE_AUTHOR("Eric Bishop");
47MODULE_DESCRIPTION("Monitor URL in HTTP Requests, designed for use with Gargoyle web interface (www.gargoyle-router.com)");
48
/* One entry of a doubly linked list of recently seen values (domains or searches). */
typedef struct qn {
	uint32_t src_ip;	/* client address as taken from the IP header (network byte order) */
	char *value;		/* heap-allocated string, released together with the node */
	struct timeval time;	/* time the value was added or last refreshed */
	struct qn *next;	/* neighbour towards the tail of the list */
	struct qn *previous;	/* neighbour towards the head of the list; NULL for the head */
} queue_node;
56
57typedef struct {
58	queue_node *first;
59	queue_node *last;
60	int length;
61} queue;
62
63static string_map *domain_map = NULL;
64static queue *recent_domains = NULL;
65
66static string_map *search_map = NULL;
67static queue *recent_searches = NULL;
68
69static int max_domain_queue_length = 5;
70static int max_search_queue_length = 5;
71
72static DEFINE_SPINLOCK(webmon_lock);
73
74static void update_queue_node_time(queue_node * update_node, queue * full_queue)
75{
76	struct timeval t;
77	do_gettimeofday(&t);
78	update_node->time = t;
79
80	/* move to front of queue if not already at front of queue */
81	if (update_node->previous != NULL) {
82		queue_node *p = update_node->previous;
83		queue_node *n = update_node->next;
84		p->next = n;
85		if (n != NULL) {
86			n->previous = p;
87		} else {
88			full_queue->last = p;
89		}
90		update_node->previous = NULL;
91		update_node->next = full_queue->first;
92		full_queue->first->previous = update_node;
93		full_queue->first = update_node;
94	}
95}
96
97void add_queue_node(uint32_t src_ip, char *value, queue * full_queue, string_map * queue_index, char *queue_index_key,
98		    uint32_t max_queue_length)
99{
100
101	queue_node *new_node = (queue_node *) kmalloc(sizeof(queue_node), GFP_ATOMIC);
102	char *dyn_value = kernel_strdup(value);
103	struct timeval t;
104
105	if (new_node == NULL || dyn_value == NULL) {
106		if (dyn_value) {
107			kfree(dyn_value);
108		}
109		if (new_node) {
110			kfree(new_node);
111		};
112
113		return;
114	}
115	set_map_element(queue_index, queue_index_key, (void *)new_node);
116
117	do_gettimeofday(&t);
118	new_node->time = t;
119	new_node->src_ip = src_ip;
120	new_node->value = dyn_value;
121	new_node->previous = NULL;
122
123	new_node->next = full_queue->first;
124	if (full_queue->first != NULL) {
125		full_queue->first->previous = new_node;
126	}
127	full_queue->first = new_node;
128	full_queue->last = (full_queue->last == NULL) ? new_node : full_queue->last;
129	full_queue->length = full_queue->length + 1;
130
131	if (full_queue->length > max_queue_length) {
132		queue_node *old_node = full_queue->last;
133		full_queue->last = old_node->previous;
134		full_queue->last->next = NULL;
135		full_queue->first = old_node->previous == NULL ? NULL : full_queue->first;	/*shouldn't be needed, but just in case... */
136		full_queue->length = full_queue->length - 1;
137
138		sprintf(queue_index_key, STRIP "@%s", &old_node->src_ip, old_node->value);
139		remove_map_element(queue_index, queue_index_key);
140
141		kfree(old_node->value);
142		kfree(old_node);
143	}
144
145	/*
146	   queue_node* n = full_queue->first;
147	   while(n != NULL)
148	   {
149	   printf("%ld\t%s\t%s\t%s\n", (unsigned long)n->time, n->src_ip, n->dst_ip, n->domain);
150	   n = (queue_node*)n->next;
151	   }
152	   printf("\n\n");
153	 */
154}
155
156void add_queue_node_last(uint32_t src_ip, char *value, time_t sec, queue * full_queue, string_map * queue_index,
157			 char *queue_index_key, uint32_t max_queue_length)
158{
159	queue_node *new_node;
160	char *dyn_value;
161
162	if (full_queue->length >= max_queue_length)
163		return;
164
165	new_node = (queue_node *) kzalloc(sizeof(queue_node), GFP_ATOMIC);
166	dyn_value = kernel_strdup(value);
167
168	if (new_node == NULL || dyn_value == NULL) {
169		kfree(dyn_value);
170		kfree(new_node);
171		return;
172	}
173	set_map_element(queue_index, queue_index_key, (void *)new_node);
174
175	new_node->time.tv_sec = sec;
176	new_node->src_ip = src_ip;
177	new_node->value = dyn_value;
178
179	new_node->previous = full_queue->last;
180	if (full_queue->last != NULL) {
181		full_queue->last->next = new_node;
182	}
183	full_queue->last = new_node;
184	full_queue->first = (full_queue->first == NULL) ? new_node : full_queue->first;
185	full_queue->length = full_queue->length + 1;
186}
187
188void destroy_queue(queue * q)
189{
190	queue_node *last_node = q->last;
191	while (last_node != NULL) {
192		queue_node *previous_node = last_node->previous;
193		free(last_node->value);
194		free(last_node);
195		last_node = previous_node;
196	}
197	free(q);
198}
199
#ifdef REMOVE
/*
 * Case-insensitive comparison of at most count characters.
 * Returns 0 when the compared prefixes are equal, otherwise the difference
 * of the first mismatching characters after upper-casing (narrowed to a
 * signed char).  Only compiled when REMOVE is defined.
 */
int strnicmp(const char *cs, const char *ct, size_t count)
{
	signed char diff = 0;

	for (; count != 0; count--) {
		diff = toupper(*cs) - toupper(*ct);
		ct++;
		if (diff != 0) {
			break;
		}
		if (*cs == '\0') {
			break;	/* both strings ended together */
		}
		cs++;
	}
	return diff;
}
#endif
214
/*
 * Case-insensitive search for find within the first slen characters of s.
 * The search also stops at a NUL in s.  Returns a pointer to the start of
 * the match, s itself when find is empty, or NULL when there is no match.
 */
char *strnistr(const char *s, const char *find, size_t slen)
{
	const char first = *find;
	const char *rest;
	size_t rest_len;

	if (first == '\0') {
		return ((char *)s);
	}

	rest = find + 1;
	rest_len = strlen(rest);

	for (;;) {
		/* advance past the next occurrence of the first character */
		for (;;) {
			char current;

			if (slen < 1) {
				return (NULL);
			}
			current = *s;
			if (current == '\0') {
				return (NULL);
			}
			--slen;
			++s;
			if (toupper(current) == toupper(first)) {
				break;
			}
		}

		/* not enough room left for the remainder of the pattern */
		if (rest_len > slen) {
			return (NULL);
		}
		if (strnicmp(s, rest, rest_len) == 0) {
			return ((char *)(s - 1));
		}
	}
}
242
/*
 * Report whether s1 and s2 differ by at most max_edit characters.
 *
 * NOTE: This is not quite real edit distance -- all differences are assumed
 *       to be in one contiguous block.  The common prefix and the common
 *       suffix are stripped; what remains of each string is taken as the
 *       edit size.  If the differences are not contiguous the computed value
 *       is greater than the real edit distance, i.e. it is an upper bound.
 *
 * Returns 1 when both remainders are no longer than max_edit, 0 otherwise
 * (including when either string is NULL).
 */
int within_edit_distance(char *s1, char *s2, int max_edit)
{
	size_t rest1;
	size_t rest2;
	size_t prefix = 0;

	if (s1 == NULL || s2 == NULL) {
		return 0;
	}

	rest1 = strlen(s1);
	rest2 = strlen(s2);

	/* strip the common prefix */
	while (rest1 > 0 && rest2 > 0 && s1[prefix] == s2[prefix]) {
		prefix++;
		rest1--;
		rest2--;
	}

	/*
	 * Strip the common suffix of what is left.  Working with remaining
	 * lengths lets the first unmatched character take part in the
	 * comparison and never forms a pointer in front of either string.
	 */
	while (rest1 > 0 && rest2 > 0 && s1[prefix + rest1 - 1] == s2[prefix + rest2 - 1]) {
		rest1--;
		rest2--;
	}

	if (max_edit < 0) {
		return 0;
	}
	return rest1 <= (size_t)max_edit && rest2 <= (size_t)max_edit ? 1 : 0;
}
279
/*
 * line is the line to be parsed -- it is not modified in any way
 * separators / num_separators give the set of separator characters
 * max_pieces indicates number of pieces to return, if negative this is determined dynamically
 * include_remainder_at_max indicates whether the last piece, when max pieces are reached,
 * 	should be what it would normally be (0) or the entire remainder of the line (1)
 * 	if max_pieces < 0 this parameter is ignored
 * num_pieces receives the number of pieces stored in the result
 *
 * returns all non-separator pieces in a line as a NULL-terminated array
 * result is dynamically allocated, MUST be freed after call -- even if
 * line is empty or NULL (you still get a valid char** pointer to a NULL char*).
 * Each piece is a separate allocation as well.
 * NULL is returned only when the array itself cannot be allocated; if a piece
 * cannot be allocated the pieces gathered so far are returned.
 */
static int webmon_is_separator(char c, const char *separators, int num_separators)
{
	int sep_index;

	for (sep_index = 0; sep_index < num_separators; sep_index++) {
		if (separators[sep_index] == c) {
			return 1;
		}
	}
	return 0;
}

/* allocate a NUL-terminated copy of the first length characters of start */
static char *webmon_copy_piece(const char *start, size_t length)
{
	char *piece = (char *)malloc((length + 1) * sizeof(char));

	if (piece != NULL) {
		memcpy(piece, start, length);
		piece[length] = '\0';
	}
	return piece;
}

char **split_on_separators(char *line, char *separators, int num_separators, int max_pieces, int include_remainder_at_max,
			   unsigned long *num_pieces)
{
	char **split;
	const char *cursor;
	int split_index = 0;

	*num_pieces = 0;
	if (line == NULL) {
		split = (char **)malloc((1) * sizeof(char *));
		if (split != NULL) {
			split[0] = NULL;
		}
		return split;
	}

	if (max_pieces < 0) {
		/* count number of separator characters in line -- this count + 1 is an upperbound on number of pieces */
		int separator_count = 0;
		for (cursor = line; *cursor != '\0'; cursor++) {
			separator_count = separator_count + webmon_is_separator(*cursor, separators, num_separators);
		}
		max_pieces = separator_count + 1;
	}

	split = (char **)malloc((1 + (size_t)max_pieces) * sizeof(char *));
	if (split == NULL) {
		return NULL;
	}
	split[0] = NULL;

	cursor = line;
	while (split_index < max_pieces) {
		size_t piece_length = 0;
		char *next_piece;

		/* skip separators in front of the next piece */
		while (*cursor != '\0' && webmon_is_separator(*cursor, separators, num_separators)) {
			cursor++;
		}
		if (*cursor == '\0') {
			break;
		}

		if (split_index + 1 < max_pieces || include_remainder_at_max <= 0) {
			/* ordinary piece: runs up to the next separator or the end of the line */
			while (cursor[piece_length] != '\0'
			       && !webmon_is_separator(cursor[piece_length], separators, num_separators)) {
				piece_length++;
			}
		} else {
			/* last permitted piece: take the entire remainder of the line */
			piece_length = strlen(cursor);
		}

		next_piece = webmon_copy_piece(cursor, piece_length);
		if (next_piece == NULL) {
			break;	/* out of memory: hand back what was collected */
		}
		split[split_index] = next_piece;
		split[split_index + 1] = NULL;
		split_index++;

		cursor = cursor + piece_length;
	}

	*num_pieces = split_index;
	return split;
}
390
/*
 * Pull the request path and the Host header out of the start of an HTTP
 * request.
 *
 * packet_data    start of the TCP payload; NOT necessarily NUL terminated,
 *                so every scan below is bounded by packet_length
 * packet_length  number of valid bytes at packet_data
 * domain         receives the lower-cased host name (without port), or ""
 * path           receives the request path, or ""
 *
 * Both outputs are truncated to 624 characters plus the terminator, so the
 * caller's buffers must hold at least 625 bytes.
 */
static void extract_url(const unsigned char *packet_data, int packet_length, char *domain, char *path)
{
	enum { MAX_FIELD_LENGTH = 624 };	/* prevent overflow of the output buffers */
	const char *data = (const char *)packet_data;
	const char *match_ptr;
	int path_start;
	int path_end;
	int path_length;
	int last_header_index;
	int host_start;
	int host_end;
	int host_length;
	int index;

	domain[0] = '\0';
	path[0] = '\0';

	if (packet_length < 2) {
		return;
	}

	/* get path portion of URL: it sits between the first and second run of spaces */
	match_ptr = strnistr(data, " ", packet_length);
	if (match_ptr == NULL) {
		return;
	}
	path_start = (int)(match_ptr - data);
	while (path_start < packet_length && data[path_start] == ' ') {
		path_start++;
	}

	path_end = path_start;
	while (path_end < packet_length && data[path_end] != ' ' && data[path_end] != '\0') {
		path_end++;
	}
	if (path_end >= packet_length || data[path_end] != ' ') {
		return;		/* no space terminating the path inside the packet */
	}

	path_length = path_end - path_start;
	if (path_length > MAX_FIELD_LENGTH) {
		path_length = MAX_FIELD_LENGTH;
	}
	memcpy(path, data + path_start, path_length);
	path[path_length] = '\0';

	/* get header length: headers end at "\n\n" or "\n\r\n", or else at the end of the packet */
	last_header_index = 2;
	while (last_header_index < packet_length) {
		if (data[last_header_index] == '\n'
		    && (data[last_header_index - 1] == '\n'
			|| (data[last_header_index - 2] == '\n' && data[last_header_index - 1] == '\r'))) {
			break;
		}
		last_header_index++;
	}

	/* get domain portion of URL */
	match_ptr = strnistr(data, "Host:", last_header_index);
	if (match_ptr == NULL) {
		return;
	}
	host_start = (int)(match_ptr - data) + 5;	/* character after "Host:" */
	while (host_start < last_header_index && data[host_start] == ' ') {
		host_start++;
	}

	/* the host name ends at line end, whitespace or the port separator */
	host_end = host_start;
	while (host_end < last_header_index
	       && data[host_end] != '\n' && data[host_end] != '\r'
	       && data[host_end] != ' ' && data[host_end] != ':') {
		host_end++;
	}

	host_length = host_end - host_start;
	if (host_length > MAX_FIELD_LENGTH) {
		host_length = MAX_FIELD_LENGTH;
	}
	for (index = 0; index < host_length; index++) {
		domain[index] = (char)tolower((unsigned char)data[host_start + index]);
	}
	domain[host_length] = '\0';
}
472
473#ifdef CONFIG_PROC_FS
474
475static void *webmon_proc_start(struct seq_file *seq, loff_t * loff_pos)
476{
477	static unsigned long counter = 0;
478
479	/* beginning a new sequence ? */
480	if (*loff_pos == 0) {
481		/* yes => return a non null value to begin the sequence */
482		return &counter;
483	} else {
484		/* no => it's the end of the sequence, return end to stop reading */
485		*loff_pos = 0;
486		return NULL;
487	}
488}
489
490static void *webmon_proc_next(struct seq_file *seq, void *v, loff_t * pos)
491{
492	return NULL;
493}
494
/* seq_file stop callback: webmon_proc_start acquires nothing, so there is nothing to release. */
static void webmon_proc_stop(struct seq_file *seq, void *v)
{
}
499
500static int webmon_proc_domain_show(struct seq_file *s, void *v)
501{
502	queue_node *next_node;
503
504	spin_lock_bh(&webmon_lock);
505
506	next_node = recent_domains->first;
507	while (next_node != NULL) {
508		seq_printf(s, "%ld\t" STRIP "\t%s\n", (unsigned long)(next_node->time).tv_sec, &next_node->src_ip,
509			   next_node->value);
510		next_node = (queue_node *) next_node->next;
511	}
512	spin_unlock_bh(&webmon_lock);
513
514	return 0;
515}
516
517static int webmon_proc_search_show(struct seq_file *s, void *v)
518{
519	queue_node *next_node;
520
521	spin_lock_bh(&webmon_lock);
522
523	next_node = recent_searches->first;
524	while (next_node != NULL) {
525		seq_printf(s, "%ld\t" STRIP "\t%s\n", (unsigned long)(next_node->time).tv_sec, &next_node->src_ip,
526			   next_node->value);
527		next_node = (queue_node *) next_node->next;
528	}
529	spin_unlock_bh(&webmon_lock);
530
531	return 0;
532}
533
534static struct seq_operations webmon_proc_domain_sops = {
535	.start = webmon_proc_start,
536	.next = webmon_proc_next,
537	.stop = webmon_proc_stop,
538	.show = webmon_proc_domain_show
539};
540
541static struct seq_operations webmon_proc_search_sops = {
542	.start = webmon_proc_start,
543	.next = webmon_proc_next,
544	.stop = webmon_proc_stop,
545	.show = webmon_proc_search_show
546};
547
548static int webmon_proc_domain_open(struct inode *inode, struct file *file)
549{
550	return seq_open(file, &webmon_proc_domain_sops);
551}
552
553static int webmon_proc_search_open(struct inode *inode, struct file *file)
554{
555	return seq_open(file, &webmon_proc_search_sops);
556}
557
558static struct file_operations webmon_proc_domain_fops = {
559	.owner = THIS_MODULE,
560	.open = webmon_proc_domain_open,
561	.read = seq_read,
562	.llseek = seq_lseek,
563	.release = seq_release
564};
565
566static struct file_operations webmon_proc_search_fops = {
567	.owner = THIS_MODULE,
568	.open = webmon_proc_search_open,
569	.read = seq_read,
570	.llseek = seq_lseek,
571	.release = seq_release
572};
573
574#endif
575
576static int ipt_webmon_set_ctl(struct sock *sk, int cmd, void *user, u_int32_t len)
577{
578
579	char *buffer = kmalloc(len, GFP_ATOMIC);
580	if (buffer == NULL) {	/* check for malloc failure */
581		return 0;
582	}
583	copy_from_user(buffer, user, len);
584
585	if (len > 1 + sizeof(uint32_t)) {
586		unsigned char type = buffer[0];
587		uint32_t max_queue_length = *((uint32_t *) (buffer + 1));
588		char *data = buffer + 1 + sizeof(uint32_t);
589		char newline_terminator[] = { '\n', '\r' };
590		char whitespace_chars[] = { '\t', ' ' };
591
592		spin_lock_bh(&webmon_lock);
593		if (type == WEBMON_DOMAIN || type == WEBMON_SEARCH) {
594			unsigned long num_lines;
595			unsigned long line_index;
596			unsigned long num_destroyed;
597			char **lines = split_on_separators(data, newline_terminator, 2, -1, 0, &num_lines);
598
599			/* destroy and re-initialize queue and map */
600			if (type == WEBMON_DOMAIN) {
601				destroy_map(domain_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed);
602				destroy_queue(recent_domains);
603				recent_domains = (queue *) malloc(sizeof(queue));
604				recent_domains->first = NULL;
605				recent_domains->last = NULL;
606				recent_domains->length = 0;
607				domain_map = initialize_map(0);
608
609				max_domain_queue_length = max_queue_length;
610			} else if (type == WEBMON_SEARCH) {
611				destroy_map(search_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed);
612				destroy_queue(recent_searches);
613				recent_searches = (queue *) malloc(sizeof(queue));
614				recent_searches->first = NULL;
615				recent_searches->last = NULL;
616				recent_searches->length = 0;
617				search_map = initialize_map(0);
618
619				max_search_queue_length = max_queue_length;
620			}
621
622			for (line_index = 0; line_index < num_lines; line_index++) {
623				char *line = lines[line_index];
624				unsigned long num_pieces;
625				char **split = split_on_separators(line, whitespace_chars, 2, -1, 0, &num_pieces);
626
627				//check that there are 3 pieces (time, src_ip, value)
628				int length;
629				for (length = 0; split[length] != NULL; length++) {
630				}
631				if (length == 3) {
632					time_t time;
633					int parsed_ip[4];
634					int valid_ip =
635					    sscanf(split[1], "%d.%d.%d.%d", parsed_ip, parsed_ip + 1, parsed_ip + 2,
636						   parsed_ip + 3);
637					if (valid_ip == 4) {
638						valid_ip = parsed_ip[0] <= 255 && parsed_ip[1] <= 255 && parsed_ip[2] <= 255
639						    && parsed_ip[3] <= 255 ? valid_ip : 0;
640					}
641					if (sscanf(split[0], "%ld", &time) > 0 && valid_ip == 4) {
642						char *value = split[2];
643						char value_key[700];
644						uint32_t ip =
645						    (parsed_ip[0] << 24) + (parsed_ip[1] << 16) + (parsed_ip[2] << 8) +
646						    (parsed_ip[3]);
647						ip = htonl(ip);
648						sprintf(value_key, STRIP "@%s", &ip, value);
649						if (type == WEBMON_DOMAIN) {
650							add_queue_node_last(ip, value, time, recent_domains, domain_map,
651									    value_key, max_domain_queue_length);
652						} else if (type == WEBMON_SEARCH) {
653							add_queue_node_last(ip, value, time, recent_searches, search_map,
654									    value_key, max_search_queue_length);
655						}
656					}
657				}
658
659				for (length = 0; split[length] != NULL; length++) {
660					free(split[length]);
661				}
662				free(split);
663				free(line);
664			}
665			free(lines);
666		}
667
668		spin_unlock_bh(&webmon_lock);
669	}
670
671	return 1;
672}
673
674static struct nf_sockopt_ops ipt_webmon_sockopts = {
675	.pf = PF_INET,
676	.set_optmin = WEBMON_SET,
677	.set_optmax = WEBMON_SET + 1,
678	.set = ipt_webmon_set_ctl,
679};
680
681static bool match(const struct sk_buff *skb, struct xt_action_param *par)
682{
683	const struct ipt_webmon_info *info = (const struct ipt_webmon_info *)(par->matchinfo);
684
685	struct iphdr *iph;
686
687	/* linearize skb if necessary */
688	struct sk_buff *linear_skb;
689	int skb_copied;
690	if (skb_is_nonlinear(skb)) {
691		linear_skb = skb_copy(skb, GFP_ATOMIC);
692		skb_copied = 1;
693	} else {
694		linear_skb = (struct sk_buff *)skb;
695		skb_copied = 0;
696	}
697
698	/* ignore packets that are not TCP */
699	iph = (struct iphdr *)(skb_network_header(skb));
700	if (iph->protocol == IPPROTO_TCP) {
701		/* get payload */
702		struct tcphdr *tcp_hdr = (struct tcphdr *)(((unsigned char *)iph) + (iph->ihl * 4));
703		unsigned short payload_offset = (tcp_hdr->doff * 4) + (iph->ihl * 4);
704		unsigned char *payload = ((unsigned char *)iph) + payload_offset;
705		unsigned short payload_length = ntohs(iph->tot_len) - payload_offset;
706
707		/* if payload length <= 10 bytes don't bother doing a check, otherwise check for match */
708		if (payload_length > 10) {
709			/* are we dealing with a web page request */
710			if (strnicmp((char *)payload, "GET ", 4) == 0 || strnicmp((char *)payload, "POST ", 5) == 0
711			    || strnicmp((char *)payload, "HEAD ", 5) == 0) {
712				char domain[650];
713				char path[650];
714				char domain_key[700];
715				unsigned char save = info->exclude_type == WEBMON_EXCLUDE ? 1 : 0;
716				uint32_t ip_index;
717
718				for (ip_index = 0; ip_index < info->num_exclude_ips; ip_index++) {
719					if ((info->exclude_ips)[ip_index] == iph->saddr) {
720						save = info->exclude_type == WEBMON_EXCLUDE ? 0 : 1;
721					}
722				}
723				for (ip_index = 0; ip_index < info->num_exclude_ranges; ip_index++) {
724					struct ipt_webmon_ip_range r = (info->exclude_ranges)[ip_index];
725					if (ntohl(r.start) >= ntohl(iph->saddr) && ntohl(r.end) <= ntohl(iph->saddr)) {
726						save = info->exclude_type == WEBMON_EXCLUDE ? 0 : 1;
727					}
728				}
729
730				if (save) {
731					extract_url(payload, payload_length, domain, path);
732
733					sprintf(domain_key, STRIP "@%s", &iph->saddr, domain);
734
735					if (strlen(domain) > 0) {
736						char *search_part = NULL;
737						spin_lock_bh(&webmon_lock);
738
739						if (get_string_map_element(domain_map, domain_key)) {
740							//update time
741							update_queue_node_time((queue_node *)
742									       get_map_element(domain_map, domain_key),
743									       recent_domains);
744						} else {
745							//add
746							add_queue_node(iph->saddr, domain, recent_domains, domain_map,
747								       domain_key, max_domain_queue_length);
748						}
749
750						/* printk("domain,path=\"%s\", \"%s\"\n", domain, path); */
751
752						if (strnistr(domain, "google.", 625) != NULL) {
753							search_part = strstr(path, "&q=");
754							search_part = search_part == NULL ? strstr(path, "#q=") : search_part;
755							search_part = search_part == NULL ? strstr(path, "?q=") : search_part;
756							search_part = search_part == NULL ? search_part : search_part + 3;
757						} else if (strstr(domain, "bing.") != NULL) {
758							search_part = strstr(path, "?q=");
759							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
760							search_part = search_part == NULL ? search_part : search_part + 3;
761						} else if (strstr(domain, "yahoo.") != NULL) {
762							search_part = strstr(path, "?p=");
763							search_part = search_part == NULL ? strstr(path, "&p=") : search_part;
764							search_part = search_part == NULL ? search_part : search_part + 3;
765						} else if (strstr(domain, "lycos.") != NULL) {
766							search_part = strstr(path, "&query=");
767							search_part =
768							    search_part == NULL ? strstr(path, "?query=") : search_part;
769							search_part = search_part == NULL ? search_part : search_part + 7;
770						} else if (strstr(domain, "altavista.") != NULL) {
771							search_part = strstr(path, "&q=");
772							search_part = search_part == NULL ? strstr(path, "?q=") : search_part;
773							search_part = search_part == NULL ? search_part : search_part + 3;
774						} else if (strstr(domain, "duckduckgo.") != NULL) {
775							search_part = strstr(path, "?q=");
776							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
777							search_part = search_part == NULL ? search_part : search_part + 3;
778						} else if (strstr(domain, "baidu.") != NULL) {
779							search_part = strstr(path, "?wd=");
780							search_part = search_part == NULL ? strstr(path, "&wd=") : search_part;
781							search_part = search_part == NULL ? search_part : search_part + 4;
782						} else if (strstr(domain, "search.") != NULL) {
783							search_part = strstr(path, "?q=");
784							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
785							search_part = search_part == NULL ? search_part : search_part + 3;
786						} else if (strstr(domain, "aol.") != NULL) {
787							search_part = strstr(path, "&q=");
788							search_part = search_part == NULL ? strstr(path, "?q=") : search_part;
789							search_part = search_part == NULL ? search_part : search_part + 3;
790						} else if (strstr(domain, "ask.") != NULL) {
791							search_part = strstr(path, "?q=");
792							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
793							search_part = search_part == NULL ? search_part : search_part + 3;
794						} else if (strstr(domain, "yandex.") != NULL) {
795							search_part = strstr(path, "?text=");
796							search_part =
797							    search_part == NULL ? strstr(path, "&text=") : search_part;
798							search_part = search_part == NULL ? search_part : search_part + 6;
799						} else if (strstr(domain, "naver.") != NULL) {
800							search_part = strstr(path, "&query=");
801							search_part =
802							    search_part == NULL ? strstr(path, "?query=") : search_part;
803							search_part = search_part == NULL ? search_part : search_part + 7;
804						} else if (strstr(domain, "daum.") != NULL) {
805							search_part = strstr(path, "&q=");
806							search_part = search_part == NULL ? strstr(path, "?q=") : search_part;
807							search_part = search_part == NULL ? search_part : search_part + 3;
808						} else if (strstr(domain, "cuil.") != NULL) {
809							search_part = strstr(path, "?q=");
810							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
811							search_part = search_part == NULL ? search_part : search_part + 3;
812						} else if (strstr(domain, "kosmix.") != NULL) {
813							search_part = strstr(path, "/topic/");
814							search_part = search_part == NULL ? search_part : search_part + 7;
815						} else if (strstr(domain, "yebol.") != NULL) {
816							search_part = strstr(path, "?key=");
817							search_part = search_part == NULL ? strstr(path, "&key=") : search_part;
818							search_part = search_part == NULL ? search_part : search_part + 5;
819						} else if (strstr(domain, "sogou.") != NULL) {
820							search_part = strstr(path, "&query=");
821							search_part =
822							    search_part == NULL ? strstr(path, "?query=") : search_part;
823							search_part = search_part == NULL ? search_part : search_part + 7;
824						} else if (strstr(domain, "youdao.") != NULL) {
825							search_part = strstr(path, "?q=");
826							search_part = search_part == NULL ? strstr(path, "&q=") : search_part;
827							search_part = search_part == NULL ? search_part : search_part + 3;
828						} else if (strstr(domain, "metacrawler.") != NULL) {
829							search_part = strstr(path, "/ws/results/Web/");
830							search_part = search_part == NULL ? search_part : search_part + 16;
831						} else if (strstr(domain, "webcrawler.") != NULL) {
832							search_part = strstr(path, "/ws/results/Web/");
833							search_part = search_part == NULL ? search_part : search_part + 16;
834						}
835
836						if (search_part != NULL) {
837							int spi, si;
838							char search_key[700];
839							char search[650];
840							queue_node *recent_node = recent_searches->first;
841
842							/*unescape, replacing whitespace with + */
843							si = 0;
844							for (spi = 0;
845							     search_part[spi] != '\0' && search_part[spi] != '&'
846							     && search_part[spi] != '/'; spi++) {
847								int parsed_hex = 0;
848								if (search_part[spi] == '%') {
849									if (search_part[spi + 1] != '\0'
850									    && search_part[spi + 1] != '&'
851									    && search_part[spi + 1] != '/') {
852										if (search_part[spi + 2] != '\0'
853										    && search_part[spi + 2] != '&'
854										    && search_part[spi + 2] != '/') {
855											char enc[3];
856											int hex;
857											enc[0] = search_part[spi + 1];
858											enc[1] = search_part[spi + 2];
859											enc[2] = '\0';
860											if (sscanf(enc, "%x", &hex) > 0) {
861												parsed_hex = 1;
862												search[si] = hex == ' '
863												    || hex == '\t'
864												    || hex == '\r'
865												    || hex ==
866												    '\n' ? '+' : (char)hex;
867												spi = spi + 2;
868											}
869										}
870									}
871								}
872								if (parsed_hex == 0) {
873									search[si] = search_part[spi];
874								}
875								si++;
876							}
877							search[si] = '\0';
878
879							sprintf(search_key, STRIP "@%s", &iph->saddr, search);
880
881							/* Often times search engines will initiate a search as you type it in, but these intermediate queries aren't the real search query
882							 * So, if the most recent query is a substring of the current one, discard it in favor of this one
883							 */
884							if (recent_node != NULL) {
885								if (recent_node->src_ip == iph->saddr) {
886									struct timeval t;
887									do_gettimeofday(&t);
888									if ((recent_node->time).tv_sec + 1 >= t.tv_sec
889									    || ((recent_node->time).tv_sec + 5 >= t.tv_sec
890										&& within_edit_distance(search,
891													recent_node->value,
892													2))) {
893										char recent_key[700];
894
895										sprintf(recent_key, STRIP "@%s",
896											&recent_node->src_ip,
897											recent_node->value);
898										remove_map_element(search_map, recent_key);
899
900										recent_searches->first = recent_node->next;
901										recent_searches->last =
902										    recent_searches->first ==
903										    NULL ? NULL : recent_searches->last;
904										if (recent_searches->first != NULL) {
905											recent_searches->first->previous = NULL;
906										}
907										recent_searches->length =
908										    recent_searches->length - 1;
909										free(recent_node->value);
910										free(recent_node);
911									}
912								}
913							}
914
915							if (get_string_map_element(search_map, search_key)) {
916								//update time
917								update_queue_node_time((queue_node *)
918										       get_map_element(search_map, search_key),
919										       recent_searches);
920							} else {
921								//add
922								add_queue_node(iph->saddr, search, recent_searches, search_map,
923									       search_key, max_search_queue_length);
924							}
925						}
926						spin_unlock_bh(&webmon_lock);
927					}
928				}
929			}
930		}
931	}
932
933	/* free skb if we made a copy to linearize it */
934	if (skb_copied == 1) {
935		kfree_skb(linear_skb);
936	}
937
938	/* printk("returning %d from webmon\n\n\n", test); */
939	return 0;
940}
941
942static int checkentry(const struct xt_mtchk_param *par)
943{
944	struct ipt_webmon_info *info = (struct ipt_webmon_info *)(par->matchinfo);
945
946	if (info->ref_count == NULL) {	/* first instance, we're inserting rule */
947		info->ref_count = (uint32_t *) kmalloc(sizeof(uint32_t), GFP_ATOMIC);
948		if (info->ref_count == NULL) {	/* deal with kmalloc failure */
949			printk("ipt_webmon: kmalloc failure in checkentry!\n");
950			return -ENOMEM;
951		}
952		*(info->ref_count) = 1;
953
954		spin_lock_bh(&webmon_lock);
955
956		max_search_queue_length = info->max_searches;
957		max_domain_queue_length = info->max_domains;
958
959		spin_unlock_bh(&webmon_lock);
960
961	} else {
962		*(info->ref_count) = *(info->ref_count) + 1;
963	}
964	return 0;
965}
966
967static void destroy(const struct xt_mtdtor_param *par)
968{
969	struct ipt_webmon_info *info = (struct ipt_webmon_info *)(par->matchinfo);
970
971	*(info->ref_count) = *(info->ref_count) - 1;
972	if (*(info->ref_count) == 0) {
973		kfree(info->ref_count);
974	}
975}
976
977static struct xt_match webmon_match = {
978	.name = "webmon",
979	.match = &match,
980	.family = AF_INET,
981	.matchsize = sizeof(struct ipt_webmon_info),
982	.checkentry = &checkentry,
983	.destroy = &destroy,
984	.me = THIS_MODULE,
985};
986
987static int __init init(void)
988{
989	spin_lock_bh(&webmon_lock);
990
991	recent_domains = (queue *) malloc(sizeof(queue));
992	recent_domains->first = NULL;
993	recent_domains->last = NULL;
994	recent_domains->length = 0;
995	domain_map = initialize_string_map(0);
996
997	recent_searches = (queue *) malloc(sizeof(queue));
998	recent_searches->first = NULL;
999	recent_searches->last = NULL;
1000	recent_searches->length = 0;
1001	search_map = initialize_string_map(0);
1002
1003#ifdef CONFIG_PROC_FS
1004	{
1005		struct proc_dir_entry *proc_webmon_recent_domains = create_proc_entry("webmon_recent_domains", 0, NULL);
1006		struct proc_dir_entry *proc_webmon_recent_searches = create_proc_entry("webmon_recent_searches", 0, NULL);
1007		if (proc_webmon_recent_domains)
1008			proc_webmon_recent_domains->proc_fops = &webmon_proc_domain_fops;
1009		if (proc_webmon_recent_searches)
1010			proc_webmon_recent_searches->proc_fops = &webmon_proc_search_fops;
1011	}
1012#endif
1013
1014	if (nf_register_sockopt(&ipt_webmon_sockopts) < 0) {
1015		printk("ipt_webmon: Can't register sockopts. Aborting\n");
1016		spin_unlock_bh(&webmon_lock);
1017		return -1;
1018	}
1019	spin_unlock_bh(&webmon_lock);
1020
1021	return xt_register_match(&webmon_match);
1022}
1023
1024static void __exit fini(void)
1025{
1026
1027	unsigned long num_destroyed;
1028
1029	spin_lock_bh(&webmon_lock);
1030
1031#ifdef CONFIG_PROC_FS
1032	remove_proc_entry("webmon_recent_domains", NULL);
1033	remove_proc_entry("webmon_recent_searches", NULL);
1034#endif
1035	nf_unregister_sockopt(&ipt_webmon_sockopts);
1036	xt_unregister_match(&webmon_match);
1037	destroy_map(domain_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed);
1038	destroy_map(search_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed);
1039	destroy_queue(recent_domains);
1040	destroy_queue(recent_searches);
1041
1042	spin_unlock_bh(&webmon_lock);
1043
1044}
1045
/* entry and exit points of the module */
module_init(init);
module_exit(fini);
1048