1/* webmon -- A netfilter module to match URLs in HTTP requests 2 * This module can match using string match or regular expressions 3 * Originally designed for use with Gargoyle router firmware (gargoyle-router.com) 4 * 5 * 6 * Copyright �� 2008-2010 by Eric Bishop <eric@gargoyle-router.com> 7 * 8 * This file is free software: you may copy, redistribute and/or modify it 9 * under the terms of the GNU General Public License as published by the 10 * Free Software Foundation, either version 2 of the License, or (at your 11 * option) any later version. 12 * 13 * This file is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 22#include <linux/kernel.h> 23#include <linux/version.h> 24#include <linux/module.h> 25#include <linux/skbuff.h> 26#include <linux/if_ether.h> 27#include <linux/string.h> 28#include <linux/ctype.h> 29#include <net/sock.h> 30#include <net/ip.h> 31#include <net/tcp.h> 32#include <linux/time.h> 33#include <linux/spinlock.h> 34#include <linux/proc_fs.h> 35#include <linux/netfilter_ipv4/ip_tables.h> 36#include <linux/netfilter_ipv4/ipt_webmon.h> 37#include <linux/ktime.h> 38#include <linux/ip.h> 39#include <linux/netfilter/x_tables.h> 40 41#include "tree_map.h" 42 43#define STRIP "%pI4" 44 45MODULE_LICENSE("GPL"); 46MODULE_AUTHOR("Eric Bishop"); 47MODULE_DESCRIPTION("Monitor URL in HTTP Requests, designed for use with Gargoyle web interface (www.gargoyle-router.com)"); 48 49typedef struct qn { 50 uint32_t src_ip; 51 char *value; 52 struct timeval time; 53 struct qn *next; 54 struct qn *previous; 55} queue_node; 56 57typedef struct { 58 queue_node *first; 59 queue_node *last; 60 int length; 61} queue; 62 63static string_map *domain_map = NULL; 64static queue *recent_domains = NULL; 65 66static string_map *search_map = NULL; 67static queue *recent_searches = NULL; 68 69static int max_domain_queue_length = 5; 70static int max_search_queue_length = 5; 71 72static DEFINE_SPINLOCK(webmon_lock); 73 74static void update_queue_node_time(queue_node * update_node, queue * full_queue) 75{ 76 struct timeval t; 77 do_gettimeofday(&t); 78 update_node->time = t; 79 80 /* move to front of queue if not already at front of queue */ 81 if (update_node->previous != NULL) { 82 queue_node *p = update_node->previous; 83 queue_node *n = update_node->next; 84 p->next = n; 85 if (n != NULL) { 86 n->previous = p; 87 } else { 88 full_queue->last = p; 89 } 90 update_node->previous = NULL; 91 update_node->next = full_queue->first; 92 full_queue->first->previous = update_node; 93 full_queue->first = update_node; 94 } 95} 96 97void add_queue_node(uint32_t src_ip, char *value, queue * full_queue, string_map * queue_index, char *queue_index_key, 98 uint32_t max_queue_length) 99{ 100 101 queue_node *new_node = (queue_node *) kmalloc(sizeof(queue_node), GFP_ATOMIC); 102 char *dyn_value = kernel_strdup(value); 103 struct timeval t; 104 105 if (new_node == NULL || dyn_value == NULL) { 106 if (dyn_value) { 107 kfree(dyn_value); 108 } 109 if (new_node) { 110 kfree(new_node); 111 }; 112 113 return; 114 } 115 set_map_element(queue_index, queue_index_key, (void *)new_node); 116 117 do_gettimeofday(&t); 118 new_node->time = t; 119 new_node->src_ip = src_ip; 120 new_node->value = dyn_value; 121 new_node->previous = NULL; 122 123 new_node->next = full_queue->first; 124 if (full_queue->first != NULL) { 125 full_queue->first->previous = new_node; 126 } 127 full_queue->first = new_node; 128 full_queue->last = (full_queue->last == NULL) ? new_node : full_queue->last; 129 full_queue->length = full_queue->length + 1; 130 131 if (full_queue->length > max_queue_length) { 132 queue_node *old_node = full_queue->last; 133 full_queue->last = old_node->previous; 134 full_queue->last->next = NULL; 135 full_queue->first = old_node->previous == NULL ? NULL : full_queue->first; /*shouldn't be needed, but just in case... */ 136 full_queue->length = full_queue->length - 1; 137 138 sprintf(queue_index_key, STRIP "@%s", &old_node->src_ip, old_node->value); 139 remove_map_element(queue_index, queue_index_key); 140 141 kfree(old_node->value); 142 kfree(old_node); 143 } 144 145 /* 146 queue_node* n = full_queue->first; 147 while(n != NULL) 148 { 149 printf("%ld\t%s\t%s\t%s\n", (unsigned long)n->time, n->src_ip, n->dst_ip, n->domain); 150 n = (queue_node*)n->next; 151 } 152 printf("\n\n"); 153 */ 154} 155 156void add_queue_node_last(uint32_t src_ip, char *value, time_t sec, queue * full_queue, string_map * queue_index, 157 char *queue_index_key, uint32_t max_queue_length) 158{ 159 queue_node *new_node; 160 char *dyn_value; 161 162 if (full_queue->length >= max_queue_length) 163 return; 164 165 new_node = (queue_node *) kzalloc(sizeof(queue_node), GFP_ATOMIC); 166 dyn_value = kernel_strdup(value); 167 168 if (new_node == NULL || dyn_value == NULL) { 169 kfree(dyn_value); 170 kfree(new_node); 171 return; 172 } 173 set_map_element(queue_index, queue_index_key, (void *)new_node); 174 175 new_node->time.tv_sec = sec; 176 new_node->src_ip = src_ip; 177 new_node->value = dyn_value; 178 179 new_node->previous = full_queue->last; 180 if (full_queue->last != NULL) { 181 full_queue->last->next = new_node; 182 } 183 full_queue->last = new_node; 184 full_queue->first = (full_queue->first == NULL) ? new_node : full_queue->first; 185 full_queue->length = full_queue->length + 1; 186} 187 188void destroy_queue(queue * q) 189{ 190 queue_node *last_node = q->last; 191 while (last_node != NULL) { 192 queue_node *previous_node = last_node->previous; 193 free(last_node->value); 194 free(last_node); 195 last_node = previous_node; 196 } 197 free(q); 198} 199 200#ifdef REMOVE 201int strnicmp(const char *cs, const char *ct, size_t count) 202{ 203 register signed char __res = 0; 204 205 while (count) { 206 if ((__res = toupper(*cs) - toupper(*ct++)) != 0 || !*cs++) { 207 break; 208 } 209 count--; 210 } 211 return __res; 212} 213#endif 214 215char *strnistr(const char *s, const char *find, size_t slen) 216{ 217 char c, sc; 218 size_t len; 219 220 if ((c = *find++) != '\0') { 221 len = strlen(find); 222 do { 223 do { 224 if (slen < 1 || (sc = *s) == '\0') { 225 return (NULL); 226 } 227 --slen; 228 ++s; 229 } 230 while (toupper(sc) != toupper(c)); 231 232 if (len > slen) { 233 return (NULL); 234 } 235 } 236 while (strnicmp(s, find, len) != 0); 237 238 s--; 239 } 240 return ((char *)s); 241} 242 243/* NOTE: This is not quite real edit distance -- all differences are assumed to be in one contiguous block 244 * If differences are not in a contiguous block computed edit distance will be greater than real edit distance. 245 * Edit distance computed here is an upper bound on real edit distance. 246 */ 247int within_edit_distance(char *s1, char *s2, int max_edit) 248{ 249 int edit1, edit2; 250 char *s1sp, *s2sp, *s1ep, *s2ep; 251 252 if (s1 == NULL || s2 == NULL) { 253 return 0; 254 } 255 256 edit1 = strlen(s1); 257 edit2 = strlen(s2); 258 s1sp = s1; 259 s2sp = s2; 260 s1ep = s1 + (edit1 - 1); 261 s2ep = s2 + (edit2 - 1); 262 while (*s1sp != '\0' && *s2sp != '\0' && *s1sp == *s2sp) { 263 s1sp++; 264 s2sp++; 265 edit1--; 266 edit2--; 267 } 268 269 /* if either is zero we got to the end of one of the strings */ 270 while (s1ep > s1sp && s2ep > s2sp && *s1ep == *s2ep) { 271 s1ep--; 272 s2ep--; 273 edit1--; 274 edit2--; 275 } 276 277 return edit1 <= max_edit && edit2 <= max_edit ? 1 : 0; 278} 279 280/* 281 * line is the line to be parsed -- it is not modified in any way 282 * max_pieces indicates number of pieces to return, if negative this is determined dynamically 283 * include_remainder_at_max indicates whether the last piece, when max pieces are reached, 284 * should be what it would normally be (0) or the entire remainder of the line (1) 285 * if max_pieces < 0 this parameter is ignored 286 * 287 * 288 * returns all non-separator pieces in a line 289 * result is dynamically allocated, MUST be freed after call-- even if 290 * line is empty (you still get a valid char** pointer to to a NULL char*) 291 */ 292char **split_on_separators(char *line, char *separators, int num_separators, int max_pieces, int include_remainder_at_max, 293 unsigned long *num_pieces) 294{ 295 char **split; 296 297 *num_pieces = 0; 298 if (line != NULL) { 299 int split_index; 300 int non_separator_found; 301 char *dup_line; 302 char *start; 303 304 if (max_pieces < 0) { 305 /* count number of separator characters in line -- this count + 1 is an upperbound on number of pieces */ 306 int separator_count = 0; 307 int line_index; 308 for (line_index = 0; line[line_index] != '\0'; line_index++) { 309 int sep_index; 310 int found = 0; 311 for (sep_index = 0; found == 0 && sep_index < num_separators; sep_index++) { 312 found = separators[sep_index] == line[line_index] ? 1 : 0; 313 } 314 separator_count = separator_count + found; 315 } 316 max_pieces = separator_count + 1; 317 } 318 split = (char **)malloc((1 + max_pieces) * sizeof(char *)); 319 split_index = 0; 320 split[split_index] = NULL; 321 322 dup_line = strdup(line); 323 start = dup_line; 324 non_separator_found = 0; 325 while (non_separator_found == 0) { 326 int matches = 0; 327 int sep_index; 328 for (sep_index = 0; sep_index < num_separators; sep_index++) { 329 matches = matches == 1 || separators[sep_index] == start[0] ? 1 : 0; 330 } 331 non_separator_found = matches == 0 || start[0] == '\0' ? 1 : 0; 332 if (non_separator_found == 0) { 333 start++; 334 } 335 } 336 337 while (start[0] != '\0' && split_index < max_pieces) { 338 /* find first separator index */ 339 int first_separator_index = 0; 340 int separator_found = 0; 341 while (separator_found == 0) { 342 int sep_index; 343 for (sep_index = 0; separator_found == 0 && sep_index < num_separators; sep_index++) { 344 separator_found = separators[sep_index] == start[first_separator_index] 345 || start[first_separator_index] == '\0' ? 1 : 0; 346 } 347 if (separator_found == 0) { 348 first_separator_index++; 349 } 350 } 351 352 /* copy next piece to split array */ 353 if (first_separator_index > 0) { 354 char *next_piece = NULL; 355 if (split_index + 1 < max_pieces || include_remainder_at_max <= 0) { 356 next_piece = (char *)malloc((first_separator_index + 1) * sizeof(char)); 357 memcpy(next_piece, start, first_separator_index); 358 next_piece[first_separator_index] = '\0'; 359 } else { 360 next_piece = strdup(start); 361 } 362 split[split_index] = next_piece; 363 split[split_index + 1] = NULL; 364 split_index++; 365 } 366 367 /* find next non-separator index, indicating start of next piece */ 368 start = start + first_separator_index; 369 non_separator_found = 0; 370 while (non_separator_found == 0) { 371 int matches = 0; 372 int sep_index; 373 for (sep_index = 0; sep_index < num_separators; sep_index++) { 374 matches = matches == 1 || separators[sep_index] == start[0] ? 1 : 0; 375 } 376 non_separator_found = matches == 0 || start[0] == '\0' ? 1 : 0; 377 if (non_separator_found == 0) { 378 start++; 379 } 380 } 381 } 382 free(dup_line); 383 *num_pieces = split_index; 384 } else { 385 split = (char **)malloc((1) * sizeof(char *)); 386 split[0] = NULL; 387 } 388 return split; 389} 390 391static void extract_url(const unsigned char *packet_data, int packet_length, char *domain, char *path) 392{ 393 394 int path_start_index; 395 int path_end_index; 396 int last_header_index; 397 char last_two_buf[2]; 398 int end_found; 399 char *domain_match; 400 char *start_ptr; 401 402 domain[0] = '\0'; 403 path[0] = '\0'; 404 405 /* get path portion of URL */ 406 start_ptr = strnistr((char *)packet_data, " ", packet_length); 407 if (start_ptr == NULL) { 408 return; 409 } 410 411 path_start_index = (int)(start_ptr - (char *)packet_data); 412 start_ptr = strnistr((char *)(packet_data + path_start_index), " ", packet_length - (path_start_index + 2)); 413 if (start_ptr == NULL) { 414 return; 415 } 416 417 while (packet_data[path_start_index] == ' ') { 418 path_start_index++; 419 } 420 path_end_index = (int)(strstr((char *)(packet_data + path_start_index), " ") - (char *)packet_data); 421 if (path_end_index > 0) { 422 int path_length = path_end_index - path_start_index; 423 path_length = path_length < 625 ? path_length : 624; /* prevent overflow */ 424 memcpy(path, packet_data + path_start_index, path_length); 425 path[path_length] = '\0'; 426 } else { 427 return; 428 } 429 430 /* get header length */ 431 last_header_index = 2; 432 memcpy(last_two_buf, (char *)packet_data, 2); 433 end_found = 0; 434 while (end_found == 0 && last_header_index < packet_length) { 435 char next = (char)packet_data[last_header_index]; 436 if (next == '\n') { 437 end_found = last_two_buf[1] == '\n' || (last_two_buf[0] == '\n' && last_two_buf[1] == '\r') ? 1 : 0; 438 } 439 if (end_found == 0) { 440 last_two_buf[0] = last_two_buf[1]; 441 last_two_buf[1] = next; 442 last_header_index++; 443 } 444 } 445 446 /* get domain portion of URL */ 447 domain_match = strnistr((char *)packet_data, "Host:", last_header_index); 448 if (domain_match != NULL) { 449 int domain_end_index; 450 domain_match = domain_match + 5; /* character after "Host:" */ 451 while (domain_match[0] == ' ' && ((char *)domain_match - (char *)packet_data) < last_header_index) { 452 domain_match = domain_match + 1; 453 } 454 455 domain_end_index = 0; 456 while (domain_match[domain_end_index] != '\n' && 457 domain_match[domain_end_index] != '\r' && 458 domain_match[domain_end_index] != ' ' && 459 domain_match[domain_end_index] != ':' && 460 ((char *)domain_match - (char *)packet_data) + domain_end_index < last_header_index) { 461 domain_end_index++; 462 } 463 domain_end_index = domain_end_index < 625 ? domain_end_index : 624; /* prevent overflow */ 464 memcpy(domain, domain_match, domain_end_index); 465 domain[domain_end_index] = '\0'; 466 467 for (domain_end_index = 0; domain[domain_end_index] != '\0'; domain_end_index++) { 468 domain[domain_end_index] = (char)tolower(domain[domain_end_index]); 469 } 470 } 471} 472 473#ifdef CONFIG_PROC_FS 474 475static void *webmon_proc_start(struct seq_file *seq, loff_t * loff_pos) 476{ 477 static unsigned long counter = 0; 478 479 /* beginning a new sequence ? */ 480 if (*loff_pos == 0) { 481 /* yes => return a non null value to begin the sequence */ 482 return &counter; 483 } else { 484 /* no => it's the end of the sequence, return end to stop reading */ 485 *loff_pos = 0; 486 return NULL; 487 } 488} 489 490static void *webmon_proc_next(struct seq_file *seq, void *v, loff_t * pos) 491{ 492 return NULL; 493} 494 495static void webmon_proc_stop(struct seq_file *seq, void *v) 496{ 497 //don't need to do anything 498} 499 500static int webmon_proc_domain_show(struct seq_file *s, void *v) 501{ 502 queue_node *next_node; 503 504 spin_lock_bh(&webmon_lock); 505 506 next_node = recent_domains->first; 507 while (next_node != NULL) { 508 seq_printf(s, "%ld\t" STRIP "\t%s\n", (unsigned long)(next_node->time).tv_sec, &next_node->src_ip, 509 next_node->value); 510 next_node = (queue_node *) next_node->next; 511 } 512 spin_unlock_bh(&webmon_lock); 513 514 return 0; 515} 516 517static int webmon_proc_search_show(struct seq_file *s, void *v) 518{ 519 queue_node *next_node; 520 521 spin_lock_bh(&webmon_lock); 522 523 next_node = recent_searches->first; 524 while (next_node != NULL) { 525 seq_printf(s, "%ld\t" STRIP "\t%s\n", (unsigned long)(next_node->time).tv_sec, &next_node->src_ip, 526 next_node->value); 527 next_node = (queue_node *) next_node->next; 528 } 529 spin_unlock_bh(&webmon_lock); 530 531 return 0; 532} 533 534static struct seq_operations webmon_proc_domain_sops = { 535 .start = webmon_proc_start, 536 .next = webmon_proc_next, 537 .stop = webmon_proc_stop, 538 .show = webmon_proc_domain_show 539}; 540 541static struct seq_operations webmon_proc_search_sops = { 542 .start = webmon_proc_start, 543 .next = webmon_proc_next, 544 .stop = webmon_proc_stop, 545 .show = webmon_proc_search_show 546}; 547 548static int webmon_proc_domain_open(struct inode *inode, struct file *file) 549{ 550 return seq_open(file, &webmon_proc_domain_sops); 551} 552 553static int webmon_proc_search_open(struct inode *inode, struct file *file) 554{ 555 return seq_open(file, &webmon_proc_search_sops); 556} 557 558static struct file_operations webmon_proc_domain_fops = { 559 .owner = THIS_MODULE, 560 .open = webmon_proc_domain_open, 561 .read = seq_read, 562 .llseek = seq_lseek, 563 .release = seq_release 564}; 565 566static struct file_operations webmon_proc_search_fops = { 567 .owner = THIS_MODULE, 568 .open = webmon_proc_search_open, 569 .read = seq_read, 570 .llseek = seq_lseek, 571 .release = seq_release 572}; 573 574#endif 575 576static int ipt_webmon_set_ctl(struct sock *sk, int cmd, void *user, u_int32_t len) 577{ 578 579 char *buffer = kmalloc(len, GFP_ATOMIC); 580 if (buffer == NULL) { /* check for malloc failure */ 581 return 0; 582 } 583 copy_from_user(buffer, user, len); 584 585 if (len > 1 + sizeof(uint32_t)) { 586 unsigned char type = buffer[0]; 587 uint32_t max_queue_length = *((uint32_t *) (buffer + 1)); 588 char *data = buffer + 1 + sizeof(uint32_t); 589 char newline_terminator[] = { '\n', '\r' }; 590 char whitespace_chars[] = { '\t', ' ' }; 591 592 spin_lock_bh(&webmon_lock); 593 if (type == WEBMON_DOMAIN || type == WEBMON_SEARCH) { 594 unsigned long num_lines; 595 unsigned long line_index; 596 unsigned long num_destroyed; 597 char **lines = split_on_separators(data, newline_terminator, 2, -1, 0, &num_lines); 598 599 /* destroy and re-initialize queue and map */ 600 if (type == WEBMON_DOMAIN) { 601 destroy_map(domain_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed); 602 destroy_queue(recent_domains); 603 recent_domains = (queue *) malloc(sizeof(queue)); 604 recent_domains->first = NULL; 605 recent_domains->last = NULL; 606 recent_domains->length = 0; 607 domain_map = initialize_map(0); 608 609 max_domain_queue_length = max_queue_length; 610 } else if (type == WEBMON_SEARCH) { 611 destroy_map(search_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed); 612 destroy_queue(recent_searches); 613 recent_searches = (queue *) malloc(sizeof(queue)); 614 recent_searches->first = NULL; 615 recent_searches->last = NULL; 616 recent_searches->length = 0; 617 search_map = initialize_map(0); 618 619 max_search_queue_length = max_queue_length; 620 } 621 622 for (line_index = 0; line_index < num_lines; line_index++) { 623 char *line = lines[line_index]; 624 unsigned long num_pieces; 625 char **split = split_on_separators(line, whitespace_chars, 2, -1, 0, &num_pieces); 626 627 //check that there are 3 pieces (time, src_ip, value) 628 int length; 629 for (length = 0; split[length] != NULL; length++) { 630 } 631 if (length == 3) { 632 time_t time; 633 int parsed_ip[4]; 634 int valid_ip = 635 sscanf(split[1], "%d.%d.%d.%d", parsed_ip, parsed_ip + 1, parsed_ip + 2, 636 parsed_ip + 3); 637 if (valid_ip == 4) { 638 valid_ip = parsed_ip[0] <= 255 && parsed_ip[1] <= 255 && parsed_ip[2] <= 255 639 && parsed_ip[3] <= 255 ? valid_ip : 0; 640 } 641 if (sscanf(split[0], "%ld", &time) > 0 && valid_ip == 4) { 642 char *value = split[2]; 643 char value_key[700]; 644 uint32_t ip = 645 (parsed_ip[0] << 24) + (parsed_ip[1] << 16) + (parsed_ip[2] << 8) + 646 (parsed_ip[3]); 647 ip = htonl(ip); 648 sprintf(value_key, STRIP "@%s", &ip, value); 649 if (type == WEBMON_DOMAIN) { 650 add_queue_node_last(ip, value, time, recent_domains, domain_map, 651 value_key, max_domain_queue_length); 652 } else if (type == WEBMON_SEARCH) { 653 add_queue_node_last(ip, value, time, recent_searches, search_map, 654 value_key, max_search_queue_length); 655 } 656 } 657 } 658 659 for (length = 0; split[length] != NULL; length++) { 660 free(split[length]); 661 } 662 free(split); 663 free(line); 664 } 665 free(lines); 666 } 667 668 spin_unlock_bh(&webmon_lock); 669 } 670 671 return 1; 672} 673 674static struct nf_sockopt_ops ipt_webmon_sockopts = { 675 .pf = PF_INET, 676 .set_optmin = WEBMON_SET, 677 .set_optmax = WEBMON_SET + 1, 678 .set = ipt_webmon_set_ctl, 679}; 680 681static bool match(const struct sk_buff *skb, struct xt_action_param *par) 682{ 683 const struct ipt_webmon_info *info = (const struct ipt_webmon_info *)(par->matchinfo); 684 685 struct iphdr *iph; 686 687 /* linearize skb if necessary */ 688 struct sk_buff *linear_skb; 689 int skb_copied; 690 if (skb_is_nonlinear(skb)) { 691 linear_skb = skb_copy(skb, GFP_ATOMIC); 692 skb_copied = 1; 693 } else { 694 linear_skb = (struct sk_buff *)skb; 695 skb_copied = 0; 696 } 697 698 /* ignore packets that are not TCP */ 699 iph = (struct iphdr *)(skb_network_header(skb)); 700 if (iph->protocol == IPPROTO_TCP) { 701 /* get payload */ 702 struct tcphdr *tcp_hdr = (struct tcphdr *)(((unsigned char *)iph) + (iph->ihl * 4)); 703 unsigned short payload_offset = (tcp_hdr->doff * 4) + (iph->ihl * 4); 704 unsigned char *payload = ((unsigned char *)iph) + payload_offset; 705 unsigned short payload_length = ntohs(iph->tot_len) - payload_offset; 706 707 /* if payload length <= 10 bytes don't bother doing a check, otherwise check for match */ 708 if (payload_length > 10) { 709 /* are we dealing with a web page request */ 710 if (strnicmp((char *)payload, "GET ", 4) == 0 || strnicmp((char *)payload, "POST ", 5) == 0 711 || strnicmp((char *)payload, "HEAD ", 5) == 0) { 712 char domain[650]; 713 char path[650]; 714 char domain_key[700]; 715 unsigned char save = info->exclude_type == WEBMON_EXCLUDE ? 1 : 0; 716 uint32_t ip_index; 717 718 for (ip_index = 0; ip_index < info->num_exclude_ips; ip_index++) { 719 if ((info->exclude_ips)[ip_index] == iph->saddr) { 720 save = info->exclude_type == WEBMON_EXCLUDE ? 0 : 1; 721 } 722 } 723 for (ip_index = 0; ip_index < info->num_exclude_ranges; ip_index++) { 724 struct ipt_webmon_ip_range r = (info->exclude_ranges)[ip_index]; 725 if (ntohl(r.start) >= ntohl(iph->saddr) && ntohl(r.end) <= ntohl(iph->saddr)) { 726 save = info->exclude_type == WEBMON_EXCLUDE ? 0 : 1; 727 } 728 } 729 730 if (save) { 731 extract_url(payload, payload_length, domain, path); 732 733 sprintf(domain_key, STRIP "@%s", &iph->saddr, domain); 734 735 if (strlen(domain) > 0) { 736 char *search_part = NULL; 737 spin_lock_bh(&webmon_lock); 738 739 if (get_string_map_element(domain_map, domain_key)) { 740 //update time 741 update_queue_node_time((queue_node *) 742 get_map_element(domain_map, domain_key), 743 recent_domains); 744 } else { 745 //add 746 add_queue_node(iph->saddr, domain, recent_domains, domain_map, 747 domain_key, max_domain_queue_length); 748 } 749 750 /* printk("domain,path=\"%s\", \"%s\"\n", domain, path); */ 751 752 if (strnistr(domain, "google.", 625) != NULL) { 753 search_part = strstr(path, "&q="); 754 search_part = search_part == NULL ? strstr(path, "#q=") : search_part; 755 search_part = search_part == NULL ? strstr(path, "?q=") : search_part; 756 search_part = search_part == NULL ? search_part : search_part + 3; 757 } else if (strstr(domain, "bing.") != NULL) { 758 search_part = strstr(path, "?q="); 759 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 760 search_part = search_part == NULL ? search_part : search_part + 3; 761 } else if (strstr(domain, "yahoo.") != NULL) { 762 search_part = strstr(path, "?p="); 763 search_part = search_part == NULL ? strstr(path, "&p=") : search_part; 764 search_part = search_part == NULL ? search_part : search_part + 3; 765 } else if (strstr(domain, "lycos.") != NULL) { 766 search_part = strstr(path, "&query="); 767 search_part = 768 search_part == NULL ? strstr(path, "?query=") : search_part; 769 search_part = search_part == NULL ? search_part : search_part + 7; 770 } else if (strstr(domain, "altavista.") != NULL) { 771 search_part = strstr(path, "&q="); 772 search_part = search_part == NULL ? strstr(path, "?q=") : search_part; 773 search_part = search_part == NULL ? search_part : search_part + 3; 774 } else if (strstr(domain, "duckduckgo.") != NULL) { 775 search_part = strstr(path, "?q="); 776 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 777 search_part = search_part == NULL ? search_part : search_part + 3; 778 } else if (strstr(domain, "baidu.") != NULL) { 779 search_part = strstr(path, "?wd="); 780 search_part = search_part == NULL ? strstr(path, "&wd=") : search_part; 781 search_part = search_part == NULL ? search_part : search_part + 4; 782 } else if (strstr(domain, "search.") != NULL) { 783 search_part = strstr(path, "?q="); 784 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 785 search_part = search_part == NULL ? search_part : search_part + 3; 786 } else if (strstr(domain, "aol.") != NULL) { 787 search_part = strstr(path, "&q="); 788 search_part = search_part == NULL ? strstr(path, "?q=") : search_part; 789 search_part = search_part == NULL ? search_part : search_part + 3; 790 } else if (strstr(domain, "ask.") != NULL) { 791 search_part = strstr(path, "?q="); 792 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 793 search_part = search_part == NULL ? search_part : search_part + 3; 794 } else if (strstr(domain, "yandex.") != NULL) { 795 search_part = strstr(path, "?text="); 796 search_part = 797 search_part == NULL ? strstr(path, "&text=") : search_part; 798 search_part = search_part == NULL ? search_part : search_part + 6; 799 } else if (strstr(domain, "naver.") != NULL) { 800 search_part = strstr(path, "&query="); 801 search_part = 802 search_part == NULL ? strstr(path, "?query=") : search_part; 803 search_part = search_part == NULL ? search_part : search_part + 7; 804 } else if (strstr(domain, "daum.") != NULL) { 805 search_part = strstr(path, "&q="); 806 search_part = search_part == NULL ? strstr(path, "?q=") : search_part; 807 search_part = search_part == NULL ? search_part : search_part + 3; 808 } else if (strstr(domain, "cuil.") != NULL) { 809 search_part = strstr(path, "?q="); 810 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 811 search_part = search_part == NULL ? search_part : search_part + 3; 812 } else if (strstr(domain, "kosmix.") != NULL) { 813 search_part = strstr(path, "/topic/"); 814 search_part = search_part == NULL ? search_part : search_part + 7; 815 } else if (strstr(domain, "yebol.") != NULL) { 816 search_part = strstr(path, "?key="); 817 search_part = search_part == NULL ? strstr(path, "&key=") : search_part; 818 search_part = search_part == NULL ? search_part : search_part + 5; 819 } else if (strstr(domain, "sogou.") != NULL) { 820 search_part = strstr(path, "&query="); 821 search_part = 822 search_part == NULL ? strstr(path, "?query=") : search_part; 823 search_part = search_part == NULL ? search_part : search_part + 7; 824 } else if (strstr(domain, "youdao.") != NULL) { 825 search_part = strstr(path, "?q="); 826 search_part = search_part == NULL ? strstr(path, "&q=") : search_part; 827 search_part = search_part == NULL ? search_part : search_part + 3; 828 } else if (strstr(domain, "metacrawler.") != NULL) { 829 search_part = strstr(path, "/ws/results/Web/"); 830 search_part = search_part == NULL ? search_part : search_part + 16; 831 } else if (strstr(domain, "webcrawler.") != NULL) { 832 search_part = strstr(path, "/ws/results/Web/"); 833 search_part = search_part == NULL ? search_part : search_part + 16; 834 } 835 836 if (search_part != NULL) { 837 int spi, si; 838 char search_key[700]; 839 char search[650]; 840 queue_node *recent_node = recent_searches->first; 841 842 /*unescape, replacing whitespace with + */ 843 si = 0; 844 for (spi = 0; 845 search_part[spi] != '\0' && search_part[spi] != '&' 846 && search_part[spi] != '/'; spi++) { 847 int parsed_hex = 0; 848 if (search_part[spi] == '%') { 849 if (search_part[spi + 1] != '\0' 850 && search_part[spi + 1] != '&' 851 && search_part[spi + 1] != '/') { 852 if (search_part[spi + 2] != '\0' 853 && search_part[spi + 2] != '&' 854 && search_part[spi + 2] != '/') { 855 char enc[3]; 856 int hex; 857 enc[0] = search_part[spi + 1]; 858 enc[1] = search_part[spi + 2]; 859 enc[2] = '\0'; 860 if (sscanf(enc, "%x", &hex) > 0) { 861 parsed_hex = 1; 862 search[si] = hex == ' ' 863 || hex == '\t' 864 || hex == '\r' 865 || hex == 866 '\n' ? '+' : (char)hex; 867 spi = spi + 2; 868 } 869 } 870 } 871 } 872 if (parsed_hex == 0) { 873 search[si] = search_part[spi]; 874 } 875 si++; 876 } 877 search[si] = '\0'; 878 879 sprintf(search_key, STRIP "@%s", &iph->saddr, search); 880 881 /* Often times search engines will initiate a search as you type it in, but these intermediate queries aren't the real search query 882 * So, if the most recent query is a substring of the current one, discard it in favor of this one 883 */ 884 if (recent_node != NULL) { 885 if (recent_node->src_ip == iph->saddr) { 886 struct timeval t; 887 do_gettimeofday(&t); 888 if ((recent_node->time).tv_sec + 1 >= t.tv_sec 889 || ((recent_node->time).tv_sec + 5 >= t.tv_sec 890 && within_edit_distance(search, 891 recent_node->value, 892 2))) { 893 char recent_key[700]; 894 895 sprintf(recent_key, STRIP "@%s", 896 &recent_node->src_ip, 897 recent_node->value); 898 remove_map_element(search_map, recent_key); 899 900 recent_searches->first = recent_node->next; 901 recent_searches->last = 902 recent_searches->first == 903 NULL ? NULL : recent_searches->last; 904 if (recent_searches->first != NULL) { 905 recent_searches->first->previous = NULL; 906 } 907 recent_searches->length = 908 recent_searches->length - 1; 909 free(recent_node->value); 910 free(recent_node); 911 } 912 } 913 } 914 915 if (get_string_map_element(search_map, search_key)) { 916 //update time 917 update_queue_node_time((queue_node *) 918 get_map_element(search_map, search_key), 919 recent_searches); 920 } else { 921 //add 922 add_queue_node(iph->saddr, search, recent_searches, search_map, 923 search_key, max_search_queue_length); 924 } 925 } 926 spin_unlock_bh(&webmon_lock); 927 } 928 } 929 } 930 } 931 } 932 933 /* free skb if we made a copy to linearize it */ 934 if (skb_copied == 1) { 935 kfree_skb(linear_skb); 936 } 937 938 /* printk("returning %d from webmon\n\n\n", test); */ 939 return 0; 940} 941 942static int checkentry(const struct xt_mtchk_param *par) 943{ 944 struct ipt_webmon_info *info = (struct ipt_webmon_info *)(par->matchinfo); 945 946 if (info->ref_count == NULL) { /* first instance, we're inserting rule */ 947 info->ref_count = (uint32_t *) kmalloc(sizeof(uint32_t), GFP_ATOMIC); 948 if (info->ref_count == NULL) { /* deal with kmalloc failure */ 949 printk("ipt_webmon: kmalloc failure in checkentry!\n"); 950 return -ENOMEM; 951 } 952 *(info->ref_count) = 1; 953 954 spin_lock_bh(&webmon_lock); 955 956 max_search_queue_length = info->max_searches; 957 max_domain_queue_length = info->max_domains; 958 959 spin_unlock_bh(&webmon_lock); 960 961 } else { 962 *(info->ref_count) = *(info->ref_count) + 1; 963 } 964 return 0; 965} 966 967static void destroy(const struct xt_mtdtor_param *par) 968{ 969 struct ipt_webmon_info *info = (struct ipt_webmon_info *)(par->matchinfo); 970 971 *(info->ref_count) = *(info->ref_count) - 1; 972 if (*(info->ref_count) == 0) { 973 kfree(info->ref_count); 974 } 975} 976 977static struct xt_match webmon_match = { 978 .name = "webmon", 979 .match = &match, 980 .family = AF_INET, 981 .matchsize = sizeof(struct ipt_webmon_info), 982 .checkentry = &checkentry, 983 .destroy = &destroy, 984 .me = THIS_MODULE, 985}; 986 987static int __init init(void) 988{ 989 spin_lock_bh(&webmon_lock); 990 991 recent_domains = (queue *) malloc(sizeof(queue)); 992 recent_domains->first = NULL; 993 recent_domains->last = NULL; 994 recent_domains->length = 0; 995 domain_map = initialize_string_map(0); 996 997 recent_searches = (queue *) malloc(sizeof(queue)); 998 recent_searches->first = NULL; 999 recent_searches->last = NULL; 1000 recent_searches->length = 0; 1001 search_map = initialize_string_map(0); 1002 1003#ifdef CONFIG_PROC_FS 1004 { 1005 struct proc_dir_entry *proc_webmon_recent_domains = create_proc_entry("webmon_recent_domains", 0, NULL); 1006 struct proc_dir_entry *proc_webmon_recent_searches = create_proc_entry("webmon_recent_searches", 0, NULL); 1007 if (proc_webmon_recent_domains) 1008 proc_webmon_recent_domains->proc_fops = &webmon_proc_domain_fops; 1009 if (proc_webmon_recent_searches) 1010 proc_webmon_recent_searches->proc_fops = &webmon_proc_search_fops; 1011 } 1012#endif 1013 1014 if (nf_register_sockopt(&ipt_webmon_sockopts) < 0) { 1015 printk("ipt_webmon: Can't register sockopts. Aborting\n"); 1016 spin_unlock_bh(&webmon_lock); 1017 return -1; 1018 } 1019 spin_unlock_bh(&webmon_lock); 1020 1021 return xt_register_match(&webmon_match); 1022} 1023 1024static void __exit fini(void) 1025{ 1026 1027 unsigned long num_destroyed; 1028 1029 spin_lock_bh(&webmon_lock); 1030 1031#ifdef CONFIG_PROC_FS 1032 remove_proc_entry("webmon_recent_domains", NULL); 1033 remove_proc_entry("webmon_recent_searches", NULL); 1034#endif 1035 nf_unregister_sockopt(&ipt_webmon_sockopts); 1036 xt_unregister_match(&webmon_match); 1037 destroy_map(domain_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed); 1038 destroy_map(search_map, DESTROY_MODE_IGNORE_VALUES, &num_destroyed); 1039 destroy_queue(recent_domains); 1040 destroy_queue(recent_searches); 1041 1042 spin_unlock_bh(&webmon_lock); 1043 1044} 1045 1046module_init(init); 1047module_exit(fini); 1048