1/* Kernel module to match a string into a packet. 2 * 3 * Copyright (C) 2000 Emmanuel Roger <winfield@freegates.be> 4 * 5 * ChangeLog 6 * 19.02.2002: Gianni Tedesco <gianni@ecsc.co.uk> 7 * Fixed SMP re-entrancy problem using per-cpu data areas 8 * for the skip/shift tables. 9 * 02.05.2001: Gianni Tedesco <gianni@ecsc.co.uk> 10 * Fixed kernel panic, due to overrunning boyer moore string 11 * tables. Also slightly tweaked heuristic for deciding what 12 * search algo to use. 13 * 27.01.2001: Gianni Tedesco <gianni@ecsc.co.uk> 14 * Implemented Boyer Moore Sublinear search algorithm 15 * alongside the existing linear search based on memcmp(). 16 * Also a quick check to decide which method to use on a per 17 * packet basis. 18 */ 19 20/* Kernel module to match a http header string into a packet. 21 * Description: 22 * This is kernel module for web content inspection. It was derived from 23 * 'string' match module, declared as above. 24 * 25 * The module follows the Netfilter framework, called extended packet 26 * matching modules. 27 */ 28 29#include <linux/module.h> 30#include <linux/skbuff.h> 31#include <net/sock.h> 32 33#include <linux/netfilter_ipv4/ip_tables.h> 34#include <linux/netfilter_ipv4/ipt_webstr.h> 35 36#define isdigit(x) ((x) >= '0' && (x) <= '9') 37#define isupper(x) (((unsigned)(x) >= 'A') && ((unsigned)(x) <= 'Z')) 38#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z')) 39#define isalpha(x) (isupper(x) || islower(x)) 40#define toupper(x) (isupper(x) ? (x) : (x) - 'a' + 'A') 41#define tolower(x) (isupper(x) ? ((x) - 'A' + 'a') : (x)) 42 43#define split(word, wordlist, next, delim) \ 44 for (next = wordlist, \ 45 strncpy(word, next, sizeof(word)), \ 46 word[(next=strstr(next, delim)) ? strstr(word, delim) - word : sizeof(word) - 1] = '\0', \ 47 next = next ? next + sizeof(delim) - 1 : NULL ; \ 48 strlen(word); \ 49 next = next ? : "", \ 50 strncpy(word, next, sizeof(word)), \ 51 word[(next=strstr(next, delim)) ? strstr(word, delim) - word : sizeof(word) - 1] = '\0', \ 52 next = next ? next + sizeof(delim) - 1 : NULL) 53 54#define BUFSIZE 1024 55 56/* Flags for get_http_info() */ 57#define HTTP_HOST 0x01 58#define HTTP_URL 0x02 59/* Flags for mangle_http_header() */ 60#define HTTP_COOKIE 0x04 61 62#if 0 63#define SPARQ_LOG printk 64#else 65#define SPARQ_LOG(format, args...) 66#endif 67 68typedef struct httpinfo { 69 char host[BUFSIZE + 1]; 70 int hostlen; 71 char url[BUFSIZE + 1]; 72 int urllen; 73} httpinfo_t; 74 75/* Return 1 for match, 0 for accept, -1 for partial. */ 76static int find_pattern2(const char *data, size_t dlen, 77 const char *pattern, size_t plen, 78 char term, 79 unsigned int *numoff, 80 unsigned int *numlen) 81{ 82 size_t i, j, k; 83 int state = 0; 84 *numoff = *numlen = 0; 85 86 SPARQ_LOG("%s: pattern = '%s', dlen = %u\n",__FUNCTION__, pattern, dlen); 87 if (dlen == 0) 88 return 0; 89 90 if (dlen <= plen) { /* Short packet: try for partial? */ 91 if (strnicmp(data, pattern, dlen) == 0) 92 return -1; 93 else 94 return 0; 95 } 96 for (i = 0; i <= (dlen - plen); i++) { 97 /* DFA : \r\n\r\n :: 1234 */ 98 if (*(data + i) == '\r') { 99 if (!(state % 2)) state++; /* forwarding move */ 100 else state = 0; /* reset */ 101 } 102 else if (*(data + i) == '\n') { 103 if (state % 2) state++; 104 else state = 0; 105 } 106 else state = 0; 107 108 if (state >= 4) 109 break; 110 111 /* pattern compare */ 112 if (memcmp(data + i, pattern, plen ) != 0) 113 continue; 114 115 /* Here, it means patten match!! */ 116 *numoff=i + plen; 117 for (j = *numoff, k = 0; data[j] != term; j++, k++) 118 if (j > dlen) return -1 ; /* no terminal char */ 119 120 *numlen = k; 121 return 1; 122 } 123 return 0; 124} 125 126static int mangle_http_header(const struct sk_buff *skb, int flags) 127{ 128 struct iphdr *iph = (skb)->nh.iph; 129 struct tcphdr *tcph = (void *)iph + iph->ihl*4; 130 unsigned char *data = (void *)tcph + tcph->doff*4; 131 unsigned int datalen = (skb)->len - (iph->ihl*4) - (tcph->doff*4); 132 133 int found, offset, len; 134 int ret = 0; 135 136 SPARQ_LOG("%s: seq=%u\n", __FUNCTION__, ntohl(tcph->seq)); 137 138 /* Basic checking, is it HTTP packet? */ 139 if (datalen < 10) 140 return ret; /* Not enough length, ignore it */ 141 if (memcmp(data, "GET ", sizeof("GET ") - 1) != 0 && 142 memcmp(data, "POST ", sizeof("POST ") - 1) != 0) 143 return ret; /* Pass it */ 144 145 /* COOKIE modification */ 146 if (flags & HTTP_COOKIE) { 147 found = find_pattern2(data, datalen, "Cookie: ", 148 sizeof("Cookie: ")-1, '\r', &offset, &len); 149 if (found) { 150 char c; 151 offset -= (sizeof("Cookie: ") - 1); 152 /* Swap the 2rd and 4th bit */ 153 c = *(data + offset + 2) ; 154 *(data + offset + 2) = *(data + offset + 4) ; 155 *(data + offset + 4) = c ; 156 ret++; 157 } 158 } 159 160 return ret; 161} 162 163static int get_http_info(const struct sk_buff *skb, int flags, httpinfo_t *info) 164{ 165 struct iphdr *iph = (skb)->nh.iph; 166 struct tcphdr *tcph = (void *)iph + iph->ihl*4; 167 unsigned char *data = (void *)tcph + tcph->doff*4; 168 unsigned int datalen = (skb)->len - (iph->ihl*4) - (tcph->doff*4); 169 170 int found, offset; 171 int hostlen, pathlen; 172 int ret = 0; 173 174 SPARQ_LOG("%s: seq=%u\n", __FUNCTION__, ntohl(tcph->seq)); 175 176 /* Basic checking, is it HTTP packet? */ 177 if (datalen < 10) 178 return ret; /* Not enough length, ignore it */ 179 if (memcmp(data, "GET ", sizeof("GET ") - 1) != 0 && 180 memcmp(data, "POST ", sizeof("POST ") - 1) != 0) 181 return ret; /* Pass it */ 182 183 if (!(flags & (HTTP_HOST | HTTP_URL))) 184 return ret; 185 186 /* find the 'Host: ' value */ 187 found = find_pattern2(data, datalen, "Host: ", 188 sizeof("Host: ") - 1, '\r', &offset, &hostlen); 189 SPARQ_LOG("Host found=%d\n", found); 190 191 if (!found || !hostlen) 192 return ret; 193 194 ret++; /* Host found, increase the return value */ 195 hostlen = (hostlen < BUFSIZE) ? hostlen : BUFSIZE; 196 strncpy(info->host, data + offset, hostlen); 197 *(info->host + hostlen) = 0; /* null-terminated */ 198 info->hostlen = hostlen; 199 SPARQ_LOG("HOST=%s, hostlen=%d\n", info->host, info->hostlen); 200 201 if (!(flags & HTTP_URL)) 202 return ret; 203 204 /* find the 'GET ' or 'POST ' value */ 205 found = find_pattern2(data, datalen, "GET ", 206 sizeof("GET ") - 1, '\r', &offset, &pathlen); 207 if (!found) 208 found = find_pattern2(data, datalen, "POST ", 209 sizeof("POST ") - 1, '\r', &offset, &pathlen); 210 SPARQ_LOG("GET/POST found=%d\n", found); 211 212 if (!found || (pathlen -= (sizeof(" HTTP/x.x") - 1)) <= 0)/* ignor this field */ 213 return ret; 214 215 ret++; /* GET/POST found, increase the return value */ 216 pathlen = ((pathlen + hostlen) < BUFSIZE) ? pathlen : BUFSIZE - hostlen; 217 strncpy(info->url, info->host, hostlen); 218 strncpy(info->url + hostlen, data + offset, pathlen); 219 *(info->url + hostlen + pathlen) = 0; /* null-terminated */ 220 info->urllen = hostlen + pathlen; 221 SPARQ_LOG("URL=%s, urllen=%d\n", info->url, info->urllen); 222 223 return ret; 224} 225 226/* Linear string search based on memcmp() */ 227static char *search_linear (char *needle, char *haystack, int needle_len, int haystack_len) 228{ 229 char *k = haystack + (haystack_len-needle_len); 230 char *t = haystack; 231 232 SPARQ_LOG("%s: haystack=%s, needle=%s\n", __FUNCTION__, t, needle); 233 for(; t <= k; t++) { 234 //SPARQ_LOG("%s: haystack=%s, needle=%s\n", __FUNCTION__, t, needle); 235 if (strnicmp(t, needle, needle_len) == 0) return t; 236 //if ( memcmp(t, needle, needle_len) == 0 ) return t; 237 } 238 239 return NULL; 240} 241 242static int 243match(const struct sk_buff *skb, 244 const struct net_device *in, 245 const struct net_device *out, 246 const void *matchinfo, 247 int offset, 248 const void *hdr, 249 u_int16_t datalen, 250 int *hotdrop) 251{ 252 const struct ipt_webstr_info *info = matchinfo; 253 struct iphdr *ip = skb->nh.iph; 254 proc_ipt_search search=search_linear; 255 256 char token[] = "< >"; 257 char *wordlist = (char *)&info->string; 258 httpinfo_t htinfo; 259 int flags = 0; 260 int found = 0; 261 long int opt = 0; 262 263 if (!ip || info->len < 1) 264 return 0; 265 266 SPARQ_LOG("\n************************************************\n" 267 "%s: type=%s\n", __FUNCTION__, (info->type == IPT_WEBSTR_URL) 268 ? "IPT_WEBSTR_URL" : (info->type == IPT_WEBSTR_HOST) 269 ? "IPT_WEBSTR_HOST" : "IPT_WEBSTR_CONTENT" ); 270 271 /* Determine the flags value for get_http_info(), and mangle packet 272 * if needed. */ 273 switch(info->type) 274 { 275 case IPT_WEBSTR_URL: /* fall through */ 276 flags |= HTTP_URL; 277 278 case IPT_WEBSTR_HOST: 279 flags |= HTTP_HOST; 280 break; 281 282 case IPT_WEBSTR_CONTENT: 283 opt = simple_strtol(wordlist, (char **)NULL, 10); 284 SPARQ_LOG("%s: string=%s, opt=%#lx\n", __FUNCTION__, wordlist, opt); 285 286 if (opt & (BLK_JAVA | BLK_ACTIVE | BLK_PROXY)) 287 flags |= HTTP_URL; 288 if (opt & BLK_PROXY) 289 flags |= HTTP_HOST; 290 if (opt & BLK_COOKIE) 291 mangle_http_header(skb, HTTP_COOKIE); 292 break; 293 294 default: 295 printk("%s: Sorry! Cannot find this match option.\n", __FILE__); 296 return 0; 297 } 298 299 /* Get the http header info */ 300 if (get_http_info(skb, flags, &htinfo) < 1) 301 return 0; 302 303 /* Check if the http header content contains the forbidden keyword */ 304 if (info->type == IPT_WEBSTR_HOST || info->type == IPT_WEBSTR_URL) { 305 int nlen = 0, hlen = 0; 306 char needle[BUFSIZE], *haystack = NULL; 307 char *next; 308 309 if (info->type == IPT_WEBSTR_HOST) { 310 haystack = htinfo.host; 311 hlen = htinfo.hostlen; 312 } 313 else { 314 haystack = htinfo.url; 315 hlen = htinfo.urllen; 316 } 317 split(needle, wordlist, next, token) { 318 nlen = strlen(needle); 319 SPARQ_LOG("keyword=%s, nlen=%d, hlen=%d\n", needle, nlen, hlen); 320 if (!nlen || !hlen || nlen > hlen) continue; 321 if (search(needle, haystack, nlen, hlen) != NULL) { 322 found = 1; 323 break; 324 } 325 } 326 } 327 else { /* IPT_WEBSTR_CONTENT */ 328 int vicelen; 329 330 if (opt & BLK_JAVA) { 331 vicelen = sizeof(".js") - 1; 332 if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".js", vicelen) == 0) { 333 SPARQ_LOG("%s: MATCH....java\n", __FUNCTION__); 334 found = 1; 335 goto match_ret; 336 } 337 vicelen = sizeof(".class") - 1; 338 if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".class", vicelen) == 0) { 339 SPARQ_LOG("%s: MATCH....java\n", __FUNCTION__); 340 found = 1; 341 goto match_ret; 342 } 343 } 344 if (opt & BLK_ACTIVE){ 345 vicelen = sizeof(".ocx") - 1; 346 if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".ocx", vicelen) == 0) { 347 SPARQ_LOG("%s: MATCH....activex\n", __FUNCTION__); 348 found = 1; 349 goto match_ret; 350 } 351 vicelen = sizeof(".cab") - 1; 352 if (strnicmp(htinfo.url + htinfo.urllen - vicelen, ".cab", vicelen) == 0) { 353 SPARQ_LOG("%s: MATCH....activex\n", __FUNCTION__); 354 found = 1; 355 goto match_ret; 356 } 357 } 358 if (opt & BLK_PROXY){ 359 if (strnicmp(htinfo.url + htinfo.hostlen, "http://", sizeof("http://") - 1) == 0) { 360 SPARQ_LOG("%s: MATCH....proxy\n", __FUNCTION__); 361 found = 1; 362 goto match_ret; 363 } 364 } 365 } 366 367match_ret: 368 SPARQ_LOG("%s: Verdict =======> %s \n",__FUNCTION__ 369 , found ? "DROP" : "ACCEPT"); 370 371 return (found ^ info->invert); 372} 373 374static int 375checkentry(const char *tablename, 376 const struct ipt_ip *ip, 377 void *matchinfo, 378 unsigned int matchsize, 379 unsigned int hook_mask) 380{ 381 382 if (matchsize != IPT_ALIGN(sizeof(struct ipt_webstr_info))) 383 return 0; 384 385 return 1; 386} 387 388static struct ipt_match webstr_match 389= { { NULL, NULL }, "webstr", &match, &checkentry, NULL, THIS_MODULE }; 390 391static int __init init(void) 392{ 393 return ipt_register_match(&webstr_match); 394} 395 396static void __exit fini(void) 397{ 398 ipt_unregister_match(&webstr_match); 399} 400 401module_init(init); 402module_exit(fini); 403