1/* Connection state tracking for netfilter. This is separated from, 2 but required by, the NAT layer; it can also be used by an iptables 3 extension. */ 4 5/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General 6 * Public Licence. 7 * 8 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> 9 * - new API and handling of conntrack/nat helpers 10 * - now capable of multiple expectations for one master 11 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> 12 * - add usage/reference counts to ip_conntrack_expect 13 * - export ip_conntrack[_expect]_{find_get,put} functions 14 * */ 15 16#ifdef MODULE 17#define __NO_VERSION__ 18#endif 19#include <linux/version.h> 20#include <linux/config.h> 21#include <linux/types.h> 22#include <linux/ip.h> 23#include <linux/netfilter.h> 24#include <linux/netfilter_ipv4.h> 25#include <linux/module.h> 26#include <linux/skbuff.h> 27#include <linux/proc_fs.h> 28#include <linux/vmalloc.h> 29#include <linux/brlock.h> 30#include <net/checksum.h> 31#include <linux/stddef.h> 32#include <linux/sysctl.h> 33#include <linux/slab.h> 34//#include "bcmnvram.h" // 2009.12 James. 35//#include <linux/nvram.h> 36 37extern int qos_enable; // 2009.12 James. 38extern ulong qos_wan_ip; // 2009.12 James. 39 40#include <linux/time.h> 41/* For ERR_PTR(). Yeah, I know... --RR */ 42#include <linux/fs.h> 43 44/* This rwlock protects the main hash table, protocol/helper/expected 45 registrations, conntrack timers*/ 46#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) 47#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) 48 49#include <linux/netfilter_ipv4/ip_conntrack.h> 50#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> 51#include <linux/netfilter_ipv4/ip_conntrack_helper.h> 52#include <linux/netfilter_ipv4/ip_conntrack_core.h> 53#include <linux/netfilter_ipv4/listhelp.h> 54 55#define IP_CONNTRACK_VERSION "2.1" 56 57#define DEBUGP(format, args...) 
/* Main lock: protects the conntrack hash table, the protocol/helper
 * registration lists and conntrack timers (see the ASSERT_*_LOCK
 * macros above). */
DECLARE_RWLOCK(ip_conntrack_lock);
/* Secondary lock for matching tuples against the expectation list. */
DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);

/* Optional callback invoked from destroy_conntrack(); installed by the
 * NAT layer so it can clean up its per-conntrack state. */
void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);	/* all pending expectations */
LIST_HEAD(protocol_list);		/* registered L4 protocol trackers */
static LIST_HEAD(helpers);		/* registered conntrack helpers */
unsigned int ip_conntrack_htable_size = 0;	/* number of hash buckets */
static int ip_conntrack_max = 0;	/* max tracked conns; 0 = unlimited */
static atomic_t ip_conntrack_count = ATOMIC_INIT(0);	/* live entries */
struct list_head *ip_conntrack_hash;	/* the hash table itself */
static kmem_cache_t *ip_conntrack_cachep;	/* slab cache for conntracks */

// add by Angela 2008.07.
/* Non-zero while QoS packet classification (deal_track) is active. */
int track_flag = 0;
//ulong ipaddr = 0; // 2009.12 James.

extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;

/* LIST_FIND predicate: does tracker @curr handle protocol @protocol? */
static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
			      u_int8_t protocol)
{
	return protocol == curr->proto;
}

/* Look up the tracker for an IP protocol number, falling back to the
 * generic tracker.  Caller must hold ip_conntrack_lock (read). */
struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	p = LIST_FIND(&protocol_list, proto_cmpfn,
		      struct ip_conntrack_protocol *, protocol);
	if (!p)
		p = &ip_conntrack_generic_protocol;

	return p;
}

/* Locking wrapper around __ip_ct_find_proto(). */
struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	READ_LOCK(&ip_conntrack_lock);
	p = __ip_ct_find_proto(protocol);
	READ_UNLOCK(&ip_conntrack_lock);
	return p;
}

/* Drop one reference to @ct; destroy_conntrack() runs when the count
 * reaches zero. */
inline void
ip_conntrack_put(struct ip_conntrack *ct)
{
	IP_NF_ASSERT(ct);
	IP_NF_ASSERT(ct->infos[0].master);
	/* nf_conntrack_put wants to go via an info struct, so feed it
	   one at random. */
	nf_conntrack_put(&ct->infos[0]);
}

/* Hash a tuple into an index of ip_conntrack_hash. */
static inline u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	/* ntohl because more differences in low bits. */
	/* To ensure that halves of the same connection don't hash
	   clash, we add the source per-proto again. */
	return (ntohl(tuple->src.ip + tuple->dst.ip
		      + tuple->src.u.all + tuple->dst.u.all
		      + tuple->dst.protonum)
		+ ntohs(tuple->src.u.all))
		% ip_conntrack_htable_size;
}

/* Build @tuple from IP header @iph (@len bytes of header+payload) via
 * the protocol module's pkt_to_tuple().  Returns 0 on failure: a
 * non-head fragment, or fewer than 8 protocol bytes present. */
inline int
get_tuple(const struct iphdr *iph, size_t len,
	  struct ip_conntrack_tuple *tuple,
	  struct ip_conntrack_protocol *protocol)
{
	int ret;

	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}
	/* Guarantee 8 protocol bytes: if more wanted, use len param */
	else if (iph->ihl * 4 + 8 > len)
		return 0;

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;

	tuple->src.u.all = tuple->dst.u.all = 0;

	ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl,
				     len - 4*iph->ihl,
				     tuple);
	return ret;
}

/* Fill @inverse with the reply-direction version of @orig; the
 * protocol module inverts its own (port/id) part of the tuple. */
static int
invert_tuple(struct ip_conntrack_tuple *inverse,
	     const struct ip_conntrack_tuple *orig,
	     const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;

	inverse->src.u.all = inverse->dst.u.all = 0;

	return protocol->invert_tuple(inverse, orig);
}


/* ip_conntrack_expect helper functions */

/* Compare tuple parts depending on mask.
 */
static inline int expect_cmp(const struct ip_conntrack_expect *i,
			     const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}

/* Final free of an expectation, once its refcount hit zero and its
 * timer is no longer pending. */
static void
destroy_expect(struct ip_conntrack_expect *exp)
{
	DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
	IP_NF_ASSERT(atomic_read(&exp->use));
	IP_NF_ASSERT(!timer_pending(&exp->timeout));

	kfree(exp);
}

/* Drop a reference on @exp, destroying it on the last put. */
inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	IP_NF_ASSERT(exp);

	if (atomic_dec_and_test(&exp->use)) {
		/* usage count dropped to zero */
		destroy_expect(exp);
	}
}

/* Tuple lookup in the global expectation list; caller holds both the
 * main lock and the expect-tuple lock for reading. */
static inline struct ip_conntrack_expect *
__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
	return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
			 struct ip_conntrack_expect *, tuple);
}

/* Find a expectation corresponding to a tuple; takes a reference the
 * caller must release with ip_conntrack_expect_put(). */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *exp;

	READ_LOCK(&ip_conntrack_lock);
	READ_LOCK(&ip_conntrack_expect_tuple_lock);
	exp = __ip_ct_expect_find(tuple);
	if (exp)
		atomic_inc(&exp->use);
	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
	READ_UNLOCK(&ip_conntrack_lock);

	return exp;
}

/* remove one specific expectation from all lists and drop refcount,
 * does _NOT_ delete the timer.  Caller holds ip_conntrack_lock for
 * writing. */
static void __unexpect_related(struct ip_conntrack_expect *expect)
{
	DEBUGP("unexpect_related(%p)\n", expect);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);

	/* we're not allowed to unexpect a confirmed expectation! */
	IP_NF_ASSERT(!expect->sibling);

	/* delete from global and local lists */
	list_del(&expect->list);
	list_del(&expect->expected_list);

	/* decrement expect-count of master conntrack */
	if (expect->expectant)
		expect->expectant->expecting--;

	ip_conntrack_expect_put(expect);
}

/* remove one specific expecatation from all lists, drop refcount
 * and expire timer.
 * This function can _NOT_ be called for confirmed expects! */
static void unexpect_related(struct ip_conntrack_expect *expect)
{
	IP_NF_ASSERT(expect->expectant);
	/* if we are supposed to have a timer, but we can't delete
	 * it: race condition.  __unexpect_related will
	 * be called by the timeout function instead */
	if (expect->expectant->helper
	    && expect->expectant->helper->timeout
	    && !del_timer(&expect->timeout))
		return;

	__unexpect_related(expect);
}

/* delete all unconfirmed expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
{
	struct list_head *exp_entry, *next;
	struct ip_conntrack_expect *exp;

	DEBUGP("remove_expectations(%p)\n", ct);

	/* walk with an explicit 'next' since unexpect_related() unlinks
	 * the current entry */
	for (exp_entry = ct->sibling_list.next;
	     exp_entry != &ct->sibling_list; exp_entry = next) {
		next = exp_entry->next;
		exp = list_entry(exp_entry, struct ip_conntrack_expect,
				 expected_list);

		/* we skip established expectations, as we want to delete
		 * the un-established ones only */
		if (exp->sibling) {
			DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
			continue;
		}

		IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
		IP_NF_ASSERT(exp->expectant == ct);

		/* delete expectation from global and private lists */
		unexpect_related(exp);
	}
}

/* Unlink @ct from both hash buckets and kill its pending
 * expectations.  Caller holds ip_conntrack_lock for writing. */
static void
clean_from_lists(struct ip_conntrack *ct)
{
	DEBUGP("clean_from_lists(%p)\n", ct);
	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
	/* Remove from both hash lists: must not NULL out next ptrs,
	   otherwise we'll look unconfirmed.  Fortunately, LIST_DELETE
	   doesn't do this. --RR */
	LIST_DELETE(&ip_conntrack_hash
		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&ip_conntrack_hash
		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
		    &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all un-established, pending expectations */
	remove_expectations(ct);
}

/* nf_conntrack destructor: runs when the last reference is dropped
 * (see ip_conntrack_put). */
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	/* release the reference taken on the master conntrack when this
	 * one was created from an expectation */
	if (ct->master && master_ct(ct))
		ip_conntrack_put(master_ct(ct));

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* Delete our master expectation */
	if (ct->master) {
		/* can't call __unexpect_related here,
		 * since it would screw up expect_list */
		list_del(&ct->master->expected_list);
		kfree(ct->master);
	}
	WRITE_UNLOCK(&ip_conntrack_lock);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	kmem_cache_free(ip_conntrack_cachep, ct);
	atomic_dec(&ip_conntrack_count);
}

/* Timer callback: the conntrack's timeout expired, tear it down. */
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	WRITE_LOCK(&ip_conntrack_lock);
	clean_from_lists(ct);
	WRITE_UNLOCK(&ip_conntrack_lock);
	ip_conntrack_put(ct);	/* drop the hash table's reference */
}

/* LIST_FIND predicate: tuple match, optionally skipping one conntrack
 * (used by NAT to ignore the entry being set up). */
static inline int
conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
		    const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	return i->ctrack != ignored_conntrack
		&& ip_ct_tuple_equal(tuple, &i->tuple);
}

/* Hash lookup without reference counting; caller holds the lock. */
static struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)],
		      conntrack_tuple_cmp,
		      struct ip_conntrack_tuple_hash *,
		      tuple, ignored_conntrack);
	return h;
}

/* Find a connection corresponding to a tuple.
 */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	READ_LOCK(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	/* take a reference for the caller before dropping the lock */
	if (h)
		atomic_inc(&h->ctrack->ct_general.use);
	READ_UNLOCK(&ip_conntrack_lock);

	return h;
}

/* Recover the conntrack and direction/state info from an nf_ct_info
 * pointer (which is one element of ct->infos[]). */
static inline struct ip_conntrack *
__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack *ct
		= (struct ip_conntrack *)nfct->master;

	/* ctinfo is the index of the nfct inside the conntrack */
	*ctinfo = nfct - ct->infos;
	IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
	return ct;
}

/* Return conntrack and conntrack_info given skb->nfct->master */
struct ip_conntrack *
ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
{
	if (skb->nfct)
		return __ip_conntrack_get(skb->nfct, ctinfo);
	return NULL;
}

/* Confirm a connection given skb->nfct; places it in hash table */
int
__ip_conntrack_confirm(struct nf_ct_info *nfct)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = __ip_conntrack_get(nfct, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means noone else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&ip_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct ip_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct ip_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
//		ct->tuplehash[IP_CT_DIR_ORIGINAL].track.number =1;
//		ct->tuplehash[IP_CT_DIR_ORIGINAL].track.size = ntohs(skb->nh.iph->tot_len);
		list_prepend(&ip_conntrack_hash[hash],
			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
		list_prepend(&ip_conntrack_hash[repl_hash],
			     &ct->tuplehash[IP_CT_DIR_REPLY]);

		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		/* extra reference now owned by the hash table / timer */
		atomic_inc(&ct->ct_general.use);
		WRITE_UNLOCK(&ip_conntrack_lock);
		return NF_ACCEPT;
	}

	WRITE_UNLOCK(&ip_conntrack_lock);
	return NF_DROP;
}

/* Returns true if a connection corresponds to the tuple (required
   for NAT.
*/ 491int 492ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, 493 const struct ip_conntrack *ignored_conntrack) 494{ 495 struct ip_conntrack_tuple_hash *h; 496 497 READ_LOCK(&ip_conntrack_lock); 498 h = __ip_conntrack_find(tuple, ignored_conntrack); 499 READ_UNLOCK(&ip_conntrack_lock); 500 501 return h != NULL; 502} 503 504/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ 505struct ip_conntrack * 506icmp_error_track(struct sk_buff *skb, 507 enum ip_conntrack_info *ctinfo, 508 unsigned int hooknum) 509{ 510 const struct iphdr *iph; 511 struct icmphdr *hdr; 512 struct ip_conntrack_tuple innertuple, origtuple; 513 struct iphdr *inner; 514 size_t datalen; 515 struct ip_conntrack_protocol *innerproto; 516 struct ip_conntrack_tuple_hash *h; 517 518 IP_NF_ASSERT(iph->protocol == IPPROTO_ICMP); 519 IP_NF_ASSERT(skb->nfct == NULL); 520 521 iph = skb->nh.iph; 522 hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl); 523 inner = (struct iphdr *)(hdr + 1); 524 datalen = skb->len - iph->ihl*4 - sizeof(*hdr); 525 526 if (skb->len < iph->ihl * 4 + sizeof(*hdr) + sizeof(*iph)) { 527 DEBUGP("icmp_error_track: too short\n"); 528 return NULL; 529 } 530 531 if (hdr->type != ICMP_DEST_UNREACH 532 && hdr->type != ICMP_SOURCE_QUENCH 533 && hdr->type != ICMP_TIME_EXCEEDED 534 && hdr->type != ICMP_PARAMETERPROB 535 && hdr->type != ICMP_REDIRECT) 536 return NULL; 537 538 /* Ignore ICMP's containing fragments (shouldn't happen) */ 539 if (inner->frag_off & htons(IP_OFFSET)) { 540 DEBUGP("icmp_error_track: fragment of proto %u\n", 541 inner->protocol); 542 return NULL; 543 } 544 545 /* Ignore it if the checksum's bogus. */ 546 if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) { 547 DEBUGP("icmp_error_track: bad csum\n"); 548 return NULL; 549 } 550 551 innerproto = ip_ct_find_proto(inner->protocol); 552 /* Are they talking about one of our connections? 
*/ 553 if (inner->ihl * 4 + 8 > datalen 554 || !get_tuple(inner, datalen, &origtuple, innerproto)) { 555 DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n", 556 inner->protocol, inner->ihl, 8, 557 datalen); 558 return NULL; 559 } 560 561 /* Ordinarily, we'd expect the inverted tupleproto, but it's 562 been preserved inside the ICMP. */ 563 if (!invert_tuple(&innertuple, &origtuple, innerproto)) { 564 DEBUGP("icmp_error_track: Can't invert tuple\n"); 565 return NULL; 566 } 567 568 *ctinfo = IP_CT_RELATED; 569 570 h = ip_conntrack_find_get(&innertuple, NULL); 571 if (!h) { 572 /* Locally generated ICMPs will match inverted if they 573 haven't been SNAT'ed yet */ 574 if (hooknum == NF_IP_LOCAL_OUT) 575 h = ip_conntrack_find_get(&origtuple, NULL); 576 577 if (!h) { 578 DEBUGP("icmp_error_track: no match\n"); 579 return NULL; 580 } 581 /* Reverse direction from that found */ 582 if (DIRECTION(h) != IP_CT_DIR_REPLY) 583 *ctinfo += IP_CT_IS_REPLY; 584 } else { 585 if (DIRECTION(h) == IP_CT_DIR_REPLY) 586 *ctinfo += IP_CT_IS_REPLY; 587 } 588 589 /* Update skb to refer to this connection */ 590 skb->nfct = &h->ctrack->infos[*ctinfo]; 591 return h->ctrack; 592} 593 594/* There's a small race here where we may free a just-assured 595 connection. Too bad: we're in trouble anyway. 
*/ 596static inline int unreplied(const struct ip_conntrack_tuple_hash *i) 597{ 598 return !(i->ctrack->status & IPS_ASSURED); 599} 600 601static int early_drop(struct list_head *chain) 602{ 603 /* Traverse backwards: gives us oldest, which is roughly LRU */ 604 struct ip_conntrack_tuple_hash *h; 605 int dropped = 0; 606 607 READ_LOCK(&ip_conntrack_lock); 608 h = LIST_FIND(chain, unreplied, struct ip_conntrack_tuple_hash *); 609 if (h) 610 atomic_inc(&h->ctrack->ct_general.use); 611 READ_UNLOCK(&ip_conntrack_lock); 612 613 if (!h) 614 return dropped; 615 616 if (del_timer(&h->ctrack->timeout)) { 617 death_by_timeout((unsigned long)h->ctrack); 618 dropped = 1; 619 } 620 ip_conntrack_put(h->ctrack); 621 return dropped; 622} 623 624static inline int helper_cmp(const struct ip_conntrack_helper *i, 625 const struct ip_conntrack_tuple *rtuple) 626{ 627 return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); 628} 629 630struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) 631{ 632 return LIST_FIND(&helpers, helper_cmp, 633 struct ip_conntrack_helper *, 634 tuple); 635} 636 637/* Allocate a new conntrack: we return -ENOMEM if classification 638 failed due to stress. Otherwise it really is unclassifiable. */ 639static struct ip_conntrack_tuple_hash * 640init_conntrack(const struct ip_conntrack_tuple *tuple, 641 struct ip_conntrack_protocol *protocol, 642 struct sk_buff *skb) 643{ 644 struct ip_conntrack *conntrack; 645 struct ip_conntrack_tuple repl_tuple; 646 size_t hash, repl_hash; 647 struct ip_conntrack_expect *expected; 648 int i; 649 static unsigned int drop_next = 0; 650 651 hash = hash_conntrack(tuple); 652 653 if (ip_conntrack_max && 654 atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { 655 /* Try dropping from random chain, or else from the 656 chain about to put into (in case they're trying to 657 bomb one hash chain). 
*/ 658 unsigned int next = (drop_next++)%ip_conntrack_htable_size; 659 660 if (!early_drop(&ip_conntrack_hash[next]) 661 && !early_drop(&ip_conntrack_hash[hash])) { 662 if (net_ratelimit()) 663 printk(KERN_WARNING 664 "ip_conntrack: table full, dropping" 665 " packet.\n"); 666 return ERR_PTR(-ENOMEM); 667 } 668 } 669 670 if (!invert_tuple(&repl_tuple, tuple, protocol)) { 671 DEBUGP("Can't invert tuple.\n"); 672 return NULL; 673 } 674 repl_hash = hash_conntrack(&repl_tuple); 675 676 conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); 677 if (!conntrack) { 678 DEBUGP("Can't allocate conntrack.\n"); 679 return ERR_PTR(-ENOMEM); 680 } 681 682 memset(conntrack, 0, sizeof(struct ip_conntrack)); 683 atomic_set(&conntrack->ct_general.use, 1); 684 conntrack->ct_general.destroy = destroy_conntrack; 685 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; 686 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; 687 conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; 688 conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; 689 690 /*add by Angela */ 691 /*Initial the flag, number and length of the connect track, when create a connect track first time */ 692 //if(nvram_match("qos_enable", "1")) 693 if(qos_enable == 1) // 2009.12 James. 694 { 695 track_flag = 1; 696 //ipaddr = atoi(nvram_safe_get("wan_ipaddr_t")); // 2009.12 James. 
697 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.flag = 0; 698 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.number =1; 699 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.length[0] = ntohs(skb->nh.iph->tot_len); 700 } 701 else 702 track_flag = 0; 703 704 for (i=0; i < IP_CT_NUMBER; i++) 705 conntrack->infos[i].master = &conntrack->ct_general; 706 707 if (!protocol->new(conntrack, skb->nh.iph, skb->len)) { 708 kmem_cache_free(ip_conntrack_cachep, conntrack); 709 return NULL; 710 } 711 /* Don't set timer yet: wait for confirmation */ 712 init_timer(&conntrack->timeout); 713 conntrack->timeout.data = (unsigned long)conntrack; 714 conntrack->timeout.function = death_by_timeout; 715 716 INIT_LIST_HEAD(&conntrack->sibling_list); 717 718 /* Mark clearly that it's not in the hash table. */ 719 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list.next = NULL; 720 721 WRITE_LOCK(&ip_conntrack_lock); 722 /* Need finding and deleting of expected ONLY if we win race */ 723 READ_LOCK(&ip_conntrack_expect_tuple_lock); 724 expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp, 725 struct ip_conntrack_expect *, tuple); 726 READ_UNLOCK(&ip_conntrack_expect_tuple_lock); 727 728 /* Look up the conntrack helper for master connections only */ 729 if (!expected) 730 conntrack->helper = ip_ct_find_helper(&repl_tuple); 731 732 /* If the expectation is dying, then this is a looser. */ 733 if (expected 734 && expected->expectant->helper 735 && expected->expectant->helper->timeout 736 && ! del_timer(&expected->timeout)) 737 expected = NULL; 738 739 /* If master is not in hash table yet (ie. packet hasn't left 740 this machine yet), how can other end know about expected? 741 Hence these are not the droids you are looking for (if 742 master ct never got confirmed, we'd hold a reference to it 743 and weird things would happen to future packets). 
*/ 744 if (expected && is_confirmed(expected->expectant)) { 745 DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", 746 conntrack, expected); 747 /* Welcome, Mr. Bond. We've been expecting you... */ 748 IP_NF_ASSERT(master_ct(conntrack)); 749 conntrack->status = IPS_EXPECTED; 750 conntrack->master = expected; 751 expected->sibling = conntrack; 752 LIST_DELETE(&ip_conntrack_expect_list, expected); 753 INIT_LIST_HEAD(&expected->list); 754 expected->expectant->expecting--; 755 nf_conntrack_get(&master_ct(conntrack)->infos[0]); 756 } 757 atomic_inc(&ip_conntrack_count); 758 WRITE_UNLOCK(&ip_conntrack_lock); 759 760 if (expected && expected->expectfn) 761 expected->expectfn(conntrack); 762 763// track[hash].number = 1; 764// track[hash].size = ntohs(skb->nh.iph->tot_len); 765 766 return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; 767} 768 769#define isdigit(c) (c >= '0' && c <= '9') 770 __inline unsigned int atoi(const char *s) 771{ 772 int i=0,j,k=0; 773 for(j=1; j<5; j++) { 774 i=0; 775 while (isdigit(*s)) { 776 i = i*10 + *(s++) - '0'; 777 } 778 k = k*256 +i; 779 if(j == 4) 780 return k; 781 s++; 782 } 783 return k; 784} 785 786//On success, returns h->track.flags & IP_TRACK_MARK 787inline int deal_track(struct ip_conntrack_tuple_hash *h, int len) 788{ 789 struct ip_conntrack_tuple_hash *rep_h; 790 int i, org_len =0, rep_len = 0; 791 792 // Add the packet number of this connect track and record the length of the packet 793 h->track.number ++; 794 h->track.length[(h->track.number-1) % IP_TRACK_MAX] = len; 795 if((h->track.number >IP_TRACK_MAX) && !(h->track.flag & IP_TRACK_FULL)) 796 h->track.flag |= IP_TRACK_FULL; 797 798 // The download packet set the IP_TRACK_DOWN flag 799 //if(ntohl(h->tuple.dst.ip) == ipaddr) 800 if(ntohl(h->tuple.dst.ip) == qos_wan_ip) // 2009.12 James. 
801 h->track.flag |= IP_TRACK_DOWN; 802 803 // if the destination port of this connect track is one of 80,8080,443.We return IP_TRACK_PORT 804 if((h->track.flag & IP_TRACK_PORT) == IP_TRACK_PORT) 805 return IP_TRACK_PORT; 806 807 // if the connect track is data connect ,we return IP_TRACK_DATA 808 if((h->track.flag & IP_TRACK_DATA) == IP_TRACK_DATA) 809 return IP_TRACK_DATA; 810 811 // if the packet number is larger than the size what we want to compare, return 0 812 if(h->track.number > IP_TRACK_COMPARE) 813 return 0; 814 815 // we just compare datas form the ORIGINAL direction start compare 816 if(DIRECTION(h) == IP_CT_DIR_REPLY) 817 rep_h = &h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]; 818 else 819 rep_h = &h->ctrack->tuplehash[IP_CT_DIR_REPLY]; 820 if(!rep_h) 821 return 0; 822 823 // If the packet number reaches the size what we want to compare, we compare 824 if((h->track.number & IP_TRACK_FULL) && (rep_h->track.flag & IP_TRACK_FULL)) { 825 for(i = 0; i < IP_TRACK_MAX; i++) { 826 org_len += h->track.length[i]; 827 rep_len += rep_h->track.length[i]; 828 } 829 830 //compare for data 831 if(org_len > 512*IP_TRACK_MAX || rep_len > 512*IP_TRACK_MAX ) { 832 //compare for port 833 if(ntohs(h->tuple.dst.u.all) == 80 || 834 ntohs(h->tuple.dst.u.all) == 8080 || 835 ntohs(h->tuple.dst.u.all) == 443 || 836 ntohs(h->tuple.src.u.all) == 80 || 837 ntohs(h->tuple.src.u.all) == 8080 || 838 ntohs(h->tuple.src.u.all) == 443) { 839 h->track.flag |= IP_TRACK_PORT; 840 rep_h->track.flag |= IP_TRACK_PORT; 841 return IP_TRACK_PORT; 842 } 843 h->track.flag |= IP_TRACK_DATA; 844 rep_h->track.flag |= IP_TRACK_DATA; 845 return IP_TRACK_DATA; 846 } 847 } 848 if((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN) 849 return IP_TRACK_DOWN; 850 851 return 0; 852} 853 854/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ 855static inline struct ip_conntrack * 856resolve_normal_ct(struct sk_buff *skb, 857 struct ip_conntrack_protocol *proto, 858 int *set_reply, 859 unsigned int 
hooknum, 860 enum ip_conntrack_info *ctinfo) 861{ 862 struct ip_conntrack_tuple tuple; 863 struct ip_conntrack_tuple_hash *h; 864 865 IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); 866 867 if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto)) 868 return NULL; 869 870 /* look for tuple match */ 871 h = ip_conntrack_find_get(&tuple, NULL); 872 if (!h) { 873 h = init_conntrack(&tuple, proto, skb); 874 if (!h) 875 return NULL; 876 if (IS_ERR(h)) 877 return (void *)h; 878 } 879 /*add by Angela */ 880 else if(track_flag == 1) { 881 882 switch(deal_track(h, ntohs(skb->nh.iph->tot_len))) { 883 case IP_TRACK_DATA: 884 if ((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN) 885 skb->nfmark = 90; 886 else 887 skb->nfmark = 50; 888 break; 889 case IP_TRACK_PORT: 890 if ((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN) 891 skb->nfmark = 80; 892 else 893 skb->nfmark = 20; 894 break; 895 case IP_TRACK_DOWN: 896 skb->nfmark = 70; 897 break; 898 } 899 } 900 901 /* It exists; we have (non-exclusive) reference. */ 902 if (DIRECTION(h) == IP_CT_DIR_REPLY) { 903 *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; 904 /* Please set reply bit if this packet OK */ 905 *set_reply = 1; 906 } else { 907 /* Once we've had two way comms, always ESTABLISHED. */ 908 if (h->ctrack->status & IPS_SEEN_REPLY) { 909 DEBUGP("ip_conntrack_in: normal packet for %p\n", 910 h->ctrack); 911 *ctinfo = IP_CT_ESTABLISHED; 912 } else if (h->ctrack->status & IPS_EXPECTED) { 913 DEBUGP("ip_conntrack_in: related packet for %p\n", 914 h->ctrack); 915 *ctinfo = IP_CT_RELATED; 916 } else { 917 DEBUGP("ip_conntrack_in: new packet for %p\n", 918 h->ctrack); 919 *ctinfo = IP_CT_NEW; 920 } 921 *set_reply = 0; 922 } 923 924 skb->nfct = &h->ctrack->infos[*ctinfo]; 925 return h->ctrack; 926} 927 928/* Netfilter hook itself. 
 */
/* Netfilter hook entry point: defragments, classifies the packet
 * against the conntrack table and returns an NF_* verdict. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply;
	int ret;

	(*pskb)->nfcache |= NFC_UNKNOWN;

/* Doesn't cover locally-generated broadcast, so not worth it. */

	/* Previously seen (loopback)?  Ignore.  Do this before
	   fragment check. */
	if ((*pskb)->nfct)
		return NF_ACCEPT;

	/* Gather fragments. */
	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
		*pskb = ip_ct_gather_frags(*pskb);
		if (!*pskb)
			return NF_STOLEN;
	}

	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);

	/* It may be an icmp error... */
	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
	    && icmp_error_track(*pskb, &ctinfo, hooknum))
		return NF_ACCEPT;

	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
		/* Not valid part of a connection */
		return NF_ACCEPT;

	if (IS_ERR(ct))
		/* Too stressed to deal. */
		return NF_DROP;

	IP_NF_ASSERT((*pskb)->nfct);

	/* let the protocol module update state; -1 means invalid */
	ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo);
	if (ret == -1) {
		/* Invalid */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		return NF_ACCEPT;
	}

	/* give the helper (FTP, IRC, ...) a look at the payload */
	if (ret != NF_DROP && ct->helper) {
		ret = ct->helper->help((*pskb)->nh.iph, (*pskb)->len,
				       ct, ctinfo);
		if (ret == -1) {
			/* Invalid */
			nf_conntrack_put((*pskb)->nfct);
			(*pskb)->nfct = NULL;
			return NF_ACCEPT;
		}
	}
	if (set_reply)
		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);

	return ret;
}

/* Invert a tuple, resolving the protocol tracker ourselves. */
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
}

/* LIST_FIND predicate: is expectation @i a resend of (tuple, mask)?
 * Matches on ct_tuple when set, else on the plain tuple. */
static inline int resent_expect(const struct ip_conntrack_expect *i,
				const struct ip_conntrack_tuple *tuple,
				const struct ip_conntrack_tuple *mask)
{
	DEBUGP("resent_expect\n");
	DEBUGP("   tuple: "); DUMP_TUPLE(&i->tuple);
	DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
	DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
	return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
		 || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
		&& ip_ct_tuple_equal(&i->mask, mask));
}

/* Would two expected things clash?
 */
static inline int expect_clash(const struct ip_conntrack_expect *i,
			       const struct ip_conntrack_tuple *tuple,
			       const struct ip_conntrack_tuple *mask)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { i->mask.src.ip & mask->src.ip,
		      { i->mask.src.u.all & mask->src.u.all } },
		    { i->mask.dst.ip & mask->dst.ip,
		      { i->mask.dst.u.all & mask->dst.u.all },
		      i->mask.dst.protonum & mask->dst.protonum } };

	return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
}

/* Public locking wrapper around unexpect_related(). */
inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
{
	WRITE_LOCK(&ip_conntrack_lock);
	unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}

/* Timer callback: an unconfirmed expectation expired; the timer has
 * already fired, so go straight to __unexpect_related(). */
static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *expect = (void *) ul_expect;

	DEBUGP("expectation %p timed out\n", expect);
	WRITE_LOCK(&ip_conntrack_lock);
	__unexpect_related(expect);
	WRITE_UNLOCK(&ip_conntrack_lock);
}

/* Add a related connection.
 */
/* Register expectation `expect' on master connection `related_to'.
 *
 * Returns:
 *   0        new expectation installed
 *   -EEXIST  same expectation re-sent; its timer was refreshed
 *   -EPERM   helper's max_expected reached (if the helper allows
 *            reuse, the oldest unconfirmed expectation is evicted
 *            and the new one is still installed)
 *   -EBUSY   clashes with a different existing expectation
 *   -ENOMEM  allocation failure
 *
 * Takes and releases ip_conntrack_lock for writing. */
int ip_conntrack_expect_related(struct ip_conntrack *related_to,
				struct ip_conntrack_expect *expect)
{
	struct ip_conntrack_expect *old, *new;
	int ret = 0;

	WRITE_LOCK(&ip_conntrack_lock);
	/* Because of the write lock, no reader can walk the lists,
	 * so there is no need to use the tuple lock too */

	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
	DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
	DEBUGP("mask: "); DUMP_TUPLE_RAW(&expect->mask);

	/* Is this a re-sent request for an expectation we already have? */
	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
			struct ip_conntrack_expect *, &expect->tuple,
			&expect->mask);
	if (old) {
		/* Helper private data may contain offsets but no pointers
		   pointing into the payload - otherwise we should have to copy
		   the data filled out by the helper over the old one */
		DEBUGP("expect_related: resent packet\n");
		if (related_to->helper &&
		    related_to->helper->timeout) {
			/* Refresh the old expectation's timer; if del_timer
			 * fails it is already dying, so fall through and
			 * install a fresh expectation instead. */
			if (!del_timer(&old->timeout)) {
				/* expectation is dying. Fall through */
				old = NULL;
			} else {
				old->timeout.expires = jiffies +
					related_to->helper->timeout * HZ;
				add_timer(&old->timeout);
			}
		}

		if (old) {
			WRITE_UNLOCK(&ip_conntrack_lock);
			return -EEXIST;
		}
	} else if (related_to->helper &&
		   related_to->helper->max_expected &&
		   related_to->expecting >= related_to->helper->max_expected) {
		struct list_head *cur_item;
		/* old == NULL */
		if (!(related_to->helper->flags &
		      IP_CT_HELPER_F_REUSE_EXPECT)) {
			/* Helper forbids reuse: refuse outright. */
			WRITE_UNLOCK(&ip_conntrack_lock);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: max number of expected "
				       "connections %i of %s reached for "
				       "%u.%u.%u.%u->%u.%u.%u.%u\n",
				       related_to->helper->max_expected,
				       related_to->helper->name,
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
				       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
			return -EPERM;
		}
		DEBUGP("ip_conntrack: max number of expected "
		       "connections %i of %s reached for "
		       "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
		       related_to->helper->max_expected,
		       related_to->helper->name,
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));

		/* choose the the oldest expectation to evict */
		list_for_each(cur_item, &related_to->sibling_list) {
			struct ip_conntrack_expect *cur;

			cur = list_entry(cur_item,
					 struct ip_conntrack_expect,
					 expected_list);
			/* sibling == NULL means not yet confirmed. */
			if (cur->sibling == NULL) {
				old = cur;
				break;
			}
		}

		/* (!old) cannot happen, since related_to->expecting is the
		 * number of unconfirmed expects */
		IP_NF_ASSERT(old);

		/* newnat14 does not reuse the real allocated memory
		 * structures but rather unexpects the old and
		 * allocates a new. unexpect_related will decrement
		 * related_to->expecting.
		 */
		unexpect_related(old);
		ret = -EPERM;
	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
			     struct ip_conntrack_expect *, &expect->tuple,
			     &expect->mask)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_related: busy!\n");
		return -EBUSY;
	}

	/* GFP_ATOMIC: we hold a lock and may be in softirq context. */
	new = (struct ip_conntrack_expect *)
	      kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
	if (!new) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_relaed: OOM allocating expect\n");
		return -ENOMEM;
	}

	/* Zero out the new structure, then fill out it with the data */
	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
	memset(new, 0, sizeof(*expect));
	INIT_LIST_HEAD(&new->list);
	INIT_LIST_HEAD(&new->expected_list);
	memcpy(new, expect, sizeof(*expect));
	new->expectant = related_to;
	new->sibling = NULL;
	/* increase usage count. This sucks. The memset above overwrites
	 * old usage count [if still present] and we increase to one. Only
	 * works because everything is done under ip_conntrack_lock() */
	atomic_inc(&new->use);

	/* add to expected list for this connection */
	list_add(&new->expected_list, &related_to->sibling_list);
	/* add to global list of expectations */
	list_prepend(&ip_conntrack_expect_list, &new->list);
	/* add and start timer if required */
	if (related_to->helper &&
	    related_to->helper->timeout) {
		init_timer(&new->timeout);
		new->timeout.data = (unsigned long)new;
		new->timeout.function = expectation_timed_out;
		new->timeout.expires = jiffies +
			related_to->helper->timeout * HZ;
		add_timer(&new->timeout);
	}
	related_to->expecting++;

	WRITE_UNLOCK(&ip_conntrack_lock);

	return ret;
}

/* Change tuple in an existing expectation */
/* Used by NAT: returns 0 when the tuple was changed (or already equals
 * newtuple), -1 when NAT must pick a different tuple.  Caller must
 * hold ip_conntrack_lock for reading. */
int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
			       struct ip_conntrack_tuple *newtuple)
{
	int ret;

	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
	DEBUGP("change_expect:\n");
	DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
	DEBUGP("exp mask: "); DUMP_TUPLE_RAW(&expect->mask);
	DEBUGP("newtuple: "); DUMP_TUPLE_RAW(newtuple);
	if (expect->ct_tuple.dst.protonum == 0) {
		/* Never seen before */
		DEBUGP("change expect: never seen before\n");
		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
				 struct ip_conntrack_expect *, newtuple, &expect->mask)) {
			/* Force NAT to find an unused tuple */
			ret = -1;
		} else {
			/* Save the original tuple in ct_tuple, then adopt
			 * the new one. */
			memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
			memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
			ret = 0;
		}
	} else {
		/* Resent packet */
		DEBUGP("change expect: resent packet\n");
		if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
			ret = 0;
		} else {
			/* Force NAT to
choose again the same port */
			ret = -1;
		}
	}
	WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);

	return ret;
}

/* Alter reply tuple (maybe alter helper). If it's already taken,
   return 0 and don't do alteration. */
/* Returns 1 on success.  Only valid for unconfirmed conntracks
 * (asserted below), since confirmed ones are already hashed by their
 * reply tuple. */
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			     const struct ip_conntrack_tuple *newreply)
{
	WRITE_LOCK(&ip_conntrack_lock);
	if (__ip_conntrack_find(newreply, conntrack)) {
		WRITE_UNLOCK(&ip_conntrack_lock);
		return 0;
	}
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	/* Re-select the helper for the new reply tuple, but only when
	 * this conntrack has no master connection. */
	if (!conntrack->master)
		conntrack->helper = LIST_FIND(&helpers, helper_cmp,
					      struct ip_conntrack_helper *,
					      newreply);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 1;
}

/* Register a conntrack helper (FTP, IRC, ...) on the global helper
 * list.  Pins the module while the helper is registered. */
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	MOD_INC_USE_COUNT;

	WRITE_LOCK(&ip_conntrack_lock);
	list_prepend(&helpers, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	return 0;
}

/* LIST_FIND_W callback: detach helper `me' from a conntrack, dropping
 * its pending expectations first.  Always returns 0 so the walk
 * visits every entry. */
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
			 const struct ip_conntrack_helper *me)
{
	if (i->ctrack->helper == me) {
		/* Get rid of any expected. */
		remove_expectations(i->ctrack);
		/* And *then* set helper to NULL */
		i->ctrack->helper = NULL;
	}
	return 0;
}

/* Unregister a helper: unlink it and scrub every conntrack that still
 * references it. */
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;

	/* Need write lock here, to delete helper. */
	WRITE_LOCK(&ip_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expecteds, set helpers to NULL.
 */
	for (i = 0; i < ip_conntrack_htable_size; i++)
		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
			    struct ip_conntrack_tuple_hash *, me);
	WRITE_UNLOCK(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	MOD_DEC_USE_COUNT;
}

/* Refresh conntrack for this many jiffies. */
void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
{
	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);

	WRITE_LOCK(&ip_conntrack_lock);
	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct))
		/* Store the raw interval; presumably the confirm path adds
		 * jiffies when it arms the timer -- not visible here. */
		ct->timeout.expires = extra_jiffies;
	else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
		}
	}
	WRITE_UNLOCK(&ip_conntrack_lock);
}

/* Returns new sk_buff, or NULL */
/* Reassemble IP fragments, preserving socket ownership across the
 * defrag (ip_defrag may free the original skb) and linearizing the
 * result so conntrack can inspect it contiguously. */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG
	unsigned int olddebug = skb->nf_debug;
#endif
	/* Detach from the socket but keep a reference so we can
	 * reattach ownership to the reassembled skb below. */
	if (sk) {
		sock_hold(sk);
		skb_orphan(skb);
	}

	local_bh_disable();
	skb = ip_defrag(skb);
	local_bh_enable();

	if (!skb) {
		/* Fragment queued, nothing to process yet. */
		if (sk) sock_put(sk);
		return skb;
	} else if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
		kfree_skb(skb);
		if (sk) sock_put(sk);
		return NULL;
	}

	if (sk) {
		skb_set_owner_w(skb, sk);
		sock_put(sk);
	}

	/* Reassembly rebuilt the header: recompute the IP checksum. */
	ip_send_check(skb->nh.iph);
	skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
	/* Packet path as if nothing had happened. */
	skb->nf_debug = olddebug;
#endif
	return skb;
}

/* Used by ipt_REJECT.
*/ 1361static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct) 1362{ 1363 struct ip_conntrack *ct; 1364 enum ip_conntrack_info ctinfo; 1365 1366 ct = __ip_conntrack_get(nfct, &ctinfo); 1367 1368 /* This ICMP is in reverse direction to the packet which 1369 caused it */ 1370 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) 1371 ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; 1372 else 1373 ctinfo = IP_CT_RELATED; 1374 1375 /* Attach new skbuff, and increment count */ 1376 nskb->nfct = &ct->infos[ctinfo]; 1377 atomic_inc(&ct->ct_general.use); 1378} 1379 1380static inline int 1381do_kill(const struct ip_conntrack_tuple_hash *i, 1382 int (*kill)(const struct ip_conntrack *i, void *data), 1383 void *data) 1384{ 1385 return kill(i->ctrack, data); 1386} 1387 1388/* Bring out ya dead! */ 1389static struct ip_conntrack_tuple_hash * 1390get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data), 1391 void *data) 1392{ 1393 struct ip_conntrack_tuple_hash *h = NULL; 1394 unsigned int i; 1395 1396 READ_LOCK(&ip_conntrack_lock); 1397 for (i = 0; !h && i < ip_conntrack_htable_size; i++) { 1398 h = LIST_FIND(&ip_conntrack_hash[i], do_kill, 1399 struct ip_conntrack_tuple_hash *, kill, data); 1400 } 1401 if (h) 1402 atomic_inc(&h->ctrack->ct_general.use); 1403 READ_UNLOCK(&ip_conntrack_lock); 1404 1405 return h; 1406} 1407 1408void 1409ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), 1410 void *data) 1411{ 1412 struct ip_conntrack_tuple_hash *h; 1413 1414 /* This is order n^2, by the way. */ 1415 while ((h = get_next_corpse(kill, data)) != NULL) { 1416 /* Time to push up daises... */ 1417 if (del_timer(&h->ctrack->timeout)) 1418 death_by_timeout((unsigned long)h->ctrack); 1419 /* ... else the timer will get him soon. */ 1420 1421 ip_conntrack_put(h->ctrack); 1422 } 1423} 1424 1425/* Fast function for those who don't want to parse /proc (and I don't 1426 blame them). 
 */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
/* getsockopt(SO_ORIGINAL_DST) handler: look up the connection this
 * TCP socket belongs to and copy the original-direction destination
 * (address/port before any NAT rewriting -- see the comment above) to
 * userspace as a struct sockaddr_in.
 * Returns 0 on success, -ENOPROTOOPT (not TCP), -EINVAL (buffer too
 * short), -EFAULT (bad user pointer) or -ENOENT (no connection). */
static int
getorigdst(struct sock *sk, int optval, void *user, int *len)
{
	struct ip_conntrack_tuple_hash *h;
	/* Original-direction tuple as seen from this socket. */
	struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } },
					    { sk->daddr, { sk->dport },
					      IPPROTO_TCP } };

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->prot->name, "TCP") != 0) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;

		sin.sin_family = AF_INET;
		/* Report the ORIGINAL direction's destination. */
		sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		/* Drop the reference taken by ip_conntrack_find_get(). */
		ip_conntrack_put(h->ctrack);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}

/* Netfilter sockopt registration: serves getsockopt(SO_ORIGINAL_DST)
 * on PF_INET sockets; no setsockopts. */
static struct nf_sockopt_ops so_getorigdst
= { { NULL, NULL }, PF_INET,
    0, 0, NULL, /* Setsockopts */
    SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst,
    0, NULL };

#define NET_IP_CONNTRACK_MAX 2089
#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"

#ifdef CONFIG_SYSCTL
static struct ctl_table_header *ip_conntrack_sysctl_header;

/* sysctl net.ipv4.ip_conntrack_max: read/write int, mode 0644. */
static ctl_table ip_conntrack_table[] = {
	{ NET_IP_CONNTRACK_MAX, NET_IP_CONNTRACK_MAX_NAME,
&ip_conntrack_max,
	  sizeof(ip_conntrack_max), 0644, NULL, proc_dointvec },
	{ 0 }
};

static ctl_table ip_conntrack_dir_table[] = {
	{NET_IPV4, "ipv4", NULL, 0, 0555, ip_conntrack_table, 0, 0, 0, 0, 0},
	{ 0 }
};

static ctl_table ip_conntrack_root_table[] = {
	{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
	{ 0 }
};
#endif /*CONFIG_SYSCTL*/

/* Kill-predicate matching every conntrack (used at module cleanup). */
static int kill_all(const struct ip_conntrack *i, void *data)
{
	return 1;
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
/* Module unload path: drop the sysctl, flush all conntracks, free the
 * hash table and slab cache, unregister the sockopt. */
void ip_conntrack_cleanup(void)
{
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(ip_conntrack_sysctl_header);
#endif
	ip_ct_attach = NULL;
	/* This makes sure all current packets have passed through
	   netfilter framework. Roll on, two-stage module
	   delete... */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

 i_see_dead_people:
	ip_ct_selective_cleanup(kill_all, NULL);
	/* Keep flushing until every conntrack reference is gone. */
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}

	kmem_cache_destroy(ip_conntrack_cachep);
	vfree(ip_conntrack_hash);
	nf_unregister_sockopt(&so_getorigdst);
}

/* Hash table size override (module parameter); 0 means size from
 * available memory (see ip_conntrack_init). */
static int hashsize = 0;
MODULE_PARM(hashsize, "i");

int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
	 * machine has 256 buckets. >= 1GB machines have 8192 buckets.
*/ 1543 if (hashsize) { 1544 ip_conntrack_htable_size = hashsize; 1545 } else { 1546 ip_conntrack_htable_size 1547 = (((num_physpages << PAGE_SHIFT) / 16384) 1548 / sizeof(struct list_head)); 1549 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1550 ip_conntrack_htable_size = 8192; 1551 if (ip_conntrack_htable_size < 16) 1552 ip_conntrack_htable_size = 16; 1553 } 1554 ip_conntrack_max = 8 * ip_conntrack_htable_size; 1555 1556 printk("ip_conntrack version %s (%u buckets, %d max)" 1557 " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION, 1558 ip_conntrack_htable_size, ip_conntrack_max, 1559 sizeof(struct ip_conntrack)); 1560 1561 ret = nf_register_sockopt(&so_getorigdst); 1562 if (ret != 0) { 1563 printk(KERN_ERR "Unable to register netfilter socket option\n"); 1564 return ret; 1565 } 1566 1567 ip_conntrack_hash = vmalloc(sizeof(struct list_head) 1568 * ip_conntrack_htable_size); 1569 if (!ip_conntrack_hash) { 1570 printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); 1571 goto err_unreg_sockopt; 1572 } 1573 1574 ip_conntrack_cachep = kmem_cache_create("ip_conntrack", 1575 sizeof(struct ip_conntrack), 0, 1576 SLAB_HWCACHE_ALIGN, NULL, NULL); 1577 if (!ip_conntrack_cachep) { 1578 printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); 1579 goto err_free_hash; 1580 } 1581 /* Don't NEED lock here, but good form anyway. */ 1582 WRITE_LOCK(&ip_conntrack_lock); 1583 /* Sew in builtin protocols. */ 1584 list_append(&protocol_list, &ip_conntrack_protocol_tcp); 1585 list_append(&protocol_list, &ip_conntrack_protocol_udp); 1586 list_append(&protocol_list, &ip_conntrack_protocol_icmp); 1587 WRITE_UNLOCK(&ip_conntrack_lock); 1588 1589 for (i = 0; i < ip_conntrack_htable_size; i++) 1590 INIT_LIST_HEAD(&ip_conntrack_hash[i]); 1591 1592/* This is fucking braindead. There is NO WAY of doing this without 1593 the CONFIG_SYSCTL unless you don't want to detect errors. 1594 Grrr... 
--RR */ 1595#ifdef CONFIG_SYSCTL 1596 ip_conntrack_sysctl_header 1597 = register_sysctl_table(ip_conntrack_root_table, 0); 1598 if (ip_conntrack_sysctl_header == NULL) { 1599 goto err_free_ct_cachep; 1600 } 1601#endif /*CONFIG_SYSCTL*/ 1602 1603 /* For use by ipt_REJECT */ 1604 ip_ct_attach = ip_conntrack_attach; 1605// do_gettimeofday(&tv); 1606// do_gettimeofday(&tp); 1607 return ret; 1608 1609err_free_ct_cachep: 1610 kmem_cache_destroy(ip_conntrack_cachep); 1611err_free_hash: 1612 vfree(ip_conntrack_hash); 1613err_unreg_sockopt: 1614 nf_unregister_sockopt(&so_getorigdst); 1615 1616 return -ENOMEM; 1617} 1618