1/* Connection state tracking for netfilter.  This is separated from,
2   but required by, the NAT layer; it can also be used by an iptables
3   extension. */
4
5/* (c) 1999 Paul `Rusty' Russell.  Licenced under the GNU General
6 * Public Licence.
7 *
8 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
9 * 	- new API and handling of conntrack/nat helpers
10 * 	- now capable of multiple expectations for one master
11 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
12 * 	- add usage/reference counts to ip_conntrack_expect
13 *	- export ip_conntrack[_expect]_{find_get,put} functions
14 * */
15
16#ifdef MODULE
17#define __NO_VERSION__
18#endif
19#include <linux/version.h>
20#include <linux/config.h>
21#include <linux/types.h>
22#include <linux/ip.h>
23#include <linux/netfilter.h>
24#include <linux/netfilter_ipv4.h>
25#include <linux/module.h>
26#include <linux/skbuff.h>
27#include <linux/proc_fs.h>
28#include <linux/vmalloc.h>
29#include <linux/brlock.h>
30#include <net/checksum.h>
31#include <linux/stddef.h>
32#include <linux/sysctl.h>
33#include <linux/slab.h>
34//#include "bcmnvram.h" // 2009.12 James.
35//#include <linux/nvram.h>
36
37extern int qos_enable; // 2009.12 James.
38extern ulong qos_wan_ip; // 2009.12 James.
39
40#include <linux/time.h>
41/* For ERR_PTR().  Yeah, I know... --RR */
42#include <linux/fs.h>
43
/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers. */
46#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
47#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
48
49#include <linux/netfilter_ipv4/ip_conntrack.h>
50#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
51#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
52#include <linux/netfilter_ipv4/ip_conntrack_core.h>
53#include <linux/netfilter_ipv4/listhelp.h>
54
55#define IP_CONNTRACK_VERSION	"2.1"
56
57#define DEBUGP(format, args...)
58
59DECLARE_RWLOCK(ip_conntrack_lock);
60DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
61
62void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
63LIST_HEAD(ip_conntrack_expect_list);
64LIST_HEAD(protocol_list);
65static LIST_HEAD(helpers);
66unsigned int ip_conntrack_htable_size = 0;
67static int ip_conntrack_max = 0;
68static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
69struct list_head *ip_conntrack_hash;
70static kmem_cache_t *ip_conntrack_cachep;
71
// added by Angela, 2008.07.
73int track_flag = 0;
74//ulong ipaddr = 0; // 2009.12 James.
75
76extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
77
78static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
79			      u_int8_t protocol)
80{
81	return protocol == curr->proto;
82}
83
84struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
85{
86	struct ip_conntrack_protocol *p;
87
88	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
89	p = LIST_FIND(&protocol_list, proto_cmpfn,
90		      struct ip_conntrack_protocol *, protocol);
91	if (!p)
92		p = &ip_conntrack_generic_protocol;
93
94	return p;
95}
96
97struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
98{
99	struct ip_conntrack_protocol *p;
100
101	READ_LOCK(&ip_conntrack_lock);
102	p = __ip_ct_find_proto(protocol);
103	READ_UNLOCK(&ip_conntrack_lock);
104	return p;
105}
106
107inline void
108ip_conntrack_put(struct ip_conntrack *ct)
109{
110	IP_NF_ASSERT(ct);
111	IP_NF_ASSERT(ct->infos[0].master);
112	/* nf_conntrack_put wants to go via an info struct, so feed it
113           one at random. */
114	nf_conntrack_put(&ct->infos[0]);
115}
116
117static inline u_int32_t
118hash_conntrack(const struct ip_conntrack_tuple *tuple)
119{
120	/* ntohl because more differences in low bits. */
121	/* To ensure that halves of the same connection don't hash
122	   clash, we add the source per-proto again. */
123	return (ntohl(tuple->src.ip + tuple->dst.ip
124		     + tuple->src.u.all + tuple->dst.u.all
125		     + tuple->dst.protonum)
126		+ ntohs(tuple->src.u.all))
127		% ip_conntrack_htable_size;
128}
129
130inline int
131get_tuple(const struct iphdr *iph, size_t len,
132	  struct ip_conntrack_tuple *tuple,
133	  struct ip_conntrack_protocol *protocol)
134{
135	int ret;
136
137	/* Never happen */
138	if (iph->frag_off & htons(IP_OFFSET)) {
139		printk("ip_conntrack_core: Frag of proto %u.\n",
140		       iph->protocol);
141		return 0;
142	}
143	/* Guarantee 8 protocol bytes: if more wanted, use len param */
144	else if (iph->ihl * 4 + 8 > len)
145		return 0;
146
147	tuple->src.ip = iph->saddr;
148	tuple->dst.ip = iph->daddr;
149	tuple->dst.protonum = iph->protocol;
150
151	tuple->src.u.all = tuple->dst.u.all = 0;
152
153	ret = protocol->pkt_to_tuple((u_int32_t *)iph + iph->ihl,
154				     len - 4*iph->ihl,
155				     tuple);
156	return ret;
157}
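
/* Example (illustrative addresses): for a TCP packet
 * 10.0.0.1:1025 -> 10.0.0.2:80, get_tuple() sets src.ip=10.0.0.1,
 * dst.ip=10.0.0.2 and dst.protonum=6, and the TCP protocol's
 * pkt_to_tuple() fills in src.u.tcp.port=1025 and dst.u.tcp.port=80. */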
158
159static int
160invert_tuple(struct ip_conntrack_tuple *inverse,
161	     const struct ip_conntrack_tuple *orig,
162	     const struct ip_conntrack_protocol *protocol)
163{
164	inverse->src.ip = orig->dst.ip;
165	inverse->dst.ip = orig->src.ip;
166	inverse->dst.protonum = orig->dst.protonum;
167
168	inverse->src.u.all = inverse->dst.u.all = 0;
169
170	return protocol->invert_tuple(inverse, orig);
171}
172
173
174/* ip_conntrack_expect helper functions */
175
176/* Compare tuple parts depending on mask. */
177static inline int expect_cmp(const struct ip_conntrack_expect *i,
178			     const struct ip_conntrack_tuple *tuple)
179{
180	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
181	return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
182}
183
184static void
185destroy_expect(struct ip_conntrack_expect *exp)
186{
187	DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
188	IP_NF_ASSERT(atomic_read(&exp->use));
189	IP_NF_ASSERT(!timer_pending(&exp->timeout));
190
191	kfree(exp);
192}
193
194
195inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
196{
197	IP_NF_ASSERT(exp);
198
199	if (atomic_dec_and_test(&exp->use)) {
200		/* usage count dropped to zero */
201		destroy_expect(exp);
202	}
203}
204
205static inline struct ip_conntrack_expect *
206__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
207{
208	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
209	MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
210	return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
211			 struct ip_conntrack_expect *, tuple);
212}
213
214/* Find a expectation corresponding to a tuple. */
215struct ip_conntrack_expect *
216ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
217{
218	struct ip_conntrack_expect *exp;
219
220	READ_LOCK(&ip_conntrack_lock);
221	READ_LOCK(&ip_conntrack_expect_tuple_lock);
222	exp = __ip_ct_expect_find(tuple);
223	if (exp)
224		atomic_inc(&exp->use);
225	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
226	READ_UNLOCK(&ip_conntrack_lock);
227
228	return exp;
229}
230
231/* remove one specific expectation from all lists and drop refcount,
232 * does _NOT_ delete the timer. */
233static void __unexpect_related(struct ip_conntrack_expect *expect)
234{
235	DEBUGP("unexpect_related(%p)\n", expect);
236	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
237
238	/* we're not allowed to unexpect a confirmed expectation! */
239	IP_NF_ASSERT(!expect->sibling);
240
241	/* delete from global and local lists */
242	list_del(&expect->list);
243	list_del(&expect->expected_list);
244
245	/* decrement expect-count of master conntrack */
246	if (expect->expectant)
247		expect->expectant->expecting--;
248
249	ip_conntrack_expect_put(expect);
250}
251
/* remove one specific expectation from all lists, drop refcount
 * and expire timer.
 * This function can _NOT_ be called for confirmed expects! */
255static void unexpect_related(struct ip_conntrack_expect *expect)
256{
257	IP_NF_ASSERT(expect->expectant);
	/* if we are supposed to have a timer, but we can't delete
	 * it: race condition.  __unexpect_related will
	 * be called by the timeout function */
261	if (expect->expectant->helper
262	    && expect->expectant->helper->timeout
263	    && !del_timer(&expect->timeout))
264		return;
265
266	__unexpect_related(expect);
267}
268
269/* delete all unconfirmed expectations for this conntrack */
270static void remove_expectations(struct ip_conntrack *ct)
271{
272	struct list_head *exp_entry, *next;
273	struct ip_conntrack_expect *exp;
274
275	DEBUGP("remove_expectations(%p)\n", ct);
276
277	for (exp_entry = ct->sibling_list.next;
278	     exp_entry != &ct->sibling_list; exp_entry = next) {
279		next = exp_entry->next;
280		exp = list_entry(exp_entry, struct ip_conntrack_expect,
281				 expected_list);
282
283		/* we skip established expectations, as we want to delete
284		 * the un-established ones only */
285		if (exp->sibling) {
286			DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
287			continue;
288		}
289
290		IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
291		IP_NF_ASSERT(exp->expectant == ct);
292
293		/* delete expectation from global and private lists */
294		unexpect_related(exp);
295	}
296}
297
298static void
299clean_from_lists(struct ip_conntrack *ct)
300{
301	DEBUGP("clean_from_lists(%p)\n", ct);
302	MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
303	/* Remove from both hash lists: must not NULL out next ptrs,
304           otherwise we'll look unconfirmed.  Fortunately, LIST_DELETE
305           doesn't do this. --RR */
306	LIST_DELETE(&ip_conntrack_hash
307		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
308		    &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
309	LIST_DELETE(&ip_conntrack_hash
310		    [hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)],
311		    &ct->tuplehash[IP_CT_DIR_REPLY]);
312
313	/* Destroy all un-established, pending expectations */
314	remove_expectations(ct);
315}
316
317static void
318destroy_conntrack(struct nf_conntrack *nfct)
319{
320	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
321	struct ip_conntrack_protocol *proto;
322
323	DEBUGP("destroy_conntrack(%p)\n", ct);
324	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
325	IP_NF_ASSERT(!timer_pending(&ct->timeout));
326
327	if (ct->master && master_ct(ct))
328		ip_conntrack_put(master_ct(ct));
329
330	/* To make sure we don't get any weird locking issues here:
331	 * destroy_conntrack() MUST NOT be called with a write lock
332	 * to ip_conntrack_lock!!! -HW */
333	proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
334	if (proto && proto->destroy)
335		proto->destroy(ct);
336
337	if (ip_conntrack_destroyed)
338		ip_conntrack_destroyed(ct);
339
340	WRITE_LOCK(&ip_conntrack_lock);
341	/* Delete our master expectation */
342	if (ct->master) {
343		/* can't call __unexpect_related here,
344		 * since it would screw up expect_list */
345		list_del(&ct->master->expected_list);
346		kfree(ct->master);
347	}
348	WRITE_UNLOCK(&ip_conntrack_lock);
349
350	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
351	kmem_cache_free(ip_conntrack_cachep, ct);
352	atomic_dec(&ip_conntrack_count);
353}
354
355static void death_by_timeout(unsigned long ul_conntrack)
356{
357	struct ip_conntrack *ct = (void *)ul_conntrack;
358
359	WRITE_LOCK(&ip_conntrack_lock);
360	clean_from_lists(ct);
361	WRITE_UNLOCK(&ip_conntrack_lock);
362	ip_conntrack_put(ct);
363}
364
365static inline int
366conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
367		    const struct ip_conntrack_tuple *tuple,
368		    const struct ip_conntrack *ignored_conntrack)
369{
370	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
371	return i->ctrack != ignored_conntrack
372		&& ip_ct_tuple_equal(tuple, &i->tuple);
373}
374
375static struct ip_conntrack_tuple_hash *
376__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
377		    const struct ip_conntrack *ignored_conntrack)
378{
379	struct ip_conntrack_tuple_hash *h;
380
381	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
382	h = LIST_FIND(&ip_conntrack_hash[hash_conntrack(tuple)],
383		      conntrack_tuple_cmp,
384		      struct ip_conntrack_tuple_hash *,
385		      tuple, ignored_conntrack);
386	return h;
387}
388
389/* Find a connection corresponding to a tuple. */
390struct ip_conntrack_tuple_hash *
391ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
392		      const struct ip_conntrack *ignored_conntrack)
393{
394	struct ip_conntrack_tuple_hash *h;
395
396	READ_LOCK(&ip_conntrack_lock);
397	h = __ip_conntrack_find(tuple, ignored_conntrack);
398	if (h)
399		atomic_inc(&h->ctrack->ct_general.use);
400	READ_UNLOCK(&ip_conntrack_lock);
401
402	return h;
403}
404
405static inline struct ip_conntrack *
406__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
407{
408	struct ip_conntrack *ct
409		= (struct ip_conntrack *)nfct->master;
410
411	/* ctinfo is the index of the nfct inside the conntrack */
412	*ctinfo = nfct - ct->infos;
413	IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
414	return ct;
415}
416
417/* Return conntrack and conntrack_info given skb->nfct->master */
418struct ip_conntrack *
419ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
420{
421	if (skb->nfct)
422		return __ip_conntrack_get(skb->nfct, ctinfo);
423	return NULL;
424}
425
426/* Confirm a connection given skb->nfct; places it in hash table */
427int
428__ip_conntrack_confirm(struct nf_ct_info *nfct)
429{
430	unsigned int hash, repl_hash;
431	struct ip_conntrack *ct;
432	enum ip_conntrack_info ctinfo;
433
434	ct = __ip_conntrack_get(nfct, &ctinfo);
435
436	/* ipt_REJECT uses ip_conntrack_attach to attach related
437	   ICMP/TCP RST packets in other direction.  Actual packet
438	   which created connection will be IP_CT_NEW or for an
439	   expected connection, IP_CT_RELATED. */
440	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
441		return NF_ACCEPT;
442
443	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
444	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
445
446	/* We're not in hash table, and we refuse to set up related
447	   connections for unconfirmed conns.  But packet copies and
448	   REJECT will give spurious warnings here. */
449	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
450
	/* No external references means no one else could have
           confirmed us. */
453	IP_NF_ASSERT(!is_confirmed(ct));
454	DEBUGP("Confirming conntrack %p\n", ct);
455
456	WRITE_LOCK(&ip_conntrack_lock);
457	/* See if there's one in the list already, including reverse:
458           NAT could have grabbed it without realizing, since we're
459           not in the hash.  If there is, we lost race. */
460	if (!LIST_FIND(&ip_conntrack_hash[hash],
461		       conntrack_tuple_cmp,
462		       struct ip_conntrack_tuple_hash *,
463		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
464	    && !LIST_FIND(&ip_conntrack_hash[repl_hash],
465			  conntrack_tuple_cmp,
466			  struct ip_conntrack_tuple_hash *,
467			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
468//		ct->tuplehash[IP_CT_DIR_ORIGINAL].track.number =1;
469//		ct->tuplehash[IP_CT_DIR_ORIGINAL].track.size = ntohs(skb->nh.iph->tot_len);
470		list_prepend(&ip_conntrack_hash[hash],
471			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
472		list_prepend(&ip_conntrack_hash[repl_hash],
473			     &ct->tuplehash[IP_CT_DIR_REPLY]);
474
475		/* Timer relative to confirmation time, not original
476		   setting time, otherwise we'd get timer wrap in
477		   weird delay cases. */
478		ct->timeout.expires += jiffies;
479		add_timer(&ct->timeout);
480		atomic_inc(&ct->ct_general.use);
481		WRITE_UNLOCK(&ip_conntrack_lock);
482		return NF_ACCEPT;
483	}
484
485	WRITE_UNLOCK(&ip_conntrack_lock);
486	return NF_DROP;
487}
488
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
491int
492ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
493			 const struct ip_conntrack *ignored_conntrack)
494{
495	struct ip_conntrack_tuple_hash *h;
496
497	READ_LOCK(&ip_conntrack_lock);
498	h = __ip_conntrack_find(tuple, ignored_conntrack);
499	READ_UNLOCK(&ip_conntrack_lock);
500
501	return h != NULL;
502}
503
504/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
505struct ip_conntrack *
506icmp_error_track(struct sk_buff *skb,
507		 enum ip_conntrack_info *ctinfo,
508		 unsigned int hooknum)
509{
510	const struct iphdr *iph;
511	struct icmphdr *hdr;
512	struct ip_conntrack_tuple innertuple, origtuple;
513	struct iphdr *inner;
514	size_t datalen;
515	struct ip_conntrack_protocol *innerproto;
516	struct ip_conntrack_tuple_hash *h;
517
	IP_NF_ASSERT(skb->nh.iph->protocol == IPPROTO_ICMP);
519	IP_NF_ASSERT(skb->nfct == NULL);
520
521	iph = skb->nh.iph;
522	hdr = (struct icmphdr *)((u_int32_t *)iph + iph->ihl);
523	inner = (struct iphdr *)(hdr + 1);
524	datalen = skb->len - iph->ihl*4 - sizeof(*hdr);
525
526	if (skb->len < iph->ihl * 4 + sizeof(*hdr) + sizeof(*iph)) {
527		DEBUGP("icmp_error_track: too short\n");
528		return NULL;
529	}
530
531	if (hdr->type != ICMP_DEST_UNREACH
532	    && hdr->type != ICMP_SOURCE_QUENCH
533	    && hdr->type != ICMP_TIME_EXCEEDED
534	    && hdr->type != ICMP_PARAMETERPROB
535	    && hdr->type != ICMP_REDIRECT)
536		return NULL;
537
	/* Ignore ICMPs containing fragments (shouldn't happen) */
539	if (inner->frag_off & htons(IP_OFFSET)) {
540		DEBUGP("icmp_error_track: fragment of proto %u\n",
541		       inner->protocol);
542		return NULL;
543	}
544
545	/* Ignore it if the checksum's bogus. */
546	if (ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen)) {
547		DEBUGP("icmp_error_track: bad csum\n");
548		return NULL;
549	}
550
551	innerproto = ip_ct_find_proto(inner->protocol);
552	/* Are they talking about one of our connections? */
553	if (inner->ihl * 4 + 8 > datalen
554	    || !get_tuple(inner, datalen, &origtuple, innerproto)) {
555		DEBUGP("icmp_error: ! get_tuple p=%u (%u*4+%u dlen=%u)\n",
556		       inner->protocol, inner->ihl, 8,
557		       datalen);
558		return NULL;
559	}
560
561	/* Ordinarily, we'd expect the inverted tupleproto, but it's
562	   been preserved inside the ICMP. */
563	if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
564		DEBUGP("icmp_error_track: Can't invert tuple\n");
565		return NULL;
566	}
567
568	*ctinfo = IP_CT_RELATED;
569
570	h = ip_conntrack_find_get(&innertuple, NULL);
571	if (!h) {
572		/* Locally generated ICMPs will match inverted if they
573		   haven't been SNAT'ed yet */
574		if (hooknum == NF_IP_LOCAL_OUT)
575			h = ip_conntrack_find_get(&origtuple, NULL);
576
577		if (!h) {
578			DEBUGP("icmp_error_track: no match\n");
579			return NULL;
580		}
581		/* Reverse direction from that found */
582		if (DIRECTION(h) != IP_CT_DIR_REPLY)
583			*ctinfo += IP_CT_IS_REPLY;
584	} else {
585		if (DIRECTION(h) == IP_CT_DIR_REPLY)
586			*ctinfo += IP_CT_IS_REPLY;
587	}
588
589	/* Update skb to refer to this connection */
590	skb->nfct = &h->ctrack->infos[*ctinfo];
591	return h->ctrack;
592}
593
594/* There's a small race here where we may free a just-assured
595   connection.  Too bad: we're in trouble anyway. */
596static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
597{
598	return !(i->ctrack->status & IPS_ASSURED);
599}
600
601static int early_drop(struct list_head *chain)
602{
603	/* Traverse backwards: gives us oldest, which is roughly LRU */
604	struct ip_conntrack_tuple_hash *h;
605	int dropped = 0;
606
607	READ_LOCK(&ip_conntrack_lock);
608	h = LIST_FIND(chain, unreplied, struct ip_conntrack_tuple_hash *);
609	if (h)
610		atomic_inc(&h->ctrack->ct_general.use);
611	READ_UNLOCK(&ip_conntrack_lock);
612
613	if (!h)
614		return dropped;
615
616	if (del_timer(&h->ctrack->timeout)) {
617		death_by_timeout((unsigned long)h->ctrack);
618		dropped = 1;
619	}
620	ip_conntrack_put(h->ctrack);
621	return dropped;
622}
623
624static inline int helper_cmp(const struct ip_conntrack_helper *i,
625			     const struct ip_conntrack_tuple *rtuple)
626{
627	return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
628}
629
630struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
631{
632	return LIST_FIND(&helpers, helper_cmp,
633			 struct ip_conntrack_helper *,
634			 tuple);
635}
636
637/* Allocate a new conntrack: we return -ENOMEM if classification
638   failed due to stress.  Otherwise it really is unclassifiable. */
639static struct ip_conntrack_tuple_hash *
640init_conntrack(const struct ip_conntrack_tuple *tuple,
641	       struct ip_conntrack_protocol *protocol,
642	       struct sk_buff *skb)
643{
644	struct ip_conntrack *conntrack;
645	struct ip_conntrack_tuple repl_tuple;
646	size_t hash, repl_hash;
647	struct ip_conntrack_expect *expected;
648	int i;
649	static unsigned int drop_next = 0;
650
651	hash = hash_conntrack(tuple);
652
653	if (ip_conntrack_max &&
654	    atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
		/* Try dropping from the next chain in round-robin order,
                   or else from the chain we're about to insert into (in
                   case someone is trying to bomb one hash chain). */
658		unsigned int next = (drop_next++)%ip_conntrack_htable_size;
659
660		if (!early_drop(&ip_conntrack_hash[next])
661		    && !early_drop(&ip_conntrack_hash[hash])) {
662			if (net_ratelimit())
663				printk(KERN_WARNING
664				       "ip_conntrack: table full, dropping"
665				       " packet.\n");
666			return ERR_PTR(-ENOMEM);
667		}
668	}
669
670	if (!invert_tuple(&repl_tuple, tuple, protocol)) {
671		DEBUGP("Can't invert tuple.\n");
672		return NULL;
673	}
674	repl_hash = hash_conntrack(&repl_tuple);
675
676	conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
677	if (!conntrack) {
678		DEBUGP("Can't allocate conntrack.\n");
679		return ERR_PTR(-ENOMEM);
680	}
681
682	memset(conntrack, 0, sizeof(struct ip_conntrack));
683	atomic_set(&conntrack->ct_general.use, 1);
684	conntrack->ct_general.destroy = destroy_conntrack;
685	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
686	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
687	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
688	conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
689
	/* added by Angela */
	/* When a conntrack is first created, initialise its QoS tracking
	   state: the flag, the packet count and the per-packet length record */
692	//if(nvram_match("qos_enable", "1"))
693	if(qos_enable == 1) // 2009.12 James.
694	{
695		track_flag = 1;
696		//ipaddr = atoi(nvram_safe_get("wan_ipaddr_t")); // 2009.12 James.
697		conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.flag = 0;
698		conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.number =1;
699		conntrack->tuplehash[IP_CT_DIR_ORIGINAL].track.length[0] = ntohs(skb->nh.iph->tot_len);
700	}
701	else
702		track_flag = 0;
703
704	for (i=0; i < IP_CT_NUMBER; i++)
705		conntrack->infos[i].master = &conntrack->ct_general;
706
707	if (!protocol->new(conntrack, skb->nh.iph, skb->len)) {
708		kmem_cache_free(ip_conntrack_cachep, conntrack);
709		return NULL;
710	}
711	/* Don't set timer yet: wait for confirmation */
712	init_timer(&conntrack->timeout);
713	conntrack->timeout.data = (unsigned long)conntrack;
714	conntrack->timeout.function = death_by_timeout;
715
716	INIT_LIST_HEAD(&conntrack->sibling_list);
717
718	/* Mark clearly that it's not in the hash table. */
719	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list.next = NULL;
720
721	WRITE_LOCK(&ip_conntrack_lock);
722	/* Need finding and deleting of expected ONLY if we win race */
723	READ_LOCK(&ip_conntrack_expect_tuple_lock);
724	expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
725			     struct ip_conntrack_expect *, tuple);
726	READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
727
728	/* Look up the conntrack helper for master connections only */
729	if (!expected)
730		conntrack->helper = ip_ct_find_helper(&repl_tuple);
731
	/* If the expectation is dying, then this is a loser. */
733	if (expected
734	    && expected->expectant->helper
735	    && expected->expectant->helper->timeout
736	    && ! del_timer(&expected->timeout))
737		expected = NULL;
738
739	/* If master is not in hash table yet (ie. packet hasn't left
740	   this machine yet), how can other end know about expected?
741	   Hence these are not the droids you are looking for (if
742	   master ct never got confirmed, we'd hold a reference to it
743	   and weird things would happen to future packets). */
744	if (expected && is_confirmed(expected->expectant)) {
745		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
746			conntrack, expected);
747		/* Welcome, Mr. Bond.  We've been expecting you... */
748		IP_NF_ASSERT(master_ct(conntrack));
749		conntrack->status = IPS_EXPECTED;
750		conntrack->master = expected;
751		expected->sibling = conntrack;
752		LIST_DELETE(&ip_conntrack_expect_list, expected);
753		INIT_LIST_HEAD(&expected->list);
754		expected->expectant->expecting--;
755		nf_conntrack_get(&master_ct(conntrack)->infos[0]);
756	}
757	atomic_inc(&ip_conntrack_count);
758	WRITE_UNLOCK(&ip_conntrack_lock);
759
760	if (expected && expected->expectfn)
761		expected->expectfn(conntrack);
762
763//	track[hash].number = 1;
764//	track[hash].size = ntohs(skb->nh.iph->tot_len);
765
766	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
767}
768
#define isdigit(c) (c >= '0' && c <= '9')

/* Parse a dotted-quad string such as "192.168.1.1" into a host-order
 * 32-bit value: each of the four decimal octets is accumulated as
 * k = k*256 + octet. */
__inline unsigned int atoi(const char *s)
{
	int i = 0, j, k = 0;

	for (j = 1; j < 5; j++) {
		i = 0;
		while (isdigit(*s))
			i = i*10 + *(s++) - '0';
		k = k*256 + i;
		if (j == 4)
			return k;
		s++;	/* skip the '.' separator */
	}
	return k;
}
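
/* For illustration: atoi("192.168.1.1") returns 0xC0A80101
 * (192<<24 | 168<<16 | 1<<8 | 1).  The value is in host byte order,
 * matching the ntohl() comparison against the WAN address in
 * deal_track() below. */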
785
// Classify the connection: returns IP_TRACK_PORT, IP_TRACK_DATA,
// IP_TRACK_DOWN or 0, based on the flags accumulated in h->track
787inline int deal_track(struct ip_conntrack_tuple_hash *h, int len)
788{
789	struct ip_conntrack_tuple_hash *rep_h;
790	int i, org_len =0, rep_len = 0;
791
	// Count this packet and record its length in the per-connection history
	h->track.number++;
	h->track.length[(h->track.number-1) % IP_TRACK_MAX] = len;
	if((h->track.number > IP_TRACK_MAX) && !(h->track.flag & IP_TRACK_FULL))
		h->track.flag |= IP_TRACK_FULL;

	// Mark the download direction: the destination is our WAN address
	//if(ntohl(h->tuple.dst.ip) == ipaddr)
	if(ntohl(h->tuple.dst.ip) == qos_wan_ip) // 2009.12 James.
		h->track.flag |= IP_TRACK_DOWN;

	// Already classified as web traffic (destination port 80, 8080 or 443)?
	if((h->track.flag & IP_TRACK_PORT) == IP_TRACK_PORT)
		return IP_TRACK_PORT;

	// Already classified as a bulk data connection?
	if((h->track.flag & IP_TRACK_DATA) == IP_TRACK_DATA)
		return IP_TRACK_DATA;

	// Stop classifying once more than IP_TRACK_COMPARE packets have been seen
	if(h->track.number > IP_TRACK_COMPARE)
		return 0;

	// Fetch the tuplehash of the opposite direction so both halves can be compared
	if(DIRECTION(h) == IP_CT_DIR_REPLY)
		rep_h = &h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL];
	else
		rep_h = &h->ctrack->tuplehash[IP_CT_DIR_REPLY];
	if(!rep_h)
		return 0;

	// Once both directions have recorded IP_TRACK_MAX packet lengths, compare the volumes
	if((h->track.flag & IP_TRACK_FULL) && (rep_h->track.flag & IP_TRACK_FULL)) {
825		for(i = 0; i < IP_TRACK_MAX; i++) {
826			org_len += h->track.length[i];
827			rep_len += rep_h->track.length[i];
828		}
829
830		//compare for data
831		if(org_len > 512*IP_TRACK_MAX || rep_len > 512*IP_TRACK_MAX ) {
832			//compare for port
833			if(ntohs(h->tuple.dst.u.all) == 80 ||
834		  	 ntohs(h->tuple.dst.u.all) == 8080 ||
835		  	 ntohs(h->tuple.dst.u.all) == 443 ||
836		  	 ntohs(h->tuple.src.u.all) == 80 ||
837		  	 ntohs(h->tuple.src.u.all) == 8080 ||
838		  	 ntohs(h->tuple.src.u.all) == 443) {
839		  		h->track.flag |= IP_TRACK_PORT;
840				rep_h->track.flag |= IP_TRACK_PORT;
841				return IP_TRACK_PORT;
842			}
843			h->track.flag |= IP_TRACK_DATA;
844			rep_h->track.flag |= IP_TRACK_DATA;
845			return IP_TRACK_DATA;
846		}
847	}
848	if((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN)
849		return IP_TRACK_DOWN;
850
851	return 0;
852}
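
/* Summary of the classification above: once IP_TRACK_MAX lengths have been
 * recorded in each direction, a connection whose recorded bytes in either
 * direction exceed 512*IP_TRACK_MAX (i.e. it averages more than 512 bytes
 * per recorded packet) is flagged IP_TRACK_DATA, unless either endpoint
 * uses port 80, 8080 or 443, in which case it is flagged IP_TRACK_PORT.
 * Connections towards the WAN address that match neither test are
 * reported as IP_TRACK_DOWN. */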
853
854/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
855static inline struct ip_conntrack *
856resolve_normal_ct(struct sk_buff *skb,
857		  struct ip_conntrack_protocol *proto,
858		  int *set_reply,
859		  unsigned int hooknum,
860		  enum ip_conntrack_info *ctinfo)
861{
862	struct ip_conntrack_tuple tuple;
863	struct ip_conntrack_tuple_hash *h;
864
865	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
866
867	if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto))
868		return NULL;
869
870	/* look for tuple match */
871	h = ip_conntrack_find_get(&tuple, NULL);
872	if (!h) {
873		h = init_conntrack(&tuple, proto, skb);
874		if (!h)
875			return NULL;
876		if (IS_ERR(h))
877			return (void *)h;
878	}
	/* added by Angela: when QoS is enabled, classify the packet with
	 * deal_track() and encode the result in skb->nfmark so later
	 * netfilter/QoS rules can match on it: 90/50 for bulk data
	 * (download/upload), 80/20 for web-port traffic (download/upload),
	 * 70 for other download traffic */
	else if(track_flag == 1) {

		switch(deal_track(h, ntohs(skb->nh.iph->tot_len))) {
			case IP_TRACK_DATA:
				if ((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN)
					skb->nfmark = 90;
				else
					skb->nfmark = 50;
				break;
			case IP_TRACK_PORT:
				if ((h->track.flag & IP_TRACK_DOWN) == IP_TRACK_DOWN)
					skb->nfmark = 80;
				else
					skb->nfmark = 20;
				break;
			case IP_TRACK_DOWN:
				skb->nfmark = 70;
				break;
		}
899	}
900
901	/* It exists; we have (non-exclusive) reference. */
902	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
903		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
904		/* Please set reply bit if this packet OK */
905		*set_reply = 1;
906	} else {
907		/* Once we've had two way comms, always ESTABLISHED. */
908		if (h->ctrack->status & IPS_SEEN_REPLY) {
909			DEBUGP("ip_conntrack_in: normal packet for %p\n",
910			       h->ctrack);
911		        *ctinfo = IP_CT_ESTABLISHED;
912		} else if (h->ctrack->status & IPS_EXPECTED) {
913			DEBUGP("ip_conntrack_in: related packet for %p\n",
914			       h->ctrack);
915			*ctinfo = IP_CT_RELATED;
916		} else {
917			DEBUGP("ip_conntrack_in: new packet for %p\n",
918			       h->ctrack);
919			*ctinfo = IP_CT_NEW;
920		}
921		*set_reply = 0;
922	}
923
924	skb->nfct = &h->ctrack->infos[*ctinfo];
925	return h->ctrack;
926}
927
928/* Netfilter hook itself. */
929unsigned int ip_conntrack_in(unsigned int hooknum,
930			     struct sk_buff **pskb,
931			     const struct net_device *in,
932			     const struct net_device *out,
933			     int (*okfn)(struct sk_buff *))
934{
935	struct ip_conntrack *ct;
936	enum ip_conntrack_info ctinfo;
937	struct ip_conntrack_protocol *proto;
938	int set_reply;
939	int ret;
940
941	(*pskb)->nfcache |= NFC_UNKNOWN;
942
943/* Doesn't cover locally-generated broadcast, so not worth it. */
944
945	/* Previously seen (loopback)?  Ignore.  Do this before
946           fragment check. */
947	if ((*pskb)->nfct)
948		return NF_ACCEPT;
949
950	/* Gather fragments. */
951	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
952		*pskb = ip_ct_gather_frags(*pskb);
953		if (!*pskb)
954			return NF_STOLEN;
955	}
956
957	proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
958
959	/* It may be an icmp error... */
960	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
961	    && icmp_error_track(*pskb, &ctinfo, hooknum))
962		return NF_ACCEPT;
963
964	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
965		/* Not valid part of a connection */
966		return NF_ACCEPT;
967
968	if (IS_ERR(ct))
969		/* Too stressed to deal. */
970		return NF_DROP;
971
972	IP_NF_ASSERT((*pskb)->nfct);
973
974	ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo);
975	if (ret == -1) {
976		/* Invalid */
977		nf_conntrack_put((*pskb)->nfct);
978		(*pskb)->nfct = NULL;
979		return NF_ACCEPT;
980	}
981
982	if (ret != NF_DROP && ct->helper) {
983		ret = ct->helper->help((*pskb)->nh.iph, (*pskb)->len,
984				       ct, ctinfo);
985		if (ret == -1) {
986			/* Invalid */
987			nf_conntrack_put((*pskb)->nfct);
988			(*pskb)->nfct = NULL;
989			return NF_ACCEPT;
990		}
991	}
992	if (set_reply)
993		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
994
995	return ret;
996}
997
998int invert_tuplepr(struct ip_conntrack_tuple *inverse,
999		   const struct ip_conntrack_tuple *orig)
1000{
1001	return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
1002}
1003
1004static inline int resent_expect(const struct ip_conntrack_expect *i,
1005			        const struct ip_conntrack_tuple *tuple,
1006			        const struct ip_conntrack_tuple *mask)
1007{
1008	DEBUGP("resent_expect\n");
1009	DEBUGP("   tuple:   "); DUMP_TUPLE(&i->tuple);
1010	DEBUGP("ct_tuple:   "); DUMP_TUPLE(&i->ct_tuple);
1011	DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
1012	return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
1013	         || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
1014		&& ip_ct_tuple_equal(&i->mask, mask));
1015}
1016
1017/* Would two expected things clash? */
1018static inline int expect_clash(const struct ip_conntrack_expect *i,
1019			       const struct ip_conntrack_tuple *tuple,
1020			       const struct ip_conntrack_tuple *mask)
1021{
1022	/* Part covered by intersection of masks must be unequal,
1023           otherwise they clash */
1024	struct ip_conntrack_tuple intersect_mask
1025		= { { i->mask.src.ip & mask->src.ip,
1026		      { i->mask.src.u.all & mask->src.u.all } },
1027		    { i->mask.dst.ip & mask->dst.ip,
1028		      { i->mask.dst.u.all & mask->dst.u.all },
1029		      i->mask.dst.protonum & mask->dst.protonum } };
1030
1031	return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
1032}
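
/* Illustration (made-up addresses): an expectation for
 * *:* -> 192.168.1.2:30000 (mask 0.0.0.0 port 0 -> 255.255.255.255 port
 * 0xffff) clashes with one for 10.0.0.1:* -> 192.168.1.2:30000, because
 * the two tuples agree everywhere the intersection of the masks is set
 * (here: only the destination address and port). */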
1033
1034inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
1035{
1036	WRITE_LOCK(&ip_conntrack_lock);
1037	unexpect_related(expect);
1038	WRITE_UNLOCK(&ip_conntrack_lock);
1039}
1040
1041static void expectation_timed_out(unsigned long ul_expect)
1042{
1043	struct ip_conntrack_expect *expect = (void *) ul_expect;
1044
1045	DEBUGP("expectation %p timed out\n", expect);
1046	WRITE_LOCK(&ip_conntrack_lock);
1047	__unexpect_related(expect);
1048	WRITE_UNLOCK(&ip_conntrack_lock);
1049}
1050
1051/* Add a related connection. */
1052int ip_conntrack_expect_related(struct ip_conntrack *related_to,
1053				struct ip_conntrack_expect *expect)
1054{
1055	struct ip_conntrack_expect *old, *new;
1056	int ret = 0;
1057
1058	WRITE_LOCK(&ip_conntrack_lock);
1059	/* Because of the write lock, no reader can walk the lists,
1060	 * so there is no need to use the tuple lock too */
1061
1062	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
1063	DEBUGP("tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1064	DEBUGP("mask:  "); DUMP_TUPLE_RAW(&expect->mask);
1065
1066	old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
1067		        struct ip_conntrack_expect *, &expect->tuple,
1068			&expect->mask);
1069	if (old) {
		/* Helper private data may contain offsets but no pointers
		   pointing into the payload - otherwise we would have to copy
		   the data filled out by the helper over the old one */
1073		DEBUGP("expect_related: resent packet\n");
1074		if (related_to->helper &&
1075		    related_to->helper->timeout) {
1076			if (!del_timer(&old->timeout)) {
1077				/* expectation is dying. Fall through */
1078				old = NULL;
1079			} else {
1080				old->timeout.expires = jiffies +
1081					related_to->helper->timeout * HZ;
1082				add_timer(&old->timeout);
1083			}
1084		}
1085
1086		if (old) {
1087			WRITE_UNLOCK(&ip_conntrack_lock);
1088			return -EEXIST;
1089		}
1090	} else if (related_to->helper &&
1091		   related_to->helper->max_expected &&
1092		   related_to->expecting >= related_to->helper->max_expected) {
1093		struct list_head *cur_item;
1094		/* old == NULL */
1095		if (!(related_to->helper->flags &
1096		      IP_CT_HELPER_F_REUSE_EXPECT)) {
1097			WRITE_UNLOCK(&ip_conntrack_lock);
1098 		    	if (net_ratelimit())
1099 			    	printk(KERN_WARNING
1100				       "ip_conntrack: max number of expected "
1101				       "connections %i of %s reached for "
1102				       "%u.%u.%u.%u->%u.%u.%u.%u\n",
1103				       related_to->helper->max_expected,
1104				       related_to->helper->name,
1105 		    	       	       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1106 		    	       	       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1107			return -EPERM;
1108		}
1109		DEBUGP("ip_conntrack: max number of expected "
1110		       "connections %i of %s reached for "
1111		       "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
1112 		       related_to->helper->max_expected,
1113		       related_to->helper->name,
1114		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
1115		       NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
1116
		/* choose the oldest expectation to evict */
1118		list_for_each(cur_item, &related_to->sibling_list) {
1119			struct ip_conntrack_expect *cur;
1120
1121			cur = list_entry(cur_item,
1122					 struct ip_conntrack_expect,
1123					 expected_list);
1124			if (cur->sibling == NULL) {
1125				old = cur;
1126				break;
1127			}
1128		}
1129
1130		/* (!old) cannot happen, since related_to->expecting is the
1131		 * number of unconfirmed expects */
1132		IP_NF_ASSERT(old);
1133
1134		/* newnat14 does not reuse the real allocated memory
1135		 * structures but rather unexpects the old and
1136		 * allocates a new.  unexpect_related will decrement
1137		 * related_to->expecting.
1138		 */
1139		unexpect_related(old);
1140		ret = -EPERM;
1141	} else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1142			     struct ip_conntrack_expect *, &expect->tuple,
1143			     &expect->mask)) {
1144		WRITE_UNLOCK(&ip_conntrack_lock);
1145		DEBUGP("expect_related: busy!\n");
1146		return -EBUSY;
1147	}
1148
1149	new = (struct ip_conntrack_expect *)
1150	      kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
1151	if (!new) {
1152		WRITE_UNLOCK(&ip_conntrack_lock);
		DEBUGP("expect_related: OOM allocating expect\n");
1154		return -ENOMEM;
1155	}
1156
	/* Zero out the new structure, then fill it in with the data */
1158	DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
1159	memset(new, 0, sizeof(*expect));
1160	INIT_LIST_HEAD(&new->list);
1161	INIT_LIST_HEAD(&new->expected_list);
1162	memcpy(new, expect, sizeof(*expect));
1163	new->expectant = related_to;
1164	new->sibling = NULL;
1165	/* increase usage count. This sucks. The memset above overwrites
1166	 * old usage count [if still present] and we increase to one.  Only
1167	 * works because everything is done under ip_conntrack_lock() */
1168	atomic_inc(&new->use);
1169
1170	/* add to expected list for this connection */
1171	list_add(&new->expected_list, &related_to->sibling_list);
1172	/* add to global list of expectations */
1173	list_prepend(&ip_conntrack_expect_list, &new->list);
1174	/* add and start timer if required */
1175	if (related_to->helper &&
1176	    related_to->helper->timeout) {
1177		init_timer(&new->timeout);
1178		new->timeout.data = (unsigned long)new;
1179		new->timeout.function = expectation_timed_out;
1180		new->timeout.expires = jiffies +
1181					related_to->helper->timeout * HZ;
1182		add_timer(&new->timeout);
1183	}
1184	related_to->expecting++;
1185
1186	WRITE_UNLOCK(&ip_conntrack_lock);
1187
1188	return ret;
1189}
1190
1191/* Change tuple in an existing expectation */
1192int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
1193			       struct ip_conntrack_tuple *newtuple)
1194{
1195	int ret;
1196
1197	MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1198	WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
1199	DEBUGP("change_expect:\n");
1200	DEBUGP("exp tuple: "); DUMP_TUPLE_RAW(&expect->tuple);
1201	DEBUGP("exp mask:  "); DUMP_TUPLE_RAW(&expect->mask);
1202	DEBUGP("newtuple:  "); DUMP_TUPLE_RAW(newtuple);
1203	if (expect->ct_tuple.dst.protonum == 0) {
1204		/* Never seen before */
1205		DEBUGP("change expect: never seen before\n");
1206		if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
1207		    && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
1208			         struct ip_conntrack_expect *, newtuple, &expect->mask)) {
1209			/* Force NAT to find an unused tuple */
1210			ret = -1;
1211		} else {
1212			memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
1213			memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
1214			ret = 0;
1215		}
1216	} else {
1217		/* Resent packet */
1218		DEBUGP("change expect: resent packet\n");
1219		if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
1220			ret = 0;
1221		} else {
1222			/* Force NAT to choose again the same port */
1223			ret = -1;
1224		}
1225	}
1226	WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
1227
1228	return ret;
1229}
1230
1231/* Alter reply tuple (maybe alter helper).  If it's already taken,
1232   return 0 and don't do alteration. */
1233int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
1234			     const struct ip_conntrack_tuple *newreply)
1235{
1236	WRITE_LOCK(&ip_conntrack_lock);
1237	if (__ip_conntrack_find(newreply, conntrack)) {
1238		WRITE_UNLOCK(&ip_conntrack_lock);
1239		return 0;
1240	}
1241	/* Should be unconfirmed, so not in hash table yet */
1242	IP_NF_ASSERT(!is_confirmed(conntrack));
1243
1244	DEBUGP("Altering reply tuple of %p to ", conntrack);
1245	DUMP_TUPLE(newreply);
1246
1247	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
1248	if (!conntrack->master)
1249		conntrack->helper = LIST_FIND(&helpers, helper_cmp,
1250					      struct ip_conntrack_helper *,
1251					      newreply);
1252	WRITE_UNLOCK(&ip_conntrack_lock);
1253
1254	return 1;
1255}
1256
1257int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
1258{
1259	MOD_INC_USE_COUNT;
1260
1261	WRITE_LOCK(&ip_conntrack_lock);
1262	list_prepend(&helpers, me);
1263	WRITE_UNLOCK(&ip_conntrack_lock);
1264
1265	return 0;
1266}
1267
1268static inline int unhelp(struct ip_conntrack_tuple_hash *i,
1269			 const struct ip_conntrack_helper *me)
1270{
1271	if (i->ctrack->helper == me) {
1272		/* Get rid of any expected. */
1273		remove_expectations(i->ctrack);
1274		/* And *then* set helper to NULL */
1275		i->ctrack->helper = NULL;
1276	}
1277	return 0;
1278}
1279
1280void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
1281{
1282	unsigned int i;
1283
1284	/* Need write lock here, to delete helper. */
1285	WRITE_LOCK(&ip_conntrack_lock);
1286	LIST_DELETE(&helpers, me);
1287
1288	/* Get rid of expecteds, set helpers to NULL. */
1289	for (i = 0; i < ip_conntrack_htable_size; i++)
1290		LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
1291			    struct ip_conntrack_tuple_hash *, me);
1292	WRITE_UNLOCK(&ip_conntrack_lock);
1293
1294	/* Someone could be still looking at the helper in a bh. */
1295	br_write_lock_bh(BR_NETPROTO_LOCK);
1296	br_write_unlock_bh(BR_NETPROTO_LOCK);
1297
1298	MOD_DEC_USE_COUNT;
1299}
1300
1301/* Refresh conntrack for this many jiffies. */
1302void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
1303{
1304	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
1305
1306	WRITE_LOCK(&ip_conntrack_lock);
1307	/* If not in hash table, timer will not be active yet */
1308	if (!is_confirmed(ct))
1309		ct->timeout.expires = extra_jiffies;
1310	else {
1311		/* Need del_timer for race avoidance (may already be dying). */
1312		if (del_timer(&ct->timeout)) {
1313			ct->timeout.expires = jiffies + extra_jiffies;
1314			add_timer(&ct->timeout);
1315		}
1316	}
1317	WRITE_UNLOCK(&ip_conntrack_lock);
1318}
1319
1320/* Returns new sk_buff, or NULL */
1321struct sk_buff *
1322ip_ct_gather_frags(struct sk_buff *skb)
1323{
1324	struct sock *sk = skb->sk;
1325#ifdef CONFIG_NETFILTER_DEBUG
1326	unsigned int olddebug = skb->nf_debug;
1327#endif
1328	if (sk) {
1329		sock_hold(sk);
1330		skb_orphan(skb);
1331	}
1332
1333	local_bh_disable();
1334	skb = ip_defrag(skb);
1335	local_bh_enable();
1336
1337	if (!skb) {
1338		if (sk) sock_put(sk);
1339		return skb;
1340	} else if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
1341		kfree_skb(skb);
1342		if (sk) sock_put(sk);
1343		return NULL;
1344	}
1345
1346	if (sk) {
1347		skb_set_owner_w(skb, sk);
1348		sock_put(sk);
1349	}
1350
1351	ip_send_check(skb->nh.iph);
1352	skb->nfcache |= NFC_ALTERED;
1353#ifdef CONFIG_NETFILTER_DEBUG
1354	/* Packet path as if nothing had happened. */
1355	skb->nf_debug = olddebug;
1356#endif
1357	return skb;
1358}
1359
1360/* Used by ipt_REJECT. */
1361static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
1362{
1363	struct ip_conntrack *ct;
1364	enum ip_conntrack_info ctinfo;
1365
1366	ct = __ip_conntrack_get(nfct, &ctinfo);
1367
1368	/* This ICMP is in reverse direction to the packet which
1369           caused it */
1370	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
1371		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
1372	else
1373		ctinfo = IP_CT_RELATED;
1374
1375	/* Attach new skbuff, and increment count */
1376	nskb->nfct = &ct->infos[ctinfo];
1377	atomic_inc(&ct->ct_general.use);
1378}
1379
1380static inline int
1381do_kill(const struct ip_conntrack_tuple_hash *i,
1382	int (*kill)(const struct ip_conntrack *i, void *data),
1383	void *data)
1384{
1385	return kill(i->ctrack, data);
1386}
1387
1388/* Bring out ya dead! */
1389static struct ip_conntrack_tuple_hash *
1390get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
1391		void *data)
1392{
1393	struct ip_conntrack_tuple_hash *h = NULL;
1394	unsigned int i;
1395
1396	READ_LOCK(&ip_conntrack_lock);
1397	for (i = 0; !h && i < ip_conntrack_htable_size; i++) {
1398		h = LIST_FIND(&ip_conntrack_hash[i], do_kill,
1399			      struct ip_conntrack_tuple_hash *, kill, data);
1400	}
1401	if (h)
1402		atomic_inc(&h->ctrack->ct_general.use);
1403	READ_UNLOCK(&ip_conntrack_lock);
1404
1405	return h;
1406}
1407
1408void
1409ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
1410			void *data)
1411{
1412	struct ip_conntrack_tuple_hash *h;
1413
1414	/* This is order n^2, by the way. */
1415	while ((h = get_next_corpse(kill, data)) != NULL) {
		/* Time to push up daisies... */
1417		if (del_timer(&h->ctrack->timeout))
1418			death_by_timeout((unsigned long)h->ctrack);
1419		/* ... else the timer will get him soon. */
1420
1421		ip_conntrack_put(h->ctrack);
1422	}
1423}
1424
1425/* Fast function for those who don't want to parse /proc (and I don't
1426   blame them). */
1427/* Reversing the socket's dst/src point of view gives us the reply
1428   mapping. */
1429static int
1430getorigdst(struct sock *sk, int optval, void *user, int *len)
1431{
1432	struct ip_conntrack_tuple_hash *h;
1433	struct ip_conntrack_tuple tuple = { { sk->rcv_saddr, { sk->sport } },
1434					    { sk->daddr, { sk->dport },
1435					      IPPROTO_TCP } };
1436
1437	/* We only do TCP at the moment: is there a better way? */
1438	if (strcmp(sk->prot->name, "TCP") != 0) {
1439		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
1440		return -ENOPROTOOPT;
1441	}
1442
1443	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
1444		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
1445		       *len, sizeof(struct sockaddr_in));
1446		return -EINVAL;
1447	}
1448
1449	h = ip_conntrack_find_get(&tuple, NULL);
1450	if (h) {
1451		struct sockaddr_in sin;
1452
1453		sin.sin_family = AF_INET;
1454		sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1455			.tuple.dst.u.tcp.port;
1456		sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
1457			.tuple.dst.ip;
1458
1459		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
1460		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1461		ip_conntrack_put(h->ctrack);
1462		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
1463			return -EFAULT;
1464		else
1465			return 0;
1466	}
1467	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
1468	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
1469	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
1470	return -ENOENT;
1471}
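
/* Userspace side, for illustration (error handling omitted): a proxy that
 * accepted a REDIRECTed connection can recover the original destination with
 *
 *	struct sockaddr_in dst;
 *	socklen_t len = sizeof(dst);
 *
 *	getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len);
 *	printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
 *	       ntohs(dst.sin_port));
 */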
1472
1473static struct nf_sockopt_ops so_getorigdst
1474= { { NULL, NULL }, PF_INET,
1475    0, 0, NULL, /* Setsockopts */
1476    SO_ORIGINAL_DST, SO_ORIGINAL_DST+1, &getorigdst,
1477    0, NULL };
1478
1479#define NET_IP_CONNTRACK_MAX 2089
1480#define NET_IP_CONNTRACK_MAX_NAME "ip_conntrack_max"
1481
1482#ifdef CONFIG_SYSCTL
1483static struct ctl_table_header *ip_conntrack_sysctl_header;
1484
1485static ctl_table ip_conntrack_table[] = {
1486	{ NET_IP_CONNTRACK_MAX, NET_IP_CONNTRACK_MAX_NAME, &ip_conntrack_max,
1487		sizeof(ip_conntrack_max), 0644,  NULL, proc_dointvec },
1488 	{ 0 }
1489};
1490
1491static ctl_table ip_conntrack_dir_table[] = {
1492	{NET_IPV4, "ipv4", NULL, 0, 0555, ip_conntrack_table, 0, 0, 0, 0, 0},
1493	{ 0 }
1494};
1495
1496static ctl_table ip_conntrack_root_table[] = {
1497	{CTL_NET, "net", NULL, 0, 0555, ip_conntrack_dir_table, 0, 0, 0, 0, 0},
1498	{ 0 }
1499};
1500#endif /*CONFIG_SYSCTL*/
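
/* The table above exposes the limit as /proc/sys/net/ipv4/ip_conntrack_max;
 * for example (from userspace):
 *	echo 16384 > /proc/sys/net/ipv4/ip_conntrack_max */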
1501
1502static int kill_all(const struct ip_conntrack *i, void *data)
1503{
1504	return 1;
1505}
1506
1507/* Mishearing the voices in his head, our hero wonders how he's
1508   supposed to kill the mall. */
1509void ip_conntrack_cleanup(void)
1510{
1511#ifdef CONFIG_SYSCTL
1512	unregister_sysctl_table(ip_conntrack_sysctl_header);
1513#endif
1514	ip_ct_attach = NULL;
1515	/* This makes sure all current packets have passed through
1516           netfilter framework.  Roll on, two-stage module
1517           delete... */
1518	br_write_lock_bh(BR_NETPROTO_LOCK);
1519	br_write_unlock_bh(BR_NETPROTO_LOCK);
1520
1521 i_see_dead_people:
1522	ip_ct_selective_cleanup(kill_all, NULL);
1523	if (atomic_read(&ip_conntrack_count) != 0) {
1524		schedule();
1525		goto i_see_dead_people;
1526	}
1527
1528	kmem_cache_destroy(ip_conntrack_cachep);
1529	vfree(ip_conntrack_hash);
1530	nf_unregister_sockopt(&so_getorigdst);
1531}
1532
1533static int hashsize = 0;
1534MODULE_PARM(hashsize, "i");
1535
1536int __init ip_conntrack_init(void)
1537{
1538	unsigned int i;
1539	int ret;
1540
1541	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
1542	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
1543 	if (hashsize) {
1544 		ip_conntrack_htable_size = hashsize;
1545 	} else {
1546		ip_conntrack_htable_size
1547			= (((num_physpages << PAGE_SHIFT) / 16384)
1548			   / sizeof(struct list_head));
1549		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
1550			ip_conntrack_htable_size = 8192;
1551		if (ip_conntrack_htable_size < 16)
1552			ip_conntrack_htable_size = 16;
1553	}
1554	ip_conntrack_max = 8 * ip_conntrack_htable_size;
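	/* Worked example of the default sizing: with 32 MB of RAM,
	 * (32 MB / 16384) / sizeof(struct list_head) = 2048 / 8 = 256 buckets,
	 * so ip_conntrack_max defaults to 8 * 256 = 2048 connections. */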
1555
1556	printk("ip_conntrack version %s (%u buckets, %d max)"
1557	       " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION,
1558	       ip_conntrack_htable_size, ip_conntrack_max,
1559	       sizeof(struct ip_conntrack));
1560
1561	ret = nf_register_sockopt(&so_getorigdst);
1562	if (ret != 0) {
1563		printk(KERN_ERR "Unable to register netfilter socket option\n");
1564		return ret;
1565	}
1566
1567	ip_conntrack_hash = vmalloc(sizeof(struct list_head)
1568				    * ip_conntrack_htable_size);
1569	if (!ip_conntrack_hash) {
1570		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
1571		goto err_unreg_sockopt;
1572	}
1573
1574	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
1575	                                        sizeof(struct ip_conntrack), 0,
1576	                                        SLAB_HWCACHE_ALIGN, NULL, NULL);
1577	if (!ip_conntrack_cachep) {
1578		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
1579		goto err_free_hash;
1580	}
1581	/* Don't NEED lock here, but good form anyway. */
1582	WRITE_LOCK(&ip_conntrack_lock);
1583	/* Sew in builtin protocols. */
1584	list_append(&protocol_list, &ip_conntrack_protocol_tcp);
1585	list_append(&protocol_list, &ip_conntrack_protocol_udp);
1586	list_append(&protocol_list, &ip_conntrack_protocol_icmp);
1587	WRITE_UNLOCK(&ip_conntrack_lock);
1588
1589	for (i = 0; i < ip_conntrack_htable_size; i++)
1590		INIT_LIST_HEAD(&ip_conntrack_hash[i]);
1591
1592/* This is fucking braindead.  There is NO WAY of doing this without
1593   the CONFIG_SYSCTL unless you don't want to detect errors.
1594   Grrr... --RR */
1595#ifdef CONFIG_SYSCTL
1596	ip_conntrack_sysctl_header
1597		= register_sysctl_table(ip_conntrack_root_table, 0);
1598	if (ip_conntrack_sysctl_header == NULL) {
1599		goto err_free_ct_cachep;
1600	}
1601#endif /*CONFIG_SYSCTL*/
1602
1603	/* For use by ipt_REJECT */
1604	ip_ct_attach = ip_conntrack_attach;
1605//	do_gettimeofday(&tv);
1606//	do_gettimeofday(&tp);
1607	return ret;
1608
1609err_free_ct_cachep:
1610	kmem_cache_destroy(ip_conntrack_cachep);
1611err_free_hash:
1612	vfree(ip_conntrack_hash);
1613err_unreg_sockopt:
1614	nf_unregister_sockopt(&so_getorigdst);
1615
1616	return -ENOMEM;
1617}
1618