1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/timer.h>
11#include <linux/netfilter.h>
12#include <linux/module.h>
13#include <linux/in.h>
14#include <linux/tcp.h>
15#include <linux/spinlock.h>
16#include <linux/skbuff.h>
17#include <linux/ipv6.h>
18#include <net/ip6_checksum.h>
19
20#include <net/tcp.h>
21
22#include <linux/netfilter.h>
23#include <linux/netfilter_ipv4.h>
24#include <linux/netfilter_ipv6.h>
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_l4proto.h>
27#include <net/netfilter/nf_conntrack_ecache.h>
28
29#define DEBUGP(format, args...)
30
31#ifdef HNDCTF
32#include <ctf/hndctf.h>
33extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout);
34#endif /* HNDCTF */
35
36/* Protects conntrack->proto.tcp */
37static DEFINE_RWLOCK(tcp_lock);
38
39/* "Be conservative in what you do,
40    be liberal in what you accept from others."
41    If it's non-zero, we mark only out of window RST segments as INVALID. */
42static int nf_ct_tcp_be_liberal __read_mostly = 0;
43
44/* If it is set to zero, we disable picking up already established
45   connections. */
46static int nf_ct_tcp_loose __read_mostly = 1;
47
48/* Max number of the retransmitted packets without receiving an (acceptable)
49   ACK from the destination. If this number is reached, a shorter timer
50   will be started. */
51static int nf_ct_tcp_max_retrans __read_mostly = 3;
52
53
/*
 * Printable names of the TCP conntrack states.  tcp_print_conntrack()
 * indexes this table with conntrack->proto.tcp.state, so the order must
 * follow the TCP_CONNTRACK_* state numbering (NONE first, LISTEN last).
 *
 * The table is never modified, so both the strings and the pointer array
 * itself are const.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"LISTEN"
};
66
67#define SECS * HZ
68#define MINS * 60 SECS
69#define HOURS * 60 MINS
70#define DAYS * 24 HOURS
71
72static unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
73static unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
74static unsigned int nf_ct_tcp_timeout_established __read_mostly =   5 DAYS;
75static unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
76static unsigned int nf_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
77static unsigned int nf_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
78static unsigned int nf_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
79static unsigned int nf_ct_tcp_timeout_close __read_mostly =        10 SECS;
80
81/* RFC1122 says the R2 limit should be at least 100 seconds.
82   Linux uses 15 packets as limit, which corresponds
83   to ~13-30min depending on RTO. */
84static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
85
86static unsigned int * tcp_timeouts[] = {
87    NULL,                              /* TCP_CONNTRACK_NONE */
88    &nf_ct_tcp_timeout_syn_sent,       /* TCP_CONNTRACK_SYN_SENT, */
89    &nf_ct_tcp_timeout_syn_recv,       /* TCP_CONNTRACK_SYN_RECV, */
90    &nf_ct_tcp_timeout_established,    /* TCP_CONNTRACK_ESTABLISHED, */
91    &nf_ct_tcp_timeout_fin_wait,       /* TCP_CONNTRACK_FIN_WAIT, */
92    &nf_ct_tcp_timeout_close_wait,     /* TCP_CONNTRACK_CLOSE_WAIT, */
93    &nf_ct_tcp_timeout_last_ack,       /* TCP_CONNTRACK_LAST_ACK, */
94    &nf_ct_tcp_timeout_time_wait,      /* TCP_CONNTRACK_TIME_WAIT, */
95    &nf_ct_tcp_timeout_close,          /* TCP_CONNTRACK_CLOSE, */
96    NULL,                              /* TCP_CONNTRACK_LISTEN */
97 };
98
/* Two-letter shorthands for the states, used by the transition table. */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sLI	TCP_CONNTRACK_LISTEN
/* Pseudo states: "invalid" and "ignore" verdicts of the table. */
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE

/*
 * Classification of a segment by its RST/SYN/FIN/ACK flags, as returned by
 * get_conntrack_index().  The value selects the row of the transition table.
 */
enum tcp_bit_set {
	TCP_SYN_SET = 0,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
121
122/*
123 * The TCP state transition table needs a few words...
124 *
125 * We are the man in the middle. All the packets go through us
126 * but might get lost in transit to the destination.
127 * It is assumed that the destinations can't receive segments
128 * we haven't seen.
129 *
130 * The checked segment is in window, but our windows are *not*
131 * equivalent with the ones of the sender/receiver. We always
132 * try to guess the state of the current sender.
133 *
134 * The meaning of the states are:
135 *
136 * NONE:	initial state
137 * SYN_SENT:	SYN-only packet seen
138 * SYN_RECV:	SYN-ACK packet seen
139 * ESTABLISHED:	ACK packet seen
140 * FIN_WAIT:	FIN packet seen
141 * CLOSE_WAIT:	ACK seen (after FIN)
142 * LAST_ACK:	FIN seen (after FIN)
143 * TIME_WAIT:	last ACK seen
144 * CLOSE:	closed connection
145 *
146 * LISTEN state is not used.
147 *
148 * Packets marked as IGNORED (sIG):
149 *	if they may be either invalid or valid
150 *	and the receiver may send back a connection
151 *	closing RST or a SYN/ACK.
152 *
153 * Packets marked as INVALID (sIV):
154 *	if they are invalid
155 *	or we do not support the request (simultaneous open)
156 */
157static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
158	{
159/* ORIGINAL */
160/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
161/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
162/*
163 *	sNO -> sSS	Initialize a new connection
164 *	sSS -> sSS	Retransmitted SYN
165 *	sSR -> sIG	Late retransmitted SYN?
166 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
167 *			are errors. Receiver will reply with RST
168 *			and close the connection.
169 *			Or we are not in sync and hold a dead connection.
170 *	sFW -> sIG
171 *	sCW -> sIG
172 *	sLA -> sIG
173 *	sTW -> sSS	Reopened connection (RFC 1122).
174 *	sCL -> sSS
175 */
176/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
177/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
178/*
179 * A SYN/ACK from the client is always invalid:
180 *	- either it tries to set up a simultaneous open, which is
181 *	  not supported;
182 *	- or the firewall has just been inserted between the two hosts
183 *	  during the session set-up. The SYN will be retransmitted
184 *	  by the true client (or it'll time out).
185 */
186/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
187/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
188/*
189 *	sNO -> sIV	Too late and no reason to do anything...
190 *	sSS -> sIV	Client migth not send FIN in this state:
191 *			we enforce waiting for a SYN/ACK reply first.
192 *	sSR -> sFW	Close started.
193 *	sES -> sFW
194 *	sFW -> sLA	FIN seen in both directions, waiting for
195 *			the last ACK.
196 *			Migth be a retransmitted FIN as well...
197 *	sCW -> sLA
198 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
199 *	sTW -> sTW
200 *	sCL -> sCL
201 */
202/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
203/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
204/*
205 *	sNO -> sES	Assumed.
206 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
207 *	sSR -> sES	Established state is reached.
208 *	sES -> sES	:-)
209 *	sFW -> sCW	Normal close request answered by ACK.
210 *	sCW -> sCW
211 *	sLA -> sTW	Last ACK detected.
212 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
213 *	sCL -> sCL
214 */
215/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
216/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
217/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
218	},
219	{
220/* REPLY */
221/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
222/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
223/*
224 *	sNO -> sIV	Never reached.
225 *	sSS -> sIV	Simultaneous open, not supported
226 *	sSR -> sIV	Simultaneous open, not supported.
227 *	sES -> sIV	Server may not initiate a connection.
228 *	sFW -> sIV
229 *	sCW -> sIV
230 *	sLA -> sIV
231 *	sTW -> sIV	Reopened connection, but server may not do it.
232 *	sCL -> sIV
233 */
234/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
235/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
236/*
237 *	sSS -> sSR	Standard open.
238 *	sSR -> sSR	Retransmitted SYN/ACK.
239 *	sES -> sIG	Late retransmitted SYN/ACK?
240 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
241 *	sCW -> sIG
242 *	sLA -> sIG
243 *	sTW -> sIG
244 *	sCL -> sIG
245 */
246/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
247/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
248/*
249 *	sSS -> sIV	Server might not send FIN in this state.
250 *	sSR -> sFW	Close started.
251 *	sES -> sFW
252 *	sFW -> sLA	FIN seen in both directions.
253 *	sCW -> sLA
254 *	sLA -> sLA	Retransmitted FIN.
255 *	sTW -> sTW
256 *	sCL -> sCL
257 */
258/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
259/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
260/*
261 *	sSS -> sIG	Might be a half-open connection.
262 *	sSR -> sSR	Might answer late resent SYN.
263 *	sES -> sES	:-)
264 *	sFW -> sCW	Normal close request answered by ACK.
265 *	sCW -> sCW
266 *	sLA -> sTW	Last ACK detected.
267 *	sTW -> sTW	Retransmitted last ACK.
268 *	sCL -> sCL
269 */
270/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
271/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
272/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
273	}
274};
275
276static int tcp_pkt_to_tuple(const struct sk_buff *skb,
277			    unsigned int dataoff,
278			    struct nf_conntrack_tuple *tuple)
279{
280	struct tcphdr _hdr, *hp;
281
282	/* Actually only need first 8 bytes. */
283	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
284	if (hp == NULL)
285		return 0;
286
287	tuple->src.u.tcp.port = hp->source;
288	tuple->dst.u.tcp.port = hp->dest;
289
290	return 1;
291}
292
293static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
294			    const struct nf_conntrack_tuple *orig)
295{
296	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
297	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
298	return 1;
299}
300
301/* Print out the per-protocol part of the tuple. */
302static int tcp_print_tuple(struct seq_file *s,
303			   const struct nf_conntrack_tuple *tuple)
304{
305	return seq_printf(s, "sport=%hu dport=%hu ",
306			  ntohs(tuple->src.u.tcp.port),
307			  ntohs(tuple->dst.u.tcp.port));
308}
309
310/* Print out the private part of the conntrack. */
311static int tcp_print_conntrack(struct seq_file *s,
312			       const struct nf_conn *conntrack)
313{
314	enum tcp_conntrack state;
315
316	read_lock_bh(&tcp_lock);
317	state = conntrack->proto.tcp.state;
318	read_unlock_bh(&tcp_lock);
319
320	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
321}
322
323static unsigned int get_conntrack_index(const struct tcphdr *tcph)
324{
325	if (tcph->rst) return TCP_RST_SET;
326	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
327	else if (tcph->fin) return TCP_FIN_SET;
328	else if (tcph->ack) return TCP_ACK_SET;
329	else return TCP_NONE_SET;
330}
331
332/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
333   in IP Filter' by Guido van Rooij.
334
335   http://www.nluug.nl/events/sane2000/papers.html
336   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
337
338   The boundaries and the conditions are changed according to RFC793:
339   the packet must intersect the window (i.e. segments may be
340   after the right or before the left edge) and thus receivers may ACK
341   segments after the right edge of the window.
342
343	td_maxend = max(sack + max(win,1)) seen in reply packets
344	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
345	td_maxwin += seq + len - sender.td_maxend
346			if seq + len > sender.td_maxend
347	td_end    = max(seq + len) seen in sent packets
348
349   I.   Upper bound for valid data:	seq <= sender.td_maxend
350   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
351   III.	Upper bound for valid ack:      sack <= receiver.td_end
352   IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
353
354   where sack is the highest right edge of sack block found in the packet.
355
   The upper bound limit for a valid ack is not ignored -
   we don't have to deal with fragments.
358*/
359
360static inline __u32 segment_seq_plus_len(__u32 seq,
361					 size_t len,
362					 unsigned int dataoff,
363					 struct tcphdr *tcph)
364{
365	return (seq + len - dataoff - tcph->doff*4
366		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
367}
368
/*
 * Width of the window in which an ACK below the receiver's td_end is still
 * accepted: the sender's td_maxwin, but never less than MAXACKWINCONST.
 * Note that the argument is evaluated twice.
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST		\
					       : (sender)->td_maxwin)
373
374/*
375 * Simplified tcp_parse_options routine from tcp_input.c
376 */
377static void tcp_options(const struct sk_buff *skb,
378			unsigned int dataoff,
379			struct tcphdr *tcph,
380			struct ip_ct_tcp_state *state)
381{
382	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
383	unsigned char *ptr;
384	int length = (tcph->doff*4) - sizeof(struct tcphdr);
385
386	if (!length)
387		return;
388
389	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
390				 length, buff);
391	BUG_ON(ptr == NULL);
392
393	state->td_scale =
394	state->flags = 0;
395
396	while (length > 0) {
397		int opcode=*ptr++;
398		int opsize;
399
400		switch (opcode) {
401		case TCPOPT_EOL:
402			return;
403		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
404			length--;
405			continue;
406		default:
407			opsize=*ptr++;
408			if (opsize < 2) /* "silly options" */
409				return;
410			if (opsize > length)
411				break;	/* don't parse partial options */
412
413			if (opcode == TCPOPT_SACK_PERM
414			    && opsize == TCPOLEN_SACK_PERM)
415				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
416			else if (opcode == TCPOPT_WINDOW
417				 && opsize == TCPOLEN_WINDOW) {
418				state->td_scale = *(u_int8_t *)ptr;
419
420				if (state->td_scale > 14) {
421					/* See RFC1323 */
422					state->td_scale = 14;
423				}
424				state->flags |=
425					IP_CT_TCP_FLAG_WINDOW_SCALE;
426			}
427			ptr += opsize - 2;
428			length -= opsize;
429		}
430	}
431}
432
433static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
434		     struct tcphdr *tcph, __u32 *sack)
435{
436	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
437	unsigned char *ptr;
438	int length = (tcph->doff*4) - sizeof(struct tcphdr);
439	__u32 tmp;
440
441	if (!length)
442		return;
443
444	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
445				 length, buff);
446	BUG_ON(ptr == NULL);
447
448	/* Fast path for timestamp-only option */
449	if (length == TCPOLEN_TSTAMP_ALIGNED*4
450	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
451				       | (TCPOPT_NOP << 16)
452				       | (TCPOPT_TIMESTAMP << 8)
453				       | TCPOLEN_TIMESTAMP))
454		return;
455
456	while (length > 0) {
457		int opcode = *ptr++;
458		int opsize, i;
459
460		switch (opcode) {
461		case TCPOPT_EOL:
462			return;
463		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
464			length--;
465			continue;
466		default:
467			opsize = *ptr++;
468			if (opsize < 2) /* "silly options" */
469				return;
470			if (opsize > length)
471				break;	/* don't parse partial options */
472
473			if (opcode == TCPOPT_SACK
474			    && opsize >= (TCPOLEN_SACK_BASE
475					  + TCPOLEN_SACK_PERBLOCK)
476			    && !((opsize - TCPOLEN_SACK_BASE)
477				 % TCPOLEN_SACK_PERBLOCK)) {
478				for (i = 0;
479				     i < (opsize - TCPOLEN_SACK_BASE);
480				     i += TCPOLEN_SACK_PERBLOCK) {
481					tmp = ntohl(*((__be32 *)(ptr+i)+1));
482
483					if (after(tmp, *sack))
484						*sack = tmp;
485				}
486				return;
487			}
488			ptr += opsize - 2;
489			length -= opsize;
490		}
491	}
492}
493
494static int tcp_in_window(struct ip_ct_tcp *state,
495			 enum ip_conntrack_dir dir,
496			 unsigned int index,
497			 const struct sk_buff *skb,
498			 unsigned int dataoff,
499			 struct tcphdr *tcph,
500			 int pf)
501{
502	struct ip_ct_tcp_state *sender = &state->seen[dir];
503	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
504	__u32 seq, ack, sack, end, win, swin;
505	int res;
506
507	/*
508	 * Get the required data from the packet.
509	 */
510	seq = ntohl(tcph->seq);
511	ack = sack = ntohl(tcph->ack_seq);
512	win = ntohs(tcph->window);
513	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
514
515	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
516		tcp_sack(skb, dataoff, tcph, &sack);
517
518	DEBUGP("tcp_in_window: START\n");
519	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
520	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
521		NIPQUAD(iph->saddr), ntohs(tcph->source),
522		NIPQUAD(iph->daddr), ntohs(tcph->dest),
523		seq, ack, sack, win, end);
524	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
525	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
526		sender->td_end, sender->td_maxend, sender->td_maxwin,
527		sender->td_scale,
528		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
529		receiver->td_scale);
530
531	if (sender->td_end == 0) {
532		/*
533		 * Initialize sender data.
534		 */
535		if (tcph->syn && tcph->ack) {
536			/*
537			 * Outgoing SYN-ACK in reply to a SYN.
538			 */
539			sender->td_end =
540			sender->td_maxend = end;
541			sender->td_maxwin = (win == 0 ? 1 : win);
542
543			tcp_options(skb, dataoff, tcph, sender);
544			/*
545			 * RFC 1323:
546			 * Both sides must send the Window Scale option
547			 * to enable window scaling in either direction.
548			 */
549			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
550			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
551				sender->td_scale =
552				receiver->td_scale = 0;
553		} else {
554			/*
555			 * We are in the middle of a connection,
556			 * its history is lost for us.
557			 * Let's try to use the data from the packet.
558			 */
559			sender->td_end = end;
560			sender->td_maxwin = (win == 0 ? 1 : win);
561			sender->td_maxend = end + sender->td_maxwin;
562		}
563	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
564		     && dir == IP_CT_DIR_ORIGINAL)
565		   || (state->state == TCP_CONNTRACK_SYN_RECV
566		     && dir == IP_CT_DIR_REPLY))
567		   && after(end, sender->td_end)) {
568		/*
569		 * RFC 793: "if a TCP is reinitialized ... then it need
570		 * not wait at all; it must only be sure to use sequence
571		 * numbers larger than those recently used."
572		 */
573		sender->td_end =
574		sender->td_maxend = end;
575		sender->td_maxwin = (win == 0 ? 1 : win);
576
577		tcp_options(skb, dataoff, tcph, sender);
578	}
579
580	if (!(tcph->ack)) {
581		/*
582		 * If there is no ACK, just pretend it was set and OK.
583		 */
584		ack = sack = receiver->td_end;
585	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
586		    (TCP_FLAG_ACK|TCP_FLAG_RST))
587		   && (ack == 0)) {
588		/*
589		 * Broken TCP stacks, that set ACK in RST packets as well
590		 * with zero ack value.
591		 */
592		ack = sack = receiver->td_end;
593	}
594
595	if (seq == end
596	    && (!tcph->rst
597		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
598		/*
599		 * Packets contains no data: we assume it is valid
600		 * and check the ack value only.
601		 * However RST segments are always validated by their
602		 * SEQ number, except when seq == 0 (reset sent answering
603		 * SYN.
604		 */
605		seq = end = sender->td_end;
606
607	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
608	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
609		NIPQUAD(iph->saddr), ntohs(tcph->source),
610		NIPQUAD(iph->daddr), ntohs(tcph->dest),
611		seq, ack, sack, win, end);
612	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
613	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
614		sender->td_end, sender->td_maxend, sender->td_maxwin,
615		sender->td_scale,
616		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
617		receiver->td_scale);
618
619	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
620		before(seq, sender->td_maxend + 1),
621		after(end, sender->td_end - receiver->td_maxwin - 1),
622		before(sack, receiver->td_end + 1),
623		after(ack, receiver->td_end - MAXACKWINDOW(sender)));
624
625	if (before(seq, sender->td_maxend + 1) &&
626	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
627	    before(sack, receiver->td_end + 1) &&
628	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
629		/*
630		 * Take into account window scaling (RFC 1323).
631		 */
632		if (!tcph->syn)
633			win <<= sender->td_scale;
634
635		/*
636		 * Update sender data.
637		 */
638		swin = win + (sack - ack);
639		if (sender->td_maxwin < swin)
640			sender->td_maxwin = swin;
641		if (after(end, sender->td_end))
642			sender->td_end = end;
643		/*
644		 * Update receiver data.
645		 */
646		if (after(end, sender->td_maxend))
647			receiver->td_maxwin += end - sender->td_maxend;
648		if (after(sack + win, receiver->td_maxend - 1)) {
649			receiver->td_maxend = sack + win;
650			if (win == 0)
651				receiver->td_maxend++;
652		}
653
654		/*
655		 * Check retransmissions.
656		 */
657		if (index == TCP_ACK_SET) {
658			if (state->last_dir == dir
659			    && state->last_seq == seq
660			    && state->last_ack == ack
661			    && state->last_end == end
662			    && state->last_win == win)
663				state->retrans++;
664			else {
665				state->last_dir = dir;
666				state->last_seq = seq;
667				state->last_ack = ack;
668				state->last_end = end;
669				state->last_win = win;
670				state->retrans = 0;
671			}
672		}
673		res = 1;
674	} else {
675		res = 0;
676		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
677		    nf_ct_tcp_be_liberal)
678			res = 1;
679		if (!res && LOG_INVALID(IPPROTO_TCP))
680			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
681			"nf_ct_tcp: %s ",
682			before(seq, sender->td_maxend + 1) ?
683			after(end, sender->td_end - receiver->td_maxwin - 1) ?
684			before(sack, receiver->td_end + 1) ?
685			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
686			: "ACK is under the lower bound (possible overly delayed ACK)"
687			: "ACK is over the upper bound (ACKed data not seen yet)"
688			: "SEQ is under the lower bound (already ACKed data retransmitted)"
689			: "SEQ is over the upper bound (over the window of the receiver)");
690	}
691
692	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
693	       "receiver end=%u maxend=%u maxwin=%u\n",
694		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
695		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
696
697	return res;
698}
699
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Update sender->td_end after NAT successfully mangled the packet.
 * Caller must linearize skb at tcp header.
 *
 * @skb:       the (already mangled) packet
 * @dataoff:   offset of the TCP header inside skb->data
 * @conntrack: connection the packet belongs to
 * @dir:       direction of the packet; selects the "seen" entry to update
 *
 * td_end of that direction only ever moves forward; last_end is always
 * overwritten with the new end of the segment.
 */
void nf_conntrack_tcp_update(struct sk_buff *skb,
			     unsigned int dataoff,
			     struct nf_conn *conntrack,
			     int dir)
{
	struct tcphdr *tcph = (struct tcphdr *)(skb->data + dataoff);
	struct ip_ct_tcp *tcp = &conntrack->proto.tcp;
	__u32 end;
#ifdef DEBUGP_VARS
	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
#endif

	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);

	write_lock_bh(&tcp_lock);
	/* We have to worry for the ack in the reply packet only... */
	if (after(end, tcp->seen[dir].td_end))
		tcp->seen[dir].td_end = end;
	tcp->last_end = end;
	write_unlock_bh(&tcp_lock);

	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		sender->td_end, sender->td_maxend, sender->td_maxwin,
		sender->td_scale,
		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		receiver->td_scale);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
#endif
734
735#define	TH_FIN	0x01
736#define	TH_SYN	0x02
737#define	TH_RST	0x04
738#define	TH_PUSH	0x08
739#define	TH_ACK	0x10
740#define	TH_URG	0x20
741#define	TH_ECE	0x40
742#define	TH_CWR	0x80
743
744/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
745static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
746{
747	[TH_SYN]			= 1,
748	[TH_SYN|TH_URG]			= 1,
749	[TH_SYN|TH_ACK]			= 1,
750	[TH_RST]			= 1,
751	[TH_RST|TH_ACK]			= 1,
752	[TH_FIN|TH_ACK]			= 1,
753	[TH_FIN|TH_ACK|TH_URG]		= 1,
754	[TH_ACK]			= 1,
755	[TH_ACK|TH_URG]			= 1,
756};
757
758/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
759static int tcp_error(struct sk_buff *skb,
760		     unsigned int dataoff,
761		     enum ip_conntrack_info *ctinfo,
762		     int pf,
763		     unsigned int hooknum)
764{
765	struct tcphdr _tcph, *th;
766	unsigned int tcplen = skb->len - dataoff;
767	u_int8_t tcpflags;
768
769	/* Smaller that minimal TCP header? */
770	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
771	if (th == NULL) {
772		if (LOG_INVALID(IPPROTO_TCP))
773			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
774				"nf_ct_tcp: short packet ");
775		return -NF_ACCEPT;
776	}
777
778	/* Not whole TCP header or malformed packet */
779	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
780		if (LOG_INVALID(IPPROTO_TCP))
781			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
782				"nf_ct_tcp: truncated/malformed packet ");
783		return -NF_ACCEPT;
784	}
785
786	/* Checksum invalid? Ignore.
787	 * We skip checking packets on the outgoing path
788	 * because the checksum is assumed to be correct.
789	 */
790	if (nf_conntrack_checksum &&
791	    ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) ||
792	     (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) &&
793	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
794		if (LOG_INVALID(IPPROTO_TCP))
795			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
796				  "nf_ct_tcp: bad TCP checksum ");
797		return -NF_ACCEPT;
798	}
799
800	/* Check TCP flags. */
801	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
802	if (!tcp_valid_flags[tcpflags]) {
803		if (LOG_INVALID(IPPROTO_TCP))
804			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
805				  "nf_ct_tcp: invalid TCP flag combination ");
806		return -NF_ACCEPT;
807	}
808
809	return NF_ACCEPT;
810}
811
812/* Returns verdict for packet, or -1 for invalid. */
813static int tcp_packet(struct nf_conn *conntrack,
814		      const struct sk_buff *skb,
815		      unsigned int dataoff,
816		      enum ip_conntrack_info ctinfo,
817		      int pf,
818		      unsigned int hooknum)
819{
820	enum tcp_conntrack new_state, old_state;
821	enum ip_conntrack_dir dir;
822	struct tcphdr *th, _tcph;
823	unsigned long timeout;
824	unsigned int index;
825
826	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
827	BUG_ON(th == NULL);
828
829	write_lock_bh(&tcp_lock);
830	old_state = conntrack->proto.tcp.state;
831	dir = CTINFO2DIR(ctinfo);
832	index = get_conntrack_index(th);
833	new_state = tcp_conntracks[dir][index][old_state];
834
835	switch (new_state) {
836	case TCP_CONNTRACK_IGNORE:
837		/* Ignored packets:
838		 *
839		 * a) SYN in ORIGINAL
840		 * b) SYN/ACK in REPLY
841		 * c) ACK in reply direction after initial SYN in original.
842		 */
843		if (index == TCP_SYNACK_SET
844		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
845		    && conntrack->proto.tcp.last_dir != dir
846		    && ntohl(th->ack_seq) ==
847			     conntrack->proto.tcp.last_end) {
848			/* This SYN/ACK acknowledges a SYN that we earlier
849			 * ignored as invalid. This means that the client and
850			 * the server are both in sync, while the firewall is
851			 * not. We kill this session and block the SYN/ACK so
852			 * that the client cannot but retransmit its SYN and
853			 * thus initiate a clean new session.
854			 */
855			write_unlock_bh(&tcp_lock);
856			if (LOG_INVALID(IPPROTO_TCP))
857				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
858					  "nf_ct_tcp: killing out of sync session ");
859			if (del_timer(&conntrack->timeout))
860				conntrack->timeout.function((unsigned long)
861							    conntrack);
862			return -NF_DROP;
863		}
864		conntrack->proto.tcp.last_index = index;
865		conntrack->proto.tcp.last_dir = dir;
866		conntrack->proto.tcp.last_seq = ntohl(th->seq);
867		conntrack->proto.tcp.last_end =
868		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
869
870		write_unlock_bh(&tcp_lock);
871		if (LOG_INVALID(IPPROTO_TCP))
872			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
873				  "nf_ct_tcp: invalid packed ignored ");
874		return NF_ACCEPT;
875	case TCP_CONNTRACK_MAX:
876		/* Invalid packet */
877		DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
878		       dir, get_conntrack_index(th),
879		       old_state);
880		write_unlock_bh(&tcp_lock);
881		if (LOG_INVALID(IPPROTO_TCP))
882			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
883				  "nf_ct_tcp: invalid state ");
884		return -NF_ACCEPT;
885	case TCP_CONNTRACK_SYN_SENT:
886		if (old_state < TCP_CONNTRACK_TIME_WAIT)
887			break;
888		if ((conntrack->proto.tcp.seen[dir].flags &
889			IP_CT_TCP_FLAG_CLOSE_INIT)
890		    || after(ntohl(th->seq),
891			     conntrack->proto.tcp.seen[dir].td_end)) {
892			/* Attempt to reopen a closed connection.
893			* Delete this connection and look up again. */
894			write_unlock_bh(&tcp_lock);
895			if (del_timer(&conntrack->timeout))
896				conntrack->timeout.function((unsigned long)
897							    conntrack);
898			return -NF_REPEAT;
899		} else {
900			write_unlock_bh(&tcp_lock);
901			if (LOG_INVALID(IPPROTO_TCP))
902				nf_log_packet(pf, 0, skb, NULL, NULL,
903					      NULL, "nf_ct_tcp: invalid SYN");
904			return -NF_ACCEPT;
905		}
906	case TCP_CONNTRACK_CLOSE:
907		if (index == TCP_RST_SET
908		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
909			 && conntrack->proto.tcp.last_index == TCP_SYN_SET)
910			|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
911			    && conntrack->proto.tcp.last_index == TCP_ACK_SET))
912		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
913			/* RST sent to invalid SYN or ACK we had let through
914			 * at a) and c) above:
915			 *
916			 * a) SYN was in window then
917			 * c) we hold a half-open connection.
918			 *
919			 * Delete our connection entry.
920			 * We skip window checking, because packet might ACK
921			 * segments we ignored. */
922			goto in_window;
923		}
924		/* Just fall through */
925	default:
926		/* Keep compilers happy. */
927		break;
928	}
929
930#ifdef HNDCTF
931	/* Remove the ipc entries on receipt of FIN or RST */
932	if (CTF_ENAB(kcih)) {
933		if (conntrack->ctf_flags & CTF_FLAGS_CACHED) {
934			if (th->fin || th->rst) {
935				ip_conntrack_ipct_delete(conntrack, 0);
936			}
937			goto in_window;
938		}
939	}
940#endif /* HNDCTF */
941
942	if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
943			   skb, dataoff, th, pf)) {
944		write_unlock_bh(&tcp_lock);
945		return -NF_ACCEPT;
946	}
947     in_window:
948	/* From now on we have got in-window packets */
949	conntrack->proto.tcp.last_index = index;
950
951	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
952	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
953		NIPQUAD(iph->saddr), ntohs(th->source),
954		NIPQUAD(iph->daddr), ntohs(th->dest),
955		(th->syn ? 1 : 0), (th->ack ? 1 : 0),
956		(th->fin ? 1 : 0), (th->rst ? 1 : 0),
957		old_state, new_state);
958
959	conntrack->proto.tcp.state = new_state;
960	if (old_state != new_state
961	    && (new_state == TCP_CONNTRACK_FIN_WAIT
962		|| new_state == TCP_CONNTRACK_CLOSE))
963		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
964	timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
965		  && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
966		  ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
967	write_unlock_bh(&tcp_lock);
968
969	nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
970	if (new_state != old_state)
971		nf_conntrack_event_cache(IPCT_PROTOINFO, skb);
972
973	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
974		/* If only reply is a RST, we can consider ourselves not to
975		   have an established connection: this is a fairly common
976		   problem case, so we can delete the conntrack
977		   immediately.  --RR */
978		if (th->rst) {
979			if (del_timer(&conntrack->timeout))
980				conntrack->timeout.function((unsigned long)
981							    conntrack);
982			return NF_ACCEPT;
983		}
984	} else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
985		   && (old_state == TCP_CONNTRACK_SYN_RECV
986		       || old_state == TCP_CONNTRACK_ESTABLISHED)
987		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
		/* Set ASSURED if we see a valid ack in ESTABLISHED
989		   after SYN_RECV or a valid answer for a picked up
990		   connection. */
991		set_bit(IPS_ASSURED_BIT, &conntrack->status);
992		nf_conntrack_event_cache(IPCT_STATUS, skb);
993	}
994	nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
995
996	return NF_ACCEPT;
997}
998
999/* Called when a new connection for this protocol found. */
1000static int tcp_new(struct nf_conn *conntrack,
1001		   const struct sk_buff *skb,
1002		   unsigned int dataoff)
1003{
1004	enum tcp_conntrack new_state;
1005	struct tcphdr *th, _tcph;
1006#ifdef DEBUGP_VARS
1007	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
1008	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
1009#endif
1010
1011	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1012	BUG_ON(th == NULL);
1013
1014	/* Don't need lock here: this conntrack not in circulation yet */
1015	new_state
1016		= tcp_conntracks[0][get_conntrack_index(th)]
1017		[TCP_CONNTRACK_NONE];
1018
1019	/* Invalid: delete conntrack */
1020	if (new_state >= TCP_CONNTRACK_MAX) {
1021		DEBUGP("nf_ct_tcp: invalid new deleting.\n");
1022		return 0;
1023	}
1024
1025	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1026		/* SYN packet */
1027		conntrack->proto.tcp.seen[0].td_end =
1028			segment_seq_plus_len(ntohl(th->seq), skb->len,
1029					     dataoff, th);
1030		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1031		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1032			conntrack->proto.tcp.seen[0].td_maxwin = 1;
1033		conntrack->proto.tcp.seen[0].td_maxend =
1034			conntrack->proto.tcp.seen[0].td_end;
1035
1036		tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]);
1037		conntrack->proto.tcp.seen[1].flags = 0;
1038	} else if (nf_ct_tcp_loose == 0) {
1039		/* Don't try to pick up connections. */
1040		return 0;
1041	} else {
1042		/*
1043		 * We are in the middle of a connection,
1044		 * its history is lost for us.
1045		 * Let's try to use the data from the packet.
1046		 */
1047		conntrack->proto.tcp.seen[0].td_end =
1048			segment_seq_plus_len(ntohl(th->seq), skb->len,
1049					     dataoff, th);
1050		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1051		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
1052			conntrack->proto.tcp.seen[0].td_maxwin = 1;
1053		conntrack->proto.tcp.seen[0].td_maxend =
1054			conntrack->proto.tcp.seen[0].td_end +
1055			conntrack->proto.tcp.seen[0].td_maxwin;
1056		conntrack->proto.tcp.seen[0].td_scale = 0;
1057
1058		/* We assume SACK and liberal window checking to handle
1059		 * window scaling */
1060		conntrack->proto.tcp.seen[0].flags =
1061		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1062						     IP_CT_TCP_FLAG_BE_LIBERAL;
1063	}
1064
1065	conntrack->proto.tcp.seen[1].td_end = 0;
1066	conntrack->proto.tcp.seen[1].td_maxend = 0;
1067	conntrack->proto.tcp.seen[1].td_maxwin = 1;
1068	conntrack->proto.tcp.seen[1].td_scale = 0;
1069
1070	/* tcp_packet will set them */
1071	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
1072	conntrack->proto.tcp.last_index = TCP_NONE_SET;
1073
1074	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1075	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1076		sender->td_end, sender->td_maxend, sender->td_maxwin,
1077		sender->td_scale,
1078		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1079		receiver->td_scale);
1080	return 1;
1081}
1082
1083#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1084
1085#include <linux/netfilter/nfnetlink.h>
1086#include <linux/netfilter/nfnetlink_conntrack.h>
1087
1088static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
1089			 const struct nf_conn *ct)
1090{
1091	struct nfattr *nest_parms;
1092	struct nf_ct_tcp_flags tmp = {};
1093
1094	read_lock_bh(&tcp_lock);
1095	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
1096	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
1097		&ct->proto.tcp.state);
1098
1099	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
1100		&ct->proto.tcp.seen[0].td_scale);
1101
1102	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
1103		&ct->proto.tcp.seen[1].td_scale);
1104
1105	tmp.flags = ct->proto.tcp.seen[0].flags;
1106	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1107		sizeof(struct nf_ct_tcp_flags), &tmp);
1108
1109	tmp.flags = ct->proto.tcp.seen[1].flags;
1110	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1111		sizeof(struct nf_ct_tcp_flags), &tmp);
1112	read_unlock_bh(&tcp_lock);
1113
1114	NFA_NEST_END(skb, nest_parms);
1115
1116	return 0;
1117
1118nfattr_failure:
1119	read_unlock_bh(&tcp_lock);
1120	return -1;
1121}
1122
/* Per-attribute payload sizes for the nested CTA_PROTOINFO_TCP attributes,
 * indexed by attribute type minus one.  Handed to nfattr_bad_size() in
 * nfattr_to_tcp(); presumably these act as minimum lengths, as the name
 * suggests. */
static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
	[CTA_PROTOINFO_TCP_STATE-1]	      = sizeof(u_int8_t),
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
	[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]    = sizeof(u_int8_t),
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]  = sizeof(struct nf_ct_tcp_flags),
	[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]     = sizeof(struct nf_ct_tcp_flags)
};
1130
1131static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
1132{
1133	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
1134	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
1135
1136	/* updates could not contain anything about the private
1137	 * protocol info, in that case skip the parsing */
1138	if (!attr)
1139		return 0;
1140
1141	nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
1142
1143	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
1144		return -EINVAL;
1145
1146	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
1147		return -EINVAL;
1148
1149	write_lock_bh(&tcp_lock);
1150	ct->proto.tcp.state =
1151		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
1152
1153	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
1154		struct nf_ct_tcp_flags *attr =
1155			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
1156		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1157		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1158	}
1159
1160	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
1161		struct nf_ct_tcp_flags *attr =
1162			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
1163		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1164		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1165	}
1166
1167	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
1168	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
1169	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1170	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1171		ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
1172			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
1173		ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
1174			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
1175	}
1176	write_unlock_bh(&tcp_lock);
1177
1178	return 0;
1179}
1180#endif
1181
1182#ifdef CONFIG_SYSCTL
/* Shared by both TCP tracker descriptors at the end of this file through
 * their .ctl_table_users / .ctl_table_header members.  Neither variable is
 * written in this part of the file; presumably the generic l4proto
 * (un)registration code maintains them -- confirm there. */
static unsigned int tcp_sysctl_table_users;
static struct ctl_table_header *tcp_sysctl_header;

/* sysctl entries for the TCP tracker.
 *
 * The nf_conntrack_tcp_timeout_* entries go through proc_dointvec_jiffies
 * (user space deals in seconds, the variables hold jiffies); the remaining
 * entries are plain integers handled by proc_dointvec.  Every entry is
 * mode 0644.  The table is terminated by an entry whose ctl_name is 0. */
static struct ctl_table tcp_sysctl_table[] = {
	/* Per-state timeouts */
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
		.data		= &nf_ct_tcp_timeout_syn_sent,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
		.data		= &nf_ct_tcp_timeout_syn_recv,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
		.procname	= "nf_conntrack_tcp_timeout_established",
		.data		= &nf_ct_tcp_timeout_established,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
		.data		= &nf_ct_tcp_timeout_fin_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
		.procname	= "nf_conntrack_tcp_timeout_close_wait",
		.data		= &nf_ct_tcp_timeout_close_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
		.procname	= "nf_conntrack_tcp_timeout_last_ack",
		.data		= &nf_ct_tcp_timeout_last_ack,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
		.procname	= "nf_conntrack_tcp_timeout_time_wait",
		.data		= &nf_ct_tcp_timeout_time_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
		.procname	= "nf_conntrack_tcp_timeout_close",
		.data		= &nf_ct_tcp_timeout_close,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	/* Timeout tcp_packet() switches to once proto.tcp.retrans has
	 * reached nf_ct_tcp_max_retrans, if the state's own timeout is
	 * larger. */
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
		.data		= &nf_ct_tcp_timeout_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	/* 0 disables picking up already established connections. */
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_LOOSE,
		.procname	= "nf_conntrack_tcp_loose",
		.data		= &nf_ct_tcp_loose,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* Non-zero: only out of window RST segments are marked INVALID. */
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_BE_LIBERAL,
		.procname       = "nf_conntrack_tcp_be_liberal",
		.data           = &nf_ct_tcp_be_liberal,
		.maxlen         = sizeof(unsigned int),
		.mode           = 0644,
		.proc_handler   = &proc_dointvec,
	},
	/* Retransmissions tolerated without an acceptable ACK before the
	 * shorter max_retrans timeout is used. */
	{
		.ctl_name	= NET_NF_CONNTRACK_TCP_MAX_RETRANS,
		.procname	= "nf_conntrack_tcp_max_retrans",
		.data		= &nf_ct_tcp_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* Terminator */
	{
		.ctl_name	= 0
	}
};
1286
1287#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
/* Compatibility sysctl entries, built only with
 * CONFIG_NF_CONNTRACK_PROC_COMPAT.  Each entry exposes the same variable,
 * with the same handler and mode, as its counterpart in tcp_sysctl_table,
 * but under an "ip_conntrack_*" procname and a NET_IPV4_NF_CONNTRACK_*
 * ctl_name.  Only the IPv4 tracker descriptor references this table. */
static struct ctl_table tcp_compat_sysctl_table[] = {
	/* Per-state timeouts (seconds <-> jiffies via proc_dointvec_jiffies) */
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
		.data		= &nf_ct_tcp_timeout_syn_sent,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
		.data		= &nf_ct_tcp_timeout_syn_recv,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
		.procname	= "ip_conntrack_tcp_timeout_established",
		.data		= &nf_ct_tcp_timeout_established,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
		.data		= &nf_ct_tcp_timeout_fin_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_close_wait",
		.data		= &nf_ct_tcp_timeout_close_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
		.procname	= "ip_conntrack_tcp_timeout_last_ack",
		.data		= &nf_ct_tcp_timeout_last_ack,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
		.procname	= "ip_conntrack_tcp_timeout_time_wait",
		.data		= &nf_ct_tcp_timeout_time_wait,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
		.procname	= "ip_conntrack_tcp_timeout_close",
		.data		= &nf_ct_tcp_timeout_close,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
		.data		= &nf_ct_tcp_timeout_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	/* Plain integer switches / counters */
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
		.procname	= "ip_conntrack_tcp_loose",
		.data		= &nf_ct_tcp_loose,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
		.procname	= "ip_conntrack_tcp_be_liberal",
		.data		= &nf_ct_tcp_be_liberal,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
		.procname	= "ip_conntrack_tcp_max_retrans",
		.data		= &nf_ct_tcp_max_retrans,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* Terminator */
	{
		.ctl_name	= 0
	}
};
1389#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1390#endif /* CONFIG_SYSCTL */
1391
/* TCP tracker descriptor for IPv4 (PF_INET).  Uses the same callbacks and
 * the same sysctl table, user counter and header as the IPv6 descriptor;
 * in addition it carries the "ip_conntrack_*" compat sysctl table when
 * CONFIG_NF_CONNTRACK_PROC_COMPAT is set.  Exported GPL-only. */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
{
	.l3proto		= PF_INET,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* netlink dump/parse of the TCP-private state and of the port tuple */
	.to_nfattr		= tcp_to_nfattr,
	.from_nfattr		= nfattr_to_tcp,
	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
	.ctl_compat_table	= tcp_compat_sysctl_table,
#endif
#endif
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1420
/* TCP tracker descriptor for IPv6 (PF_INET6).  Identical to the IPv4
 * descriptor except for .l3proto and for not setting .ctl_compat_table.
 * It points at the same tcp_sysctl_table, user counter and header.
 * Exported GPL-only. */
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
{
	.l3proto		= PF_INET6,
	.l4proto 		= IPPROTO_TCP,
	.name 			= "tcp",
	.pkt_to_tuple 		= tcp_pkt_to_tuple,
	.invert_tuple 		= tcp_invert_tuple,
	.print_tuple 		= tcp_print_tuple,
	.print_conntrack 	= tcp_print_conntrack,
	.packet 		= tcp_packet,
	.new 			= tcp_new,
	.error			= tcp_error,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
	/* netlink dump/parse of the TCP-private state and of the port tuple */
	.to_nfattr		= tcp_to_nfattr,
	.from_nfattr		= nfattr_to_tcp,
	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
#endif
#ifdef CONFIG_SYSCTL
	.ctl_table_users	= &tcp_sysctl_table_users,
	.ctl_table_header	= &tcp_sysctl_header,
	.ctl_table		= tcp_sysctl_table,
#endif
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1446