• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6/net/netfilter/
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/timer.h>
11#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/tcp.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/ipv6.h>
17#include <net/ip6_checksum.h>
18#include <asm/unaligned.h>
19
20#include <net/tcp.h>
21
22#include <linux/netfilter.h>
23#include <linux/netfilter_ipv4.h>
24#include <linux/netfilter_ipv6.h>
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_l4proto.h>
27#include <net/netfilter/nf_conntrack_ecache.h>
28#include <net/netfilter/nf_log.h>
29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31
/*
 * Optional HNDCTF support.  With HNDCTF defined, the CTF header is pulled in
 * and ip_conntrack_ipct_delete() is declared as an external symbol.  Without
 * it, BCMFASTPATH_HOST is defined as an empty macro so that the function
 * definitions annotated with it below still compile unchanged.
 */
#ifdef HNDCTF
#include <ctf/hndctf.h>
extern int ip_conntrack_ipct_delete(struct nf_conn *ct, int ct_timeout);
#else
#define BCMFASTPATH_HOST
#endif /* HNDCTF */
38
/* "Be conservative in what you do,
    be liberal in what you accept from others."
    If it's non-zero, we mark only out of window RST segments as INVALID.
    Defaults to 0 (strict window checking). */
static int nf_ct_tcp_be_liberal __read_mostly = 0;

/* If it is set to zero, we disable picking up already established
   connections.  Defaults to 1 (pick-up enabled). */
static int nf_ct_tcp_loose __read_mostly = 1;

/* Max number of the retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
   will be started.  Defaults to 3. */
static int nf_ct_tcp_max_retrans __read_mostly = 3;
52
53
/*
 * Printable names of the TCP conntrack states.  The table is positional:
 * tcp_print_conntrack() indexes it directly with ct->proto.tcp.state, so the
 * order of the entries must follow the numeric order of the state values.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};
66
/*
 * Postfix time-unit helpers.  They are written after a number, e.g.
 * "5 MINS", which expands to "5 * 60 * HZ".  Each larger unit is built by
 * chaining onto the next smaller one.  Because the expansions are
 * unparenthesized multiplications, use them only after a plain constant.
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
71
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO.
   Both timeouts below default to 5 minutes. */
static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly    =   5 MINS;
static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly =   5 MINS;
77
/*
 * Default timeout for each TCP conntrack state, indexed by state and
 * expressed with the SECS/MINS helpers above.
 */
static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED]	= 40 MINS, /* was 5 DAYS; should be no less than tcp_keepalive_time + tcp_keepalive_probes * tcp_keepalive_intvl */
	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
};
89
/*
 * Three-letter shorthands for the conntrack states, used to keep the
 * tcp_conntracks transition table below readable.  sIV (TCP_CONNTRACK_MAX)
 * marks an invalid transition and sIG (TCP_CONNTRACK_IGNORE) an ignored
 * packet; neither is a real connection state.
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
102
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 *
 * The values are produced by get_conntrack_index() and select the row of
 * the tcp_conntracks transition table, so their order must match the row
 * order of that table (syn, synack, fin, ack, rst, none).
 */
enum tcp_bit_set {
	TCP_SYN_SET,		/* SYN without ACK */
	TCP_SYNACK_SET,		/* SYN together with ACK */
	TCP_FIN_SET,		/* FIN (no RST, no SYN) */
	TCP_ACK_SET,		/* ACK only (no RST, SYN or FIN) */
	TCP_RST_SET,		/* RST, regardless of the other flags */
	TCP_NONE_SET,		/* none of RST/SYN/FIN/ACK */
};
112
113/*
114 * The TCP state transition table needs a few words...
115 *
116 * We are the man in the middle. All the packets go through us
117 * but might get lost in transit to the destination.
118 * It is assumed that the destinations can't receive segments
119 * we haven't seen.
120 *
121 * The checked segment is in window, but our windows are *not*
122 * equivalent with the ones of the sender/receiver. We always
123 * try to guess the state of the current sender.
124 *
125 * The meaning of the states are:
126 *
127 * NONE:	initial state
128 * SYN_SENT:	SYN-only packet seen
129 * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
130 * SYN_RECV:	SYN-ACK packet seen
131 * ESTABLISHED:	ACK packet seen
132 * FIN_WAIT:	FIN packet seen
133 * CLOSE_WAIT:	ACK seen (after FIN)
134 * LAST_ACK:	FIN seen (after FIN)
135 * TIME_WAIT:	last ACK seen
136 * CLOSE:	closed connection (RST)
137 *
138 * Packets marked as IGNORED (sIG):
139 *	if they may be either invalid or valid
140 *	and the receiver may send back a connection
141 *	closing RST or a SYN/ACK.
142 *
143 * Packets marked as INVALID (sIV):
144 *	if we regard them as truly invalid packets
145 */
/*
 * State transition table, indexed as
 *	tcp_conntracks[direction][enum tcp_bit_set][current state]
 * and yielding the new state (or sIV / sIG, see the comment above).
 */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sS2 -> sS2	Late retransmitted SYN
 *	sSR -> sIG
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *	sNO -> sIV	Too late and no reason to do anything
 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
 *	sSR -> sIG
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sS2 -> sIV
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sS2	Simultaneous open
 *	sS2 -> sS2	Retransmitted simultaneous SYN
 *	sSR -> sIV	Invalid SYN packets sent by the server
 *	sES -> sIV
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV	Reopened connection, but server may not do it.
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *	sSS -> sSR	Standard open.
 *	sS2 -> sSR	Simultaneous open
 *	sSR -> sSR	Retransmitted SYN/ACK.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *	sSS -> sIG	Might be a half-open connection.
 *	sS2 -> sIG
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected.
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
278
279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280			     struct nf_conntrack_tuple *tuple)
281{
282	const struct tcphdr *hp;
283	struct tcphdr _hdr;
284
285	/* Actually only need first 8 bytes. */
286	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287	if (hp == NULL)
288		return false;
289
290	tuple->src.u.tcp.port = hp->source;
291	tuple->dst.u.tcp.port = hp->dest;
292
293	return true;
294}
295
/*
 * Build the reverse-direction port pair: @tuple gets @orig's destination
 * port as its source port and @orig's source port as its destination port.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
	return true;
}
303
304/* Print out the per-protocol part of the tuple. */
305static int tcp_print_tuple(struct seq_file *s,
306			   const struct nf_conntrack_tuple *tuple)
307{
308	return seq_printf(s, "sport=%hu dport=%hu ",
309			  ntohs(tuple->src.u.tcp.port),
310			  ntohs(tuple->dst.u.tcp.port));
311}
312
313/* Print out the private part of the conntrack. */
314static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315{
316	enum tcp_conntrack state;
317
318	spin_lock_bh(&ct->lock);
319	state = ct->proto.tcp.state;
320	spin_unlock_bh(&ct->lock);
321
322	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
323}
324
325static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326{
327	if (tcph->rst) return TCP_RST_SET;
328	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329	else if (tcph->fin) return TCP_FIN_SET;
330	else if (tcph->ack) return TCP_ACK_SET;
331	else return TCP_NONE_SET;
332}
333
334/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335   in IP Filter' by Guido van Rooij.
336
337   http://www.nluug.nl/events/sane2000/papers.html
338   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
339
340   The boundaries and the conditions are changed according to RFC793:
341   the packet must intersect the window (i.e. segments may be
342   after the right or before the left edge) and thus receivers may ACK
343   segments after the right edge of the window.
344
345	td_maxend = max(sack + max(win,1)) seen in reply packets
346	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347	td_maxwin += seq + len - sender.td_maxend
348			if seq + len > sender.td_maxend
349	td_end    = max(seq + len) seen in sent packets
350
351   I.   Upper bound for valid data:	seq <= sender.td_maxend
352   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
353   III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
354   IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
355
356   where sack is the highest right edge of sack block found in the packet
357   or ack in the case of packet without SACK option.
358
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
361*/
362
363static inline __u32 segment_seq_plus_len(__u32 seq,
364					 size_t len,
365					 unsigned int dataoff,
366					 const struct tcphdr *tcph)
367{
368	return (seq + len - dataoff - tcph->doff*4
369		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
370}
371
/*
 * MAXACKWINDOW(sender) yields the larger of sender->td_maxwin and
 * MAXACKWINCONST.  It is the allowance used for the lower bound of a valid
 * (s)ack (condition IV above).  Note that the macro evaluates its argument
 * more than once.
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
376
377/*
378 * Simplified tcp_parse_options routine from tcp_input.c
379 */
380static void tcp_options(const struct sk_buff *skb,
381			unsigned int dataoff,
382			const struct tcphdr *tcph,
383			struct ip_ct_tcp_state *state)
384{
385	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
386	const unsigned char *ptr;
387	int length = (tcph->doff*4) - sizeof(struct tcphdr);
388
389	if (!length)
390		return;
391
392	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
393				 length, buff);
394	BUG_ON(ptr == NULL);
395
396	state->td_scale =
397	state->flags = 0;
398
399	while (length > 0) {
400		int opcode=*ptr++;
401		int opsize;
402
403		switch (opcode) {
404		case TCPOPT_EOL:
405			return;
406		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
407			length--;
408			continue;
409		default:
410			opsize=*ptr++;
411			if (opsize < 2) /* "silly options" */
412				return;
413			if (opsize > length)
414				break;	/* don't parse partial options */
415
416			if (opcode == TCPOPT_SACK_PERM
417			    && opsize == TCPOLEN_SACK_PERM)
418				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
419			else if (opcode == TCPOPT_WINDOW
420				 && opsize == TCPOLEN_WINDOW) {
421				state->td_scale = *(u_int8_t *)ptr;
422
423				if (state->td_scale > 14) {
424					/* See RFC1323 */
425					state->td_scale = 14;
426				}
427				state->flags |=
428					IP_CT_TCP_FLAG_WINDOW_SCALE;
429			}
430			ptr += opsize - 2;
431			length -= opsize;
432		}
433	}
434}
435
436static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
437                     const struct tcphdr *tcph, __u32 *sack)
438{
439	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
440	const unsigned char *ptr;
441	int length = (tcph->doff*4) - sizeof(struct tcphdr);
442	__u32 tmp;
443
444	if (!length)
445		return;
446
447	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
448				 length, buff);
449	BUG_ON(ptr == NULL);
450
451	/* Fast path for timestamp-only option */
452	if (length == TCPOLEN_TSTAMP_ALIGNED*4
453	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
454				       | (TCPOPT_NOP << 16)
455				       | (TCPOPT_TIMESTAMP << 8)
456				       | TCPOLEN_TIMESTAMP))
457		return;
458
459	while (length > 0) {
460		int opcode = *ptr++;
461		int opsize, i;
462
463		switch (opcode) {
464		case TCPOPT_EOL:
465			return;
466		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
467			length--;
468			continue;
469		default:
470			opsize = *ptr++;
471			if (opsize < 2) /* "silly options" */
472				return;
473			if (opsize > length)
474				break;	/* don't parse partial options */
475
476			if (opcode == TCPOPT_SACK
477			    && opsize >= (TCPOLEN_SACK_BASE
478					  + TCPOLEN_SACK_PERBLOCK)
479			    && !((opsize - TCPOLEN_SACK_BASE)
480				 % TCPOLEN_SACK_PERBLOCK)) {
481				for (i = 0;
482				     i < (opsize - TCPOLEN_SACK_BASE);
483				     i += TCPOLEN_SACK_PERBLOCK) {
484					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
485
486					if (after(tmp, *sack))
487						*sack = tmp;
488				}
489				return;
490			}
491			ptr += opsize - 2;
492			length -= opsize;
493		}
494	}
495}
496
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Ask the nf_ct_nat_offset hook for the sequence-number offset that applies
 * to @seq on @ct in direction @dir.  Returns 0 when the hook pointer is NULL.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);

	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
}
/*
 * Only NFPROTO_IPV4 is looked up; every other family yields an offset of 0.
 * Arguments are parenthesized so that expression arguments expand safely.
 */
#define NAT_OFFSET(pf, ct, dir, seq) \
	((pf) == NFPROTO_IPV4 ? nat_offset((ct), (dir), (seq)) : 0)
#else
/* Without CONFIG_NF_NAT_NEEDED the offset is always 0 (arguments unused). */
#define NAT_OFFSET(pf, ct, dir, seq)	0
#endif
511
/*
 * Decide whether a segment fits the tracked windows of connection @ct and,
 * if it does, update the per-direction tracking data held in @state.
 *
 * @dir selects the sender (state->seen[dir]); the opposite entry is the
 * receiver.  @index is the flag class of the packet (enum tcp_bit_set); here
 * it only restricts retransmission counting to plain ACK segments.  @pf is
 * passed to NAT_OFFSET() and to the invalid-packet logger.
 *
 * Returns true when conditions I-IV from the comment block above hold, or
 * when they fail but liberal mode is on (IP_CT_TCP_FLAG_BE_LIBERAL on the
 * sender, or nf_ct_tcp_be_liberal).  Otherwise returns false, logging which
 * bound failed if LOG_INVALID() permits.  A SYN without ACK that initializes
 * the sender data returns true early, before any window check.
 */
static bool tcp_in_window(const struct nf_conn *ct,
			  struct ip_ct_tcp *state,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph,
			  u_int8_t pf)
{
	struct net *net = nf_ct_net(ct);
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
	__u32 seq, ack, sack, end, win, swin;
	s16 receiver_offset;
	bool res;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win = ntohs(tcph->window);
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	/* Only look for SACK blocks if the receiver side has SACK_PERM set. */
	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

	/* Take into account NAT sequence number mangling */
	receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
	ack -= receiver_offset;
	sack -= receiver_offset;

	pr_debug("tcp_in_window: START\n");
	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	/* td_maxwin == 0 means no data has been recorded for this sender yet. */
	if (sender->td_maxwin == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn) {
			/*
			 * SYN-ACK in reply to a SYN
			 * or SYN from reply direction in simultaneous open.
			 */
			sender->td_end =
			sender->td_maxend = end;
			sender->td_maxwin = (win == 0 ? 1 : win);

			tcp_options(skb, dataoff, tcph, sender);
			/*
			 * RFC 1323:
			 * Both sides must send the Window Scale option
			 * to enable window scaling in either direction.
			 */
			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
				sender->td_scale =
				receiver->td_scale = 0;
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			win <<= sender->td_scale;
			sender->td_maxwin = (win == 0 ? 1 : win);
			sender->td_maxend = end + sender->td_maxwin;
			/*
			 * We haven't seen traffic in the other direction yet
			 * but we have to tweak window tracking to pass III
			 * and IV until that happens.
			 */
			if (receiver->td_maxwin == 0)
				receiver->td_end = receiver->td_maxend = sack;
		}
	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
		     && dir == IP_CT_DIR_ORIGINAL)
		   || (state->state == TCP_CONNTRACK_SYN_RECV
		     && dir == IP_CT_DIR_REPLY))
		   && after(end, sender->td_end)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 */
		sender->td_end =
		sender->td_maxend = end;
		sender->td_maxwin = (win == 0 ? 1 : win);

		tcp_options(skb, dataoff, tcph, sender);
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (seq == end
	    && (!tcph->rst
		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
		/*
		 * Packet contains no data: we assume it is valid
		 * and check the ack value only.
		 * However RST segments are always validated by their
		 * SEQ number, except when seq == 0 (reset sent answering
		 * SYN).
		 */
		seq = end = sender->td_end;

	pr_debug("tcp_in_window: ");
	nf_ct_dump_tuple(tuple);
	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
		 sender->td_end, sender->td_maxend, sender->td_maxwin,
		 sender->td_scale,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
		 receiver->td_scale);

	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
		 before(seq, sender->td_maxend + 1),
		 after(end, sender->td_end - receiver->td_maxwin - 1),
		 before(sack, receiver->td_end + 1),
		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));

	/* Conditions I, II, III and IV, in that order. */
	if (before(seq, sender->td_maxend + 1) &&
	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
	    before(sack, receiver->td_end + 1) &&
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end)) {
			sender->td_end = end;
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
		if (tcph->ack) {
			/* Track the highest ack value sent by this side. */
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

		/*
		 * Update receiver data.
		 */
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			/* Identical to the previously recorded ACK: count it. */
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win;
				state->retrans = 0;
			}
		}
		res = true;
	} else {
		res = false;
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    nf_ct_tcp_be_liberal)
			res = true;
		/* Nested conditionals pick the message for the first failed bound. */
		if (!res && LOG_INVALID(net, IPPROTO_TCP))
			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
			"nf_ct_tcp: %s ",
			before(seq, sender->td_maxend + 1) ?
			after(end, sender->td_end - receiver->td_maxwin - 1) ?
			before(sack, receiver->td_end + 1) ?
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
	}

	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
		 "receiver end=%u maxend=%u maxwin=%u\n",
		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

	return res;
}
748
/*
 * Table of valid flag combinations - PUSH, ECE and CWR are always valid.
 *
 * Indexed by the TCP flag byte after PSH/ECE/CWR have been masked out (see
 * tcp_error()); an entry of 1 marks an accepted combination, every
 * combination not listed is 0 and therefore rejected.  The array is sized to
 * hold the largest possible index, FIN|SYN|RST|ACK|URG.
 */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
};
763
764/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
765static int BCMFASTPATH_HOST tcp_error(struct net *net, struct nf_conn *tmpl,
766		     struct sk_buff *skb,
767		     unsigned int dataoff,
768		     enum ip_conntrack_info *ctinfo,
769		     u_int8_t pf,
770		     unsigned int hooknum)
771{
772	const struct tcphdr *th;
773	struct tcphdr _tcph;
774	unsigned int tcplen = skb->len - dataoff;
775	u_int8_t tcpflags;
776
777	/* Smaller that minimal TCP header? */
778	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
779	if (th == NULL) {
780		if (LOG_INVALID(net, IPPROTO_TCP))
781			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
782				"nf_ct_tcp: short packet ");
783		return -NF_ACCEPT;
784	}
785
786	/* Not whole TCP header or malformed packet */
787	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
788		if (LOG_INVALID(net, IPPROTO_TCP))
789			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
790				"nf_ct_tcp: truncated/malformed packet ");
791		return -NF_ACCEPT;
792	}
793
794	/* Checksum invalid? Ignore.
795	 * We skip checking packets on the outgoing path
796	 * because the checksum is assumed to be correct.
797	 */
798	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
799	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
800		if (LOG_INVALID(net, IPPROTO_TCP))
801			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
802				  "nf_ct_tcp: bad TCP checksum ");
803		return -NF_ACCEPT;
804	}
805
806	/* Check TCP flags. */
807	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
808	if (!tcp_valid_flags[tcpflags]) {
809		if (LOG_INVALID(net, IPPROTO_TCP))
810			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
811				  "nf_ct_tcp: invalid TCP flag combination ");
812		return -NF_ACCEPT;
813	}
814
815	return NF_ACCEPT;
816}
817
818/* Returns verdict for packet, or -1 for invalid. */
819static int BCMFASTPATH_HOST tcp_packet(struct nf_conn *ct,
820		      const struct sk_buff *skb,
821		      unsigned int dataoff,
822		      enum ip_conntrack_info ctinfo,
823		      u_int8_t pf,
824		      unsigned int hooknum)
825{
826	struct net *net = nf_ct_net(ct);
827	struct nf_conntrack_tuple *tuple;
828	enum tcp_conntrack new_state, old_state;
829	enum ip_conntrack_dir dir;
830	const struct tcphdr *th;
831	struct tcphdr _tcph;
832	unsigned long timeout;
833	unsigned int index;
834
835	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
836	BUG_ON(th == NULL);
837
838	spin_lock_bh(&ct->lock);
839	old_state = ct->proto.tcp.state;
840	dir = CTINFO2DIR(ctinfo);
841	index = get_conntrack_index(th);
842	new_state = tcp_conntracks[dir][index][old_state];
843	tuple = &ct->tuplehash[dir].tuple;
844
845	switch (new_state) {
846	case TCP_CONNTRACK_SYN_SENT:
847		if (old_state < TCP_CONNTRACK_TIME_WAIT)
848			break;
849		/* RFC 1122: "When a connection is closed actively,
850		 * it MUST linger in TIME-WAIT state for a time 2xMSL
851		 * (Maximum Segment Lifetime). However, it MAY accept
852		 * a new SYN from the remote TCP to reopen the connection
853		 * directly from TIME-WAIT state, if..."
854		 * We ignore the conditions because we are in the
855		 * TIME-WAIT state anyway.
856		 *
857		 * Handle aborted connections: we and the server
858		 * think there is an existing connection but the client
859		 * aborts it and starts a new one.
860		 */
861		if (((ct->proto.tcp.seen[dir].flags
862		      | ct->proto.tcp.seen[!dir].flags)
863		     & IP_CT_TCP_FLAG_CLOSE_INIT)
864		    || (ct->proto.tcp.last_dir == dir
865		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
866			/* Attempt to reopen a closed/aborted connection.
867			 * Delete this connection and look up again. */
868			spin_unlock_bh(&ct->lock);
869
870			/* Only repeat if we can actually remove the timer.
871			 * Destruction may already be in progress in process
872			 * context and we must give it a chance to terminate.
873			 */
874			if (nf_ct_kill(ct))
875				return -NF_REPEAT;
876			return NF_DROP;
877		}
878		/* Fall through */
879	case TCP_CONNTRACK_IGNORE:
880		/* Ignored packets:
881		 *
882		 * Our connection entry may be out of sync, so ignore
883		 * packets which may signal the real connection between
884		 * the client and the server.
885		 *
886		 * a) SYN in ORIGINAL
887		 * b) SYN/ACK in REPLY
888		 * c) ACK in reply direction after initial SYN in original.
889		 *
890		 * If the ignored packet is invalid, the receiver will send
891		 * a RST we'll catch below.
892		 */
893		if (index == TCP_SYNACK_SET
894		    && ct->proto.tcp.last_index == TCP_SYN_SET
895		    && ct->proto.tcp.last_dir != dir
896		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
897			/* b) This SYN/ACK acknowledges a SYN that we earlier
898			 * ignored as invalid. This means that the client and
899			 * the server are both in sync, while the firewall is
900			 * not. We get in sync from the previously annotated
901			 * values.
902			 */
903			old_state = TCP_CONNTRACK_SYN_SENT;
904			new_state = TCP_CONNTRACK_SYN_RECV;
905			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
906				ct->proto.tcp.last_end;
907			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
908				ct->proto.tcp.last_end;
909			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
910				ct->proto.tcp.last_win == 0 ?
911					1 : ct->proto.tcp.last_win;
912			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
913				ct->proto.tcp.last_wscale;
914			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
915				ct->proto.tcp.last_flags;
916			memset(&ct->proto.tcp.seen[dir], 0,
917			       sizeof(struct ip_ct_tcp_state));
918			break;
919		}
920		ct->proto.tcp.last_index = index;
921		ct->proto.tcp.last_dir = dir;
922		ct->proto.tcp.last_seq = ntohl(th->seq);
923		ct->proto.tcp.last_end =
924		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
925		ct->proto.tcp.last_win = ntohs(th->window);
926
927		/* a) This is a SYN in ORIGINAL. The client and the server
928		 * may be in sync but we are not. In that case, we annotate
929		 * the TCP options and let the packet go through. If it is a
930		 * valid SYN packet, the server will reply with a SYN/ACK, and
931		 * then we'll get in sync. Otherwise, the server ignores it. */
932		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
933			struct ip_ct_tcp_state seen = {};
934
935			ct->proto.tcp.last_flags =
936			ct->proto.tcp.last_wscale = 0;
937			tcp_options(skb, dataoff, th, &seen);
938			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
939				ct->proto.tcp.last_flags |=
940					IP_CT_TCP_FLAG_WINDOW_SCALE;
941				ct->proto.tcp.last_wscale = seen.td_scale;
942			}
943			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
944				ct->proto.tcp.last_flags |=
945					IP_CT_TCP_FLAG_SACK_PERM;
946			}
947		}
948		spin_unlock_bh(&ct->lock);
949		if (LOG_INVALID(net, IPPROTO_TCP))
950			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
951				  "nf_ct_tcp: invalid packet ignored ");
952		return NF_ACCEPT;
953	case TCP_CONNTRACK_MAX:
954		/* Invalid packet */
955		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
956			 dir, get_conntrack_index(th), old_state);
957		spin_unlock_bh(&ct->lock);
958		if (LOG_INVALID(net, IPPROTO_TCP))
959			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
960				  "nf_ct_tcp: invalid state ");
961		return -NF_ACCEPT;
962	case TCP_CONNTRACK_CLOSE:
963		if (index == TCP_RST_SET
964		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
965		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
966			/* Invalid RST  */
967			spin_unlock_bh(&ct->lock);
968			if (LOG_INVALID(net, IPPROTO_TCP))
969				nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
970					  "nf_ct_tcp: invalid RST ");
971			return -NF_ACCEPT;
972		}
973		if (index == TCP_RST_SET
974		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
975			 && ct->proto.tcp.last_index == TCP_SYN_SET)
976			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
977			    && ct->proto.tcp.last_index == TCP_ACK_SET))
978		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
979			/* RST sent to invalid SYN or ACK we had let through
980			 * at a) and c) above:
981			 *
982			 * a) SYN was in window then
983			 * c) we hold a half-open connection.
984			 *
985			 * Delete our connection entry.
986			 * We skip window checking, because packet might ACK
987			 * segments we ignored. */
988			goto in_window;
989		}
990		/* Just fall through */
991	default:
992		/* Keep compilers happy. */
993		break;
994	}
995
996#ifdef HNDCTF
997	/* Remove the ipc entries on receipt of FIN or RST */
998	if (CTF_ENAB(kcih)) {
999		if (ct->ctf_flags & CTF_FLAGS_CACHED) {
1000			if (th->fin || th->rst) {
1001				ip_conntrack_ipct_delete(ct, 0);
1002			}
1003			goto in_window;
1004		}
1005	}
1006#endif /* HNDCTF */
1007
1008	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1009			   skb, dataoff, th, pf)) {
1010		spin_unlock_bh(&ct->lock);
1011		return -NF_ACCEPT;
1012	}
1013     in_window:
1014	/* From now on we have got in-window packets */
1015	ct->proto.tcp.last_index = index;
1016	ct->proto.tcp.last_dir = dir;
1017
1018	pr_debug("tcp_conntracks: ");
1019	nf_ct_dump_tuple(tuple);
1020	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1021		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1022		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1023		 old_state, new_state);
1024
1025	ct->proto.tcp.state = new_state;
1026	if (old_state != new_state
1027	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1028		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1029
1030	if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
1031	    tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans)
1032		timeout = nf_ct_tcp_timeout_max_retrans;
1033	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1034		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1035		 tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged)
1036		timeout = nf_ct_tcp_timeout_unacknowledged;
1037	else
1038		timeout = tcp_timeouts[new_state];
1039	spin_unlock_bh(&ct->lock);
1040
1041	if (new_state != old_state)
1042		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1043
1044	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1045		/* If only reply is a RST, we can consider ourselves not to
1046		   have an established connection: this is a fairly common
1047		   problem case, so we can delete the conntrack
1048		   immediately.  --RR */
1049		if (th->rst) {
1050			nf_ct_kill_acct(ct, ctinfo, skb);
1051			return NF_ACCEPT;
1052		}
1053	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1054		   && (old_state == TCP_CONNTRACK_SYN_RECV
1055		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1056		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1057		/* Set ASSURED if we see see valid ack in ESTABLISHED
1058		   after SYN_RECV or a valid answer for a picked up
1059		   connection. */
1060		set_bit(IPS_ASSURED_BIT, &ct->status);
1061		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1062	}
1063	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1064
1065	return NF_ACCEPT;
1066}
1067
1068/* Called when a new connection for this protocol found. */
1069static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1070		    unsigned int dataoff)
1071{
1072	enum tcp_conntrack new_state;
1073	const struct tcphdr *th;
1074	struct tcphdr _tcph;
1075	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1076	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1077
1078	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1079	BUG_ON(th == NULL);
1080
1081	/* Don't need lock here: this conntrack not in circulation yet */
1082	new_state
1083		= tcp_conntracks[0][get_conntrack_index(th)]
1084		[TCP_CONNTRACK_NONE];
1085
1086	/* Invalid: delete conntrack */
1087	if (new_state >= TCP_CONNTRACK_MAX) {
1088		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1089		return false;
1090	}
1091
1092	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1093		/* SYN packet */
1094		ct->proto.tcp.seen[0].td_end =
1095			segment_seq_plus_len(ntohl(th->seq), skb->len,
1096					     dataoff, th);
1097		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1098		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1099			ct->proto.tcp.seen[0].td_maxwin = 1;
1100		ct->proto.tcp.seen[0].td_maxend =
1101			ct->proto.tcp.seen[0].td_end;
1102
1103		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1104		ct->proto.tcp.seen[1].flags = 0;
1105	} else if (nf_ct_tcp_loose == 0) {
1106		/* Don't try to pick up connections. */
1107		return false;
1108	} else {
1109		/*
1110		 * We are in the middle of a connection,
1111		 * its history is lost for us.
1112		 * Let's try to use the data from the packet.
1113		 */
1114		ct->proto.tcp.seen[0].td_end =
1115			segment_seq_plus_len(ntohl(th->seq), skb->len,
1116					     dataoff, th);
1117		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1118		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1119			ct->proto.tcp.seen[0].td_maxwin = 1;
1120		ct->proto.tcp.seen[0].td_maxend =
1121			ct->proto.tcp.seen[0].td_end +
1122			ct->proto.tcp.seen[0].td_maxwin;
1123		ct->proto.tcp.seen[0].td_scale = 0;
1124
1125		/* We assume SACK and liberal window checking to handle
1126		 * window scaling */
1127		ct->proto.tcp.seen[0].flags =
1128		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1129					      IP_CT_TCP_FLAG_BE_LIBERAL;
1130	}
1131
1132	ct->proto.tcp.seen[1].td_end = 0;
1133	ct->proto.tcp.seen[1].td_maxend = 0;
1134	ct->proto.tcp.seen[1].td_maxwin = 0;
1135	ct->proto.tcp.seen[1].td_scale = 0;
1136
1137	/* tcp_packet will set them */
1138	ct->proto.tcp.state = TCP_CONNTRACK_NONE;
1139	ct->proto.tcp.last_index = TCP_NONE_SET;
1140
1141	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1142		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1143		 sender->td_end, sender->td_maxend, sender->td_maxwin,
1144		 sender->td_scale,
1145		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1146		 receiver->td_scale);
1147	return true;
1148}
1149
1150#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1151
1152#include <linux/netfilter/nfnetlink.h>
1153#include <linux/netfilter/nfnetlink_conntrack.h>
1154
1155static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1156			 struct nf_conn *ct)
1157{
1158	struct nlattr *nest_parms;
1159	struct nf_ct_tcp_flags tmp = {};
1160
1161	spin_lock_bh(&ct->lock);
1162	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1163	if (!nest_parms)
1164		goto nla_put_failure;
1165
1166	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
1167
1168	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1169		   ct->proto.tcp.seen[0].td_scale);
1170
1171	NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1172		   ct->proto.tcp.seen[1].td_scale);
1173
1174	tmp.flags = ct->proto.tcp.seen[0].flags;
1175	NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1176		sizeof(struct nf_ct_tcp_flags), &tmp);
1177
1178	tmp.flags = ct->proto.tcp.seen[1].flags;
1179	NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1180		sizeof(struct nf_ct_tcp_flags), &tmp);
1181	spin_unlock_bh(&ct->lock);
1182
1183	nla_nest_end(skb, nest_parms);
1184
1185	return 0;
1186
1187nla_put_failure:
1188	spin_unlock_bh(&ct->lock);
1189	return -1;
1190}
1191
1192static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1193	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
1194	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1195	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1196	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1197	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
1198};
1199
1200static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1201{
1202	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1203	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1204	int err;
1205
1206	/* updates could not contain anything about the private
1207	 * protocol info, in that case skip the parsing */
1208	if (!pattr)
1209		return 0;
1210
1211	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1212	if (err < 0)
1213		return err;
1214
1215	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1216	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1217		return -EINVAL;
1218
1219	spin_lock_bh(&ct->lock);
1220	if (tb[CTA_PROTOINFO_TCP_STATE])
1221		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1222
1223	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1224		struct nf_ct_tcp_flags *attr =
1225			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1226		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1227		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1228	}
1229
1230	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1231		struct nf_ct_tcp_flags *attr =
1232			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1233		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1234		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1235	}
1236
1237	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1238	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1239	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1240	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1241		ct->proto.tcp.seen[0].td_scale =
1242			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1243		ct->proto.tcp.seen[1].td_scale =
1244			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1245	}
1246	spin_unlock_bh(&ct->lock);
1247
1248	return 0;
1249}
1250
1251static int tcp_nlattr_size(void)
1252{
1253	return nla_total_size(0)	   /* CTA_PROTOINFO_TCP */
1254		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1255}
1256
1257static int tcp_nlattr_tuple_size(void)
1258{
1259	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1260}
1261#endif
1262
1263#ifdef CONFIG_SYSCTL
1264static unsigned int tcp_sysctl_table_users;
1265static struct ctl_table_header *tcp_sysctl_header;
1266static struct ctl_table tcp_sysctl_table[] = {
1267	{
1268		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
1269		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1270		.maxlen		= sizeof(unsigned int),
1271		.mode		= 0644,
1272		.proc_handler	= proc_dointvec_jiffies,
1273	},
1274	{
1275		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
1276		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1277		.maxlen		= sizeof(unsigned int),
1278		.mode		= 0644,
1279		.proc_handler	= proc_dointvec_jiffies,
1280	},
1281	{
1282		.procname	= "nf_conntrack_tcp_timeout_established",
1283		.data		= &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1284		.maxlen		= sizeof(unsigned int),
1285		.mode		= 0644,
1286		.proc_handler	= proc_dointvec_jiffies,
1287	},
1288	{
1289		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
1290		.data		= &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1291		.maxlen		= sizeof(unsigned int),
1292		.mode		= 0644,
1293		.proc_handler	= proc_dointvec_jiffies,
1294	},
1295	{
1296		.procname	= "nf_conntrack_tcp_timeout_close_wait",
1297		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1298		.maxlen		= sizeof(unsigned int),
1299		.mode		= 0644,
1300		.proc_handler	= proc_dointvec_jiffies,
1301	},
1302	{
1303		.procname	= "nf_conntrack_tcp_timeout_last_ack",
1304		.data		= &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1305		.maxlen		= sizeof(unsigned int),
1306		.mode		= 0644,
1307		.proc_handler	= proc_dointvec_jiffies,
1308	},
1309	{
1310		.procname	= "nf_conntrack_tcp_timeout_time_wait",
1311		.data		= &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1312		.maxlen		= sizeof(unsigned int),
1313		.mode		= 0644,
1314		.proc_handler	= proc_dointvec_jiffies,
1315	},
1316	{
1317		.procname	= "nf_conntrack_tcp_timeout_close",
1318		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1319		.maxlen		= sizeof(unsigned int),
1320		.mode		= 0644,
1321		.proc_handler	= proc_dointvec_jiffies,
1322	},
1323	{
1324		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
1325		.data		= &nf_ct_tcp_timeout_max_retrans,
1326		.maxlen		= sizeof(unsigned int),
1327		.mode		= 0644,
1328		.proc_handler	= proc_dointvec_jiffies,
1329	},
1330	{
1331		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
1332		.data		= &nf_ct_tcp_timeout_unacknowledged,
1333		.maxlen		= sizeof(unsigned int),
1334		.mode		= 0644,
1335		.proc_handler	= proc_dointvec_jiffies,
1336	},
1337	{
1338		.procname	= "nf_conntrack_tcp_loose",
1339		.data		= &nf_ct_tcp_loose,
1340		.maxlen		= sizeof(unsigned int),
1341		.mode		= 0644,
1342		.proc_handler	= proc_dointvec,
1343	},
1344	{
1345		.procname       = "nf_conntrack_tcp_be_liberal",
1346		.data           = &nf_ct_tcp_be_liberal,
1347		.maxlen         = sizeof(unsigned int),
1348		.mode           = 0644,
1349		.proc_handler   = proc_dointvec,
1350	},
1351	{
1352		.procname	= "nf_conntrack_tcp_max_retrans",
1353		.data		= &nf_ct_tcp_max_retrans,
1354		.maxlen		= sizeof(unsigned int),
1355		.mode		= 0644,
1356		.proc_handler	= proc_dointvec,
1357	},
1358	{ }
1359};
1360
1361#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1362static struct ctl_table tcp_compat_sysctl_table[] = {
1363	{
1364		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
1365		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
1366		.maxlen		= sizeof(unsigned int),
1367		.mode		= 0644,
1368		.proc_handler	= proc_dointvec_jiffies,
1369	},
1370	{
1371		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
1372		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
1373		.maxlen		= sizeof(unsigned int),
1374		.mode		= 0644,
1375		.proc_handler	= proc_dointvec_jiffies,
1376	},
1377	{
1378		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
1379		.data		= &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
1380		.maxlen		= sizeof(unsigned int),
1381		.mode		= 0644,
1382		.proc_handler	= proc_dointvec_jiffies,
1383	},
1384	{
1385		.procname	= "ip_conntrack_tcp_timeout_established",
1386		.data		= &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
1387		.maxlen		= sizeof(unsigned int),
1388		.mode		= 0644,
1389		.proc_handler	= proc_dointvec_jiffies,
1390	},
1391	{
1392		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
1393		.data		= &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
1394		.maxlen		= sizeof(unsigned int),
1395		.mode		= 0644,
1396		.proc_handler	= proc_dointvec_jiffies,
1397	},
1398	{
1399		.procname	= "ip_conntrack_tcp_timeout_close_wait",
1400		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
1401		.maxlen		= sizeof(unsigned int),
1402		.mode		= 0644,
1403		.proc_handler	= proc_dointvec_jiffies,
1404	},
1405	{
1406		.procname	= "ip_conntrack_tcp_timeout_last_ack",
1407		.data		= &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
1408		.maxlen		= sizeof(unsigned int),
1409		.mode		= 0644,
1410		.proc_handler	= proc_dointvec_jiffies,
1411	},
1412	{
1413		.procname	= "ip_conntrack_tcp_timeout_time_wait",
1414		.data		= &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
1415		.maxlen		= sizeof(unsigned int),
1416		.mode		= 0644,
1417		.proc_handler	= proc_dointvec_jiffies,
1418	},
1419	{
1420		.procname	= "ip_conntrack_tcp_timeout_close",
1421		.data		= &tcp_timeouts[TCP_CONNTRACK_CLOSE],
1422		.maxlen		= sizeof(unsigned int),
1423		.mode		= 0644,
1424		.proc_handler	= proc_dointvec_jiffies,
1425	},
1426	{
1427		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
1428		.data		= &nf_ct_tcp_timeout_max_retrans,
1429		.maxlen		= sizeof(unsigned int),
1430		.mode		= 0644,
1431		.proc_handler	= proc_dointvec_jiffies,
1432	},
1433	{
1434		.procname	= "ip_conntrack_tcp_loose",
1435		.data		= &nf_ct_tcp_loose,
1436		.maxlen		= sizeof(unsigned int),
1437		.mode		= 0644,
1438		.proc_handler	= proc_dointvec,
1439	},
1440	{
1441		.procname	= "ip_conntrack_tcp_be_liberal",
1442		.data		= &nf_ct_tcp_be_liberal,
1443		.maxlen		= sizeof(unsigned int),
1444		.mode		= 0644,
1445		.proc_handler	= proc_dointvec,
1446	},
1447	{
1448		.procname	= "ip_conntrack_tcp_max_retrans",
1449		.data		= &nf_ct_tcp_max_retrans,
1450		.maxlen		= sizeof(unsigned int),
1451		.mode		= 0644,
1452		.proc_handler	= proc_dointvec,
1453	},
1454	{ }
1455};
1456#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1457#endif /* CONFIG_SYSCTL */
1458
1459struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1460{
1461	.l3proto		= PF_INET,
1462	.l4proto 		= IPPROTO_TCP,
1463	.name 			= "tcp",
1464	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1465	.invert_tuple 		= tcp_invert_tuple,
1466	.print_tuple 		= tcp_print_tuple,
1467	.print_conntrack 	= tcp_print_conntrack,
1468	.packet 		= tcp_packet,
1469	.new 			= tcp_new,
1470	.error			= tcp_error,
1471#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1472	.to_nlattr		= tcp_to_nlattr,
1473	.nlattr_size		= tcp_nlattr_size,
1474	.from_nlattr		= nlattr_to_tcp,
1475	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1476	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1477	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1478	.nla_policy		= nf_ct_port_nla_policy,
1479#endif
1480#ifdef CONFIG_SYSCTL
1481	.ctl_table_users	= &tcp_sysctl_table_users,
1482	.ctl_table_header	= &tcp_sysctl_header,
1483	.ctl_table		= tcp_sysctl_table,
1484#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1485	.ctl_compat_table	= tcp_compat_sysctl_table,
1486#endif
1487#endif
1488};
1489EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1490
1491struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1492{
1493	.l3proto		= PF_INET6,
1494	.l4proto 		= IPPROTO_TCP,
1495	.name 			= "tcp",
1496	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1497	.invert_tuple 		= tcp_invert_tuple,
1498	.print_tuple 		= tcp_print_tuple,
1499	.print_conntrack 	= tcp_print_conntrack,
1500	.packet 		= tcp_packet,
1501	.new 			= tcp_new,
1502	.error			= tcp_error,
1503#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1504	.to_nlattr		= tcp_to_nlattr,
1505	.nlattr_size		= tcp_nlattr_size,
1506	.from_nlattr		= nlattr_to_tcp,
1507	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1508	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1509	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1510	.nla_policy		= nf_ct_port_nla_policy,
1511#endif
1512#ifdef CONFIG_SYSCTL
1513	.ctl_table_users	= &tcp_sysctl_table_users,
1514	.ctl_table_header	= &tcp_sysctl_header,
1515	.ctl_table		= tcp_sysctl_table,
1516#endif
1517};
1518EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1519