1/* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */
2/*
3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
4 *		operating system.  INET is implemented using the  BSD Socket
5 *		interface as the means of communication with the user level.
6 *
7 *		Implementation of the Transmission Control Protocol(TCP).
8 *
9 *		IPv4 specific functions
10 *
11 *
12 *		code split from:
13 *		linux/ipv4/tcp.c
14 *		linux/ipv4/tcp_input.c
15 *		linux/ipv4/tcp_output.c
16 *
17 *		See tcp.c for author information
18 *
19 *	This program is free software; you can redistribute it and/or
20 *      modify it under the terms of the GNU General Public License
21 *      as published by the Free Software Foundation; either version
22 *      2 of the License, or (at your option) any later version.
23 */
24
25/*
26 * Changes:
27 *		David S. Miller	:	New socket lookup architecture.
28 *					This code is dedicated to John Dyson.
29 *		David S. Miller :	Change semantics of established hash,
30 *					half is devoted to TIME_WAIT sockets
31 *					and the rest go in the other half.
32 *		Andi Kleen :		Add support for syncookies and fixed
33 *					some bugs: ip options weren't passed to
34 *					the TCP layer, missed a check for an
35 *					ACK bit.
36 *		Andi Kleen :		Implemented fast path mtu discovery.
37 *	     				Fixed many serious bugs in the
38 *					request_sock handling and moved
39 *					most of it into the af independent code.
40 *					Added tail drop and some other bugfixes.
41 *					Added new listen semantics.
42 *		Mike McLagan	:	Routing by source
43 *	Juan Jose Ciarlante:		ip_dynaddr bits
44 *		Andi Kleen:		various fixes.
45 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
46 *					coma.
47 *	Andi Kleen		:	Fix new listen.
48 *	Andi Kleen		:	Fix accept error reporting.
49 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
50 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
51 *					a single port at the same time.
52 */
53
54
55#include <linux/bottom_half.h>
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64#include <linux/slab.h>
65
66#include <net/net_namespace.h>
67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/netdma.h>
76
77#include <linux/inet.h>
78#include <linux/ipv6.h>
79#include <linux/stddef.h>
80#include <linux/proc_fs.h>
81#include <linux/seq_file.h>
82
83#include <linux/crypto.h>
84#include <linux/scatterlist.h>
85
86#include <typedefs.h>
87#include <bcmdefs.h>
88
89int sysctl_tcp_tw_reuse __read_mostly;
90int sysctl_tcp_low_latency __read_mostly;
91EXPORT_SYMBOL(sysctl_tcp_low_latency);
92
93
94#ifdef CONFIG_TCP_MD5SIG
95static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
96						   __be32 addr);
97static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
98			       __be32 daddr, __be32 saddr, struct tcphdr *th);
99#else
100static inline
101struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
102{
103	return NULL;
104}
105#endif
106
107struct inet_hashinfo tcp_hashinfo;
108EXPORT_SYMBOL(tcp_hashinfo);
109
110static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
111{
112	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
113					  ip_hdr(skb)->saddr,
114					  tcp_hdr(skb)->dest,
115					  tcp_hdr(skb)->source);
116}
117
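
/* Decide whether a new connection may safely reuse the port pair held by the
 * TIME-WAIT socket sktw.  Using the timestamps cached in the TW bucket (PAWS),
 * seed the new socket's write_seq and ts_recent and return 1 when reuse is
 * considered safe; return 0 otherwise.
 */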
118int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
119{
120	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
121	struct tcp_sock *tp = tcp_sk(sk);
122
	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap, i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's, only the timestamp cache is
	   held not per host but per port pair, and the TW bucket is used as
	   the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
134	if (tcptw->tw_ts_recent_stamp &&
135	    (twp == NULL || (sysctl_tcp_tw_reuse &&
136			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
137		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
138		if (tp->write_seq == 0)
139			tp->write_seq = 1;
140		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
141		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
142		sock_hold(sktw);
143		return 1;
144	}
145
146	return 0;
147}
148EXPORT_SYMBOL_GPL(tcp_twsk_unique);
149
150/* This will initiate an outgoing connection. */
151int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
152{
153	struct inet_sock *inet = inet_sk(sk);
154	struct tcp_sock *tp = tcp_sk(sk);
155	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
156	struct rtable *rt;
157	__be32 daddr, nexthop;
158	int tmp;
159	int err;
160
161	if (addr_len < sizeof(struct sockaddr_in))
162		return -EINVAL;
163
164	if (usin->sin_family != AF_INET)
165		return -EAFNOSUPPORT;
166
167	nexthop = daddr = usin->sin_addr.s_addr;
168	if (inet->opt && inet->opt->srr) {
169		if (!daddr)
170			return -EINVAL;
171		nexthop = inet->opt->faddr;
172	}
173
174	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
175			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
176			       IPPROTO_TCP,
177			       inet->inet_sport, usin->sin_port, sk, 1);
178	if (tmp < 0) {
179		if (tmp == -ENETUNREACH)
180			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
181		return tmp;
182	}
183
184	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185		ip_rt_put(rt);
186		return -ENETUNREACH;
187	}
188
189	if (!inet->opt || !inet->opt->srr)
190		daddr = rt->rt_dst;
191
192	if (!inet->inet_saddr)
193		inet->inet_saddr = rt->rt_src;
194	inet->inet_rcv_saddr = inet->inet_saddr;
195
196	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
197		/* Reset inherited state */
198		tp->rx_opt.ts_recent	   = 0;
199		tp->rx_opt.ts_recent_stamp = 0;
200		tp->write_seq		   = 0;
201	}
202
203	if (tcp_death_row.sysctl_tw_recycle &&
204	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
205		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table when entering
		 * TIME-WAIT state, and initialize rx_opt.ts_recent
		 * from it when trying a new connection.
		 */
212		if (peer) {
213			inet_peer_refcheck(peer);
214			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
215				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
216				tp->rx_opt.ts_recent = peer->tcp_ts;
217			}
218		}
219	}
220
221	inet->inet_dport = usin->sin_port;
222	inet->inet_daddr = daddr;
223
224	inet_csk(sk)->icsk_ext_hdr_len = 0;
225	if (inet->opt)
226		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
227
228	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
229
	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
235	tcp_set_state(sk, TCP_SYN_SENT);
236	err = inet_hash_connect(&tcp_death_row, sk);
237	if (err)
238		goto failure;
239
240	err = ip_route_newports(&rt, IPPROTO_TCP,
241				inet->inet_sport, inet->inet_dport, sk);
242	if (err)
243		goto failure;
244
245	/* OK, now commit destination to socket.  */
246	sk->sk_gso_type = SKB_GSO_TCPV4;
247	sk_setup_caps(sk, &rt->dst);
248
249	if (!tp->write_seq)
250		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
251							   inet->inet_daddr,
252							   inet->inet_sport,
253							   usin->sin_port);
254
255	inet->inet_id = tp->write_seq ^ jiffies;
256
257	err = tcp_connect(sk);
258	rt = NULL;
259	if (err)
260		goto failure;
261
262	return 0;
263
264failure:
265	/*
266	 * This unhashes the socket and releases the local port,
267	 * if necessary.
268	 */
269	tcp_set_state(sk, TCP_CLOSE);
270	ip_rt_put(rt);
271	sk->sk_route_caps = 0;
272	inet->inet_dport = 0;
273	return err;
274}
275EXPORT_SYMBOL(tcp_v4_connect);
276
277/*
278 * This routine does path mtu discovery as defined in RFC1191.
279 */
280static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
281{
282	struct dst_entry *dst;
283	struct inet_sock *inet = inet_sk(sk);
284
	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
289	if (sk->sk_state == TCP_LISTEN)
290		return;
291
	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
298	if ((dst = __sk_dst_check(sk, 0)) == NULL)
299		return;
300
301	dst->ops->update_pmtu(dst, mtu);
302
	/* Something is about to go wrong... Remember the soft error
	 * in case this connection is not able to recover.
	 */
306	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
307		sk->sk_err_soft = EMSGSIZE;
308
309	mtu = dst_mtu(dst);
310
311	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
312	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
313		tcp_sync_mss(sk, mtu);
314
315		/* Resend the TCP packet because it's
316		 * clear that the old packet has been
317		 * dropped. This is the new "fast" path mtu
318		 * discovery.
319		 */
320		tcp_simple_retransmit(sk);
321	} /* else let the usual retransmit timer handle it */
322}
323
324/*
325 * This routine is called by the ICMP module when it gets some
326 * sort of error condition.  If err < 0 then the socket should
327 * be closed and the error returned to the user.  If err > 0
328 * it's just the icmp type << 8 | icmp code.  After adjustment
329 * header points to the first 8 bytes of the tcp header.  We need
330 * to find the appropriate port.
331 *
332 * The locking strategy used here is very "optimistic". When
333 * someone else accesses the socket the ICMP is just dropped
334 * and for some paths there is no check at all.
335 * A more general error queue to queue errors for later handling
336 * is probably better.
337 *
338 */
339
340void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
341{
342	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
343	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
344	struct inet_connection_sock *icsk;
345	struct tcp_sock *tp;
346	struct inet_sock *inet;
347	const int type = icmp_hdr(icmp_skb)->type;
348	const int code = icmp_hdr(icmp_skb)->code;
349	struct sock *sk;
350	struct sk_buff *skb;
351	__u32 seq;
352	__u32 remaining;
353	int err;
354	struct net *net = dev_net(icmp_skb->dev);
355
356	if (icmp_skb->len < (iph->ihl << 2) + 8) {
357		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
358		return;
359	}
360
361	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
362			iph->saddr, th->source, inet_iif(icmp_skb));
363	if (!sk) {
364		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
365		return;
366	}
367	if (sk->sk_state == TCP_TIME_WAIT) {
368		inet_twsk_put(inet_twsk(sk));
369		return;
370	}
371
372	bh_lock_sock(sk);
373	/* If too many ICMPs get dropped on busy
374	 * servers this needs to be solved differently.
375	 */
376	if (sock_owned_by_user(sk))
377		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
378
379	if (sk->sk_state == TCP_CLOSE)
380		goto out;
381
382	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
383		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
384		goto out;
385	}
386
387	icsk = inet_csk(sk);
388	tp = tcp_sk(sk);
389	seq = ntohl(th->seq);
390	if (sk->sk_state != TCP_LISTEN &&
391	    !between(seq, tp->snd_una, tp->snd_nxt)) {
392		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
393		goto out;
394	}
395
396	switch (type) {
397	case ICMP_SOURCE_QUENCH:
398		/* Just silently ignore these. */
399		goto out;
400	case ICMP_PARAMETERPROB:
401		err = EPROTO;
402		break;
403	case ICMP_DEST_UNREACH:
404		if (code > NR_ICMP_UNREACH)
405			goto out;
406
407		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
408			if (!sock_owned_by_user(sk))
409				do_pmtu_discovery(sk, iph, info);
410			goto out;
411		}
412
413		err = icmp_err_convert[code].errno;
414		/* check if icmp_skb allows revert of backoff
415		 * (see draft-zimmermann-tcp-lcd) */
416		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
417			break;
418		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
419		    !icsk->icsk_backoff)
420			break;
421
422		if (sock_owned_by_user(sk))
423			break;
424
425		icsk->icsk_backoff--;
426		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
427					 icsk->icsk_backoff;
428		tcp_bound_rto(sk);
429
430		skb = tcp_write_queue_head(sk);
431		BUG_ON(!skb);
432
433		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
434				tcp_time_stamp - TCP_SKB_CB(skb)->when);
435
436		if (remaining) {
437			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
438						  remaining, TCP_RTO_MAX);
439		} else {
440			/* RTO revert clocked out retransmission.
441			 * Will retransmit now */
442			tcp_retransmit_timer(sk);
443		}
444
445		break;
446	case ICMP_TIME_EXCEEDED:
447		err = EHOSTUNREACH;
448		break;
449	default:
450		goto out;
451	}
452
453	switch (sk->sk_state) {
454		struct request_sock *req, **prev;
455	case TCP_LISTEN:
456		if (sock_owned_by_user(sk))
457			goto out;
458
459		req = inet_csk_search_req(sk, &prev, th->dest,
460					  iph->daddr, iph->saddr);
461		if (!req)
462			goto out;
463
464		/* ICMPs are not backlogged, hence we cannot get
465		   an established socket here.
466		 */
467		WARN_ON(req->sk);
468
469		if (seq != tcp_rsk(req)->snt_isn) {
470			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
471			goto out;
472		}
473
474		/*
475		 * Still in SYN_RECV, just remove it silently.
476		 * There is no good way to pass the error to the newly
477		 * created socket, and POSIX does not want network
478		 * errors returned from accept().
479		 */
480		inet_csk_reqsk_queue_drop(sk, req, prev);
481		goto out;
482
483	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen normally.
			       It can, e.g., if SYNs crossed.
			     */
487		if (!sock_owned_by_user(sk)) {
488			sk->sk_err = err;
489
490			sk->sk_error_report(sk);
491
492			tcp_done(sk);
493		} else {
494			sk->sk_err_soft = err;
495		}
496		goto out;
497	}
498
	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * RFC 1122 4.2.3.9 allows us to treat as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and broken firewalls sit in every dark corner sending random
	 * errors ordered by their masters, even these two messages have
	 * finally lost their original sense (even Linux sends invalid
	 * PORT_UNREACHs).
	 *
	 * Now we are in compliance with the RFCs.
	 *							--ANK (980905)
	 */
514
515	inet = inet_sk(sk);
516	if (!sock_owned_by_user(sk) && inet->recverr) {
517		sk->sk_err = err;
518		sk->sk_error_report(sk);
519	} else	{ /* Only an error on timeout */
520		sk->sk_err_soft = err;
521	}
522
523out:
524	bh_unlock_sock(sk);
525	sock_put(sk);
526}
527
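
/* Compute the TCP checksum of an outgoing skb over the given addresses.
 * With CHECKSUM_PARTIAL only the pseudo-header sum is stored here and the
 * device (or the software checksum-help path) completes it later; otherwise
 * the full checksum is computed in software.
 */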
528static void __tcp_v4_send_check(struct sk_buff *skb,
529				__be32 saddr, __be32 daddr)
530{
531	struct tcphdr *th = tcp_hdr(skb);
532
533	if (skb->ip_summed == CHECKSUM_PARTIAL) {
534		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
535		skb->csum_start = skb_transport_header(skb) - skb->head;
536		skb->csum_offset = offsetof(struct tcphdr, check);
537	} else {
538		th->check = tcp_v4_check(skb->len, saddr, daddr,
539					 csum_partial(th,
540						      th->doff << 2,
541						      skb->csum));
542	}
543}
544
545/* This routine computes an IPv4 TCP checksum. */
546void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
547{
548	struct inet_sock *inet = inet_sk(sk);
549
550	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
551}
552EXPORT_SYMBOL(tcp_v4_send_check);
553
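
/* Prepare a GSO skb for segmentation: clear th->check, mark the skb
 * CHECKSUM_PARTIAL and seed the checksum with the pseudo-header sum taken
 * from the IP header.
 */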
554int tcp_v4_gso_send_check(struct sk_buff *skb)
555{
556	const struct iphdr *iph;
557	struct tcphdr *th;
558
559	if (!pskb_may_pull(skb, sizeof(*th)))
560		return -EINVAL;
561
562	iph = ip_hdr(skb);
563	th = tcp_hdr(skb);
564
565	th->check = 0;
566	skb->ip_summed = CHECKSUM_PARTIAL;
567	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
568	return 0;
569}
570
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for the reset?
 *	Answer: if a packet caused the RST, it is not for a socket
 *		existing in our system; if it matches a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP. So we build the reply based only on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */
583
584static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
585{
586	struct tcphdr *th = tcp_hdr(skb);
587	struct {
588		struct tcphdr th;
589#ifdef CONFIG_TCP_MD5SIG
590		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
591#endif
592	} rep;
593	struct ip_reply_arg arg;
594#ifdef CONFIG_TCP_MD5SIG
595	struct tcp_md5sig_key *key;
596#endif
597	struct net *net;
598
599	/* Never send a reset in response to a reset. */
600	if (th->rst)
601		return;
602
603	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
604		return;
605
606	/* Swap the send and the receive. */
607	memset(&rep, 0, sizeof(rep));
608	rep.th.dest   = th->source;
609	rep.th.source = th->dest;
610	rep.th.doff   = sizeof(struct tcphdr) / 4;
611	rep.th.rst    = 1;
612
613	if (th->ack) {
614		rep.th.seq = th->ack_seq;
615	} else {
616		rep.th.ack = 1;
617		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
618				       skb->len - (th->doff << 2));
619	}
620
621	memset(&arg, 0, sizeof(arg));
622	arg.iov[0].iov_base = (unsigned char *)&rep;
623	arg.iov[0].iov_len  = sizeof(rep.th);
624
625#ifdef CONFIG_TCP_MD5SIG
626	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
627	if (key) {
628		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
629				   (TCPOPT_NOP << 16) |
630				   (TCPOPT_MD5SIG << 8) |
631				   TCPOLEN_MD5SIG);
632		/* Update length and the length the header thinks exists */
633		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
634		rep.th.doff = arg.iov[0].iov_len / 4;
635
636		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
637				     key, ip_hdr(skb)->saddr,
638				     ip_hdr(skb)->daddr, &rep.th);
639	}
640#endif
641	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
642				      ip_hdr(skb)->saddr,
643				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
644	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
645	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
646
647	net = dev_net(skb_dst(skb)->dev);
648	ip_send_reply(net->ipv4.tcp_sock, skb,
649		      &arg, arg.iov[0].iov_len);
650
651	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
652	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
653}
654
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is certainly ugly. What can I do?
 */
658
659static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
660			    u32 win, u32 ts, int oif,
661			    struct tcp_md5sig_key *key,
662			    int reply_flags)
663{
664	struct tcphdr *th = tcp_hdr(skb);
665	struct {
666		struct tcphdr th;
667		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
668#ifdef CONFIG_TCP_MD5SIG
669			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
670#endif
671			];
672	} rep;
673	struct ip_reply_arg arg;
674	struct net *net = dev_net(skb_dst(skb)->dev);
675
676	memset(&rep.th, 0, sizeof(struct tcphdr));
677	memset(&arg, 0, sizeof(arg));
678
679	arg.iov[0].iov_base = (unsigned char *)&rep;
680	arg.iov[0].iov_len  = sizeof(rep.th);
681	if (ts) {
682		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
683				   (TCPOPT_TIMESTAMP << 8) |
684				   TCPOLEN_TIMESTAMP);
685		rep.opt[1] = htonl(tcp_time_stamp);
686		rep.opt[2] = htonl(ts);
687		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
688	}
689
690	/* Swap the send and the receive. */
691	rep.th.dest    = th->source;
692	rep.th.source  = th->dest;
693	rep.th.doff    = arg.iov[0].iov_len / 4;
694	rep.th.seq     = htonl(seq);
695	rep.th.ack_seq = htonl(ack);
696	rep.th.ack     = 1;
697	rep.th.window  = htons(win);
698
699#ifdef CONFIG_TCP_MD5SIG
700	if (key) {
701		int offset = (ts) ? 3 : 0;
702
703		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
704					  (TCPOPT_NOP << 16) |
705					  (TCPOPT_MD5SIG << 8) |
706					  TCPOLEN_MD5SIG);
707		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
708		rep.th.doff = arg.iov[0].iov_len/4;
709
710		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
711				    key, ip_hdr(skb)->saddr,
712				    ip_hdr(skb)->daddr, &rep.th);
713	}
714#endif
715	arg.flags = reply_flags;
716	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
717				      ip_hdr(skb)->saddr,
718				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
719	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
720	if (oif)
721		arg.bound_dev_if = oif;
722
723	ip_send_reply(net->ipv4.tcp_sock, skb,
724		      &arg, arg.iov[0].iov_len);
725
726	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
727}
728
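
/* Send the ACK required while a connection sits in TIME-WAIT: sequence
 * numbers, window (scaled down by tw_rcv_wscale) and the cached timestamp
 * are all taken from the timewait bucket, and the reference on the timewait
 * socket is dropped afterwards.
 */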
729static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
730{
731	struct inet_timewait_sock *tw = inet_twsk(sk);
732	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
733
734	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
735			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
736			tcptw->tw_ts_recent,
737			tw->tw_bound_dev_if,
738			tcp_twsk_md5_key(tcptw),
739			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
740			);
741
742	inet_twsk_put(tw);
743}
744
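
/* ACK a segment received for a connection that is still in SYN-RECV: there
 * is no full socket yet, so sequence numbers and the timestamp come from the
 * request_sock.
 */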
745static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
746				  struct request_sock *req)
747{
748	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
749			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
750			req->ts_recent,
751			0,
752			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
753			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
754}
755
756/*
757 *	Send a SYN-ACK after having received a SYN.
758 *	This still operates on a request_sock only, not on a big
759 *	socket.
760 */
761static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
762			      struct request_sock *req,
763			      struct request_values *rvp)
764{
765	const struct inet_request_sock *ireq = inet_rsk(req);
766	int err = -1;
767	struct sk_buff * skb;
768
769	/* First, grab a route. */
770	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
771		return -1;
772
773	skb = tcp_make_synack(sk, dst, req, rvp);
774
775	if (skb) {
776		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
777
778		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
779					    ireq->rmt_addr,
780					    ireq->opt);
781		err = net_xmit_eval(err);
782	}
783
784	dst_release(dst);
785	return err;
786}
787
788static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
789			      struct request_values *rvp)
790{
791	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
792	return tcp_v4_send_synack(sk, NULL, req, rvp);
793}
794
795/*
796 *	IPv4 request_sock destructor.
797 */
798static void tcp_v4_reqsk_destructor(struct request_sock *req)
799{
800	kfree(inet_rsk(req)->opt);
801}
802
803static void syn_flood_warning(const struct sk_buff *skb)
804{
805	const char *msg;
806
807#ifdef CONFIG_SYN_COOKIES
808	if (sysctl_tcp_syncookies)
809		msg = "Sending cookies";
810	else
811#endif
812		msg = "Dropping request";
813
814	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
815				ntohs(tcp_hdr(skb)->dest), msg);
816}
817
818/*
819 * Save and compile IPv4 options into the request_sock if needed.
820 */
821static struct ip_options *tcp_v4_save_options(struct sock *sk,
822					      struct sk_buff *skb)
823{
824	struct ip_options *opt = &(IPCB(skb)->opt);
825	struct ip_options *dopt = NULL;
826
827	if (opt && opt->optlen) {
828		int opt_size = optlength(opt);
829		dopt = kmalloc(opt_size, GFP_ATOMIC);
830		if (dopt) {
831			if (ip_options_echo(dopt, skb)) {
832				kfree(dopt);
833				dopt = NULL;
834			}
835		}
836	}
837	return dopt;
838}
839
840#ifdef CONFIG_TCP_MD5SIG
841/*
842 * RFC2385 MD5 checksumming requires a mapping of
843 * IP address->MD5 Key.
844 * We need to maintain these in the sk structure.
845 */
846
847/* Find the Key structure for an address.  */
848static struct tcp_md5sig_key *
849			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
850{
851	struct tcp_sock *tp = tcp_sk(sk);
852	int i;
853
854	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
855		return NULL;
856	for (i = 0; i < tp->md5sig_info->entries4; i++) {
857		if (tp->md5sig_info->keys4[i].addr == addr)
858			return &tp->md5sig_info->keys4[i].base;
859	}
860	return NULL;
861}
862
863struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
864					 struct sock *addr_sk)
865{
866	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
867}
868EXPORT_SYMBOL(tcp_v4_md5_lookup);
869
870static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
871						      struct request_sock *req)
872{
873	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
874}
875
876/* This can be called on a newly created socket, from other files */
877int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
878		      u8 *newkey, u8 newkeylen)
879{
880	/* Add Key to the list */
881	struct tcp_md5sig_key *key;
882	struct tcp_sock *tp = tcp_sk(sk);
883	struct tcp4_md5sig_key *keys;
884
885	key = tcp_v4_md5_do_lookup(sk, addr);
886	if (key) {
887		/* Pre-existing entry - just update that one. */
888		kfree(key->key);
889		key->key = newkey;
890		key->keylen = newkeylen;
891	} else {
892		struct tcp_md5sig_info *md5sig;
893
894		if (!tp->md5sig_info) {
895			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
896						  GFP_ATOMIC);
897			if (!tp->md5sig_info) {
898				kfree(newkey);
899				return -ENOMEM;
900			}
901			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
902		}
903		if (tcp_alloc_md5sig_pool(sk) == NULL) {
904			kfree(newkey);
905			return -ENOMEM;
906		}
907		md5sig = tp->md5sig_info;
908
909		if (md5sig->alloced4 == md5sig->entries4) {
910			keys = kmalloc((sizeof(*keys) *
911					(md5sig->entries4 + 1)), GFP_ATOMIC);
912			if (!keys) {
913				kfree(newkey);
914				tcp_free_md5sig_pool();
915				return -ENOMEM;
916			}
917
918			if (md5sig->entries4)
919				memcpy(keys, md5sig->keys4,
920				       sizeof(*keys) * md5sig->entries4);
921
922			/* Free old key list, and reference new one */
923			kfree(md5sig->keys4);
924			md5sig->keys4 = keys;
925			md5sig->alloced4++;
926		}
927		md5sig->entries4++;
928		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
929		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
930		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
931	}
932	return 0;
933}
934EXPORT_SYMBOL(tcp_v4_md5_do_add);
935
936static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
937			       u8 *newkey, u8 newkeylen)
938{
939	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
940				 newkey, newkeylen);
941}
942
943int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
944{
945	struct tcp_sock *tp = tcp_sk(sk);
946	int i;
947
948	for (i = 0; i < tp->md5sig_info->entries4; i++) {
949		if (tp->md5sig_info->keys4[i].addr == addr) {
950			/* Free the key */
951			kfree(tp->md5sig_info->keys4[i].base.key);
952			tp->md5sig_info->entries4--;
953
954			if (tp->md5sig_info->entries4 == 0) {
955				kfree(tp->md5sig_info->keys4);
956				tp->md5sig_info->keys4 = NULL;
957				tp->md5sig_info->alloced4 = 0;
958			} else if (tp->md5sig_info->entries4 != i) {
959				/* Need to do some manipulation */
960				memmove(&tp->md5sig_info->keys4[i],
961					&tp->md5sig_info->keys4[i+1],
962					(tp->md5sig_info->entries4 - i) *
963					 sizeof(struct tcp4_md5sig_key));
964			}
965			tcp_free_md5sig_pool();
966			return 0;
967		}
968	}
969	return -ENOENT;
970}
971EXPORT_SYMBOL(tcp_v4_md5_do_del);
972
973static void tcp_v4_clear_md5_list(struct sock *sk)
974{
975	struct tcp_sock *tp = tcp_sk(sk);
976
	/* Free each key, then the key array itself,
	 * the crypto element, and then decrement our
	 * hold on the last-resort crypto.
	 */
981	if (tp->md5sig_info->entries4) {
982		int i;
983		for (i = 0; i < tp->md5sig_info->entries4; i++)
984			kfree(tp->md5sig_info->keys4[i].base.key);
985		tp->md5sig_info->entries4 = 0;
986		tcp_free_md5sig_pool();
987	}
988	if (tp->md5sig_info->keys4) {
989		kfree(tp->md5sig_info->keys4);
990		tp->md5sig_info->keys4 = NULL;
991		tp->md5sig_info->alloced4  = 0;
992	}
993}
994
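
/* setsockopt(TCP_MD5SIG) handler: copy the request from user space, delete
 * the key for the given peer when no key material is supplied, otherwise
 * allocate md5sig_info on first use and install the new key.
 */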
995static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
996				 int optlen)
997{
998	struct tcp_md5sig cmd;
999	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1000	u8 *newkey;
1001
1002	if (optlen < sizeof(cmd))
1003		return -EINVAL;
1004
1005	if (copy_from_user(&cmd, optval, sizeof(cmd)))
1006		return -EFAULT;
1007
1008	if (sin->sin_family != AF_INET)
1009		return -EINVAL;
1010
1011	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1012		if (!tcp_sk(sk)->md5sig_info)
1013			return -ENOENT;
1014		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1015	}
1016
1017	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1018		return -EINVAL;
1019
1020	if (!tcp_sk(sk)->md5sig_info) {
1021		struct tcp_sock *tp = tcp_sk(sk);
1022		struct tcp_md5sig_info *p;
1023
1024		p = kzalloc(sizeof(*p), sk->sk_allocation);
1025		if (!p)
1026			return -EINVAL;
1027
1028		tp->md5sig_info = p;
1029		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1030	}
1031
1032	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1033	if (!newkey)
1034		return -ENOMEM;
1035	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1036				 newkey, cmd.tcpm_keylen);
1037}
1038
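
/* Feed the IPv4 pseudo-header (source address, destination address,
 * zero-padded protocol and segment length) into the MD5 hash, as RFC 2385
 * requires.
 */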
1039static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1040					__be32 daddr, __be32 saddr, int nbytes)
1041{
1042	struct tcp4_pseudohdr *bp;
1043	struct scatterlist sg;
1044
1045	bp = &hp->md5_blk.ip4;
1046
1047	/*
1048	 * 1. the TCP pseudo-header (in the order: source IP address,
1049	 * destination IP address, zero-padded protocol number, and
1050	 * segment length)
1051	 */
1052	bp->saddr = saddr;
1053	bp->daddr = daddr;
1054	bp->pad = 0;
1055	bp->protocol = IPPROTO_TCP;
1056	bp->len = cpu_to_be16(nbytes);
1057
1058	sg_init_one(&sg, bp, sizeof(*bp));
1059	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1060}
1061
1062static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1063			       __be32 daddr, __be32 saddr, struct tcphdr *th)
1064{
1065	struct tcp_md5sig_pool *hp;
1066	struct hash_desc *desc;
1067
1068	hp = tcp_get_md5sig_pool();
1069	if (!hp)
1070		goto clear_hash_noput;
1071	desc = &hp->md5_desc;
1072
1073	if (crypto_hash_init(desc))
1074		goto clear_hash;
1075	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1076		goto clear_hash;
1077	if (tcp_md5_hash_header(hp, th))
1078		goto clear_hash;
1079	if (tcp_md5_hash_key(hp, key))
1080		goto clear_hash;
1081	if (crypto_hash_final(desc, md5_hash))
1082		goto clear_hash;
1083
1084	tcp_put_md5sig_pool();
1085	return 0;
1086
1087clear_hash:
1088	tcp_put_md5sig_pool();
1089clear_hash_noput:
1090	memset(md5_hash, 0, 16);
1091	return 1;
1092}
1093
1094int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1095			struct sock *sk, struct request_sock *req,
1096			struct sk_buff *skb)
1097{
1098	struct tcp_md5sig_pool *hp;
1099	struct hash_desc *desc;
1100	struct tcphdr *th = tcp_hdr(skb);
1101	__be32 saddr, daddr;
1102
1103	if (sk) {
1104		saddr = inet_sk(sk)->inet_saddr;
1105		daddr = inet_sk(sk)->inet_daddr;
1106	} else if (req) {
1107		saddr = inet_rsk(req)->loc_addr;
1108		daddr = inet_rsk(req)->rmt_addr;
1109	} else {
1110		const struct iphdr *iph = ip_hdr(skb);
1111		saddr = iph->saddr;
1112		daddr = iph->daddr;
1113	}
1114
1115	hp = tcp_get_md5sig_pool();
1116	if (!hp)
1117		goto clear_hash_noput;
1118	desc = &hp->md5_desc;
1119
1120	if (crypto_hash_init(desc))
1121		goto clear_hash;
1122
1123	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1124		goto clear_hash;
1125	if (tcp_md5_hash_header(hp, th))
1126		goto clear_hash;
1127	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1128		goto clear_hash;
1129	if (tcp_md5_hash_key(hp, key))
1130		goto clear_hash;
1131	if (crypto_hash_final(desc, md5_hash))
1132		goto clear_hash;
1133
1134	tcp_put_md5sig_pool();
1135	return 0;
1136
1137clear_hash:
1138	tcp_put_md5sig_pool();
1139clear_hash_noput:
1140	memset(md5_hash, 0, 16);
1141	return 1;
1142}
1143EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1144
1145static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
1146{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
1155	__u8 *hash_location = NULL;
1156	struct tcp_md5sig_key *hash_expected;
1157	const struct iphdr *iph = ip_hdr(skb);
1158	struct tcphdr *th = tcp_hdr(skb);
1159	int genhash;
1160	unsigned char newhash[16];
1161
1162	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1163	hash_location = tcp_parse_md5sig_option(th);
1164
1165	/* We've parsed the options - do we have a hash? */
1166	if (!hash_expected && !hash_location)
1167		return 0;
1168
1169	if (hash_expected && !hash_location) {
1170		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1171		return 1;
1172	}
1173
1174	if (!hash_expected && hash_location) {
1175		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1176		return 1;
1177	}
1178
1179	/* Okay, so this is hash_expected and hash_location -
1180	 * so we need to calculate the checksum.
1181	 */
1182	genhash = tcp_v4_md5_hash_skb(newhash,
1183				      hash_expected,
1184				      NULL, NULL, skb);
1185
1186	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1187		if (net_ratelimit()) {
1188			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1189			       &iph->saddr, ntohs(th->source),
1190			       &iph->daddr, ntohs(th->dest),
1191			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
1192		}
1193		return 1;
1194	}
1195	return 0;
1196}
1197
1198#endif
1199
1200struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1201	.family		=	PF_INET,
1202	.obj_size	=	sizeof(struct tcp_request_sock),
1203	.rtx_syn_ack	=	tcp_v4_rtx_synack,
1204	.send_ack	=	tcp_v4_reqsk_send_ack,
1205	.destructor	=	tcp_v4_reqsk_destructor,
1206	.send_reset	=	tcp_v4_send_reset,
1207	.syn_ack_timeout = 	tcp_syn_ack_timeout,
1208};
1209
1210#ifdef CONFIG_TCP_MD5SIG
1211static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1212	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
1213	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
1214};
1215#endif
1216
1217static struct timewait_sock_ops tcp_timewait_sock_ops = {
1218	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
1219	.twsk_unique	= tcp_twsk_unique,
1220	.twsk_destructor= tcp_twsk_destructor,
1221};
1222
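
/* Handle an incoming SYN on a listening socket: apply SYN-flood protection
 * (syncookies when enabled, otherwise drop), allocate and initialize a
 * request_sock, optionally consult the peer's cached timestamp when
 * tw_recycle is on, pick an initial sequence number and answer with a
 * SYN-ACK.
 */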
1223int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1224{
1225	struct tcp_extend_values tmp_ext;
1226	struct tcp_options_received tmp_opt;
1227	u8 *hash_location;
1228	struct request_sock *req;
1229	struct inet_request_sock *ireq;
1230	struct tcp_sock *tp = tcp_sk(sk);
1231	struct dst_entry *dst = NULL;
1232	__be32 saddr = ip_hdr(skb)->saddr;
1233	__be32 daddr = ip_hdr(skb)->daddr;
1234	__u32 isn = TCP_SKB_CB(skb)->when;
1235#ifdef CONFIG_SYN_COOKIES
1236	int want_cookie = 0;
1237#else
1238#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1239#endif
1240
	/* Never answer SYNs sent to broadcast or multicast addresses. */
1242	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1243		goto drop;
1244
	/* TW buckets are converted to open requests without
	 * limitation: they conserve resources and the peer is
	 * evidently a real one.
	 */
1249	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1250		if (net_ratelimit())
1251			syn_flood_warning(skb);
1252#ifdef CONFIG_SYN_COOKIES
1253		if (sysctl_tcp_syncookies) {
1254			want_cookie = 1;
1255		} else
1256#endif
1257		goto drop;
1258	}
1259
	/* The accept backlog is full. If we have already queued enough
	 * warm entries in the syn queue, drop the request. That is better
	 * than clogging the syn queue with openreqs whose timeouts increase
	 * exponentially.
	 */
1265	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1266		goto drop;
1267
1268	req = inet_reqsk_alloc(&tcp_request_sock_ops);
1269	if (!req)
1270		goto drop;
1271
1272#ifdef CONFIG_TCP_MD5SIG
1273	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1274#endif
1275
1276	tcp_clear_options(&tmp_opt);
1277	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1278	tmp_opt.user_mss  = tp->rx_opt.user_mss;
1279	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1280
1281	if (tmp_opt.cookie_plus > 0 &&
1282	    tmp_opt.saw_tstamp &&
1283	    !tp->rx_opt.cookie_out_never &&
1284	    (sysctl_tcp_cookie_size > 0 ||
1285	     (tp->cookie_values != NULL &&
1286	      tp->cookie_values->cookie_desired > 0))) {
1287		u8 *c;
1288		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1289		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1290
1291		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1292			goto drop_and_release;
1293
1294		/* Secret recipe starts with IP addresses */
1295		*mess++ ^= (__force u32)daddr;
1296		*mess++ ^= (__force u32)saddr;
1297
1298		/* plus variable length Initiator Cookie */
1299		c = (u8 *)mess;
1300		while (l-- > 0)
1301			*c++ ^= *hash_location++;
1302
1303#ifdef CONFIG_SYN_COOKIES
1304		want_cookie = 0;	/* not our kind of cookie */
1305#endif
1306		tmp_ext.cookie_out_never = 0; /* false */
1307		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1308	} else if (!tp->rx_opt.cookie_in_always) {
1309		/* redundant indications, but ensure initialization. */
1310		tmp_ext.cookie_out_never = 1; /* true */
1311		tmp_ext.cookie_plus = 0;
1312	} else {
1313		goto drop_and_release;
1314	}
1315	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1316
1317	if (want_cookie && !tmp_opt.saw_tstamp)
1318		tcp_clear_options(&tmp_opt);
1319
1320	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1321	tcp_openreq_init(req, &tmp_opt, skb);
1322
1323	ireq = inet_rsk(req);
1324	ireq->loc_addr = daddr;
1325	ireq->rmt_addr = saddr;
1326	ireq->no_srccheck = inet_sk(sk)->transparent;
1327	ireq->opt = tcp_v4_save_options(sk, skb);
1328
1329	if (security_inet_conn_request(sk, skb, req))
1330		goto drop_and_free;
1331
1332	if (!want_cookie || tmp_opt.tstamp_ok)
1333		TCP_ECN_create_request(req, tcp_hdr(skb));
1334
1335	if (want_cookie) {
1336		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1337		req->cookie_ts = tmp_opt.tstamp_ok;
1338	} else if (!isn) {
1339		struct inet_peer *peer = NULL;
1340
		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table when entering
		 * TIME-WAIT state, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit a live
		 * timewait bucket, so all the necessary checks
		 * are made in the function processing the timewait state.
		 */
1350		if (tmp_opt.saw_tstamp &&
1351		    tcp_death_row.sysctl_tw_recycle &&
1352		    (dst = inet_csk_route_req(sk, req)) != NULL &&
1353		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1354		    peer->v4daddr == saddr) {
1355			inet_peer_refcheck(peer);
1356			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1357			    (s32)(peer->tcp_ts - req->ts_recent) >
1358							TCP_PAWS_WINDOW) {
1359				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1360				goto drop_and_release;
1361			}
1362		}
1363		/* Kill the following clause, if you dislike this way. */
1364		else if (!sysctl_tcp_syncookies &&
1365			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1366			  (sysctl_max_syn_backlog >> 2)) &&
1367			 (!peer || !peer->tcp_ts_stamp) &&
1368			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is reserved for destinations proven
			 * to be alive.
			 * This means that we keep communicating with
			 * destinations that were already remembered
			 * by the time the synflood started.
			 */
1376			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1377				       &saddr, ntohs(tcp_hdr(skb)->source));
1378			goto drop_and_release;
1379		}
1380
1381		isn = tcp_v4_init_sequence(skb);
1382	}
1383	tcp_rsk(req)->snt_isn = isn;
1384
1385	if (tcp_v4_send_synack(sk, dst, req,
1386			       (struct request_values *)&tmp_ext) ||
1387	    want_cookie)
1388		goto drop_and_free;
1389
1390	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1391	return 0;
1392
1393drop_and_release:
1394	dst_release(dst);
1395drop_and_free:
1396	reqsk_free(req);
1397drop:
1398	return 0;
1399}
1400EXPORT_SYMBOL(tcp_v4_conn_request);
1401
1402
1403/*
1404 * The three way handshake has completed - we got a valid synack -
1405 * now create the new socket.
1406 */
1407struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1408				  struct request_sock *req,
1409				  struct dst_entry *dst)
1410{
1411	struct inet_request_sock *ireq;
1412	struct inet_sock *newinet;
1413	struct tcp_sock *newtp;
1414	struct sock *newsk;
1415#ifdef CONFIG_TCP_MD5SIG
1416	struct tcp_md5sig_key *key;
1417#endif
1418
1419	if (sk_acceptq_is_full(sk))
1420		goto exit_overflow;
1421
1422	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
1423		goto exit;
1424
1425	newsk = tcp_create_openreq_child(sk, req, skb);
1426	if (!newsk)
1427		goto exit;
1428
1429	newsk->sk_gso_type = SKB_GSO_TCPV4;
1430	sk_setup_caps(newsk, dst);
1431
1432	newtp		      = tcp_sk(newsk);
1433	newinet		      = inet_sk(newsk);
1434	ireq		      = inet_rsk(req);
1435	newinet->inet_daddr   = ireq->rmt_addr;
1436	newinet->inet_rcv_saddr = ireq->loc_addr;
1437	newinet->inet_saddr	      = ireq->loc_addr;
1438	newinet->opt	      = ireq->opt;
1439	ireq->opt	      = NULL;
1440	newinet->mc_index     = inet_iif(skb);
1441	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
1442	inet_csk(newsk)->icsk_ext_hdr_len = 0;
1443	if (newinet->opt)
1444		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
1445	newinet->inet_id = newtp->write_seq ^ jiffies;
1446
1447	tcp_mtup_init(newsk);
1448	tcp_sync_mss(newsk, dst_mtu(dst));
1449	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1450	if (tcp_sk(sk)->rx_opt.user_mss &&
1451	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1452		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1453
1454	tcp_initialize_rcv_mss(newsk);
1455
1456#ifdef CONFIG_TCP_MD5SIG
1457	/* Copy over the MD5 key from the original socket */
1458	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1459	if (key != NULL) {
1460		/*
1461		 * We're using one, so create a matching key
1462		 * on the newsk structure. If we fail to get
1463		 * memory, then we end up not copying the key
1464		 * across. Shucks.
1465		 */
1466		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1467		if (newkey != NULL)
1468			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1469					  newkey, key->keylen);
1470		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1471	}
1472#endif
1473
1474	__inet_hash_nolisten(newsk, NULL);
1475	__inet_inherit_port(sk, newsk);
1476
1477	return newsk;
1478
1479exit_overflow:
1480	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1481exit:
1482	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1483	dst_release(dst);
1484	return NULL;
1485}
1486EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1487
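
/* For a segment arriving on a listening socket, look up a matching pending
 * request_sock first, then an already established socket; as a last resort
 * let the syncookie code validate a bare ACK.  Returns the socket to process
 * the segment on, or NULL to discard it.
 */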
1488static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1489{
1490	struct tcphdr *th = tcp_hdr(skb);
1491	const struct iphdr *iph = ip_hdr(skb);
1492	struct sock *nsk;
1493	struct request_sock **prev;
1494	/* Find possible connection requests. */
1495	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1496						       iph->saddr, iph->daddr);
1497	if (req)
1498		return tcp_check_req(sk, skb, req, prev);
1499
1500	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1501			th->source, iph->daddr, th->dest, inet_iif(skb));
1502
1503	if (nsk) {
1504		if (nsk->sk_state != TCP_TIME_WAIT) {
1505			bh_lock_sock(nsk);
1506			return nsk;
1507		}
1508		inet_twsk_put(inet_twsk(nsk));
1509		return NULL;
1510	}
1511
1512#ifdef CONFIG_SYN_COOKIES
1513	if (!th->syn)
1514		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1515#endif
1516	return sk;
1517}
1518
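
/* Validate the TCP checksum on receive.  A CHECKSUM_COMPLETE value from the
 * driver is trusted if it folds to zero with the pseudo-header; otherwise the
 * pseudo-header sum is recorded and short packets (<= 76 bytes) are verified
 * in full right away, while longer ones are checked later.
 */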
1519static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1520{
1521	const struct iphdr *iph = ip_hdr(skb);
1522
1523	if (skb->ip_summed == CHECKSUM_COMPLETE) {
1524		if (!tcp_v4_check(skb->len, iph->saddr,
1525				  iph->daddr, skb->csum)) {
1526			skb->ip_summed = CHECKSUM_UNNECESSARY;
1527			return 0;
1528		}
1529	}
1530
1531	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1532				       skb->len, IPPROTO_TCP, 0);
1533
1534	if (skb->len <= 76) {
1535		return __skb_checksum_complete(skb);
1536	}
1537	return 0;
1538}
1539
1540
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1549int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1550{
1551	struct sock *rsk;
1552#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
	 *  o There is an MD5 option and we're not expecting one
	 */
1559	if (tcp_v4_inbound_md5_hash(sk, skb))
1560		goto discard;
1561#endif
1562
1563	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1564		sock_rps_save_rxhash(sk, skb->rxhash);
1565		TCP_CHECK_TIMER(sk);
1566		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1567			rsk = sk;
1568			goto reset;
1569		}
1570		TCP_CHECK_TIMER(sk);
1571		return 0;
1572	}
1573
1574	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1575		goto csum_err;
1576
1577	if (sk->sk_state == TCP_LISTEN) {
1578		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1579		if (!nsk)
1580			goto discard;
1581
1582		if (nsk != sk) {
1583			if (tcp_child_process(sk, nsk, skb)) {
1584				rsk = nsk;
1585				goto reset;
1586			}
1587			return 0;
1588		}
1589	} else
1590		sock_rps_save_rxhash(sk, skb->rxhash);
1591
1592
1593	TCP_CHECK_TIMER(sk);
1594	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1595		rsk = sk;
1596		goto reset;
1597	}
1598	TCP_CHECK_TIMER(sk);
1599	return 0;
1600
1601reset:
1602	tcp_v4_send_reset(rsk, skb);
1603discard:
1604	kfree_skb(skb);
1605	/* Be careful here. If this function gets more complicated and
1606	 * gcc suffers from register pressure on the x86, sk (in %ebx)
1607	 * might be destroyed here. This current version compiles correctly,
1608	 * but you have been warned.
1609	 */
1610	return 0;
1611
1612csum_err:
1613	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1614	goto discard;
1615}
1616EXPORT_SYMBOL(tcp_v4_do_rcv);
1617
1618/*
1619 *	From tcp_input.c
1620 */
1621
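
/* Main IPv4 TCP receive routine: validate the header and checksum, fill in
 * the control block, look up the owning socket (handling TIME-WAIT
 * specially) and hand the segment to tcp_v4_do_rcv directly, via the
 * prequeue, or via the backlog when the socket is owned by user context.
 */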
1622int BCMFASTPATH_HOST tcp_v4_rcv(struct sk_buff *skb)
1623{
1624	const struct iphdr *iph;
1625	struct tcphdr *th;
1626	struct sock *sk;
1627	int ret;
1628	struct net *net = dev_net(skb->dev);
1629
1630	if (skb->pkt_type != PACKET_HOST)
1631		goto discard_it;
1632
1633	/* Count it even if it's bad */
1634	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1635
1636	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1637		goto discard_it;
1638
1639	th = tcp_hdr(skb);
1640
1641	if (th->doff < sizeof(struct tcphdr) / 4)
1642		goto bad_packet;
1643	if (!pskb_may_pull(skb, th->doff * 4))
1644		goto discard_it;
1645
	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
1650	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1651		goto bad_packet;
1652
1653	th = tcp_hdr(skb);
1654	iph = ip_hdr(skb);
1655	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1656	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1657				    skb->len - th->doff * 4);
1658	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1659	TCP_SKB_CB(skb)->when	 = 0;
1660	TCP_SKB_CB(skb)->flags	 = iph->tos;
1661	TCP_SKB_CB(skb)->sacked	 = 0;
1662
1663	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1664	if (!sk)
1665		goto no_tcp_socket;
1666
1667process:
1668	if (sk->sk_state == TCP_TIME_WAIT)
1669		goto do_time_wait;
1670
1671	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1672		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1673		goto discard_and_relse;
1674	}
1675
1676	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1677		goto discard_and_relse;
1678	nf_reset(skb);
1679
1680	if (sk_filter(sk, skb))
1681		goto discard_and_relse;
1682
1683	skb->dev = NULL;
1684
1685	bh_lock_sock_nested(sk);
1686	ret = 0;
1687	if (!sock_owned_by_user(sk)) {
1688#ifdef CONFIG_NET_DMA
1689		struct tcp_sock *tp = tcp_sk(sk);
1690		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1691			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1692		if (tp->ucopy.dma_chan)
1693			ret = tcp_v4_do_rcv(sk, skb);
1694		else
1695#endif
1696		{
1697			if (!tcp_prequeue(sk, skb))
1698				ret = tcp_v4_do_rcv(sk, skb);
1699		}
1700	} else if (unlikely(sk_add_backlog(sk, skb))) {
1701		bh_unlock_sock(sk);
1702		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1703		goto discard_and_relse;
1704	}
1705	bh_unlock_sock(sk);
1706
1707	sock_put(sk);
1708
1709	return ret;
1710
1711no_tcp_socket:
1712	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1713		goto discard_it;
1714
1715	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1716bad_packet:
1717		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1718	} else {
1719		tcp_v4_send_reset(NULL, skb);
1720	}
1721
1722discard_it:
1723	/* Discard frame. */
1724	kfree_skb(skb);
1725	return 0;
1726
1727discard_and_relse:
1728	sock_put(sk);
1729	goto discard_it;
1730
1731do_time_wait:
1732	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1733		inet_twsk_put(inet_twsk(sk));
1734		goto discard_it;
1735	}
1736
1737	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1738		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1739		inet_twsk_put(inet_twsk(sk));
1740		goto discard_it;
1741	}
1742	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1743	case TCP_TW_SYN: {
1744		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1745							&tcp_hashinfo,
1746							iph->daddr, th->dest,
1747							inet_iif(skb));
1748		if (sk2) {
1749			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1750			inet_twsk_put(inet_twsk(sk));
1751			sk = sk2;
1752			goto process;
1753		}
1754		/* Fall through to ACK */
1755	}
1756	case TCP_TW_ACK:
1757		tcp_v4_timewait_ack(sk, skb);
1758		break;
1759	case TCP_TW_RST:
1760		goto no_tcp_socket;
1761	case TCP_TW_SUCCESS:;
1762	}
1763	goto discard_it;
1764}
1765
/* VJ's idea. Save the last timestamp seen from this destination and hold it
 * for at least the normal timewait interval, to use for duplicate segment
 * detection in subsequent connections before they enter the synchronized
 * state.
 */
1771
1772int tcp_v4_remember_stamp(struct sock *sk)
1773{
1774	struct inet_sock *inet = inet_sk(sk);
1775	struct tcp_sock *tp = tcp_sk(sk);
1776	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1777	struct inet_peer *peer = NULL;
1778	int release_it = 0;
1779
1780	if (!rt || rt->rt_dst != inet->inet_daddr) {
1781		peer = inet_getpeer(inet->inet_daddr, 1);
1782		release_it = 1;
1783	} else {
1784		if (!rt->peer)
1785			rt_bind_peer(rt, 1);
1786		peer = rt->peer;
1787	}
1788
1789	if (peer) {
1790		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
1791		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1792		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
1793			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
1794			peer->tcp_ts = tp->rx_opt.ts_recent;
1795		}
1796		if (release_it)
1797			inet_putpeer(peer);
1798		return 1;
1799	}
1800
1801	return 0;
1802}
1803EXPORT_SYMBOL(tcp_v4_remember_stamp);
1804
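
/* Same idea as tcp_v4_remember_stamp(), but for a socket that has already
 * moved to a timewait bucket: copy its cached timestamp into the inet_peer
 * entry for the destination.
 */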
1805int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1806{
1807	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1808
1809	if (peer) {
1810		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1811
1812		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
1813		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
1814		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
1815			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
1816			peer->tcp_ts	   = tcptw->tw_ts_recent;
1817		}
1818		inet_putpeer(peer);
1819		return 1;
1820	}
1821
1822	return 0;
1823}
1824
1825const struct inet_connection_sock_af_ops ipv4_specific = {
1826	.queue_xmit	   = ip_queue_xmit,
1827	.send_check	   = tcp_v4_send_check,
1828	.rebuild_header	   = inet_sk_rebuild_header,
1829	.conn_request	   = tcp_v4_conn_request,
1830	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
1831	.remember_stamp	   = tcp_v4_remember_stamp,
1832	.net_header_len	   = sizeof(struct iphdr),
1833	.setsockopt	   = ip_setsockopt,
1834	.getsockopt	   = ip_getsockopt,
1835	.addr2sockaddr	   = inet_csk_addr2sockaddr,
1836	.sockaddr_len	   = sizeof(struct sockaddr_in),
1837	.bind_conflict	   = inet_csk_bind_conflict,
1838#ifdef CONFIG_COMPAT
1839	.compat_setsockopt = compat_ip_setsockopt,
1840	.compat_getsockopt = compat_ip_getsockopt,
1841#endif
1842};
1843EXPORT_SYMBOL(ipv4_specific);
1844
1845#ifdef CONFIG_TCP_MD5SIG
1846static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1847	.md5_lookup		= tcp_v4_md5_lookup,
1848	.calc_md5_hash		= tcp_v4_md5_hash_skb,
1849	.md5_add		= tcp_v4_md5_add_func,
1850	.md5_parse		= tcp_v4_parse_md5_keys,
1851};
1852#endif
1853
/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
1857static int tcp_v4_init_sock(struct sock *sk)
1858{
1859	struct inet_connection_sock *icsk = inet_csk(sk);
1860	struct tcp_sock *tp = tcp_sk(sk);
1861
1862	skb_queue_head_init(&tp->out_of_order_queue);
1863	tcp_init_xmit_timers(sk);
1864	tcp_prequeue_init(tp);
1865
1866	icsk->icsk_rto = TCP_TIMEOUT_INIT;
1867	tp->mdev = TCP_TIMEOUT_INIT;
1868
1869	/* So many TCP implementations out there (incorrectly) count the
1870	 * initial SYN frame in their delayed-ACK and congestion control
1871	 * algorithms that we must have the following bandaid to talk
1872	 * efficiently to them.  -DaveM
1873	 */
1874	tp->snd_cwnd = 2;
1875
1876	/* See draft-stevens-tcpca-spec-01 for discussion of the
1877	 * initialization of these values.
1878	 */
1879	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1880	tp->snd_cwnd_clamp = ~0;
1881	tp->mss_cache = TCP_MSS_DEFAULT;
1882
1883	tp->reordering = sysctl_tcp_reordering;
1884	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1885
1886	sk->sk_state = TCP_CLOSE;
1887
1888	sk->sk_write_space = sk_stream_write_space;
1889	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1890
1891	icsk->icsk_af_ops = &ipv4_specific;
1892	icsk->icsk_sync_mss = tcp_sync_mss;
1893#ifdef CONFIG_TCP_MD5SIG
1894	tp->af_specific = &tcp_sock_ipv4_specific;
1895#endif
1896
1897	/* TCP Cookie Transactions */
1898	if (sysctl_tcp_cookie_size > 0) {
1899		/* Default, cookies without s_data_payload. */
1900		tp->cookie_values =
1901			kzalloc(sizeof(*tp->cookie_values),
1902				sk->sk_allocation);
1903		if (tp->cookie_values != NULL)
1904			kref_init(&tp->cookie_values->kref);
1905	}
1906	/* Presumed zeroed, in order of appearance:
1907	 *	cookie_in_always, cookie_out_never,
1908	 *	s_data_constant, s_data_in, s_data_out
1909	 */
1910	sk->sk_sndbuf = sysctl_tcp_wmem[1];
1911	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1912
1913	local_bh_disable();
1914	percpu_counter_inc(&tcp_sockets_allocated);
1915	local_bh_enable();
1916
1917	return 0;
1918}
1919
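
/* Release all TCP-private state attached to a socket being destroyed:
 * timers, write and out-of-order queues, MD5 keys, the prequeue, the bind
 * bucket reference, any cached sendmsg page and the cookie values.
 */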
1920void tcp_v4_destroy_sock(struct sock *sk)
1921{
1922	struct tcp_sock *tp = tcp_sk(sk);
1923
1924	tcp_clear_xmit_timers(sk);
1925
1926	tcp_cleanup_congestion_control(sk);
1927
	/* Clean up the write buffer. */
1929	tcp_write_queue_purge(sk);
1930
1931	/* Cleans up our, hopefully empty, out_of_order_queue. */
1932	__skb_queue_purge(&tp->out_of_order_queue);
1933
1934#ifdef CONFIG_TCP_MD5SIG
1935	/* Clean up the MD5 key list, if any */
1936	if (tp->md5sig_info) {
1937		tcp_v4_clear_md5_list(sk);
1938		kfree(tp->md5sig_info);
1939		tp->md5sig_info = NULL;
1940	}
1941#endif
1942
1943#ifdef CONFIG_NET_DMA
1944	/* Cleans up our sk_async_wait_queue */
1945	__skb_queue_purge(&sk->sk_async_wait_queue);
1946#endif
1947
	/* Clean up the prequeue; it should already be empty. */
1949	__skb_queue_purge(&tp->ucopy.prequeue);
1950
1951	/* Clean up a referenced TCP bind bucket. */
1952	if (inet_csk(sk)->icsk_bind_hash)
1953		inet_put_port(sk);
1954
1955	/*
1956	 * If sendmsg cached page exists, toss it.
1957	 */
1958	if (sk->sk_sndmsg_page) {
1959		__free_page(sk->sk_sndmsg_page);
1960		sk->sk_sndmsg_page = NULL;
1961	}
1962
1963	/* TCP Cookie Transactions */
1964	if (tp->cookie_values != NULL) {
1965		kref_put(&tp->cookie_values->kref,
1966			 tcp_cookie_values_release);
1967		tp->cookie_values = NULL;
1968	}
1969
1970	percpu_counter_dec(&tcp_sockets_allocated);
1971}
1972EXPORT_SYMBOL(tcp_v4_destroy_sock);
1973
1974#ifdef CONFIG_PROC_FS
1975/* Proc filesystem TCP sock list dumping. */
1976
1977static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1978{
1979	return hlist_nulls_empty(head) ? NULL :
1980		list_entry(head->first, struct inet_timewait_sock, tw_node);
1981}
1982
1983static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1984{
1985	return !is_a_nulls(tw->tw_node.next) ?
1986		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1987}
1988
/*
 * Get the next listening socket after cur.  If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero, the very first socket in the hash table is returned.
 */
1994static void *listening_get_next(struct seq_file *seq, void *cur)
1995{
1996	struct inet_connection_sock *icsk;
1997	struct hlist_nulls_node *node;
1998	struct sock *sk = cur;
1999	struct inet_listen_hashbucket *ilb;
2000	struct tcp_iter_state *st = seq->private;
2001	struct net *net = seq_file_net(seq);
2002
2003	if (!sk) {
2004		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2005		spin_lock_bh(&ilb->lock);
2006		sk = sk_nulls_head(&ilb->head);
2007		st->offset = 0;
2008		goto get_sk;
2009	}
2010	ilb = &tcp_hashinfo.listening_hash[st->bucket];
2011	++st->num;
2012	++st->offset;
2013
2014	if (st->state == TCP_SEQ_STATE_OPENREQ) {
2015		struct request_sock *req = cur;
2016
2017		icsk = inet_csk(st->syn_wait_sk);
2018		req = req->dl_next;
2019		while (1) {
2020			while (req) {
2021				if (req->rsk_ops->family == st->family) {
2022					cur = req;
2023					goto out;
2024				}
2025				req = req->dl_next;
2026			}
2027			st->offset = 0;
2028			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2029				break;
2030get_req:
2031			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2032		}
2033		sk	  = sk_next(st->syn_wait_sk);
2034		st->state = TCP_SEQ_STATE_LISTENING;
2035		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2036	} else {
2037		icsk = inet_csk(sk);
2038		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2039		if (reqsk_queue_len(&icsk->icsk_accept_queue))
2040			goto start_req;
2041		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2042		sk = sk_next(sk);
2043	}
2044get_sk:
2045	sk_nulls_for_each_from(sk, node) {
2046		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
2047			cur = sk;
2048			goto out;
2049		}
2050		icsk = inet_csk(sk);
2051		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2052		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2053start_req:
2054			st->uid		= sock_i_uid(sk);
2055			st->syn_wait_sk = sk;
2056			st->state	= TCP_SEQ_STATE_OPENREQ;
2057			st->sbucket	= 0;
2058			goto get_req;
2059		}
2060		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2061	}
2062	spin_unlock_bh(&ilb->lock);
2063	st->offset = 0;
2064	if (++st->bucket < INET_LHTABLE_SIZE) {
2065		ilb = &tcp_hashinfo.listening_hash[st->bucket];
2066		spin_lock_bh(&ilb->lock);
2067		sk = sk_nulls_head(&ilb->head);
2068		goto get_sk;
2069	}
2070	cur = NULL;
2071out:
2072	return cur;
2073}
2074
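/* Return the listening socket (or queued open request) at position *pos,
 * or NULL if there is none.
 */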
2075static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2076{
2077	struct tcp_iter_state *st = seq->private;
2078	void *rc;
2079
2080	st->bucket = 0;
2081	st->offset = 0;
2082	rc = listening_get_next(seq, NULL);
2083
2084	while (rc && *pos) {
2085		rc = listening_get_next(seq, rc);
2086		--*pos;
2087	}
2088	return rc;
2089}
2090
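/* True if the bucket holds neither established nor TIME_WAIT sockets. */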
2091static inline int empty_bucket(struct tcp_iter_state *st)
2092{
2093	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2094		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2095}
2096
/*
 * Get the first established socket, starting from the bucket given in
 * st->bucket.  If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
2101static void *established_get_first(struct seq_file *seq)
2102{
2103	struct tcp_iter_state *st = seq->private;
2104	struct net *net = seq_file_net(seq);
2105	void *rc = NULL;
2106
2107	st->offset = 0;
2108	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2109		struct sock *sk;
2110		struct hlist_nulls_node *node;
2111		struct inet_timewait_sock *tw;
2112		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2113
2114		/* Lockless fast path for the common case of empty buckets */
2115		if (empty_bucket(st))
2116			continue;
2117
2118		spin_lock_bh(lock);
2119		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2120			if (sk->sk_family != st->family ||
2121			    !net_eq(sock_net(sk), net)) {
2122				continue;
2123			}
2124			rc = sk;
2125			goto out;
2126		}
2127		st->state = TCP_SEQ_STATE_TIME_WAIT;
2128		inet_twsk_for_each(tw, node,
2129				   &tcp_hashinfo.ehash[st->bucket].twchain) {
2130			if (tw->tw_family != st->family ||
2131			    !net_eq(twsk_net(tw), net)) {
2132				continue;
2133			}
2134			rc = tw;
2135			goto out;
2136		}
2137		spin_unlock_bh(lock);
2138		st->state = TCP_SEQ_STATE_ESTABLISHED;
2139	}
2140out:
2141	return rc;
2142}
2143
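/*
 * Advance from cur to the next established or TIME_WAIT socket matching
 * the requested family and namespace, moving on to the next non-empty
 * bucket (and taking its lock) once the current one is exhausted.
 */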
2144static void *established_get_next(struct seq_file *seq, void *cur)
2145{
2146	struct sock *sk = cur;
2147	struct inet_timewait_sock *tw;
2148	struct hlist_nulls_node *node;
2149	struct tcp_iter_state *st = seq->private;
2150	struct net *net = seq_file_net(seq);
2151
2152	++st->num;
2153	++st->offset;
2154
2155	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2156		tw = cur;
2157		tw = tw_next(tw);
2158get_tw:
2159		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2160			tw = tw_next(tw);
2161		}
2162		if (tw) {
2163			cur = tw;
2164			goto out;
2165		}
2166		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2167		st->state = TCP_SEQ_STATE_ESTABLISHED;
2168
		/* Look for the next non-empty bucket */
2170		st->offset = 0;
2171		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2172				empty_bucket(st))
2173			;
2174		if (st->bucket > tcp_hashinfo.ehash_mask)
2175			return NULL;
2176
2177		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2178		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2179	} else
2180		sk = sk_nulls_next(sk);
2181
2182	sk_nulls_for_each_from(sk, node) {
2183		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2184			goto found;
2185	}
2186
2187	st->state = TCP_SEQ_STATE_TIME_WAIT;
2188	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2189	goto get_tw;
2190found:
2191	cur = sk;
2192out:
2193	return cur;
2194}
2195
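/* Return the established/TIME_WAIT socket at position pos, or NULL. */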
2196static void *established_get_idx(struct seq_file *seq, loff_t pos)
2197{
2198	struct tcp_iter_state *st = seq->private;
2199	void *rc;
2200
2201	st->bucket = 0;
2202	rc = established_get_first(seq);
2203
2204	while (rc && pos) {
2205		rc = established_get_next(seq, rc);
2206		--pos;
2207	}
2208	return rc;
2209}
2210
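/*
 * Position the iterator at entry pos: listening sockets are walked first,
 * followed by the established and TIME_WAIT hash.
 */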
2211static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2212{
2213	void *rc;
2214	struct tcp_iter_state *st = seq->private;
2215
2216	st->state = TCP_SEQ_STATE_LISTENING;
2217	rc	  = listening_get_idx(seq, &pos);
2218
2219	if (!rc) {
2220		st->state = TCP_SEQ_STATE_ESTABLISHED;
2221		rc	  = established_get_idx(seq, pos);
2222	}
2223
2224	return rc;
2225}
2226
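/*
 * Resume iteration from the bucket and in-bucket offset cached in the
 * iterator state, so consecutive reads of the seq_file need not rescan
 * the hash tables from the beginning.
 */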
2227static void *tcp_seek_last_pos(struct seq_file *seq)
2228{
2229	struct tcp_iter_state *st = seq->private;
2230	int offset = st->offset;
2231	int orig_num = st->num;
2232	void *rc = NULL;
2233
2234	switch (st->state) {
2235	case TCP_SEQ_STATE_OPENREQ:
2236	case TCP_SEQ_STATE_LISTENING:
2237		if (st->bucket >= INET_LHTABLE_SIZE)
2238			break;
2239		st->state = TCP_SEQ_STATE_LISTENING;
2240		rc = listening_get_next(seq, NULL);
2241		while (offset-- && rc)
2242			rc = listening_get_next(seq, rc);
2243		if (rc)
2244			break;
2245		st->bucket = 0;
2246		/* Fallthrough */
2247	case TCP_SEQ_STATE_ESTABLISHED:
2248	case TCP_SEQ_STATE_TIME_WAIT:
2249		st->state = TCP_SEQ_STATE_ESTABLISHED;
2250		if (st->bucket > tcp_hashinfo.ehash_mask)
2251			break;
2252		rc = established_get_first(seq);
2253		while (offset-- && rc)
2254			rc = established_get_next(seq, rc);
2255	}
2256
2257	st->num = orig_num;
2258
2259	return rc;
2260}
2261
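/* seq_file ->start(): reuse the cached position when possible. */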
2262static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2263{
2264	struct tcp_iter_state *st = seq->private;
2265	void *rc;
2266
2267	if (*pos && *pos == st->last_pos) {
2268		rc = tcp_seek_last_pos(seq);
2269		if (rc)
2270			goto out;
2271	}
2272
2273	st->state = TCP_SEQ_STATE_LISTENING;
2274	st->num = 0;
2275	st->bucket = 0;
2276	st->offset = 0;
2277	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2278
2279out:
2280	st->last_pos = *pos;
2281	return rc;
2282}
2283
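/* seq_file ->next(): step to the following open request or socket. */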
2284static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2285{
2286	struct tcp_iter_state *st = seq->private;
2287	void *rc = NULL;
2288
2289	if (v == SEQ_START_TOKEN) {
2290		rc = tcp_get_idx(seq, 0);
2291		goto out;
2292	}
2293
2294	switch (st->state) {
2295	case TCP_SEQ_STATE_OPENREQ:
2296	case TCP_SEQ_STATE_LISTENING:
2297		rc = listening_get_next(seq, v);
2298		if (!rc) {
2299			st->state = TCP_SEQ_STATE_ESTABLISHED;
2300			st->bucket = 0;
2301			st->offset = 0;
2302			rc	  = established_get_first(seq);
2303		}
2304		break;
2305	case TCP_SEQ_STATE_ESTABLISHED:
2306	case TCP_SEQ_STATE_TIME_WAIT:
2307		rc = established_get_next(seq, v);
2308		break;
2309	}
2310out:
2311	++*pos;
2312	st->last_pos = *pos;
2313	return rc;
2314}
2315
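/* seq_file ->stop(): drop whichever locks the iterator still holds. */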
2316static void tcp_seq_stop(struct seq_file *seq, void *v)
2317{
2318	struct tcp_iter_state *st = seq->private;
2319
2320	switch (st->state) {
2321	case TCP_SEQ_STATE_OPENREQ:
2322		if (v) {
2323			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2324			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2325		}
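		/* Fall through to release the listening hash bucket lock too. */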
2326	case TCP_SEQ_STATE_LISTENING:
2327		if (v != SEQ_START_TOKEN)
2328			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2329		break;
2330	case TCP_SEQ_STATE_TIME_WAIT:
2331	case TCP_SEQ_STATE_ESTABLISHED:
2332		if (v)
2333			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2334		break;
2335	}
2336}
2337
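/* Open handler shared by the per-family /proc TCP socket listings. */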
2338static int tcp_seq_open(struct inode *inode, struct file *file)
2339{
2340	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2341	struct tcp_iter_state *s;
2342	int err;
2343
2344	err = seq_open_net(inode, file, &afinfo->seq_ops,
2345			  sizeof(struct tcp_iter_state));
2346	if (err < 0)
2347		return err;
2348
2349	s = ((struct seq_file *)file->private_data)->private;
2350	s->family		= afinfo->family;
2351	s->last_pos 		= 0;
2352	return 0;
2353}
2354
2355int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2356{
2357	int rc = 0;
2358	struct proc_dir_entry *p;
2359
2360	afinfo->seq_fops.open		= tcp_seq_open;
2361	afinfo->seq_fops.read		= seq_read;
2362	afinfo->seq_fops.llseek		= seq_lseek;
2363	afinfo->seq_fops.release	= seq_release_net;
2364
2365	afinfo->seq_ops.start		= tcp_seq_start;
2366	afinfo->seq_ops.next		= tcp_seq_next;
2367	afinfo->seq_ops.stop		= tcp_seq_stop;
2368
2369	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2370			     &afinfo->seq_fops, afinfo);
2371	if (!p)
2372		rc = -ENOMEM;
2373	return rc;
2374}
2375EXPORT_SYMBOL(tcp_proc_register);
2376
2377void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2378{
2379	proc_net_remove(net, afinfo->name);
2380}
2381EXPORT_SYMBOL(tcp_proc_unregister);
2382
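/* Format one SYN_RECV open request as a /proc/net/tcp record. */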
2383static void get_openreq4(struct sock *sk, struct request_sock *req,
2384			 struct seq_file *f, int i, int uid, int *len)
2385{
2386	const struct inet_request_sock *ireq = inet_rsk(req);
2387	int ttd = req->expires - jiffies;
2388
2389	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2390		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
2391		i,
2392		ireq->loc_addr,
2393		ntohs(inet_sk(sk)->inet_sport),
2394		ireq->rmt_addr,
2395		ntohs(ireq->rmt_port),
2396		TCP_SYN_RECV,
2397		0, 0, /* could print option size, but that is af dependent. */
2398		1,    /* timers active (only the expire timer) */
2399		jiffies_to_clock_t(ttd),
2400		req->retrans,
2401		uid,
		0,  /* non-standard timer */
2403		0, /* open_requests have no inode */
2404		atomic_read(&sk->sk_refcnt),
2405		req,
2406		len);
2407}
2408
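/*
 * Format one full socket as a /proc/net/tcp record.  The "tr" timer field
 * encodes which timer is pending: 0 none, 1 retransmit, 2 keepalive
 * (sk_timer), 4 zero-window probe; TIME_WAIT sockets report 3.
 */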
2409static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2410{
2411	int timer_active;
2412	unsigned long timer_expires;
2413	struct tcp_sock *tp = tcp_sk(sk);
2414	const struct inet_connection_sock *icsk = inet_csk(sk);
2415	struct inet_sock *inet = inet_sk(sk);
2416	__be32 dest = inet->inet_daddr;
2417	__be32 src = inet->inet_rcv_saddr;
2418	__u16 destp = ntohs(inet->inet_dport);
2419	__u16 srcp = ntohs(inet->inet_sport);
2420	int rx_queue;
2421
2422	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2423		timer_active	= 1;
2424		timer_expires	= icsk->icsk_timeout;
2425	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2426		timer_active	= 4;
2427		timer_expires	= icsk->icsk_timeout;
2428	} else if (timer_pending(&sk->sk_timer)) {
2429		timer_active	= 2;
2430		timer_expires	= sk->sk_timer.expires;
2431	} else {
2432		timer_active	= 0;
2433		timer_expires = jiffies;
2434	}
2435
2436	if (sk->sk_state == TCP_LISTEN)
2437		rx_queue = sk->sk_ack_backlog;
2438	else
		/*
		 * Because we don't lock the socket, we might read a
		 * transient negative value.
		 */
2442		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2443
2444	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2445			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
2446		i, src, srcp, dest, destp, sk->sk_state,
2447		tp->write_seq - tp->snd_una,
2448		rx_queue,
2449		timer_active,
2450		jiffies_to_clock_t(timer_expires - jiffies),
2451		icsk->icsk_retransmits,
2452		sock_i_uid(sk),
2453		icsk->icsk_probes_out,
2454		sock_i_ino(sk),
2455		atomic_read(&sk->sk_refcnt), sk,
2456		jiffies_to_clock_t(icsk->icsk_rto),
2457		jiffies_to_clock_t(icsk->icsk_ack.ato),
2458		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2459		tp->snd_cwnd,
2460		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2461		len);
2462}
2463
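/* Format one TIME_WAIT socket as a /proc/net/tcp record. */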
2464static void get_timewait4_sock(struct inet_timewait_sock *tw,
2465			       struct seq_file *f, int i, int *len)
2466{
2467	__be32 dest, src;
2468	__u16 destp, srcp;
2469	int ttd = tw->tw_ttd - jiffies;
2470
2471	if (ttd < 0)
2472		ttd = 0;
2473
2474	dest  = tw->tw_daddr;
2475	src   = tw->tw_rcv_saddr;
2476	destp = ntohs(tw->tw_dport);
2477	srcp  = ntohs(tw->tw_sport);
2478
2479	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2480		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
2481		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2482		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2483		atomic_read(&tw->tw_refcnt), tw, len);
2484}
2485
2486#define TMPSZ 150
2487
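/*
 * Emit a single /proc/net/tcp record, padded to TMPSZ - 1 characters so
 * that every line has a fixed width.
 */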
2488static int tcp4_seq_show(struct seq_file *seq, void *v)
2489{
2490	struct tcp_iter_state *st;
2491	int len;
2492
2493	if (v == SEQ_START_TOKEN) {
2494		seq_printf(seq, "%-*s\n", TMPSZ - 1,
2495			   "  sl  local_address rem_address   st tx_queue "
2496			   "rx_queue tr tm->when retrnsmt   uid  timeout "
2497			   "inode");
2498		goto out;
2499	}
2500	st = seq->private;
2501
2502	switch (st->state) {
2503	case TCP_SEQ_STATE_LISTENING:
2504	case TCP_SEQ_STATE_ESTABLISHED:
2505		get_tcp4_sock(v, seq, st->num, &len);
2506		break;
2507	case TCP_SEQ_STATE_OPENREQ:
2508		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2509		break;
2510	case TCP_SEQ_STATE_TIME_WAIT:
2511		get_timewait4_sock(v, seq, st->num, &len);
2512		break;
2513	}
2514	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2515out:
2516	return 0;
2517}
2518
2519static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2520	.name		= "tcp",
2521	.family		= AF_INET,
2522	.seq_fops	= {
2523		.owner		= THIS_MODULE,
2524	},
2525	.seq_ops	= {
2526		.show		= tcp4_seq_show,
2527	},
2528};
2529
2530static int __net_init tcp4_proc_init_net(struct net *net)
2531{
2532	return tcp_proc_register(net, &tcp4_seq_afinfo);
2533}
2534
2535static void __net_exit tcp4_proc_exit_net(struct net *net)
2536{
2537	tcp_proc_unregister(net, &tcp4_seq_afinfo);
2538}
2539
2540static struct pernet_operations tcp4_net_ops = {
2541	.init = tcp4_proc_init_net,
2542	.exit = tcp4_proc_exit_net,
2543};
2544
2545int __init tcp4_proc_init(void)
2546{
2547	return register_pernet_subsys(&tcp4_net_ops);
2548}
2549
2550void tcp4_proc_exit(void)
2551{
2552	unregister_pernet_subsys(&tcp4_net_ops);
2553}
2554#endif /* CONFIG_PROC_FS */
2555
2556#ifdef CONFIG_INET_GRO
2557extern atomic_t gro_timer_init;
2558#endif
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
#ifdef CONFIG_INET_GRO
	/* Hardware checksumming is not supported on this path, so skip
	 * the checksum validation below and hand the skb straight to the
	 * generic TCP merge once the GRO timer has been initialised.
	 */
	if (atomic_read(&gro_timer_init))
		return tcp_gro_receive(head, skb);

	/* GRO timer not initialised yet: do not aggregate this skb. */
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
#else
	struct iphdr *iph;

	/* Validate the TCP checksum before attempting a merge. */
	iph = skb_gro_network_header(skb);
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
#endif /* CONFIG_INET_GRO */
}
2586EXPORT_SYMBOL(tcp4_gro_receive);
2587
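/*
 * GRO completion: rewrite th->check with the pseudo-header checksum and
 * mark the merged skb as TCPv4 GSO so it can be resegmented if needed.
 */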
2588int BCMFASTPATH_HOST tcp4_gro_complete(struct sk_buff *skb)
2589{
2590	struct iphdr *iph = ip_hdr(skb);
2591	struct tcphdr *th = tcp_hdr(skb);
2592
2593	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2594				  iph->saddr, iph->daddr, 0);
2595	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2596
2597	return tcp_gro_complete(skb);
2598}
2599EXPORT_SYMBOL(tcp4_gro_complete);
2600
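/* The IPv4 TCP protocol hooks registered with the socket layer. */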
2601struct proto tcp_prot = {
2602	.name			= "TCP",
2603	.owner			= THIS_MODULE,
2604	.close			= tcp_close,
2605	.connect		= tcp_v4_connect,
2606	.disconnect		= tcp_disconnect,
2607	.accept			= inet_csk_accept,
2608	.ioctl			= tcp_ioctl,
2609	.init			= tcp_v4_init_sock,
2610	.destroy		= tcp_v4_destroy_sock,
2611	.shutdown		= tcp_shutdown,
2612	.setsockopt		= tcp_setsockopt,
2613	.getsockopt		= tcp_getsockopt,
2614	.recvmsg		= tcp_recvmsg,
2615	.sendmsg		= tcp_sendmsg,
2616	.sendpage		= tcp_sendpage,
2617	.backlog_rcv		= tcp_v4_do_rcv,
2618	.hash			= inet_hash,
2619	.unhash			= inet_unhash,
2620	.get_port		= inet_csk_get_port,
2621	.enter_memory_pressure	= tcp_enter_memory_pressure,
2622	.sockets_allocated	= &tcp_sockets_allocated,
2623	.orphan_count		= &tcp_orphan_count,
2624	.memory_allocated	= &tcp_memory_allocated,
2625	.memory_pressure	= &tcp_memory_pressure,
2626	.sysctl_mem		= sysctl_tcp_mem,
2627	.sysctl_wmem		= sysctl_tcp_wmem,
2628	.sysctl_rmem		= sysctl_tcp_rmem,
2629	.max_header		= MAX_TCP_HEADER,
2630	.obj_size		= sizeof(struct tcp_sock),
2631	.slab_flags		= SLAB_DESTROY_BY_RCU,
2632	.twsk_prot		= &tcp_timewait_sock_ops,
2633	.rsk_prot		= &tcp_request_sock_ops,
2634	.h.hashinfo		= &tcp_hashinfo,
2635	.no_autobind		= true,
2636#ifdef CONFIG_COMPAT
2637	.compat_setsockopt	= compat_tcp_setsockopt,
2638	.compat_getsockopt	= compat_tcp_getsockopt,
2639#endif
2640};
2641EXPORT_SYMBOL(tcp_prot);
2642
2643
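/*
 * Per-namespace setup: create the kernel control socket that TCP uses to
 * send resets and ACKs on behalf of packets not owned by a full socket.
 */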
2644static int __net_init tcp_sk_init(struct net *net)
2645{
2646	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2647				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2648}
2649
2650static void __net_exit tcp_sk_exit(struct net *net)
2651{
2652	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
2653}
2654
2655static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2656{
2657	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2658}
2659
static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};
2665
2666void __init tcp_v4_init(void)
2667{
2668	inet_hashinfo_init(&tcp_hashinfo);
2669	if (register_pernet_subsys(&tcp_sk_ops))
2670		panic("Failed to create the TCP control socket.\n");
2671}
2672