1/* Modified by Broadcom Corp. Portions Copyright (c) Broadcom Corp, 2012. */ 2/* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * Implementation of the Transmission Control Protocol(TCP). 8 * 9 * IPv4 specific functions 10 * 11 * 12 * code split from: 13 * linux/ipv4/tcp.c 14 * linux/ipv4/tcp_input.c 15 * linux/ipv4/tcp_output.c 16 * 17 * See tcp.c for author information 18 * 19 * This program is free software; you can redistribute it and/or 20 * modify it under the terms of the GNU General Public License 21 * as published by the Free Software Foundation; either version 22 * 2 of the License, or (at your option) any later version. 23 */ 24 25/* 26 * Changes: 27 * David S. Miller : New socket lookup architecture. 28 * This code is dedicated to John Dyson. 29 * David S. Miller : Change semantics of established hash, 30 * half is devoted to TIME_WAIT sockets 31 * and the rest go in the other half. 32 * Andi Kleen : Add support for syncookies and fixed 33 * some bugs: ip options weren't passed to 34 * the TCP layer, missed a check for an 35 * ACK bit. 36 * Andi Kleen : Implemented fast path mtu discovery. 37 * Fixed many serious bugs in the 38 * request_sock handling and moved 39 * most of it into the af independent code. 40 * Added tail drop and some other bugfixes. 41 * Added new listen semantics. 42 * Mike McLagan : Routing by source 43 * Juan Jose Ciarlante: ip_dynaddr bits 44 * Andi Kleen: various fixes. 45 * Vitaly E. Lavrov : Transparent proxy revived after year 46 * coma. 47 * Andi Kleen : Fix new listen. 48 * Andi Kleen : Fix accept error reporting. 49 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 50 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 51 * a single port at the same time. 
 */

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

#include <typedefs.h>
#include <bcmdefs.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

/* Pick the initial send sequence number for a connection answering the
 * SYN carried in @skb, derived from the 4-tuple of that packet.
 */
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

/* Decide whether a TIME-WAIT socket @sktw may be reused for a fresh
 * connection on the same 4-tuple by socket @sk.  Returns 1 (and takes a
 * reference on @sktw) when reuse is allowed, 0 otherwise.
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* Step far enough past the old connection's sequence space;
		 * write_seq == 0 is reserved to mean "not yet chosen".
		 */
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With source routing the first hop is taken from the option list,
	 * not from the destination address itself.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->inet_sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->inet_saddr)
		inet->inet_saddr = rt->rt_src;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->inet_sport, inet->inet_dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* The quoted TCP header must contain at least ports + seq (8 bytes). */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
					 icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* Fill in the TCP checksum for an outgoing segment, either deferring it
 * via CHECKSUM_PARTIAL offload or computing the full checksum in software.
 */
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/* Prepare a GSO segment's checksum fields from the addresses in its own
 * IP header.  Returns 0 on success, -EINVAL if the TCP header cannot be
 * pulled into the linear area.
 */
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		/* Reply direction: our saddr is the packet's daddr. */
		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				   key, ip_hdr(skb)->saddr,
				   ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the timestamp option, if present. */
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

/* ACK a segment received for a TIME-WAIT socket, then drop our reference
 * to the timewait bucket.
 */
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}

/* ACK on behalf of a SYN-RECV request sock (no full socket exists yet). */
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/* Rate-limited warning printed when the SYN backlog overflows. */
static void syn_flood_warning(const struct sk_buff *skb)
{
	const char *msg;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		msg = "Sending cookies";
	else
#endif
		msg = "Dropping request";

	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
		ntohs(tcp_hdr(skb)->dest), msg);
}

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one.
		 * Ownership of @newkey is taken over in every path below;
		 * on error it is freed here.
		 */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		if (tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
				 newkey, newkeylen);
}

/* Remove the MD5 key for @addr.  Returns 0 on success, -ENOENT if no key
 * for that address is configured on @sk.
 */
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of key keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4  = 0;
	}
}

/* setsockopt(TCP_MD5SIG) handler: add, replace or delete (zero-length key)
 * the MD5 key for the peer address in @optval.
 */
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(*p), sk->sk_allocation);
		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

/* Mix the IPv4 TCP pseudo-header (saddr, daddr, zero pad, protocol, length)
 * into the running MD5 hash.  Returns the crypto_hash_update() result
 * (0 on success).
 */
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

/* Compute the RFC 2385 MD5 signature over pseudo-header + TCP header + key
 * only (no payload) into @md5_hash.  Used for locally built replies
 * (RST/ACK).  Returns 0 on success, 1 on failure (with @md5_hash zeroed).
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

/* Compute the RFC 2385 MD5 signature over pseudo-header + TCP header +
 * payload of @skb into @md5_hash.  Addresses are taken from @sk, else
 * @req, else the skb's own IP header.  Returns 0 on success, 1 on failure
 * (with @md5_hash zeroed).
 */
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

/* Validate the MD5 option of an incoming segment against the key expected
 * for its source address.  Returns 0 to accept the segment, 1 to drop it.
 */
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/*
 TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		/* SYN queue full: fall back to syncookies if enabled,
		 * otherwise drop the SYN. */
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	/* TCP Cookie Transactions: fold the addresses and the variable
	 * length initiator cookie into the "cookie bakery". */
	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

#ifdef CONFIG_SYN_COOKIES
		want_cookie = 0;	/* not our kind of cookie */
#endif
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		/* ISN encodes the cookie; no request state is kept. */
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	/* Send the SYN-ACK; drop the request on failure, or when a
	 * syncookie was used and no request state should be kept. */
	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Route the child if the caller did not supply a dst already */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;	/* IP options ownership moves to newinet */
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	__inet_hash_nolisten(newsk, NULL);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/*
 * Decide what to do with a segment arriving on a listening socket:
 * match it against a pending open request, an already established
 * child, or (with syncookies) reconstruct a request from the cookie.
 * Returns the socket to process the segment on, or NULL to discard.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/*
 * Verify, or set up deferred verification of, the TCP checksum of an
 * incoming IPv4 segment.  Returns non-zero when the packet is already
 * known to be corrupt; 0 means OK or "verify later".
 */
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	/* Seed the pseudo-header sum for later full verification */
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	/* Short packets are verified immediately */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			/* Segment belongs to a child socket */
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);


	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

/*
 *	From tcp_input.c
 */

int BCMFASTPATH_HOST tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/* Re-fetch headers: pskb_may_pull() may have reallocated */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* A SYN reusing the TIME_WAIT pair: hand it to a
		 * current listener, if any. */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	/* Prefer the peer already bound to the cached route; otherwise
	 * do a direct peer lookup, which must be released afterwards. */
	if (!rt || rt->rt_dst != inet->inet_daddr) {
		peer = inet_getpeer(inet->inet_daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		/* Only advance the stored timestamp forward, or replace
		 * a stale entry past the PAWS TIME-WAIT interval. */
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
		     peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL(tcp_v4_remember_stamp);

/* As above, but for a timewait socket: stash its last-seen timestamp
 * in the inet_peer entry of the destination. */
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL &&
		     peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

/* AF_INET operation table used by the protocol-independent TCP code */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup		= tcp_v4_md5_lookup,
	.calc_md5_hash		= tcp_v4_md5_hash_skb,
	.md5_add		= tcp_v4_md5_add_func,
	.md5_parse		= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	percpu_counter_dec(&tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/* First timewait socket on a nulls list, or NULL if the list is empty */
static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

/* Next timewait socket after tw, or NULL at the nulls end marker */
static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
1993 */ 1994static void *listening_get_next(struct seq_file *seq, void *cur) 1995{ 1996 struct inet_connection_sock *icsk; 1997 struct hlist_nulls_node *node; 1998 struct sock *sk = cur; 1999 struct inet_listen_hashbucket *ilb; 2000 struct tcp_iter_state *st = seq->private; 2001 struct net *net = seq_file_net(seq); 2002 2003 if (!sk) { 2004 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2005 spin_lock_bh(&ilb->lock); 2006 sk = sk_nulls_head(&ilb->head); 2007 st->offset = 0; 2008 goto get_sk; 2009 } 2010 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2011 ++st->num; 2012 ++st->offset; 2013 2014 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2015 struct request_sock *req = cur; 2016 2017 icsk = inet_csk(st->syn_wait_sk); 2018 req = req->dl_next; 2019 while (1) { 2020 while (req) { 2021 if (req->rsk_ops->family == st->family) { 2022 cur = req; 2023 goto out; 2024 } 2025 req = req->dl_next; 2026 } 2027 st->offset = 0; 2028 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 2029 break; 2030get_req: 2031 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 2032 } 2033 sk = sk_next(st->syn_wait_sk); 2034 st->state = TCP_SEQ_STATE_LISTENING; 2035 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2036 } else { 2037 icsk = inet_csk(sk); 2038 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2039 if (reqsk_queue_len(&icsk->icsk_accept_queue)) 2040 goto start_req; 2041 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2042 sk = sk_next(sk); 2043 } 2044get_sk: 2045 sk_nulls_for_each_from(sk, node) { 2046 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { 2047 cur = sk; 2048 goto out; 2049 } 2050 icsk = inet_csk(sk); 2051 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2052 if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 2053start_req: 2054 st->uid = sock_i_uid(sk); 2055 st->syn_wait_sk = sk; 2056 st->state = TCP_SEQ_STATE_OPENREQ; 2057 st->sbucket = 0; 2058 goto get_req; 2059 } 2060 
read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2061 } 2062 spin_unlock_bh(&ilb->lock); 2063 st->offset = 0; 2064 if (++st->bucket < INET_LHTABLE_SIZE) { 2065 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2066 spin_lock_bh(&ilb->lock); 2067 sk = sk_nulls_head(&ilb->head); 2068 goto get_sk; 2069 } 2070 cur = NULL; 2071out: 2072 return cur; 2073} 2074 2075static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 2076{ 2077 struct tcp_iter_state *st = seq->private; 2078 void *rc; 2079 2080 st->bucket = 0; 2081 st->offset = 0; 2082 rc = listening_get_next(seq, NULL); 2083 2084 while (rc && *pos) { 2085 rc = listening_get_next(seq, rc); 2086 --*pos; 2087 } 2088 return rc; 2089} 2090 2091static inline int empty_bucket(struct tcp_iter_state *st) 2092{ 2093 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 2094 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2095} 2096 2097/* 2098 * Get first established socket starting from bucket given in st->bucket. 2099 * If st->bucket is zero, the very first socket in the hash is returned. 
2100 */ 2101static void *established_get_first(struct seq_file *seq) 2102{ 2103 struct tcp_iter_state *st = seq->private; 2104 struct net *net = seq_file_net(seq); 2105 void *rc = NULL; 2106 2107 st->offset = 0; 2108 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2109 struct sock *sk; 2110 struct hlist_nulls_node *node; 2111 struct inet_timewait_sock *tw; 2112 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 2113 2114 /* Lockless fast path for the common case of empty buckets */ 2115 if (empty_bucket(st)) 2116 continue; 2117 2118 spin_lock_bh(lock); 2119 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { 2120 if (sk->sk_family != st->family || 2121 !net_eq(sock_net(sk), net)) { 2122 continue; 2123 } 2124 rc = sk; 2125 goto out; 2126 } 2127 st->state = TCP_SEQ_STATE_TIME_WAIT; 2128 inet_twsk_for_each(tw, node, 2129 &tcp_hashinfo.ehash[st->bucket].twchain) { 2130 if (tw->tw_family != st->family || 2131 !net_eq(twsk_net(tw), net)) { 2132 continue; 2133 } 2134 rc = tw; 2135 goto out; 2136 } 2137 spin_unlock_bh(lock); 2138 st->state = TCP_SEQ_STATE_ESTABLISHED; 2139 } 2140out: 2141 return rc; 2142} 2143 2144static void *established_get_next(struct seq_file *seq, void *cur) 2145{ 2146 struct sock *sk = cur; 2147 struct inet_timewait_sock *tw; 2148 struct hlist_nulls_node *node; 2149 struct tcp_iter_state *st = seq->private; 2150 struct net *net = seq_file_net(seq); 2151 2152 ++st->num; 2153 ++st->offset; 2154 2155 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2156 tw = cur; 2157 tw = tw_next(tw); 2158get_tw: 2159 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { 2160 tw = tw_next(tw); 2161 } 2162 if (tw) { 2163 cur = tw; 2164 goto out; 2165 } 2166 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2167 st->state = TCP_SEQ_STATE_ESTABLISHED; 2168 2169 /* Look for next non empty bucket */ 2170 st->offset = 0; 2171 while (++st->bucket <= tcp_hashinfo.ehash_mask && 2172 empty_bucket(st)) 2173 ; 
2174 if (st->bucket > tcp_hashinfo.ehash_mask) 2175 return NULL; 2176 2177 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2178 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 2179 } else 2180 sk = sk_nulls_next(sk); 2181 2182 sk_nulls_for_each_from(sk, node) { 2183 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 2184 goto found; 2185 } 2186 2187 st->state = TCP_SEQ_STATE_TIME_WAIT; 2188 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 2189 goto get_tw; 2190found: 2191 cur = sk; 2192out: 2193 return cur; 2194} 2195 2196static void *established_get_idx(struct seq_file *seq, loff_t pos) 2197{ 2198 struct tcp_iter_state *st = seq->private; 2199 void *rc; 2200 2201 st->bucket = 0; 2202 rc = established_get_first(seq); 2203 2204 while (rc && pos) { 2205 rc = established_get_next(seq, rc); 2206 --pos; 2207 } 2208 return rc; 2209} 2210 2211static void *tcp_get_idx(struct seq_file *seq, loff_t pos) 2212{ 2213 void *rc; 2214 struct tcp_iter_state *st = seq->private; 2215 2216 st->state = TCP_SEQ_STATE_LISTENING; 2217 rc = listening_get_idx(seq, &pos); 2218 2219 if (!rc) { 2220 st->state = TCP_SEQ_STATE_ESTABLISHED; 2221 rc = established_get_idx(seq, pos); 2222 } 2223 2224 return rc; 2225} 2226 2227static void *tcp_seek_last_pos(struct seq_file *seq) 2228{ 2229 struct tcp_iter_state *st = seq->private; 2230 int offset = st->offset; 2231 int orig_num = st->num; 2232 void *rc = NULL; 2233 2234 switch (st->state) { 2235 case TCP_SEQ_STATE_OPENREQ: 2236 case TCP_SEQ_STATE_LISTENING: 2237 if (st->bucket >= INET_LHTABLE_SIZE) 2238 break; 2239 st->state = TCP_SEQ_STATE_LISTENING; 2240 rc = listening_get_next(seq, NULL); 2241 while (offset-- && rc) 2242 rc = listening_get_next(seq, rc); 2243 if (rc) 2244 break; 2245 st->bucket = 0; 2246 /* Fallthrough */ 2247 case TCP_SEQ_STATE_ESTABLISHED: 2248 case TCP_SEQ_STATE_TIME_WAIT: 2249 st->state = TCP_SEQ_STATE_ESTABLISHED; 2250 if (st->bucket > tcp_hashinfo.ehash_mask) 2251 break; 2252 rc = 
established_get_first(seq); 2253 while (offset-- && rc) 2254 rc = established_get_next(seq, rc); 2255 } 2256 2257 st->num = orig_num; 2258 2259 return rc; 2260} 2261 2262static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 2263{ 2264 struct tcp_iter_state *st = seq->private; 2265 void *rc; 2266 2267 if (*pos && *pos == st->last_pos) { 2268 rc = tcp_seek_last_pos(seq); 2269 if (rc) 2270 goto out; 2271 } 2272 2273 st->state = TCP_SEQ_STATE_LISTENING; 2274 st->num = 0; 2275 st->bucket = 0; 2276 st->offset = 0; 2277 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2278 2279out: 2280 st->last_pos = *pos; 2281 return rc; 2282} 2283 2284static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2285{ 2286 struct tcp_iter_state *st = seq->private; 2287 void *rc = NULL; 2288 2289 if (v == SEQ_START_TOKEN) { 2290 rc = tcp_get_idx(seq, 0); 2291 goto out; 2292 } 2293 2294 switch (st->state) { 2295 case TCP_SEQ_STATE_OPENREQ: 2296 case TCP_SEQ_STATE_LISTENING: 2297 rc = listening_get_next(seq, v); 2298 if (!rc) { 2299 st->state = TCP_SEQ_STATE_ESTABLISHED; 2300 st->bucket = 0; 2301 st->offset = 0; 2302 rc = established_get_first(seq); 2303 } 2304 break; 2305 case TCP_SEQ_STATE_ESTABLISHED: 2306 case TCP_SEQ_STATE_TIME_WAIT: 2307 rc = established_get_next(seq, v); 2308 break; 2309 } 2310out: 2311 ++*pos; 2312 st->last_pos = *pos; 2313 return rc; 2314} 2315 2316static void tcp_seq_stop(struct seq_file *seq, void *v) 2317{ 2318 struct tcp_iter_state *st = seq->private; 2319 2320 switch (st->state) { 2321 case TCP_SEQ_STATE_OPENREQ: 2322 if (v) { 2323 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2324 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2325 } 2326 case TCP_SEQ_STATE_LISTENING: 2327 if (v != SEQ_START_TOKEN) 2328 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 2329 break; 2330 case TCP_SEQ_STATE_TIME_WAIT: 2331 case TCP_SEQ_STATE_ESTABLISHED: 2332 if (v) 2333 
			/* tail of tcp_seq_stop(): release the ehash bucket lock */
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

/*
 * ->open for the /proc/net/tcp{,6} files: set up net-namespace-aware
 * seq_file state and stash the address family from the afinfo blob
 * attached to the proc entry.
 */
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			  sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family		= afinfo->family;
	s->last_pos 		= 0;
	return 0;
}

/*
 * Register an af-specific /proc/net entry.  Fills in the common fops
 * and seq_ops callbacks; the caller supplies only .show and the name.
 * Returns 0 on success, -ENOMEM if proc_create_data() fails.
 */
int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

/* Remove the /proc/net entry created by tcp_proc_register(). */
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);

/*
 * Format one SYN_RECV request socket as a /proc/net/tcp line.
 * %n stores the number of characters written into *len so the caller
 * can pad the line to a fixed width.
 */
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

/*
 * Format one full socket (listening or established) as a
 * /proc/net/tcp line.  Runs without the socket lock, so the queue
 * figures below are best-effort snapshots.
 */
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	/* Map the pending icsk timer onto the /proc "tr" code:
	 * 1 = retransmit, 4 = zero-window probe, 2 = keepalive, 0 = none.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * because we dont lock socket, we might find a transient negative value
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		/* -1 signals "still in initial slow start" to readers */
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

/*
 * Format one TIME_WAIT socket as a /proc/net/tcp line; ttd is clamped
 * to zero so an already-expired timer never prints negative.
 */
static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

/* Fixed width of one /proc/net/tcp line (including padding). */
#define TMPSZ 150

/*
 * seq_file ->show: print the header row for the start token, otherwise
 * dispatch to the per-state formatter and pad the line to TMPSZ - 1
 * columns using the %n-captured length.
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

/* af-specific descriptor for /proc/net/tcp (IPv4). */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

/* Per-network-namespace registration of /proc/net/tcp. */
static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_INET_GRO
extern atomic_t gro_timer_init;
#endif
/*
 * GRO receive hook for IPv4 TCP.  Broadcom variant: with
 * CONFIG_INET_GRO the checksum verification is skipped and packets go
 * straight to tcp_gro_receive() once the GRO timer is initialized.
 *
 * NOTE(review): when CONFIG_INET_GRO is defined and gro_timer_init is
 * still 0, control falls off the end of this non-void function
 * (undefined behavior) - looks like a missing "NAPI_GRO_CB(skb)->flush
 * = 1; return NULL;" (or similar) path; confirm against the Broadcom
 * patch intent.  "iph" is also unused in that configuration.
 */
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct iphdr *iph;

#ifdef CONFIG_INET_GRO
	if (atomic_read(&gro_timer_init))
		return tcp_gro_receive(head, skb);
#else
	/* We don't support hw-checksum. Skip this part to do real TCP merge */
	iph = skb_gro_network_header(skb);
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
#endif /* CONFIG_INET_GRO */
}
EXPORT_SYMBOL(tcp4_gro_receive);

/*
 * GRO complete hook: recompute the pseudo-header checksum for the
 * merged super-packet and mark it as TCPv4 GSO before handing it to
 * the generic tcp_gro_complete().
 */
int BCMFASTPATH_HOST tcp4_gro_complete(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}
EXPORT_SYMBOL(tcp4_gro_complete);

/* Protocol method table for IPv4 TCP sockets. */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
EXPORT_SYMBOL(tcp_prot);


/* Create the per-namespace kernel control socket used for resets etc. */
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

/* Batched exit: purge TIME_WAIT sockets for all dying namespaces. */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};

/* Boot-time init: set up the TCP hash tables and control sockets. */
void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}