tcp_timewait.c (133591) | tcp_timewait.c (133874) |
---|---|
1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 13 unchanged lines hidden (view full) --- 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 | 1/* 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright --- 13 unchanged lines hidden (view full) --- 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 |
30 * $FreeBSD: head/sys/netinet/tcp_timewait.c 133591 2004-08-12 18:19:36Z dwmalone $ | 30 * $FreeBSD: head/sys/netinet/tcp_timewait.c 133874 2004-08-16 18:32:07Z rwatson $ |
31 */ 32 33#include "opt_compat.h" 34#include "opt_inet.h" 35#include "opt_inet6.h" 36#include "opt_ipsec.h" 37#include "opt_mac.h" 38#include "opt_tcpdebug.h" --- 66 unchanged lines hidden (view full) --- 105#endif 106#include <netipsec/key.h> 107#define IPSEC 108#endif /*FAST_IPSEC*/ 109 110#include <machine/in_cksum.h> 111#include <sys/md5.h> 112 | 31 */ 32 33#include "opt_compat.h" 34#include "opt_inet.h" 35#include "opt_inet6.h" 36#include "opt_ipsec.h" 37#include "opt_mac.h" 38#include "opt_tcpdebug.h" --- 66 unchanged lines hidden (view full) --- 105#endif 106#include <netipsec/key.h> 107#define IPSEC 108#endif /*FAST_IPSEC*/ 109 110#include <machine/in_cksum.h> 111#include <sys/md5.h> 112 |
113int tcp_mssdflt = TCP_MSS; 114SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, | 113int tcp_mssdflt = TCP_MSS; 114SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW, |
115 &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); 116 117#ifdef INET6 118int tcp_v6mssdflt = TCP6_MSS; 119SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, 120 CTLFLAG_RW, &tcp_v6mssdflt , 0, 121 "Default TCP Maximum Segment Size for IPv6"); 122#endif --- 18 unchanged lines hidden (view full) --- 141 * (telnet, SSH) which send many small packets. 142 */ 143int tcp_minmssoverload = TCP_MINMSSOVERLOAD; 144SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW, 145 &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to" 146 "be under the MINMSS Size"); 147 148#if 0 | 115 &tcp_mssdflt , 0, "Default TCP Maximum Segment Size"); 116 117#ifdef INET6 118int tcp_v6mssdflt = TCP6_MSS; 119SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, 120 CTLFLAG_RW, &tcp_v6mssdflt , 0, 121 "Default TCP Maximum Segment Size for IPv6"); 122#endif --- 18 unchanged lines hidden (view full) --- 141 * (telnet, SSH) which send many small packets. 142 */ 143int tcp_minmssoverload = TCP_MINMSSOVERLOAD; 144SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW, 145 &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to" 146 "be under the MINMSS Size"); 147 148#if 0 |
149static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 150SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, | 149static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ; 150SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW, |
151 &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); 152#endif 153 154int tcp_do_rfc1323 = 1; | 151 &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time"); 152#endif 153 154int tcp_do_rfc1323 = 1; |
155SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, | 155SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW, |
156 &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); 157 158int tcp_do_rfc1644 = 0; | 156 &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); 157 158int tcp_do_rfc1644 = 0; |
159SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, | 159SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1644, rfc1644, CTLFLAG_RW, |
160 &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); 161 162static int tcp_tcbhashsize = 0; 163SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN, 164 &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); 165 166static int do_tcpdrain = 1; 167SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, 168 "Enable tcp_drain routine for extra help when low on mbufs"); 169 | 160 &tcp_do_rfc1644 , 0, "Enable rfc1644 (TTCP) extensions"); 161 162static int tcp_tcbhashsize = 0; 163SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN, 164 &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); 165 166static int do_tcpdrain = 1; 167SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, 168 "Enable tcp_drain routine for extra help when low on mbufs"); 169 |
170SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, | 170SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, |
171 &tcbinfo.ipi_count, 0, "Number of active PCBs"); 172 173static int icmp_may_rst = 1; | 171 &tcbinfo.ipi_count, 0, "Number of active PCBs"); 172 173static int icmp_may_rst = 1; |
174SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, | 174SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, |
175 "Certain ICMP unreachable messages may abort connections in SYN_SENT"); 176 177static int tcp_isn_reseed_interval = 0; 178SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, 179 &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret"); 180 181/* | 175 "Certain ICMP unreachable messages may abort connections in SYN_SENT"); 176 177static int tcp_isn_reseed_interval = 0; 178SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_RW, 179 &tcp_isn_reseed_interval, 0, "Seconds between reseeding of ISN secret"); 180 181/* |
182 * TCP bandwidth limiting sysctls. Note that the default lower bound of 183 * 1024 exists only for debugging. A good production default would be | 182 * TCP bandwidth limiting sysctls. Note that the default lower bound of 183 * 1024 exists only for debugging. A good production default would be |
184 * something like 6100. 185 */ 186SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0, 187 "TCP inflight data limiting"); 188 189static int tcp_inflight_enable = 1; 190SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW, 191 &tcp_inflight_enable, 0, "Enable automatic TCP inflight data limiting"); --- 53 unchanged lines hidden (view full) --- 245 246/* 247 * Tcp initialization 248 */ 249void 250tcp_init() 251{ 252 int hashsize = TCBHASHSIZE; | 184 * something like 6100. 185 */ 186SYSCTL_NODE(_net_inet_tcp, OID_AUTO, inflight, CTLFLAG_RW, 0, 187 "TCP inflight data limiting"); 188 189static int tcp_inflight_enable = 1; 190SYSCTL_INT(_net_inet_tcp_inflight, OID_AUTO, enable, CTLFLAG_RW, 191 &tcp_inflight_enable, 0, "Enable automatic TCP inflight data limiting"); --- 53 unchanged lines hidden (view full) --- 245 246/* 247 * Tcp initialization 248 */ 249void 250tcp_init() 251{ 252 int hashsize = TCBHASHSIZE; |
253 | 253 |
254 tcp_ccgen = 1; 255 256 tcp_delacktime = TCPTV_DELACK; 257 tcp_keepinit = TCPTV_KEEP_INIT; 258 tcp_keepidle = TCPTV_KEEP_IDLE; 259 tcp_keepintvl = TCPTV_KEEPINTVL; 260 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 261 tcp_msl = TCPTV_MSL; --- 7 unchanged lines hidden (view full) --- 269 if (!powerof2(hashsize)) { 270 printf("WARNING: TCB hash size not a power of 2\n"); 271 hashsize = 512; /* safe default */ 272 } 273 tcp_tcbhashsize = hashsize; 274 tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); 275 tcbinfo.porthashbase = hashinit(hashsize, M_PCB, 276 &tcbinfo.porthashmask); | 254 tcp_ccgen = 1; 255 256 tcp_delacktime = TCPTV_DELACK; 257 tcp_keepinit = TCPTV_KEEP_INIT; 258 tcp_keepidle = TCPTV_KEEP_IDLE; 259 tcp_keepintvl = TCPTV_KEEPINTVL; 260 tcp_maxpersistidle = TCPTV_KEEP_IDLE; 261 tcp_msl = TCPTV_MSL; --- 7 unchanged lines hidden (view full) --- 269 if (!powerof2(hashsize)) { 270 printf("WARNING: TCB hash size not a power of 2\n"); 271 hashsize = 512; /* safe default */ 272 } 273 tcp_tcbhashsize = hashsize; 274 tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); 275 tcbinfo.porthashbase = hashinit(hashsize, M_PCB, 276 &tcbinfo.porthashmask); |
277 tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb), | 277 tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb), |
278 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 279 uma_zone_set_max(tcbinfo.ipi_zone, maxsockets); 280#ifdef INET6 281#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) 282#else /* INET6 */ 283#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) 284#endif /* INET6 */ 285 if (max_protohdr < TCP_MINPROTOHDR) 286 max_protohdr = TCP_MINPROTOHDR; 287 if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) 288 panic("tcp_init"); 289#undef TCP_MINPROTOHDR 290 /* 291 * These have to be type stable for the benefit of the timers. 292 */ | 278 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 279 uma_zone_set_max(tcbinfo.ipi_zone, maxsockets); 280#ifdef INET6 281#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) 282#else /* INET6 */ 283#define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) 284#endif /* INET6 */ 285 if (max_protohdr < TCP_MINPROTOHDR) 286 max_protohdr = TCP_MINPROTOHDR; 287 if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) 288 panic("tcp_init"); 289#undef TCP_MINPROTOHDR 290 /* 291 * These have to be type stable for the benefit of the timers. 292 */ |
293 tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), | 293 tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), |
294 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 295 uma_zone_set_max(tcpcb_zone, maxsockets); | 294 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 295 uma_zone_set_max(tcpcb_zone, maxsockets); |
296 tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw), | 296 tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw), |
297 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 298 uma_zone_set_max(tcptw_zone, maxsockets / 5); 299 tcp_timer_init(); 300 syncache_init(); 301 tcp_hc_init(); 302 tcp_reass_init(); 303 callout_init(&isn_callout, CALLOUT_MPSAFE); 304 tcp_isn_tick(NULL); 305 EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, 306 SHUTDOWN_PRI_DEFAULT); | 297 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 298 uma_zone_set_max(tcptw_zone, maxsockets / 5); 299 tcp_timer_init(); 300 syncache_init(); 301 tcp_hc_init(); 302 tcp_reass_init(); 303 callout_init(&isn_callout, CALLOUT_MPSAFE); 304 tcp_isn_tick(NULL); 305 EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, 306 SHUTDOWN_PRI_DEFAULT); |
307 sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), | 307 sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), |
308 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 309} 310 311void 312tcp_fini(xtp) 313 void *xtp; 314{ 315 callout_stop(&isn_callout); --- 138 unchanged lines hidden (view full) --- 454 if (m == NULL) { 455 m = m_gethdr(M_DONTWAIT, MT_HEADER); 456 if (m == NULL) 457 return; 458 tlen = 0; 459 m->m_data += max_linkhdr; 460#ifdef INET6 461 if (isipv6) { | 308 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 309} 310 311void 312tcp_fini(xtp) 313 void *xtp; 314{ 315 callout_stop(&isn_callout); --- 138 unchanged lines hidden (view full) --- 454 if (m == NULL) { 455 m = m_gethdr(M_DONTWAIT, MT_HEADER); 456 if (m == NULL) 457 return; 458 tlen = 0; 459 m->m_data += max_linkhdr; 460#ifdef INET6 461 if (isipv6) { |
462 bcopy((caddr_t)ip6, mtod(m, caddr_t), | 462 bcopy((caddr_t)ip6, mtod(m, caddr_t), |
463 sizeof(struct ip6_hdr)); 464 ip6 = mtod(m, struct ip6_hdr *); 465 nth = (struct tcphdr *)(ip6 + 1); 466 } else 467#endif /* INET6 */ 468 { 469 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 470 ip = mtod(m, struct ip *); --- 35 unchanged lines hidden (view full) --- 506 ip6->ip6_flow = 0; 507 ip6->ip6_vfc = IPV6_VERSION; 508 ip6->ip6_nxt = IPPROTO_TCP; 509 ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + 510 tlen)); 511 tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); 512 } else 513#endif | 463 sizeof(struct ip6_hdr)); 464 ip6 = mtod(m, struct ip6_hdr *); 465 nth = (struct tcphdr *)(ip6 + 1); 466 } else 467#endif /* INET6 */ 468 { 469 bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); 470 ip = mtod(m, struct ip *); --- 35 unchanged lines hidden (view full) --- 506 ip6->ip6_flow = 0; 507 ip6->ip6_vfc = IPV6_VERSION; 508 ip6->ip6_nxt = IPPROTO_TCP; 509 ip6->ip6_plen = htons((u_short)(sizeof (struct tcphdr) + 510 tlen)); 511 tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr); 512 } else 513#endif |
514 { 515 tlen += sizeof (struct tcpiphdr); 516 ip->ip_len = tlen; 517 ip->ip_ttl = ip_defttl; 518 if (path_mtu_discovery) 519 ip->ip_off |= IP_DF; 520 } | 514 { 515 tlen += sizeof (struct tcpiphdr); 516 ip->ip_len = tlen; 517 ip->ip_ttl = ip_defttl; 518 if (path_mtu_discovery) 519 ip->ip_off |= IP_DF; 520 } |
521 m->m_len = tlen; 522 m->m_pkthdr.len = tlen; 523 m->m_pkthdr.rcvif = NULL; 524#ifdef MAC 525 if (inp != NULL) { 526 /* 527 * Packet is associated with a socket, so allow the 528 * label of the response to reflect the socket label. --- 23 unchanged lines hidden (view full) --- 552 nth->th_sum = 0; 553 nth->th_sum = in6_cksum(m, IPPROTO_TCP, 554 sizeof(struct ip6_hdr), 555 tlen - sizeof(struct ip6_hdr)); 556 ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : 557 NULL, NULL); 558 } else 559#endif /* INET6 */ | 521 m->m_len = tlen; 522 m->m_pkthdr.len = tlen; 523 m->m_pkthdr.rcvif = NULL; 524#ifdef MAC 525 if (inp != NULL) { 526 /* 527 * Packet is associated with a socket, so allow the 528 * label of the response to reflect the socket label. --- 23 unchanged lines hidden (view full) --- 552 nth->th_sum = 0; 553 nth->th_sum = in6_cksum(m, IPPROTO_TCP, 554 sizeof(struct ip6_hdr), 555 tlen - sizeof(struct ip6_hdr)); 556 ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : 557 NULL, NULL); 558 } else 559#endif /* INET6 */ |
560 { 561 nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 562 htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); 563 m->m_pkthdr.csum_flags = CSUM_TCP; 564 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 565 } | 560 { 561 nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 562 htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); 563 m->m_pkthdr.csum_flags = CSUM_TCP; 564 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 565 } |
566#ifdef TCPDEBUG 567 if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) 568 tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); 569#endif 570#ifdef INET6 571 if (isipv6) 572 (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); 573 else --- 57 unchanged lines hidden (view full) --- 631 tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; 632 tp->t_rttmin = tcp_rexmit_min; 633 tp->t_rxtcur = TCPTV_RTOBASE; 634 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 635 tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 636 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; 637 tp->t_rcvtime = ticks; 638 tp->t_bw_rtttime = ticks; | 566#ifdef TCPDEBUG 567 if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) 568 tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); 569#endif 570#ifdef INET6 571 if (isipv6) 572 (void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp); 573 else --- 57 unchanged lines hidden (view full) --- 631 tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; 632 tp->t_rttmin = tcp_rexmit_min; 633 tp->t_rxtcur = TCPTV_RTOBASE; 634 tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 635 tp->snd_bwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; 636 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; 637 tp->t_rcvtime = ticks; 638 tp->t_bw_rtttime = ticks; |
639 /* | 639 /* |
640 * IPv4 TTL initialization is necessary for an IPv6 socket as well, 641 * because the socket may be bound to an IPv6 wildcard address, 642 * which may match an IPv4-mapped IPv6 address. 643 */ 644 inp->inp_ip_ttl = ip_defttl; 645 inp->inp_ppcb = (caddr_t)tp; 646 return (tp); /* XXX */ 647} --- 87 unchanged lines hidden (view full) --- 735 ssthresh = 0; 736 metrics.rmx_ssthresh = ssthresh; 737 738 metrics.rmx_rtt = tp->t_srtt; 739 metrics.rmx_rttvar = tp->t_rttvar; 740 /* XXX: This wraps if the pipe is more than 4 Gbit per second */ 741 metrics.rmx_bandwidth = tp->snd_bandwidth; 742 metrics.rmx_cwnd = tp->snd_cwnd; | 640 * IPv4 TTL initialization is necessary for an IPv6 socket as well, 641 * because the socket may be bound to an IPv6 wildcard address, 642 * which may match an IPv4-mapped IPv6 address. 643 */ 644 inp->inp_ip_ttl = ip_defttl; 645 inp->inp_ppcb = (caddr_t)tp; 646 return (tp); /* XXX */ 647} --- 87 unchanged lines hidden (view full) --- 735 ssthresh = 0; 736 metrics.rmx_ssthresh = ssthresh; 737 738 metrics.rmx_rtt = tp->t_srtt; 739 metrics.rmx_rttvar = tp->t_rttvar; 740 /* XXX: This wraps if the pipe is more than 4 Gbit per second */ 741 metrics.rmx_bandwidth = tp->snd_bandwidth; 742 metrics.rmx_cwnd = tp->snd_cwnd; |
743 metrics.rmx_sendpipe = 0; | 743 metrics.rmx_sendpipe = 0; |
744 metrics.rmx_recvpipe = 0; 745 746 tcp_hc_update(&inp->inp_inc, &metrics); 747 } 748 749 /* free the reassembly queue, if any */ 750 while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { 751 LIST_REMOVE(q, tqe_q); --- 44 unchanged lines hidden (view full) --- 796 struct tcpcb *tcpb; 797 struct tseg_qent *te; 798 799 /* 800 * Walk the tcpbs, if existing, and flush the reassembly queue, 801 * if there is one... 802 * XXX: The "Net/3" implementation doesn't imply that the TCP 803 * reassembly queue should be flushed, but in a situation | 744 metrics.rmx_recvpipe = 0; 745 746 tcp_hc_update(&inp->inp_inc, &metrics); 747 } 748 749 /* free the reassembly queue, if any */ 750 while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { 751 LIST_REMOVE(q, tqe_q); --- 44 unchanged lines hidden (view full) --- 796 struct tcpcb *tcpb; 797 struct tseg_qent *te; 798 799 /* 800 * Walk the tcpbs, if existing, and flush the reassembly queue, 801 * if there is one... 802 * XXX: The "Net/3" implementation doesn't imply that the TCP 803 * reassembly queue should be flushed, but in a situation |
804 * where we're really low on mbufs, this is potentially 805 * useful. | 804 * where we're really low on mbufs, this is potentially 805 * useful. |
806 */ 807 INP_INFO_RLOCK(&tcbinfo); 808 LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { 809 if (inpb->inp_vflag & INP_TIMEWAIT) 810 continue; 811 INP_LOCK(inpb); 812 if ((tcpb = intotcpcb(inpb)) != NULL) { 813 while ((te = LIST_FIRST(&tcpb->t_segq)) --- 95 unchanged lines hidden (view full) --- 909 xig.xig_sogen = so_gencnt; 910 error = SYSCTL_OUT(req, &xig, sizeof xig); 911 if (error) 912 return error; 913 914 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 915 if (inp_list == NULL) 916 return ENOMEM; | 806 */ 807 INP_INFO_RLOCK(&tcbinfo); 808 LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) { 809 if (inpb->inp_vflag & INP_TIMEWAIT) 810 continue; 811 INP_LOCK(inpb); 812 if ((tcpb = intotcpcb(inpb)) != NULL) { 813 while ((te = LIST_FIRST(&tcpb->t_segq)) --- 95 unchanged lines hidden (view full) --- 909 xig.xig_sogen = so_gencnt; 910 error = SYSCTL_OUT(req, &xig, sizeof xig); 911 if (error) 912 return error; 913 914 inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); 915 if (inp_list == NULL) 916 return ENOMEM; |
917 | 917 |
918 s = splnet(); 919 INP_INFO_RLOCK(&tcbinfo); 920 for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp != NULL && i < n; 921 inp = LIST_NEXT(inp, inp_list)) { 922 INP_LOCK(inp); 923 if (inp->inp_gencnt <= gencnt) { 924 /* 925 * XXX: This use of cr_cansee(), introduced with --- 212 unchanged lines hidden (view full) --- 1138 * excellent DoS attack on machines with many connections. 1139 */ 1140 else if (cmd == PRC_HOSTDEAD) 1141 ip = NULL; 1142 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 1143 return; 1144 if (ip != NULL) { 1145 s = splnet(); | 918 s = splnet(); 919 INP_INFO_RLOCK(&tcbinfo); 920 for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp != NULL && i < n; 921 inp = LIST_NEXT(inp, inp_list)) { 922 INP_LOCK(inp); 923 if (inp->inp_gencnt <= gencnt) { 924 /* 925 * XXX: This use of cr_cansee(), introduced with --- 212 unchanged lines hidden (view full) --- 1138 * excellent DoS attack on machines with many connections. 1139 */ 1140 else if (cmd == PRC_HOSTDEAD) 1141 ip = NULL; 1142 else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) 1143 return; 1144 if (ip != NULL) { 1145 s = splnet(); |
1146 th = (struct tcphdr *)((caddr_t)ip | 1146 th = (struct tcphdr *)((caddr_t)ip |
1147 + (ip->ip_hl << 2)); 1148 INP_INFO_WLOCK(&tcbinfo); 1149 inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, 1150 ip->ip_src, th->th_sport, 0, NULL); 1151 if (inp != NULL) { 1152 INP_LOCK(inp); 1153 if (inp->inp_socket != NULL) { 1154 icmp_seq = htonl(th->th_seq); 1155 tp = intotcpcb(inp); 1156 if (SEQ_GEQ(icmp_seq, tp->snd_una) && | 1147 + (ip->ip_hl << 2)); 1148 INP_INFO_WLOCK(&tcbinfo); 1149 inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, 1150 ip->ip_src, th->th_sport, 0, NULL); 1151 if (inp != NULL) { 1152 INP_LOCK(inp); 1153 if (inp->inp_socket != NULL) { 1154 icmp_seq = htonl(th->th_seq); 1155 tp = intotcpcb(inp); 1156 if (SEQ_GEQ(icmp_seq, tp->snd_una) && |
1157 SEQ_LT(icmp_seq, tp->snd_max)) | 1157 SEQ_LT(icmp_seq, tp->snd_max)) |
1158 inp = (*notify)(inp, inetctlerrmap[cmd]); 1159 } 1160 if (inp != NULL) 1161 INP_UNLOCK(inp); 1162 } else { 1163 struct in_conninfo inc; 1164 1165 inc.inc_fport = th->th_dport; --- 98 unchanged lines hidden (view full) --- 1264 * 1265 * All ISNs for SYN-ACK packets are generated by the syncache. See 1266 * tcp_syncache.c for details. 1267 * 1268 * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling 1269 * depends on this property. In addition, these ISNs should be 1270 * unguessable so as to prevent connection hijacking. To satisfy 1271 * the requirements of this situation, the algorithm outlined in | 1158 inp = (*notify)(inp, inetctlerrmap[cmd]); 1159 } 1160 if (inp != NULL) 1161 INP_UNLOCK(inp); 1162 } else { 1163 struct in_conninfo inc; 1164 1165 inc.inc_fport = th->th_dport; --- 98 unchanged lines hidden (view full) --- 1264 * 1265 * All ISNs for SYN-ACK packets are generated by the syncache. See 1266 * tcp_syncache.c for details. 1267 * 1268 * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling 1269 * depends on this property. In addition, these ISNs should be 1270 * unguessable so as to prevent connection hijacking. To satisfy 1271 * the requirements of this situation, the algorithm outlined in |
1272 * RFC 1948 is used, with only small modifications. | 1272 * RFC 1948 is used, with only small modifications. |
1273 * 1274 * Implementation details: 1275 * 1276 * Time is based off the system timer, and is corrected so that it 1277 * increases by one megabyte per second. This allows for proper 1278 * recycling on high speed LANs while still leaving over an hour 1279 * before rollover. 1280 * --- 33 unchanged lines hidden (view full) --- 1314 1315 /* Seed if this is the first use, reseed if requested. */ 1316 if ((isn_last_reseed == 0) || ((tcp_isn_reseed_interval > 0) && 1317 (((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz) 1318 < (u_int)ticks))) { 1319 read_random(&isn_secret, sizeof(isn_secret)); 1320 isn_last_reseed = ticks; 1321 } | 1273 * 1274 * Implementation details: 1275 * 1276 * Time is based off the system timer, and is corrected so that it 1277 * increases by one megabyte per second. This allows for proper 1278 * recycling on high speed LANs while still leaving over an hour 1279 * before rollover. 1280 * --- 33 unchanged lines hidden (view full) --- 1314 1315 /* Seed if this is the first use, reseed if requested. */ 1316 if ((isn_last_reseed == 0) || ((tcp_isn_reseed_interval > 0) && 1317 (((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz) 1318 < (u_int)ticks))) { 1319 read_random(&isn_secret, sizeof(isn_secret)); 1320 isn_last_reseed = ticks; 1321 } |
1322 | 1322 |
1323 /* Compute the md5 hash and return the ISN. */ 1324 MD5Init(&isn_ctx); 1325 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); 1326 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); 1327#ifdef INET6 1328 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { 1329 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, 1330 sizeof(struct in6_addr)); --- 21 unchanged lines hidden (view full) --- 1352 * to keep time flowing at a relatively constant rate. If the random 1353 * increments have already pushed us past the projected offset, do nothing. 1354 */ 1355static void 1356tcp_isn_tick(xtp) 1357 void *xtp; 1358{ 1359 u_int32_t projected_offset; | 1323 /* Compute the md5 hash and return the ISN. */ 1324 MD5Init(&isn_ctx); 1325 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); 1326 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); 1327#ifdef INET6 1328 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { 1329 MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, 1330 sizeof(struct in6_addr)); --- 21 unchanged lines hidden (view full) --- 1352 * to keep time flowing at a relatively constant rate. If the random 1353 * increments have already pushed us past the projected offset, do nothing. 1354 */ 1355static void 1356tcp_isn_tick(xtp) 1357 void *xtp; 1358{ 1359 u_int32_t projected_offset; |
1360 | 1360 |
1361 projected_offset = isn_offset_old + ISN_BYTES_PER_SECOND / hz; 1362 1363 if (projected_offset > isn_offset) 1364 isn_offset = projected_offset; 1365 1366 isn_offset_old = isn_offset; 1367 callout_reset(&isn_callout, 1, tcp_isn_tick, NULL); 1368} --- 138 unchanged lines hidden (view full) --- 1507} 1508 1509/* 1510 * Look-up the routing entry to the peer of this inpcb. If no route 1511 * is found and it cannot be allocated, then return NULL. This routine 1512 * is called by TCP routines that access the rmx structure and by tcp_mss 1513 * to get the interface MTU. 1514 */ | 1361 projected_offset = isn_offset_old + ISN_BYTES_PER_SECOND / hz; 1362 1363 if (projected_offset > isn_offset) 1364 isn_offset = projected_offset; 1365 1366 isn_offset_old = isn_offset; 1367 callout_reset(&isn_callout, 1, tcp_isn_tick, NULL); 1368} --- 138 unchanged lines hidden (view full) --- 1507} 1508 1509/* 1510 * Look-up the routing entry to the peer of this inpcb. If no route 1511 * is found and it cannot be allocated, then return NULL. This routine 1512 * is called by TCP routines that access the rmx structure and by tcp_mss 1513 * to get the interface MTU. 1514 */ |
1515u_long | 1515u_long |
1516tcp_maxmtu(inc) 1517 struct in_conninfo *inc; 1518{ 1519 struct route sro; 1520 struct sockaddr_in *dst; 1521 struct ifnet *ifp; 1522 u_long maxmtu = 0; 1523 --- 76 unchanged lines hidden (view full) --- 1600 ip6 = mtod(m, struct ip6_hdr *); 1601 th = (struct tcphdr *)(ip6 + 1); 1602 m->m_pkthdr.len = m->m_len = 1603 sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 1604 tcpip_fillheaders(inp, ip6, th); 1605 hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1606 } else 1607#endif /* INET6 */ | 1516tcp_maxmtu(inc) 1517 struct in_conninfo *inc; 1518{ 1519 struct route sro; 1520 struct sockaddr_in *dst; 1521 struct ifnet *ifp; 1522 u_long maxmtu = 0; 1523 --- 76 unchanged lines hidden (view full) --- 1600 ip6 = mtod(m, struct ip6_hdr *); 1601 th = (struct tcphdr *)(ip6 + 1); 1602 m->m_pkthdr.len = m->m_len = 1603 sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 1604 tcpip_fillheaders(inp, ip6, th); 1605 hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1606 } else 1607#endif /* INET6 */ |
1608 { 1609 ip = mtod(m, struct ip *); 1610 th = (struct tcphdr *)(ip + 1); 1611 m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); 1612 tcpip_fillheaders(inp, ip, th); 1613 hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1614 } | 1608 { 1609 ip = mtod(m, struct ip *); 1610 th = (struct tcphdr *)(ip + 1); 1611 m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); 1612 tcpip_fillheaders(inp, ip, th); 1613 hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); 1614 } |
1615 1616 m_free(m); 1617 return hdrsiz; 1618} 1619#endif /*IPSEC*/ 1620 1621/* 1622 * Move a TCP connection into TIME_WAIT state. --- 23 unchanged lines hidden (view full) --- 1646 /* 1647 * Recover last window size sent. 1648 */ 1649 tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; 1650 1651 /* 1652 * Set t_recent if timestamps are used on the connection. 1653 */ | 1615 1616 m_free(m); 1617 return hdrsiz; 1618} 1619#endif /*IPSEC*/ 1620 1621/* 1622 * Move a TCP connection into TIME_WAIT state. --- 23 unchanged lines hidden (view full) --- 1646 /* 1647 * Recover last window size sent. 1648 */ 1649 tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale; 1650 1651 /* 1652 * Set t_recent if timestamps are used on the connection. 1653 */ |
1654 if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == 1655 (TF_REQ_TSTMP|TF_RCVD_TSTMP)) | 1654 if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == 1655 (TF_REQ_TSTMP|TF_RCVD_TSTMP)) |
1656 tw->t_recent = tp->ts_recent; 1657 else 1658 tw->t_recent = 0; 1659 1660 tw->snd_nxt = tp->snd_nxt; 1661 tw->rcv_nxt = tp->rcv_nxt; 1662 tw->iss = tp->iss; 1663 tw->irs = tp->irs; --- 50 unchanged lines hidden (view full) --- 1714int 1715tcp_twrecycleable(struct tcptw *tw) 1716{ 1717 tcp_seq new_iss = tw->iss; 1718 tcp_seq new_irs = tw->irs; 1719 1720 new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz); 1721 new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz); | 1656 tw->t_recent = tp->ts_recent; 1657 else 1658 tw->t_recent = 0; 1659 1660 tw->snd_nxt = tp->snd_nxt; 1661 tw->rcv_nxt = tp->rcv_nxt; 1662 tw->iss = tp->iss; 1663 tw->irs = tp->irs; --- 50 unchanged lines hidden (view full) --- 1714int 1715tcp_twrecycleable(struct tcptw *tw) 1716{ 1717 tcp_seq new_iss = tw->iss; 1718 tcp_seq new_irs = tw->irs; 1719 1720 new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz); 1721 new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz); |
1722 | 1722 |
1723 if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt)) 1724 return 1; 1725 else 1726 return 0; 1727} 1728 1729struct tcptw * 1730tcp_twclose(struct tcptw *tw, int reuse) --- 53 unchanged lines hidden (view full) --- 1784#endif 1785 { 1786 hdrlen = sizeof(struct tcpiphdr); 1787 ip = mtod(m, struct ip *); 1788 th = (struct tcphdr *)(ip + 1); 1789 tcpip_fillheaders(inp, ip, th); 1790 } 1791 optp = (u_int8_t *)(th + 1); | 1723 if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt)) 1724 return 1; 1725 else 1726 return 0; 1727} 1728 1729struct tcptw * 1730tcp_twclose(struct tcptw *tw, int reuse) --- 53 unchanged lines hidden (view full) --- 1784#endif 1785 { 1786 hdrlen = sizeof(struct tcpiphdr); 1787 ip = mtod(m, struct ip *); 1788 th = (struct tcphdr *)(ip + 1); 1789 tcpip_fillheaders(inp, ip, th); 1790 } 1791 optp = (u_int8_t *)(th + 1); |
1792 1793 /* | 1792 1793 /* |
1794 * Send a timestamp and echo-reply if both our side and our peer 1795 * have sent timestamps in our SYN's and this is not a RST. | 1794 * Send a timestamp and echo-reply if both our side and our peer 1795 * have sent timestamps in our SYN's and this is not a RST. |
1796 */ | 1796 */ |
1797 if (tw->t_recent && flags == TH_ACK) { 1798 u_int32_t *lp = (u_int32_t *)optp; 1799 | 1797 if (tw->t_recent && flags == TH_ACK) { 1798 u_int32_t *lp = (u_int32_t *)optp; 1799 |
1800 /* Form timestamp option as shown in appendix A of RFC 1323. */ 1801 *lp++ = htonl(TCPOPT_TSTAMP_HDR); 1802 *lp++ = htonl(ticks); 1803 *lp = htonl(tw->t_recent); 1804 optp += TCPOLEN_TSTAMP_APPA; 1805 } | 1800 /* Form timestamp option as shown in appendix A of RFC 1323. */ 1801 *lp++ = htonl(TCPOPT_TSTAMP_HDR); 1802 *lp++ = htonl(ticks); 1803 *lp = htonl(tw->t_recent); 1804 optp += TCPOLEN_TSTAMP_APPA; 1805 } |
1806 | 1806 |
1807 /* | 1807 /* |
1808 * Send `CC-family' options if needed, and it's not a RST. | 1808 * Send `CC-family' options if needed, and it's not a RST. |
1809 */ | 1809 */ |
1810 if (tw->cc_recv != 0 && flags == TH_ACK) { 1811 u_int32_t *lp = (u_int32_t *)optp; 1812 1813 *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); 1814 *lp = htonl(tw->cc_send); 1815 optp += TCPOLEN_CC_APPA; | 1810 if (tw->cc_recv != 0 && flags == TH_ACK) { 1811 u_int32_t *lp = (u_int32_t *)optp; 1812 1813 *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); 1814 *lp = htonl(tw->cc_send); 1815 optp += TCPOLEN_CC_APPA; |
1816 } | 1816 } |
1817 optlen = optp - (u_int8_t *)(th + 1); 1818 1819 m->m_len = hdrlen + optlen; 1820 m->m_pkthdr.len = m->m_len; 1821 1822 KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); 1823 1824 th->th_seq = htonl(tw->snd_nxt); --- 8 unchanged lines hidden (view full) --- 1833 sizeof(struct tcphdr) + optlen); 1834 ip6->ip6_hlim = in6_selecthlim(inp, NULL); 1835 error = ip6_output(m, inp->in6p_outputopts, NULL, 1836 (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); 1837 } else 1838#endif 1839 { 1840 th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, | 1817 optlen = optp - (u_int8_t *)(th + 1); 1818 1819 m->m_len = hdrlen + optlen; 1820 m->m_pkthdr.len = m->m_len; 1821 1822 KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); 1823 1824 th->th_seq = htonl(tw->snd_nxt); --- 8 unchanged lines hidden (view full) --- 1833 sizeof(struct tcphdr) + optlen); 1834 ip6->ip6_hlim = in6_selecthlim(inp, NULL); 1835 error = ip6_output(m, inp->in6p_outputopts, NULL, 1836 (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); 1837 } else 1838#endif 1839 { 1840 th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, |
1841 htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); | 1841 htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); |
1842 m->m_pkthdr.csum_flags = CSUM_TCP; 1843 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1844 ip->ip_len = m->m_pkthdr.len; 1845 if (path_mtu_discovery) 1846 ip->ip_off |= IP_DF; 1847 error = ip_output(m, inp->inp_options, NULL, 1848 (tw->tw_so_options & SO_DONTROUTE), NULL, inp); 1849 } --- 13 unchanged lines hidden (view full) --- 1863 * minimize RTT, and avoid the over-allocation of buffers on interfaces and 1864 * routers. This code also does a fairly good job keeping RTTs in check 1865 * across slow links like modems. We implement an algorithm which is very 1866 * similar (but not meant to be) TCP/Vegas. The code operates on the 1867 * transmitter side of a TCP connection and so only effects the transmit 1868 * side of the connection. 1869 * 1870 * BACKGROUND: TCP makes no provision for the management of buffer space | 1842 m->m_pkthdr.csum_flags = CSUM_TCP; 1843 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1844 ip->ip_len = m->m_pkthdr.len; 1845 if (path_mtu_discovery) 1846 ip->ip_off |= IP_DF; 1847 error = ip_output(m, inp->inp_options, NULL, 1848 (tw->tw_so_options & SO_DONTROUTE), NULL, inp); 1849 } --- 13 unchanged lines hidden (view full) --- 1863 * minimize RTT, and avoid the over-allocation of buffers on interfaces and 1864 * routers. This code also does a fairly good job keeping RTTs in check 1865 * across slow links like modems. We implement an algorithm which is very 1866 * similar (but not meant to be) TCP/Vegas. The code operates on the 1867 * transmitter side of a TCP connection and so only effects the transmit 1868 * side of the connection. 1869 * 1870 * BACKGROUND: TCP makes no provision for the management of buffer space |
1871 * at the end points or at the intermediate routers and switches. A TCP | 1871 * at the end points or at the intermediate routers and switches. A TCP |
1872 * stream, whether using NewReno or not, will eventually buffer as 1873 * many packets as it is able and the only reason this typically works is 1874 * due to the fairly small default buffers made available for a connection 1875 * (typically 16K or 32K). As machines use larger windows and/or window 1876 * scaling it is now fairly easy for even a single TCP connection to blow-out | 1872 * stream, whether using NewReno or not, will eventually buffer as 1873 * many packets as it is able and the only reason this typically works is 1874 * due to the fairly small default buffers made available for a connection 1875 * (typically 16K or 32K). As machines use larger windows and/or window 1876 * scaling it is now fairly easy for even a single TCP connection to blow-out |
1877 * all available buffer space not only on the local interface, but on | 1877 * all available buffer space not only on the local interface, but on |
1878 * intermediate routers and switches as well. NewReno makes a misguided 1879 * attempt to 'solve' this problem by waiting for an actual failure to occur, 1880 * then backing off, then steadily increasing the window again until another 1881 * failure occurs, ad-infinitum. This results in terrible oscillation that 1882 * is only made worse as network loads increase and the idea of intentionally 1883 * blowing out network buffers is, frankly, a terrible way to manage network 1884 * resources. 1885 * --- 5 unchanged lines hidden (view full) --- 1891 * repeat ad-infinitum. This method works in principle but has severe 1892 * implementation issues due to RTT variances, timer granularity, and 1893 * instability in the algorithm which can lead to many false positives and 1894 * create oscillations as well as interact badly with other TCP streams 1895 * implementing the same algorithm. 1896 * 1897 * The second method is to limit the window to the bandwidth delay product 1898 * of the link. This is the method we implement. RTT variances and our | 1878 * intermediate routers and switches as well. NewReno makes a misguided 1879 * attempt to 'solve' this problem by waiting for an actual failure to occur, 1880 * then backing off, then steadily increasing the window again until another 1881 * failure occurs, ad-infinitum. This results in terrible oscillation that 1882 * is only made worse as network loads increase and the idea of intentionally 1883 * blowing out network buffers is, frankly, a terrible way to manage network 1884 * resources. 1885 * --- 5 unchanged lines hidden (view full) --- 1891 * repeat ad-infinitum. This method works in principle but has severe 1892 * implementation issues due to RTT variances, timer granularity, and 1893 * instability in the algorithm which can lead to many false positives and 1894 * create oscillations as well as interact badly with other TCP streams 1895 * implementing the same algorithm. 
1896 * 1897 * The second method is to limit the window to the bandwidth delay product 1898 * of the link. This is the method we implement. RTT variances and our |
1899 * own manipulation of the congestion window, bwnd, can potentially | 1899 * own manipulation of the congestion window, bwnd, can potentially |
1900 * destabilize the algorithm. For this reason we have to stabilize the 1901 * elements used to calculate the window. We do this by using the minimum 1902 * observed RTT, the long term average of the observed bandwidth, and 1903 * by adding two segments worth of slop. It isn't perfect but it is able 1904 * to react to changing conditions and gives us a very stable basis on 1905 * which to extend the algorithm. 1906 */ 1907void --- 23 unchanged lines hidden (view full) --- 1931 * 1932 * Note: if ticks rollover 'bw' may wind up negative. We must 1933 * effectively reset t_bw_rtttime for this case. 1934 */ 1935 save_ticks = ticks; 1936 if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1) 1937 return; 1938 | 1900 * destabilize the algorithm. For this reason we have to stabilize the 1901 * elements used to calculate the window. We do this by using the minimum 1902 * observed RTT, the long term average of the observed bandwidth, and 1903 * by adding two segments worth of slop. It isn't perfect but it is able 1904 * to react to changing conditions and gives us a very stable basis on 1905 * which to extend the algorithm. 1906 */ 1907void --- 23 unchanged lines hidden (view full) --- 1931 * 1932 * Note: if ticks rollover 'bw' may wind up negative. We must 1933 * effectively reset t_bw_rtttime for this case. 1934 */ 1935 save_ticks = ticks; 1936 if ((u_int)(save_ticks - tp->t_bw_rtttime) < 1) 1937 return; 1938 |
1939 bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / | 1939 bw = (int64_t)(ack_seq - tp->t_bw_rtseq) * hz / |
1940 (save_ticks - tp->t_bw_rtttime); 1941 tp->t_bw_rtttime = save_ticks; 1942 tp->t_bw_rtseq = ack_seq; 1943 if (tp->t_bw_rtttime == 0 || (int)bw < 0) 1944 return; 1945 bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4; 1946 1947 tp->snd_bandwidth = bw; 1948 1949 /* 1950 * Calculate the semi-static bandwidth delay product, plus two maximal 1951 * segments. The additional slop puts us squarely in the sweet 1952 * spot and also handles the bandwidth run-up case and stabilization. 1953 * Without the slop we could be locking ourselves into a lower 1954 * bandwidth. 1955 * 1956 * Situations Handled: 1957 * (1) Prevents over-queueing of packets on LANs, especially on 1958 * high speed LANs, allowing larger TCP buffers to be | 1940 (save_ticks - tp->t_bw_rtttime); 1941 tp->t_bw_rtttime = save_ticks; 1942 tp->t_bw_rtseq = ack_seq; 1943 if (tp->t_bw_rtttime == 0 || (int)bw < 0) 1944 return; 1945 bw = ((int64_t)tp->snd_bandwidth * 15 + bw) >> 4; 1946 1947 tp->snd_bandwidth = bw; 1948 1949 /* 1950 * Calculate the semi-static bandwidth delay product, plus two maximal 1951 * segments. The additional slop puts us squarely in the sweet 1952 * spot and also handles the bandwidth run-up case and stabilization. 1953 * Without the slop we could be locking ourselves into a lower 1954 * bandwidth. 1955 * 1956 * Situations Handled: 1957 * (1) Prevents over-queueing of packets on LANs, especially on 1958 * high speed LANs, allowing larger TCP buffers to be |
1959 * specified, and also does a good job preventing | 1959 * specified, and also does a good job preventing |
1960 * over-queueing of packets over choke points like modems 1961 * (at least for the transmit side). 1962 * 1963 * (2) Is able to handle changing network loads (bandwidth 1964 * drops so bwnd drops, bandwidth increases so bwnd 1965 * increases). 1966 * 1967 * (3) Theoretically should stabilize in the face of multiple --- 155 unchanged lines hidden --- | 1960 * over-queueing of packets over choke points like modems 1961 * (at least for the transmit side). 1962 * 1963 * (2) Is able to handle changing network loads (bandwidth 1964 * drops so bwnd drops, bandwidth increases so bwnd 1965 * increases). 1966 * 1967 * (3) Theoretically should stabilize in the face of multiple --- 155 unchanged lines hidden --- |