/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 *
 * $FreeBSD: head/sys/netinet/tcp_timer.c 169454 2007-05-10 15:58:48Z rwatson $
31 */ 32 33#include "opt_inet6.h" 34#include "opt_tcpdebug.h" 35 36#include <sys/param.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/limits.h> 41#include <sys/mbuf.h> 42#include <sys/mutex.h> 43#include <sys/protosw.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <sys/sysctl.h> 47#include <sys/systm.h> 48 49#include <net/route.h> 50 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/in_systm.h> 54#ifdef INET6 55#include <netinet6/in6_pcb.h> 56#endif 57#include <netinet/ip_var.h> 58#include <netinet/tcp.h> 59#include <netinet/tcp_fsm.h> 60#include <netinet/tcp_timer.h> 61#include <netinet/tcp_var.h> 62#include <netinet/tcpip.h> 63#ifdef TCPDEBUG 64#include <netinet/tcp_debug.h> 65#endif 66 67int tcp_keepinit; 68SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 69 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 70 71int tcp_keepidle; 72SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 73 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 74 75int tcp_keepintvl; 76SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 77 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 78 79int tcp_delacktime; 80SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 81 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 82 "Time before a delayed ACK is sent"); 83 84int tcp_msl; 85SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 86 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 87 88int tcp_rexmit_min; 89SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 90 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 91 "Minimum Retransmission Timeout"); 92 93int tcp_rexmit_slop; 94SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 95 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 96 "Retransmission Timer Slop"); 97 
98static int always_keepalive = 1; 99SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 100 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 101 102int tcp_fast_finwait2_recycle = 0; 103SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 104 &tcp_fast_finwait2_recycle, 0, 105 "Recycle closed FIN_WAIT_2 connections faster"); 106 107int tcp_finwait2_timeout; 108SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 109 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 110 111 112static int tcp_keepcnt = TCPTV_KEEPCNT; 113 /* max idle probes */ 114int tcp_maxpersistidle; 115 /* max idle time in persist */ 116int tcp_maxidle; 117 118static void tcp_timer(void *); 119static int tcp_timer_delack(struct tcpcb *, struct inpcb *); 120static int tcp_timer_2msl(struct tcpcb *, struct inpcb *); 121static int tcp_timer_keep(struct tcpcb *, struct inpcb *); 122static int tcp_timer_persist(struct tcpcb *, struct inpcb *); 123static int tcp_timer_rexmt(struct tcpcb *, struct inpcb *); 124 125/* 126 * Tcp protocol timeout routine called every 500 ms. 127 * Updates timestamps used for TCP 128 * causes finite state machine actions if timers expire. 129 */ 130void
| 31 */ 32 33#include "opt_inet6.h" 34#include "opt_tcpdebug.h" 35 36#include <sys/param.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/limits.h> 41#include <sys/mbuf.h> 42#include <sys/mutex.h> 43#include <sys/protosw.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <sys/sysctl.h> 47#include <sys/systm.h> 48 49#include <net/route.h> 50 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/in_systm.h> 54#ifdef INET6 55#include <netinet6/in6_pcb.h> 56#endif 57#include <netinet/ip_var.h> 58#include <netinet/tcp.h> 59#include <netinet/tcp_fsm.h> 60#include <netinet/tcp_timer.h> 61#include <netinet/tcp_var.h> 62#include <netinet/tcpip.h> 63#ifdef TCPDEBUG 64#include <netinet/tcp_debug.h> 65#endif 66 67int tcp_keepinit; 68SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 69 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 70 71int tcp_keepidle; 72SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 73 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 74 75int tcp_keepintvl; 76SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 77 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 78 79int tcp_delacktime; 80SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 81 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 82 "Time before a delayed ACK is sent"); 83 84int tcp_msl; 85SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 86 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 87 88int tcp_rexmit_min; 89SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 90 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 91 "Minimum Retransmission Timeout"); 92 93int tcp_rexmit_slop; 94SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 95 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 96 "Retransmission Timer Slop"); 97 
98static int always_keepalive = 1; 99SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 100 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 101 102int tcp_fast_finwait2_recycle = 0; 103SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 104 &tcp_fast_finwait2_recycle, 0, 105 "Recycle closed FIN_WAIT_2 connections faster"); 106 107int tcp_finwait2_timeout; 108SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 109 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 110 111 112static int tcp_keepcnt = TCPTV_KEEPCNT; 113 /* max idle probes */ 114int tcp_maxpersistidle; 115 /* max idle time in persist */ 116int tcp_maxidle; 117 118static void tcp_timer(void *); 119static int tcp_timer_delack(struct tcpcb *, struct inpcb *); 120static int tcp_timer_2msl(struct tcpcb *, struct inpcb *); 121static int tcp_timer_keep(struct tcpcb *, struct inpcb *); 122static int tcp_timer_persist(struct tcpcb *, struct inpcb *); 123static int tcp_timer_rexmt(struct tcpcb *, struct inpcb *); 124 125/* 126 * Tcp protocol timeout routine called every 500 ms. 127 * Updates timestamps used for TCP 128 * causes finite state machine actions if timers expire. 129 */ 130void
|
131tcp_slowtimo()
| 131tcp_slowtimo(void)
|
132{ 133 134 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 135 INP_INFO_WLOCK(&tcbinfo); 136 (void) tcp_timer_2msl_tw(0); 137 INP_INFO_WUNLOCK(&tcbinfo); 138} 139 140int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 141 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 142 143int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 144 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 145 146static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 147 148static int tcp_timer_race; 149SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 150 0, "Count of t_inpcb races on tcp_discardcb"); 151
| 132{ 133 134 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 135 INP_INFO_WLOCK(&tcbinfo); 136 (void) tcp_timer_2msl_tw(0); 137 INP_INFO_WUNLOCK(&tcbinfo); 138} 139 140int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 141 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 142 143int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 144 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 145 146static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 147 148static int tcp_timer_race; 149SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 150 0, "Count of t_inpcb races on tcp_discardcb"); 151
|
152
| |
153void 154tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 155{ 156 struct inpcb *inp = tp->t_inpcb; 157 struct tcp_timer *tt = tp->t_timers; 158 int tick = ticks; /* Stable time base. */ 159 int next = delta ? tick + delta : 0; 160 161 INP_LOCK_ASSERT(inp); 162 163 CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i", 164 tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc); 165 166 /* Set new value for timer. */ 167 switch(timer_type) { 168 case TT_DELACK: 169 CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i", 170 tp, __func__, tt->tt_delack, next); 171 tt->tt_delack = next; 172 break; 173 case TT_REXMT: 174 CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i", 175 tp, __func__, tt->tt_rexmt, next); 176 tt->tt_rexmt = next; 177 break; 178 case TT_PERSIST: 179 CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i", 180 tp, __func__, tt->tt_persist, next); 181 tt->tt_persist = next; 182 break; 183 case TT_KEEP: 184 CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i", 185 tp, __func__, tt->tt_keep, next); 186 tt->tt_keep = next; 187 break; 188 case TT_2MSL: 189 CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i", 190 tp, __func__, tt->tt_2msl, next); 191 tt->tt_2msl = next; 192 break; 193 case 0: /* Dummy for timer rescan. */ 194 CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next); 195 break; 196 } 197 198 /* If some other timer is active and is schedules sooner just return. */ 199 if (tt->tt_active != timer_type && tt->tt_nextc < next && 200 callout_active(&tt->tt_timer)) 201 return; 202 203 /* Select next timer to schedule. 
*/ 204 tt->tt_nextc = INT_MAX; 205 tt->tt_active = 0; 206 if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) { 207 tt->tt_nextc = tt->tt_delack; 208 tt->tt_active = TT_DELACK; 209 } 210 if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) { 211 tt->tt_nextc = tt->tt_rexmt; 212 tt->tt_active = TT_REXMT; 213 } 214 if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) { 215 tt->tt_nextc = tt->tt_persist; 216 tt->tt_active = TT_PERSIST; 217 } 218 if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) { 219 tt->tt_nextc = tt->tt_keep; 220 tt->tt_active = TT_KEEP; 221 } 222 if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) { 223 tt->tt_nextc = tt->tt_2msl; 224 tt->tt_active = TT_2MSL; 225 } 226 227 /* Rearm callout with new timer if we found one. */ 228 if (tt->tt_active) { 229 CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i", 230 tp, __func__, tt->tt_active, tt->tt_nextc - tick); 231 callout_reset(&tt->tt_timer, 232 tt->tt_nextc - tick, tcp_timer, (void *)inp); 233 } else { 234 CTR2(KTR_NET, "%p %s callout_stop", tp, __func__); 235 callout_stop(&tt->tt_timer); 236 tt->tt_nextc = 0; 237 } 238 239 return; 240} 241 242int 243tcp_timer_active(struct tcpcb *tp, int timer_type) 244{ 245 246 switch (timer_type) { 247 case TT_DELACK: 248 CTR3(KTR_NET, "%p %s TT_DELACK %i", 249 tp, __func__, tp->t_timers->tt_delack); 250 return (tp->t_timers->tt_delack ? 1 : 0); 251 break; 252 case TT_REXMT: 253 CTR3(KTR_NET, "%p %s TT_REXMT %i", 254 tp, __func__, tp->t_timers->tt_rexmt); 255 return (tp->t_timers->tt_rexmt ? 1 : 0); 256 break; 257 case TT_PERSIST: 258 CTR3(KTR_NET, "%p %s TT_PERSIST %i", 259 tp, __func__, tp->t_timers->tt_persist); 260 return (tp->t_timers->tt_persist ? 1 : 0); 261 break; 262 case TT_KEEP: 263 CTR3(KTR_NET, "%p %s TT_KEEP %i", 264 tp, __func__, tp->t_timers->tt_keep); 265 return (tp->t_timers->tt_keep ? 1 : 0); 266 break; 267 case TT_2MSL: 268 CTR3(KTR_NET, "%p %s TT_2MSL %i", 269 tp, __func__, tp->t_timers->tt_2msl); 270 return (tp->t_timers->tt_2msl ? 
1 : 0); 271 break; 272 } 273 return (0); 274} 275 276static void 277tcp_timer(void *xinp) 278{ 279 struct inpcb *inp = (struct inpcb *)xinp; 280 struct tcpcb *tp = intotcpcb(inp); 281 struct tcp_timer *tt; 282 int tick = ticks; 283 int down, timer; 284 285 /* INP lock was obtained by callout. */ 286 INP_LOCK_ASSERT(inp); 287 288 /* 289 * We've got a couple of race conditions here: 290 * - The tcpcb was converted into a compressed TW pcb. All our 291 * timers have been stopped while this callout already tried 292 * to obtain the inpcb lock. TW pcbs have their own timers 293 * and we just return. 294 */ 295 if (inp->inp_vflag & INP_TIMEWAIT) 296 return; 297 /* 298 * - The tcpcb was discarded. All our timers have been stopped 299 * while this callout already tried to obtain the inpcb lock 300 * and we just return. 301 */ 302 if (tp == NULL) 303 return; 304 305 tt = tp->t_timers; /* Initialize. */ 306 CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i", 307 tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc); 308 309 /* 310 * - We may have been waiting on the lock while the tcpcb has 311 * been scheduled for destruction. In this case no active 312 * timers remain and we just return. 313 */ 314 if (tt->tt_active == 0) 315 goto done; 316 317 /* 318 * - The timer was rescheduled while this callout was already 319 * waiting on the lock. This may happen when a packet just 320 * came in. Rescan and reschedule the the timer in case we 321 * just turned it off. 322 */ 323 if (tick < tt->tt_nextc) 324 goto rescan; 325 326 /* 327 * Mark as done. The active bit in struct callout is not 328 * automatically cleared. See callout(9) for more info. 329 * In tcp_discardcb() we depend on the correctly cleared 330 * active bit for faster processing. 331 */ 332 callout_deactivate(&tt->tt_timer); 333 334 /* Check which timer has fired and remove this timer activation. 
*/ 335 timer = tt->tt_active; 336 tt->tt_active = 0; 337 tt->tt_nextc = 0; 338 339 switch (timer) { 340 case TT_DELACK: 341 CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__); 342 tt->tt_delack = 0; 343 down = tcp_timer_delack(tp, inp); /* down == 0 */ 344 break; 345 case TT_REXMT: 346 CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__); 347 tt->tt_rexmt = 0; 348 down = tcp_timer_rexmt(tp, inp); 349 break; 350 case TT_PERSIST: 351 CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__); 352 tt->tt_persist = 0; 353 down = tcp_timer_persist(tp, inp); 354 break; 355 case TT_KEEP: 356 CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__); 357 tt->tt_keep = 0; 358 down = tcp_timer_keep(tp, inp); 359 break; 360 case TT_2MSL: 361 CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__); 362 tt->tt_2msl = 0; 363 down = tcp_timer_2msl(tp, inp); 364 break; 365 default: 366 CTR2(KTR_NET, "%p %s running nothing", tp, __func__); 367 down = 0; 368 } 369 370 CTR4(KTR_NET, "%p %s down %i active %x", 371 tp, __func__, down, tt->tt_active); 372 /* Do we still exist? */ 373 if (down) 374 goto shutdown; 375 376rescan: 377 /* Rescan if no timer was reactivated above. */ 378 if (tt->tt_active == 0) 379 tcp_timer_activate(tp, 0, 0); 380 381done: 382 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 383 return; 384 385shutdown: 386 INP_UNLOCK(inp); /* Prevent LOR at expense of race. */ 387 INP_INFO_WLOCK(&tcbinfo); 388 INP_LOCK(inp); 389 390 /* When tp is gone we've lost the race. */ 391 if (inp->inp_ppcb == NULL) { 392 CTR3(KTR_NET, "%p %s inp %p lost shutdown race", 393 tp, __func__, inp); 394 tcp_timer_race++; 395 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 396 INP_INFO_WUNLOCK(&tcbinfo); 397 return; 398 } 399 KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__)); 400 401 /* Shutdown the connection. */ 402 switch (down) { 403 case 1: 404 tp = tcp_close(tp); 405 break; 406 case 2: 407 tp = tcp_drop(tp, 408 tp->t_softerror ? 
tp->t_softerror : ETIMEDOUT); 409 break; 410 } 411 CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp); 412 413 if (tp) 414 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 415 416 INP_INFO_WUNLOCK(&tcbinfo); 417 return; 418} 419
| 152void 153tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 154{ 155 struct inpcb *inp = tp->t_inpcb; 156 struct tcp_timer *tt = tp->t_timers; 157 int tick = ticks; /* Stable time base. */ 158 int next = delta ? tick + delta : 0; 159 160 INP_LOCK_ASSERT(inp); 161 162 CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i", 163 tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc); 164 165 /* Set new value for timer. */ 166 switch(timer_type) { 167 case TT_DELACK: 168 CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i", 169 tp, __func__, tt->tt_delack, next); 170 tt->tt_delack = next; 171 break; 172 case TT_REXMT: 173 CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i", 174 tp, __func__, tt->tt_rexmt, next); 175 tt->tt_rexmt = next; 176 break; 177 case TT_PERSIST: 178 CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i", 179 tp, __func__, tt->tt_persist, next); 180 tt->tt_persist = next; 181 break; 182 case TT_KEEP: 183 CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i", 184 tp, __func__, tt->tt_keep, next); 185 tt->tt_keep = next; 186 break; 187 case TT_2MSL: 188 CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i", 189 tp, __func__, tt->tt_2msl, next); 190 tt->tt_2msl = next; 191 break; 192 case 0: /* Dummy for timer rescan. */ 193 CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next); 194 break; 195 } 196 197 /* If some other timer is active and is schedules sooner just return. */ 198 if (tt->tt_active != timer_type && tt->tt_nextc < next && 199 callout_active(&tt->tt_timer)) 200 return; 201 202 /* Select next timer to schedule. 
*/ 203 tt->tt_nextc = INT_MAX; 204 tt->tt_active = 0; 205 if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) { 206 tt->tt_nextc = tt->tt_delack; 207 tt->tt_active = TT_DELACK; 208 } 209 if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) { 210 tt->tt_nextc = tt->tt_rexmt; 211 tt->tt_active = TT_REXMT; 212 } 213 if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) { 214 tt->tt_nextc = tt->tt_persist; 215 tt->tt_active = TT_PERSIST; 216 } 217 if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) { 218 tt->tt_nextc = tt->tt_keep; 219 tt->tt_active = TT_KEEP; 220 } 221 if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) { 222 tt->tt_nextc = tt->tt_2msl; 223 tt->tt_active = TT_2MSL; 224 } 225 226 /* Rearm callout with new timer if we found one. */ 227 if (tt->tt_active) { 228 CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i", 229 tp, __func__, tt->tt_active, tt->tt_nextc - tick); 230 callout_reset(&tt->tt_timer, 231 tt->tt_nextc - tick, tcp_timer, (void *)inp); 232 } else { 233 CTR2(KTR_NET, "%p %s callout_stop", tp, __func__); 234 callout_stop(&tt->tt_timer); 235 tt->tt_nextc = 0; 236 } 237 238 return; 239} 240 241int 242tcp_timer_active(struct tcpcb *tp, int timer_type) 243{ 244 245 switch (timer_type) { 246 case TT_DELACK: 247 CTR3(KTR_NET, "%p %s TT_DELACK %i", 248 tp, __func__, tp->t_timers->tt_delack); 249 return (tp->t_timers->tt_delack ? 1 : 0); 250 break; 251 case TT_REXMT: 252 CTR3(KTR_NET, "%p %s TT_REXMT %i", 253 tp, __func__, tp->t_timers->tt_rexmt); 254 return (tp->t_timers->tt_rexmt ? 1 : 0); 255 break; 256 case TT_PERSIST: 257 CTR3(KTR_NET, "%p %s TT_PERSIST %i", 258 tp, __func__, tp->t_timers->tt_persist); 259 return (tp->t_timers->tt_persist ? 1 : 0); 260 break; 261 case TT_KEEP: 262 CTR3(KTR_NET, "%p %s TT_KEEP %i", 263 tp, __func__, tp->t_timers->tt_keep); 264 return (tp->t_timers->tt_keep ? 1 : 0); 265 break; 266 case TT_2MSL: 267 CTR3(KTR_NET, "%p %s TT_2MSL %i", 268 tp, __func__, tp->t_timers->tt_2msl); 269 return (tp->t_timers->tt_2msl ? 
1 : 0); 270 break; 271 } 272 return (0); 273} 274 275static void 276tcp_timer(void *xinp) 277{ 278 struct inpcb *inp = (struct inpcb *)xinp; 279 struct tcpcb *tp = intotcpcb(inp); 280 struct tcp_timer *tt; 281 int tick = ticks; 282 int down, timer; 283 284 /* INP lock was obtained by callout. */ 285 INP_LOCK_ASSERT(inp); 286 287 /* 288 * We've got a couple of race conditions here: 289 * - The tcpcb was converted into a compressed TW pcb. All our 290 * timers have been stopped while this callout already tried 291 * to obtain the inpcb lock. TW pcbs have their own timers 292 * and we just return. 293 */ 294 if (inp->inp_vflag & INP_TIMEWAIT) 295 return; 296 /* 297 * - The tcpcb was discarded. All our timers have been stopped 298 * while this callout already tried to obtain the inpcb lock 299 * and we just return. 300 */ 301 if (tp == NULL) 302 return; 303 304 tt = tp->t_timers; /* Initialize. */ 305 CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i", 306 tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc); 307 308 /* 309 * - We may have been waiting on the lock while the tcpcb has 310 * been scheduled for destruction. In this case no active 311 * timers remain and we just return. 312 */ 313 if (tt->tt_active == 0) 314 goto done; 315 316 /* 317 * - The timer was rescheduled while this callout was already 318 * waiting on the lock. This may happen when a packet just 319 * came in. Rescan and reschedule the the timer in case we 320 * just turned it off. 321 */ 322 if (tick < tt->tt_nextc) 323 goto rescan; 324 325 /* 326 * Mark as done. The active bit in struct callout is not 327 * automatically cleared. See callout(9) for more info. 328 * In tcp_discardcb() we depend on the correctly cleared 329 * active bit for faster processing. 330 */ 331 callout_deactivate(&tt->tt_timer); 332 333 /* Check which timer has fired and remove this timer activation. 
*/ 334 timer = tt->tt_active; 335 tt->tt_active = 0; 336 tt->tt_nextc = 0; 337 338 switch (timer) { 339 case TT_DELACK: 340 CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__); 341 tt->tt_delack = 0; 342 down = tcp_timer_delack(tp, inp); /* down == 0 */ 343 break; 344 case TT_REXMT: 345 CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__); 346 tt->tt_rexmt = 0; 347 down = tcp_timer_rexmt(tp, inp); 348 break; 349 case TT_PERSIST: 350 CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__); 351 tt->tt_persist = 0; 352 down = tcp_timer_persist(tp, inp); 353 break; 354 case TT_KEEP: 355 CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__); 356 tt->tt_keep = 0; 357 down = tcp_timer_keep(tp, inp); 358 break; 359 case TT_2MSL: 360 CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__); 361 tt->tt_2msl = 0; 362 down = tcp_timer_2msl(tp, inp); 363 break; 364 default: 365 CTR2(KTR_NET, "%p %s running nothing", tp, __func__); 366 down = 0; 367 } 368 369 CTR4(KTR_NET, "%p %s down %i active %x", 370 tp, __func__, down, tt->tt_active); 371 /* Do we still exist? */ 372 if (down) 373 goto shutdown; 374 375rescan: 376 /* Rescan if no timer was reactivated above. */ 377 if (tt->tt_active == 0) 378 tcp_timer_activate(tp, 0, 0); 379 380done: 381 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 382 return; 383 384shutdown: 385 INP_UNLOCK(inp); /* Prevent LOR at expense of race. */ 386 INP_INFO_WLOCK(&tcbinfo); 387 INP_LOCK(inp); 388 389 /* When tp is gone we've lost the race. */ 390 if (inp->inp_ppcb == NULL) { 391 CTR3(KTR_NET, "%p %s inp %p lost shutdown race", 392 tp, __func__, inp); 393 tcp_timer_race++; 394 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 395 INP_INFO_WUNLOCK(&tcbinfo); 396 return; 397 } 398 KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__)); 399 400 /* Shutdown the connection. */ 401 switch (down) { 402 case 1: 403 tp = tcp_close(tp); 404 break; 405 case 2: 406 tp = tcp_drop(tp, 407 tp->t_softerror ? 
tp->t_softerror : ETIMEDOUT); 408 break; 409 } 410 CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp); 411 412 if (tp) 413 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 414 415 INP_INFO_WUNLOCK(&tcbinfo); 416 return; 417} 418
|
420
| |
421/* 422 * TCP timer processing. 423 */ 424static int 425tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp) 426{ 427 428 tp->t_flags |= TF_ACKNOW; 429 tcpstat.tcps_delack++; 430 (void) tcp_output(tp); 431 return (0); 432} 433 434static int 435tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp) 436{ 437#ifdef TCPDEBUG 438 int ostate; 439 440 ostate = tp->t_state; 441#endif 442 /* 443 * 2 MSL timeout in shutdown went off. If we're closed but 444 * still waiting for peer to close and connection has been idle 445 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 446 * control block. Otherwise, check again in a bit. 447 * 448 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 449 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 450 * Ignore fact that there were recent incoming segments. 451 */ 452 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 453 tp->t_inpcb->inp_socket && 454 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 455 tcpstat.tcps_finwait2_drops++; 456 return (1); /* tcp_close */ 457 } else { 458 if (tp->t_state != TCPS_TIME_WAIT && 459 (ticks - tp->t_rcvtime) <= tcp_maxidle) 460 tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl); 461 else 462 return (1); /* tcp_close */ 463 } 464 465#ifdef TCPDEBUG 466 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 467 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 468 PRU_SLOWTIMO); 469#endif 470 return (0); 471} 472 473/* 474 * The timed wait queue contains references to each of the TCP sessions 475 * currently in the TIME_WAIT state. The queue pointers, including the 476 * queue pointers in each tcptw structure, are protected using the global 477 * tcbinfo lock, which must be held over queue iteration and modification. 
 */
static TAILQ_HEAD(, tcptw) twq_2msl;

/*
 * One-time initialization of the TIME_WAIT (2MSL) queue.
 */
void
tcp_timer_init(void)
{

	TAILQ_INIT(&twq_2msl);
}

/*
 * (Re)start the 2MSL countdown for a TIME_WAIT endpoint.  When 'rearm'
 * is non-zero the entry is already on the queue and is moved to the
 * tail; since every entry gets the same 2*tcp_msl timeout, the queue
 * stays ordered by expiry time.  Caller holds the tcbinfo write lock
 * and the inpcb lock (asserted below).
 */
void
tcp_timer_2msl_reset(struct tcptw *tw, int rearm)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	INP_LOCK_ASSERT(tw->tw_inpcb);
	if (rearm)
		TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
	tw->tw_time = ticks + 2 * tcp_msl;
	TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
}

/*
 * Remove a TIME_WAIT endpoint from the 2MSL queue.  Caller holds the
 * tcbinfo write lock.
 */
void
tcp_timer_2msl_stop(struct tcptw *tw)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
}

/*
 * Reap TIME_WAIT endpoints from the head of the 2MSL queue.  With
 * 'reuse' zero, close every expired entry and return NULL.  With
 * 'reuse' non-zero, close the oldest entry unconditionally and return
 * it so the caller can recycle the structure.
 */
struct tcptw *
tcp_timer_2msl_tw(int reuse)
{
	struct tcptw *tw;

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	for (;;) {
		tw = TAILQ_FIRST(&twq_2msl);
		if (tw == NULL || (!reuse && tw->tw_time > ticks))
			break;
		INP_LOCK(tw->tw_inpcb);
		tcp_twclose(tw, reuse);
		if (reuse)
			return (tw);
	}
	return (NULL);
}

/*
 * Keep-alive timer.  Returns 0 to keep the connection, 2 to have the
 * caller invoke tcp_drop().
 */
static int
tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			(void) m_free(dtom(t_template));
		}
		tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl);
	} else
		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	return (0);

dropit:
	tcpstat.tcps_keepdrops++;
	return (2);			/* tcp_drop() */
}

/*
 * Persist timer.  Returns 0 to keep the connection, 2 to have the
 * caller invoke tcp_drop().
 */
static int
tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		return (2);		/* tcp_drop() */
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return (0);
}

/*
 * Retransmission timer.  Returns 0 to keep the connection, 2 to have
 * the caller invoke tcp_drop().
 */
static int
tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		return (2);		/* tcp_drop() */
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * First retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
	}
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable rfc1323 if we haven't got any response to
	 * our third SYN to work-around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
	u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
	if (win < 2)
		win = 2;
	tp->snd_cwnd = tp->t_maxseg;
	tp->snd_ssthresh = win * tp->t_maxseg;
	tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	(void) tcp_output(tp);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	return (0);
}
/*
 * TCP timer processing.
 */

/*
 * Delayed-ACK timer: mark the connection as needing an immediate ACK
 * and push it out.  Returns 0 (connection is always kept).
 */
static int
tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp)
{

	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	(void) tcp_output(tp);
	return (0);
}

/*
 * 2MSL / FIN_WAIT_2 timer.  Returns 0 to keep the connection, 1 to
 * have the caller invoke tcp_close().
 */
static int
tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
	 * there's no point in hanging onto FIN_WAIT_2 socket.  Just close it.
	 * Ignore fact that there were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb->inp_socket &&
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		tcpstat.tcps_finwait2_drops++;
		return (1);		/* tcp_close */
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    (ticks - tp->t_rcvtime) <= tcp_maxidle)
			tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl);
		else
			return (1);	/* tcp_close */
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	return (0);
}

/*
 * The timed wait queue contains references to each of the TCP sessions
 * currently in the TIME_WAIT state.  The queue pointers, including the
 * queue pointers in each tcptw structure, are protected using the global
 * tcbinfo lock, which must be held over queue iteration and modification.
 */
static TAILQ_HEAD(, tcptw) twq_2msl;

/*
 * One-time initialization of the TIME_WAIT (2MSL) queue.
 */
void
tcp_timer_init(void)
{

	TAILQ_INIT(&twq_2msl);
}

/*
 * (Re)start the 2MSL countdown for a TIME_WAIT endpoint.  When 'rearm'
 * is non-zero the entry is already on the queue and is moved to the
 * tail; since every entry gets the same 2*tcp_msl timeout, the queue
 * stays ordered by expiry time.  Caller holds the tcbinfo write lock
 * and the inpcb lock (asserted below).
 */
void
tcp_timer_2msl_reset(struct tcptw *tw, int rearm)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	INP_LOCK_ASSERT(tw->tw_inpcb);
	if (rearm)
		TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
	tw->tw_time = ticks + 2 * tcp_msl;
	TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
}

/*
 * Remove a TIME_WAIT endpoint from the 2MSL queue.  Caller holds the
 * tcbinfo write lock.
 */
void
tcp_timer_2msl_stop(struct tcptw *tw)
{

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
}

/*
 * Reap TIME_WAIT endpoints from the head of the 2MSL queue.  With
 * 'reuse' zero, close every expired entry and return NULL.  With
 * 'reuse' non-zero, close the oldest entry unconditionally and return
 * it so the caller can recycle the structure.
 */
struct tcptw *
tcp_timer_2msl_tw(int reuse)
{
	struct tcptw *tw;

	INP_INFO_WLOCK_ASSERT(&tcbinfo);
	for (;;) {
		tw = TAILQ_FIRST(&twq_2msl);
		if (tw == NULL || (!reuse && tw->tw_time > ticks))
			break;
		INP_LOCK(tw->tw_inpcb);
		tcp_twclose(tw, reuse);
		if (reuse)
			return (tw);
	}
	return (NULL);
}

/*
 * Keep-alive timer.  Returns 0 to keep the connection, 2 to have the
 * caller invoke tcp_drop().
 */
static int
tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			(void) m_free(dtom(t_template));
		}
		tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl);
	} else
		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	return (0);

dropit:
	tcpstat.tcps_keepdrops++;
	return (2);			/* tcp_drop() */
}

/*
 * Persist timer.  Returns 0 to keep the connection, 2 to have the
 * caller invoke tcp_drop().
 */
static int
tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	     (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		return (2);		/* tcp_drop() */
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return (0);
}

/*
 * Retransmission timer.  Returns 0 to keep the connection, 2 to have
 * the caller invoke tcp_drop().
 */
static int
tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		return (2);		/* tcp_drop() */
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * First retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
	}
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable rfc1323 if we haven't got any response to
	 * our third SYN to work-around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (the minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
	u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
	if (win < 2)
		win = 2;
	tp->snd_cwnd = tp->t_maxseg;
	tp->snd_ssthresh = win * tp->t_maxseg;
	tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	(void) tcp_output(tp);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
			  PRU_SLOWTIMO);
#endif
	return (0);
}
|