tcp_timer.c revision 169608
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 * $FreeBSD: head/sys/netinet/tcp_timer.c 169608 2007-05-16 17:14:25Z andre $
 */

#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

int	tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");

int	tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int	tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int	tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int	tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int	always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive, 0, "Assume SO_KEEPALIVE on all TCP connections");

int	tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int	tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

static int	tcp_keepcnt = TCPTV_KEEPCNT;
					/* max idle probes */
int	tcp_maxpersistidle;
					/* max idle time in persist */
int	tcp_maxidle;

static void	tcp_timer(void *);
static int	tcp_timer_delack(struct tcpcb *, struct inpcb *);
static int	tcp_timer_2msl(struct tcpcb *, struct inpcb *);
static int	tcp_timer_keep(struct tcpcb *, struct inpcb *);
static int	tcp_timer_persist(struct tcpcb *, struct inpcb *);
static int	tcp_timer_rexmt(struct tcpcb *, struct inpcb *);

/*
 * Tcp protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP and causes finite state
 * machine actions if timers expire.
 */
void
tcp_slowtimo(void)
{

	tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
	INP_INFO_WLOCK(&tcbinfo);
	(void) tcp_tw_2msl_scan(0);
	INP_INFO_WUNLOCK(&tcbinfo);
}

int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */

static int tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");

void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_timer *tt = tp->t_timers;
	int tick = ticks;			/* Stable time base. */
	int next = delta ? tick + delta : 0;

	INP_LOCK_ASSERT(inp);

	CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i",
	    tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc);

	/* Set new value for timer. */
	switch (timer_type) {
	case TT_DELACK:
		CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i",
		    tp, __func__, tt->tt_delack, next);
		tt->tt_delack = next;
		break;
	case TT_REXMT:
		CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i",
		    tp, __func__, tt->tt_rexmt, next);
		tt->tt_rexmt = next;
		break;
	case TT_PERSIST:
		CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i",
		    tp, __func__, tt->tt_persist, next);
		tt->tt_persist = next;
		break;
	case TT_KEEP:
		CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i",
		    tp, __func__, tt->tt_keep, next);
		tt->tt_keep = next;
		break;
	case TT_2MSL:
		CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i",
		    tp, __func__, tt->tt_2msl, next);
		tt->tt_2msl = next;
		break;
	case 0:					/* Dummy for timer rescan. */
		CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next);
		break;
	}

	/* If some other timer is active and is scheduled sooner, just return. */
	if (tt->tt_active != timer_type && tt->tt_nextc < next &&
	    callout_active(&tt->tt_timer))
		return;

	/* Select next timer to schedule. */
	tt->tt_nextc = INT_MAX;
	tt->tt_active = 0;
	if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_delack;
		tt->tt_active = TT_DELACK;
	}
	if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_rexmt;
		tt->tt_active = TT_REXMT;
	}
	if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_persist;
		tt->tt_active = TT_PERSIST;
	}
	if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_keep;
		tt->tt_active = TT_KEEP;
	}
	if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) {
		tt->tt_nextc = tt->tt_2msl;
		tt->tt_active = TT_2MSL;
	}

	/* Rearm callout with new timer if we found one. */
	if (tt->tt_active) {
		CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i",
		    tp, __func__, tt->tt_active, tt->tt_nextc - tick);
		callout_reset(&tt->tt_timer,
		    tt->tt_nextc - tick, tcp_timer, (void *)inp);
	} else {
		CTR2(KTR_NET, "%p %s callout_stop", tp, __func__);
		callout_stop(&tt->tt_timer);
		tt->tt_nextc = 0;
	}

	return;
}

int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{

	switch (timer_type) {
	case TT_DELACK:
		CTR3(KTR_NET, "%p %s TT_DELACK %i",
		    tp, __func__, tp->t_timers->tt_delack);
		return (tp->t_timers->tt_delack ? 1 : 0);
		break;
	case TT_REXMT:
		CTR3(KTR_NET, "%p %s TT_REXMT %i",
		    tp, __func__, tp->t_timers->tt_rexmt);
		return (tp->t_timers->tt_rexmt ? 1 : 0);
		break;
	case TT_PERSIST:
		CTR3(KTR_NET, "%p %s TT_PERSIST %i",
		    tp, __func__, tp->t_timers->tt_persist);
		return (tp->t_timers->tt_persist ? 1 : 0);
		break;
	case TT_KEEP:
		CTR3(KTR_NET, "%p %s TT_KEEP %i",
		    tp, __func__, tp->t_timers->tt_keep);
		return (tp->t_timers->tt_keep ? 1 : 0);
		break;
	case TT_2MSL:
		CTR3(KTR_NET, "%p %s TT_2MSL %i",
		    tp, __func__, tp->t_timers->tt_2msl);
		return (tp->t_timers->tt_2msl ? 1 : 0);
		break;
	}
	return (0);
}

static void
tcp_timer(void *xinp)
{
	struct inpcb *inp = (struct inpcb *)xinp;
	struct tcpcb *tp = intotcpcb(inp);
	struct tcp_timer *tt;
	int tick = ticks;
	int down, timer;

	/* INP lock was obtained by callout. */
	INP_LOCK_ASSERT(inp);

	/*
	 * We've got a couple of race conditions here:
	 * - The tcpcb was converted into a compressed TW pcb.  All our
	 *   timers have been stopped while this callout already tried
	 *   to obtain the inpcb lock.  TW pcbs have their own timers
	 *   and we just return.
	 */
	if (inp->inp_vflag & INP_TIMEWAIT)
		return;
	/*
	 * - The tcpcb was discarded.  All our timers have been stopped
	 *   while this callout already tried to obtain the inpcb lock
	 *   and we just return.
	 */
	if (tp == NULL)
		return;

	tt = tp->t_timers;			/* Initialize. */
	CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i",
	    tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc);

	/*
	 * - We may have been waiting on the lock while the tcpcb has
	 *   been scheduled for destruction.  In this case no active
	 *   timers remain and we just return.
	 */
	if (tt->tt_active == 0)
		goto done;

	/*
	 * - The timer was rescheduled while this callout was already
	 *   waiting on the lock.  This may happen when a packet just
	 *   came in.  Rescan and reschedule the timer in case we
	 *   just turned it off.
	 */
	if (tick < tt->tt_nextc)
		goto rescan;

	/*
	 * Mark as done.  The active bit in struct callout is not
	 * automatically cleared.  See callout(9) for more info.
	 * In tcp_discardcb() we depend on the correctly cleared
	 * active bit for faster processing.
	 */
	callout_deactivate(&tt->tt_timer);

	/* Check which timer has fired and remove this timer activation. */
	timer = tt->tt_active;
	tt->tt_active = 0;
	tt->tt_nextc = 0;

	switch (timer) {
	case TT_DELACK:
		CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__);
		tt->tt_delack = 0;
		down = tcp_timer_delack(tp, inp);	/* down == 0 */
		break;
	case TT_REXMT:
		CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__);
		tt->tt_rexmt = 0;
		down = tcp_timer_rexmt(tp, inp);
		break;
	case TT_PERSIST:
		CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__);
		tt->tt_persist = 0;
		down = tcp_timer_persist(tp, inp);
		break;
	case TT_KEEP:
		CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__);
		tt->tt_keep = 0;
		down = tcp_timer_keep(tp, inp);
		break;
	case TT_2MSL:
		CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__);
		tt->tt_2msl = 0;
		down = tcp_timer_2msl(tp, inp);
		break;
	default:
		CTR2(KTR_NET, "%p %s running nothing", tp, __func__);
		down = 0;
	}

	CTR4(KTR_NET, "%p %s down %i active %x",
	    tp, __func__, down, tt->tt_active);
	/* Do we still exist? */
	if (down)
		goto shutdown;

rescan:
	/* Rescan if no timer was reactivated above. */
	if (tt->tt_active == 0)
		tcp_timer_activate(tp, 0, 0);

done:
	INP_UNLOCK(inp);		/* CALLOUT_RETURNUNLOCKED */
	return;

shutdown:
	INP_UNLOCK(inp);		/* Prevent LOR at expense of race. */
	INP_INFO_WLOCK(&tcbinfo);
	INP_LOCK(inp);

	/* When tp is gone we've lost the race. */
	if (inp->inp_ppcb == NULL) {
		CTR3(KTR_NET, "%p %s inp %p lost shutdown race",
		    tp, __func__, inp);
		tcp_timer_race++;
		INP_UNLOCK(inp);	/* CALLOUT_RETURNUNLOCKED */
		INP_INFO_WUNLOCK(&tcbinfo);
		return;
	}
	KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__));

	/* Shutdown the connection. */
	switch (down) {
	case 1:
		tp = tcp_close(tp);
		break;
	case 2:
		tp = tcp_drop(tp,
		    tp->t_softerror ? tp->t_softerror : ETIMEDOUT);
		break;
	}
	CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp);

	if (tp)
		INP_UNLOCK(inp);	/* CALLOUT_RETURNUNLOCKED */

	INP_INFO_WUNLOCK(&tcbinfo);
	return;
}

/*
 * TCP timer processing.
 */
static int
tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp)
{

	tp->t_flags |= TF_ACKNOW;
	tcpstat.tcps_delack++;
	(void) tcp_output(tp);
	return (0);
}

static int
tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * 2 MSL timeout in shutdown went off.  If we're closed but
	 * still waiting for peer to close and connection has been idle
	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
	 * control block.  Otherwise, check again in a bit.
	 *
	 * If fast FIN_WAIT_2 recycling is enabled, we are in FIN_WAIT_2 and
	 * the receiver has closed, there's no point in hanging onto the
	 * FIN_WAIT_2 socket.  Just close it.  Ignore the fact that there
	 * were recent incoming segments.
	 */
	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
	    tp->t_inpcb->inp_socket &&
	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
		tcpstat.tcps_finwait2_drops++;
		return (1);			/* tcp_close */
	} else {
		if (tp->t_state != TCPS_TIME_WAIT &&
		    (ticks - tp->t_rcvtime) <= tcp_maxidle)
			tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl);
		else
			return (1);		/* tcp_close */
	}

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}

static int
tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp)
{
	struct tcptemp *t_template;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Keep-alive timer went off; send something
	 * or drop connection if idle for too long.
	 */
	tcpstat.tcps_keeptimeo++;
	if (tp->t_state < TCPS_ESTABLISHED)
		goto dropit;
	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
	    tp->t_state <= TCPS_CLOSING) {
		if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
			goto dropit;
		/*
		 * Send a packet designed to force a response
		 * if the peer is up and reachable:
		 * either an ACK if the connection is still alive,
		 * or an RST if the peer has closed the connection
		 * due to timeout or reboot.
		 * Using sequence number tp->snd_una-1
		 * causes the transmitted zero-length segment
		 * to lie outside the receive window;
		 * by the protocol spec, this requires the
		 * correspondent TCP to respond.
		 */
		tcpstat.tcps_keepprobe++;
		t_template = tcpip_maketemplate(inp);
		if (t_template) {
			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una - 1, 0);
			(void) m_free(dtom(t_template));
		}
		tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl);
	} else
		tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);

#ifdef TCPDEBUG
	if (inp->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);

dropit:
	tcpstat.tcps_keepdrops++;
	return (2);				/* tcp_drop() */
}

static int
tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp)
{
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	/*
	 * Persistence timer into zero window.
	 * Force a byte to be output, if possible.
	 */
	tcpstat.tcps_persisttimeo++;
	/*
	 * Hack: if the peer is dead/unreachable, we do not
	 * time out if the window is closed.  After a full
	 * backoff, drop the connection if the idle time
	 * (no responses to probes) reaches the maximum
	 * backoff that we would use if retransmitting.
	 */
	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
	    ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
	    (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
		tcpstat.tcps_persistdrop++;
		return (2);			/* tcp_drop() */
	}
	tcp_setpersist(tp);
	tp->t_flags |= TF_FORCEDATA;
	(void) tcp_output(tp);
	tp->t_flags &= ~TF_FORCEDATA;

#ifdef TCPDEBUG
	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
	return (0);
}

static int
tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp)
{
	int rexmt;
#ifdef TCPDEBUG
	int ostate;

	ostate = tp->t_state;
#endif
	tcp_free_sackholes(tp);
	/*
	 * Retransmission timer went off.  Message has not
	 * been acked within retransmit interval.  Back off
	 * to a longer retransmit interval and retransmit one segment.
	 */
	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
		tp->t_rxtshift = TCP_MAXRXTSHIFT;
		tcpstat.tcps_timeoutdrop++;
		return (2);			/* tcp_drop() */
	}
	if (tp->t_rxtshift == 1) {
		/*
		 * First retransmit; record ssthresh and cwnd so they can
		 * be recovered if this turns out to be a "bad" retransmit.
		 * A retransmit is considered "bad" if an ACK for this
		 * segment is received within RTT/2 interval; the assumption
		 * here is that the ACK was already in flight.  See
		 * "On Estimating End-to-End Network Path Properties" by
		 * Allman and Paxson for more details.
		 */
		tp->snd_cwnd_prev = tp->snd_cwnd;
		tp->snd_ssthresh_prev = tp->snd_ssthresh;
		tp->snd_recover_prev = tp->snd_recover;
		if (IN_FASTRECOVERY(tp))
			tp->t_flags |= TF_WASFRECOVERY;
		else
			tp->t_flags &= ~TF_WASFRECOVERY;
		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
	}
	tcpstat.tcps_rexmttimeo++;
	if (tp->t_state == TCPS_SYN_SENT)
		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
	else
		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
	TCPT_RANGESET(tp->t_rxtcur, rexmt,
	    tp->t_rttmin, TCPTV_REXMTMAX);
	/*
	 * Disable RFC 1323 if we haven't got any response to
	 * our third SYN to work around some broken terminal servers
	 * (most of which have hopefully been retired) that have bad VJ
	 * header compression code which trashes TCP segments containing
	 * unknown-to-them TCP options.
	 */
	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
	/*
	 * If we backed off this far, our srtt estimate is probably bogus.
	 * Clobber it so we'll take the next rtt measurement as our srtt;
	 * move the current srtt into rttvar to keep the current
	 * retransmit times until then.
	 */
	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
			in6_losing(tp->t_inpcb);
		else
#endif
		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
		tp->t_srtt = 0;
	}
	tp->snd_nxt = tp->snd_una;
	tp->snd_recover = tp->snd_max;
	/*
	 * Force a segment to be sent.
	 */
	tp->t_flags |= TF_ACKNOW;
	/*
	 * If timing a segment in this window, stop the timer.
	 */
	tp->t_rtttime = 0;
	/*
	 * Close the congestion window down to one segment
	 * (we'll open it by one segment for each ack we get).
	 * Since we probably have a window's worth of unacked
	 * data accumulated, this "slow start" keeps us from
	 * dumping all that data as back-to-back packets (which
	 * might overwhelm an intermediate gateway).
	 *
	 * There are two phases to the opening: Initially we
	 * open by one mss on each ack.  This makes the window
	 * size increase exponentially with time.  If the
	 * window is larger than the path can handle, this
	 * exponential growth results in dropped packet(s)
	 * almost immediately.  To get more time between
	 * drops but still "push" the network to take advantage
	 * of improving conditions, we switch from exponential
	 * to linear window opening at some threshold size.
	 * For a threshold, we use half the current window
	 * size, truncated to a multiple of the mss.
	 *
	 * (The minimum cwnd that will give us exponential
	 * growth is 2 mss.  We don't allow the threshold
	 * to go below this.)
	 */
	{
		u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
		if (win < 2)
			win = 2;
		tp->snd_cwnd = tp->t_maxseg;
		tp->snd_ssthresh = win * tp->t_maxseg;
		tp->t_dupacks = 0;
	}
	EXIT_FASTRECOVERY(tp);
	(void) tcp_output(tp);

#ifdef TCPDEBUG
	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
		    PRU_SLOWTIMO);
#endif
	return (0);
}
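The central idea of tcp_timer_activate() above is that all five per-connection timers share a single callout: each slot holds an absolute expiry tick (0 = disarmed), and the callout is rearmed for whichever armed slot expires soonest. The following standalone userland sketch is not part of tcp_timer.c; it only illustrates that earliest-deadline selection with plain ints in place of the kernel's callout and ticks machinery. The names timer_set, timer_next, deadline and NTIMERS are invented for this example.

/*
 * Illustrative sketch only -- not kernel code.  Mirrors the
 * "Select next timer to schedule" loop in tcp_timer_activate().
 */
#include <limits.h>
#include <stdio.h>

enum { T_DELACK, T_REXMT, T_PERSIST, T_KEEP, T_2MSL, NTIMERS };

/* Absolute expiry times in ticks; 0 means "not armed", like the tt_* slots. */
static int deadline[NTIMERS];

/* Arm (delta > 0) or disarm (delta == 0) one timer slot. */
static void
timer_set(int which, int now, int delta)
{
	deadline[which] = delta ? now + delta : 0;
}

/*
 * Return the index of the soonest armed timer and its expiry via *nextc;
 * -1 means nothing is armed, i.e. the single callout would be stopped
 * rather than reset.
 */
static int
timer_next(int *nextc)
{
	int i, soonest = -1;

	*nextc = INT_MAX;
	for (i = 0; i < NTIMERS; i++) {
		if (deadline[i] != 0 && deadline[i] < *nextc) {
			*nextc = deadline[i];
			soonest = i;
		}
	}
	return (soonest);
}

int
main(void)
{
	int now = 1000, nextc, which;

	timer_set(T_REXMT, now, 300);	/* retransmit timer in 300 ticks */
	timer_set(T_KEEP, now, 7200);	/* keepalive much later */
	which = timer_next(&nextc);
	printf("arm callout for timer %d in %d ticks\n", which, nextc - now);
	return (0);
}

In the kernel, the value corresponding to nextc - now is what gets passed to callout_reset(), and a rescan with timer_type 0 (the "dummy for timer rescan" case) simply repeats this selection after a handler has cleared or rearmed its slot.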