/* tcp_timer.c revision 169309 */
1/*- 2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 30 * $FreeBSD: head/sys/netinet/tcp_timer.c 169309 2007-05-06 13:38:25Z andre $ 31 */ 32 33#include "opt_inet6.h" 34#include "opt_tcpdebug.h" 35 36#include <sys/param.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/limits.h> 41#include <sys/mbuf.h> 42#include <sys/mutex.h> 43#include <sys/protosw.h> 44#include <sys/socket.h> 45#include <sys/socketvar.h> 46#include <sys/sysctl.h> 47#include <sys/systm.h> 48 49#include <net/route.h> 50 51#include <netinet/in.h> 52#include <netinet/in_pcb.h> 53#include <netinet/in_systm.h> 54#ifdef INET6 55#include <netinet6/in6_pcb.h> 56#endif 57#include <netinet/ip_var.h> 58#include <netinet/tcp.h> 59#include <netinet/tcp_fsm.h> 60#include <netinet/tcp_timer.h> 61#include <netinet/tcp_var.h> 62#include <netinet/tcpip.h> 63#ifdef TCPDEBUG 64#include <netinet/tcp_debug.h> 65#endif 66 67int tcp_keepinit; 68SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 69 &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); 70 71int tcp_keepidle; 72SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 73 &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); 74 75int tcp_keepintvl; 76SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 77 &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); 78 79int tcp_delacktime; 80SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 81 &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 82 "Time before a delayed ACK is sent"); 83 84int tcp_msl; 85SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 86 &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 87 88int tcp_rexmit_min; 89SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 90 &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 91 "Minimum Retransmission Timeout"); 92 93int tcp_rexmit_slop; 94SYSCTL_PROC(_net_inet_tcp, OID_AUTO, 
rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 95 &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 96 "Retransmission Timer Slop"); 97 98static int always_keepalive = 1; 99SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 100 &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 101 102int tcp_fast_finwait2_recycle = 0; 103SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 104 &tcp_fast_finwait2_recycle, 0, 105 "Recycle closed FIN_WAIT_2 connections faster"); 106 107int tcp_finwait2_timeout; 108SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 109 &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 110 111 112static int tcp_keepcnt = TCPTV_KEEPCNT; 113 /* max idle probes */ 114int tcp_maxpersistidle; 115 /* max idle time in persist */ 116int tcp_maxidle; 117 118static void tcp_timer(void *); 119static int tcp_timer_delack(struct tcpcb *, struct inpcb *); 120static int tcp_timer_2msl(struct tcpcb *, struct inpcb *); 121static int tcp_timer_keep(struct tcpcb *, struct inpcb *); 122static int tcp_timer_persist(struct tcpcb *, struct inpcb *); 123static int tcp_timer_rexmt(struct tcpcb *, struct inpcb *); 124 125/* 126 * Tcp protocol timeout routine called every 500 ms. 127 * Updates timestamps used for TCP 128 * causes finite state machine actions if timers expire. 
129 */ 130void 131tcp_slowtimo() 132{ 133 134 tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 135 INP_INFO_WLOCK(&tcbinfo); 136 (void) tcp_timer_2msl_tw(0); 137 INP_INFO_WUNLOCK(&tcbinfo); 138} 139 140int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 141 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 142 143int tcp_backoff[TCP_MAXRXTSHIFT + 1] = 144 { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 145 146static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 147 148static int tcp_timer_race; 149SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 150 0, "Count of t_inpcb races on tcp_discardcb"); 151 152 153void 154tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 155{ 156 struct inpcb *inp = tp->t_inpcb; 157 struct tcp_timer *tt = tp->t_timers; 158 int tick = ticks; /* Stable time base. */ 159 int next = delta ? tick + delta : 0; 160 161 INP_LOCK_ASSERT(inp); 162 163 CTR6(KTR_NET, "%p %s inp %p active %x delta %i nextc %i", 164 tp, __func__, inp, tt->tt_active, delta, tt->tt_nextc); 165 166 /* Set new value for timer. */ 167 switch(timer_type) { 168 case TT_DELACK: 169 CTR4(KTR_NET, "%p %s TT_DELACK old %i new %i", 170 tp, __func__, tt->tt_delack, next); 171 tt->tt_delack = next; 172 break; 173 case TT_REXMT: 174 CTR4(KTR_NET, "%p %s TT_REXMT old %i new %i", 175 tp, __func__, tt->tt_rexmt, next); 176 tt->tt_rexmt = next; 177 break; 178 case TT_PERSIST: 179 CTR4(KTR_NET, "%p %s TT_PERSIST old %i new %i", 180 tp, __func__, tt->tt_persist, next); 181 tt->tt_persist = next; 182 break; 183 case TT_KEEP: 184 CTR4(KTR_NET, "%p %s TT_KEEP old %i new %i", 185 tp, __func__, tt->tt_keep, next); 186 tt->tt_keep = next; 187 break; 188 case TT_2MSL: 189 CTR4(KTR_NET, "%p %s TT_2MSL old %i new %i", 190 tp, __func__, tt->tt_2msl, next); 191 tt->tt_2msl = next; 192 break; 193 case 0: /* Dummy for timer rescan. 
*/ 194 CTR3(KTR_NET, "%p %s timer rescan new %i", tp, __func__, next); 195 break; 196 } 197 198 /* If some other timer is active and is schedules sooner just return. */ 199 if (tt->tt_active != timer_type && tt->tt_nextc < next && 200 callout_active(&tt->tt_timer)) 201 return; 202 203 /* Select next timer to schedule. */ 204 tt->tt_nextc = INT_MAX; 205 tt->tt_active = 0; 206 if (tt->tt_delack && tt->tt_delack < tt->tt_nextc) { 207 tt->tt_nextc = tt->tt_delack; 208 tt->tt_active = TT_DELACK; 209 } 210 if (tt->tt_rexmt && tt->tt_rexmt < tt->tt_nextc) { 211 tt->tt_nextc = tt->tt_rexmt; 212 tt->tt_active = TT_REXMT; 213 } 214 if (tt->tt_persist && tt->tt_persist < tt->tt_nextc) { 215 tt->tt_nextc = tt->tt_persist; 216 tt->tt_active = TT_PERSIST; 217 } 218 if (tt->tt_keep && tt->tt_keep < tt->tt_nextc) { 219 tt->tt_nextc = tt->tt_keep; 220 tt->tt_active = TT_KEEP; 221 } 222 if (tt->tt_2msl && tt->tt_2msl < tt->tt_nextc) { 223 tt->tt_nextc = tt->tt_2msl; 224 tt->tt_active = TT_2MSL; 225 } 226 227 /* Rearm callout with new timer if we found one. */ 228 if (tt->tt_active) { 229 CTR4(KTR_NET, "%p %s callout_reset active %x nextc in %i", 230 tp, __func__, tt->tt_active, tt->tt_nextc - tick); 231 callout_reset(&tt->tt_timer, 232 tt->tt_nextc - tick, tcp_timer, (void *)inp); 233 } else { 234 CTR2(KTR_NET, "%p %s callout_stop", tp, __func__); 235 callout_stop(&tt->tt_timer); 236 tt->tt_nextc = 0; 237 } 238 239 return; 240} 241 242int 243tcp_timer_active(struct tcpcb *tp, int timer_type) 244{ 245 246 switch (timer_type) { 247 case TT_DELACK: 248 CTR3(KTR_NET, "%p %s TT_DELACK %i", 249 tp, __func__, tp->t_timers->tt_delack); 250 return (tp->t_timers->tt_delack ? 1 : 0); 251 break; 252 case TT_REXMT: 253 CTR3(KTR_NET, "%p %s TT_REXMT %i", 254 tp, __func__, tp->t_timers->tt_rexmt); 255 return (tp->t_timers->tt_rexmt ? 1 : 0); 256 break; 257 case TT_PERSIST: 258 CTR3(KTR_NET, "%p %s TT_PERSIST %i", 259 tp, __func__, tp->t_timers->tt_persist); 260 return (tp->t_timers->tt_persist ? 
1 : 0); 261 break; 262 case TT_KEEP: 263 CTR3(KTR_NET, "%p %s TT_KEEP %i", 264 tp, __func__, tp->t_timers->tt_keep); 265 return (tp->t_timers->tt_keep ? 1 : 0); 266 break; 267 case TT_2MSL: 268 CTR3(KTR_NET, "%p %s TT_2MSL %i", 269 tp, __func__, tp->t_timers->tt_2msl); 270 return (tp->t_timers->tt_2msl ? 1 : 0); 271 break; 272 } 273 return (0); 274} 275 276static void 277tcp_timer(void *xinp) 278{ 279 struct inpcb *inp = (struct inpcb *)xinp; 280 struct tcpcb *tp = intotcpcb(inp); 281 struct tcp_timer *tt; 282 int tick = ticks; 283 int down, timer; 284 285 /* INP lock was obtained by callout. */ 286 INP_LOCK_ASSERT(inp); 287 288 /* 289 * We've got a couple of race conditions here: 290 * - The tcpcb was converted into a compressed TW pcb. All our 291 * timers have been stopped while this callout already tried 292 * to obtain the inpcb lock. TW pcbs have their own timers 293 * and we just return. 294 */ 295 if (inp->inp_vflag & INP_TIMEWAIT) 296 return; 297 /* 298 * - The tcpcb was discarded. All our timers have been stopped 299 * while this callout already tried to obtain the inpcb lock 300 * and we just return. 301 */ 302 if (tp == NULL) 303 return; 304 305 tt = tp->t_timers; /* Initialize. */ 306 CTR6(KTR_NET, "%p %s inp %p active %x tick %i nextc %i", 307 tp, __func__, inp, tt->tt_active, tick, tt->tt_nextc); 308 309 /* 310 * - We may have been waiting on the lock while the tcpcb has 311 * been scheduled for destruction. In this case no active 312 * timers remain and we just return. 313 */ 314 if (tt->tt_active == 0) 315 goto done; 316 317 /* 318 * - The timer was rescheduled while this callout was already 319 * waiting on the lock. This may happen when a packet just 320 * came in. Rescan and reschedule the the timer in case we 321 * just turned it off. 322 */ 323 if (tick < tt->tt_nextc) 324 goto rescan; 325 326 /* 327 * Mark as done. The active bit in struct callout is not 328 * automatically cleared. See callout(9) for more info. 
329 * In tcp_discardcb() we depend on the correctly cleared 330 * active bit for faster processing. 331 */ 332 callout_deactivate(&tt->tt_timer); 333 334 /* Check which timer has fired and remove this timer activation. */ 335 timer = tt->tt_active; 336 tt->tt_active = 0; 337 tt->tt_nextc = 0; 338 339 switch (timer) { 340 case TT_DELACK: 341 CTR2(KTR_NET, "%p %s running TT_DELACK", tp, __func__); 342 tt->tt_delack = 0; 343 down = tcp_timer_delack(tp, inp); /* down == 0 */ 344 break; 345 case TT_REXMT: 346 CTR2(KTR_NET, "%p %s running TT_REXMT", tp, __func__); 347 tt->tt_rexmt = 0; 348 down = tcp_timer_rexmt(tp, inp); 349 break; 350 case TT_PERSIST: 351 CTR2(KTR_NET, "%p %s running TT_PERSIST", tp, __func__); 352 tt->tt_persist = 0; 353 down = tcp_timer_persist(tp, inp); 354 break; 355 case TT_KEEP: 356 CTR2(KTR_NET, "%p %s running TT_KEEP", tp, __func__); 357 tt->tt_keep = 0; 358 down = tcp_timer_keep(tp, inp); 359 break; 360 case TT_2MSL: 361 CTR2(KTR_NET, "%p %s running TT_2MSL", tp, __func__); 362 tt->tt_2msl = 0; 363 down = tcp_timer_2msl(tp, inp); 364 break; 365 default: 366 CTR2(KTR_NET, "%p %s running nothing", tp, __func__); 367 down = 0; 368 } 369 370 CTR4(KTR_NET, "%p %s down %i active %x", 371 tp, __func__, down, tt->tt_active); 372 /* Do we still exist? */ 373 if (down) 374 goto shutdown; 375 376rescan: 377 /* Rescan if no timer was reactivated above. */ 378 if (tt->tt_active == 0) 379 tcp_timer_activate(tp, 0, 0); 380 381done: 382 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 383 return; 384 385shutdown: 386 INP_UNLOCK(inp); /* Prevent LOR at expense of race. */ 387 INP_INFO_WLOCK(&tcbinfo); 388 INP_LOCK(inp); 389 390 /* When tp is gone we've lost the race. 
*/ 391 if (inp->inp_ppcb == NULL) { 392 CTR3(KTR_NET, "%p %s inp %p lost shutdown race", 393 tp, __func__, inp); 394 tcp_timer_race++; 395 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 396 INP_INFO_WUNLOCK(&tcbinfo); 397 return; 398 } 399 KASSERT(tp == inp->inp_ppcb, ("%s: tp changed", __func__)); 400 401 /* Shutdown the connection. */ 402 switch (down) { 403 case 1: 404 tp = tcp_close(tp); 405 break; 406 case 2: 407 tp = tcp_drop(tp, 408 tp->t_softerror ? tp->t_softerror : ETIMEDOUT); 409 break; 410 } 411 CTR3(KTR_NET, "%p %s inp %p after shutdown", tp, __func__, inp); 412 413 if (tp) 414 INP_UNLOCK(inp); /* CALLOUT_RETURNUNLOCKED */ 415 416 INP_INFO_WUNLOCK(&tcbinfo); 417 return; 418} 419 420 421/* 422 * TCP timer processing. 423 */ 424static int 425tcp_timer_delack(struct tcpcb *tp, struct inpcb *inp) 426{ 427 428 tp->t_flags |= TF_ACKNOW; 429 tcpstat.tcps_delack++; 430 (void) tcp_output(tp); 431 return (0); 432} 433 434static int 435tcp_timer_2msl(struct tcpcb *tp, struct inpcb *inp) 436{ 437#ifdef TCPDEBUG 438 int ostate; 439 440 ostate = tp->t_state; 441#endif 442 /* 443 * 2 MSL timeout in shutdown went off. If we're closed but 444 * still waiting for peer to close and connection has been idle 445 * too long, or if 2MSL time is up from TIME_WAIT, delete connection 446 * control block. Otherwise, check again in a bit. 447 * 448 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 449 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 450 * Ignore fact that there were recent incoming segments. 
451 */ 452 if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 453 tp->t_inpcb->inp_socket && 454 (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 455 tcpstat.tcps_finwait2_drops++; 456 return (1); /* tcp_close */ 457 } else { 458 if (tp->t_state != TCPS_TIME_WAIT && 459 (ticks - tp->t_rcvtime) <= tcp_maxidle) 460 tcp_timer_activate(tp, TT_2MSL, tcp_keepintvl); 461 else 462 return (1); /* tcp_close */ 463 } 464 465#ifdef TCPDEBUG 466 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 467 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 468 PRU_SLOWTIMO); 469#endif 470 return (0); 471} 472 473/* 474 * The timed wait queue contains references to each of the TCP sessions 475 * currently in the TIME_WAIT state. The queue pointers, including the 476 * queue pointers in each tcptw structure, are protected using the global 477 * tcbinfo lock, which must be held over queue iteration and modification. 478 */ 479static TAILQ_HEAD(, tcptw) twq_2msl; 480 481void 482tcp_timer_init(void) 483{ 484 485 TAILQ_INIT(&twq_2msl); 486} 487 488void 489tcp_timer_2msl_reset(struct tcptw *tw, int rearm) 490{ 491 492 INP_INFO_WLOCK_ASSERT(&tcbinfo); 493 INP_LOCK_ASSERT(tw->tw_inpcb); 494 if (rearm) 495 TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 496 tw->tw_time = ticks + 2 * tcp_msl; 497 TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl); 498} 499 500void 501tcp_timer_2msl_stop(struct tcptw *tw) 502{ 503 504 INP_INFO_WLOCK_ASSERT(&tcbinfo); 505 TAILQ_REMOVE(&twq_2msl, tw, tw_2msl); 506} 507 508struct tcptw * 509tcp_timer_2msl_tw(int reuse) 510{ 511 struct tcptw *tw; 512 513 INP_INFO_WLOCK_ASSERT(&tcbinfo); 514 for (;;) { 515 tw = TAILQ_FIRST(&twq_2msl); 516 if (tw == NULL || (!reuse && tw->tw_time > ticks)) 517 break; 518 INP_LOCK(tw->tw_inpcb); 519 tcp_twclose(tw, reuse); 520 if (reuse) 521 return (tw); 522 } 523 return (NULL); 524} 525 526static int 527tcp_timer_keep(struct tcpcb *tp, struct inpcb *inp) 528{ 529 struct tcptemp *t_template; 
530#ifdef TCPDEBUG 531 int ostate; 532 533 ostate = tp->t_state; 534#endif 535 /* 536 * Keep-alive timer went off; send something 537 * or drop connection if idle for too long. 538 */ 539 tcpstat.tcps_keeptimeo++; 540 if (tp->t_state < TCPS_ESTABLISHED) 541 goto dropit; 542 if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 543 tp->t_state <= TCPS_CLOSING) { 544 if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle) 545 goto dropit; 546 /* 547 * Send a packet designed to force a response 548 * if the peer is up and reachable: 549 * either an ACK if the connection is still alive, 550 * or an RST if the peer has closed the connection 551 * due to timeout or reboot. 552 * Using sequence number tp->snd_una-1 553 * causes the transmitted zero-length segment 554 * to lie outside the receive window; 555 * by the protocol spec, this requires the 556 * correspondent TCP to respond. 557 */ 558 tcpstat.tcps_keepprobe++; 559 t_template = tcpip_maketemplate(inp); 560 if (t_template) { 561 tcp_respond(tp, t_template->tt_ipgen, 562 &t_template->tt_t, (struct mbuf *)NULL, 563 tp->rcv_nxt, tp->snd_una - 1, 0); 564 (void) m_free(dtom(t_template)); 565 } 566 tcp_timer_activate(tp, TT_KEEP, tcp_keepintvl); 567 } else 568 tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); 569 570#ifdef TCPDEBUG 571 if (inp->inp_socket->so_options & SO_DEBUG) 572 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 573 PRU_SLOWTIMO); 574#endif 575 return (0); 576 577dropit: 578 tcpstat.tcps_keepdrops++; 579 return (2); /* tcp_drop() */ 580} 581 582static int 583tcp_timer_persist(struct tcpcb *tp, struct inpcb *inp) 584{ 585#ifdef TCPDEBUG 586 int ostate; 587 588 ostate = tp->t_state; 589#endif 590 /* 591 * Persistance timer into zero window. 592 * Force a byte to be output, if possible. 593 */ 594 tcpstat.tcps_persisttimeo++; 595 /* 596 * Hack: if the peer is dead/unreachable, we do not 597 * time out if the window is closed. 
After a full 598 * backoff, drop the connection if the idle time 599 * (no responses to probes) reaches the maximum 600 * backoff that we would use if retransmitting. 601 */ 602 if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 603 ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle || 604 (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 605 tcpstat.tcps_persistdrop++; 606 return (2); /* tcp_drop() */ 607 } 608 tcp_setpersist(tp); 609 tp->t_flags |= TF_FORCEDATA; 610 (void) tcp_output(tp); 611 tp->t_flags &= ~TF_FORCEDATA; 612 613#ifdef TCPDEBUG 614 if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 615 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 616#endif 617 return (0); 618} 619 620static int 621tcp_timer_rexmt(struct tcpcb *tp, struct inpcb *inp) 622{ 623 int rexmt; 624#ifdef TCPDEBUG 625 int ostate; 626 627 ostate = tp->t_state; 628#endif 629 tcp_free_sackholes(tp); 630 /* 631 * Retransmission timer went off. Message has not 632 * been acked within retransmit interval. Back off 633 * to a longer retransmit interval and retransmit one segment. 634 */ 635 if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 636 tp->t_rxtshift = TCP_MAXRXTSHIFT; 637 tcpstat.tcps_timeoutdrop++; 638 return (2); /* tcp_drop() */ 639 } 640 if (tp->t_rxtshift == 1) { 641 /* 642 * first retransmit; record ssthresh and cwnd so they can 643 * be recovered if this turns out to be a "bad" retransmit. 644 * A retransmit is considered "bad" if an ACK for this 645 * segment is received within RTT/2 interval; the assumption 646 * here is that the ACK was already in flight. See 647 * "On Estimating End-to-End Network Path Properties" by 648 * Allman and Paxson for more details. 
649 */ 650 tp->snd_cwnd_prev = tp->snd_cwnd; 651 tp->snd_ssthresh_prev = tp->snd_ssthresh; 652 tp->snd_recover_prev = tp->snd_recover; 653 if (IN_FASTRECOVERY(tp)) 654 tp->t_flags |= TF_WASFRECOVERY; 655 else 656 tp->t_flags &= ~TF_WASFRECOVERY; 657 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 658 } 659 tcpstat.tcps_rexmttimeo++; 660 if (tp->t_state == TCPS_SYN_SENT) 661 rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 662 else 663 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 664 TCPT_RANGESET(tp->t_rxtcur, rexmt, 665 tp->t_rttmin, TCPTV_REXMTMAX); 666 /* 667 * Disable rfc1323 if we havn't got any response to 668 * our third SYN to work-around some broken terminal servers 669 * (most of which have hopefully been retired) that have bad VJ 670 * header compression code which trashes TCP segments containing 671 * unknown-to-them TCP options. 672 */ 673 if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 674 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 675 /* 676 * If we backed off this far, our srtt estimate is probably bogus. 677 * Clobber it so we'll take the next rtt measurement as our srtt; 678 * move the current srtt into rttvar to keep the current 679 * retransmit times until then. 680 */ 681 if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 682#ifdef INET6 683 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 684 in6_losing(tp->t_inpcb); 685 else 686#endif 687 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 688 tp->t_srtt = 0; 689 } 690 tp->snd_nxt = tp->snd_una; 691 tp->snd_recover = tp->snd_max; 692 /* 693 * Force a segment to be sent. 694 */ 695 tp->t_flags |= TF_ACKNOW; 696 /* 697 * If timing a segment in this window, stop the timer. 698 */ 699 tp->t_rtttime = 0; 700 /* 701 * Close the congestion window down to one segment 702 * (we'll open it by one segment for each ack we get). 
703 * Since we probably have a window's worth of unacked 704 * data accumulated, this "slow start" keeps us from 705 * dumping all that data as back-to-back packets (which 706 * might overwhelm an intermediate gateway). 707 * 708 * There are two phases to the opening: Initially we 709 * open by one mss on each ack. This makes the window 710 * size increase exponentially with time. If the 711 * window is larger than the path can handle, this 712 * exponential growth results in dropped packet(s) 713 * almost immediately. To get more time between 714 * drops but still "push" the network to take advantage 715 * of improving conditions, we switch from exponential 716 * to linear window opening at some threshhold size. 717 * For a threshhold, we use half the current window 718 * size, truncated to a multiple of the mss. 719 * 720 * (the minimum cwnd that will give us exponential 721 * growth is 2 mss. We don't allow the threshhold 722 * to go below this.) 723 */ 724 { 725 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 726 if (win < 2) 727 win = 2; 728 tp->snd_cwnd = tp->t_maxseg; 729 tp->snd_ssthresh = win * tp->t_maxseg; 730 tp->t_dupacks = 0; 731 } 732 EXIT_FASTRECOVERY(tp); 733 (void) tcp_output(tp); 734 735#ifdef TCPDEBUG 736 if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 737 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 738 PRU_SLOWTIMO); 739#endif 740 return (0); 741} 742