tcp_timer.c revision 226318
1139823Simp/*- 211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 4. Neither the name of the University nor the names of its contributors 141541Srgrimes * may be used to endorse or promote products derived from this software 151541Srgrimes * without specific prior written permission. 161541Srgrimes * 171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271541Srgrimes * SUCH DAMAGE. 281541Srgrimes * 2911150Swollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 301541Srgrimes */ 311541Srgrimes 32172467Ssilby#include <sys/cdefs.h> 33172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 226318 2011-10-12 19:52:23Z np $"); 34172467Ssilby 3555679Sshin#include "opt_inet6.h" 3629514Sjoerg#include "opt_tcpdebug.h" 3729514Sjoerg 381541Srgrimes#include <sys/param.h> 3912172Sphk#include <sys/kernel.h> 40102967Sbde#include <sys/lock.h> 4178642Ssilby#include <sys/mbuf.h> 42102967Sbde#include <sys/mutex.h> 43102967Sbde#include <sys/protosw.h> 44205391Skmacy#include <sys/smp.h> 451541Srgrimes#include <sys/socket.h> 461541Srgrimes#include <sys/socketvar.h> 47102967Sbde#include <sys/sysctl.h> 48102967Sbde#include <sys/systm.h> 491541Srgrimes 50185571Sbz#include <net/if.h> 511541Srgrimes#include <net/route.h> 52196019Srwatson#include <net/vnet.h> 531541Srgrimes 54215166Slstewart#include <netinet/cc.h> 551541Srgrimes#include <netinet/in.h> 56102967Sbde#include <netinet/in_pcb.h> 571541Srgrimes#include <netinet/in_systm.h> 5855679Sshin#ifdef INET6 5955679Sshin#include <netinet6/in6_pcb.h> 6055679Sshin#endif 611541Srgrimes#include <netinet/ip_var.h> 621541Srgrimes#include <netinet/tcp_fsm.h> 631541Srgrimes#include <netinet/tcp_timer.h> 641541Srgrimes#include <netinet/tcp_var.h> 651541Srgrimes#include <netinet/tcpip.h> 6617138Sdg#ifdef TCPDEBUG 6717138Sdg#include <netinet/tcp_debug.h> 6817138Sdg#endif 691541Srgrimes 7050673Sjlemonint tcp_keepinit; 7150682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, 72180631Strhodes &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); 7318280Spst 7450673Sjlemonint tcp_keepidle; 7550682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, 76180631Strhodes &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin"); 7712172Sphk 7850673Sjlemonint tcp_keepintvl; 7950682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, 80180631Strhodes &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes"); 8112172Sphk 8250673Sjlemonint tcp_delacktime; 83167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW, 84167721Sandre &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", 8550682Sjlemon "Time before a delayed ACK is sent"); 86133874Srwatson 8750673Sjlemonint tcp_msl; 8850682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, 8950682Sjlemon &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); 9050673Sjlemon 91100335Sdillonint tcp_rexmit_min; 92100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW, 93167721Sandre &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", 94167721Sandre "Minimum Retransmission Timeout"); 95100335Sdillon 96100335Sdillonint tcp_rexmit_slop; 97100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW, 98167721Sandre &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", 99167721Sandre "Retransmission Timer Slop"); 100100335Sdillon 10187499Srwatsonstatic int always_keepalive = 1; 102133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 10346381Sbillf &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 10415039Sphk 105167036Smohansint tcp_fast_finwait2_recycle = 0; 106167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 107167721Sandre &tcp_fast_finwait2_recycle, 0, 108167721Sandre "Recycle closed FIN_WAIT_2 connections faster"); 109167036Smohans 110167036Smohansint tcp_finwait2_timeout; 111167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, 112167721Sandre &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); 113167036Smohans 114167036Smohans 11512296Sphkstatic int tcp_keepcnt = TCPTV_KEEPCNT; 11612296Sphk /* max idle probes */ 11750673Sjlemonint tcp_maxpersistidle; 11812296Sphk /* max idle time in persist */ 1191541Srgrimesint tcp_maxidle; 12011150Swollman 121205391Skmacystatic int per_cpu_timers = 0; 122205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, 123205391Skmacy &per_cpu_timers , 0, "run tcp timers on all cpus"); 124205391Skmacy 125205391Skmacy#define INP_CPU(inp) (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \ 126205391Skmacy ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0) 127205391Skmacy 1281541Srgrimes/* 1291541Srgrimes * Tcp protocol timeout routine called every 500 ms. 13050673Sjlemon * Updates timestamps used for TCP 1311541Srgrimes * causes finite state machine actions if timers expire. 1321541Srgrimes */ 1331541Srgrimesvoid 134172309Ssilbytcp_slowtimo(void) 1351541Srgrimes{ 136183550Szec VNET_ITERATOR_DECL(vnet_iter); 1371541Srgrimes 138195760Srwatson VNET_LIST_RLOCK_NOSLEEP(); 139183550Szec VNET_FOREACH(vnet_iter) { 140183550Szec CURVNET_SET(vnet_iter); 141183550Szec tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 142183550Szec INP_INFO_WLOCK(&V_tcbinfo); 143183550Szec (void) tcp_tw_2msl_scan(0); 144183550Szec INP_INFO_WUNLOCK(&V_tcbinfo); 145183550Szec CURVNET_RESTORE(); 146183550Szec } 147195760Srwatson VNET_LIST_RUNLOCK_NOSLEEP(); 1481541Srgrimes} 1491541Srgrimes 15073110Sjlemonint tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = 15173110Sjlemon { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 15273110Sjlemon 1531541Srgrimesint tcp_backoff[TCP_MAXRXTSHIFT + 1] = 154115824Shsu { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 }; 1551541Srgrimes 156115824Shsustatic int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */ 15711150Swollman 158157376Srwatsonstatic int tcp_timer_race; 159157376SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race, 160157376Srwatson 0, "Count of t_inpcb races on tcp_discardcb"); 161157376Srwatson 162172074Srwatson/* 163172074Srwatson * TCP timer processing. 164172074Srwatson */ 165172074Srwatson 16650673Sjlemonvoid 167172074Srwatsontcp_timer_delack(void *xtp) 1681541Srgrimes{ 169172074Srwatson struct tcpcb *tp = xtp; 170172074Srwatson struct inpcb *inp; 171183550Szec CURVNET_SET(tp->t_vnet); 1721541Srgrimes 173172074Srwatson inp = tp->t_inpcb; 174157376Srwatson /* 175172074Srwatson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 176172074Srwatson * tear-down mean we need it as a work-around for races between 177172074Srwatson * timers and tcp_discardcb(). 178172074Srwatson * 179172074Srwatson * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL")); 180157376Srwatson */ 181172074Srwatson if (inp == NULL) { 182172074Srwatson tcp_timer_race++; 183183550Szec CURVNET_RESTORE(); 184108265Shsu return; 185108265Shsu } 186178285Srwatson INP_WLOCK(inp); 187189848Srwatson if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack) 188172309Ssilby || !callout_active(&tp->t_timers->tt_delack)) { 189178285Srwatson INP_WUNLOCK(inp); 190183550Szec CURVNET_RESTORE(); 19150673Sjlemon return; 19250673Sjlemon } 193172309Ssilby callout_deactivate(&tp->t_timers->tt_delack); 1941541Srgrimes 19550673Sjlemon tp->t_flags |= TF_ACKNOW; 196190948Srwatson TCPSTAT_INC(tcps_delack); 19750673Sjlemon (void) tcp_output(tp); 198178285Srwatson INP_WUNLOCK(inp); 199183550Szec CURVNET_RESTORE(); 20050673Sjlemon} 20150673Sjlemon 202172074Srwatsonvoid 203172074Srwatsontcp_timer_2msl(void *xtp) 20450673Sjlemon{ 205172074Srwatson struct tcpcb *tp = xtp; 206172074Srwatson struct inpcb *inp; 207183550Szec CURVNET_SET(tp->t_vnet); 20850673Sjlemon#ifdef TCPDEBUG 20950673Sjlemon int ostate; 21050673Sjlemon 21150673Sjlemon ostate = tp->t_state; 21250673Sjlemon#endif 213157376Srwatson /* 214172074Srwatson * XXXRW: Does this actually happen? 215172074Srwatson */ 216181803Sbz INP_INFO_WLOCK(&V_tcbinfo); 217172074Srwatson inp = tp->t_inpcb; 218172074Srwatson /* 219172074Srwatson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 220172074Srwatson * tear-down mean we need it as a work-around for races between 221172074Srwatson * timers and tcp_discardcb(). 222172074Srwatson * 223172074Srwatson * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL")); 224172074Srwatson */ 225172074Srwatson if (inp == NULL) { 226172074Srwatson tcp_timer_race++; 227181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 228183550Szec CURVNET_RESTORE(); 229172074Srwatson return; 230172074Srwatson } 231178285Srwatson INP_WLOCK(inp); 232172074Srwatson tcp_free_sackholes(tp); 233189848Srwatson if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) || 234172309Ssilby !callout_active(&tp->t_timers->tt_2msl)) { 235178285Srwatson INP_WUNLOCK(tp->t_inpcb); 236181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 237183550Szec CURVNET_RESTORE(); 238172074Srwatson return; 239172074Srwatson } 240172309Ssilby callout_deactivate(&tp->t_timers->tt_2msl); 241172074Srwatson /* 2421541Srgrimes * 2 MSL timeout in shutdown went off. If we're closed but 2431541Srgrimes * still waiting for peer to close and connection has been idle 2441541Srgrimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 2451541Srgrimes * control block. Otherwise, check again in a bit. 246167036Smohans * 247167036Smohans * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 248167036Smohans * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 249167036Smohans * Ignore fact that there were recent incoming segments. 2501541Srgrimes */ 251167036Smohans if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 && 252172074Srwatson tp->t_inpcb && tp->t_inpcb->inp_socket && 253167036Smohans (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { 254190948Srwatson TCPSTAT_INC(tcps_finwait2_drops); 255172074Srwatson tp = tcp_close(tp); 256167036Smohans } else { 257167036Smohans if (tp->t_state != TCPS_TIME_WAIT && 258194305Sjhb ticks - tp->t_rcvtime <= tcp_maxidle) 259205391Skmacy callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl, 260205391Skmacy tcp_timer_2msl, tp, INP_CPU(inp)); 261172074Srwatson else 262172074Srwatson tp = tcp_close(tp); 263172074Srwatson } 2641541Srgrimes 26550673Sjlemon#ifdef TCPDEBUG 266172312Skib if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 26797658Stanimura tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 26897658Stanimura PRU_SLOWTIMO); 26950673Sjlemon#endif 270172074Srwatson if (tp != NULL) 271178285Srwatson INP_WUNLOCK(inp); 272181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 273183550Szec CURVNET_RESTORE(); 27450673Sjlemon} 27550673Sjlemon 276172074Srwatsonvoid 277172074Srwatsontcp_timer_keep(void *xtp) 27850673Sjlemon{ 279172074Srwatson struct tcpcb *tp = xtp; 28078642Ssilby struct tcptemp *t_template; 281172074Srwatson struct inpcb *inp; 282183550Szec CURVNET_SET(tp->t_vnet); 28350673Sjlemon#ifdef TCPDEBUG 28450673Sjlemon int ostate; 28550673Sjlemon 28650673Sjlemon ostate = tp->t_state; 28750673Sjlemon#endif 288181803Sbz INP_INFO_WLOCK(&V_tcbinfo); 289172074Srwatson inp = tp->t_inpcb; 290157376Srwatson /* 291172074Srwatson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 292172074Srwatson * tear-down mean we need it as a work-around for races between 293172074Srwatson * timers and tcp_discardcb(). 294172074Srwatson * 295172074Srwatson * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL")); 296172074Srwatson */ 297172074Srwatson if (inp == NULL) { 298172074Srwatson tcp_timer_race++; 299181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 300183550Szec CURVNET_RESTORE(); 301172074Srwatson return; 302172074Srwatson } 303178285Srwatson INP_WLOCK(inp); 304189848Srwatson if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep) 305172309Ssilby || !callout_active(&tp->t_timers->tt_keep)) { 306178285Srwatson INP_WUNLOCK(inp); 307181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 308183550Szec CURVNET_RESTORE(); 309172074Srwatson return; 310172074Srwatson } 311172309Ssilby callout_deactivate(&tp->t_timers->tt_keep); 312172074Srwatson /* 31350673Sjlemon * Keep-alive timer went off; send something 31450673Sjlemon * or drop connection if idle for too long. 3151541Srgrimes */ 316190948Srwatson TCPSTAT_INC(tcps_keeptimeo); 31750673Sjlemon if (tp->t_state < TCPS_ESTABLISHED) 31850673Sjlemon goto dropit; 319122326Ssam if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && 32050673Sjlemon tp->t_state <= TCPS_CLOSING) { 321194305Sjhb if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle) 32250673Sjlemon goto dropit; 3231541Srgrimes /* 32450673Sjlemon * Send a packet designed to force a response 32550673Sjlemon * if the peer is up and reachable: 32650673Sjlemon * either an ACK if the connection is still alive, 32750673Sjlemon * or an RST if the peer has closed the connection 32850673Sjlemon * due to timeout or reboot. 32950673Sjlemon * Using sequence number tp->snd_una-1 33050673Sjlemon * causes the transmitted zero-length segment 33150673Sjlemon * to lie outside the receive window; 33250673Sjlemon * by the protocol spec, this requires the 33350673Sjlemon * correspondent TCP to respond. 3341541Srgrimes */ 335190948Srwatson TCPSTAT_INC(tcps_keepprobe); 336111144Sjlemon t_template = tcpip_maketemplate(inp); 33778642Ssilby if (t_template) { 33878642Ssilby tcp_respond(tp, t_template->tt_ipgen, 33978642Ssilby &t_template->tt_t, (struct mbuf *)NULL, 34078642Ssilby tp->rcv_nxt, tp->snd_una - 1, 0); 341179487Srwatson free(t_template, M_TEMP); 34278642Ssilby } 343205391Skmacy callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp)); 34497658Stanimura } else 345205391Skmacy callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp)); 34650673Sjlemon 34750673Sjlemon#ifdef TCPDEBUG 348122326Ssam if (inp->inp_socket->so_options & SO_DEBUG) 34955679Sshin tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 35050673Sjlemon PRU_SLOWTIMO); 35150673Sjlemon#endif 352178285Srwatson INP_WUNLOCK(inp); 353181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 354183550Szec CURVNET_RESTORE(); 355172074Srwatson return; 35650673Sjlemon 35750673Sjlemondropit: 358190948Srwatson TCPSTAT_INC(tcps_keepdrops); 359172074Srwatson tp = tcp_drop(tp, ETIMEDOUT); 360172074Srwatson 361172074Srwatson#ifdef TCPDEBUG 362172074Srwatson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 363172074Srwatson tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 364172074Srwatson PRU_SLOWTIMO); 365172074Srwatson#endif 366172074Srwatson if (tp != NULL) 367178285Srwatson INP_WUNLOCK(tp->t_inpcb); 368181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 369183550Szec CURVNET_RESTORE(); 37050673Sjlemon} 37150673Sjlemon 372172074Srwatsonvoid 373172074Srwatsontcp_timer_persist(void *xtp) 37450673Sjlemon{ 375172074Srwatson struct tcpcb *tp = xtp; 376172074Srwatson struct inpcb *inp; 377183550Szec CURVNET_SET(tp->t_vnet); 37850673Sjlemon#ifdef TCPDEBUG 37950673Sjlemon int ostate; 38050673Sjlemon 38150673Sjlemon ostate = tp->t_state; 38250673Sjlemon#endif 383181803Sbz INP_INFO_WLOCK(&V_tcbinfo); 384172074Srwatson inp = tp->t_inpcb; 385157376Srwatson /* 386172074Srwatson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 387172074Srwatson * tear-down mean we need it as a work-around for races between 388172074Srwatson * timers and tcp_discardcb(). 389172074Srwatson * 390172074Srwatson * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL")); 391172074Srwatson */ 392172074Srwatson if (inp == NULL) { 393172074Srwatson tcp_timer_race++; 394181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 395183550Szec CURVNET_RESTORE(); 396172074Srwatson return; 397172074Srwatson } 398178285Srwatson INP_WLOCK(inp); 399189848Srwatson if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist) 400172309Ssilby || !callout_active(&tp->t_timers->tt_persist)) { 401178285Srwatson INP_WUNLOCK(inp); 402181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 403183550Szec CURVNET_RESTORE(); 404172074Srwatson return; 405172074Srwatson } 406172309Ssilby callout_deactivate(&tp->t_timers->tt_persist); 407172074Srwatson /* 40850673Sjlemon * Persistance timer into zero window. 40950673Sjlemon * Force a byte to be output, if possible. 41050673Sjlemon */ 411190948Srwatson TCPSTAT_INC(tcps_persisttimeo); 41250673Sjlemon /* 41350673Sjlemon * Hack: if the peer is dead/unreachable, we do not 41450673Sjlemon * time out if the window is closed. After a full 41550673Sjlemon * backoff, drop the connection if the idle time 41650673Sjlemon * (no responses to probes) reaches the maximum 41750673Sjlemon * backoff that we would use if retransmitting. 41850673Sjlemon */ 41950673Sjlemon if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 420194305Sjhb (ticks - tp->t_rcvtime >= tcp_maxpersistidle || 421194305Sjhb ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 422190948Srwatson TCPSTAT_INC(tcps_persistdrop); 423172074Srwatson tp = tcp_drop(tp, ETIMEDOUT); 424172074Srwatson goto out; 42550673Sjlemon } 42650673Sjlemon tcp_setpersist(tp); 427146463Sps tp->t_flags |= TF_FORCEDATA; 42850673Sjlemon (void) tcp_output(tp); 429146463Sps tp->t_flags &= ~TF_FORCEDATA; 43050673Sjlemon 431172074Srwatsonout: 43250673Sjlemon#ifdef TCPDEBUG 433158644Sglebius if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) 434158644Sglebius tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); 43550673Sjlemon#endif 436172074Srwatson if (tp != NULL) 437178285Srwatson INP_WUNLOCK(inp); 438181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 439183550Szec CURVNET_RESTORE(); 44050673Sjlemon} 44150673Sjlemon 442172074Srwatsonvoid 443172074Srwatsontcp_timer_rexmt(void * xtp) 44450673Sjlemon{ 445172074Srwatson struct tcpcb *tp = xtp; 446183550Szec CURVNET_SET(tp->t_vnet); 44750673Sjlemon int rexmt; 448172074Srwatson int headlocked; 449172074Srwatson struct inpcb *inp; 45050673Sjlemon#ifdef TCPDEBUG 45150673Sjlemon int ostate; 45250673Sjlemon 45350673Sjlemon ostate = tp->t_state; 45450673Sjlemon#endif 455205391Skmacy INP_INFO_RLOCK(&V_tcbinfo); 456172074Srwatson inp = tp->t_inpcb; 457172074Srwatson /* 458172074Srwatson * XXXRW: While this assert is in fact correct, bugs in the tcpcb 459172074Srwatson * tear-down mean we need it as a work-around for races between 460172074Srwatson * timers and tcp_discardcb(). 461172074Srwatson * 462172074Srwatson * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL")); 463172074Srwatson */ 464172074Srwatson if (inp == NULL) { 465172074Srwatson tcp_timer_race++; 466205391Skmacy INP_INFO_RUNLOCK(&V_tcbinfo); 467183550Szec CURVNET_RESTORE(); 468172074Srwatson return; 469172074Srwatson } 470178285Srwatson INP_WLOCK(inp); 471189848Srwatson if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt) 472172309Ssilby || !callout_active(&tp->t_timers->tt_rexmt)) { 473178285Srwatson INP_WUNLOCK(inp); 474205391Skmacy INP_INFO_RUNLOCK(&V_tcbinfo); 475183550Szec CURVNET_RESTORE(); 476172074Srwatson return; 477172074Srwatson } 478172309Ssilby callout_deactivate(&tp->t_timers->tt_rexmt); 479130989Sps tcp_free_sackholes(tp); 48050673Sjlemon /* 48150673Sjlemon * Retransmission timer went off. Message has not 48250673Sjlemon * been acked within retransmit interval. Back off 48350673Sjlemon * to a longer retransmit interval and retransmit one segment. 48450673Sjlemon */ 48550673Sjlemon if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 48650673Sjlemon tp->t_rxtshift = TCP_MAXRXTSHIFT; 487190948Srwatson TCPSTAT_INC(tcps_timeoutdrop); 488205391Skmacy in_pcbref(inp); 489217126Sjhb INP_INFO_RUNLOCK(&V_tcbinfo); 490217126Sjhb INP_WUNLOCK(inp); 491217126Sjhb INP_INFO_WLOCK(&V_tcbinfo); 492217126Sjhb INP_WLOCK(inp); 493222488Srwatson if (in_pcbrele_wlocked(inp)) { 494217126Sjhb INP_INFO_WUNLOCK(&V_tcbinfo); 495217126Sjhb CURVNET_RESTORE(); 496217126Sjhb return; 497217126Sjhb } 498226318Snp if (inp->inp_flags & INP_DROPPED) { 499226318Snp INP_WUNLOCK(inp); 500226318Snp INP_INFO_WUNLOCK(&V_tcbinfo); 501226318Snp CURVNET_RESTORE(); 502226318Snp return; 503226318Snp } 504226318Snp 505172074Srwatson tp = tcp_drop(tp, tp->t_softerror ? 506172074Srwatson tp->t_softerror : ETIMEDOUT); 507205391Skmacy headlocked = 1; 508172074Srwatson goto out; 50950673Sjlemon } 510205391Skmacy INP_INFO_RUNLOCK(&V_tcbinfo); 511172074Srwatson headlocked = 0; 51250673Sjlemon if (tp->t_rxtshift == 1) { 51313229Solah /* 51450673Sjlemon * first retransmit; record ssthresh and cwnd so they can 515133874Srwatson * be recovered if this turns out to be a "bad" retransmit. 516133874Srwatson * A retransmit is considered "bad" if an ACK for this 51750673Sjlemon * segment is received within RTT/2 interval; the assumption 518133874Srwatson * here is that the ACK was already in flight. See 51950673Sjlemon * "On Estimating End-to-End Network Path Properties" by 52050673Sjlemon * Allman and Paxson for more details. 5211541Srgrimes */ 52250673Sjlemon tp->snd_cwnd_prev = tp->snd_cwnd; 52350673Sjlemon tp->snd_ssthresh_prev = tp->snd_ssthresh; 524117650Shsu tp->snd_recover_prev = tp->snd_recover; 525215166Slstewart if (IN_FASTRECOVERY(tp->t_flags)) 526215166Slstewart tp->t_flags |= TF_WASFRECOVERY; 527117650Shsu else 528215166Slstewart tp->t_flags &= ~TF_WASFRECOVERY; 529215166Slstewart if (IN_CONGRECOVERY(tp->t_flags)) 530215166Slstewart tp->t_flags |= TF_WASCRECOVERY; 531215166Slstewart else 532215166Slstewart tp->t_flags &= ~TF_WASCRECOVERY; 53350673Sjlemon tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); 534221209Sjhb tp->t_flags |= TF_PREVVALID; 535221209Sjhb } else 536221209Sjhb tp->t_flags &= ~TF_PREVVALID; 537190948Srwatson TCPSTAT_INC(tcps_rexmttimeo); 53873110Sjlemon if (tp->t_state == TCPS_SYN_SENT) 53973110Sjlemon rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; 54073110Sjlemon else 54173110Sjlemon rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 54250673Sjlemon TCPT_RANGESET(tp->t_rxtcur, rexmt, 54350673Sjlemon tp->t_rttmin, TCPTV_REXMTMAX); 54450673Sjlemon /* 545216621Sjhb * Disable rfc1323 if we haven't got any response to 546133874Srwatson * our third SYN to work-around some broken terminal servers 547133874Srwatson * (most of which have hopefully been retired) that have bad VJ 548133874Srwatson * header compression code which trashes TCP segments containing 54977539Sjesper * unknown-to-them TCP options. 55077539Sjesper */ 55177539Sjesper if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) 552137139Sandre tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP); 55377539Sjesper /* 554122922Sandre * If we backed off this far, our srtt estimate is probably bogus. 555122922Sandre * Clobber it so we'll take the next rtt measurement as our srtt; 55650673Sjlemon * move the current srtt into rttvar to keep the current 55750673Sjlemon * retransmit times until then. 55850673Sjlemon */ 55950673Sjlemon if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 56055679Sshin#ifdef INET6 56155679Sshin if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) 56255679Sshin in6_losing(tp->t_inpcb); 56355679Sshin else 56455679Sshin#endif 56550673Sjlemon tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 56650673Sjlemon tp->t_srtt = 0; 56750673Sjlemon } 56850673Sjlemon tp->snd_nxt = tp->snd_una; 569117650Shsu tp->snd_recover = tp->snd_max; 57050673Sjlemon /* 57150673Sjlemon * Force a segment to be sent. 57250673Sjlemon */ 57350673Sjlemon tp->t_flags |= TF_ACKNOW; 57450673Sjlemon /* 57550673Sjlemon * If timing a segment in this window, stop the timer. 57650673Sjlemon */ 57750673Sjlemon tp->t_rtttime = 0; 578215166Slstewart 579216101Slstewart cc_cong_signal(tp, NULL, CC_RTO); 580215166Slstewart 58150673Sjlemon (void) tcp_output(tp); 5821541Srgrimes 583172074Srwatsonout: 58450673Sjlemon#ifdef TCPDEBUG 585157136Srwatson if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) 58697658Stanimura tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, 58797658Stanimura PRU_SLOWTIMO); 5881541Srgrimes#endif 589172074Srwatson if (tp != NULL) 590178285Srwatson INP_WUNLOCK(inp); 591172074Srwatson if (headlocked) 592181803Sbz INP_INFO_WUNLOCK(&V_tcbinfo); 593183550Szec CURVNET_RESTORE(); 5941541Srgrimes} 595172074Srwatson 596172074Srwatsonvoid 597172074Srwatsontcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta) 598172074Srwatson{ 599172074Srwatson struct callout *t_callout; 600172074Srwatson void *f_callout; 601205391Skmacy struct inpcb *inp = tp->t_inpcb; 602205391Skmacy int cpu = INP_CPU(inp); 603172074Srwatson 604172074Srwatson switch (timer_type) { 605172074Srwatson case TT_DELACK: 606172309Ssilby t_callout = &tp->t_timers->tt_delack; 607172074Srwatson f_callout = tcp_timer_delack; 608172074Srwatson break; 609172074Srwatson case TT_REXMT: 610172309Ssilby t_callout = &tp->t_timers->tt_rexmt; 611172074Srwatson f_callout = tcp_timer_rexmt; 612172074Srwatson break; 613172074Srwatson case TT_PERSIST: 614172309Ssilby t_callout = &tp->t_timers->tt_persist; 615172074Srwatson f_callout = tcp_timer_persist; 616172074Srwatson break; 617172074Srwatson case TT_KEEP: 618172309Ssilby t_callout = &tp->t_timers->tt_keep; 619172074Srwatson f_callout = tcp_timer_keep; 620172074Srwatson break; 621172074Srwatson case TT_2MSL: 622172309Ssilby t_callout = &tp->t_timers->tt_2msl; 623172074Srwatson f_callout = tcp_timer_2msl; 624172074Srwatson break; 625172074Srwatson default: 626172074Srwatson panic("bad timer_type"); 627172074Srwatson } 628172074Srwatson if (delta == 0) { 629172074Srwatson callout_stop(t_callout); 630172074Srwatson } else { 631205391Skmacy callout_reset_on(t_callout, delta, f_callout, tp, cpu); 632172074Srwatson } 633172074Srwatson} 634172074Srwatson 635172074Srwatsonint 636172074Srwatsontcp_timer_active(struct tcpcb *tp, int timer_type) 637172074Srwatson{ 638172074Srwatson struct callout *t_callout; 639172074Srwatson 640172074Srwatson switch (timer_type) { 641172074Srwatson case TT_DELACK: 642172309Ssilby t_callout = &tp->t_timers->tt_delack; 643172074Srwatson break; 644172074Srwatson case TT_REXMT: 645172309Ssilby t_callout = &tp->t_timers->tt_rexmt; 646172074Srwatson break; 647172074Srwatson case TT_PERSIST: 648172309Ssilby t_callout = &tp->t_timers->tt_persist; 649172074Srwatson break; 650172074Srwatson case TT_KEEP: 651172309Ssilby t_callout = &tp->t_timers->tt_keep; 652172074Srwatson break; 653172074Srwatson case TT_2MSL: 654172309Ssilby t_callout = &tp->t_timers->tt_2msl; 655172074Srwatson break; 656172074Srwatson default: 657172074Srwatson panic("bad timer_type"); 658172074Srwatson } 659172074Srwatson return callout_active(t_callout); 660172074Srwatson} 661197244Ssilby 662197244Ssilby#define ticks_to_msecs(t) (1000*(t) / hz) 663197244Ssilby 664197244Ssilbyvoid 665197244Ssilbytcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer) 666197244Ssilby{ 667197244Ssilby bzero(xtimer, sizeof(struct xtcp_timer)); 668197244Ssilby if (timer == NULL) 669197244Ssilby return; 670197244Ssilby if (callout_active(&timer->tt_delack)) 671197244Ssilby xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks); 672197244Ssilby if (callout_active(&timer->tt_rexmt)) 673197244Ssilby xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks); 674197244Ssilby if (callout_active(&timer->tt_persist)) 675197244Ssilby xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks); 676197244Ssilby if (callout_active(&timer->tt_keep)) 677197244Ssilby xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks); 678197244Ssilby if (callout_active(&timer->tt_2msl)) 679197244Ssilby xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks); 680197244Ssilby xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime); 681197244Ssilby} 682