tcp_timer.c revision 46381
11541Srgrimes/* 211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 3311150Swollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 3446381Sbillf * $Id: tcp_timer.c,v 1.28 1998/04/24 09:25:35 dg Exp $ 351541Srgrimes */ 361541Srgrimes 3732752Seivind#include "opt_compat.h" 3829514Sjoerg#include "opt_tcpdebug.h" 3929514Sjoerg 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 4212172Sphk#include <sys/kernel.h> 4312172Sphk#include <sys/sysctl.h> 441541Srgrimes#include <sys/socket.h> 451541Srgrimes#include <sys/socketvar.h> 461541Srgrimes#include <sys/protosw.h> 471541Srgrimes 4811150Swollman#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 4911150Swollman 501541Srgrimes#include <net/route.h> 511541Srgrimes 521541Srgrimes#include <netinet/in.h> 531541Srgrimes#include <netinet/in_systm.h> 541541Srgrimes#include <netinet/in_pcb.h> 551541Srgrimes#include <netinet/ip_var.h> 561541Srgrimes#include <netinet/tcp.h> 571541Srgrimes#include <netinet/tcp_fsm.h> 581541Srgrimes#include <netinet/tcp_seq.h> 591541Srgrimes#include <netinet/tcp_timer.h> 601541Srgrimes#include <netinet/tcp_var.h> 611541Srgrimes#include <netinet/tcpip.h> 6217138Sdg#ifdef TCPDEBUG 6317138Sdg#include <netinet/tcp_debug.h> 6417138Sdg#endif 651541Srgrimes 6618280Spstint tcp_keepinit = TCPTV_KEEP_INIT; 6718280SpstSYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, 6818280Spst CTLFLAG_RW, &tcp_keepinit , 0, ""); 6918280Spst 701541Srgrimesint tcp_keepidle = TCPTV_KEEP_IDLE; 7112172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, 7212172Sphk CTLFLAG_RW, &tcp_keepidle , 0, ""); 7312172Sphk 7412296Sphkstatic int tcp_keepintvl = TCPTV_KEEPINTVL; 7512172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, 7612172Sphk CTLFLAG_RW, &tcp_keepintvl , 0, ""); 7712172Sphk 7815039Sphkstatic int always_keepalive = 0; 7946381SbillfSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, 8046381Sbillf &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); 8115039Sphk 8212296Sphkstatic int tcp_keepcnt = TCPTV_KEEPCNT; 8312296Sphk /* max idle probes */ 8412296Sphkstatic int tcp_maxpersistidle = TCPTV_KEEP_IDLE; 8512296Sphk /* max idle time in persist */ 861541Srgrimesint tcp_maxidle; 8711150Swollman 881541Srgrimes/* 891541Srgrimes * Fast timeout routine for processing delayed acks 901541Srgrimes */ 911541Srgrimesvoid 921541Srgrimestcp_fasttimo() 931541Srgrimes{ 941541Srgrimes register struct inpcb *inp; 951541Srgrimes register struct tcpcb *tp; 967684Sdg int s; 971541Srgrimes 9833846Sdg if (tcp_delack_enabled) { 9933846Sdg s = splnet(); 10033846Sdg for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { 10133846Sdg if ((tp = (struct tcpcb *)inp->inp_ppcb) && 10233846Sdg (tp->t_flags & TF_DELACK)) { 10333846Sdg tp->t_flags &= ~TF_DELACK; 10433846Sdg tp->t_flags |= TF_ACKNOW; 10533846Sdg tcpstat.tcps_delack++; 10633846Sdg (void) tcp_output(tp); 10733846Sdg } 1081541Srgrimes } 10933846Sdg splx(s); 1107684Sdg } 1111541Srgrimes} 1121541Srgrimes 1131541Srgrimes/* 1141541Srgrimes * Tcp protocol timeout routine called every 500 ms. 1151541Srgrimes * Updates the timers in all active tcb's and 1161541Srgrimes * causes finite state machine actions if timers expire. 1171541Srgrimes */ 1181541Srgrimesvoid 1191541Srgrimestcp_slowtimo() 1201541Srgrimes{ 1211541Srgrimes register struct inpcb *ip, *ipnxt; 1221541Srgrimes register struct tcpcb *tp; 1231541Srgrimes register int i; 1247684Sdg int s; 12517096Swollman#ifdef TCPDEBUG 12617096Swollman int ostate; 12717096Swollman#endif 1281541Srgrimes 1297684Sdg s = splnet(); 1307684Sdg 13111150Swollman tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 1327684Sdg 1337684Sdg ip = tcb.lh_first; 1347684Sdg if (ip == NULL) { 1357684Sdg splx(s); 1367684Sdg return; 1377684Sdg } 1381541Srgrimes /* 1391541Srgrimes * Search through tcb's and update active timers. 1401541Srgrimes */ 1417684Sdg for (; ip != NULL; ip = ipnxt) { 1427684Sdg ipnxt = ip->inp_list.le_next; 1431541Srgrimes tp = intotcpcb(ip); 14411150Swollman if (tp == 0 || tp->t_state == TCPS_LISTEN) 1451541Srgrimes continue; 1461541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) { 1471541Srgrimes if (tp->t_timer[i] && --tp->t_timer[i] == 0) { 14817096Swollman#ifdef TCPDEBUG 14917096Swollman ostate = tp->t_state; 15017096Swollman#endif 15117096Swollman tp = tcp_timers(tp, i); 15217096Swollman if (tp == NULL) 1531541Srgrimes goto tpgone; 15417096Swollman#ifdef TCPDEBUG 15517096Swollman if (tp->t_inpcb->inp_socket->so_options 15617096Swollman & SO_DEBUG) 15717096Swollman tcp_trace(TA_USER, ostate, tp, 15817096Swollman (struct tcpiphdr *)0, 15917096Swollman PRU_SLOWTIMO); 16017096Swollman#endif 1611541Srgrimes } 1621541Srgrimes } 1631541Srgrimes tp->t_idle++; 1646283Swollman tp->t_duration++; 1651541Srgrimes if (tp->t_rtt) 1661541Srgrimes tp->t_rtt++; 1671541Srgrimestpgone: 1681541Srgrimes ; 1691541Srgrimes } 1701541Srgrimes tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ 1711541Srgrimes#ifdef TCP_COMPAT_42 1721541Srgrimes if ((int)tcp_iss < 0) 17311150Swollman tcp_iss = TCP_ISSINCR; /* XXX */ 1741541Srgrimes#endif 1751541Srgrimes tcp_now++; /* for timestamps */ 1761541Srgrimes splx(s); 1771541Srgrimes} 1781541Srgrimes 1791541Srgrimes/* 1801541Srgrimes * Cancel all timers for TCP tp. 1811541Srgrimes */ 1821541Srgrimesvoid 1831541Srgrimestcp_canceltimers(tp) 1841541Srgrimes struct tcpcb *tp; 1851541Srgrimes{ 1861541Srgrimes register int i; 1871541Srgrimes 1881541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) 1891541Srgrimes tp->t_timer[i] = 0; 1901541Srgrimes} 1911541Srgrimes 1921541Srgrimesint tcp_backoff[TCP_MAXRXTSHIFT + 1] = 1931541Srgrimes { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 1941541Srgrimes 19512296Sphkstatic int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 19611150Swollman 1971541Srgrimes/* 1981541Srgrimes * TCP timer processing. 1991541Srgrimes */ 2001541Srgrimesstruct tcpcb * 2011541Srgrimestcp_timers(tp, timer) 2021541Srgrimes register struct tcpcb *tp; 2031541Srgrimes int timer; 2041541Srgrimes{ 2051541Srgrimes register int rexmt; 2061541Srgrimes 2071541Srgrimes switch (timer) { 2081541Srgrimes 2091541Srgrimes /* 2101541Srgrimes * 2 MSL timeout in shutdown went off. If we're closed but 2111541Srgrimes * still waiting for peer to close and connection has been idle 2121541Srgrimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 2131541Srgrimes * control block. Otherwise, check again in a bit. 2141541Srgrimes */ 2151541Srgrimes case TCPT_2MSL: 2161541Srgrimes if (tp->t_state != TCPS_TIME_WAIT && 2171541Srgrimes tp->t_idle <= tcp_maxidle) 2181541Srgrimes tp->t_timer[TCPT_2MSL] = tcp_keepintvl; 2191541Srgrimes else 2201541Srgrimes tp = tcp_close(tp); 2211541Srgrimes break; 2221541Srgrimes 2231541Srgrimes /* 2241541Srgrimes * Retransmission timer went off. Message has not 2251541Srgrimes * been acked within retransmit interval. Back off 2261541Srgrimes * to a longer retransmit interval and retransmit one segment. 2271541Srgrimes */ 2281541Srgrimes case TCPT_REXMT: 2291541Srgrimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 2301541Srgrimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 2311541Srgrimes tcpstat.tcps_timeoutdrop++; 2321541Srgrimes tp = tcp_drop(tp, tp->t_softerror ? 2331541Srgrimes tp->t_softerror : ETIMEDOUT); 2341541Srgrimes break; 2351541Srgrimes } 2361541Srgrimes tcpstat.tcps_rexmttimeo++; 2371541Srgrimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 2381541Srgrimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 2391541Srgrimes tp->t_rttmin, TCPTV_REXMTMAX); 2401541Srgrimes tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 2411541Srgrimes /* 2421541Srgrimes * If losing, let the lower level know and try for 2431541Srgrimes * a better route. Also, if we backed off this far, 2441541Srgrimes * our srtt estimate is probably bogus. Clobber it 2451541Srgrimes * so we'll take the next rtt measurement as our srtt; 2461541Srgrimes * move the current srtt into rttvar to keep the current 2471541Srgrimes * retransmit times until then. 2481541Srgrimes */ 2491541Srgrimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 2501541Srgrimes in_losing(tp->t_inpcb); 2511541Srgrimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 2521541Srgrimes tp->t_srtt = 0; 2531541Srgrimes } 2541541Srgrimes tp->snd_nxt = tp->snd_una; 2551541Srgrimes /* 25613229Solah * Force a segment to be sent. 25713229Solah */ 25813229Solah tp->t_flags |= TF_ACKNOW; 25913229Solah /* 2601541Srgrimes * If timing a segment in this window, stop the timer. 2611541Srgrimes */ 2621541Srgrimes tp->t_rtt = 0; 2631541Srgrimes /* 2641541Srgrimes * Close the congestion window down to one segment 2651541Srgrimes * (we'll open it by one segment for each ack we get). 2661541Srgrimes * Since we probably have a window's worth of unacked 2671541Srgrimes * data accumulated, this "slow start" keeps us from 2681541Srgrimes * dumping all that data as back-to-back packets (which 2691541Srgrimes * might overwhelm an intermediate gateway). 2701541Srgrimes * 2711541Srgrimes * There are two phases to the opening: Initially we 2721541Srgrimes * open by one mss on each ack. This makes the window 2731541Srgrimes * size increase exponentially with time. If the 2741541Srgrimes * window is larger than the path can handle, this 2751541Srgrimes * exponential growth results in dropped packet(s) 2768876Srgrimes * almost immediately. To get more time between 2771541Srgrimes * drops but still "push" the network to take advantage 2781541Srgrimes * of improving conditions, we switch from exponential 2791541Srgrimes * to linear window opening at some threshhold size. 2801541Srgrimes * For a threshhold, we use half the current window 2811541Srgrimes * size, truncated to a multiple of the mss. 2821541Srgrimes * 2831541Srgrimes * (the minimum cwnd that will give us exponential 2841541Srgrimes * growth is 2 mss. We don't allow the threshhold 2851541Srgrimes * to go below this.) 2861541Srgrimes */ 2871541Srgrimes { 2881541Srgrimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 2891541Srgrimes if (win < 2) 2901541Srgrimes win = 2; 2911541Srgrimes tp->snd_cwnd = tp->t_maxseg; 2921541Srgrimes tp->snd_ssthresh = win * tp->t_maxseg; 2931541Srgrimes tp->t_dupacks = 0; 2941541Srgrimes } 2951541Srgrimes (void) tcp_output(tp); 2961541Srgrimes break; 2971541Srgrimes 2981541Srgrimes /* 2991541Srgrimes * Persistance timer into zero window. 3001541Srgrimes * Force a byte to be output, if possible. 3011541Srgrimes */ 3021541Srgrimes case TCPT_PERSIST: 3031541Srgrimes tcpstat.tcps_persisttimeo++; 3049773Sdg /* 3059773Sdg * Hack: if the peer is dead/unreachable, we do not 3069773Sdg * time out if the window is closed. After a full 3079773Sdg * backoff, drop the connection if the idle time 3089773Sdg * (no responses to probes) reaches the maximum 3099773Sdg * backoff that we would use if retransmitting. 3109773Sdg */ 31135419Sdg if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 31235419Sdg (tp->t_idle >= tcp_maxpersistidle || 31335419Sdg tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 31435419Sdg tcpstat.tcps_persistdrop++; 31535419Sdg tp = tcp_drop(tp, ETIMEDOUT); 31635419Sdg break; 3179773Sdg } 3181541Srgrimes tcp_setpersist(tp); 3191541Srgrimes tp->t_force = 1; 3201541Srgrimes (void) tcp_output(tp); 3211541Srgrimes tp->t_force = 0; 3221541Srgrimes break; 3231541Srgrimes 3241541Srgrimes /* 3251541Srgrimes * Keep-alive timer went off; send something 3261541Srgrimes * or drop connection if idle for too long. 3271541Srgrimes */ 3281541Srgrimes case TCPT_KEEP: 3291541Srgrimes tcpstat.tcps_keeptimeo++; 3301541Srgrimes if (tp->t_state < TCPS_ESTABLISHED) 3311541Srgrimes goto dropit; 33215039Sphk if ((always_keepalive || 33315039Sphk tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && 33415262Sdg tp->t_state <= TCPS_CLOSING) { 3351541Srgrimes if (tp->t_idle >= tcp_keepidle + tcp_maxidle) 3361541Srgrimes goto dropit; 3371541Srgrimes /* 3381541Srgrimes * Send a packet designed to force a response 3391541Srgrimes * if the peer is up and reachable: 3401541Srgrimes * either an ACK if the connection is still alive, 3411541Srgrimes * or an RST if the peer has closed the connection 3421541Srgrimes * due to timeout or reboot. 3431541Srgrimes * Using sequence number tp->snd_una-1 3441541Srgrimes * causes the transmitted zero-length segment 3451541Srgrimes * to lie outside the receive window; 3461541Srgrimes * by the protocol spec, this requires the 3471541Srgrimes * correspondent TCP to respond. 3481541Srgrimes */ 3491541Srgrimes tcpstat.tcps_keepprobe++; 3501541Srgrimes#ifdef TCP_COMPAT_42 3511541Srgrimes /* 3521541Srgrimes * The keepalive packet must have nonzero length 3531541Srgrimes * to get a 4.2 host to respond. 3541541Srgrimes */ 3551541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3561541Srgrimes tp->rcv_nxt - 1, tp->snd_una - 1, 0); 3571541Srgrimes#else 3581541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3591541Srgrimes tp->rcv_nxt, tp->snd_una - 1, 0); 3601541Srgrimes#endif 3611541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepintvl; 3621541Srgrimes } else 3631541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepidle; 3641541Srgrimes break; 3651541Srgrimes dropit: 3661541Srgrimes tcpstat.tcps_keepdrops++; 3671541Srgrimes tp = tcp_drop(tp, ETIMEDOUT); 3681541Srgrimes break; 3691541Srgrimes } 3701541Srgrimes return (tp); 3711541Srgrimes} 372