tcp_timer.c revision 12296
11541Srgrimes/* 211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 3311150Swollman * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 3412296Sphk * $Id: tcp_timer.c,v 1.11 1995/11/09 20:23:07 phk Exp $ 351541Srgrimes */ 361541Srgrimes 371541Srgrimes#ifndef TUBA_INCLUDE 381541Srgrimes#include <sys/param.h> 391541Srgrimes#include <sys/systm.h> 4012172Sphk#include <sys/kernel.h> 4112172Sphk#include <sys/sysctl.h> 421541Srgrimes#include <sys/malloc.h> 431541Srgrimes#include <sys/mbuf.h> 441541Srgrimes#include <sys/socket.h> 451541Srgrimes#include <sys/socketvar.h> 461541Srgrimes#include <sys/protosw.h> 471541Srgrimes#include <sys/errno.h> 487684Sdg#include <sys/queue.h> 491541Srgrimes 5011150Swollman#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 5111150Swollman 521541Srgrimes#include <net/if.h> 531541Srgrimes#include <net/route.h> 541541Srgrimes 551541Srgrimes#include <netinet/in.h> 561541Srgrimes#include <netinet/in_systm.h> 571541Srgrimes#include <netinet/ip.h> 581541Srgrimes#include <netinet/in_pcb.h> 591541Srgrimes#include <netinet/ip_var.h> 601541Srgrimes#include <netinet/tcp.h> 611541Srgrimes#include <netinet/tcp_fsm.h> 621541Srgrimes#include <netinet/tcp_seq.h> 631541Srgrimes#include <netinet/tcp_timer.h> 641541Srgrimes#include <netinet/tcp_var.h> 651541Srgrimes#include <netinet/tcpip.h> 661541Srgrimes 671541Srgrimesint tcp_keepidle = TCPTV_KEEP_IDLE; 6812172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, 6912172Sphk CTLFLAG_RW, &tcp_keepidle , 0, ""); 7012172Sphk 7112296Sphkstatic int tcp_keepintvl = TCPTV_KEEPINTVL; 7212172SphkSYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, 7312172Sphk CTLFLAG_RW, &tcp_keepintvl , 0, ""); 7412172Sphk 7512296Sphkstatic int tcp_keepcnt = TCPTV_KEEPCNT; 7612296Sphk /* max idle probes */ 7712296Sphkstatic int tcp_maxpersistidle = TCPTV_KEEP_IDLE; 7812296Sphk /* max idle time in persist */ 791541Srgrimesint tcp_maxidle; 8011150Swollman#else /* TUBA_INCLUDE */ 8111150Swollman 8212296Sphkstatic int tcp_maxpersistidle; 831541Srgrimes#endif /* TUBA_INCLUDE */ 8411150Swollman 851541Srgrimes/* 861541Srgrimes * Fast timeout routine for processing delayed acks 871541Srgrimes */ 881541Srgrimesvoid 891541Srgrimestcp_fasttimo() 901541Srgrimes{ 911541Srgrimes register struct inpcb *inp; 921541Srgrimes register struct tcpcb *tp; 937684Sdg int s; 941541Srgrimes 957684Sdg s = splnet(); 967684Sdg 977684Sdg for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { 981541Srgrimes if ((tp = (struct tcpcb *)inp->inp_ppcb) && 991541Srgrimes (tp->t_flags & TF_DELACK)) { 1001541Srgrimes tp->t_flags &= ~TF_DELACK; 1011541Srgrimes tp->t_flags |= TF_ACKNOW; 1021541Srgrimes tcpstat.tcps_delack++; 1031541Srgrimes (void) tcp_output(tp); 1041541Srgrimes } 1057684Sdg } 1061541Srgrimes splx(s); 1071541Srgrimes} 1081541Srgrimes 1091541Srgrimes/* 1101541Srgrimes * Tcp protocol timeout routine called every 500 ms. 1111541Srgrimes * Updates the timers in all active tcb's and 1121541Srgrimes * causes finite state machine actions if timers expire. 1131541Srgrimes */ 1141541Srgrimesvoid 1151541Srgrimestcp_slowtimo() 1161541Srgrimes{ 1171541Srgrimes register struct inpcb *ip, *ipnxt; 1181541Srgrimes register struct tcpcb *tp; 1191541Srgrimes register int i; 1207684Sdg int s; 1211541Srgrimes 1227684Sdg s = splnet(); 1237684Sdg 12411150Swollman tcp_maxidle = tcp_keepcnt * tcp_keepintvl; 1257684Sdg 1267684Sdg ip = tcb.lh_first; 1277684Sdg if (ip == NULL) { 1287684Sdg splx(s); 1297684Sdg return; 1307684Sdg } 1311541Srgrimes /* 1321541Srgrimes * Search through tcb's and update active timers. 1331541Srgrimes */ 1347684Sdg for (; ip != NULL; ip = ipnxt) { 1357684Sdg ipnxt = ip->inp_list.le_next; 1361541Srgrimes tp = intotcpcb(ip); 13711150Swollman if (tp == 0 || tp->t_state == TCPS_LISTEN) 1381541Srgrimes continue; 1391541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) { 1401541Srgrimes if (tp->t_timer[i] && --tp->t_timer[i] == 0) { 1417770Sdg if (tcp_usrreq(tp->t_inpcb->inp_socket, 1421541Srgrimes PRU_SLOWTIMO, (struct mbuf *)0, 1437770Sdg (struct mbuf *)i, (struct mbuf *)0) == NULL) 1441541Srgrimes goto tpgone; 1451541Srgrimes } 1461541Srgrimes } 1471541Srgrimes tp->t_idle++; 1486283Swollman tp->t_duration++; 1491541Srgrimes if (tp->t_rtt) 1501541Srgrimes tp->t_rtt++; 1511541Srgrimestpgone: 1521541Srgrimes ; 1531541Srgrimes } 1541541Srgrimes tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ 1551541Srgrimes#ifdef TCP_COMPAT_42 1561541Srgrimes if ((int)tcp_iss < 0) 15711150Swollman tcp_iss = TCP_ISSINCR; /* XXX */ 1581541Srgrimes#endif 1591541Srgrimes tcp_now++; /* for timestamps */ 1601541Srgrimes splx(s); 1611541Srgrimes} 1621541Srgrimes#ifndef TUBA_INCLUDE 1631541Srgrimes 1641541Srgrimes/* 1651541Srgrimes * Cancel all timers for TCP tp. 1661541Srgrimes */ 1671541Srgrimesvoid 1681541Srgrimestcp_canceltimers(tp) 1691541Srgrimes struct tcpcb *tp; 1701541Srgrimes{ 1711541Srgrimes register int i; 1721541Srgrimes 1731541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) 1741541Srgrimes tp->t_timer[i] = 0; 1751541Srgrimes} 1761541Srgrimes 1771541Srgrimesint tcp_backoff[TCP_MAXRXTSHIFT + 1] = 1781541Srgrimes { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 1791541Srgrimes 18012296Sphkstatic int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ 18111150Swollman 1821541Srgrimes/* 1831541Srgrimes * TCP timer processing. 1841541Srgrimes */ 1851541Srgrimesstruct tcpcb * 1861541Srgrimestcp_timers(tp, timer) 1871541Srgrimes register struct tcpcb *tp; 1881541Srgrimes int timer; 1891541Srgrimes{ 1901541Srgrimes register int rexmt; 1911541Srgrimes 1921541Srgrimes switch (timer) { 1931541Srgrimes 1941541Srgrimes /* 1951541Srgrimes * 2 MSL timeout in shutdown went off. If we're closed but 1961541Srgrimes * still waiting for peer to close and connection has been idle 1971541Srgrimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 1981541Srgrimes * control block. Otherwise, check again in a bit. 1991541Srgrimes */ 2001541Srgrimes case TCPT_2MSL: 2011541Srgrimes if (tp->t_state != TCPS_TIME_WAIT && 2021541Srgrimes tp->t_idle <= tcp_maxidle) 2031541Srgrimes tp->t_timer[TCPT_2MSL] = tcp_keepintvl; 2041541Srgrimes else 2051541Srgrimes tp = tcp_close(tp); 2061541Srgrimes break; 2071541Srgrimes 2081541Srgrimes /* 2091541Srgrimes * Retransmission timer went off. Message has not 2101541Srgrimes * been acked within retransmit interval. Back off 2111541Srgrimes * to a longer retransmit interval and retransmit one segment. 2121541Srgrimes */ 2131541Srgrimes case TCPT_REXMT: 2141541Srgrimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 2151541Srgrimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 2161541Srgrimes tcpstat.tcps_timeoutdrop++; 2171541Srgrimes tp = tcp_drop(tp, tp->t_softerror ? 2181541Srgrimes tp->t_softerror : ETIMEDOUT); 2191541Srgrimes break; 2201541Srgrimes } 2211541Srgrimes tcpstat.tcps_rexmttimeo++; 2221541Srgrimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 2231541Srgrimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 2241541Srgrimes tp->t_rttmin, TCPTV_REXMTMAX); 2251541Srgrimes tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 2261541Srgrimes /* 2271541Srgrimes * If losing, let the lower level know and try for 2281541Srgrimes * a better route. Also, if we backed off this far, 2291541Srgrimes * our srtt estimate is probably bogus. Clobber it 2301541Srgrimes * so we'll take the next rtt measurement as our srtt; 2311541Srgrimes * move the current srtt into rttvar to keep the current 2321541Srgrimes * retransmit times until then. 2331541Srgrimes */ 2341541Srgrimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 2351541Srgrimes in_losing(tp->t_inpcb); 2361541Srgrimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 2371541Srgrimes tp->t_srtt = 0; 2381541Srgrimes } 2391541Srgrimes tp->snd_nxt = tp->snd_una; 2401541Srgrimes /* 2411541Srgrimes * If timing a segment in this window, stop the timer. 2421541Srgrimes */ 2431541Srgrimes tp->t_rtt = 0; 2441541Srgrimes /* 2451541Srgrimes * Close the congestion window down to one segment 2461541Srgrimes * (we'll open it by one segment for each ack we get). 2471541Srgrimes * Since we probably have a window's worth of unacked 2481541Srgrimes * data accumulated, this "slow start" keeps us from 2491541Srgrimes * dumping all that data as back-to-back packets (which 2501541Srgrimes * might overwhelm an intermediate gateway). 2511541Srgrimes * 2521541Srgrimes * There are two phases to the opening: Initially we 2531541Srgrimes * open by one mss on each ack. This makes the window 2541541Srgrimes * size increase exponentially with time. If the 2551541Srgrimes * window is larger than the path can handle, this 2561541Srgrimes * exponential growth results in dropped packet(s) 2578876Srgrimes * almost immediately. To get more time between 2581541Srgrimes * drops but still "push" the network to take advantage 2591541Srgrimes * of improving conditions, we switch from exponential 2601541Srgrimes * to linear window opening at some threshhold size. 2611541Srgrimes * For a threshhold, we use half the current window 2621541Srgrimes * size, truncated to a multiple of the mss. 2631541Srgrimes * 2641541Srgrimes * (the minimum cwnd that will give us exponential 2651541Srgrimes * growth is 2 mss. We don't allow the threshhold 2661541Srgrimes * to go below this.) 2671541Srgrimes */ 2681541Srgrimes { 2691541Srgrimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 2701541Srgrimes if (win < 2) 2711541Srgrimes win = 2; 2721541Srgrimes tp->snd_cwnd = tp->t_maxseg; 2731541Srgrimes tp->snd_ssthresh = win * tp->t_maxseg; 2741541Srgrimes tp->t_dupacks = 0; 2751541Srgrimes } 2761541Srgrimes (void) tcp_output(tp); 2771541Srgrimes break; 2781541Srgrimes 2791541Srgrimes /* 2801541Srgrimes * Persistance timer into zero window. 2811541Srgrimes * Force a byte to be output, if possible. 2821541Srgrimes */ 2831541Srgrimes case TCPT_PERSIST: 2841541Srgrimes tcpstat.tcps_persisttimeo++; 2859773Sdg /* 2869773Sdg * Hack: if the peer is dead/unreachable, we do not 2879773Sdg * time out if the window is closed. After a full 2889773Sdg * backoff, drop the connection if the idle time 2899773Sdg * (no responses to probes) reaches the maximum 2909773Sdg * backoff that we would use if retransmitting. 2919773Sdg */ 2929773Sdg if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 2939773Sdg (tp->t_idle >= tcp_maxpersistidle || 2949773Sdg tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 2959773Sdg tcpstat.tcps_persistdrop++; 2969773Sdg tp = tcp_drop(tp, ETIMEDOUT); 2979773Sdg break; 2989773Sdg } 2991541Srgrimes tcp_setpersist(tp); 3001541Srgrimes tp->t_force = 1; 3011541Srgrimes (void) tcp_output(tp); 3021541Srgrimes tp->t_force = 0; 3031541Srgrimes break; 3041541Srgrimes 3051541Srgrimes /* 3061541Srgrimes * Keep-alive timer went off; send something 3071541Srgrimes * or drop connection if idle for too long. 3081541Srgrimes */ 3091541Srgrimes case TCPT_KEEP: 3101541Srgrimes tcpstat.tcps_keeptimeo++; 3111541Srgrimes if (tp->t_state < TCPS_ESTABLISHED) 3121541Srgrimes goto dropit; 3131541Srgrimes if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE && 3141541Srgrimes tp->t_state <= TCPS_CLOSE_WAIT) { 3151541Srgrimes if (tp->t_idle >= tcp_keepidle + tcp_maxidle) 3161541Srgrimes goto dropit; 3171541Srgrimes /* 3181541Srgrimes * Send a packet designed to force a response 3191541Srgrimes * if the peer is up and reachable: 3201541Srgrimes * either an ACK if the connection is still alive, 3211541Srgrimes * or an RST if the peer has closed the connection 3221541Srgrimes * due to timeout or reboot. 3231541Srgrimes * Using sequence number tp->snd_una-1 3241541Srgrimes * causes the transmitted zero-length segment 3251541Srgrimes * to lie outside the receive window; 3261541Srgrimes * by the protocol spec, this requires the 3271541Srgrimes * correspondent TCP to respond. 3281541Srgrimes */ 3291541Srgrimes tcpstat.tcps_keepprobe++; 3301541Srgrimes#ifdef TCP_COMPAT_42 3311541Srgrimes /* 3321541Srgrimes * The keepalive packet must have nonzero length 3331541Srgrimes * to get a 4.2 host to respond. 3341541Srgrimes */ 3351541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3361541Srgrimes tp->rcv_nxt - 1, tp->snd_una - 1, 0); 3371541Srgrimes#else 3381541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3391541Srgrimes tp->rcv_nxt, tp->snd_una - 1, 0); 3401541Srgrimes#endif 3411541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepintvl; 3421541Srgrimes } else 3431541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepidle; 3441541Srgrimes break; 3451541Srgrimes dropit: 3461541Srgrimes tcpstat.tcps_keepdrops++; 3471541Srgrimes tp = tcp_drop(tp, ETIMEDOUT); 3481541Srgrimes break; 3491541Srgrimes } 3501541Srgrimes return (tp); 3511541Srgrimes} 3521541Srgrimes#endif /* TUBA_INCLUDE */ 353