tcp_timer.c revision 9773
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * Redistribution and use in source and binary forms, with or without 61541Srgrimes * modification, are permitted provided that the following conditions 71541Srgrimes * are met: 81541Srgrimes * 1. Redistributions of source code must retain the above copyright 91541Srgrimes * notice, this list of conditions and the following disclaimer. 101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111541Srgrimes * notice, this list of conditions and the following disclaimer in the 121541Srgrimes * documentation and/or other materials provided with the distribution. 131541Srgrimes * 3. All advertising materials mentioning features or use of this software 141541Srgrimes * must display the following acknowledgement: 151541Srgrimes * This product includes software developed by the University of 161541Srgrimes * California, Berkeley and its contributors. 171541Srgrimes * 4. Neither the name of the University nor the names of its contributors 181541Srgrimes * may be used to endorse or promote products derived from this software 191541Srgrimes * without specific prior written permission. 201541Srgrimes * 211541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311541Srgrimes * SUCH DAMAGE. 321541Srgrimes * 331541Srgrimes * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 349773Sdg * $Id: tcp_timer.c,v 1.7 1995/05/30 08:09:59 rgrimes Exp $ 351541Srgrimes */ 361541Srgrimes 371541Srgrimes#ifndef TUBA_INCLUDE 381541Srgrimes#include <sys/param.h> 391541Srgrimes#include <sys/systm.h> 401541Srgrimes#include <sys/malloc.h> 411541Srgrimes#include <sys/mbuf.h> 421541Srgrimes#include <sys/socket.h> 431541Srgrimes#include <sys/socketvar.h> 441541Srgrimes#include <sys/protosw.h> 451541Srgrimes#include <sys/errno.h> 467684Sdg#include <sys/queue.h> 471541Srgrimes 481541Srgrimes#include <net/if.h> 491541Srgrimes#include <net/route.h> 501541Srgrimes 511541Srgrimes#include <netinet/in.h> 521541Srgrimes#include <netinet/in_systm.h> 531541Srgrimes#include <netinet/ip.h> 541541Srgrimes#include <netinet/in_pcb.h> 551541Srgrimes#include <netinet/ip_var.h> 561541Srgrimes#include <netinet/tcp.h> 571541Srgrimes#include <netinet/tcp_fsm.h> 581541Srgrimes#include <netinet/tcp_seq.h> 591541Srgrimes#include <netinet/tcp_timer.h> 601541Srgrimes#include <netinet/tcp_var.h> 611541Srgrimes#include <netinet/tcpip.h> 621541Srgrimes 631541Srgrimesint tcp_keepidle = TCPTV_KEEP_IDLE; 641541Srgrimesint tcp_keepintvl = TCPTV_KEEPINTVL; 651541Srgrimesint tcp_maxidle; 669773Sdgint tcp_maxpersistidle = TCPTV_KEEP_IDLE; 679773Sdgint tcp_totbackoff = 511; 681541Srgrimes#endif /* TUBA_INCLUDE */ 691541Srgrimes/* 701541Srgrimes * Fast timeout routine for processing delayed acks 711541Srgrimes */ 721541Srgrimesvoid 731541Srgrimestcp_fasttimo() 741541Srgrimes{ 751541Srgrimes register struct inpcb *inp; 761541Srgrimes register struct tcpcb *tp; 777684Sdg int s; 781541Srgrimes 797684Sdg s = splnet(); 807684Sdg 817684Sdg for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { 821541Srgrimes if ((tp = (struct tcpcb *)inp->inp_ppcb) && 831541Srgrimes (tp->t_flags & TF_DELACK)) { 841541Srgrimes tp->t_flags &= ~TF_DELACK; 851541Srgrimes tp->t_flags |= TF_ACKNOW; 861541Srgrimes tcpstat.tcps_delack++; 871541Srgrimes (void) tcp_output(tp); 881541Srgrimes } 897684Sdg } 901541Srgrimes splx(s); 911541Srgrimes} 921541Srgrimes 931541Srgrimes/* 941541Srgrimes * Tcp protocol timeout routine called every 500 ms. 951541Srgrimes * Updates the timers in all active tcb's and 961541Srgrimes * causes finite state machine actions if timers expire. 971541Srgrimes */ 981541Srgrimesvoid 991541Srgrimestcp_slowtimo() 1001541Srgrimes{ 1011541Srgrimes register struct inpcb *ip, *ipnxt; 1021541Srgrimes register struct tcpcb *tp; 1031541Srgrimes register int i; 1047684Sdg int s; 1051541Srgrimes 1067684Sdg s = splnet(); 1077684Sdg 1081541Srgrimes tcp_maxidle = TCPTV_KEEPCNT * tcp_keepintvl; 1097684Sdg 1107684Sdg ip = tcb.lh_first; 1117684Sdg if (ip == NULL) { 1127684Sdg splx(s); 1137684Sdg return; 1147684Sdg } 1151541Srgrimes /* 1161541Srgrimes * Search through tcb's and update active timers. 1171541Srgrimes */ 1187684Sdg for (; ip != NULL; ip = ipnxt) { 1197684Sdg ipnxt = ip->inp_list.le_next; 1201541Srgrimes tp = intotcpcb(ip); 1211541Srgrimes if (tp == 0) 1221541Srgrimes continue; 1231541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) { 1241541Srgrimes if (tp->t_timer[i] && --tp->t_timer[i] == 0) { 1257770Sdg if (tcp_usrreq(tp->t_inpcb->inp_socket, 1261541Srgrimes PRU_SLOWTIMO, (struct mbuf *)0, 1277770Sdg (struct mbuf *)i, (struct mbuf *)0) == NULL) 1281541Srgrimes goto tpgone; 1291541Srgrimes } 1301541Srgrimes } 1311541Srgrimes tp->t_idle++; 1326283Swollman tp->t_duration++; 1331541Srgrimes if (tp->t_rtt) 1341541Srgrimes tp->t_rtt++; 1351541Srgrimestpgone: 1361541Srgrimes ; 1371541Srgrimes } 1381541Srgrimes tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ 1391541Srgrimes#ifdef TCP_COMPAT_42 1401541Srgrimes if ((int)tcp_iss < 0) 1411541Srgrimes tcp_iss = 0; /* XXX */ 1421541Srgrimes#endif 1431541Srgrimes tcp_now++; /* for timestamps */ 1441541Srgrimes splx(s); 1451541Srgrimes} 1461541Srgrimes#ifndef TUBA_INCLUDE 1471541Srgrimes 1481541Srgrimes/* 1491541Srgrimes * Cancel all timers for TCP tp. 1501541Srgrimes */ 1511541Srgrimesvoid 1521541Srgrimestcp_canceltimers(tp) 1531541Srgrimes struct tcpcb *tp; 1541541Srgrimes{ 1551541Srgrimes register int i; 1561541Srgrimes 1571541Srgrimes for (i = 0; i < TCPT_NTIMERS; i++) 1581541Srgrimes tp->t_timer[i] = 0; 1591541Srgrimes} 1601541Srgrimes 1611541Srgrimesint tcp_backoff[TCP_MAXRXTSHIFT + 1] = 1621541Srgrimes { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 1631541Srgrimes 1641541Srgrimes/* 1651541Srgrimes * TCP timer processing. 1661541Srgrimes */ 1671541Srgrimesstruct tcpcb * 1681541Srgrimestcp_timers(tp, timer) 1691541Srgrimes register struct tcpcb *tp; 1701541Srgrimes int timer; 1711541Srgrimes{ 1721541Srgrimes register int rexmt; 1731541Srgrimes 1741541Srgrimes switch (timer) { 1751541Srgrimes 1761541Srgrimes /* 1771541Srgrimes * 2 MSL timeout in shutdown went off. If we're closed but 1781541Srgrimes * still waiting for peer to close and connection has been idle 1791541Srgrimes * too long, or if 2MSL time is up from TIME_WAIT, delete connection 1801541Srgrimes * control block. Otherwise, check again in a bit. 1811541Srgrimes */ 1821541Srgrimes case TCPT_2MSL: 1831541Srgrimes if (tp->t_state != TCPS_TIME_WAIT && 1841541Srgrimes tp->t_idle <= tcp_maxidle) 1851541Srgrimes tp->t_timer[TCPT_2MSL] = tcp_keepintvl; 1861541Srgrimes else 1871541Srgrimes tp = tcp_close(tp); 1881541Srgrimes break; 1891541Srgrimes 1901541Srgrimes /* 1911541Srgrimes * Retransmission timer went off. Message has not 1921541Srgrimes * been acked within retransmit interval. Back off 1931541Srgrimes * to a longer retransmit interval and retransmit one segment. 1941541Srgrimes */ 1951541Srgrimes case TCPT_REXMT: 1961541Srgrimes if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { 1971541Srgrimes tp->t_rxtshift = TCP_MAXRXTSHIFT; 1981541Srgrimes tcpstat.tcps_timeoutdrop++; 1991541Srgrimes tp = tcp_drop(tp, tp->t_softerror ? 2001541Srgrimes tp->t_softerror : ETIMEDOUT); 2011541Srgrimes break; 2021541Srgrimes } 2031541Srgrimes tcpstat.tcps_rexmttimeo++; 2041541Srgrimes rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; 2051541Srgrimes TCPT_RANGESET(tp->t_rxtcur, rexmt, 2061541Srgrimes tp->t_rttmin, TCPTV_REXMTMAX); 2071541Srgrimes tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; 2081541Srgrimes /* 2091541Srgrimes * If losing, let the lower level know and try for 2101541Srgrimes * a better route. Also, if we backed off this far, 2111541Srgrimes * our srtt estimate is probably bogus. Clobber it 2121541Srgrimes * so we'll take the next rtt measurement as our srtt; 2131541Srgrimes * move the current srtt into rttvar to keep the current 2141541Srgrimes * retransmit times until then. 2151541Srgrimes */ 2161541Srgrimes if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { 2171541Srgrimes in_losing(tp->t_inpcb); 2181541Srgrimes tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); 2191541Srgrimes tp->t_srtt = 0; 2201541Srgrimes } 2211541Srgrimes tp->snd_nxt = tp->snd_una; 2221541Srgrimes /* 2236283Swollman * Force a segment to be sent. 2246283Swollman */ 2256283Swollman tp->t_flags |= TF_ACKNOW; 2266283Swollman /* 2271541Srgrimes * If timing a segment in this window, stop the timer. 2281541Srgrimes */ 2291541Srgrimes tp->t_rtt = 0; 2301541Srgrimes /* 2311541Srgrimes * Close the congestion window down to one segment 2321541Srgrimes * (we'll open it by one segment for each ack we get). 2331541Srgrimes * Since we probably have a window's worth of unacked 2341541Srgrimes * data accumulated, this "slow start" keeps us from 2351541Srgrimes * dumping all that data as back-to-back packets (which 2361541Srgrimes * might overwhelm an intermediate gateway). 2371541Srgrimes * 2381541Srgrimes * There are two phases to the opening: Initially we 2391541Srgrimes * open by one mss on each ack. This makes the window 2401541Srgrimes * size increase exponentially with time. If the 2411541Srgrimes * window is larger than the path can handle, this 2421541Srgrimes * exponential growth results in dropped packet(s) 2438876Srgrimes * almost immediately. To get more time between 2441541Srgrimes * drops but still "push" the network to take advantage 2451541Srgrimes * of improving conditions, we switch from exponential 2461541Srgrimes * to linear window opening at some threshhold size. 2471541Srgrimes * For a threshhold, we use half the current window 2481541Srgrimes * size, truncated to a multiple of the mss. 2491541Srgrimes * 2501541Srgrimes * (the minimum cwnd that will give us exponential 2511541Srgrimes * growth is 2 mss. We don't allow the threshhold 2521541Srgrimes * to go below this.) 2531541Srgrimes */ 2541541Srgrimes { 2551541Srgrimes u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; 2561541Srgrimes if (win < 2) 2571541Srgrimes win = 2; 2581541Srgrimes tp->snd_cwnd = tp->t_maxseg; 2591541Srgrimes tp->snd_ssthresh = win * tp->t_maxseg; 2601541Srgrimes tp->t_dupacks = 0; 2611541Srgrimes } 2621541Srgrimes (void) tcp_output(tp); 2631541Srgrimes break; 2641541Srgrimes 2651541Srgrimes /* 2661541Srgrimes * Persistance timer into zero window. 2671541Srgrimes * Force a byte to be output, if possible. 2681541Srgrimes */ 2691541Srgrimes case TCPT_PERSIST: 2701541Srgrimes tcpstat.tcps_persisttimeo++; 2719773Sdg /* 2729773Sdg * Hack: if the peer is dead/unreachable, we do not 2739773Sdg * time out if the window is closed. After a full 2749773Sdg * backoff, drop the connection if the idle time 2759773Sdg * (no responses to probes) reaches the maximum 2769773Sdg * backoff that we would use if retransmitting. 2779773Sdg */ 2789773Sdg if (tp->t_rxtshift == TCP_MAXRXTSHIFT && 2799773Sdg (tp->t_idle >= tcp_maxpersistidle || 2809773Sdg tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { 2819773Sdg tcpstat.tcps_persistdrop++; 2829773Sdg tp = tcp_drop(tp, ETIMEDOUT); 2839773Sdg break; 2849773Sdg } 2851541Srgrimes tcp_setpersist(tp); 2861541Srgrimes tp->t_force = 1; 2871541Srgrimes (void) tcp_output(tp); 2881541Srgrimes tp->t_force = 0; 2891541Srgrimes break; 2901541Srgrimes 2911541Srgrimes /* 2921541Srgrimes * Keep-alive timer went off; send something 2931541Srgrimes * or drop connection if idle for too long. 2941541Srgrimes */ 2951541Srgrimes case TCPT_KEEP: 2961541Srgrimes tcpstat.tcps_keeptimeo++; 2971541Srgrimes if (tp->t_state < TCPS_ESTABLISHED) 2981541Srgrimes goto dropit; 2991541Srgrimes if (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE && 3001541Srgrimes tp->t_state <= TCPS_CLOSE_WAIT) { 3011541Srgrimes if (tp->t_idle >= tcp_keepidle + tcp_maxidle) 3021541Srgrimes goto dropit; 3031541Srgrimes /* 3041541Srgrimes * Send a packet designed to force a response 3051541Srgrimes * if the peer is up and reachable: 3061541Srgrimes * either an ACK if the connection is still alive, 3071541Srgrimes * or an RST if the peer has closed the connection 3081541Srgrimes * due to timeout or reboot. 3091541Srgrimes * Using sequence number tp->snd_una-1 3101541Srgrimes * causes the transmitted zero-length segment 3111541Srgrimes * to lie outside the receive window; 3121541Srgrimes * by the protocol spec, this requires the 3131541Srgrimes * correspondent TCP to respond. 3141541Srgrimes */ 3151541Srgrimes tcpstat.tcps_keepprobe++; 3161541Srgrimes#ifdef TCP_COMPAT_42 3171541Srgrimes /* 3181541Srgrimes * The keepalive packet must have nonzero length 3191541Srgrimes * to get a 4.2 host to respond. 3201541Srgrimes */ 3211541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3221541Srgrimes tp->rcv_nxt - 1, tp->snd_una - 1, 0); 3231541Srgrimes#else 3241541Srgrimes tcp_respond(tp, tp->t_template, (struct mbuf *)NULL, 3251541Srgrimes tp->rcv_nxt, tp->snd_una - 1, 0); 3261541Srgrimes#endif 3271541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepintvl; 3281541Srgrimes } else 3291541Srgrimes tp->t_timer[TCPT_KEEP] = tcp_keepidle; 3301541Srgrimes break; 3311541Srgrimes dropit: 3321541Srgrimes tcpstat.tcps_keepdrops++; 3331541Srgrimes tp = tcp_drop(tp, ETIMEDOUT); 3341541Srgrimes break; 3351541Srgrimes } 3361541Srgrimes return (tp); 3371541Srgrimes} 3381541Srgrimes#endif /* TUBA_INCLUDE */ 339