/* tcp_timer.c revision 226318 */
/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
 */
311541Srgrimes
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 226318 2011-10-12 19:52:23Z np $");

#include "opt_inet6.h"
#include "opt_tcpdebug.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/cc.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
691541Srgrimes
7050673Sjlemonint	tcp_keepinit;
7150682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
72180631Strhodes    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
7318280Spst
7450673Sjlemonint	tcp_keepidle;
7550682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
76180631Strhodes    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
7712172Sphk
7850673Sjlemonint	tcp_keepintvl;
7950682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
80180631Strhodes    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
8112172Sphk
8250673Sjlemonint	tcp_delacktime;
83167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
84167721Sandre    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
8550682Sjlemon    "Time before a delayed ACK is sent");
86133874Srwatson
8750673Sjlemonint	tcp_msl;
8850682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
8950682Sjlemon    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
9050673Sjlemon
91100335Sdillonint	tcp_rexmit_min;
92100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
93167721Sandre    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
94167721Sandre    "Minimum Retransmission Timeout");
95100335Sdillon
96100335Sdillonint	tcp_rexmit_slop;
97100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
98167721Sandre    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
99167721Sandre    "Retransmission Timer Slop");
100100335Sdillon
10187499Srwatsonstatic int	always_keepalive = 1;
102133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
10346381Sbillf    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
10415039Sphk
105167036Smohansint    tcp_fast_finwait2_recycle = 0;
106167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
107167721Sandre    &tcp_fast_finwait2_recycle, 0,
108167721Sandre    "Recycle closed FIN_WAIT_2 connections faster");
109167036Smohans
110167036Smohansint    tcp_finwait2_timeout;
111167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
112167721Sandre    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
113167036Smohans
114167036Smohans
11512296Sphkstatic int	tcp_keepcnt = TCPTV_KEEPCNT;
11612296Sphk	/* max idle probes */
11750673Sjlemonint	tcp_maxpersistidle;
11812296Sphk	/* max idle time in persist */
1191541Srgrimesint	tcp_maxidle;
12011150Swollman
121205391Skmacystatic int	per_cpu_timers = 0;
122205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
123205391Skmacy    &per_cpu_timers , 0, "run tcp timers on all cpus");
124205391Skmacy
125205391Skmacy#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
126205391Skmacy		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
127205391Skmacy
1281541Srgrimes/*
1291541Srgrimes * Tcp protocol timeout routine called every 500 ms.
13050673Sjlemon * Updates timestamps used for TCP
1311541Srgrimes * causes finite state machine actions if timers expire.
1321541Srgrimes */
1331541Srgrimesvoid
134172309Ssilbytcp_slowtimo(void)
1351541Srgrimes{
136183550Szec	VNET_ITERATOR_DECL(vnet_iter);
1371541Srgrimes
138195760Srwatson	VNET_LIST_RLOCK_NOSLEEP();
139183550Szec	VNET_FOREACH(vnet_iter) {
140183550Szec		CURVNET_SET(vnet_iter);
141183550Szec		tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
142183550Szec		INP_INFO_WLOCK(&V_tcbinfo);
143183550Szec		(void) tcp_tw_2msl_scan(0);
144183550Szec		INP_INFO_WUNLOCK(&V_tcbinfo);
145183550Szec		CURVNET_RESTORE();
146183550Szec	}
147195760Srwatson	VNET_LIST_RUNLOCK_NOSLEEP();
1481541Srgrimes}
1491541Srgrimes
15073110Sjlemonint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
15173110Sjlemon    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
15273110Sjlemon
1531541Srgrimesint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
154115824Shsu    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
1551541Srgrimes
156115824Shsustatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
15711150Swollman
158157376Srwatsonstatic int tcp_timer_race;
159157376SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
160157376Srwatson    0, "Count of t_inpcb races on tcp_discardcb");
161157376Srwatson
162172074Srwatson/*
163172074Srwatson * TCP timer processing.
164172074Srwatson */
165172074Srwatson
16650673Sjlemonvoid
167172074Srwatsontcp_timer_delack(void *xtp)
1681541Srgrimes{
169172074Srwatson	struct tcpcb *tp = xtp;
170172074Srwatson	struct inpcb *inp;
171183550Szec	CURVNET_SET(tp->t_vnet);
1721541Srgrimes
173172074Srwatson	inp = tp->t_inpcb;
174157376Srwatson	/*
175172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
176172074Srwatson	 * tear-down mean we need it as a work-around for races between
177172074Srwatson	 * timers and tcp_discardcb().
178172074Srwatson	 *
179172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
180157376Srwatson	 */
181172074Srwatson	if (inp == NULL) {
182172074Srwatson		tcp_timer_race++;
183183550Szec		CURVNET_RESTORE();
184108265Shsu		return;
185108265Shsu	}
186178285Srwatson	INP_WLOCK(inp);
187189848Srwatson	if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
188172309Ssilby	    || !callout_active(&tp->t_timers->tt_delack)) {
189178285Srwatson		INP_WUNLOCK(inp);
190183550Szec		CURVNET_RESTORE();
19150673Sjlemon		return;
19250673Sjlemon	}
193172309Ssilby	callout_deactivate(&tp->t_timers->tt_delack);
1941541Srgrimes
19550673Sjlemon	tp->t_flags |= TF_ACKNOW;
196190948Srwatson	TCPSTAT_INC(tcps_delack);
19750673Sjlemon	(void) tcp_output(tp);
198178285Srwatson	INP_WUNLOCK(inp);
199183550Szec	CURVNET_RESTORE();
20050673Sjlemon}
20150673Sjlemon
202172074Srwatsonvoid
203172074Srwatsontcp_timer_2msl(void *xtp)
20450673Sjlemon{
205172074Srwatson	struct tcpcb *tp = xtp;
206172074Srwatson	struct inpcb *inp;
207183550Szec	CURVNET_SET(tp->t_vnet);
20850673Sjlemon#ifdef TCPDEBUG
20950673Sjlemon	int ostate;
21050673Sjlemon
21150673Sjlemon	ostate = tp->t_state;
21250673Sjlemon#endif
213157376Srwatson	/*
214172074Srwatson	 * XXXRW: Does this actually happen?
215172074Srwatson	 */
216181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
217172074Srwatson	inp = tp->t_inpcb;
218172074Srwatson	/*
219172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
220172074Srwatson	 * tear-down mean we need it as a work-around for races between
221172074Srwatson	 * timers and tcp_discardcb().
222172074Srwatson	 *
223172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
224172074Srwatson	 */
225172074Srwatson	if (inp == NULL) {
226172074Srwatson		tcp_timer_race++;
227181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
228183550Szec		CURVNET_RESTORE();
229172074Srwatson		return;
230172074Srwatson	}
231178285Srwatson	INP_WLOCK(inp);
232172074Srwatson	tcp_free_sackholes(tp);
233189848Srwatson	if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
234172309Ssilby	    !callout_active(&tp->t_timers->tt_2msl)) {
235178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
236181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
237183550Szec		CURVNET_RESTORE();
238172074Srwatson		return;
239172074Srwatson	}
240172309Ssilby	callout_deactivate(&tp->t_timers->tt_2msl);
241172074Srwatson	/*
2421541Srgrimes	 * 2 MSL timeout in shutdown went off.  If we're closed but
2431541Srgrimes	 * still waiting for peer to close and connection has been idle
2441541Srgrimes	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
2451541Srgrimes	 * control block.  Otherwise, check again in a bit.
246167036Smohans	 *
247167036Smohans	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
248167036Smohans	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
249167036Smohans	 * Ignore fact that there were recent incoming segments.
2501541Srgrimes	 */
251167036Smohans	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
252172074Srwatson	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
253167036Smohans	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
254190948Srwatson		TCPSTAT_INC(tcps_finwait2_drops);
255172074Srwatson		tp = tcp_close(tp);
256167036Smohans	} else {
257167036Smohans		if (tp->t_state != TCPS_TIME_WAIT &&
258194305Sjhb		   ticks - tp->t_rcvtime <= tcp_maxidle)
259205391Skmacy		       callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl,
260205391Skmacy			   tcp_timer_2msl, tp, INP_CPU(inp));
261172074Srwatson	       else
262172074Srwatson		       tp = tcp_close(tp);
263172074Srwatson       }
2641541Srgrimes
26550673Sjlemon#ifdef TCPDEBUG
266172312Skib	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
26797658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
26897658Stanimura			  PRU_SLOWTIMO);
26950673Sjlemon#endif
270172074Srwatson	if (tp != NULL)
271178285Srwatson		INP_WUNLOCK(inp);
272181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
273183550Szec	CURVNET_RESTORE();
27450673Sjlemon}
27550673Sjlemon
276172074Srwatsonvoid
277172074Srwatsontcp_timer_keep(void *xtp)
27850673Sjlemon{
279172074Srwatson	struct tcpcb *tp = xtp;
28078642Ssilby	struct tcptemp *t_template;
281172074Srwatson	struct inpcb *inp;
282183550Szec	CURVNET_SET(tp->t_vnet);
28350673Sjlemon#ifdef TCPDEBUG
28450673Sjlemon	int ostate;
28550673Sjlemon
28650673Sjlemon	ostate = tp->t_state;
28750673Sjlemon#endif
288181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
289172074Srwatson	inp = tp->t_inpcb;
290157376Srwatson	/*
291172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
292172074Srwatson	 * tear-down mean we need it as a work-around for races between
293172074Srwatson	 * timers and tcp_discardcb().
294172074Srwatson	 *
295172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
296172074Srwatson	 */
297172074Srwatson	if (inp == NULL) {
298172074Srwatson		tcp_timer_race++;
299181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
300183550Szec		CURVNET_RESTORE();
301172074Srwatson		return;
302172074Srwatson	}
303178285Srwatson	INP_WLOCK(inp);
304189848Srwatson	if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
305172309Ssilby	    || !callout_active(&tp->t_timers->tt_keep)) {
306178285Srwatson		INP_WUNLOCK(inp);
307181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
308183550Szec		CURVNET_RESTORE();
309172074Srwatson		return;
310172074Srwatson	}
311172309Ssilby	callout_deactivate(&tp->t_timers->tt_keep);
312172074Srwatson	/*
31350673Sjlemon	 * Keep-alive timer went off; send something
31450673Sjlemon	 * or drop connection if idle for too long.
3151541Srgrimes	 */
316190948Srwatson	TCPSTAT_INC(tcps_keeptimeo);
31750673Sjlemon	if (tp->t_state < TCPS_ESTABLISHED)
31850673Sjlemon		goto dropit;
319122326Ssam	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
32050673Sjlemon	    tp->t_state <= TCPS_CLOSING) {
321194305Sjhb		if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
32250673Sjlemon			goto dropit;
3231541Srgrimes		/*
32450673Sjlemon		 * Send a packet designed to force a response
32550673Sjlemon		 * if the peer is up and reachable:
32650673Sjlemon		 * either an ACK if the connection is still alive,
32750673Sjlemon		 * or an RST if the peer has closed the connection
32850673Sjlemon		 * due to timeout or reboot.
32950673Sjlemon		 * Using sequence number tp->snd_una-1
33050673Sjlemon		 * causes the transmitted zero-length segment
33150673Sjlemon		 * to lie outside the receive window;
33250673Sjlemon		 * by the protocol spec, this requires the
33350673Sjlemon		 * correspondent TCP to respond.
3341541Srgrimes		 */
335190948Srwatson		TCPSTAT_INC(tcps_keepprobe);
336111144Sjlemon		t_template = tcpip_maketemplate(inp);
33778642Ssilby		if (t_template) {
33878642Ssilby			tcp_respond(tp, t_template->tt_ipgen,
33978642Ssilby				    &t_template->tt_t, (struct mbuf *)NULL,
34078642Ssilby				    tp->rcv_nxt, tp->snd_una - 1, 0);
341179487Srwatson			free(t_template, M_TEMP);
34278642Ssilby		}
343205391Skmacy		callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp));
34497658Stanimura	} else
345205391Skmacy		callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp));
34650673Sjlemon
34750673Sjlemon#ifdef TCPDEBUG
348122326Ssam	if (inp->inp_socket->so_options & SO_DEBUG)
34955679Sshin		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
35050673Sjlemon			  PRU_SLOWTIMO);
35150673Sjlemon#endif
352178285Srwatson	INP_WUNLOCK(inp);
353181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
354183550Szec	CURVNET_RESTORE();
355172074Srwatson	return;
35650673Sjlemon
35750673Sjlemondropit:
358190948Srwatson	TCPSTAT_INC(tcps_keepdrops);
359172074Srwatson	tp = tcp_drop(tp, ETIMEDOUT);
360172074Srwatson
361172074Srwatson#ifdef TCPDEBUG
362172074Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
363172074Srwatson		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
364172074Srwatson			  PRU_SLOWTIMO);
365172074Srwatson#endif
366172074Srwatson	if (tp != NULL)
367178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
368181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
369183550Szec	CURVNET_RESTORE();
37050673Sjlemon}
37150673Sjlemon
372172074Srwatsonvoid
373172074Srwatsontcp_timer_persist(void *xtp)
37450673Sjlemon{
375172074Srwatson	struct tcpcb *tp = xtp;
376172074Srwatson	struct inpcb *inp;
377183550Szec	CURVNET_SET(tp->t_vnet);
37850673Sjlemon#ifdef TCPDEBUG
37950673Sjlemon	int ostate;
38050673Sjlemon
38150673Sjlemon	ostate = tp->t_state;
38250673Sjlemon#endif
383181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
384172074Srwatson	inp = tp->t_inpcb;
385157376Srwatson	/*
386172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
387172074Srwatson	 * tear-down mean we need it as a work-around for races between
388172074Srwatson	 * timers and tcp_discardcb().
389172074Srwatson	 *
390172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
391172074Srwatson	 */
392172074Srwatson	if (inp == NULL) {
393172074Srwatson		tcp_timer_race++;
394181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
395183550Szec		CURVNET_RESTORE();
396172074Srwatson		return;
397172074Srwatson	}
398178285Srwatson	INP_WLOCK(inp);
399189848Srwatson	if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
400172309Ssilby	    || !callout_active(&tp->t_timers->tt_persist)) {
401178285Srwatson		INP_WUNLOCK(inp);
402181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
403183550Szec		CURVNET_RESTORE();
404172074Srwatson		return;
405172074Srwatson	}
406172309Ssilby	callout_deactivate(&tp->t_timers->tt_persist);
407172074Srwatson	/*
40850673Sjlemon	 * Persistance timer into zero window.
40950673Sjlemon	 * Force a byte to be output, if possible.
41050673Sjlemon	 */
411190948Srwatson	TCPSTAT_INC(tcps_persisttimeo);
41250673Sjlemon	/*
41350673Sjlemon	 * Hack: if the peer is dead/unreachable, we do not
41450673Sjlemon	 * time out if the window is closed.  After a full
41550673Sjlemon	 * backoff, drop the connection if the idle time
41650673Sjlemon	 * (no responses to probes) reaches the maximum
41750673Sjlemon	 * backoff that we would use if retransmitting.
41850673Sjlemon	 */
41950673Sjlemon	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
420194305Sjhb	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
421194305Sjhb	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
422190948Srwatson		TCPSTAT_INC(tcps_persistdrop);
423172074Srwatson		tp = tcp_drop(tp, ETIMEDOUT);
424172074Srwatson		goto out;
42550673Sjlemon	}
42650673Sjlemon	tcp_setpersist(tp);
427146463Sps	tp->t_flags |= TF_FORCEDATA;
42850673Sjlemon	(void) tcp_output(tp);
429146463Sps	tp->t_flags &= ~TF_FORCEDATA;
43050673Sjlemon
431172074Srwatsonout:
43250673Sjlemon#ifdef TCPDEBUG
433158644Sglebius	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
434158644Sglebius		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
43550673Sjlemon#endif
436172074Srwatson	if (tp != NULL)
437178285Srwatson		INP_WUNLOCK(inp);
438181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
439183550Szec	CURVNET_RESTORE();
44050673Sjlemon}
44150673Sjlemon
442172074Srwatsonvoid
443172074Srwatsontcp_timer_rexmt(void * xtp)
44450673Sjlemon{
445172074Srwatson	struct tcpcb *tp = xtp;
446183550Szec	CURVNET_SET(tp->t_vnet);
44750673Sjlemon	int rexmt;
448172074Srwatson	int headlocked;
449172074Srwatson	struct inpcb *inp;
45050673Sjlemon#ifdef TCPDEBUG
45150673Sjlemon	int ostate;
45250673Sjlemon
45350673Sjlemon	ostate = tp->t_state;
45450673Sjlemon#endif
455205391Skmacy	INP_INFO_RLOCK(&V_tcbinfo);
456172074Srwatson	inp = tp->t_inpcb;
457172074Srwatson	/*
458172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
459172074Srwatson	 * tear-down mean we need it as a work-around for races between
460172074Srwatson	 * timers and tcp_discardcb().
461172074Srwatson	 *
462172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
463172074Srwatson	 */
464172074Srwatson	if (inp == NULL) {
465172074Srwatson		tcp_timer_race++;
466205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
467183550Szec		CURVNET_RESTORE();
468172074Srwatson		return;
469172074Srwatson	}
470178285Srwatson	INP_WLOCK(inp);
471189848Srwatson	if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
472172309Ssilby	    || !callout_active(&tp->t_timers->tt_rexmt)) {
473178285Srwatson		INP_WUNLOCK(inp);
474205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
475183550Szec		CURVNET_RESTORE();
476172074Srwatson		return;
477172074Srwatson	}
478172309Ssilby	callout_deactivate(&tp->t_timers->tt_rexmt);
479130989Sps	tcp_free_sackholes(tp);
48050673Sjlemon	/*
48150673Sjlemon	 * Retransmission timer went off.  Message has not
48250673Sjlemon	 * been acked within retransmit interval.  Back off
48350673Sjlemon	 * to a longer retransmit interval and retransmit one segment.
48450673Sjlemon	 */
48550673Sjlemon	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
48650673Sjlemon		tp->t_rxtshift = TCP_MAXRXTSHIFT;
487190948Srwatson		TCPSTAT_INC(tcps_timeoutdrop);
488205391Skmacy		in_pcbref(inp);
489217126Sjhb		INP_INFO_RUNLOCK(&V_tcbinfo);
490217126Sjhb		INP_WUNLOCK(inp);
491217126Sjhb		INP_INFO_WLOCK(&V_tcbinfo);
492217126Sjhb		INP_WLOCK(inp);
493222488Srwatson		if (in_pcbrele_wlocked(inp)) {
494217126Sjhb			INP_INFO_WUNLOCK(&V_tcbinfo);
495217126Sjhb			CURVNET_RESTORE();
496217126Sjhb			return;
497217126Sjhb		}
498226318Snp		if (inp->inp_flags & INP_DROPPED) {
499226318Snp			INP_WUNLOCK(inp);
500226318Snp			INP_INFO_WUNLOCK(&V_tcbinfo);
501226318Snp			CURVNET_RESTORE();
502226318Snp			return;
503226318Snp		}
504226318Snp
505172074Srwatson		tp = tcp_drop(tp, tp->t_softerror ?
506172074Srwatson			      tp->t_softerror : ETIMEDOUT);
507205391Skmacy		headlocked = 1;
508172074Srwatson		goto out;
50950673Sjlemon	}
510205391Skmacy	INP_INFO_RUNLOCK(&V_tcbinfo);
511172074Srwatson	headlocked = 0;
51250673Sjlemon	if (tp->t_rxtshift == 1) {
51313229Solah		/*
51450673Sjlemon		 * first retransmit; record ssthresh and cwnd so they can
515133874Srwatson		 * be recovered if this turns out to be a "bad" retransmit.
516133874Srwatson		 * A retransmit is considered "bad" if an ACK for this
51750673Sjlemon		 * segment is received within RTT/2 interval; the assumption
518133874Srwatson		 * here is that the ACK was already in flight.  See
51950673Sjlemon		 * "On Estimating End-to-End Network Path Properties" by
52050673Sjlemon		 * Allman and Paxson for more details.
5211541Srgrimes		 */
52250673Sjlemon		tp->snd_cwnd_prev = tp->snd_cwnd;
52350673Sjlemon		tp->snd_ssthresh_prev = tp->snd_ssthresh;
524117650Shsu		tp->snd_recover_prev = tp->snd_recover;
525215166Slstewart		if (IN_FASTRECOVERY(tp->t_flags))
526215166Slstewart			tp->t_flags |= TF_WASFRECOVERY;
527117650Shsu		else
528215166Slstewart			tp->t_flags &= ~TF_WASFRECOVERY;
529215166Slstewart		if (IN_CONGRECOVERY(tp->t_flags))
530215166Slstewart			tp->t_flags |= TF_WASCRECOVERY;
531215166Slstewart		else
532215166Slstewart			tp->t_flags &= ~TF_WASCRECOVERY;
53350673Sjlemon		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
534221209Sjhb		tp->t_flags |= TF_PREVVALID;
535221209Sjhb	} else
536221209Sjhb		tp->t_flags &= ~TF_PREVVALID;
537190948Srwatson	TCPSTAT_INC(tcps_rexmttimeo);
53873110Sjlemon	if (tp->t_state == TCPS_SYN_SENT)
53973110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
54073110Sjlemon	else
54173110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
54250673Sjlemon	TCPT_RANGESET(tp->t_rxtcur, rexmt,
54350673Sjlemon		      tp->t_rttmin, TCPTV_REXMTMAX);
54450673Sjlemon	/*
545216621Sjhb	 * Disable rfc1323 if we haven't got any response to
546133874Srwatson	 * our third SYN to work-around some broken terminal servers
547133874Srwatson	 * (most of which have hopefully been retired) that have bad VJ
548133874Srwatson	 * header compression code which trashes TCP segments containing
54977539Sjesper	 * unknown-to-them TCP options.
55077539Sjesper	 */
55177539Sjesper	if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
552137139Sandre		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
55377539Sjesper	/*
554122922Sandre	 * If we backed off this far, our srtt estimate is probably bogus.
555122922Sandre	 * Clobber it so we'll take the next rtt measurement as our srtt;
55650673Sjlemon	 * move the current srtt into rttvar to keep the current
55750673Sjlemon	 * retransmit times until then.
55850673Sjlemon	 */
55950673Sjlemon	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
56055679Sshin#ifdef INET6
56155679Sshin		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
56255679Sshin			in6_losing(tp->t_inpcb);
56355679Sshin		else
56455679Sshin#endif
56550673Sjlemon		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
56650673Sjlemon		tp->t_srtt = 0;
56750673Sjlemon	}
56850673Sjlemon	tp->snd_nxt = tp->snd_una;
569117650Shsu	tp->snd_recover = tp->snd_max;
57050673Sjlemon	/*
57150673Sjlemon	 * Force a segment to be sent.
57250673Sjlemon	 */
57350673Sjlemon	tp->t_flags |= TF_ACKNOW;
57450673Sjlemon	/*
57550673Sjlemon	 * If timing a segment in this window, stop the timer.
57650673Sjlemon	 */
57750673Sjlemon	tp->t_rtttime = 0;
578215166Slstewart
579216101Slstewart	cc_cong_signal(tp, NULL, CC_RTO);
580215166Slstewart
58150673Sjlemon	(void) tcp_output(tp);
5821541Srgrimes
583172074Srwatsonout:
58450673Sjlemon#ifdef TCPDEBUG
585157136Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
58697658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
58797658Stanimura			  PRU_SLOWTIMO);
5881541Srgrimes#endif
589172074Srwatson	if (tp != NULL)
590178285Srwatson		INP_WUNLOCK(inp);
591172074Srwatson	if (headlocked)
592181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
593183550Szec	CURVNET_RESTORE();
5941541Srgrimes}
595172074Srwatson
596172074Srwatsonvoid
597172074Srwatsontcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
598172074Srwatson{
599172074Srwatson	struct callout *t_callout;
600172074Srwatson	void *f_callout;
601205391Skmacy	struct inpcb *inp = tp->t_inpcb;
602205391Skmacy	int cpu = INP_CPU(inp);
603172074Srwatson
604172074Srwatson	switch (timer_type) {
605172074Srwatson		case TT_DELACK:
606172309Ssilby			t_callout = &tp->t_timers->tt_delack;
607172074Srwatson			f_callout = tcp_timer_delack;
608172074Srwatson			break;
609172074Srwatson		case TT_REXMT:
610172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
611172074Srwatson			f_callout = tcp_timer_rexmt;
612172074Srwatson			break;
613172074Srwatson		case TT_PERSIST:
614172309Ssilby			t_callout = &tp->t_timers->tt_persist;
615172074Srwatson			f_callout = tcp_timer_persist;
616172074Srwatson			break;
617172074Srwatson		case TT_KEEP:
618172309Ssilby			t_callout = &tp->t_timers->tt_keep;
619172074Srwatson			f_callout = tcp_timer_keep;
620172074Srwatson			break;
621172074Srwatson		case TT_2MSL:
622172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
623172074Srwatson			f_callout = tcp_timer_2msl;
624172074Srwatson			break;
625172074Srwatson		default:
626172074Srwatson			panic("bad timer_type");
627172074Srwatson		}
628172074Srwatson	if (delta == 0) {
629172074Srwatson		callout_stop(t_callout);
630172074Srwatson	} else {
631205391Skmacy		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
632172074Srwatson	}
633172074Srwatson}
634172074Srwatson
635172074Srwatsonint
636172074Srwatsontcp_timer_active(struct tcpcb *tp, int timer_type)
637172074Srwatson{
638172074Srwatson	struct callout *t_callout;
639172074Srwatson
640172074Srwatson	switch (timer_type) {
641172074Srwatson		case TT_DELACK:
642172309Ssilby			t_callout = &tp->t_timers->tt_delack;
643172074Srwatson			break;
644172074Srwatson		case TT_REXMT:
645172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
646172074Srwatson			break;
647172074Srwatson		case TT_PERSIST:
648172309Ssilby			t_callout = &tp->t_timers->tt_persist;
649172074Srwatson			break;
650172074Srwatson		case TT_KEEP:
651172309Ssilby			t_callout = &tp->t_timers->tt_keep;
652172074Srwatson			break;
653172074Srwatson		case TT_2MSL:
654172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
655172074Srwatson			break;
656172074Srwatson		default:
657172074Srwatson			panic("bad timer_type");
658172074Srwatson		}
659172074Srwatson	return callout_active(t_callout);
660172074Srwatson}
661197244Ssilby
/*
 * Convert a tick count to milliseconds using the global `hz'.
 *
 * The multiplication is done in 64 bits: in plain int, 1000 * t overflows
 * once t exceeds INT_MAX / 1000 ticks (about 36 minutes at hz = 1000).
 * The result is cast back to int so the expression keeps its original
 * type.
 */
#define	ticks_to_msecs(t)	((int)((int64_t)(t) * 1000 / hz))
663197244Ssilby
664197244Ssilbyvoid
665197244Ssilbytcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
666197244Ssilby{
667197244Ssilby	bzero(xtimer, sizeof(struct xtcp_timer));
668197244Ssilby	if (timer == NULL)
669197244Ssilby		return;
670197244Ssilby	if (callout_active(&timer->tt_delack))
671197244Ssilby		xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
672197244Ssilby	if (callout_active(&timer->tt_rexmt))
673197244Ssilby		xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
674197244Ssilby	if (callout_active(&timer->tt_persist))
675197244Ssilby		xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
676197244Ssilby	if (callout_active(&timer->tt_keep))
677197244Ssilby		xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
678197244Ssilby	if (callout_active(&timer->tt_2msl))
679197244Ssilby		xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
680197244Ssilby	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
681197244Ssilby}
682