tcp_timer.c revision 247777
1/*-
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 247777 2013-03-04 11:09:56Z davide $");
34
35#include "opt_inet.h"
36#include "opt_inet6.h"
37#include "opt_tcpdebug.h"
38
39#include <sys/param.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/mbuf.h>
43#include <sys/mutex.h>
44#include <sys/protosw.h>
45#include <sys/smp.h>
46#include <sys/socket.h>
47#include <sys/socketvar.h>
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50
51#include <net/if.h>
52#include <net/route.h>
53#include <net/vnet.h>
54
55#include <netinet/cc.h>
56#include <netinet/in.h>
57#include <netinet/in_pcb.h>
58#include <netinet/in_systm.h>
59#ifdef INET6
60#include <netinet6/in6_pcb.h>
61#endif
62#include <netinet/ip_var.h>
63#include <netinet/tcp_fsm.h>
64#include <netinet/tcp_timer.h>
65#include <netinet/tcp_var.h>
66#include <netinet/tcpip.h>
67#ifdef TCPDEBUG
68#include <netinet/tcp_debug.h>
69#endif
70
71int	tcp_keepinit;
72SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
73    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
74
75int	tcp_keepidle;
76SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
77    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
78
79int	tcp_keepintvl;
80SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
81    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
82
83int	tcp_delacktime;
84SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
85    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
86    "Time before a delayed ACK is sent");
87
88int	tcp_msl;
89SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
90    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
91
92int	tcp_rexmit_min;
93SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
94    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
95    "Minimum Retransmission Timeout");
96
97int	tcp_rexmit_slop;
98SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
99    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
100    "Retransmission Timer Slop");
101
102static int	always_keepalive = 1;
103SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
104    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
105
106int    tcp_fast_finwait2_recycle = 0;
107SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
108    &tcp_fast_finwait2_recycle, 0,
109    "Recycle closed FIN_WAIT_2 connections faster");
110
111int    tcp_finwait2_timeout;
112SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
113    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
114
115int	tcp_keepcnt = TCPTV_KEEPCNT;
116SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
117    "Number of keepalive probes to send");
118
119	/* max idle probes */
120int	tcp_maxpersistidle;
121
122static int	tcp_rexmit_drop_options = 0;
123SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
124    &tcp_rexmit_drop_options, 0,
125    "Drop TCP options from 3rd and later retransmitted SYN");
126
127static int	per_cpu_timers = 0;
128SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
129    &per_cpu_timers , 0, "run tcp timers on all cpus");
130
/*
 * CPU on which to arm a TCP timer callout for the given inpcb.
 *
 * With per_cpu_timers disabled the answer is always CPU 0.  Otherwise the
 * flow id is folded onto the range [0, mp_maxid]; if CPU_ABSENT() reports
 * that candidate as absent, curcpu is used instead.
 */
#define	INP_FLOW_CPU(inp)	((inp)->inp_flowid % (mp_maxid + 1))
#define	INP_CPU(inp)							\
	(per_cpu_timers ?						\
	    (CPU_ABSENT(INP_FLOW_CPU(inp)) ? curcpu : INP_FLOW_CPU(inp)) : 0)
133
134/*
135 * Tcp protocol timeout routine called every 500 ms.
136 * Updates timestamps used for TCP
137 * causes finite state machine actions if timers expire.
138 */
139void
140tcp_slowtimo(void)
141{
142	VNET_ITERATOR_DECL(vnet_iter);
143
144	VNET_LIST_RLOCK_NOSLEEP();
145	VNET_FOREACH(vnet_iter) {
146		CURVNET_SET(vnet_iter);
147		INP_INFO_WLOCK(&V_tcbinfo);
148		(void) tcp_tw_2msl_scan(0);
149		INP_INFO_WUNLOCK(&V_tcbinfo);
150		CURVNET_RESTORE();
151	}
152	VNET_LIST_RUNLOCK_NOSLEEP();
153}
154
155int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
156    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
157
158int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
159    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
160
161static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
162
163static int tcp_timer_race;
164SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
165    0, "Count of t_inpcb races on tcp_discardcb");
166
167/*
168 * TCP timer processing.
169 */
170
171void
172tcp_timer_delack(void *xtp)
173{
174	struct tcpcb *tp = xtp;
175	struct inpcb *inp;
176	CURVNET_SET(tp->t_vnet);
177
178	inp = tp->t_inpcb;
179	/*
180	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
181	 * tear-down mean we need it as a work-around for races between
182	 * timers and tcp_discardcb().
183	 *
184	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
185	 */
186	if (inp == NULL) {
187		tcp_timer_race++;
188		CURVNET_RESTORE();
189		return;
190	}
191	INP_WLOCK(inp);
192	if (callout_pending(&tp->t_timers->tt_delack) ||
193	    !callout_active(&tp->t_timers->tt_delack)) {
194		INP_WUNLOCK(inp);
195		CURVNET_RESTORE();
196		return;
197	}
198	callout_deactivate(&tp->t_timers->tt_delack);
199	if ((inp->inp_flags & INP_DROPPED) != 0) {
200		INP_WUNLOCK(inp);
201		CURVNET_RESTORE();
202		return;
203	}
204
205	tp->t_flags |= TF_ACKNOW;
206	TCPSTAT_INC(tcps_delack);
207	(void) tcp_output(tp);
208	INP_WUNLOCK(inp);
209	CURVNET_RESTORE();
210}
211
212void
213tcp_timer_2msl(void *xtp)
214{
215	struct tcpcb *tp = xtp;
216	struct inpcb *inp;
217	CURVNET_SET(tp->t_vnet);
218#ifdef TCPDEBUG
219	int ostate;
220
221	ostate = tp->t_state;
222#endif
223	/*
224	 * XXXRW: Does this actually happen?
225	 */
226	INP_INFO_WLOCK(&V_tcbinfo);
227	inp = tp->t_inpcb;
228	/*
229	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
230	 * tear-down mean we need it as a work-around for races between
231	 * timers and tcp_discardcb().
232	 *
233	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
234	 */
235	if (inp == NULL) {
236		tcp_timer_race++;
237		INP_INFO_WUNLOCK(&V_tcbinfo);
238		CURVNET_RESTORE();
239		return;
240	}
241	INP_WLOCK(inp);
242	tcp_free_sackholes(tp);
243	if (callout_pending(&tp->t_timers->tt_2msl) ||
244	    !callout_active(&tp->t_timers->tt_2msl)) {
245		INP_WUNLOCK(tp->t_inpcb);
246		INP_INFO_WUNLOCK(&V_tcbinfo);
247		CURVNET_RESTORE();
248		return;
249	}
250	callout_deactivate(&tp->t_timers->tt_2msl);
251	if ((inp->inp_flags & INP_DROPPED) != 0) {
252		INP_WUNLOCK(inp);
253		INP_INFO_WUNLOCK(&V_tcbinfo);
254		CURVNET_RESTORE();
255		return;
256	}
257	/*
258	 * 2 MSL timeout in shutdown went off.  If we're closed but
259	 * still waiting for peer to close and connection has been idle
260	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
261	 * control block.  Otherwise, check again in a bit.
262	 *
263	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
264	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
265	 * Ignore fact that there were recent incoming segments.
266	 */
267	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
268	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
269	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
270		TCPSTAT_INC(tcps_finwait2_drops);
271		tp = tcp_close(tp);
272	} else {
273		if (tp->t_state != TCPS_TIME_WAIT &&
274		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
275		       callout_reset_on(&tp->t_timers->tt_2msl,
276			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
277	       else
278		       tp = tcp_close(tp);
279       }
280
281#ifdef TCPDEBUG
282	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
283		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
284			  PRU_SLOWTIMO);
285#endif
286	if (tp != NULL)
287		INP_WUNLOCK(inp);
288	INP_INFO_WUNLOCK(&V_tcbinfo);
289	CURVNET_RESTORE();
290}
291
292void
293tcp_timer_keep(void *xtp)
294{
295	struct tcpcb *tp = xtp;
296	struct tcptemp *t_template;
297	struct inpcb *inp;
298	CURVNET_SET(tp->t_vnet);
299#ifdef TCPDEBUG
300	int ostate;
301
302	ostate = tp->t_state;
303#endif
304	INP_INFO_WLOCK(&V_tcbinfo);
305	inp = tp->t_inpcb;
306	/*
307	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
308	 * tear-down mean we need it as a work-around for races between
309	 * timers and tcp_discardcb().
310	 *
311	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
312	 */
313	if (inp == NULL) {
314		tcp_timer_race++;
315		INP_INFO_WUNLOCK(&V_tcbinfo);
316		CURVNET_RESTORE();
317		return;
318	}
319	INP_WLOCK(inp);
320	if (callout_pending(&tp->t_timers->tt_keep) ||
321	    !callout_active(&tp->t_timers->tt_keep)) {
322		INP_WUNLOCK(inp);
323		INP_INFO_WUNLOCK(&V_tcbinfo);
324		CURVNET_RESTORE();
325		return;
326	}
327	callout_deactivate(&tp->t_timers->tt_keep);
328	if ((inp->inp_flags & INP_DROPPED) != 0) {
329		INP_WUNLOCK(inp);
330		INP_INFO_WUNLOCK(&V_tcbinfo);
331		CURVNET_RESTORE();
332		return;
333	}
334	/*
335	 * Keep-alive timer went off; send something
336	 * or drop connection if idle for too long.
337	 */
338	TCPSTAT_INC(tcps_keeptimeo);
339	if (tp->t_state < TCPS_ESTABLISHED)
340		goto dropit;
341	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
342	    tp->t_state <= TCPS_CLOSING) {
343		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
344			goto dropit;
345		/*
346		 * Send a packet designed to force a response
347		 * if the peer is up and reachable:
348		 * either an ACK if the connection is still alive,
349		 * or an RST if the peer has closed the connection
350		 * due to timeout or reboot.
351		 * Using sequence number tp->snd_una-1
352		 * causes the transmitted zero-length segment
353		 * to lie outside the receive window;
354		 * by the protocol spec, this requires the
355		 * correspondent TCP to respond.
356		 */
357		TCPSTAT_INC(tcps_keepprobe);
358		t_template = tcpip_maketemplate(inp);
359		if (t_template) {
360			tcp_respond(tp, t_template->tt_ipgen,
361				    &t_template->tt_t, (struct mbuf *)NULL,
362				    tp->rcv_nxt, tp->snd_una - 1, 0);
363			free(t_template, M_TEMP);
364		}
365		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
366		    tcp_timer_keep, tp, INP_CPU(inp));
367	} else
368		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
369		    tcp_timer_keep, tp, INP_CPU(inp));
370
371#ifdef TCPDEBUG
372	if (inp->inp_socket->so_options & SO_DEBUG)
373		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
374			  PRU_SLOWTIMO);
375#endif
376	INP_WUNLOCK(inp);
377	INP_INFO_WUNLOCK(&V_tcbinfo);
378	CURVNET_RESTORE();
379	return;
380
381dropit:
382	TCPSTAT_INC(tcps_keepdrops);
383	tp = tcp_drop(tp, ETIMEDOUT);
384
385#ifdef TCPDEBUG
386	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
387		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
388			  PRU_SLOWTIMO);
389#endif
390	if (tp != NULL)
391		INP_WUNLOCK(tp->t_inpcb);
392	INP_INFO_WUNLOCK(&V_tcbinfo);
393	CURVNET_RESTORE();
394}
395
396void
397tcp_timer_persist(void *xtp)
398{
399	struct tcpcb *tp = xtp;
400	struct inpcb *inp;
401	CURVNET_SET(tp->t_vnet);
402#ifdef TCPDEBUG
403	int ostate;
404
405	ostate = tp->t_state;
406#endif
407	INP_INFO_WLOCK(&V_tcbinfo);
408	inp = tp->t_inpcb;
409	/*
410	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
411	 * tear-down mean we need it as a work-around for races between
412	 * timers and tcp_discardcb().
413	 *
414	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
415	 */
416	if (inp == NULL) {
417		tcp_timer_race++;
418		INP_INFO_WUNLOCK(&V_tcbinfo);
419		CURVNET_RESTORE();
420		return;
421	}
422	INP_WLOCK(inp);
423	if (callout_pending(&tp->t_timers->tt_persist) ||
424	    !callout_active(&tp->t_timers->tt_persist)) {
425		INP_WUNLOCK(inp);
426		INP_INFO_WUNLOCK(&V_tcbinfo);
427		CURVNET_RESTORE();
428		return;
429	}
430	callout_deactivate(&tp->t_timers->tt_persist);
431	if ((inp->inp_flags & INP_DROPPED) != 0) {
432		INP_WUNLOCK(inp);
433		INP_INFO_WUNLOCK(&V_tcbinfo);
434		CURVNET_RESTORE();
435		return;
436	}
437	/*
438	 * Persistance timer into zero window.
439	 * Force a byte to be output, if possible.
440	 */
441	TCPSTAT_INC(tcps_persisttimeo);
442	/*
443	 * Hack: if the peer is dead/unreachable, we do not
444	 * time out if the window is closed.  After a full
445	 * backoff, drop the connection if the idle time
446	 * (no responses to probes) reaches the maximum
447	 * backoff that we would use if retransmitting.
448	 */
449	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
450	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
451	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
452		TCPSTAT_INC(tcps_persistdrop);
453		tp = tcp_drop(tp, ETIMEDOUT);
454		goto out;
455	}
456	/*
457	 * If the user has closed the socket then drop a persisting
458	 * connection after a much reduced timeout.
459	 */
460	if (tp->t_state > TCPS_CLOSE_WAIT &&
461	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
462		TCPSTAT_INC(tcps_persistdrop);
463		tp = tcp_drop(tp, ETIMEDOUT);
464		goto out;
465	}
466	tcp_setpersist(tp);
467	tp->t_flags |= TF_FORCEDATA;
468	(void) tcp_output(tp);
469	tp->t_flags &= ~TF_FORCEDATA;
470
471out:
472#ifdef TCPDEBUG
473	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
474		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
475#endif
476	if (tp != NULL)
477		INP_WUNLOCK(inp);
478	INP_INFO_WUNLOCK(&V_tcbinfo);
479	CURVNET_RESTORE();
480}
481
482void
483tcp_timer_rexmt(void * xtp)
484{
485	struct tcpcb *tp = xtp;
486	CURVNET_SET(tp->t_vnet);
487	int rexmt;
488	int headlocked;
489	struct inpcb *inp;
490#ifdef TCPDEBUG
491	int ostate;
492
493	ostate = tp->t_state;
494#endif
495	INP_INFO_RLOCK(&V_tcbinfo);
496	inp = tp->t_inpcb;
497	/*
498	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
499	 * tear-down mean we need it as a work-around for races between
500	 * timers and tcp_discardcb().
501	 *
502	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
503	 */
504	if (inp == NULL) {
505		tcp_timer_race++;
506		INP_INFO_RUNLOCK(&V_tcbinfo);
507		CURVNET_RESTORE();
508		return;
509	}
510	INP_WLOCK(inp);
511	if (callout_pending(&tp->t_timers->tt_rexmt) ||
512	    !callout_active(&tp->t_timers->tt_rexmt)) {
513		INP_WUNLOCK(inp);
514		INP_INFO_RUNLOCK(&V_tcbinfo);
515		CURVNET_RESTORE();
516		return;
517	}
518	callout_deactivate(&tp->t_timers->tt_rexmt);
519	if ((inp->inp_flags & INP_DROPPED) != 0) {
520		INP_WUNLOCK(inp);
521		INP_INFO_RUNLOCK(&V_tcbinfo);
522		CURVNET_RESTORE();
523		return;
524	}
525	tcp_free_sackholes(tp);
526	/*
527	 * Retransmission timer went off.  Message has not
528	 * been acked within retransmit interval.  Back off
529	 * to a longer retransmit interval and retransmit one segment.
530	 */
531	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
532		tp->t_rxtshift = TCP_MAXRXTSHIFT;
533		TCPSTAT_INC(tcps_timeoutdrop);
534		in_pcbref(inp);
535		INP_INFO_RUNLOCK(&V_tcbinfo);
536		INP_WUNLOCK(inp);
537		INP_INFO_WLOCK(&V_tcbinfo);
538		INP_WLOCK(inp);
539		if (in_pcbrele_wlocked(inp)) {
540			INP_INFO_WUNLOCK(&V_tcbinfo);
541			CURVNET_RESTORE();
542			return;
543		}
544		if (inp->inp_flags & INP_DROPPED) {
545			INP_WUNLOCK(inp);
546			INP_INFO_WUNLOCK(&V_tcbinfo);
547			CURVNET_RESTORE();
548			return;
549		}
550
551		tp = tcp_drop(tp, tp->t_softerror ?
552			      tp->t_softerror : ETIMEDOUT);
553		headlocked = 1;
554		goto out;
555	}
556	INP_INFO_RUNLOCK(&V_tcbinfo);
557	headlocked = 0;
558	if (tp->t_state == TCPS_SYN_SENT) {
559		/*
560		 * If the SYN was retransmitted, indicate CWND to be
561		 * limited to 1 segment in cc_conn_init().
562		 */
563		tp->snd_cwnd = 1;
564	} else if (tp->t_rxtshift == 1) {
565		/*
566		 * first retransmit; record ssthresh and cwnd so they can
567		 * be recovered if this turns out to be a "bad" retransmit.
568		 * A retransmit is considered "bad" if an ACK for this
569		 * segment is received within RTT/2 interval; the assumption
570		 * here is that the ACK was already in flight.  See
571		 * "On Estimating End-to-End Network Path Properties" by
572		 * Allman and Paxson for more details.
573		 */
574		tp->snd_cwnd_prev = tp->snd_cwnd;
575		tp->snd_ssthresh_prev = tp->snd_ssthresh;
576		tp->snd_recover_prev = tp->snd_recover;
577		if (IN_FASTRECOVERY(tp->t_flags))
578			tp->t_flags |= TF_WASFRECOVERY;
579		else
580			tp->t_flags &= ~TF_WASFRECOVERY;
581		if (IN_CONGRECOVERY(tp->t_flags))
582			tp->t_flags |= TF_WASCRECOVERY;
583		else
584			tp->t_flags &= ~TF_WASCRECOVERY;
585		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
586		tp->t_flags |= TF_PREVVALID;
587	} else
588		tp->t_flags &= ~TF_PREVVALID;
589	TCPSTAT_INC(tcps_rexmttimeo);
590	if (tp->t_state == TCPS_SYN_SENT)
591		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
592	else
593		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
594	TCPT_RANGESET(tp->t_rxtcur, rexmt,
595		      tp->t_rttmin, TCPTV_REXMTMAX);
596	/*
597	 * Disable RFC1323 and SACK if we haven't got any response to
598	 * our third SYN to work-around some broken terminal servers
599	 * (most of which have hopefully been retired) that have bad VJ
600	 * header compression code which trashes TCP segments containing
601	 * unknown-to-them TCP options.
602	 */
603	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
604	    (tp->t_rxtshift == 3))
605		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
606	/*
607	 * If we backed off this far, our srtt estimate is probably bogus.
608	 * Clobber it so we'll take the next rtt measurement as our srtt;
609	 * move the current srtt into rttvar to keep the current
610	 * retransmit times until then.
611	 */
612	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
613#ifdef INET6
614		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
615			in6_losing(tp->t_inpcb);
616#endif
617		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
618		tp->t_srtt = 0;
619	}
620	tp->snd_nxt = tp->snd_una;
621	tp->snd_recover = tp->snd_max;
622	/*
623	 * Force a segment to be sent.
624	 */
625	tp->t_flags |= TF_ACKNOW;
626	/*
627	 * If timing a segment in this window, stop the timer.
628	 */
629	tp->t_rtttime = 0;
630
631	cc_cong_signal(tp, NULL, CC_RTO);
632
633	(void) tcp_output(tp);
634
635out:
636#ifdef TCPDEBUG
637	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
638		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
639			  PRU_SLOWTIMO);
640#endif
641	if (tp != NULL)
642		INP_WUNLOCK(inp);
643	if (headlocked)
644		INP_INFO_WUNLOCK(&V_tcbinfo);
645	CURVNET_RESTORE();
646}
647
648void
649tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
650{
651	struct callout *t_callout;
652	void *f_callout;
653	struct inpcb *inp = tp->t_inpcb;
654	int cpu = INP_CPU(inp);
655
656#ifdef TCP_OFFLOAD
657	if (tp->t_flags & TF_TOE)
658		return;
659#endif
660
661	switch (timer_type) {
662		case TT_DELACK:
663			t_callout = &tp->t_timers->tt_delack;
664			f_callout = tcp_timer_delack;
665			break;
666		case TT_REXMT:
667			t_callout = &tp->t_timers->tt_rexmt;
668			f_callout = tcp_timer_rexmt;
669			break;
670		case TT_PERSIST:
671			t_callout = &tp->t_timers->tt_persist;
672			f_callout = tcp_timer_persist;
673			break;
674		case TT_KEEP:
675			t_callout = &tp->t_timers->tt_keep;
676			f_callout = tcp_timer_keep;
677			break;
678		case TT_2MSL:
679			t_callout = &tp->t_timers->tt_2msl;
680			f_callout = tcp_timer_2msl;
681			break;
682		default:
683			panic("bad timer_type");
684		}
685	if (delta == 0) {
686		callout_stop(t_callout);
687	} else {
688		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
689	}
690}
691
692int
693tcp_timer_active(struct tcpcb *tp, int timer_type)
694{
695	struct callout *t_callout;
696
697	switch (timer_type) {
698		case TT_DELACK:
699			t_callout = &tp->t_timers->tt_delack;
700			break;
701		case TT_REXMT:
702			t_callout = &tp->t_timers->tt_rexmt;
703			break;
704		case TT_PERSIST:
705			t_callout = &tp->t_timers->tt_persist;
706			break;
707		case TT_KEEP:
708			t_callout = &tp->t_timers->tt_keep;
709			break;
710		case TT_2MSL:
711			t_callout = &tp->t_timers->tt_2msl;
712			break;
713		default:
714			panic("bad timer_type");
715		}
716	return callout_active(t_callout);
717}
718
/*
 * Convert a tick count to milliseconds using the global hz.
 *
 * The count is widened to 64 bits before the multiplication: in 32-bit
 * arithmetic 1000 * t overflows once t exceeds roughly 2.1 million ticks
 * (about 36 minutes at hz = 1000).
 */
#define	ticks_to_msecs(t)	((int64_t)(t) * 1000 / hz)
720
721void
722tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
723    struct xtcp_timer *xtimer)
724{
725	sbintime_t now;
726
727	bzero(xtimer, sizeof(*xtimer));
728	if (timer == NULL)
729		return;
730	now = getsbinuptime();
731	if (callout_active(&timer->tt_delack))
732		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
733	if (callout_active(&timer->tt_rexmt))
734		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
735	if (callout_active(&timer->tt_persist))
736		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
737	if (callout_active(&timer->tt_keep))
738		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
739	if (callout_active(&timer->tt_2msl))
740		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
741	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
742}
743