tcp_timer.c revision 292706
1139823Simp/*-
211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
2911150Swollman *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
301541Srgrimes */
311541Srgrimes
32172467Ssilby#include <sys/cdefs.h>
33172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 292706 2015-12-24 19:09:48Z pkelsey $");
34172467Ssilby
35243603Snp#include "opt_inet.h"
3655679Sshin#include "opt_inet6.h"
3729514Sjoerg#include "opt_tcpdebug.h"
38266422Sadrian#include "opt_rss.h"
3929514Sjoerg
401541Srgrimes#include <sys/param.h>
4112172Sphk#include <sys/kernel.h>
42102967Sbde#include <sys/lock.h>
4378642Ssilby#include <sys/mbuf.h>
44102967Sbde#include <sys/mutex.h>
45102967Sbde#include <sys/protosw.h>
46205391Skmacy#include <sys/smp.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49102967Sbde#include <sys/sysctl.h>
50102967Sbde#include <sys/systm.h>
511541Srgrimes
52185571Sbz#include <net/if.h>
531541Srgrimes#include <net/route.h>
54277331Sadrian#include <net/rss_config.h>
55196019Srwatson#include <net/vnet.h>
56266422Sadrian#include <net/netisr.h>
571541Srgrimes
58215166Slstewart#include <netinet/cc.h>
591541Srgrimes#include <netinet/in.h>
60287759Sgnn#include <netinet/in_kdtrace.h>
61102967Sbde#include <netinet/in_pcb.h>
62266422Sadrian#include <netinet/in_rss.h>
631541Srgrimes#include <netinet/in_systm.h>
6455679Sshin#ifdef INET6
6555679Sshin#include <netinet6/in6_pcb.h>
6655679Sshin#endif
671541Srgrimes#include <netinet/ip_var.h>
681541Srgrimes#include <netinet/tcp_fsm.h>
691541Srgrimes#include <netinet/tcp_timer.h>
701541Srgrimes#include <netinet/tcp_var.h>
71272720Ssbruno#ifdef INET6
72272720Ssbruno#include <netinet6/tcp6_var.h>
73272720Ssbruno#endif
741541Srgrimes#include <netinet/tcpip.h>
7517138Sdg#ifdef TCPDEBUG
7617138Sdg#include <netinet/tcp_debug.h>
7717138Sdg#endif
781541Srgrimes
/*
 * Global TCP timer tunables, all published under the _net_inet_tcp sysctl
 * node.  The variables registered with SYSCTL_PROC go through the
 * sysctl_msec_to_ticks handler; NOTE(review): the handler name suggests the
 * sysctl value is in milliseconds while the variable holds ticks -- confirm
 * against the handler's definition.  None of the SYSCTL_PROC variables has an
 * initializer here, so their values are presumably assigned elsewhere
 * (TODO confirm where).
 */
7950673Sjlemonint	tcp_keepinit;
8050682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
81180631Strhodes    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
8218280Spst
8350673Sjlemonint	tcp_keepidle;
8450682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
85180631Strhodes    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
8612172Sphk
8750673Sjlemonint	tcp_keepintvl;
8850682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
89180631Strhodes    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
9012172Sphk
9150673Sjlemonint	tcp_delacktime;
92167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
93167721Sandre    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
9450682Sjlemon    "Time before a delayed ACK is sent");
95133874Srwatson
9650673Sjlemonint	tcp_msl;
9750682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
9850682Sjlemon    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
9950673Sjlemon
100100335Sdillonint	tcp_rexmit_min;
101100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
102167721Sandre    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
103167721Sandre    "Minimum Retransmission Timeout");
104100335Sdillon
105100335Sdillonint	tcp_rexmit_slop;
106100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
107167721Sandre    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
108167721Sandre    "Retransmission Timer Slop");
109100335Sdillon
/*
 * When non-zero, tcp_timer_keep() sends keepalive probes even if the socket
 * did not set SO_KEEPALIVE (the two are OR'ed together there).  Enabled by
 * default.
 */
11087499Srwatsonstatic int	always_keepalive = 1;
111133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
11246381Sbillf    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
11315039Sphk
/*
 * When non-zero, tcp_timer_2msl() closes a FIN_WAIT_2 connection as soon as
 * the timer fires if the socket's receive side is marked SBS_CANTRCVMORE,
 * without looking at how recently segments arrived.  Disabled by default.
 */
114167036Smohansint    tcp_fast_finwait2_recycle = 0;
115167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
116167721Sandre    &tcp_fast_finwait2_recycle, 0,
117167721Sandre    "Recycle closed FIN_WAIT_2 connections faster");
118167036Smohans
119167036Smohansint    tcp_finwait2_timeout;
120167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
121167721Sandre    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
122167036Smohans
/* Plain integer count (not a time value); defaults to TCPTV_KEEPCNT. */
123231025Sglebiusint	tcp_keepcnt = TCPTV_KEEPCNT;
124231025SglebiusSYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
125231025Sglebius    "Number of keepalive probes to send");
126167036Smohans
/*
 * Not exported through sysctl in this file.  tcp_timer_persist() compares it
 * against (ticks - tp->t_rcvtime) as one of its drop thresholds.
 */
12712296Sphk	/* max idle probes */
12850673Sjlemonint	tcp_maxpersistidle;
12911150Swollman
/*
 * Global (not per-vnet) knob, off by default.  Its use is outside the part
 * of the file documented here; the sysctl description below is the only
 * statement of its purpose.
 */
130245238Sjhbstatic int	tcp_rexmit_drop_options = 0;
131245238SjhbSYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
132245238Sjhb    &tcp_rexmit_drop_options, 0,
133245238Sjhb    "Drop TCP options from 3rd and later retransmitted SYN");
134245238Sjhb
/*
 * Path MTU Discovery black-hole detection state.  Every variable from here
 * to the end of this group is defined with VNET_DEFINE and registered with
 * CTLFLAG_VNET, and is accessed through its V_ macro.
 *
 * pmtud_blackhole_detection is the read/write enable switch tested by
 * tcp_timer_rexmt().  The three counters that follow are read-only
 * (CTLFLAG_RD).
 */
135272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
136272720Ssbruno#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
137272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
138273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
139272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
140272720Ssbruno    "Path MTU Discovery Black Hole Detection Enabled");
141272720Ssbruno
/* Incremented by tcp_timer_rexmt() when it lowers t_maxopd to the blackhole MSS. */
142272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
143272720Ssbruno#define	V_tcp_pmtud_blackhole_activated \
144272720Ssbruno    VNET(tcp_pmtud_blackhole_activated)
145272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
146273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
147272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
148272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count");
149272720Ssbruno
/* Incremented by tcp_timer_rexmt() when it falls back to the default MSS instead. */
150272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
151272720Ssbruno#define	V_tcp_pmtud_blackhole_activated_min_mss \
152272720Ssbruno    VNET(tcp_pmtud_blackhole_activated_min_mss)
153272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
154273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
155272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
156272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
157272720Ssbruno
158272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
159272720Ssbruno#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
160272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
161273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
162272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
163272720Ssbruno    "Path MTU Discovery Black Hole Detection, Failure Count");
164272720Ssbruno
/*
 * Lowered MSS values tried by tcp_timer_rexmt() when a black hole is
 * suspected: 1200 for IPv4 and 1220 for IPv6 by default, both writable.
 * Each exists only when the matching address family is compiled in.
 */
165272720Ssbruno#ifdef INET
166272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
167272720Ssbruno#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
168272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
169273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
170272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
171272720Ssbruno    "Path MTU Discovery Black Hole Detection lowered MSS");
172272720Ssbruno#endif
173272720Ssbruno
174272720Ssbruno#ifdef INET6
175272720Ssbrunostatic VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
176272720Ssbruno#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
177272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
178273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
179272720Ssbruno    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
180272720Ssbruno    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
181272720Ssbruno#endif
182272720Ssbruno
/*
 * Switch consulted by inp_to_cpuid(): when zero that function always
 * returns CPU 0; when non-zero it derives a CPU from the connection's flow
 * id.  The compile-time default is 1 on kernels built with RSS and 0
 * otherwise; the value stays writable at run time through the sysctl.
 */
183268027Sadrian#ifdef	RSS
184268027Sadrianstatic int	per_cpu_timers = 1;
185268027Sadrian#else
186205391Skmacystatic int	per_cpu_timers = 0;
187268027Sadrian#endif
188205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
189205391Skmacy    &per_cpu_timers , 0, "run tcp timers on all cpus");
190205391Skmacy
/*
 * Compiled out.  This macro computes the same mapping as the non-RSS branch
 * of inp_to_cpuid(): flowid modulo (mp_maxid+1), falling back to curcpu for
 * an absent CPU, or 0 when per_cpu_timers is off.
 */
191266422Sadrian#if 0
192205391Skmacy#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
193205391Skmacy		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
194266422Sadrian#endif
195205391Skmacy
1961541Srgrimes/*
197266422Sadrian * Map the given inp to a CPU id.
198266422Sadrian *
199266422Sadrian * This queries RSS if it's compiled in, else it defaults to the current
200266422Sadrian * CPU ID.
 *
 * More precisely, the code below behaves as follows:
 *  - per_cpu_timers == 0 (either build): returns 0.
 *  - RSS build, per_cpu_timers != 0: returns
 *    rss_hash2cpuid(inp_flowid, inp_flowtype), or curcpu when that call
 *    yields NETISR_CPUID_NONE.
 *  - non-RSS build, per_cpu_timers != 0: returns
 *    inp_flowid % (mp_maxid + 1) unless CPU_ABSENT() is true for that id,
 *    in which case curcpu is returned.
 *
 * Only inp_flowid / inp_flowtype are read from the inpcb; no lock is taken
 * here.  NOTE(review): whether callers must hold the inpcb lock is not
 * visible from this function.
201266422Sadrian */
202266422Sadrianstatic inline int
203266422Sadrianinp_to_cpuid(struct inpcb *inp)
204266422Sadrian{
205266422Sadrian	u_int cpuid;
206266422Sadrian
207266422Sadrian#ifdef	RSS
208266422Sadrian	if (per_cpu_timers) {
209266422Sadrian		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
210266422Sadrian		if (cpuid == NETISR_CPUID_NONE)
211266422Sadrian			return (curcpu);	/* XXX */
212266422Sadrian		else
213266422Sadrian			return (cpuid);
214266422Sadrian	}
215266422Sadrian#else
216266422Sadrian	/* Legacy, pre-RSS behaviour */
217266422Sadrian	if (per_cpu_timers) {
218266422Sadrian		/*
219266422Sadrian		 * We don't have a flowid -> cpuid mapping, so cheat and
220266422Sadrian		 * just map unknown cpuids to curcpu.  Not the best, but
221266422Sadrian		 * apparently better than defaulting to swi 0.
222266422Sadrian		 */
223266422Sadrian		cpuid = inp->inp_flowid % (mp_maxid + 1);
224266422Sadrian		if (! CPU_ABSENT(cpuid))
225266422Sadrian			return (cpuid);
226266422Sadrian		return (curcpu);
227266422Sadrian	}
228266422Sadrian#endif
	/*
	 * This "else" pairs with whichever "if (per_cpu_timers)" the
	 * preprocessor kept above, so exactly one if/else chain exists in
	 * either configuration.
	 */
229266422Sadrian	/* Default for RSS and non-RSS - cpuid 0 */
230266422Sadrian	else {
231266422Sadrian		return (0);
232266422Sadrian	}
233266422Sadrian}
234266422Sadrian
235266422Sadrian/*
2361541Srgrimes * Tcp protocol timeout routine called every 500 ms.
23750673Sjlemon * Updates timestamps used for TCP
2381541Srgrimes * causes finite state machine actions if timers expire.
 *
 * NOTE(review): the wording above looks stale.  The body below does only
 * one thing: under the vnet list read lock it visits every vnet, makes it
 * the current vnet, and calls tcp_tw_2msl_scan(0), discarding the return
 * value.  The 500 ms cadence is set by the caller and is not visible here.
2391541Srgrimes */
2401541Srgrimesvoid
241172309Ssilbytcp_slowtimo(void)
2421541Srgrimes{
243183550Szec	VNET_ITERATOR_DECL(vnet_iter);
2441541Srgrimes
245195760Srwatson	VNET_LIST_RLOCK_NOSLEEP();
246183550Szec	VNET_FOREACH(vnet_iter) {
		/* Switch vnet context for the scan, then switch back. */
247183550Szec		CURVNET_SET(vnet_iter);
248273850Sjch		(void) tcp_tw_2msl_scan(0);
249183550Szec		CURVNET_RESTORE();
250183550Szec	}
251195760Srwatson	VNET_LIST_RUNLOCK_NOSLEEP();
2521541Srgrimes}
2531541Srgrimes
/*
 * Retransmission backoff multipliers, indexed by tp->t_rxtshift in
 * tcp_timer_rexmt().  tcp_syn_backoff[] is used there (multiplied by
 * TCPTV_RTOBASE) while the connection is in SYN_SENT or SYN_RECEIVED;
 * tcp_backoff[] is used (multiplied by TCP_REXMTVAL(tp)) in every other
 * state.
 *
 * Each initializer lists 13 values, which fills the array only if
 * TCP_MAXRXTSHIFT is 12 -- TODO confirm against the header that defines it.
 */
25473110Sjlemonint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
25573110Sjlemon    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
25673110Sjlemon
2571541Srgrimesint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
258115824Shsu    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
2591541Srgrimes
/*
 * 1+2+4+...+256 = 511, plus 4 * 512 = 2048, gives 2559.  This constant must
 * be updated by hand if tcp_backoff[] changes.  tcp_timer_persist() uses it
 * to bound how long a fully backed-off persist timer may stay idle.
 */
260115824Shsustatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
26111150Swollman
262172074Srwatson/*
263172074Srwatson * TCP timer processing.
264172074Srwatson */
265172074Srwatson
/*
 * Delayed-ACK callout handler.
 *
 * xtp is the struct tcpcb the callout was armed with.  The handler sets the
 * current vnet from tp->t_vnet, write-locks the inpcb, and returns without
 * doing anything if the callout is pending again, is no longer active, or
 * the inpcb carries INP_DROPPED.  Otherwise it sets TF_ACKNOW, increments
 * the tcps_delack statistic and calls the connection's tfb_tcp_output
 * method, whose return value is ignored.
 *
 * Every exit path releases the inpcb lock and restores the vnet.  Unlike
 * the other timer handlers in this file, this one does not take the
 * V_tcbinfo lock.
 */
26650673Sjlemonvoid
267172074Srwatsontcp_timer_delack(void *xtp)
2681541Srgrimes{
269172074Srwatson	struct tcpcb *tp = xtp;
270172074Srwatson	struct inpcb *inp;
271183550Szec	CURVNET_SET(tp->t_vnet);
2721541Srgrimes
273172074Srwatson	inp = tp->t_inpcb;
274281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
275178285Srwatson	INP_WLOCK(inp);
	/*
	 * Callout state is checked only after the inpcb lock is held.
	 * NOTE(review): per callout(9), "pending" means the callout has been
	 * scheduled again and "not active" means it was stopped; in both
	 * cases this invocation is skipped.
	 */
276239075Strociny	if (callout_pending(&tp->t_timers->tt_delack) ||
277239075Strociny	    !callout_active(&tp->t_timers->tt_delack)) {
278178285Srwatson		INP_WUNLOCK(inp);
279183550Szec		CURVNET_RESTORE();
28050673Sjlemon		return;
28150673Sjlemon	}
282172309Ssilby	callout_deactivate(&tp->t_timers->tt_delack);
	/* Nothing to send on a connection that has already been dropped. */
283239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
284239075Strociny		INP_WUNLOCK(inp);
285239075Strociny		CURVNET_RESTORE();
286239075Strociny		return;
287239075Strociny	}
288281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
289281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
290281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
291281599Sjch		("%s: tp %p delack callout should be running", __func__, tp));
2921541Srgrimes
	/* Request an immediate ACK and hand off to the output routine. */
29350673Sjlemon	tp->t_flags |= TF_ACKNOW;
294190948Srwatson	TCPSTAT_INC(tcps_delack);
295292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
296178285Srwatson	INP_WUNLOCK(inp);
297183550Szec	CURVNET_RESTORE();
29850673Sjlemon}
29950673Sjlemon
/*
 * 2MSL / FIN_WAIT_2 callout handler.
 *
 * xtp is the struct tcpcb the callout was armed with.  The handler takes
 * the V_tcbinfo read lock and then the inpcb write lock, and releases both
 * (and restores the vnet) on every exit path.
 *
 * It returns early when the callout is pending again or no longer active,
 * when the inpcb has INP_DROPPED set, or when it has INP_TIMEWAIT set.
 * Otherwise it either closes the connection with tcp_close() or re-arms
 * itself for another TP_KEEPINTVL(tp); the conditions are spelled out in
 * the comments in the body.
 *
 * After tcp_close() the local tp holds that function's return value, and
 * the inpcb lock is only released here when that value is non-NULL.
 * NOTE(review): this implies tcp_close() returns NULL once it has released
 * the inpcb itself -- confirm against tcp_close().
 */
300172074Srwatsonvoid
301172074Srwatsontcp_timer_2msl(void *xtp)
30250673Sjlemon{
303172074Srwatson	struct tcpcb *tp = xtp;
304172074Srwatson	struct inpcb *inp;
305183550Szec	CURVNET_SET(tp->t_vnet);
30650673Sjlemon#ifdef TCPDEBUG
30750673Sjlemon	int ostate;
30850673Sjlemon
30950673Sjlemon	ostate = tp->t_state;
31050673Sjlemon#endif
311286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
312172074Srwatson	inp = tp->t_inpcb;
313281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
314178285Srwatson	INP_WLOCK(inp);
	/*
	 * Note that the SACK holes are freed before the callout state is
	 * examined, so this happens even on the early-return path below.
	 */
315172074Srwatson	tcp_free_sackholes(tp);
316239075Strociny	if (callout_pending(&tp->t_timers->tt_2msl) ||
317172309Ssilby	    !callout_active(&tp->t_timers->tt_2msl)) {
		/* tp->t_inpcb is the same pointer as inp, assigned above. */
318178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
319286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
320183550Szec		CURVNET_RESTORE();
321172074Srwatson		return;
322172074Srwatson	}
323172309Ssilby	callout_deactivate(&tp->t_timers->tt_2msl);
324239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
325239075Strociny		INP_WUNLOCK(inp);
326286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
327239075Strociny		CURVNET_RESTORE();
328239075Strociny		return;
329239075Strociny	}
330281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
331281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
332281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
333281599Sjch		("%s: tp %p 2msl callout should be running", __func__, tp));
334172074Srwatson	/*
3351541Srgrimes	 * 2 MSL timeout in shutdown went off.  If we're closed but
3361541Srgrimes	 * still waiting for peer to close and connection has been idle
337286873Sjch	 * too long delete connection control block.  Otherwise, check
338286873Sjch	 * again in a bit.
339167036Smohans	 *
340286873Sjch	 * If in TIME_WAIT state just ignore as this timeout is handled in
341286873Sjch	 * tcp_tw_2msl_scan().
342286873Sjch	 *
343167036Smohans	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
344167036Smohans	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
345167036Smohans	 * Ignore fact that there were recent incoming segments.
3461541Srgrimes	 */
347286873Sjch	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
348286873Sjch		INP_WUNLOCK(inp);
349286873Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
350286873Sjch		CURVNET_RESTORE();
351286873Sjch		return;
352286873Sjch	}
353167036Smohans	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
354172074Srwatson	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
355167036Smohans	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
356190948Srwatson		TCPSTAT_INC(tcps_finwait2_drops);
357172074Srwatson		tp = tcp_close(tp);
358167036Smohans	} else {
		/*
		 * Idle for no longer than TP_MAXIDLE: look again after
		 * another keepalive interval.  TT_2MSL_RST is cleared only
		 * when callout_reset() returns zero.
		 * NOTE(review): the meaning of TT_2MSL_RST is defined
		 * outside this file section -- confirm in tcp_timer.h.
		 */
359287304Sjch		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
360287304Sjch			if (!callout_reset(&tp->t_timers->tt_2msl,
361287304Sjch			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
362287304Sjch				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
363287304Sjch			}
364287304Sjch		} else
365172074Srwatson		       tp = tcp_close(tp);
366172074Srwatson       }
3671541Srgrimes
36850673Sjlemon#ifdef TCPDEBUG
369172312Skib	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
37097658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
37197658Stanimura			  PRU_SLOWTIMO);
37250673Sjlemon#endif
	/* The probe is fired even when tp became NULL after tcp_close(). */
373287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
374287759Sgnn
375172074Srwatson	if (tp != NULL)
376178285Srwatson		INP_WUNLOCK(inp);
377286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
378183550Szec	CURVNET_RESTORE();
37950673Sjlemon}
38050673Sjlemon
/*
 * Keepalive callout handler.
 *
 * xtp is the struct tcpcb the callout was armed with.  The handler takes
 * the V_tcbinfo read lock and then the inpcb write lock, and releases both
 * (and restores the vnet) on every exit path.  It returns early when the
 * callout is pending again or no longer active, or when the inpcb has
 * INP_DROPPED set.
 *
 * Otherwise, after incrementing tcps_keeptimeo:
 *  - a connection whose state is below TCPS_ESTABLISHED is dropped with
 *    ETIMEDOUT;
 *  - if keepalives apply (always_keepalive or SO_KEEPALIVE) and the state
 *    is at most TCPS_CLOSING, the connection is dropped once it has been
 *    idle for TP_KEEPIDLE + TP_MAXIDLE ticks, else a probe is sent and the
 *    timer is re-armed for TP_KEEPINTVL;
 *  - in all remaining cases the timer is simply re-armed for TP_KEEPIDLE.
 *
 * On the drop path the local tp holds tcp_drop()'s return value and the
 * inpcb lock is only released here when that value is non-NULL.
 */
381172074Srwatsonvoid
382172074Srwatsontcp_timer_keep(void *xtp)
38350673Sjlemon{
384172074Srwatson	struct tcpcb *tp = xtp;
38578642Ssilby	struct tcptemp *t_template;
386172074Srwatson	struct inpcb *inp;
387183550Szec	CURVNET_SET(tp->t_vnet);
38850673Sjlemon#ifdef TCPDEBUG
38950673Sjlemon	int ostate;
39050673Sjlemon
39150673Sjlemon	ostate = tp->t_state;
39250673Sjlemon#endif
393286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
394172074Srwatson	inp = tp->t_inpcb;
395281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
396178285Srwatson	INP_WLOCK(inp);
397239075Strociny	if (callout_pending(&tp->t_timers->tt_keep) ||
398239075Strociny	    !callout_active(&tp->t_timers->tt_keep)) {
399178285Srwatson		INP_WUNLOCK(inp);
400286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
401183550Szec		CURVNET_RESTORE();
402172074Srwatson		return;
403172074Srwatson	}
404172309Ssilby	callout_deactivate(&tp->t_timers->tt_keep);
405239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
406239075Strociny		INP_WUNLOCK(inp);
407286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
408239075Strociny		CURVNET_RESTORE();
409239075Strociny		return;
410239075Strociny	}
411281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
412281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
413281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
414281599Sjch		("%s: tp %p keep callout should be running", __func__, tp));
415172074Srwatson	/*
41650673Sjlemon	 * Keep-alive timer went off; send something
41750673Sjlemon	 * or drop connection if idle for too long.
4181541Srgrimes	 */
419190948Srwatson	TCPSTAT_INC(tcps_keeptimeo);
	/* Not yet established when the timer fired: give up on it. */
42050673Sjlemon	if (tp->t_state < TCPS_ESTABLISHED)
42150673Sjlemon		goto dropit;
422122326Ssam	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
42350673Sjlemon	    tp->t_state <= TCPS_CLOSING) {
424231025Sglebius		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
42550673Sjlemon			goto dropit;
4261541Srgrimes		/*
42750673Sjlemon		 * Send a packet designed to force a response
42850673Sjlemon		 * if the peer is up and reachable:
42950673Sjlemon		 * either an ACK if the connection is still alive,
43050673Sjlemon		 * or an RST if the peer has closed the connection
43150673Sjlemon		 * due to timeout or reboot.
43250673Sjlemon		 * Using sequence number tp->snd_una-1
43350673Sjlemon		 * causes the transmitted zero-length segment
43450673Sjlemon		 * to lie outside the receive window;
43550673Sjlemon		 * by the protocol spec, this requires the
43650673Sjlemon		 * correspondent TCP to respond.
4371541Srgrimes		 */
438190948Srwatson		TCPSTAT_INC(tcps_keepprobe);
		/*
		 * The template is freed right after use.  If it could not
		 * be built, no probe goes out this round, but the timer is
		 * still re-armed below.
		 */
439111144Sjlemon		t_template = tcpip_maketemplate(inp);
44078642Ssilby		if (t_template) {
44178642Ssilby			tcp_respond(tp, t_template->tt_ipgen,
44278642Ssilby				    &t_template->tt_t, (struct mbuf *)NULL,
44378642Ssilby				    tp->rcv_nxt, tp->snd_una - 1, 0);
444179487Srwatson			free(t_template, M_TEMP);
44578642Ssilby		}
		/*
		 * TT_KEEP_RST is cleared only when callout_reset() returns
		 * zero.  NOTE(review): the flag's meaning is defined outside
		 * this file section -- confirm in tcp_timer.h.
		 */
446287304Sjch		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
447287304Sjch		    tcp_timer_keep, tp)) {
448287304Sjch			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
449287304Sjch		}
	/*
	 * Keepalives do not apply (or state is past CLOSING): just re-arm
	 * for a full idle period.  The brace that closes this else-if body
	 * is the over-indented one three lines below.
	 */
450287304Sjch	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
451287304Sjch		    tcp_timer_keep, tp)) {
452287304Sjch			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
453287304Sjch		}
45450673Sjlemon
45550673Sjlemon#ifdef TCPDEBUG
456122326Ssam	if (inp->inp_socket->so_options & SO_DEBUG)
45755679Sshin		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
45850673Sjlemon			  PRU_SLOWTIMO);
45950673Sjlemon#endif
460287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
461178285Srwatson	INP_WUNLOCK(inp);
462286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
463183550Szec	CURVNET_RESTORE();
464172074Srwatson	return;
46550673Sjlemon
	/* Drop path: tp may be NULL after tcp_drop(), hence the checks below. */
46650673Sjlemondropit:
467190948Srwatson	TCPSTAT_INC(tcps_keepdrops);
468172074Srwatson	tp = tcp_drop(tp, ETIMEDOUT);
469172074Srwatson
470172074Srwatson#ifdef TCPDEBUG
471172074Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
472172074Srwatson		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
473172074Srwatson			  PRU_SLOWTIMO);
474172074Srwatson#endif
475287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
476172074Srwatson	if (tp != NULL)
477178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
478286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
479183550Szec	CURVNET_RESTORE();
48050673Sjlemon}
48150673Sjlemon
/*
 * Persist (zero-window probe) callout handler.
 *
 * xtp is the struct tcpcb the callout was armed with.  The handler takes
 * the V_tcbinfo read lock and then the inpcb write lock, and releases both
 * (and restores the vnet) on every exit path.  It returns early when the
 * callout is pending again or no longer active, or when the inpcb has
 * INP_DROPPED set.
 *
 * Otherwise, after incrementing tcps_persisttimeo, the connection is
 * dropped with ETIMEDOUT (and tcps_persistdrop incremented) when either:
 *  - t_rxtshift has reached TCP_MAXRXTSHIFT and the idle time
 *    (ticks - t_rcvtime) is at least tcp_maxpersistidle or at least
 *    TCP_REXMTVAL(tp) * tcp_totbackoff; or
 *  - the state is past TCPS_CLOSE_WAIT and the idle time is at least
 *    TCPTV_PERSMAX.
 * If neither applies, tcp_setpersist() is called and one output pass is
 * made with TF_FORCEDATA set for the duration of the call.
 *
 * After tcp_drop() the local tp holds that function's return value, and the
 * inpcb lock is only released here when that value is non-NULL.
 */
482172074Srwatsonvoid
483172074Srwatsontcp_timer_persist(void *xtp)
48450673Sjlemon{
485172074Srwatson	struct tcpcb *tp = xtp;
486172074Srwatson	struct inpcb *inp;
487183550Szec	CURVNET_SET(tp->t_vnet);
48850673Sjlemon#ifdef TCPDEBUG
48950673Sjlemon	int ostate;
49050673Sjlemon
49150673Sjlemon	ostate = tp->t_state;
49250673Sjlemon#endif
493286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
494172074Srwatson	inp = tp->t_inpcb;
495281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
496178285Srwatson	INP_WLOCK(inp);
497239075Strociny	if (callout_pending(&tp->t_timers->tt_persist) ||
498239075Strociny	    !callout_active(&tp->t_timers->tt_persist)) {
499178285Srwatson		INP_WUNLOCK(inp);
500286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
501183550Szec		CURVNET_RESTORE();
502172074Srwatson		return;
503172074Srwatson	}
504172309Ssilby	callout_deactivate(&tp->t_timers->tt_persist);
505239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
506239075Strociny		INP_WUNLOCK(inp);
507286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
508239075Strociny		CURVNET_RESTORE();
509239075Strociny		return;
510239075Strociny	}
511281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
512281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
513281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
514281599Sjch		("%s: tp %p persist callout should be running", __func__, tp));
515172074Srwatson	/*
51650673Sjlemon	 * Persistence timer into zero window.
51750673Sjlemon	 * Force a byte to be output, if possible.
51850673Sjlemon	 */
519190948Srwatson	TCPSTAT_INC(tcps_persisttimeo);
52050673Sjlemon	/*
52150673Sjlemon	 * Hack: if the peer is dead/unreachable, we do not
52250673Sjlemon	 * time out if the window is closed.  After a full
52350673Sjlemon	 * backoff, drop the connection if the idle time
52450673Sjlemon	 * (no responses to probes) reaches the maximum
52550673Sjlemon	 * backoff that we would use if retransmitting.
52650673Sjlemon	 */
52750673Sjlemon	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
528194305Sjhb	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
529194305Sjhb	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
530190948Srwatson		TCPSTAT_INC(tcps_persistdrop);
531172074Srwatson		tp = tcp_drop(tp, ETIMEDOUT);
532172074Srwatson		goto out;
53350673Sjlemon	}
534242267Sandre	/*
535242267Sandre	 * If the user has closed the socket then drop a persisting
536242267Sandre	 * connection after a much reduced timeout.
537242267Sandre	 */
538242267Sandre	if (tp->t_state > TCPS_CLOSE_WAIT &&
539242267Sandre	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
540242267Sandre		TCPSTAT_INC(tcps_persistdrop);
541242267Sandre		tp = tcp_drop(tp, ETIMEDOUT);
542242267Sandre		goto out;
543242267Sandre	}
	/*
	 * Keep probing: tcp_setpersist() is called first, then output runs
	 * with TF_FORCEDATA set only for the duration of that one call.
	 */
54450673Sjlemon	tcp_setpersist(tp);
545146463Sps	tp->t_flags |= TF_FORCEDATA;
546292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
547146463Sps	tp->t_flags &= ~TF_FORCEDATA;
54850673Sjlemon
	/* Common exit; tp is NULL here if tcp_drop() returned NULL. */
549172074Srwatsonout:
55050673Sjlemon#ifdef TCPDEBUG
551158644Sglebius	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
552158644Sglebius		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
55350673Sjlemon#endif
554287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
555172074Srwatson	if (tp != NULL)
556178285Srwatson		INP_WUNLOCK(inp);
557286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
558183550Szec	CURVNET_RESTORE();
55950673Sjlemon}
56050673Sjlemon
561172074Srwatsonvoid
562172074Srwatsontcp_timer_rexmt(void * xtp)
56350673Sjlemon{
564172074Srwatson	struct tcpcb *tp = xtp;
565183550Szec	CURVNET_SET(tp->t_vnet);
56650673Sjlemon	int rexmt;
567172074Srwatson	int headlocked;
568172074Srwatson	struct inpcb *inp;
56950673Sjlemon#ifdef TCPDEBUG
57050673Sjlemon	int ostate;
57150673Sjlemon
57250673Sjlemon	ostate = tp->t_state;
57350673Sjlemon#endif
574272720Ssbruno
575205391Skmacy	INP_INFO_RLOCK(&V_tcbinfo);
576172074Srwatson	inp = tp->t_inpcb;
577281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
578178285Srwatson	INP_WLOCK(inp);
579239075Strociny	if (callout_pending(&tp->t_timers->tt_rexmt) ||
580239075Strociny	    !callout_active(&tp->t_timers->tt_rexmt)) {
581178285Srwatson		INP_WUNLOCK(inp);
582205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
583183550Szec		CURVNET_RESTORE();
584172074Srwatson		return;
585172074Srwatson	}
586172309Ssilby	callout_deactivate(&tp->t_timers->tt_rexmt);
587239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
588239075Strociny		INP_WUNLOCK(inp);
589239075Strociny		INP_INFO_RUNLOCK(&V_tcbinfo);
590239075Strociny		CURVNET_RESTORE();
591239075Strociny		return;
592239075Strociny	}
593281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
594281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
595281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
596281599Sjch		("%s: tp %p rexmt callout should be running", __func__, tp));
597130989Sps	tcp_free_sackholes(tp);
59850673Sjlemon	/*
59950673Sjlemon	 * Retransmission timer went off.  Message has not
60050673Sjlemon	 * been acked within retransmit interval.  Back off
60150673Sjlemon	 * to a longer retransmit interval and retransmit one segment.
60250673Sjlemon	 */
60350673Sjlemon	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
60450673Sjlemon		tp->t_rxtshift = TCP_MAXRXTSHIFT;
605190948Srwatson		TCPSTAT_INC(tcps_timeoutdrop);
606226318Snp
607172074Srwatson		tp = tcp_drop(tp, tp->t_softerror ?
608172074Srwatson			      tp->t_softerror : ETIMEDOUT);
609205391Skmacy		headlocked = 1;
610172074Srwatson		goto out;
61150673Sjlemon	}
612205391Skmacy	INP_INFO_RUNLOCK(&V_tcbinfo);
613172074Srwatson	headlocked = 0;
614242250Sandre	if (tp->t_state == TCPS_SYN_SENT) {
61513229Solah		/*
616242250Sandre		 * If the SYN was retransmitted, indicate CWND to be
617242250Sandre		 * limited to 1 segment in cc_conn_init().
618242250Sandre		 */
619242250Sandre		tp->snd_cwnd = 1;
620242250Sandre	} else if (tp->t_rxtshift == 1) {
621242250Sandre		/*
62250673Sjlemon		 * first retransmit; record ssthresh and cwnd so they can
623133874Srwatson		 * be recovered if this turns out to be a "bad" retransmit.
624133874Srwatson		 * A retransmit is considered "bad" if an ACK for this
62550673Sjlemon		 * segment is received within RTT/2 interval; the assumption
626133874Srwatson		 * here is that the ACK was already in flight.  See
62750673Sjlemon		 * "On Estimating End-to-End Network Path Properties" by
62850673Sjlemon		 * Allman and Paxson for more details.
6291541Srgrimes		 */
63050673Sjlemon		tp->snd_cwnd_prev = tp->snd_cwnd;
63150673Sjlemon		tp->snd_ssthresh_prev = tp->snd_ssthresh;
632117650Shsu		tp->snd_recover_prev = tp->snd_recover;
633215166Slstewart		if (IN_FASTRECOVERY(tp->t_flags))
634215166Slstewart			tp->t_flags |= TF_WASFRECOVERY;
635117650Shsu		else
636215166Slstewart			tp->t_flags &= ~TF_WASFRECOVERY;
637215166Slstewart		if (IN_CONGRECOVERY(tp->t_flags))
638215166Slstewart			tp->t_flags |= TF_WASCRECOVERY;
639215166Slstewart		else
640215166Slstewart			tp->t_flags &= ~TF_WASCRECOVERY;
64150673Sjlemon		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
642221209Sjhb		tp->t_flags |= TF_PREVVALID;
643221209Sjhb	} else
644221209Sjhb		tp->t_flags &= ~TF_PREVVALID;
645190948Srwatson	TCPSTAT_INC(tcps_rexmttimeo);
646292706Spkelsey	if ((tp->t_state == TCPS_SYN_SENT) ||
647292706Spkelsey	    (tp->t_state == TCPS_SYN_RECEIVED))
648242260Sandre		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
64973110Sjlemon	else
65073110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
65150673Sjlemon	TCPT_RANGESET(tp->t_rxtcur, rexmt,
65250673Sjlemon		      tp->t_rttmin, TCPTV_REXMTMAX);
653272720Ssbruno
654273063Ssbruno	/*
655273063Ssbruno	 * We enter the path for PLMTUD if connection is established or, if
656273063Ssbruno	 * connection is FIN_WAIT_1 status, reason for the last is that if
657273063Ssbruno	 * amount of data we send is very small, we could send it in couple of
658273063Ssbruno	 * packets and process straight to FIN. In that case we won't catch
659273063Ssbruno	 * ESTABLISHED state.
660273063Ssbruno	 */
661273063Ssbruno	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
662273063Ssbruno	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
663272720Ssbruno		int optlen;
664272720Ssbruno#ifdef INET6
665272720Ssbruno		int isipv6;
666272720Ssbruno#endif
667272720Ssbruno
668289293Shiren		/*
669289293Shiren		 * Idea here is that at each stage of mtu probe (usually, 1448
670289293Shiren		 * -> 1188 -> 524) should be given 2 chances to recover before
671289293Shiren		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
672289293Shiren		 *  take care of that.
673289293Shiren		 */
674272720Ssbruno		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
675272720Ssbruno		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
676289293Shiren		    (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
677272720Ssbruno			/*
678272720Ssbruno			 * Enter Path MTU Black-hole Detection mechanism:
679272720Ssbruno			 * - Disable Path MTU Discovery (IP "DF" bit).
680272720Ssbruno			 * - Reduce MTU to lower value than what we
681272720Ssbruno			 *   negotiated with peer.
682272720Ssbruno			 */
683272720Ssbruno			/* Record that we may have found a black hole. */
684272720Ssbruno			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
685272720Ssbruno
686272720Ssbruno			/* Keep track of previous MSS. */
687272720Ssbruno			optlen = tp->t_maxopd - tp->t_maxseg;
688272720Ssbruno			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
689272720Ssbruno
690272720Ssbruno			/*
691272720Ssbruno			 * Reduce the MSS to blackhole value or to the default
692272720Ssbruno			 * in an attempt to retransmit.
693272720Ssbruno			 */
694272720Ssbruno#ifdef INET6
695272720Ssbruno			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
696272720Ssbruno			if (isipv6 &&
697272720Ssbruno			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
698272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
699272720Ssbruno				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
700272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
701272720Ssbruno			} else if (isipv6) {
702272720Ssbruno				/* Use the default MSS. */
703272720Ssbruno				tp->t_maxopd = V_tcp_v6mssdflt;
704272720Ssbruno				/*
705272720Ssbruno				 * Disable Path MTU Discovery when we switch to
706272720Ssbruno				 * minmss.
707272720Ssbruno				 */
708272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
709272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
710272720Ssbruno			}
711272720Ssbruno#endif
712272720Ssbruno#if defined(INET6) && defined(INET)
713272720Ssbruno			else
714272720Ssbruno#endif
715272720Ssbruno#ifdef INET
716272720Ssbruno			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
717272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
718272720Ssbruno				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
719272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
720272720Ssbruno			} else {
721272720Ssbruno				/* Use the default MSS. */
722272720Ssbruno				tp->t_maxopd = V_tcp_mssdflt;
723272720Ssbruno				/*
724272720Ssbruno				 * Disable Path MTU Discovery when we switch to
725272720Ssbruno				 * minmss.
726272720Ssbruno				 */
727272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
728272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
729272720Ssbruno			}
730272720Ssbruno#endif
731272720Ssbruno			tp->t_maxseg = tp->t_maxopd - optlen;
732272720Ssbruno			/*
733272720Ssbruno			 * Reset the slow-start flight size
734272720Ssbruno			 * as it may depend on the new MSS.
735272720Ssbruno			 */
736272720Ssbruno			if (CC_ALGO(tp)->conn_init != NULL)
737272720Ssbruno				CC_ALGO(tp)->conn_init(tp->ccv);
738272720Ssbruno		} else {
739272720Ssbruno			/*
740272720Ssbruno			 * If further retransmissions are still unsuccessful
741272720Ssbruno			 * with a lowered MTU, maybe this isn't a blackhole and
742272720Ssbruno			 * we restore the previous MSS and blackhole detection
743272720Ssbruno			 * flags.
744289293Shiren			 * The limit '6' is determined by giving each probe
745289293Shiren			 * stage (1448, 1188, 524) 2 chances to recover.
746272720Ssbruno			 */
747272720Ssbruno			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
748289293Shiren			    (tp->t_rxtshift > 6)) {
749272720Ssbruno				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
750272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
751272720Ssbruno				optlen = tp->t_maxopd - tp->t_maxseg;
752272720Ssbruno				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
753272720Ssbruno				tp->t_maxseg = tp->t_maxopd - optlen;
754272720Ssbruno				V_tcp_pmtud_blackhole_failed++;
755272720Ssbruno				/*
756272720Ssbruno				 * Reset the slow-start flight size as it
757272720Ssbruno				 * may depend on the new MSS.
758272720Ssbruno				 */
759272720Ssbruno				if (CC_ALGO(tp)->conn_init != NULL)
760272720Ssbruno					CC_ALGO(tp)->conn_init(tp->ccv);
761272720Ssbruno			}
762272720Ssbruno		}
763272720Ssbruno	}
764272720Ssbruno
76550673Sjlemon	/*
766242264Sandre	 * Disable RFC1323 and SACK if we haven't got any response to
767133874Srwatson	 * our third SYN to work-around some broken terminal servers
768133874Srwatson	 * (most of which have hopefully been retired) that have bad VJ
769133874Srwatson	 * header compression code which trashes TCP segments containing
77077539Sjesper	 * unknown-to-them TCP options.
77177539Sjesper	 */
772245238Sjhb	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
773245238Sjhb	    (tp->t_rxtshift == 3))
774242263Sandre		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
77577539Sjesper	/*
776122922Sandre	 * If we backed off this far, our srtt estimate is probably bogus.
777122922Sandre	 * Clobber it so we'll take the next rtt measurement as our srtt;
77850673Sjlemon	 * move the current srtt into rttvar to keep the current
77950673Sjlemon	 * retransmit times until then.
78050673Sjlemon	 */
78150673Sjlemon	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
78255679Sshin#ifdef INET6
78355679Sshin		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
78455679Sshin			in6_losing(tp->t_inpcb);
78555679Sshin#endif
78650673Sjlemon		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
78750673Sjlemon		tp->t_srtt = 0;
78850673Sjlemon	}
78950673Sjlemon	tp->snd_nxt = tp->snd_una;
790117650Shsu	tp->snd_recover = tp->snd_max;
79150673Sjlemon	/*
79250673Sjlemon	 * Force a segment to be sent.
79350673Sjlemon	 */
79450673Sjlemon	tp->t_flags |= TF_ACKNOW;
79550673Sjlemon	/*
79650673Sjlemon	 * If timing a segment in this window, stop the timer.
79750673Sjlemon	 */
79850673Sjlemon	tp->t_rtttime = 0;
799215166Slstewart
800216101Slstewart	cc_cong_signal(tp, NULL, CC_RTO);
801215166Slstewart
802292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
8031541Srgrimes
804172074Srwatsonout:
80550673Sjlemon#ifdef TCPDEBUG
806157136Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
80797658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
80897658Stanimura			  PRU_SLOWTIMO);
8091541Srgrimes#endif
810287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
811172074Srwatson	if (tp != NULL)
812178285Srwatson		INP_WUNLOCK(inp);
813172074Srwatson	if (headlocked)
814286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
815183550Szec	CURVNET_RESTORE();
8161541Srgrimes}
817172074Srwatson
818172074Srwatsonvoid
819281599Sjchtcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
820172074Srwatson{
821172074Srwatson	struct callout *t_callout;
822280904Sjch	timeout_t *f_callout;
823205391Skmacy	struct inpcb *inp = tp->t_inpcb;
824266422Sadrian	int cpu = inp_to_cpuid(inp);
825287304Sjch	uint32_t f_reset;
826172074Srwatson
827237263Snp#ifdef TCP_OFFLOAD
828237263Snp	if (tp->t_flags & TF_TOE)
829237263Snp		return;
830237263Snp#endif
831237263Snp
832281599Sjch	if (tp->t_timers->tt_flags & TT_STOPPED)
833281599Sjch		return;
834281599Sjch
835172074Srwatson	switch (timer_type) {
836172074Srwatson		case TT_DELACK:
837172309Ssilby			t_callout = &tp->t_timers->tt_delack;
838172074Srwatson			f_callout = tcp_timer_delack;
839287304Sjch			f_reset = TT_DELACK_RST;
840172074Srwatson			break;
841172074Srwatson		case TT_REXMT:
842172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
843172074Srwatson			f_callout = tcp_timer_rexmt;
844287304Sjch			f_reset = TT_REXMT_RST;
845172074Srwatson			break;
846172074Srwatson		case TT_PERSIST:
847172309Ssilby			t_callout = &tp->t_timers->tt_persist;
848172074Srwatson			f_callout = tcp_timer_persist;
849287304Sjch			f_reset = TT_PERSIST_RST;
850172074Srwatson			break;
851172074Srwatson		case TT_KEEP:
852172309Ssilby			t_callout = &tp->t_timers->tt_keep;
853172074Srwatson			f_callout = tcp_timer_keep;
854287304Sjch			f_reset = TT_KEEP_RST;
855172074Srwatson			break;
856172074Srwatson		case TT_2MSL:
857172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
858172074Srwatson			f_callout = tcp_timer_2msl;
859287304Sjch			f_reset = TT_2MSL_RST;
860172074Srwatson			break;
861172074Srwatson		default:
862292309Srrs			if (tp->t_fb->tfb_tcp_timer_activate) {
863292309Srrs				tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
864292309Srrs				return;
865292309Srrs			}
866280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
867172074Srwatson		}
868172074Srwatson	if (delta == 0) {
869281599Sjch		if ((tp->t_timers->tt_flags & timer_type) &&
870290805Srrs		    (callout_stop(t_callout) > 0) &&
871287304Sjch		    (tp->t_timers->tt_flags & f_reset)) {
872287304Sjch			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
873281599Sjch		}
874172074Srwatson	} else {
875281599Sjch		if ((tp->t_timers->tt_flags & timer_type) == 0) {
876287304Sjch			tp->t_timers->tt_flags |= (timer_type | f_reset);
877281599Sjch			callout_reset_on(t_callout, delta, f_callout, tp, cpu);
878281599Sjch		} else {
879281599Sjch			/* Reset already running callout on the same CPU. */
880287304Sjch			if (!callout_reset(t_callout, delta, f_callout, tp)) {
881287304Sjch				/*
882287304Sjch				 * Callout not cancelled, consider it as not
883287304Sjch				 * properly restarted. */
884287304Sjch				tp->t_timers->tt_flags &= ~f_reset;
885287304Sjch			}
886281599Sjch		}
887172074Srwatson	}
888172074Srwatson}
889172074Srwatson
890172074Srwatsonint
891281599Sjchtcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
892172074Srwatson{
893172074Srwatson	struct callout *t_callout;
894172074Srwatson
895172074Srwatson	switch (timer_type) {
896172074Srwatson		case TT_DELACK:
897172309Ssilby			t_callout = &tp->t_timers->tt_delack;
898172074Srwatson			break;
899172074Srwatson		case TT_REXMT:
900172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
901172074Srwatson			break;
902172074Srwatson		case TT_PERSIST:
903172309Ssilby			t_callout = &tp->t_timers->tt_persist;
904172074Srwatson			break;
905172074Srwatson		case TT_KEEP:
906172309Ssilby			t_callout = &tp->t_timers->tt_keep;
907172074Srwatson			break;
908172074Srwatson		case TT_2MSL:
909172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
910172074Srwatson			break;
911172074Srwatson		default:
912292309Srrs			if (tp->t_fb->tfb_tcp_timer_active) {
913292309Srrs				return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
914292309Srrs			}
915280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
916172074Srwatson		}
917172074Srwatson	return callout_active(t_callout);
918172074Srwatson}
919197244Ssilby
920281599Sjchvoid
921281599Sjchtcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
922281599Sjch{
923281599Sjch	struct callout *t_callout;
924281599Sjch	timeout_t *f_callout;
925287304Sjch	uint32_t f_reset;
926281599Sjch
927281599Sjch	tp->t_timers->tt_flags |= TT_STOPPED;
928281599Sjch
929281599Sjch	switch (timer_type) {
930281599Sjch		case TT_DELACK:
931281599Sjch			t_callout = &tp->t_timers->tt_delack;
932281599Sjch			f_callout = tcp_timer_delack_discard;
933287304Sjch			f_reset = TT_DELACK_RST;
934281599Sjch			break;
935281599Sjch		case TT_REXMT:
936281599Sjch			t_callout = &tp->t_timers->tt_rexmt;
937281599Sjch			f_callout = tcp_timer_rexmt_discard;
938287304Sjch			f_reset = TT_REXMT_RST;
939281599Sjch			break;
940281599Sjch		case TT_PERSIST:
941281599Sjch			t_callout = &tp->t_timers->tt_persist;
942281599Sjch			f_callout = tcp_timer_persist_discard;
943287304Sjch			f_reset = TT_PERSIST_RST;
944281599Sjch			break;
945281599Sjch		case TT_KEEP:
946281599Sjch			t_callout = &tp->t_timers->tt_keep;
947281599Sjch			f_callout = tcp_timer_keep_discard;
948287304Sjch			f_reset = TT_KEEP_RST;
949281599Sjch			break;
950281599Sjch		case TT_2MSL:
951281599Sjch			t_callout = &tp->t_timers->tt_2msl;
952281599Sjch			f_callout = tcp_timer_2msl_discard;
953287304Sjch			f_reset = TT_2MSL_RST;
954281599Sjch			break;
955281599Sjch		default:
956292309Srrs			if (tp->t_fb->tfb_tcp_timer_stop) {
957292309Srrs				/*
958292309Srrs				 * XXXrrs we need to look at this with the
959292309Srrs				 * stop case below (flags).
960292309Srrs				 */
961292309Srrs				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
962292309Srrs				return;
963292309Srrs			}
964281599Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
965281599Sjch		}
966281599Sjch
967281599Sjch	if (tp->t_timers->tt_flags & timer_type) {
968290805Srrs		if ((callout_stop(t_callout) > 0) &&
969287304Sjch		    (tp->t_timers->tt_flags & f_reset)) {
970287304Sjch			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
971281599Sjch		} else {
972281599Sjch			/*
973281599Sjch			 * Can't stop the callout, defer tcpcb actual deletion
974281599Sjch			 * to the last tcp timer discard callout.
975281599Sjch			 * The TT_STOPPED flag will ensure that no tcp timer
976281599Sjch			 * callouts can be restarted on our behalf, and
977281599Sjch			 * past this point currently running callouts waiting
978281599Sjch			 * on inp lock will return right away after the
979281599Sjch			 * classical check for callout reset/stop events:
980281599Sjch			 * callout_pending() || !callout_active()
981281599Sjch			 */
982281599Sjch			callout_reset(t_callout, 1, f_callout, tp);
983281599Sjch		}
984281599Sjch	}
985281599Sjch}
986281599Sjch
/*
 * Convert a tick count to milliseconds using the global 'hz'.
 * The multiplication happens before the division, in the promoted type
 * of 't'.  NOTE(review): large tick counts may therefore wrap -- confirm
 * the callers' ranges.
 */
#define	ticks_to_msecs(t)	((1000 * (t)) / hz)
988197244Ssilby
989197244Ssilbyvoid
990247777Sdavidetcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
991247777Sdavide    struct xtcp_timer *xtimer)
992197244Ssilby{
993247777Sdavide	sbintime_t now;
994247777Sdavide
995247777Sdavide	bzero(xtimer, sizeof(*xtimer));
996197244Ssilby	if (timer == NULL)
997197244Ssilby		return;
998247777Sdavide	now = getsbinuptime();
999197244Ssilby	if (callout_active(&timer->tt_delack))
1000247777Sdavide		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
1001197244Ssilby	if (callout_active(&timer->tt_rexmt))
1002247777Sdavide		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
1003197244Ssilby	if (callout_active(&timer->tt_persist))
1004247777Sdavide		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
1005197244Ssilby	if (callout_active(&timer->tt_keep))
1006247777Sdavide		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
1007197244Ssilby	if (callout_active(&timer->tt_2msl))
1008247777Sdavide		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
1009197244Ssilby	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
1010197244Ssilby}
1011