1139823Simp/*-
211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
2911150Swollman *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
301541Srgrimes */
311541Srgrimes
32172467Ssilby#include <sys/cdefs.h>
33172467Ssilby__FBSDID("$FreeBSD: stable/11/sys/netinet/tcp_timer.c 334727 2018-06-06 19:48:39Z tuexen $");
34172467Ssilby
35243603Snp#include "opt_inet.h"
3655679Sshin#include "opt_inet6.h"
3729514Sjoerg#include "opt_tcpdebug.h"
38266422Sadrian#include "opt_rss.h"
3929514Sjoerg
401541Srgrimes#include <sys/param.h>
4112172Sphk#include <sys/kernel.h>
42102967Sbde#include <sys/lock.h>
4378642Ssilby#include <sys/mbuf.h>
44102967Sbde#include <sys/mutex.h>
45102967Sbde#include <sys/protosw.h>
46205391Skmacy#include <sys/smp.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49102967Sbde#include <sys/sysctl.h>
50102967Sbde#include <sys/systm.h>
511541Srgrimes
52185571Sbz#include <net/if.h>
531541Srgrimes#include <net/route.h>
54277331Sadrian#include <net/rss_config.h>
55196019Srwatson#include <net/vnet.h>
56266422Sadrian#include <net/netisr.h>
571541Srgrimes
581541Srgrimes#include <netinet/in.h>
59287759Sgnn#include <netinet/in_kdtrace.h>
60102967Sbde#include <netinet/in_pcb.h>
61266422Sadrian#include <netinet/in_rss.h>
621541Srgrimes#include <netinet/in_systm.h>
6355679Sshin#ifdef INET6
6455679Sshin#include <netinet6/in6_pcb.h>
6555679Sshin#endif
661541Srgrimes#include <netinet/ip_var.h>
67294535Sglebius#include <netinet/tcp.h>
681541Srgrimes#include <netinet/tcp_fsm.h>
691541Srgrimes#include <netinet/tcp_timer.h>
701541Srgrimes#include <netinet/tcp_var.h>
71294931Sglebius#include <netinet/cc/cc.h>
72272720Ssbruno#ifdef INET6
73272720Ssbruno#include <netinet6/tcp6_var.h>
74272720Ssbruno#endif
751541Srgrimes#include <netinet/tcpip.h>
7617138Sdg#ifdef TCPDEBUG
7717138Sdg#include <netinet/tcp_debug.h>
7817138Sdg#endif
791541Srgrimes
80294840Shirenint    tcp_persmin;
81294840ShirenSYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
82294840Shiren    &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
83294840Shiren
84294840Shirenint    tcp_persmax;
85294840ShirenSYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
86294840Shiren    &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
87294840Shiren
8850673Sjlemonint	tcp_keepinit;
8950682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
90180631Strhodes    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
9118280Spst
9250673Sjlemonint	tcp_keepidle;
9350682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
94180631Strhodes    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
9512172Sphk
9650673Sjlemonint	tcp_keepintvl;
9750682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
98180631Strhodes    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
9912172Sphk
10050673Sjlemonint	tcp_delacktime;
101167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
102167721Sandre    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
10350682Sjlemon    "Time before a delayed ACK is sent");
104133874Srwatson
10550673Sjlemonint	tcp_msl;
10650682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
10750682Sjlemon    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
10850673Sjlemon
109100335Sdillonint	tcp_rexmit_min;
110100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
111167721Sandre    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
112167721Sandre    "Minimum Retransmission Timeout");
113100335Sdillon
114100335Sdillonint	tcp_rexmit_slop;
115100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
116167721Sandre    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
117167721Sandre    "Retransmission Timer Slop");
118100335Sdillon
119330303Sjhbint	tcp_always_keepalive = 1;
120133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
121330303Sjhb    &tcp_always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
122330303Sjhb__strong_reference(tcp_always_keepalive, always_keepalive);
12315039Sphk
124167036Smohansint    tcp_fast_finwait2_recycle = 0;
125167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
126167721Sandre    &tcp_fast_finwait2_recycle, 0,
127167721Sandre    "Recycle closed FIN_WAIT_2 connections faster");
128167036Smohans
129167036Smohansint    tcp_finwait2_timeout;
130167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
131167721Sandre    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
132167036Smohans
133231025Sglebiusint	tcp_keepcnt = TCPTV_KEEPCNT;
134231025SglebiusSYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
135231025Sglebius    "Number of keepalive probes to send");
136167036Smohans
13712296Sphk	/* max idle probes */
13850673Sjlemonint	tcp_maxpersistidle;
13911150Swollman
140245238Sjhbstatic int	tcp_rexmit_drop_options = 0;
141245238SjhbSYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
142245238Sjhb    &tcp_rexmit_drop_options, 0,
143245238Sjhb    "Drop TCP options from 3rd and later retransmitted SYN");
144245238Sjhb
145272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
146272720Ssbruno#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
147272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
148273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
149272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
150272720Ssbruno    "Path MTU Discovery Black Hole Detection Enabled");
151272720Ssbruno
152272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
153272720Ssbruno#define	V_tcp_pmtud_blackhole_activated \
154272720Ssbruno    VNET(tcp_pmtud_blackhole_activated)
155272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
156273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
157272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
158272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count");
159272720Ssbruno
160272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
161272720Ssbruno#define	V_tcp_pmtud_blackhole_activated_min_mss \
162272720Ssbruno    VNET(tcp_pmtud_blackhole_activated_min_mss)
163272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
164273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
165272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
166272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
167272720Ssbruno
168272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
169272720Ssbruno#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
170272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
171273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
172272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
173272720Ssbruno    "Path MTU Discovery Black Hole Detection, Failure Count");
174272720Ssbruno
175272720Ssbruno#ifdef INET
176272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
177272720Ssbruno#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
178272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
179273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
180272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
181272720Ssbruno    "Path MTU Discovery Black Hole Detection lowered MSS");
182272720Ssbruno#endif
183272720Ssbruno
184272720Ssbruno#ifdef INET6
185272720Ssbrunostatic VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
186272720Ssbruno#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
187272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
188273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
189272720Ssbruno    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
190272720Ssbruno    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
191272720Ssbruno#endif
192272720Ssbruno
193268027Sadrian#ifdef	RSS
194268027Sadrianstatic int	per_cpu_timers = 1;
195268027Sadrian#else
196205391Skmacystatic int	per_cpu_timers = 0;
197268027Sadrian#endif
198205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
199205391Skmacy    &per_cpu_timers , 0, "run tcp timers on all cpus");
200205391Skmacy
201266422Sadrian#if 0
202205391Skmacy#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
203205391Skmacy		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
204266422Sadrian#endif
205205391Skmacy
2061541Srgrimes/*
207266422Sadrian * Map the given inp to a CPU id.
208266422Sadrian *
209266422Sadrian * This queries RSS if it's compiled in, else it defaults to the current
210266422Sadrian * CPU ID.
211266422Sadrian */
212266422Sadrianstatic inline int
213266422Sadrianinp_to_cpuid(struct inpcb *inp)
214266422Sadrian{
215266422Sadrian	u_int cpuid;
216266422Sadrian
217266422Sadrian#ifdef	RSS
218266422Sadrian	if (per_cpu_timers) {
219266422Sadrian		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
220266422Sadrian		if (cpuid == NETISR_CPUID_NONE)
221266422Sadrian			return (curcpu);	/* XXX */
222266422Sadrian		else
223266422Sadrian			return (cpuid);
224266422Sadrian	}
225266422Sadrian#else
226266422Sadrian	/* Legacy, pre-RSS behaviour */
227266422Sadrian	if (per_cpu_timers) {
228266422Sadrian		/*
229266422Sadrian		 * We don't have a flowid -> cpuid mapping, so cheat and
230266422Sadrian		 * just map unknown cpuids to curcpu.  Not the best, but
231266422Sadrian		 * apparently better than defaulting to swi 0.
232266422Sadrian		 */
233266422Sadrian		cpuid = inp->inp_flowid % (mp_maxid + 1);
234266422Sadrian		if (! CPU_ABSENT(cpuid))
235266422Sadrian			return (cpuid);
236266422Sadrian		return (curcpu);
237266422Sadrian	}
238266422Sadrian#endif
239266422Sadrian	/* Default for RSS and non-RSS - cpuid 0 */
240266422Sadrian	else {
241266422Sadrian		return (0);
242266422Sadrian	}
243266422Sadrian}
244266422Sadrian
245266422Sadrian/*
2461541Srgrimes * Tcp protocol timeout routine called every 500 ms.
24750673Sjlemon * Updates timestamps used for TCP
2481541Srgrimes * causes finite state machine actions if timers expire.
2491541Srgrimes */
2501541Srgrimesvoid
251172309Ssilbytcp_slowtimo(void)
2521541Srgrimes{
253183550Szec	VNET_ITERATOR_DECL(vnet_iter);
2541541Srgrimes
255195760Srwatson	VNET_LIST_RLOCK_NOSLEEP();
256183550Szec	VNET_FOREACH(vnet_iter) {
257183550Szec		CURVNET_SET(vnet_iter);
258273850Sjch		(void) tcp_tw_2msl_scan(0);
259183550Szec		CURVNET_RESTORE();
260183550Szec	}
261195760Srwatson	VNET_LIST_RUNLOCK_NOSLEEP();
2621541Srgrimes}
2631541Srgrimes
26473110Sjlemonint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
26573110Sjlemon    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
26673110Sjlemon
2671541Srgrimesint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
268115824Shsu    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
2691541Srgrimes
270115824Shsustatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
27111150Swollman
272172074Srwatson/*
273172074Srwatson * TCP timer processing.
274172074Srwatson */
275172074Srwatson
27650673Sjlemonvoid
277172074Srwatsontcp_timer_delack(void *xtp)
2781541Srgrimes{
279172074Srwatson	struct tcpcb *tp = xtp;
280172074Srwatson	struct inpcb *inp;
281183550Szec	CURVNET_SET(tp->t_vnet);
2821541Srgrimes
283172074Srwatson	inp = tp->t_inpcb;
284281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
285178285Srwatson	INP_WLOCK(inp);
286239075Strociny	if (callout_pending(&tp->t_timers->tt_delack) ||
287239075Strociny	    !callout_active(&tp->t_timers->tt_delack)) {
288178285Srwatson		INP_WUNLOCK(inp);
289183550Szec		CURVNET_RESTORE();
29050673Sjlemon		return;
29150673Sjlemon	}
292172309Ssilby	callout_deactivate(&tp->t_timers->tt_delack);
293239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
294239075Strociny		INP_WUNLOCK(inp);
295239075Strociny		CURVNET_RESTORE();
296239075Strociny		return;
297239075Strociny	}
298281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
299281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
300281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
301281599Sjch		("%s: tp %p delack callout should be running", __func__, tp));
3021541Srgrimes
30350673Sjlemon	tp->t_flags |= TF_ACKNOW;
304190948Srwatson	TCPSTAT_INC(tcps_delack);
305292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
306178285Srwatson	INP_WUNLOCK(inp);
307183550Szec	CURVNET_RESTORE();
30850673Sjlemon}
30950673Sjlemon
310172074Srwatsonvoid
311172074Srwatsontcp_timer_2msl(void *xtp)
31250673Sjlemon{
313172074Srwatson	struct tcpcb *tp = xtp;
314172074Srwatson	struct inpcb *inp;
315183550Szec	CURVNET_SET(tp->t_vnet);
31650673Sjlemon#ifdef TCPDEBUG
31750673Sjlemon	int ostate;
31850673Sjlemon
31950673Sjlemon	ostate = tp->t_state;
32050673Sjlemon#endif
321286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
322172074Srwatson	inp = tp->t_inpcb;
323281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
324178285Srwatson	INP_WLOCK(inp);
325172074Srwatson	tcp_free_sackholes(tp);
326239075Strociny	if (callout_pending(&tp->t_timers->tt_2msl) ||
327172309Ssilby	    !callout_active(&tp->t_timers->tt_2msl)) {
328178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
329286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
330183550Szec		CURVNET_RESTORE();
331172074Srwatson		return;
332172074Srwatson	}
333172309Ssilby	callout_deactivate(&tp->t_timers->tt_2msl);
334239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
335239075Strociny		INP_WUNLOCK(inp);
336286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
337239075Strociny		CURVNET_RESTORE();
338239075Strociny		return;
339239075Strociny	}
340281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
341281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
342281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
343281599Sjch		("%s: tp %p 2msl callout should be running", __func__, tp));
344172074Srwatson	/*
3451541Srgrimes	 * 2 MSL timeout in shutdown went off.  If we're closed but
3461541Srgrimes	 * still waiting for peer to close and connection has been idle
347286873Sjch	 * too long delete connection control block.  Otherwise, check
348286873Sjch	 * again in a bit.
349167036Smohans	 *
350286873Sjch	 * If in TIME_WAIT state just ignore as this timeout is handled in
351286873Sjch	 * tcp_tw_2msl_scan().
352286873Sjch	 *
353167036Smohans	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
354167036Smohans	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
355167036Smohans	 * Ignore fact that there were recent incoming segments.
3561541Srgrimes	 */
357286873Sjch	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
358286873Sjch		INP_WUNLOCK(inp);
359286873Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
360286873Sjch		CURVNET_RESTORE();
361286873Sjch		return;
362286873Sjch	}
363167036Smohans	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
364172074Srwatson	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
365167036Smohans	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
366190948Srwatson		TCPSTAT_INC(tcps_finwait2_drops);
367172074Srwatson		tp = tcp_close(tp);
368167036Smohans	} else {
369287304Sjch		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
370287304Sjch			if (!callout_reset(&tp->t_timers->tt_2msl,
371287304Sjch			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
372287304Sjch				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
373287304Sjch			}
374287304Sjch		} else
375172074Srwatson		       tp = tcp_close(tp);
376172074Srwatson       }
3771541Srgrimes
37850673Sjlemon#ifdef TCPDEBUG
379172312Skib	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
38097658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
38197658Stanimura			  PRU_SLOWTIMO);
38250673Sjlemon#endif
383287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
384287759Sgnn
385172074Srwatson	if (tp != NULL)
386178285Srwatson		INP_WUNLOCK(inp);
387286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
388183550Szec	CURVNET_RESTORE();
38950673Sjlemon}
39050673Sjlemon
391172074Srwatsonvoid
392172074Srwatsontcp_timer_keep(void *xtp)
39350673Sjlemon{
394172074Srwatson	struct tcpcb *tp = xtp;
39578642Ssilby	struct tcptemp *t_template;
396172074Srwatson	struct inpcb *inp;
397183550Szec	CURVNET_SET(tp->t_vnet);
39850673Sjlemon#ifdef TCPDEBUG
39950673Sjlemon	int ostate;
40050673Sjlemon
40150673Sjlemon	ostate = tp->t_state;
40250673Sjlemon#endif
403286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
404172074Srwatson	inp = tp->t_inpcb;
405281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
406178285Srwatson	INP_WLOCK(inp);
407239075Strociny	if (callout_pending(&tp->t_timers->tt_keep) ||
408239075Strociny	    !callout_active(&tp->t_timers->tt_keep)) {
409178285Srwatson		INP_WUNLOCK(inp);
410286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
411183550Szec		CURVNET_RESTORE();
412172074Srwatson		return;
413172074Srwatson	}
414172309Ssilby	callout_deactivate(&tp->t_timers->tt_keep);
415239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
416239075Strociny		INP_WUNLOCK(inp);
417286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
418239075Strociny		CURVNET_RESTORE();
419239075Strociny		return;
420239075Strociny	}
421281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
422281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
423281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
424281599Sjch		("%s: tp %p keep callout should be running", __func__, tp));
425172074Srwatson	/*
42650673Sjlemon	 * Keep-alive timer went off; send something
42750673Sjlemon	 * or drop connection if idle for too long.
4281541Srgrimes	 */
429190948Srwatson	TCPSTAT_INC(tcps_keeptimeo);
43050673Sjlemon	if (tp->t_state < TCPS_ESTABLISHED)
43150673Sjlemon		goto dropit;
432330303Sjhb	if ((tcp_always_keepalive ||
433330303Sjhb	    inp->inp_socket->so_options & SO_KEEPALIVE) &&
43450673Sjlemon	    tp->t_state <= TCPS_CLOSING) {
435231025Sglebius		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
43650673Sjlemon			goto dropit;
4371541Srgrimes		/*
43850673Sjlemon		 * Send a packet designed to force a response
43950673Sjlemon		 * if the peer is up and reachable:
44050673Sjlemon		 * either an ACK if the connection is still alive,
44150673Sjlemon		 * or an RST if the peer has closed the connection
44250673Sjlemon		 * due to timeout or reboot.
44350673Sjlemon		 * Using sequence number tp->snd_una-1
44450673Sjlemon		 * causes the transmitted zero-length segment
44550673Sjlemon		 * to lie outside the receive window;
44650673Sjlemon		 * by the protocol spec, this requires the
44750673Sjlemon		 * correspondent TCP to respond.
4481541Srgrimes		 */
449190948Srwatson		TCPSTAT_INC(tcps_keepprobe);
450111144Sjlemon		t_template = tcpip_maketemplate(inp);
45178642Ssilby		if (t_template) {
45278642Ssilby			tcp_respond(tp, t_template->tt_ipgen,
45378642Ssilby				    &t_template->tt_t, (struct mbuf *)NULL,
45478642Ssilby				    tp->rcv_nxt, tp->snd_una - 1, 0);
455179487Srwatson			free(t_template, M_TEMP);
45678642Ssilby		}
457287304Sjch		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
458287304Sjch		    tcp_timer_keep, tp)) {
459287304Sjch			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
460287304Sjch		}
461287304Sjch	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
462287304Sjch		    tcp_timer_keep, tp)) {
463287304Sjch			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
464287304Sjch		}
46550673Sjlemon
46650673Sjlemon#ifdef TCPDEBUG
467122326Ssam	if (inp->inp_socket->so_options & SO_DEBUG)
46855679Sshin		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
46950673Sjlemon			  PRU_SLOWTIMO);
47050673Sjlemon#endif
471287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
472178285Srwatson	INP_WUNLOCK(inp);
473286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
474183550Szec	CURVNET_RESTORE();
475172074Srwatson	return;
47650673Sjlemon
47750673Sjlemondropit:
478190948Srwatson	TCPSTAT_INC(tcps_keepdrops);
479172074Srwatson	tp = tcp_drop(tp, ETIMEDOUT);
480172074Srwatson
481172074Srwatson#ifdef TCPDEBUG
482172074Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
483172074Srwatson		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
484172074Srwatson			  PRU_SLOWTIMO);
485172074Srwatson#endif
486287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
487172074Srwatson	if (tp != NULL)
488178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
489286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
490183550Szec	CURVNET_RESTORE();
49150673Sjlemon}
49250673Sjlemon
493172074Srwatsonvoid
494172074Srwatsontcp_timer_persist(void *xtp)
49550673Sjlemon{
496172074Srwatson	struct tcpcb *tp = xtp;
497172074Srwatson	struct inpcb *inp;
498183550Szec	CURVNET_SET(tp->t_vnet);
49950673Sjlemon#ifdef TCPDEBUG
50050673Sjlemon	int ostate;
50150673Sjlemon
50250673Sjlemon	ostate = tp->t_state;
50350673Sjlemon#endif
504286227Sjch	INP_INFO_RLOCK(&V_tcbinfo);
505172074Srwatson	inp = tp->t_inpcb;
506281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
507178285Srwatson	INP_WLOCK(inp);
508239075Strociny	if (callout_pending(&tp->t_timers->tt_persist) ||
509239075Strociny	    !callout_active(&tp->t_timers->tt_persist)) {
510178285Srwatson		INP_WUNLOCK(inp);
511286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
512183550Szec		CURVNET_RESTORE();
513172074Srwatson		return;
514172074Srwatson	}
515172309Ssilby	callout_deactivate(&tp->t_timers->tt_persist);
516239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
517239075Strociny		INP_WUNLOCK(inp);
518286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
519239075Strociny		CURVNET_RESTORE();
520239075Strociny		return;
521239075Strociny	}
522281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
523281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
524281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
525281599Sjch		("%s: tp %p persist callout should be running", __func__, tp));
526172074Srwatson	/*
527298995Spfg	 * Persistence timer into zero window.
52850673Sjlemon	 * Force a byte to be output, if possible.
52950673Sjlemon	 */
530190948Srwatson	TCPSTAT_INC(tcps_persisttimeo);
53150673Sjlemon	/*
53250673Sjlemon	 * Hack: if the peer is dead/unreachable, we do not
53350673Sjlemon	 * time out if the window is closed.  After a full
53450673Sjlemon	 * backoff, drop the connection if the idle time
53550673Sjlemon	 * (no responses to probes) reaches the maximum
53650673Sjlemon	 * backoff that we would use if retransmitting.
53750673Sjlemon	 */
53850673Sjlemon	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
539194305Sjhb	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
540194305Sjhb	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
541190948Srwatson		TCPSTAT_INC(tcps_persistdrop);
542172074Srwatson		tp = tcp_drop(tp, ETIMEDOUT);
543172074Srwatson		goto out;
54450673Sjlemon	}
545242267Sandre	/*
546242267Sandre	 * If the user has closed the socket then drop a persisting
547242267Sandre	 * connection after a much reduced timeout.
548242267Sandre	 */
549242267Sandre	if (tp->t_state > TCPS_CLOSE_WAIT &&
550242267Sandre	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
551242267Sandre		TCPSTAT_INC(tcps_persistdrop);
552242267Sandre		tp = tcp_drop(tp, ETIMEDOUT);
553242267Sandre		goto out;
554242267Sandre	}
55550673Sjlemon	tcp_setpersist(tp);
556146463Sps	tp->t_flags |= TF_FORCEDATA;
557292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
558146463Sps	tp->t_flags &= ~TF_FORCEDATA;
55950673Sjlemon
560172074Srwatsonout:
56150673Sjlemon#ifdef TCPDEBUG
562158644Sglebius	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
563158644Sglebius		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
56450673Sjlemon#endif
565287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
566172074Srwatson	if (tp != NULL)
567178285Srwatson		INP_WUNLOCK(inp);
568286227Sjch	INP_INFO_RUNLOCK(&V_tcbinfo);
569183550Szec	CURVNET_RESTORE();
57050673Sjlemon}
57150673Sjlemon
572172074Srwatsonvoid
573172074Srwatsontcp_timer_rexmt(void * xtp)
57450673Sjlemon{
575172074Srwatson	struct tcpcb *tp = xtp;
576183550Szec	CURVNET_SET(tp->t_vnet);
57750673Sjlemon	int rexmt;
578172074Srwatson	int headlocked;
579172074Srwatson	struct inpcb *inp;
58050673Sjlemon#ifdef TCPDEBUG
58150673Sjlemon	int ostate;
58250673Sjlemon
58350673Sjlemon	ostate = tp->t_state;
58450673Sjlemon#endif
585272720Ssbruno
586205391Skmacy	INP_INFO_RLOCK(&V_tcbinfo);
587172074Srwatson	inp = tp->t_inpcb;
588281599Sjch	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
589178285Srwatson	INP_WLOCK(inp);
590239075Strociny	if (callout_pending(&tp->t_timers->tt_rexmt) ||
591239075Strociny	    !callout_active(&tp->t_timers->tt_rexmt)) {
592178285Srwatson		INP_WUNLOCK(inp);
593205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
594183550Szec		CURVNET_RESTORE();
595172074Srwatson		return;
596172074Srwatson	}
597172309Ssilby	callout_deactivate(&tp->t_timers->tt_rexmt);
598239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
599239075Strociny		INP_WUNLOCK(inp);
600239075Strociny		INP_INFO_RUNLOCK(&V_tcbinfo);
601239075Strociny		CURVNET_RESTORE();
602239075Strociny		return;
603239075Strociny	}
604281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
605281599Sjch		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
606281599Sjch	KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
607281599Sjch		("%s: tp %p rexmt callout should be running", __func__, tp));
608130989Sps	tcp_free_sackholes(tp);
609300042Srrs	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
610300042Srrs		/* The stack has a timer action too. */
611300042Srrs		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
612300042Srrs	}
61350673Sjlemon	/*
61450673Sjlemon	 * Retransmission timer went off.  Message has not
61550673Sjlemon	 * been acked within retransmit interval.  Back off
61650673Sjlemon	 * to a longer retransmit interval and retransmit one segment.
61750673Sjlemon	 */
61850673Sjlemon	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
61950673Sjlemon		tp->t_rxtshift = TCP_MAXRXTSHIFT;
620190948Srwatson		TCPSTAT_INC(tcps_timeoutdrop);
621226318Snp
622334727Stuexen		tp = tcp_drop(tp, ETIMEDOUT);
623205391Skmacy		headlocked = 1;
624172074Srwatson		goto out;
62550673Sjlemon	}
626205391Skmacy	INP_INFO_RUNLOCK(&V_tcbinfo);
627172074Srwatson	headlocked = 0;
628242250Sandre	if (tp->t_state == TCPS_SYN_SENT) {
62913229Solah		/*
630242250Sandre		 * If the SYN was retransmitted, indicate CWND to be
631242250Sandre		 * limited to 1 segment in cc_conn_init().
632242250Sandre		 */
633242250Sandre		tp->snd_cwnd = 1;
634242250Sandre	} else if (tp->t_rxtshift == 1) {
635242250Sandre		/*
63650673Sjlemon		 * first retransmit; record ssthresh and cwnd so they can
637133874Srwatson		 * be recovered if this turns out to be a "bad" retransmit.
638133874Srwatson		 * A retransmit is considered "bad" if an ACK for this
63950673Sjlemon		 * segment is received within RTT/2 interval; the assumption
640133874Srwatson		 * here is that the ACK was already in flight.  See
64150673Sjlemon		 * "On Estimating End-to-End Network Path Properties" by
64250673Sjlemon		 * Allman and Paxson for more details.
6431541Srgrimes		 */
64450673Sjlemon		tp->snd_cwnd_prev = tp->snd_cwnd;
64550673Sjlemon		tp->snd_ssthresh_prev = tp->snd_ssthresh;
646117650Shsu		tp->snd_recover_prev = tp->snd_recover;
647215166Slstewart		if (IN_FASTRECOVERY(tp->t_flags))
648215166Slstewart			tp->t_flags |= TF_WASFRECOVERY;
649117650Shsu		else
650215166Slstewart			tp->t_flags &= ~TF_WASFRECOVERY;
651215166Slstewart		if (IN_CONGRECOVERY(tp->t_flags))
652215166Slstewart			tp->t_flags |= TF_WASCRECOVERY;
653215166Slstewart		else
654215166Slstewart			tp->t_flags &= ~TF_WASCRECOVERY;
65550673Sjlemon		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
656221209Sjhb		tp->t_flags |= TF_PREVVALID;
657221209Sjhb	} else
658221209Sjhb		tp->t_flags &= ~TF_PREVVALID;
659190948Srwatson	TCPSTAT_INC(tcps_rexmttimeo);
660292706Spkelsey	if ((tp->t_state == TCPS_SYN_SENT) ||
661292706Spkelsey	    (tp->t_state == TCPS_SYN_RECEIVED))
662242260Sandre		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
66373110Sjlemon	else
66473110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
66550673Sjlemon	TCPT_RANGESET(tp->t_rxtcur, rexmt,
66650673Sjlemon		      tp->t_rttmin, TCPTV_REXMTMAX);
667272720Ssbruno
668273063Ssbruno	/*
669273063Ssbruno	 * We enter the path for PLMTUD if connection is established or, if
670273063Ssbruno	 * connection is FIN_WAIT_1 status, reason for the last is that if
671273063Ssbruno	 * amount of data we send is very small, we could send it in couple of
672273063Ssbruno	 * packets and process straight to FIN. In that case we won't catch
673273063Ssbruno	 * ESTABLISHED state.
674273063Ssbruno	 */
675273063Ssbruno	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
676273063Ssbruno	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
677272720Ssbruno#ifdef INET6
678272720Ssbruno		int isipv6;
679272720Ssbruno#endif
680272720Ssbruno
681289293Shiren		/*
682289293Shiren		 * Idea here is that at each stage of mtu probe (usually, 1448
683289293Shiren		 * -> 1188 -> 524) should be given 2 chances to recover before
684289293Shiren		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
685289293Shiren		 *  take care of that.
686289293Shiren		 */
687272720Ssbruno		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
688272720Ssbruno		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
689332178Stuexen		    (tp->t_rxtshift >= 2 && tp->t_rxtshift < 6 &&
690332178Stuexen		    tp->t_rxtshift % 2 == 0)) {
691272720Ssbruno			/*
692272720Ssbruno			 * Enter Path MTU Black-hole Detection mechanism:
693272720Ssbruno			 * - Disable Path MTU Discovery (IP "DF" bit).
694272720Ssbruno			 * - Reduce MTU to lower value than what we
695272720Ssbruno			 *   negotiated with peer.
696272720Ssbruno			 */
697332178Stuexen			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) == 0) {
698332178Stuexen				/* Record that we may have found a black hole. */
699332178Stuexen				tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
700332178Stuexen				/* Keep track of previous MSS. */
701332178Stuexen				tp->t_pmtud_saved_maxseg = tp->t_maxseg;
702332178Stuexen			}
703272720Ssbruno
704272720Ssbruno			/*
705272720Ssbruno			 * Reduce the MSS to blackhole value or to the default
706272720Ssbruno			 * in an attempt to retransmit.
707272720Ssbruno			 */
708272720Ssbruno#ifdef INET6
709272720Ssbruno			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
710272720Ssbruno			if (isipv6 &&
711293284Sglebius			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
712272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
713293284Sglebius				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
714272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
715272720Ssbruno			} else if (isipv6) {
716272720Ssbruno				/* Use the default MSS. */
717293284Sglebius				tp->t_maxseg = V_tcp_v6mssdflt;
718272720Ssbruno				/*
719272720Ssbruno				 * Disable Path MTU Discovery when we switch to
720272720Ssbruno				 * minmss.
721272720Ssbruno				 */
722272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
723272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
724272720Ssbruno			}
725272720Ssbruno#endif
726272720Ssbruno#if defined(INET6) && defined(INET)
727272720Ssbruno			else
728272720Ssbruno#endif
729272720Ssbruno#ifdef INET
730293284Sglebius			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
731272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
732293284Sglebius				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
733272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
734272720Ssbruno			} else {
735272720Ssbruno				/* Use the default MSS. */
736293284Sglebius				tp->t_maxseg = V_tcp_mssdflt;
737272720Ssbruno				/*
738272720Ssbruno				 * Disable Path MTU Discovery when we switch to
739272720Ssbruno				 * minmss.
740272720Ssbruno				 */
741272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
742272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
743272720Ssbruno			}
744272720Ssbruno#endif
745272720Ssbruno			/*
746272720Ssbruno			 * Reset the slow-start flight size
747272720Ssbruno			 * as it may depend on the new MSS.
748272720Ssbruno			 */
749272720Ssbruno			if (CC_ALGO(tp)->conn_init != NULL)
750272720Ssbruno				CC_ALGO(tp)->conn_init(tp->ccv);
751272720Ssbruno		} else {
752272720Ssbruno			/*
753272720Ssbruno			 * If further retransmissions are still unsuccessful
754272720Ssbruno			 * with a lowered MTU, maybe this isn't a blackhole and
755272720Ssbruno			 * we restore the previous MSS and blackhole detection
756272720Ssbruno			 * flags.
757289293Shiren			 * The limit '6' is determined by giving each probe
758289293Shiren			 * stage (1448, 1188, 524) 2 chances to recover.
759272720Ssbruno			 */
760272720Ssbruno			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
761332178Stuexen			    (tp->t_rxtshift >= 6)) {
762272720Ssbruno				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
763272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
764293284Sglebius				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
765272720Ssbruno				V_tcp_pmtud_blackhole_failed++;
766272720Ssbruno				/*
767272720Ssbruno				 * Reset the slow-start flight size as it
768272720Ssbruno				 * may depend on the new MSS.
769272720Ssbruno				 */
770272720Ssbruno				if (CC_ALGO(tp)->conn_init != NULL)
771272720Ssbruno					CC_ALGO(tp)->conn_init(tp->ccv);
772272720Ssbruno			}
773272720Ssbruno		}
774272720Ssbruno	}
775272720Ssbruno
77650673Sjlemon	/*
777242264Sandre	 * Disable RFC1323 and SACK if we haven't got any response to
778133874Srwatson	 * our third SYN to work-around some broken terminal servers
779133874Srwatson	 * (most of which have hopefully been retired) that have bad VJ
780133874Srwatson	 * header compression code which trashes TCP segments containing
78177539Sjesper	 * unknown-to-them TCP options.
78277539Sjesper	 */
783245238Sjhb	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
784245238Sjhb	    (tp->t_rxtshift == 3))
785242263Sandre		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
78677539Sjesper	/*
787122922Sandre	 * If we backed off this far, our srtt estimate is probably bogus.
788122922Sandre	 * Clobber it so we'll take the next rtt measurement as our srtt;
78950673Sjlemon	 * move the current srtt into rttvar to keep the current
79050673Sjlemon	 * retransmit times until then.
79150673Sjlemon	 */
79250673Sjlemon	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
79355679Sshin#ifdef INET6
79455679Sshin		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
79555679Sshin			in6_losing(tp->t_inpcb);
796297225Sgnn		else
79755679Sshin#endif
798297225Sgnn			in_losing(tp->t_inpcb);
79950673Sjlemon		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
80050673Sjlemon		tp->t_srtt = 0;
80150673Sjlemon	}
80250673Sjlemon	tp->snd_nxt = tp->snd_una;
803117650Shsu	tp->snd_recover = tp->snd_max;
80450673Sjlemon	/*
80550673Sjlemon	 * Force a segment to be sent.
80650673Sjlemon	 */
80750673Sjlemon	tp->t_flags |= TF_ACKNOW;
80850673Sjlemon	/*
80950673Sjlemon	 * If timing a segment in this window, stop the timer.
81050673Sjlemon	 */
81150673Sjlemon	tp->t_rtttime = 0;
812215166Slstewart
813216101Slstewart	cc_cong_signal(tp, NULL, CC_RTO);
814215166Slstewart
815292309Srrs	(void) tp->t_fb->tfb_tcp_output(tp);
8161541Srgrimes
817172074Srwatsonout:
81850673Sjlemon#ifdef TCPDEBUG
819157136Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
82097658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
82197658Stanimura			  PRU_SLOWTIMO);
8221541Srgrimes#endif
823287759Sgnn	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
824172074Srwatson	if (tp != NULL)
825178285Srwatson		INP_WUNLOCK(inp);
826172074Srwatson	if (headlocked)
827286227Sjch		INP_INFO_RUNLOCK(&V_tcbinfo);
828183550Szec	CURVNET_RESTORE();
8291541Srgrimes}
830172074Srwatson
831172074Srwatsonvoid
832281599Sjchtcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
833172074Srwatson{
834172074Srwatson	struct callout *t_callout;
835280904Sjch	timeout_t *f_callout;
836205391Skmacy	struct inpcb *inp = tp->t_inpcb;
837266422Sadrian	int cpu = inp_to_cpuid(inp);
838287304Sjch	uint32_t f_reset;
839172074Srwatson
840237263Snp#ifdef TCP_OFFLOAD
841237263Snp	if (tp->t_flags & TF_TOE)
842237263Snp		return;
843237263Snp#endif
844237263Snp
845281599Sjch	if (tp->t_timers->tt_flags & TT_STOPPED)
846281599Sjch		return;
847281599Sjch
848172074Srwatson	switch (timer_type) {
849172074Srwatson		case TT_DELACK:
850172309Ssilby			t_callout = &tp->t_timers->tt_delack;
851172074Srwatson			f_callout = tcp_timer_delack;
852287304Sjch			f_reset = TT_DELACK_RST;
853172074Srwatson			break;
854172074Srwatson		case TT_REXMT:
855172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
856172074Srwatson			f_callout = tcp_timer_rexmt;
857287304Sjch			f_reset = TT_REXMT_RST;
858172074Srwatson			break;
859172074Srwatson		case TT_PERSIST:
860172309Ssilby			t_callout = &tp->t_timers->tt_persist;
861172074Srwatson			f_callout = tcp_timer_persist;
862287304Sjch			f_reset = TT_PERSIST_RST;
863172074Srwatson			break;
864172074Srwatson		case TT_KEEP:
865172309Ssilby			t_callout = &tp->t_timers->tt_keep;
866172074Srwatson			f_callout = tcp_timer_keep;
867287304Sjch			f_reset = TT_KEEP_RST;
868172074Srwatson			break;
869172074Srwatson		case TT_2MSL:
870172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
871172074Srwatson			f_callout = tcp_timer_2msl;
872287304Sjch			f_reset = TT_2MSL_RST;
873172074Srwatson			break;
874172074Srwatson		default:
875292309Srrs			if (tp->t_fb->tfb_tcp_timer_activate) {
876292309Srrs				tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
877292309Srrs				return;
878292309Srrs			}
879280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
880172074Srwatson		}
881172074Srwatson	if (delta == 0) {
882281599Sjch		if ((tp->t_timers->tt_flags & timer_type) &&
883290805Srrs		    (callout_stop(t_callout) > 0) &&
884287304Sjch		    (tp->t_timers->tt_flags & f_reset)) {
885287304Sjch			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
886281599Sjch		}
887172074Srwatson	} else {
888281599Sjch		if ((tp->t_timers->tt_flags & timer_type) == 0) {
889287304Sjch			tp->t_timers->tt_flags |= (timer_type | f_reset);
890281599Sjch			callout_reset_on(t_callout, delta, f_callout, tp, cpu);
891281599Sjch		} else {
892281599Sjch			/* Reset already running callout on the same CPU. */
893287304Sjch			if (!callout_reset(t_callout, delta, f_callout, tp)) {
894287304Sjch				/*
895287304Sjch				 * Callout not cancelled, consider it as not
896287304Sjch				 * properly restarted. */
897287304Sjch				tp->t_timers->tt_flags &= ~f_reset;
898287304Sjch			}
899281599Sjch		}
900172074Srwatson	}
901172074Srwatson}
902172074Srwatson
903172074Srwatsonint
904281599Sjchtcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
905172074Srwatson{
906172074Srwatson	struct callout *t_callout;
907172074Srwatson
908172074Srwatson	switch (timer_type) {
909172074Srwatson		case TT_DELACK:
910172309Ssilby			t_callout = &tp->t_timers->tt_delack;
911172074Srwatson			break;
912172074Srwatson		case TT_REXMT:
913172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
914172074Srwatson			break;
915172074Srwatson		case TT_PERSIST:
916172309Ssilby			t_callout = &tp->t_timers->tt_persist;
917172074Srwatson			break;
918172074Srwatson		case TT_KEEP:
919172309Ssilby			t_callout = &tp->t_timers->tt_keep;
920172074Srwatson			break;
921172074Srwatson		case TT_2MSL:
922172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
923172074Srwatson			break;
924172074Srwatson		default:
925292309Srrs			if (tp->t_fb->tfb_tcp_timer_active) {
926292309Srrs				return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
927292309Srrs			}
928280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
929172074Srwatson		}
930172074Srwatson	return callout_active(t_callout);
931172074Srwatson}
932197244Ssilby
933281599Sjchvoid
934281599Sjchtcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
935281599Sjch{
936281599Sjch	struct callout *t_callout;
937287304Sjch	uint32_t f_reset;
938281599Sjch
939281599Sjch	tp->t_timers->tt_flags |= TT_STOPPED;
940281599Sjch
941281599Sjch	switch (timer_type) {
942281599Sjch		case TT_DELACK:
943281599Sjch			t_callout = &tp->t_timers->tt_delack;
944287304Sjch			f_reset = TT_DELACK_RST;
945281599Sjch			break;
946281599Sjch		case TT_REXMT:
947281599Sjch			t_callout = &tp->t_timers->tt_rexmt;
948287304Sjch			f_reset = TT_REXMT_RST;
949281599Sjch			break;
950281599Sjch		case TT_PERSIST:
951281599Sjch			t_callout = &tp->t_timers->tt_persist;
952287304Sjch			f_reset = TT_PERSIST_RST;
953281599Sjch			break;
954281599Sjch		case TT_KEEP:
955281599Sjch			t_callout = &tp->t_timers->tt_keep;
956287304Sjch			f_reset = TT_KEEP_RST;
957281599Sjch			break;
958281599Sjch		case TT_2MSL:
959281599Sjch			t_callout = &tp->t_timers->tt_2msl;
960287304Sjch			f_reset = TT_2MSL_RST;
961281599Sjch			break;
962281599Sjch		default:
963292309Srrs			if (tp->t_fb->tfb_tcp_timer_stop) {
964292309Srrs				/*
965292309Srrs				 * XXXrrs we need to look at this with the
966292309Srrs				 * stop case below (flags).
967292309Srrs				 */
968292309Srrs				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
969292309Srrs				return;
970292309Srrs			}
971281599Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
972281599Sjch		}
973281599Sjch
974281599Sjch	if (tp->t_timers->tt_flags & timer_type) {
975298743Srrs		if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
976281599Sjch			/*
977281599Sjch			 * Can't stop the callout, defer tcpcb actual deletion
978298743Srrs			 * to the last one. We do this using the async drain
979298743Srrs			 * function and incrementing the count in
980281599Sjch			 */
981298743Srrs			tp->t_timers->tt_draincnt++;
982281599Sjch		}
983281599Sjch	}
984281599Sjch}
985281599Sjch
986197244Ssilby#define	ticks_to_msecs(t)	(1000*(t) / hz)
987197244Ssilby
988197244Ssilbyvoid
989247777Sdavidetcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
990247777Sdavide    struct xtcp_timer *xtimer)
991197244Ssilby{
992247777Sdavide	sbintime_t now;
993247777Sdavide
994247777Sdavide	bzero(xtimer, sizeof(*xtimer));
995197244Ssilby	if (timer == NULL)
996197244Ssilby		return;
997247777Sdavide	now = getsbinuptime();
998197244Ssilby	if (callout_active(&timer->tt_delack))
999247777Sdavide		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
1000197244Ssilby	if (callout_active(&timer->tt_rexmt))
1001247777Sdavide		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
1002197244Ssilby	if (callout_active(&timer->tt_persist))
1003247777Sdavide		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
1004197244Ssilby	if (callout_active(&timer->tt_keep))
1005247777Sdavide		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
1006197244Ssilby	if (callout_active(&timer->tt_2msl))
1007247777Sdavide		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
1008197244Ssilby	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
1009197244Ssilby}
1010