tcp_timer.c revision 273377
1139823Simp/*-
211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
2911150Swollman *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
301541Srgrimes */
311541Srgrimes
32172467Ssilby#include <sys/cdefs.h>
33172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 273377 2014-10-21 07:31:21Z hselasky $");
34172467Ssilby
35243603Snp#include "opt_inet.h"
3655679Sshin#include "opt_inet6.h"
3729514Sjoerg#include "opt_tcpdebug.h"
38266422Sadrian#include "opt_rss.h"
3929514Sjoerg
401541Srgrimes#include <sys/param.h>
4112172Sphk#include <sys/kernel.h>
42102967Sbde#include <sys/lock.h>
4378642Ssilby#include <sys/mbuf.h>
44102967Sbde#include <sys/mutex.h>
45102967Sbde#include <sys/protosw.h>
46205391Skmacy#include <sys/smp.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49102967Sbde#include <sys/sysctl.h>
50102967Sbde#include <sys/systm.h>
511541Srgrimes
52185571Sbz#include <net/if.h>
531541Srgrimes#include <net/route.h>
54196019Srwatson#include <net/vnet.h>
55266422Sadrian#include <net/netisr.h>
561541Srgrimes
57215166Slstewart#include <netinet/cc.h>
581541Srgrimes#include <netinet/in.h>
59102967Sbde#include <netinet/in_pcb.h>
60266422Sadrian#include <netinet/in_rss.h>
611541Srgrimes#include <netinet/in_systm.h>
6255679Sshin#ifdef INET6
6355679Sshin#include <netinet6/in6_pcb.h>
6455679Sshin#endif
651541Srgrimes#include <netinet/ip_var.h>
661541Srgrimes#include <netinet/tcp_fsm.h>
671541Srgrimes#include <netinet/tcp_timer.h>
681541Srgrimes#include <netinet/tcp_var.h>
69272720Ssbruno#ifdef INET6
70272720Ssbruno#include <netinet6/tcp6_var.h>
71272720Ssbruno#endif
721541Srgrimes#include <netinet/tcpip.h>
7317138Sdg#ifdef TCPDEBUG
7417138Sdg#include <netinet/tcp_debug.h>
7517138Sdg#endif
761541Srgrimes
7750673Sjlemonint	tcp_keepinit;
7850682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
79180631Strhodes    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
8018280Spst
8150673Sjlemonint	tcp_keepidle;
8250682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
83180631Strhodes    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
8412172Sphk
8550673Sjlemonint	tcp_keepintvl;
8650682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
87180631Strhodes    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
8812172Sphk
8950673Sjlemonint	tcp_delacktime;
90167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
91167721Sandre    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
9250682Sjlemon    "Time before a delayed ACK is sent");
93133874Srwatson
9450673Sjlemonint	tcp_msl;
9550682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
9650682Sjlemon    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
9750673Sjlemon
98100335Sdillonint	tcp_rexmit_min;
99100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
100167721Sandre    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
101167721Sandre    "Minimum Retransmission Timeout");
102100335Sdillon
103100335Sdillonint	tcp_rexmit_slop;
104100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
105167721Sandre    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
106167721Sandre    "Retransmission Timer Slop");
107100335Sdillon
10887499Srwatsonstatic int	always_keepalive = 1;
109133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
11046381Sbillf    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
11115039Sphk
112167036Smohansint    tcp_fast_finwait2_recycle = 0;
113167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
114167721Sandre    &tcp_fast_finwait2_recycle, 0,
115167721Sandre    "Recycle closed FIN_WAIT_2 connections faster");
116167036Smohans
117167036Smohansint    tcp_finwait2_timeout;
118167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
119167721Sandre    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
120167036Smohans
121231025Sglebiusint	tcp_keepcnt = TCPTV_KEEPCNT;
122231025SglebiusSYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
123231025Sglebius    "Number of keepalive probes to send");
124167036Smohans
12512296Sphk	/* max idle probes */
12650673Sjlemonint	tcp_maxpersistidle;
12711150Swollman
128245238Sjhbstatic int	tcp_rexmit_drop_options = 0;
129245238SjhbSYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
130245238Sjhb    &tcp_rexmit_drop_options, 0,
131245238Sjhb    "Drop TCP options from 3rd and later retransmitted SYN");
132245238Sjhb
133272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
134272720Ssbruno#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
135272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
136273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
137272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
138272720Ssbruno    "Path MTU Discovery Black Hole Detection Enabled");
139272720Ssbruno
140272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
141272720Ssbruno#define	V_tcp_pmtud_blackhole_activated \
142272720Ssbruno    VNET(tcp_pmtud_blackhole_activated)
143272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
144273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
145272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
146272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count");
147272720Ssbruno
148272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
149272720Ssbruno#define	V_tcp_pmtud_blackhole_activated_min_mss \
150272720Ssbruno    VNET(tcp_pmtud_blackhole_activated_min_mss)
151272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
152273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
153272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
154272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
155272720Ssbruno
156272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
157272720Ssbruno#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
158272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
159273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
160272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
161272720Ssbruno    "Path MTU Discovery Black Hole Detection, Failure Count");
162272720Ssbruno
163272720Ssbruno#ifdef INET
164272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
165272720Ssbruno#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
166272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
167273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
168272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
169272720Ssbruno    "Path MTU Discovery Black Hole Detection lowered MSS");
170272720Ssbruno#endif
171272720Ssbruno
172272720Ssbruno#ifdef INET6
173272720Ssbrunostatic VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
174272720Ssbruno#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
175272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
176273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
177272720Ssbruno    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
178272720Ssbruno    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
179272720Ssbruno#endif
180272720Ssbruno
181268027Sadrian#ifdef	RSS
182268027Sadrianstatic int	per_cpu_timers = 1;
183268027Sadrian#else
184205391Skmacystatic int	per_cpu_timers = 0;
185268027Sadrian#endif
186205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
187205391Skmacy    &per_cpu_timers , 0, "run tcp timers on all cpus");
188205391Skmacy
189266422Sadrian#if 0
190205391Skmacy#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
191205391Skmacy		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
192266422Sadrian#endif
193205391Skmacy
1941541Srgrimes/*
195266422Sadrian * Map the given inp to a CPU id.
196266422Sadrian *
197266422Sadrian * This queries RSS if it's compiled in, else it defaults to the current
198266422Sadrian * CPU ID.
199266422Sadrian */
200266422Sadrianstatic inline int
201266422Sadrianinp_to_cpuid(struct inpcb *inp)
202266422Sadrian{
203266422Sadrian	u_int cpuid;
204266422Sadrian
205266422Sadrian#ifdef	RSS
206266422Sadrian	if (per_cpu_timers) {
207266422Sadrian		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
208266422Sadrian		if (cpuid == NETISR_CPUID_NONE)
209266422Sadrian			return (curcpu);	/* XXX */
210266422Sadrian		else
211266422Sadrian			return (cpuid);
212266422Sadrian	}
213266422Sadrian#else
214266422Sadrian	/* Legacy, pre-RSS behaviour */
215266422Sadrian	if (per_cpu_timers) {
216266422Sadrian		/*
217266422Sadrian		 * We don't have a flowid -> cpuid mapping, so cheat and
218266422Sadrian		 * just map unknown cpuids to curcpu.  Not the best, but
219266422Sadrian		 * apparently better than defaulting to swi 0.
220266422Sadrian		 */
221266422Sadrian		cpuid = inp->inp_flowid % (mp_maxid + 1);
222266422Sadrian		if (! CPU_ABSENT(cpuid))
223266422Sadrian			return (cpuid);
224266422Sadrian		return (curcpu);
225266422Sadrian	}
226266422Sadrian#endif
227266422Sadrian	/* Default for RSS and non-RSS - cpuid 0 */
228266422Sadrian	else {
229266422Sadrian		return (0);
230266422Sadrian	}
231266422Sadrian}
232266422Sadrian
233266422Sadrian/*
2341541Srgrimes * Tcp protocol timeout routine called every 500 ms.
23550673Sjlemon * Updates timestamps used for TCP
2361541Srgrimes * causes finite state machine actions if timers expire.
2371541Srgrimes */
2381541Srgrimesvoid
239172309Ssilbytcp_slowtimo(void)
2401541Srgrimes{
241183550Szec	VNET_ITERATOR_DECL(vnet_iter);
2421541Srgrimes
243195760Srwatson	VNET_LIST_RLOCK_NOSLEEP();
244183550Szec	VNET_FOREACH(vnet_iter) {
245183550Szec		CURVNET_SET(vnet_iter);
246264321Sjhb		tcp_tw_2msl_scan();
247183550Szec		CURVNET_RESTORE();
248183550Szec	}
249195760Srwatson	VNET_LIST_RUNLOCK_NOSLEEP();
2501541Srgrimes}
2511541Srgrimes
25273110Sjlemonint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
25373110Sjlemon    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
25473110Sjlemon
2551541Srgrimesint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
256115824Shsu    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
2571541Srgrimes
258115824Shsustatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
25911150Swollman
260157376Srwatsonstatic int tcp_timer_race;
261157376SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
262157376Srwatson    0, "Count of t_inpcb races on tcp_discardcb");
263157376Srwatson
264172074Srwatson/*
265172074Srwatson * TCP timer processing.
266172074Srwatson */
267172074Srwatson
26850673Sjlemonvoid
269172074Srwatsontcp_timer_delack(void *xtp)
2701541Srgrimes{
271172074Srwatson	struct tcpcb *tp = xtp;
272172074Srwatson	struct inpcb *inp;
273183550Szec	CURVNET_SET(tp->t_vnet);
2741541Srgrimes
275172074Srwatson	inp = tp->t_inpcb;
276157376Srwatson	/*
277172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
278172074Srwatson	 * tear-down mean we need it as a work-around for races between
279172074Srwatson	 * timers and tcp_discardcb().
280172074Srwatson	 *
281172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
282157376Srwatson	 */
283172074Srwatson	if (inp == NULL) {
284172074Srwatson		tcp_timer_race++;
285183550Szec		CURVNET_RESTORE();
286108265Shsu		return;
287108265Shsu	}
288178285Srwatson	INP_WLOCK(inp);
289239075Strociny	if (callout_pending(&tp->t_timers->tt_delack) ||
290239075Strociny	    !callout_active(&tp->t_timers->tt_delack)) {
291178285Srwatson		INP_WUNLOCK(inp);
292183550Szec		CURVNET_RESTORE();
29350673Sjlemon		return;
29450673Sjlemon	}
295172309Ssilby	callout_deactivate(&tp->t_timers->tt_delack);
296239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
297239075Strociny		INP_WUNLOCK(inp);
298239075Strociny		CURVNET_RESTORE();
299239075Strociny		return;
300239075Strociny	}
3011541Srgrimes
30250673Sjlemon	tp->t_flags |= TF_ACKNOW;
303190948Srwatson	TCPSTAT_INC(tcps_delack);
30450673Sjlemon	(void) tcp_output(tp);
305178285Srwatson	INP_WUNLOCK(inp);
306183550Szec	CURVNET_RESTORE();
30750673Sjlemon}
30850673Sjlemon
309172074Srwatsonvoid
310172074Srwatsontcp_timer_2msl(void *xtp)
31150673Sjlemon{
312172074Srwatson	struct tcpcb *tp = xtp;
313172074Srwatson	struct inpcb *inp;
314183550Szec	CURVNET_SET(tp->t_vnet);
31550673Sjlemon#ifdef TCPDEBUG
31650673Sjlemon	int ostate;
31750673Sjlemon
31850673Sjlemon	ostate = tp->t_state;
31950673Sjlemon#endif
320157376Srwatson	/*
321172074Srwatson	 * XXXRW: Does this actually happen?
322172074Srwatson	 */
323181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
324172074Srwatson	inp = tp->t_inpcb;
325172074Srwatson	/*
326172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
327172074Srwatson	 * tear-down mean we need it as a work-around for races between
328172074Srwatson	 * timers and tcp_discardcb().
329172074Srwatson	 *
330172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
331172074Srwatson	 */
332172074Srwatson	if (inp == NULL) {
333172074Srwatson		tcp_timer_race++;
334181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
335183550Szec		CURVNET_RESTORE();
336172074Srwatson		return;
337172074Srwatson	}
338178285Srwatson	INP_WLOCK(inp);
339172074Srwatson	tcp_free_sackholes(tp);
340239075Strociny	if (callout_pending(&tp->t_timers->tt_2msl) ||
341172309Ssilby	    !callout_active(&tp->t_timers->tt_2msl)) {
342178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
343181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
344183550Szec		CURVNET_RESTORE();
345172074Srwatson		return;
346172074Srwatson	}
347172309Ssilby	callout_deactivate(&tp->t_timers->tt_2msl);
348239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
349239075Strociny		INP_WUNLOCK(inp);
350239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
351239075Strociny		CURVNET_RESTORE();
352239075Strociny		return;
353239075Strociny	}
354172074Srwatson	/*
3551541Srgrimes	 * 2 MSL timeout in shutdown went off.  If we're closed but
3561541Srgrimes	 * still waiting for peer to close and connection has been idle
3571541Srgrimes	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
3581541Srgrimes	 * control block.  Otherwise, check again in a bit.
359167036Smohans	 *
360167036Smohans	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
361167036Smohans	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
362167036Smohans	 * Ignore fact that there were recent incoming segments.
3631541Srgrimes	 */
364167036Smohans	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
365172074Srwatson	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
366167036Smohans	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
367190948Srwatson		TCPSTAT_INC(tcps_finwait2_drops);
368172074Srwatson		tp = tcp_close(tp);
369167036Smohans	} else {
370167036Smohans		if (tp->t_state != TCPS_TIME_WAIT &&
371231025Sglebius		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
372231025Sglebius		       callout_reset_on(&tp->t_timers->tt_2msl,
373266422Sadrian			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
374266422Sadrian			   inp_to_cpuid(inp));
375172074Srwatson	       else
376172074Srwatson		       tp = tcp_close(tp);
377172074Srwatson       }
3781541Srgrimes
37950673Sjlemon#ifdef TCPDEBUG
380172312Skib	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
38197658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
38297658Stanimura			  PRU_SLOWTIMO);
38350673Sjlemon#endif
384172074Srwatson	if (tp != NULL)
385178285Srwatson		INP_WUNLOCK(inp);
386181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
387183550Szec	CURVNET_RESTORE();
38850673Sjlemon}
38950673Sjlemon
390172074Srwatsonvoid
391172074Srwatsontcp_timer_keep(void *xtp)
39250673Sjlemon{
393172074Srwatson	struct tcpcb *tp = xtp;
39478642Ssilby	struct tcptemp *t_template;
395172074Srwatson	struct inpcb *inp;
396183550Szec	CURVNET_SET(tp->t_vnet);
39750673Sjlemon#ifdef TCPDEBUG
39850673Sjlemon	int ostate;
39950673Sjlemon
40050673Sjlemon	ostate = tp->t_state;
40150673Sjlemon#endif
402181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
403172074Srwatson	inp = tp->t_inpcb;
404157376Srwatson	/*
405172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
406172074Srwatson	 * tear-down mean we need it as a work-around for races between
407172074Srwatson	 * timers and tcp_discardcb().
408172074Srwatson	 *
409172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
410172074Srwatson	 */
411172074Srwatson	if (inp == NULL) {
412172074Srwatson		tcp_timer_race++;
413181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
414183550Szec		CURVNET_RESTORE();
415172074Srwatson		return;
416172074Srwatson	}
417178285Srwatson	INP_WLOCK(inp);
418239075Strociny	if (callout_pending(&tp->t_timers->tt_keep) ||
419239075Strociny	    !callout_active(&tp->t_timers->tt_keep)) {
420178285Srwatson		INP_WUNLOCK(inp);
421181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
422183550Szec		CURVNET_RESTORE();
423172074Srwatson		return;
424172074Srwatson	}
425172309Ssilby	callout_deactivate(&tp->t_timers->tt_keep);
426239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
427239075Strociny		INP_WUNLOCK(inp);
428239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
429239075Strociny		CURVNET_RESTORE();
430239075Strociny		return;
431239075Strociny	}
432172074Srwatson	/*
43350673Sjlemon	 * Keep-alive timer went off; send something
43450673Sjlemon	 * or drop connection if idle for too long.
4351541Srgrimes	 */
436190948Srwatson	TCPSTAT_INC(tcps_keeptimeo);
43750673Sjlemon	if (tp->t_state < TCPS_ESTABLISHED)
43850673Sjlemon		goto dropit;
439122326Ssam	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
44050673Sjlemon	    tp->t_state <= TCPS_CLOSING) {
441231025Sglebius		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
44250673Sjlemon			goto dropit;
4431541Srgrimes		/*
44450673Sjlemon		 * Send a packet designed to force a response
44550673Sjlemon		 * if the peer is up and reachable:
44650673Sjlemon		 * either an ACK if the connection is still alive,
44750673Sjlemon		 * or an RST if the peer has closed the connection
44850673Sjlemon		 * due to timeout or reboot.
44950673Sjlemon		 * Using sequence number tp->snd_una-1
45050673Sjlemon		 * causes the transmitted zero-length segment
45150673Sjlemon		 * to lie outside the receive window;
45250673Sjlemon		 * by the protocol spec, this requires the
45350673Sjlemon		 * correspondent TCP to respond.
4541541Srgrimes		 */
455190948Srwatson		TCPSTAT_INC(tcps_keepprobe);
456111144Sjlemon		t_template = tcpip_maketemplate(inp);
45778642Ssilby		if (t_template) {
45878642Ssilby			tcp_respond(tp, t_template->tt_ipgen,
45978642Ssilby				    &t_template->tt_t, (struct mbuf *)NULL,
46078642Ssilby				    tp->rcv_nxt, tp->snd_una - 1, 0);
461179487Srwatson			free(t_template, M_TEMP);
46278642Ssilby		}
463231025Sglebius		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
464266422Sadrian		    tcp_timer_keep, tp, inp_to_cpuid(inp));
46597658Stanimura	} else
466231025Sglebius		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
467266422Sadrian		    tcp_timer_keep, tp, inp_to_cpuid(inp));
46850673Sjlemon
46950673Sjlemon#ifdef TCPDEBUG
470122326Ssam	if (inp->inp_socket->so_options & SO_DEBUG)
47155679Sshin		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
47250673Sjlemon			  PRU_SLOWTIMO);
47350673Sjlemon#endif
474178285Srwatson	INP_WUNLOCK(inp);
475181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
476183550Szec	CURVNET_RESTORE();
477172074Srwatson	return;
47850673Sjlemon
47950673Sjlemondropit:
480190948Srwatson	TCPSTAT_INC(tcps_keepdrops);
481172074Srwatson	tp = tcp_drop(tp, ETIMEDOUT);
482172074Srwatson
483172074Srwatson#ifdef TCPDEBUG
484172074Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
485172074Srwatson		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
486172074Srwatson			  PRU_SLOWTIMO);
487172074Srwatson#endif
488172074Srwatson	if (tp != NULL)
489178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
490181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
491183550Szec	CURVNET_RESTORE();
49250673Sjlemon}
49350673Sjlemon
494172074Srwatsonvoid
495172074Srwatsontcp_timer_persist(void *xtp)
49650673Sjlemon{
497172074Srwatson	struct tcpcb *tp = xtp;
498172074Srwatson	struct inpcb *inp;
499183550Szec	CURVNET_SET(tp->t_vnet);
50050673Sjlemon#ifdef TCPDEBUG
50150673Sjlemon	int ostate;
50250673Sjlemon
50350673Sjlemon	ostate = tp->t_state;
50450673Sjlemon#endif
505181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
506172074Srwatson	inp = tp->t_inpcb;
507157376Srwatson	/*
508172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
509172074Srwatson	 * tear-down mean we need it as a work-around for races between
510172074Srwatson	 * timers and tcp_discardcb().
511172074Srwatson	 *
512172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
513172074Srwatson	 */
514172074Srwatson	if (inp == NULL) {
515172074Srwatson		tcp_timer_race++;
516181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
517183550Szec		CURVNET_RESTORE();
518172074Srwatson		return;
519172074Srwatson	}
520178285Srwatson	INP_WLOCK(inp);
521239075Strociny	if (callout_pending(&tp->t_timers->tt_persist) ||
522239075Strociny	    !callout_active(&tp->t_timers->tt_persist)) {
523178285Srwatson		INP_WUNLOCK(inp);
524181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
525183550Szec		CURVNET_RESTORE();
526172074Srwatson		return;
527172074Srwatson	}
528172309Ssilby	callout_deactivate(&tp->t_timers->tt_persist);
529239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
530239075Strociny		INP_WUNLOCK(inp);
531239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
532239075Strociny		CURVNET_RESTORE();
533239075Strociny		return;
534239075Strociny	}
535172074Srwatson	/*
53650673Sjlemon	 * Persistance timer into zero window.
53750673Sjlemon	 * Force a byte to be output, if possible.
53850673Sjlemon	 */
539190948Srwatson	TCPSTAT_INC(tcps_persisttimeo);
54050673Sjlemon	/*
54150673Sjlemon	 * Hack: if the peer is dead/unreachable, we do not
54250673Sjlemon	 * time out if the window is closed.  After a full
54350673Sjlemon	 * backoff, drop the connection if the idle time
54450673Sjlemon	 * (no responses to probes) reaches the maximum
54550673Sjlemon	 * backoff that we would use if retransmitting.
54650673Sjlemon	 */
54750673Sjlemon	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
548194305Sjhb	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
549194305Sjhb	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
550190948Srwatson		TCPSTAT_INC(tcps_persistdrop);
551172074Srwatson		tp = tcp_drop(tp, ETIMEDOUT);
552172074Srwatson		goto out;
55350673Sjlemon	}
554242267Sandre	/*
555242267Sandre	 * If the user has closed the socket then drop a persisting
556242267Sandre	 * connection after a much reduced timeout.
557242267Sandre	 */
558242267Sandre	if (tp->t_state > TCPS_CLOSE_WAIT &&
559242267Sandre	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
560242267Sandre		TCPSTAT_INC(tcps_persistdrop);
561242267Sandre		tp = tcp_drop(tp, ETIMEDOUT);
562242267Sandre		goto out;
563242267Sandre	}
56450673Sjlemon	tcp_setpersist(tp);
565146463Sps	tp->t_flags |= TF_FORCEDATA;
56650673Sjlemon	(void) tcp_output(tp);
567146463Sps	tp->t_flags &= ~TF_FORCEDATA;
56850673Sjlemon
569172074Srwatsonout:
57050673Sjlemon#ifdef TCPDEBUG
571158644Sglebius	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
572158644Sglebius		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
57350673Sjlemon#endif
574172074Srwatson	if (tp != NULL)
575178285Srwatson		INP_WUNLOCK(inp);
576181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
577183550Szec	CURVNET_RESTORE();
57850673Sjlemon}
57950673Sjlemon
580172074Srwatsonvoid
581172074Srwatsontcp_timer_rexmt(void * xtp)
58250673Sjlemon{
583172074Srwatson	struct tcpcb *tp = xtp;
584183550Szec	CURVNET_SET(tp->t_vnet);
58550673Sjlemon	int rexmt;
586172074Srwatson	int headlocked;
587172074Srwatson	struct inpcb *inp;
58850673Sjlemon#ifdef TCPDEBUG
58950673Sjlemon	int ostate;
59050673Sjlemon
59150673Sjlemon	ostate = tp->t_state;
59250673Sjlemon#endif
593272720Ssbruno
594205391Skmacy	INP_INFO_RLOCK(&V_tcbinfo);
595172074Srwatson	inp = tp->t_inpcb;
596172074Srwatson	/*
597172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
598172074Srwatson	 * tear-down mean we need it as a work-around for races between
599172074Srwatson	 * timers and tcp_discardcb().
600172074Srwatson	 *
601172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
602172074Srwatson	 */
603172074Srwatson	if (inp == NULL) {
604172074Srwatson		tcp_timer_race++;
605205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
606183550Szec		CURVNET_RESTORE();
607172074Srwatson		return;
608172074Srwatson	}
609178285Srwatson	INP_WLOCK(inp);
610239075Strociny	if (callout_pending(&tp->t_timers->tt_rexmt) ||
611239075Strociny	    !callout_active(&tp->t_timers->tt_rexmt)) {
612178285Srwatson		INP_WUNLOCK(inp);
613205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
614183550Szec		CURVNET_RESTORE();
615172074Srwatson		return;
616172074Srwatson	}
617172309Ssilby	callout_deactivate(&tp->t_timers->tt_rexmt);
618239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
619239075Strociny		INP_WUNLOCK(inp);
620239075Strociny		INP_INFO_RUNLOCK(&V_tcbinfo);
621239075Strociny		CURVNET_RESTORE();
622239075Strociny		return;
623239075Strociny	}
624130989Sps	tcp_free_sackholes(tp);
62550673Sjlemon	/*
62650673Sjlemon	 * Retransmission timer went off.  Message has not
62750673Sjlemon	 * been acked within retransmit interval.  Back off
62850673Sjlemon	 * to a longer retransmit interval and retransmit one segment.
62950673Sjlemon	 */
63050673Sjlemon	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
63150673Sjlemon		tp->t_rxtshift = TCP_MAXRXTSHIFT;
632190948Srwatson		TCPSTAT_INC(tcps_timeoutdrop);
633205391Skmacy		in_pcbref(inp);
634217126Sjhb		INP_INFO_RUNLOCK(&V_tcbinfo);
635217126Sjhb		INP_WUNLOCK(inp);
636217126Sjhb		INP_INFO_WLOCK(&V_tcbinfo);
637217126Sjhb		INP_WLOCK(inp);
638222488Srwatson		if (in_pcbrele_wlocked(inp)) {
639217126Sjhb			INP_INFO_WUNLOCK(&V_tcbinfo);
640217126Sjhb			CURVNET_RESTORE();
641217126Sjhb			return;
642217126Sjhb		}
643226318Snp		if (inp->inp_flags & INP_DROPPED) {
644226318Snp			INP_WUNLOCK(inp);
645226318Snp			INP_INFO_WUNLOCK(&V_tcbinfo);
646226318Snp			CURVNET_RESTORE();
647226318Snp			return;
648226318Snp		}
649226318Snp
650172074Srwatson		tp = tcp_drop(tp, tp->t_softerror ?
651172074Srwatson			      tp->t_softerror : ETIMEDOUT);
652205391Skmacy		headlocked = 1;
653172074Srwatson		goto out;
65450673Sjlemon	}
655205391Skmacy	INP_INFO_RUNLOCK(&V_tcbinfo);
656172074Srwatson	headlocked = 0;
657242250Sandre	if (tp->t_state == TCPS_SYN_SENT) {
65813229Solah		/*
659242250Sandre		 * If the SYN was retransmitted, indicate CWND to be
660242250Sandre		 * limited to 1 segment in cc_conn_init().
661242250Sandre		 */
662242250Sandre		tp->snd_cwnd = 1;
663242250Sandre	} else if (tp->t_rxtshift == 1) {
664242250Sandre		/*
66550673Sjlemon		 * first retransmit; record ssthresh and cwnd so they can
666133874Srwatson		 * be recovered if this turns out to be a "bad" retransmit.
667133874Srwatson		 * A retransmit is considered "bad" if an ACK for this
66850673Sjlemon		 * segment is received within RTT/2 interval; the assumption
669133874Srwatson		 * here is that the ACK was already in flight.  See
67050673Sjlemon		 * "On Estimating End-to-End Network Path Properties" by
67150673Sjlemon		 * Allman and Paxson for more details.
6721541Srgrimes		 */
67350673Sjlemon		tp->snd_cwnd_prev = tp->snd_cwnd;
67450673Sjlemon		tp->snd_ssthresh_prev = tp->snd_ssthresh;
675117650Shsu		tp->snd_recover_prev = tp->snd_recover;
676215166Slstewart		if (IN_FASTRECOVERY(tp->t_flags))
677215166Slstewart			tp->t_flags |= TF_WASFRECOVERY;
678117650Shsu		else
679215166Slstewart			tp->t_flags &= ~TF_WASFRECOVERY;
680215166Slstewart		if (IN_CONGRECOVERY(tp->t_flags))
681215166Slstewart			tp->t_flags |= TF_WASCRECOVERY;
682215166Slstewart		else
683215166Slstewart			tp->t_flags &= ~TF_WASCRECOVERY;
68450673Sjlemon		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
685221209Sjhb		tp->t_flags |= TF_PREVVALID;
686221209Sjhb	} else
687221209Sjhb		tp->t_flags &= ~TF_PREVVALID;
688190948Srwatson	TCPSTAT_INC(tcps_rexmttimeo);
68973110Sjlemon	if (tp->t_state == TCPS_SYN_SENT)
690242260Sandre		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
69173110Sjlemon	else
69273110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
69350673Sjlemon	TCPT_RANGESET(tp->t_rxtcur, rexmt,
69450673Sjlemon		      tp->t_rttmin, TCPTV_REXMTMAX);
695272720Ssbruno
696273063Ssbruno	/*
697273063Ssbruno	 * We enter the path for PLMTUD if connection is established or, if
698273063Ssbruno	 * connection is FIN_WAIT_1 status, reason for the last is that if
699273063Ssbruno	 * amount of data we send is very small, we could send it in couple of
700273063Ssbruno	 * packets and process straight to FIN. In that case we won't catch
701273063Ssbruno	 * ESTABLISHED state.
702273063Ssbruno	 */
703273063Ssbruno	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
704273063Ssbruno	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
705272720Ssbruno		int optlen;
706272720Ssbruno#ifdef INET6
707272720Ssbruno		int isipv6;
708272720Ssbruno#endif
709272720Ssbruno
710272720Ssbruno		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
711272720Ssbruno		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
712272720Ssbruno		    (tp->t_rxtshift <= 2)) {
713272720Ssbruno			/*
714272720Ssbruno			 * Enter Path MTU Black-hole Detection mechanism:
715272720Ssbruno			 * - Disable Path MTU Discovery (IP "DF" bit).
716272720Ssbruno			 * - Reduce MTU to lower value than what we
717272720Ssbruno			 *   negotiated with peer.
718272720Ssbruno			 */
719272720Ssbruno			/* Record that we may have found a black hole. */
720272720Ssbruno			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
721272720Ssbruno
722272720Ssbruno			/* Keep track of previous MSS. */
723272720Ssbruno			optlen = tp->t_maxopd - tp->t_maxseg;
724272720Ssbruno			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
725272720Ssbruno
726272720Ssbruno			/*
727272720Ssbruno			 * Reduce the MSS to blackhole value or to the default
728272720Ssbruno			 * in an attempt to retransmit.
729272720Ssbruno			 */
730272720Ssbruno#ifdef INET6
731272720Ssbruno			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
732272720Ssbruno			if (isipv6 &&
733272720Ssbruno			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
734272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
735272720Ssbruno				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
736272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
737272720Ssbruno			} else if (isipv6) {
738272720Ssbruno				/* Use the default MSS. */
739272720Ssbruno				tp->t_maxopd = V_tcp_v6mssdflt;
740272720Ssbruno				/*
741272720Ssbruno				 * Disable Path MTU Discovery when we switch to
742272720Ssbruno				 * minmss.
743272720Ssbruno				 */
744272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
745272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
746272720Ssbruno			}
747272720Ssbruno#endif
748272720Ssbruno#if defined(INET6) && defined(INET)
749272720Ssbruno			else
750272720Ssbruno#endif
751272720Ssbruno#ifdef INET
752272720Ssbruno			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
753272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
754272720Ssbruno				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
755272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
756272720Ssbruno			} else {
757272720Ssbruno				/* Use the default MSS. */
758272720Ssbruno				tp->t_maxopd = V_tcp_mssdflt;
759272720Ssbruno				/*
760272720Ssbruno				 * Disable Path MTU Discovery when we switch to
761272720Ssbruno				 * minmss.
762272720Ssbruno				 */
763272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
764272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
765272720Ssbruno			}
766272720Ssbruno#endif
767272720Ssbruno			tp->t_maxseg = tp->t_maxopd - optlen;
768272720Ssbruno			/*
769272720Ssbruno			 * Reset the slow-start flight size
770272720Ssbruno			 * as it may depend on the new MSS.
771272720Ssbruno			 */
772272720Ssbruno			if (CC_ALGO(tp)->conn_init != NULL)
773272720Ssbruno				CC_ALGO(tp)->conn_init(tp->ccv);
774272720Ssbruno		} else {
775272720Ssbruno			/*
776272720Ssbruno			 * If further retransmissions are still unsuccessful
777272720Ssbruno			 * with a lowered MTU, maybe this isn't a blackhole and
778272720Ssbruno			 * we restore the previous MSS and blackhole detection
779272720Ssbruno			 * flags.
780272720Ssbruno			 */
781272720Ssbruno			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
782272720Ssbruno			    (tp->t_rxtshift > 4)) {
783272720Ssbruno				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
784272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
785272720Ssbruno				optlen = tp->t_maxopd - tp->t_maxseg;
786272720Ssbruno				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
787272720Ssbruno				tp->t_maxseg = tp->t_maxopd - optlen;
788272720Ssbruno				V_tcp_pmtud_blackhole_failed++;
789272720Ssbruno				/*
790272720Ssbruno				 * Reset the slow-start flight size as it
791272720Ssbruno				 * may depend on the new MSS.
792272720Ssbruno				 */
793272720Ssbruno				if (CC_ALGO(tp)->conn_init != NULL)
794272720Ssbruno					CC_ALGO(tp)->conn_init(tp->ccv);
795272720Ssbruno			}
796272720Ssbruno		}
797272720Ssbruno	}
798272720Ssbruno
79950673Sjlemon	/*
800242264Sandre	 * Disable RFC1323 and SACK if we haven't got any response to
801133874Srwatson	 * our third SYN to work-around some broken terminal servers
802133874Srwatson	 * (most of which have hopefully been retired) that have bad VJ
803133874Srwatson	 * header compression code which trashes TCP segments containing
80477539Sjesper	 * unknown-to-them TCP options.
80577539Sjesper	 */
806245238Sjhb	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
807245238Sjhb	    (tp->t_rxtshift == 3))
808242263Sandre		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
80977539Sjesper	/*
810122922Sandre	 * If we backed off this far, our srtt estimate is probably bogus.
811122922Sandre	 * Clobber it so we'll take the next rtt measurement as our srtt;
81250673Sjlemon	 * move the current srtt into rttvar to keep the current
81350673Sjlemon	 * retransmit times until then.
81450673Sjlemon	 */
81550673Sjlemon	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
81655679Sshin#ifdef INET6
81755679Sshin		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
81855679Sshin			in6_losing(tp->t_inpcb);
81955679Sshin#endif
82050673Sjlemon		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
82150673Sjlemon		tp->t_srtt = 0;
82250673Sjlemon	}
82350673Sjlemon	tp->snd_nxt = tp->snd_una;
824117650Shsu	tp->snd_recover = tp->snd_max;
82550673Sjlemon	/*
82650673Sjlemon	 * Force a segment to be sent.
82750673Sjlemon	 */
82850673Sjlemon	tp->t_flags |= TF_ACKNOW;
82950673Sjlemon	/*
83050673Sjlemon	 * If timing a segment in this window, stop the timer.
83150673Sjlemon	 */
83250673Sjlemon	tp->t_rtttime = 0;
833215166Slstewart
834216101Slstewart	cc_cong_signal(tp, NULL, CC_RTO);
835215166Slstewart
83650673Sjlemon	(void) tcp_output(tp);
8371541Srgrimes
838172074Srwatsonout:
83950673Sjlemon#ifdef TCPDEBUG
840157136Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
84197658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
84297658Stanimura			  PRU_SLOWTIMO);
8431541Srgrimes#endif
844172074Srwatson	if (tp != NULL)
845178285Srwatson		INP_WUNLOCK(inp);
846172074Srwatson	if (headlocked)
847181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
848183550Szec	CURVNET_RESTORE();
8491541Srgrimes}
850172074Srwatson
851172074Srwatsonvoid
852172074Srwatsontcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
853172074Srwatson{
854172074Srwatson	struct callout *t_callout;
855172074Srwatson	void *f_callout;
856205391Skmacy	struct inpcb *inp = tp->t_inpcb;
857266422Sadrian	int cpu = inp_to_cpuid(inp);
858172074Srwatson
859237263Snp#ifdef TCP_OFFLOAD
860237263Snp	if (tp->t_flags & TF_TOE)
861237263Snp		return;
862237263Snp#endif
863237263Snp
864172074Srwatson	switch (timer_type) {
865172074Srwatson		case TT_DELACK:
866172309Ssilby			t_callout = &tp->t_timers->tt_delack;
867172074Srwatson			f_callout = tcp_timer_delack;
868172074Srwatson			break;
869172074Srwatson		case TT_REXMT:
870172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
871172074Srwatson			f_callout = tcp_timer_rexmt;
872172074Srwatson			break;
873172074Srwatson		case TT_PERSIST:
874172309Ssilby			t_callout = &tp->t_timers->tt_persist;
875172074Srwatson			f_callout = tcp_timer_persist;
876172074Srwatson			break;
877172074Srwatson		case TT_KEEP:
878172309Ssilby			t_callout = &tp->t_timers->tt_keep;
879172074Srwatson			f_callout = tcp_timer_keep;
880172074Srwatson			break;
881172074Srwatson		case TT_2MSL:
882172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
883172074Srwatson			f_callout = tcp_timer_2msl;
884172074Srwatson			break;
885172074Srwatson		default:
886172074Srwatson			panic("bad timer_type");
887172074Srwatson		}
888172074Srwatson	if (delta == 0) {
889172074Srwatson		callout_stop(t_callout);
890172074Srwatson	} else {
891205391Skmacy		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
892172074Srwatson	}
893172074Srwatson}
894172074Srwatson
895172074Srwatsonint
896172074Srwatsontcp_timer_active(struct tcpcb *tp, int timer_type)
897172074Srwatson{
898172074Srwatson	struct callout *t_callout;
899172074Srwatson
900172074Srwatson	switch (timer_type) {
901172074Srwatson		case TT_DELACK:
902172309Ssilby			t_callout = &tp->t_timers->tt_delack;
903172074Srwatson			break;
904172074Srwatson		case TT_REXMT:
905172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
906172074Srwatson			break;
907172074Srwatson		case TT_PERSIST:
908172309Ssilby			t_callout = &tp->t_timers->tt_persist;
909172074Srwatson			break;
910172074Srwatson		case TT_KEEP:
911172309Ssilby			t_callout = &tp->t_timers->tt_keep;
912172074Srwatson			break;
913172074Srwatson		case TT_2MSL:
914172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
915172074Srwatson			break;
916172074Srwatson		default:
917172074Srwatson			panic("bad timer_type");
918172074Srwatson		}
919172074Srwatson	return callout_active(t_callout);
920172074Srwatson}
921197244Ssilby
/*
 * Convert a tick count to milliseconds using the tick rate hz.
 *
 * The multiplication is widened to 64 bits first: evaluated in 32-bit
 * arithmetic, 1000 * t overflows (signed) or wraps (unsigned) once t
 * passes a few million ticks, yielding a bogus result for large deltas.
 */
#define	ticks_to_msecs(t)	(((int64_t)(t) * 1000) / hz)
923197244Ssilby
924197244Ssilbyvoid
925247777Sdavidetcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
926247777Sdavide    struct xtcp_timer *xtimer)
927197244Ssilby{
928247777Sdavide	sbintime_t now;
929247777Sdavide
930247777Sdavide	bzero(xtimer, sizeof(*xtimer));
931197244Ssilby	if (timer == NULL)
932197244Ssilby		return;
933247777Sdavide	now = getsbinuptime();
934197244Ssilby	if (callout_active(&timer->tt_delack))
935247777Sdavide		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
936197244Ssilby	if (callout_active(&timer->tt_rexmt))
937247777Sdavide		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
938197244Ssilby	if (callout_active(&timer->tt_persist))
939247777Sdavide		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
940197244Ssilby	if (callout_active(&timer->tt_keep))
941247777Sdavide		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
942197244Ssilby	if (callout_active(&timer->tt_2msl))
943247777Sdavide		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
944197244Ssilby	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
945197244Ssilby}
946