tcp_timer.c revision 280990
1139823Simp/*-
211150Swollman * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * Redistribution and use in source and binary forms, with or without
61541Srgrimes * modification, are permitted provided that the following conditions
71541Srgrimes * are met:
81541Srgrimes * 1. Redistributions of source code must retain the above copyright
91541Srgrimes *    notice, this list of conditions and the following disclaimer.
101541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer in the
121541Srgrimes *    documentation and/or other materials provided with the distribution.
131541Srgrimes * 4. Neither the name of the University nor the names of its contributors
141541Srgrimes *    may be used to endorse or promote products derived from this software
151541Srgrimes *    without specific prior written permission.
161541Srgrimes *
171541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271541Srgrimes * SUCH DAMAGE.
281541Srgrimes *
2911150Swollman *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
301541Srgrimes */
311541Srgrimes
32172467Ssilby#include <sys/cdefs.h>
33172467Ssilby__FBSDID("$FreeBSD: head/sys/netinet/tcp_timer.c 280990 2015-04-02 14:43:07Z jch $");
34172467Ssilby
35243603Snp#include "opt_inet.h"
3655679Sshin#include "opt_inet6.h"
3729514Sjoerg#include "opt_tcpdebug.h"
38266422Sadrian#include "opt_rss.h"
3929514Sjoerg
401541Srgrimes#include <sys/param.h>
4112172Sphk#include <sys/kernel.h>
42102967Sbde#include <sys/lock.h>
4378642Ssilby#include <sys/mbuf.h>
44102967Sbde#include <sys/mutex.h>
45102967Sbde#include <sys/protosw.h>
46205391Skmacy#include <sys/smp.h>
471541Srgrimes#include <sys/socket.h>
481541Srgrimes#include <sys/socketvar.h>
49102967Sbde#include <sys/sysctl.h>
50102967Sbde#include <sys/systm.h>
511541Srgrimes
52185571Sbz#include <net/if.h>
531541Srgrimes#include <net/route.h>
54277331Sadrian#include <net/rss_config.h>
55196019Srwatson#include <net/vnet.h>
56266422Sadrian#include <net/netisr.h>
571541Srgrimes
58215166Slstewart#include <netinet/cc.h>
591541Srgrimes#include <netinet/in.h>
60102967Sbde#include <netinet/in_pcb.h>
61266422Sadrian#include <netinet/in_rss.h>
621541Srgrimes#include <netinet/in_systm.h>
6355679Sshin#ifdef INET6
6455679Sshin#include <netinet6/in6_pcb.h>
6555679Sshin#endif
661541Srgrimes#include <netinet/ip_var.h>
671541Srgrimes#include <netinet/tcp_fsm.h>
681541Srgrimes#include <netinet/tcp_timer.h>
691541Srgrimes#include <netinet/tcp_var.h>
70272720Ssbruno#ifdef INET6
71272720Ssbruno#include <netinet6/tcp6_var.h>
72272720Ssbruno#endif
731541Srgrimes#include <netinet/tcpip.h>
7417138Sdg#ifdef TCPDEBUG
7517138Sdg#include <netinet/tcp_debug.h>
7617138Sdg#endif
771541Srgrimes
7850673Sjlemonint	tcp_keepinit;
7950682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
80180631Strhodes    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
8118280Spst
8250673Sjlemonint	tcp_keepidle;
8350682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
84180631Strhodes    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
8512172Sphk
8650673Sjlemonint	tcp_keepintvl;
8750682SjlemonSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
88180631Strhodes    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
8912172Sphk
9050673Sjlemonint	tcp_delacktime;
91167721SandreSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
92167721Sandre    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
9350682Sjlemon    "Time before a delayed ACK is sent");
94133874Srwatson
9550673Sjlemonint	tcp_msl;
9650682SjlemonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
9750682Sjlemon    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
9850673Sjlemon
99100335Sdillonint	tcp_rexmit_min;
100100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
101167721Sandre    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
102167721Sandre    "Minimum Retransmission Timeout");
103100335Sdillon
104100335Sdillonint	tcp_rexmit_slop;
105100335SdillonSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
106167721Sandre    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
107167721Sandre    "Retransmission Timer Slop");
108100335Sdillon
10987499Srwatsonstatic int	always_keepalive = 1;
110133874SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
11146381Sbillf    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
11215039Sphk
113167036Smohansint    tcp_fast_finwait2_recycle = 0;
114167036SmohansSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
115167721Sandre    &tcp_fast_finwait2_recycle, 0,
116167721Sandre    "Recycle closed FIN_WAIT_2 connections faster");
117167036Smohans
118167036Smohansint    tcp_finwait2_timeout;
119167036SmohansSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
120167721Sandre    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
121167036Smohans
122231025Sglebiusint	tcp_keepcnt = TCPTV_KEEPCNT;
123231025SglebiusSYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
124231025Sglebius    "Number of keepalive probes to send");
125167036Smohans
12612296Sphk	/* max idle probes */
12750673Sjlemonint	tcp_maxpersistidle;
12811150Swollman
129245238Sjhbstatic int	tcp_rexmit_drop_options = 0;
130245238SjhbSYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
131245238Sjhb    &tcp_rexmit_drop_options, 0,
132245238Sjhb    "Drop TCP options from 3rd and later retransmitted SYN");
133245238Sjhb
134272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
135272720Ssbruno#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
136272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
137273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
138272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
139272720Ssbruno    "Path MTU Discovery Black Hole Detection Enabled");
140272720Ssbruno
141272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
142272720Ssbruno#define	V_tcp_pmtud_blackhole_activated \
143272720Ssbruno    VNET(tcp_pmtud_blackhole_activated)
144272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
145273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
146272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
147272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count");
148272720Ssbruno
149272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
150272720Ssbruno#define	V_tcp_pmtud_blackhole_activated_min_mss \
151272720Ssbruno    VNET(tcp_pmtud_blackhole_activated_min_mss)
152272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
153273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
154272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
155272720Ssbruno    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
156272720Ssbruno
157272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
158272720Ssbruno#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
159272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
160273377Shselasky    CTLFLAG_RD|CTLFLAG_VNET,
161272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
162272720Ssbruno    "Path MTU Discovery Black Hole Detection, Failure Count");
163272720Ssbruno
164272720Ssbruno#ifdef INET
165272720Ssbrunostatic VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
166272720Ssbruno#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
167272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
168273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
169272720Ssbruno    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
170272720Ssbruno    "Path MTU Discovery Black Hole Detection lowered MSS");
171272720Ssbruno#endif
172272720Ssbruno
173272720Ssbruno#ifdef INET6
174272720Ssbrunostatic VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
175272720Ssbruno#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
176272720SsbrunoSYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
177273377Shselasky    CTLFLAG_RW|CTLFLAG_VNET,
178272720Ssbruno    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
179272720Ssbruno    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
180272720Ssbruno#endif
181272720Ssbruno
182268027Sadrian#ifdef	RSS
183268027Sadrianstatic int	per_cpu_timers = 1;
184268027Sadrian#else
185205391Skmacystatic int	per_cpu_timers = 0;
186268027Sadrian#endif
187205391SkmacySYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
188205391Skmacy    &per_cpu_timers , 0, "run tcp timers on all cpus");
189205391Skmacy
190266422Sadrian#if 0
191205391Skmacy#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
192205391Skmacy		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
193266422Sadrian#endif
194205391Skmacy
1951541Srgrimes/*
196266422Sadrian * Map the given inp to a CPU id.
197266422Sadrian *
198266422Sadrian * This queries RSS if it's compiled in, else it defaults to the current
199266422Sadrian * CPU ID.
200266422Sadrian */
201266422Sadrianstatic inline int
202266422Sadrianinp_to_cpuid(struct inpcb *inp)
203266422Sadrian{
204266422Sadrian	u_int cpuid;
205266422Sadrian
206266422Sadrian#ifdef	RSS
207266422Sadrian	if (per_cpu_timers) {
208266422Sadrian		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
209266422Sadrian		if (cpuid == NETISR_CPUID_NONE)
210266422Sadrian			return (curcpu);	/* XXX */
211266422Sadrian		else
212266422Sadrian			return (cpuid);
213266422Sadrian	}
214266422Sadrian#else
215266422Sadrian	/* Legacy, pre-RSS behaviour */
216266422Sadrian	if (per_cpu_timers) {
217266422Sadrian		/*
218266422Sadrian		 * We don't have a flowid -> cpuid mapping, so cheat and
219266422Sadrian		 * just map unknown cpuids to curcpu.  Not the best, but
220266422Sadrian		 * apparently better than defaulting to swi 0.
221266422Sadrian		 */
222266422Sadrian		cpuid = inp->inp_flowid % (mp_maxid + 1);
223266422Sadrian		if (! CPU_ABSENT(cpuid))
224266422Sadrian			return (cpuid);
225266422Sadrian		return (curcpu);
226266422Sadrian	}
227266422Sadrian#endif
228266422Sadrian	/* Default for RSS and non-RSS - cpuid 0 */
229266422Sadrian	else {
230266422Sadrian		return (0);
231266422Sadrian	}
232266422Sadrian}
233266422Sadrian
234266422Sadrian/*
2351541Srgrimes * Tcp protocol timeout routine called every 500 ms.
23650673Sjlemon * Updates timestamps used for TCP
2371541Srgrimes * causes finite state machine actions if timers expire.
2381541Srgrimes */
2391541Srgrimesvoid
240172309Ssilbytcp_slowtimo(void)
2411541Srgrimes{
242183550Szec	VNET_ITERATOR_DECL(vnet_iter);
2431541Srgrimes
244195760Srwatson	VNET_LIST_RLOCK_NOSLEEP();
245183550Szec	VNET_FOREACH(vnet_iter) {
246183550Szec		CURVNET_SET(vnet_iter);
247273850Sjch		(void) tcp_tw_2msl_scan(0);
248183550Szec		CURVNET_RESTORE();
249183550Szec	}
250195760Srwatson	VNET_LIST_RUNLOCK_NOSLEEP();
2511541Srgrimes}
2521541Srgrimes
25373110Sjlemonint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
25473110Sjlemon    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
25573110Sjlemon
2561541Srgrimesint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
257115824Shsu    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
2581541Srgrimes
259115824Shsustatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
26011150Swollman
261157376Srwatsonstatic int tcp_timer_race;
262157376SrwatsonSYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
263157376Srwatson    0, "Count of t_inpcb races on tcp_discardcb");
264157376Srwatson
265172074Srwatson/*
266172074Srwatson * TCP timer processing.
267172074Srwatson */
268172074Srwatson
26950673Sjlemonvoid
270172074Srwatsontcp_timer_delack(void *xtp)
2711541Srgrimes{
272172074Srwatson	struct tcpcb *tp = xtp;
273172074Srwatson	struct inpcb *inp;
274183550Szec	CURVNET_SET(tp->t_vnet);
2751541Srgrimes
276172074Srwatson	inp = tp->t_inpcb;
277157376Srwatson	/*
278172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
279172074Srwatson	 * tear-down mean we need it as a work-around for races between
280172074Srwatson	 * timers and tcp_discardcb().
281172074Srwatson	 *
282172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
283157376Srwatson	 */
284172074Srwatson	if (inp == NULL) {
285172074Srwatson		tcp_timer_race++;
286183550Szec		CURVNET_RESTORE();
287108265Shsu		return;
288108265Shsu	}
289178285Srwatson	INP_WLOCK(inp);
290239075Strociny	if (callout_pending(&tp->t_timers->tt_delack) ||
291239075Strociny	    !callout_active(&tp->t_timers->tt_delack)) {
292178285Srwatson		INP_WUNLOCK(inp);
293183550Szec		CURVNET_RESTORE();
29450673Sjlemon		return;
29550673Sjlemon	}
296172309Ssilby	callout_deactivate(&tp->t_timers->tt_delack);
297239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
298239075Strociny		INP_WUNLOCK(inp);
299239075Strociny		CURVNET_RESTORE();
300239075Strociny		return;
301239075Strociny	}
3021541Srgrimes
30350673Sjlemon	tp->t_flags |= TF_ACKNOW;
304190948Srwatson	TCPSTAT_INC(tcps_delack);
30550673Sjlemon	(void) tcp_output(tp);
306178285Srwatson	INP_WUNLOCK(inp);
307183550Szec	CURVNET_RESTORE();
30850673Sjlemon}
30950673Sjlemon
310172074Srwatsonvoid
311172074Srwatsontcp_timer_2msl(void *xtp)
31250673Sjlemon{
313172074Srwatson	struct tcpcb *tp = xtp;
314172074Srwatson	struct inpcb *inp;
315183550Szec	CURVNET_SET(tp->t_vnet);
31650673Sjlemon#ifdef TCPDEBUG
31750673Sjlemon	int ostate;
31850673Sjlemon
31950673Sjlemon	ostate = tp->t_state;
32050673Sjlemon#endif
321157376Srwatson	/*
322172074Srwatson	 * XXXRW: Does this actually happen?
323172074Srwatson	 */
324181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
325172074Srwatson	inp = tp->t_inpcb;
326172074Srwatson	/*
327172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
328172074Srwatson	 * tear-down mean we need it as a work-around for races between
329172074Srwatson	 * timers and tcp_discardcb().
330172074Srwatson	 *
331172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
332172074Srwatson	 */
333172074Srwatson	if (inp == NULL) {
334172074Srwatson		tcp_timer_race++;
335181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
336183550Szec		CURVNET_RESTORE();
337172074Srwatson		return;
338172074Srwatson	}
339178285Srwatson	INP_WLOCK(inp);
340172074Srwatson	tcp_free_sackholes(tp);
341239075Strociny	if (callout_pending(&tp->t_timers->tt_2msl) ||
342172309Ssilby	    !callout_active(&tp->t_timers->tt_2msl)) {
343178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
344181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
345183550Szec		CURVNET_RESTORE();
346172074Srwatson		return;
347172074Srwatson	}
348172309Ssilby	callout_deactivate(&tp->t_timers->tt_2msl);
349239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
350239075Strociny		INP_WUNLOCK(inp);
351239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
352239075Strociny		CURVNET_RESTORE();
353239075Strociny		return;
354239075Strociny	}
355172074Srwatson	/*
3561541Srgrimes	 * 2 MSL timeout in shutdown went off.  If we're closed but
3571541Srgrimes	 * still waiting for peer to close and connection has been idle
3581541Srgrimes	 * too long, or if 2MSL time is up from TIME_WAIT, delete connection
3591541Srgrimes	 * control block.  Otherwise, check again in a bit.
360167036Smohans	 *
361167036Smohans	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
362167036Smohans	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
363167036Smohans	 * Ignore fact that there were recent incoming segments.
3641541Srgrimes	 */
365167036Smohans	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
366172074Srwatson	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
367167036Smohans	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
368190948Srwatson		TCPSTAT_INC(tcps_finwait2_drops);
369172074Srwatson		tp = tcp_close(tp);
370167036Smohans	} else {
371167036Smohans		if (tp->t_state != TCPS_TIME_WAIT &&
372231025Sglebius		   ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
373231025Sglebius		       callout_reset_on(&tp->t_timers->tt_2msl,
374266422Sadrian			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
375266422Sadrian			   inp_to_cpuid(inp));
376172074Srwatson	       else
377172074Srwatson		       tp = tcp_close(tp);
378172074Srwatson       }
3791541Srgrimes
38050673Sjlemon#ifdef TCPDEBUG
381172312Skib	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
38297658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
38397658Stanimura			  PRU_SLOWTIMO);
38450673Sjlemon#endif
385172074Srwatson	if (tp != NULL)
386178285Srwatson		INP_WUNLOCK(inp);
387181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
388183550Szec	CURVNET_RESTORE();
38950673Sjlemon}
39050673Sjlemon
391172074Srwatsonvoid
392172074Srwatsontcp_timer_keep(void *xtp)
39350673Sjlemon{
394172074Srwatson	struct tcpcb *tp = xtp;
39578642Ssilby	struct tcptemp *t_template;
396172074Srwatson	struct inpcb *inp;
397183550Szec	CURVNET_SET(tp->t_vnet);
39850673Sjlemon#ifdef TCPDEBUG
39950673Sjlemon	int ostate;
40050673Sjlemon
40150673Sjlemon	ostate = tp->t_state;
40250673Sjlemon#endif
403181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
404172074Srwatson	inp = tp->t_inpcb;
405157376Srwatson	/*
406172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
407172074Srwatson	 * tear-down mean we need it as a work-around for races between
408172074Srwatson	 * timers and tcp_discardcb().
409172074Srwatson	 *
410172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
411172074Srwatson	 */
412172074Srwatson	if (inp == NULL) {
413172074Srwatson		tcp_timer_race++;
414181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
415183550Szec		CURVNET_RESTORE();
416172074Srwatson		return;
417172074Srwatson	}
418178285Srwatson	INP_WLOCK(inp);
419239075Strociny	if (callout_pending(&tp->t_timers->tt_keep) ||
420239075Strociny	    !callout_active(&tp->t_timers->tt_keep)) {
421178285Srwatson		INP_WUNLOCK(inp);
422181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
423183550Szec		CURVNET_RESTORE();
424172074Srwatson		return;
425172074Srwatson	}
426172309Ssilby	callout_deactivate(&tp->t_timers->tt_keep);
427239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
428239075Strociny		INP_WUNLOCK(inp);
429239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
430239075Strociny		CURVNET_RESTORE();
431239075Strociny		return;
432239075Strociny	}
433172074Srwatson	/*
43450673Sjlemon	 * Keep-alive timer went off; send something
43550673Sjlemon	 * or drop connection if idle for too long.
4361541Srgrimes	 */
437190948Srwatson	TCPSTAT_INC(tcps_keeptimeo);
43850673Sjlemon	if (tp->t_state < TCPS_ESTABLISHED)
43950673Sjlemon		goto dropit;
440122326Ssam	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
44150673Sjlemon	    tp->t_state <= TCPS_CLOSING) {
442231025Sglebius		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
44350673Sjlemon			goto dropit;
4441541Srgrimes		/*
44550673Sjlemon		 * Send a packet designed to force a response
44650673Sjlemon		 * if the peer is up and reachable:
44750673Sjlemon		 * either an ACK if the connection is still alive,
44850673Sjlemon		 * or an RST if the peer has closed the connection
44950673Sjlemon		 * due to timeout or reboot.
45050673Sjlemon		 * Using sequence number tp->snd_una-1
45150673Sjlemon		 * causes the transmitted zero-length segment
45250673Sjlemon		 * to lie outside the receive window;
45350673Sjlemon		 * by the protocol spec, this requires the
45450673Sjlemon		 * correspondent TCP to respond.
4551541Srgrimes		 */
456190948Srwatson		TCPSTAT_INC(tcps_keepprobe);
457111144Sjlemon		t_template = tcpip_maketemplate(inp);
45878642Ssilby		if (t_template) {
45978642Ssilby			tcp_respond(tp, t_template->tt_ipgen,
46078642Ssilby				    &t_template->tt_t, (struct mbuf *)NULL,
46178642Ssilby				    tp->rcv_nxt, tp->snd_una - 1, 0);
462179487Srwatson			free(t_template, M_TEMP);
46378642Ssilby		}
464231025Sglebius		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
465266422Sadrian		    tcp_timer_keep, tp, inp_to_cpuid(inp));
46697658Stanimura	} else
467231025Sglebius		callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
468266422Sadrian		    tcp_timer_keep, tp, inp_to_cpuid(inp));
46950673Sjlemon
47050673Sjlemon#ifdef TCPDEBUG
471122326Ssam	if (inp->inp_socket->so_options & SO_DEBUG)
47255679Sshin		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
47350673Sjlemon			  PRU_SLOWTIMO);
47450673Sjlemon#endif
475178285Srwatson	INP_WUNLOCK(inp);
476181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
477183550Szec	CURVNET_RESTORE();
478172074Srwatson	return;
47950673Sjlemon
48050673Sjlemondropit:
481190948Srwatson	TCPSTAT_INC(tcps_keepdrops);
482172074Srwatson	tp = tcp_drop(tp, ETIMEDOUT);
483172074Srwatson
484172074Srwatson#ifdef TCPDEBUG
485172074Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
486172074Srwatson		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
487172074Srwatson			  PRU_SLOWTIMO);
488172074Srwatson#endif
489172074Srwatson	if (tp != NULL)
490178285Srwatson		INP_WUNLOCK(tp->t_inpcb);
491181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
492183550Szec	CURVNET_RESTORE();
49350673Sjlemon}
49450673Sjlemon
495172074Srwatsonvoid
496172074Srwatsontcp_timer_persist(void *xtp)
49750673Sjlemon{
498172074Srwatson	struct tcpcb *tp = xtp;
499172074Srwatson	struct inpcb *inp;
500183550Szec	CURVNET_SET(tp->t_vnet);
50150673Sjlemon#ifdef TCPDEBUG
50250673Sjlemon	int ostate;
50350673Sjlemon
50450673Sjlemon	ostate = tp->t_state;
50550673Sjlemon#endif
506181803Sbz	INP_INFO_WLOCK(&V_tcbinfo);
507172074Srwatson	inp = tp->t_inpcb;
508157376Srwatson	/*
509172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
510172074Srwatson	 * tear-down mean we need it as a work-around for races between
511172074Srwatson	 * timers and tcp_discardcb().
512172074Srwatson	 *
513172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
514172074Srwatson	 */
515172074Srwatson	if (inp == NULL) {
516172074Srwatson		tcp_timer_race++;
517181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
518183550Szec		CURVNET_RESTORE();
519172074Srwatson		return;
520172074Srwatson	}
521178285Srwatson	INP_WLOCK(inp);
522239075Strociny	if (callout_pending(&tp->t_timers->tt_persist) ||
523239075Strociny	    !callout_active(&tp->t_timers->tt_persist)) {
524178285Srwatson		INP_WUNLOCK(inp);
525181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
526183550Szec		CURVNET_RESTORE();
527172074Srwatson		return;
528172074Srwatson	}
529172309Ssilby	callout_deactivate(&tp->t_timers->tt_persist);
530239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
531239075Strociny		INP_WUNLOCK(inp);
532239075Strociny		INP_INFO_WUNLOCK(&V_tcbinfo);
533239075Strociny		CURVNET_RESTORE();
534239075Strociny		return;
535239075Strociny	}
536172074Srwatson	/*
53750673Sjlemon	 * Persistance timer into zero window.
53850673Sjlemon	 * Force a byte to be output, if possible.
53950673Sjlemon	 */
540190948Srwatson	TCPSTAT_INC(tcps_persisttimeo);
54150673Sjlemon	/*
54250673Sjlemon	 * Hack: if the peer is dead/unreachable, we do not
54350673Sjlemon	 * time out if the window is closed.  After a full
54450673Sjlemon	 * backoff, drop the connection if the idle time
54550673Sjlemon	 * (no responses to probes) reaches the maximum
54650673Sjlemon	 * backoff that we would use if retransmitting.
54750673Sjlemon	 */
54850673Sjlemon	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
549194305Sjhb	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
550194305Sjhb	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
551190948Srwatson		TCPSTAT_INC(tcps_persistdrop);
552172074Srwatson		tp = tcp_drop(tp, ETIMEDOUT);
553172074Srwatson		goto out;
55450673Sjlemon	}
555242267Sandre	/*
556242267Sandre	 * If the user has closed the socket then drop a persisting
557242267Sandre	 * connection after a much reduced timeout.
558242267Sandre	 */
559242267Sandre	if (tp->t_state > TCPS_CLOSE_WAIT &&
560242267Sandre	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
561242267Sandre		TCPSTAT_INC(tcps_persistdrop);
562242267Sandre		tp = tcp_drop(tp, ETIMEDOUT);
563242267Sandre		goto out;
564242267Sandre	}
56550673Sjlemon	tcp_setpersist(tp);
566146463Sps	tp->t_flags |= TF_FORCEDATA;
56750673Sjlemon	(void) tcp_output(tp);
568146463Sps	tp->t_flags &= ~TF_FORCEDATA;
56950673Sjlemon
570172074Srwatsonout:
57150673Sjlemon#ifdef TCPDEBUG
572158644Sglebius	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
573158644Sglebius		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
57450673Sjlemon#endif
575172074Srwatson	if (tp != NULL)
576178285Srwatson		INP_WUNLOCK(inp);
577181803Sbz	INP_INFO_WUNLOCK(&V_tcbinfo);
578183550Szec	CURVNET_RESTORE();
57950673Sjlemon}
58050673Sjlemon
581172074Srwatsonvoid
582172074Srwatsontcp_timer_rexmt(void * xtp)
58350673Sjlemon{
584172074Srwatson	struct tcpcb *tp = xtp;
585183550Szec	CURVNET_SET(tp->t_vnet);
58650673Sjlemon	int rexmt;
587172074Srwatson	int headlocked;
588172074Srwatson	struct inpcb *inp;
58950673Sjlemon#ifdef TCPDEBUG
59050673Sjlemon	int ostate;
59150673Sjlemon
59250673Sjlemon	ostate = tp->t_state;
59350673Sjlemon#endif
594272720Ssbruno
595205391Skmacy	INP_INFO_RLOCK(&V_tcbinfo);
596172074Srwatson	inp = tp->t_inpcb;
597172074Srwatson	/*
598172074Srwatson	 * XXXRW: While this assert is in fact correct, bugs in the tcpcb
599172074Srwatson	 * tear-down mean we need it as a work-around for races between
600172074Srwatson	 * timers and tcp_discardcb().
601172074Srwatson	 *
602172074Srwatson	 * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
603172074Srwatson	 */
604172074Srwatson	if (inp == NULL) {
605172074Srwatson		tcp_timer_race++;
606205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
607183550Szec		CURVNET_RESTORE();
608172074Srwatson		return;
609172074Srwatson	}
610178285Srwatson	INP_WLOCK(inp);
611239075Strociny	if (callout_pending(&tp->t_timers->tt_rexmt) ||
612239075Strociny	    !callout_active(&tp->t_timers->tt_rexmt)) {
613178285Srwatson		INP_WUNLOCK(inp);
614205391Skmacy		INP_INFO_RUNLOCK(&V_tcbinfo);
615183550Szec		CURVNET_RESTORE();
616172074Srwatson		return;
617172074Srwatson	}
618172309Ssilby	callout_deactivate(&tp->t_timers->tt_rexmt);
619239075Strociny	if ((inp->inp_flags & INP_DROPPED) != 0) {
620239075Strociny		INP_WUNLOCK(inp);
621239075Strociny		INP_INFO_RUNLOCK(&V_tcbinfo);
622239075Strociny		CURVNET_RESTORE();
623239075Strociny		return;
624239075Strociny	}
625130989Sps	tcp_free_sackholes(tp);
62650673Sjlemon	/*
62750673Sjlemon	 * Retransmission timer went off.  Message has not
62850673Sjlemon	 * been acked within retransmit interval.  Back off
62950673Sjlemon	 * to a longer retransmit interval and retransmit one segment.
63050673Sjlemon	 */
63150673Sjlemon	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
63250673Sjlemon		tp->t_rxtshift = TCP_MAXRXTSHIFT;
633190948Srwatson		TCPSTAT_INC(tcps_timeoutdrop);
634205391Skmacy		in_pcbref(inp);
635217126Sjhb		INP_INFO_RUNLOCK(&V_tcbinfo);
636217126Sjhb		INP_WUNLOCK(inp);
637217126Sjhb		INP_INFO_WLOCK(&V_tcbinfo);
638217126Sjhb		INP_WLOCK(inp);
639222488Srwatson		if (in_pcbrele_wlocked(inp)) {
640217126Sjhb			INP_INFO_WUNLOCK(&V_tcbinfo);
641217126Sjhb			CURVNET_RESTORE();
642217126Sjhb			return;
643217126Sjhb		}
644226318Snp		if (inp->inp_flags & INP_DROPPED) {
645226318Snp			INP_WUNLOCK(inp);
646226318Snp			INP_INFO_WUNLOCK(&V_tcbinfo);
647226318Snp			CURVNET_RESTORE();
648226318Snp			return;
649226318Snp		}
650226318Snp
651172074Srwatson		tp = tcp_drop(tp, tp->t_softerror ?
652172074Srwatson			      tp->t_softerror : ETIMEDOUT);
653205391Skmacy		headlocked = 1;
654172074Srwatson		goto out;
65550673Sjlemon	}
656205391Skmacy	INP_INFO_RUNLOCK(&V_tcbinfo);
657172074Srwatson	headlocked = 0;
658242250Sandre	if (tp->t_state == TCPS_SYN_SENT) {
65913229Solah		/*
660242250Sandre		 * If the SYN was retransmitted, indicate CWND to be
661242250Sandre		 * limited to 1 segment in cc_conn_init().
662242250Sandre		 */
663242250Sandre		tp->snd_cwnd = 1;
664242250Sandre	} else if (tp->t_rxtshift == 1) {
665242250Sandre		/*
66650673Sjlemon		 * first retransmit; record ssthresh and cwnd so they can
667133874Srwatson		 * be recovered if this turns out to be a "bad" retransmit.
668133874Srwatson		 * A retransmit is considered "bad" if an ACK for this
66950673Sjlemon		 * segment is received within RTT/2 interval; the assumption
670133874Srwatson		 * here is that the ACK was already in flight.  See
67150673Sjlemon		 * "On Estimating End-to-End Network Path Properties" by
67250673Sjlemon		 * Allman and Paxson for more details.
6731541Srgrimes		 */
67450673Sjlemon		tp->snd_cwnd_prev = tp->snd_cwnd;
67550673Sjlemon		tp->snd_ssthresh_prev = tp->snd_ssthresh;
676117650Shsu		tp->snd_recover_prev = tp->snd_recover;
677215166Slstewart		if (IN_FASTRECOVERY(tp->t_flags))
678215166Slstewart			tp->t_flags |= TF_WASFRECOVERY;
679117650Shsu		else
680215166Slstewart			tp->t_flags &= ~TF_WASFRECOVERY;
681215166Slstewart		if (IN_CONGRECOVERY(tp->t_flags))
682215166Slstewart			tp->t_flags |= TF_WASCRECOVERY;
683215166Slstewart		else
684215166Slstewart			tp->t_flags &= ~TF_WASCRECOVERY;
68550673Sjlemon		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
686221209Sjhb		tp->t_flags |= TF_PREVVALID;
687221209Sjhb	} else
688221209Sjhb		tp->t_flags &= ~TF_PREVVALID;
689190948Srwatson	TCPSTAT_INC(tcps_rexmttimeo);
69073110Sjlemon	if (tp->t_state == TCPS_SYN_SENT)
691242260Sandre		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
69273110Sjlemon	else
69373110Sjlemon		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
69450673Sjlemon	TCPT_RANGESET(tp->t_rxtcur, rexmt,
69550673Sjlemon		      tp->t_rttmin, TCPTV_REXMTMAX);
696272720Ssbruno
697273063Ssbruno	/*
698273063Ssbruno	 * We enter the path for PLMTUD if connection is established or, if
699273063Ssbruno	 * connection is FIN_WAIT_1 status, reason for the last is that if
700273063Ssbruno	 * amount of data we send is very small, we could send it in couple of
701273063Ssbruno	 * packets and process straight to FIN. In that case we won't catch
702273063Ssbruno	 * ESTABLISHED state.
703273063Ssbruno	 */
704273063Ssbruno	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
705273063Ssbruno	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
706272720Ssbruno		int optlen;
707272720Ssbruno#ifdef INET6
708272720Ssbruno		int isipv6;
709272720Ssbruno#endif
710272720Ssbruno
711272720Ssbruno		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
712272720Ssbruno		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
713272720Ssbruno		    (tp->t_rxtshift <= 2)) {
714272720Ssbruno			/*
715272720Ssbruno			 * Enter Path MTU Black-hole Detection mechanism:
716272720Ssbruno			 * - Disable Path MTU Discovery (IP "DF" bit).
717272720Ssbruno			 * - Reduce MTU to lower value than what we
718272720Ssbruno			 *   negotiated with peer.
719272720Ssbruno			 */
720272720Ssbruno			/* Record that we may have found a black hole. */
721272720Ssbruno			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
722272720Ssbruno
723272720Ssbruno			/* Keep track of previous MSS. */
724272720Ssbruno			optlen = tp->t_maxopd - tp->t_maxseg;
725272720Ssbruno			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
726272720Ssbruno
727272720Ssbruno			/*
728272720Ssbruno			 * Reduce the MSS to blackhole value or to the default
729272720Ssbruno			 * in an attempt to retransmit.
730272720Ssbruno			 */
731272720Ssbruno#ifdef INET6
732272720Ssbruno			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
733272720Ssbruno			if (isipv6 &&
734272720Ssbruno			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
735272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
736272720Ssbruno				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
737272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
738272720Ssbruno			} else if (isipv6) {
739272720Ssbruno				/* Use the default MSS. */
740272720Ssbruno				tp->t_maxopd = V_tcp_v6mssdflt;
741272720Ssbruno				/*
742272720Ssbruno				 * Disable Path MTU Discovery when we switch to
743272720Ssbruno				 * minmss.
744272720Ssbruno				 */
745272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
746272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
747272720Ssbruno			}
748272720Ssbruno#endif
749272720Ssbruno#if defined(INET6) && defined(INET)
750272720Ssbruno			else
751272720Ssbruno#endif
752272720Ssbruno#ifdef INET
753272720Ssbruno			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
754272720Ssbruno				/* Use the sysctl tuneable blackhole MSS. */
755272720Ssbruno				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
756272720Ssbruno				V_tcp_pmtud_blackhole_activated++;
757272720Ssbruno			} else {
758272720Ssbruno				/* Use the default MSS. */
759272720Ssbruno				tp->t_maxopd = V_tcp_mssdflt;
760272720Ssbruno				/*
761272720Ssbruno				 * Disable Path MTU Discovery when we switch to
762272720Ssbruno				 * minmss.
763272720Ssbruno				 */
764272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
765272720Ssbruno				V_tcp_pmtud_blackhole_activated_min_mss++;
766272720Ssbruno			}
767272720Ssbruno#endif
768272720Ssbruno			tp->t_maxseg = tp->t_maxopd - optlen;
769272720Ssbruno			/*
770272720Ssbruno			 * Reset the slow-start flight size
771272720Ssbruno			 * as it may depend on the new MSS.
772272720Ssbruno			 */
773272720Ssbruno			if (CC_ALGO(tp)->conn_init != NULL)
774272720Ssbruno				CC_ALGO(tp)->conn_init(tp->ccv);
775272720Ssbruno		} else {
776272720Ssbruno			/*
777272720Ssbruno			 * If further retransmissions are still unsuccessful
778272720Ssbruno			 * with a lowered MTU, maybe this isn't a blackhole and
779272720Ssbruno			 * we restore the previous MSS and blackhole detection
780272720Ssbruno			 * flags.
781272720Ssbruno			 */
782272720Ssbruno			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
783272720Ssbruno			    (tp->t_rxtshift > 4)) {
784272720Ssbruno				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
785272720Ssbruno				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
786272720Ssbruno				optlen = tp->t_maxopd - tp->t_maxseg;
787272720Ssbruno				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
788272720Ssbruno				tp->t_maxseg = tp->t_maxopd - optlen;
789272720Ssbruno				V_tcp_pmtud_blackhole_failed++;
790272720Ssbruno				/*
791272720Ssbruno				 * Reset the slow-start flight size as it
792272720Ssbruno				 * may depend on the new MSS.
793272720Ssbruno				 */
794272720Ssbruno				if (CC_ALGO(tp)->conn_init != NULL)
795272720Ssbruno					CC_ALGO(tp)->conn_init(tp->ccv);
796272720Ssbruno			}
797272720Ssbruno		}
798272720Ssbruno	}
799272720Ssbruno
80050673Sjlemon	/*
801242264Sandre	 * Disable RFC1323 and SACK if we haven't got any response to
802133874Srwatson	 * our third SYN to work-around some broken terminal servers
803133874Srwatson	 * (most of which have hopefully been retired) that have bad VJ
804133874Srwatson	 * header compression code which trashes TCP segments containing
80577539Sjesper	 * unknown-to-them TCP options.
80677539Sjesper	 */
807245238Sjhb	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
808245238Sjhb	    (tp->t_rxtshift == 3))
809242263Sandre		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
81077539Sjesper	/*
811122922Sandre	 * If we backed off this far, our srtt estimate is probably bogus.
812122922Sandre	 * Clobber it so we'll take the next rtt measurement as our srtt;
81350673Sjlemon	 * move the current srtt into rttvar to keep the current
81450673Sjlemon	 * retransmit times until then.
81550673Sjlemon	 */
81650673Sjlemon	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
81755679Sshin#ifdef INET6
81855679Sshin		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
81955679Sshin			in6_losing(tp->t_inpcb);
82055679Sshin#endif
82150673Sjlemon		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
82250673Sjlemon		tp->t_srtt = 0;
82350673Sjlemon	}
82450673Sjlemon	tp->snd_nxt = tp->snd_una;
825117650Shsu	tp->snd_recover = tp->snd_max;
82650673Sjlemon	/*
82750673Sjlemon	 * Force a segment to be sent.
82850673Sjlemon	 */
82950673Sjlemon	tp->t_flags |= TF_ACKNOW;
83050673Sjlemon	/*
83150673Sjlemon	 * If timing a segment in this window, stop the timer.
83250673Sjlemon	 */
83350673Sjlemon	tp->t_rtttime = 0;
834215166Slstewart
835216101Slstewart	cc_cong_signal(tp, NULL, CC_RTO);
836215166Slstewart
83750673Sjlemon	(void) tcp_output(tp);
8381541Srgrimes
839172074Srwatsonout:
84050673Sjlemon#ifdef TCPDEBUG
841157136Srwatson	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
84297658Stanimura		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
84397658Stanimura			  PRU_SLOWTIMO);
8441541Srgrimes#endif
845172074Srwatson	if (tp != NULL)
846178285Srwatson		INP_WUNLOCK(inp);
847172074Srwatson	if (headlocked)
848181803Sbz		INP_INFO_WUNLOCK(&V_tcbinfo);
849183550Szec	CURVNET_RESTORE();
8501541Srgrimes}
851172074Srwatson
852172074Srwatsonvoid
853172074Srwatsontcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
854172074Srwatson{
855172074Srwatson	struct callout *t_callout;
856280904Sjch	timeout_t *f_callout;
857205391Skmacy	struct inpcb *inp = tp->t_inpcb;
858266422Sadrian	int cpu = inp_to_cpuid(inp);
859172074Srwatson
860237263Snp#ifdef TCP_OFFLOAD
861237263Snp	if (tp->t_flags & TF_TOE)
862237263Snp		return;
863237263Snp#endif
864237263Snp
865172074Srwatson	switch (timer_type) {
866172074Srwatson		case TT_DELACK:
867172309Ssilby			t_callout = &tp->t_timers->tt_delack;
868172074Srwatson			f_callout = tcp_timer_delack;
869172074Srwatson			break;
870172074Srwatson		case TT_REXMT:
871172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
872172074Srwatson			f_callout = tcp_timer_rexmt;
873172074Srwatson			break;
874172074Srwatson		case TT_PERSIST:
875172309Ssilby			t_callout = &tp->t_timers->tt_persist;
876172074Srwatson			f_callout = tcp_timer_persist;
877172074Srwatson			break;
878172074Srwatson		case TT_KEEP:
879172309Ssilby			t_callout = &tp->t_timers->tt_keep;
880172074Srwatson			f_callout = tcp_timer_keep;
881172074Srwatson			break;
882172074Srwatson		case TT_2MSL:
883172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
884172074Srwatson			f_callout = tcp_timer_2msl;
885172074Srwatson			break;
886172074Srwatson		default:
887280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
888172074Srwatson		}
889172074Srwatson	if (delta == 0) {
890172074Srwatson		callout_stop(t_callout);
891172074Srwatson	} else {
892205391Skmacy		callout_reset_on(t_callout, delta, f_callout, tp, cpu);
893172074Srwatson	}
894172074Srwatson}
895172074Srwatson
896172074Srwatsonint
897172074Srwatsontcp_timer_active(struct tcpcb *tp, int timer_type)
898172074Srwatson{
899172074Srwatson	struct callout *t_callout;
900172074Srwatson
901172074Srwatson	switch (timer_type) {
902172074Srwatson		case TT_DELACK:
903172309Ssilby			t_callout = &tp->t_timers->tt_delack;
904172074Srwatson			break;
905172074Srwatson		case TT_REXMT:
906172309Ssilby			t_callout = &tp->t_timers->tt_rexmt;
907172074Srwatson			break;
908172074Srwatson		case TT_PERSIST:
909172309Ssilby			t_callout = &tp->t_timers->tt_persist;
910172074Srwatson			break;
911172074Srwatson		case TT_KEEP:
912172309Ssilby			t_callout = &tp->t_timers->tt_keep;
913172074Srwatson			break;
914172074Srwatson		case TT_2MSL:
915172309Ssilby			t_callout = &tp->t_timers->tt_2msl;
916172074Srwatson			break;
917172074Srwatson		default:
918280990Sjch			panic("tp %p bad timer_type %#x", tp, timer_type);
919172074Srwatson		}
920172074Srwatson	return callout_active(t_callout);
921172074Srwatson}
922197244Ssilby
/*
 * Convert a tick count to milliseconds using the global tick rate hz.
 *
 * The multiplication is done in long long so that the intermediate product
 * 1000 * t cannot overflow at the width of t (for a 32-bit signed tick
 * count and hz = 1000 that would happen after about 36 minutes of ticks).
 * The quotient is then narrowed back to int.
 */
#define	ticks_to_msecs(t)	((int)(((long long)(t) * 1000) / hz))
924197244Ssilby
925197244Ssilbyvoid
926247777Sdavidetcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
927247777Sdavide    struct xtcp_timer *xtimer)
928197244Ssilby{
929247777Sdavide	sbintime_t now;
930247777Sdavide
931247777Sdavide	bzero(xtimer, sizeof(*xtimer));
932197244Ssilby	if (timer == NULL)
933197244Ssilby		return;
934247777Sdavide	now = getsbinuptime();
935197244Ssilby	if (callout_active(&timer->tt_delack))
936247777Sdavide		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
937197244Ssilby	if (callout_active(&timer->tt_rexmt))
938247777Sdavide		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
939197244Ssilby	if (callout_active(&timer->tt_persist))
940247777Sdavide		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
941197244Ssilby	if (callout_active(&timer->tt_keep))
942247777Sdavide		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
943197244Ssilby	if (callout_active(&timer->tt_2msl))
944247777Sdavide		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
945197244Ssilby	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
946197244Ssilby}
947