tcp_timewait.c revision 221250
118334Speter/*-
290075Sobrien * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3169689Skan *	The Regents of the University of California.  All rights reserved.
418334Speter *
518334Speter * Redistribution and use in source and binary forms, with or without
6132718Skan * modification, are permitted provided that the following conditions
718334Speter * are met:
8132718Skan * 1. Redistributions of source code must retain the above copyright
918334Speter *    notice, this list of conditions and the following disclaimer.
1018334Speter * 2. Redistributions in binary form must reproduce the above copyright
1118334Speter *    notice, this list of conditions and the following disclaimer in the
1218334Speter *    documentation and/or other materials provided with the distribution.
13132718Skan * 4. Neither the name of the University nor the names of its contributors
1418334Speter *    may be used to endorse or promote products derived from this software
1518334Speter *    without specific prior written permission.
1618334Speter *
1718334Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1818334Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19132718Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20169689Skan * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21169689Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2218334Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2318334Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2450397Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2518334Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2618334Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2750397Sobrien * SUCH DAMAGE.
28132718Skan *
29132718Skan *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
3018334Speter */
3118334Speter
3218334Speter#include <sys/cdefs.h>
3318334Speter__FBSDID("$FreeBSD: head/sys/netinet/tcp_timewait.c 221250 2011-04-30 11:21:29Z bz $");
3418334Speter
3550397Sobrien#include "opt_inet.h"
3696263Sobrien#include "opt_inet6.h"
37132718Skan#include "opt_tcpdebug.h"
38169689Skan
39169689Skan#include <sys/param.h>
4018334Speter#include <sys/systm.h>
4152284Sobrien#include <sys/callout.h>
4252284Sobrien#include <sys/kernel.h>
4352284Sobrien#include <sys/sysctl.h>
4418334Speter#include <sys/malloc.h>
4518334Speter#include <sys/mbuf.h>
4652284Sobrien#include <sys/priv.h>
4752284Sobrien#include <sys/proc.h>
4852284Sobrien#include <sys/socket.h>
4918334Speter#include <sys/socketvar.h>
5052284Sobrien#include <sys/protosw.h>
5152284Sobrien#include <sys/random.h>
5252284Sobrien
5318334Speter#include <vm/uma.h>
5452284Sobrien
5518334Speter#include <net/route.h>
5618334Speter#include <net/if.h>
5752284Sobrien#include <net/vnet.h>
5852284Sobrien
5952284Sobrien#include <netinet/in.h>
6018334Speter#include <netinet/in_pcb.h>
6152284Sobrien#include <netinet/in_systm.h>
6252284Sobrien#include <netinet/in_var.h>
63169689Skan#include <netinet/ip.h>
64169689Skan#include <netinet/ip_icmp.h>
65169689Skan#include <netinet/ip_var.h>
66169689Skan#ifdef INET6
6752284Sobrien#include <netinet/ip6.h>
6852284Sobrien#include <netinet6/in6_pcb.h>
6990075Sobrien#include <netinet6/ip6_var.h>
7090075Sobrien#include <netinet6/scope6_var.h>
7190075Sobrien#include <netinet6/nd6.h>
7290075Sobrien#endif
7390075Sobrien#include <netinet/tcp.h>
7490075Sobrien#include <netinet/tcp_fsm.h>
7590075Sobrien#include <netinet/tcp_seq.h>
7690075Sobrien#include <netinet/tcp_timer.h>
7790075Sobrien#include <netinet/tcp_var.h>
7890075Sobrien#ifdef INET6
7990075Sobrien#include <netinet6/tcp6_var.h>
8090075Sobrien#endif
8190075Sobrien#include <netinet/tcpip.h>
8290075Sobrien#ifdef TCPDEBUG
8390075Sobrien#include <netinet/tcp_debug.h>
8490075Sobrien#endif
8590075Sobrien#ifdef INET6
8690075Sobrien#include <netinet6/ip6protosw.h>
8790075Sobrien#endif
88169689Skan
8990075Sobrien#include <machine/in_cksum.h>
9090075Sobrien
9190075Sobrien#include <security/mac/mac_framework.h>
9290075Sobrien
/* Per-vnet UMA zone from which struct tcptw entries are allocated. */
static VNET_DEFINE(uma_zone_t, tcptw_zone);
#define	V_tcptw_zone			VNET(tcptw_zone)
/*
 * Configured limit on the tcptw zone (tunable/sysctl net.inet.tcp.maxtcptw).
 * A value of 0 means the limit is taken from tcptw_auto_size() instead.
 */
static int	maxtcptw;
9690075Sobrien
/*
 * The timed wait queue contains references to each of the TCP sessions
 * currently in the TIME_WAIT state.  The queue pointers, including the
 * queue pointers in each tcptw structure, are protected using the global
 * tcbinfo lock, which must be held over queue iteration and modification.
 */
static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
#define	V_twq_2msl			VNET(twq_2msl)

/* Stamp a new expiry time on a tcptw and append it to the 2MSL queue. */
static void	tcp_tw_2msl_reset(struct tcptw *, int);
/* Unlink a tcptw from the 2MSL queue. */
static void	tcp_tw_2msl_stop(struct tcptw *);
10852284Sobrien
10952284Sobrienstatic int
110169689Skantcptw_auto_size(void)
111169689Skan{
112169689Skan	int halfrange;
11390075Sobrien
11490075Sobrien	/*
115169689Skan	 * Max out at half the ephemeral port range so that TIME_WAIT
11618334Speter	 * sockets don't tie up too many ephemeral ports.
117132718Skan	 */
118132718Skan	if (V_ipport_lastauto > V_ipport_firstauto)
119132718Skan		halfrange = (V_ipport_lastauto - V_ipport_firstauto) / 2;
120132718Skan	else
121132718Skan		halfrange = (V_ipport_firstauto - V_ipport_lastauto) / 2;
122132718Skan	/* Protect against goofy port ranges smaller than 32. */
123132718Skan	return (imin(imax(halfrange, 32), maxsockets / 5));
124132718Skan}
125132718Skan
126132718Skanstatic int
127132718Skansysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
128132718Skan{
129132718Skan	int error, new;
130169689Skan
131132718Skan	if (maxtcptw == 0)
132132718Skan		new = tcptw_auto_size();
133132718Skan	else
134132718Skan		new = maxtcptw;
135169689Skan	error = sysctl_handle_int(oidp, &new, 0, req);
136132718Skan	if (error == 0 && req->newptr)
137169689Skan		if (new >= 32) {
138132718Skan			maxtcptw = new;
139132718Skan			uma_zone_set_max(V_tcptw_zone, maxtcptw);
140132718Skan		}
141132718Skan	return (error);
142132718Skan}
143169689Skan
/* net.inet.tcp.maxtcptw: read/write, serviced by sysctl_maxtcptw(). */
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
    &maxtcptw, 0, sysctl_maxtcptw, "IU",
    "Maximum number of compressed TCP TIME_WAIT entries");

/*
 * Per-vnet knob, 0 by default.  When non-zero, tcp_twstart() closes a
 * connection whose foreign address is local instead of creating a
 * TIME_WAIT entry for it.
 */
VNET_DEFINE(int, nolocaltimewait) = 0;
#define	V_nolocaltimewait	VNET(nolocaltimewait)
SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW,
    &VNET_NAME(nolocaltimewait), 0,
    "Do not create compressed TCP TIME_WAIT entries for local connections");
153117395Skan
154132718Skanvoid
155132718Skantcp_tw_zone_change(void)
156132718Skan{
157117395Skan
158132718Skan	if (maxtcptw == 0)
159132718Skan		uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
160132718Skan}
161132718Skan
162117395Skanvoid
163132718Skantcp_tw_init(void)
164169689Skan{
165132718Skan
166132718Skan	V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
167132718Skan	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
168169689Skan	TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
169132718Skan	if (maxtcptw == 0)
170132718Skan		uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
171132718Skan	else
172132718Skan		uma_zone_set_max(V_tcptw_zone, maxtcptw);
173132718Skan	TAILQ_INIT(&V_twq_2msl);
174132718Skan}
175132718Skan
#ifdef VIMAGE
/*
 * Tear down TIME_WAIT state: close every entry still on the 2MSL queue
 * and destroy the tcptw zone.
 */
void
tcp_tw_destroy(void)
{
	struct tcptw *tw;

	INP_INFO_WLOCK(&V_tcbinfo);
	while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL) {
		/*
		 * tcp_twclose() asserts that the inpcb is write-locked and
		 * releases that lock itself (INP_WUNLOCK() or in_pcbfree()),
		 * so take the lock here exactly as tcp_tw_2msl_scan() does.
		 */
		INP_WLOCK(tw->tw_inpcb);
		tcp_twclose(tw, 0);
	}
	INP_INFO_WUNLOCK(&V_tcbinfo);

	uma_zdestroy(V_tcptw_zone);
}
#endif
190132718Skan
191132718Skan/*
192132718Skan * Move a TCP connection into TIME_WAIT state.
193132718Skan *    tcbinfo is locked.
194132718Skan *    inp is locked, and is unlocked before returning.
195132718Skan */
196132718Skanvoid
197132718Skantcp_twstart(struct tcpcb *tp)
198132718Skan{
199132718Skan	struct tcptw *tw;
200169689Skan	struct inpcb *inp = tp->t_inpcb;
201169689Skan	int acknow;
202132718Skan	struct socket *so;
203132718Skan#ifdef INET6
204169689Skan	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
205132718Skan#endif
206132718Skan
207117395Skan	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_reset(). */
208117395Skan	INP_WLOCK_ASSERT(inp);
209117395Skan
210117395Skan	if (V_nolocaltimewait) {
211117395Skan		int error = 0;
21250397Sobrien#ifdef INET6
21318334Speter		if (isipv6)
21418334Speter			error = in6_localaddr(&inp->in6p_faddr);
21550397Sobrien#endif
21618334Speter#if defined(INET6) && defined(INET)
21718334Speter		else
21818334Speter#endif
21918334Speter#ifdef INET
22018334Speter			error = in_localip(inp->inp_faddr);
22118334Speter#endif
22218334Speter		if (error) {
22350397Sobrien			tp = tcp_close(tp);
22418334Speter			if (tp != NULL)
22590075Sobrien				INP_WUNLOCK(inp);
22690075Sobrien			return;
22790075Sobrien		}
22890075Sobrien	}
22990075Sobrien
23090075Sobrien	tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
23190075Sobrien	if (tw == NULL) {
23290075Sobrien		tw = tcp_tw_2msl_scan(1);
23390075Sobrien		if (tw == NULL) {
23450397Sobrien			tp = tcp_close(tp);
23518334Speter			if (tp != NULL)
236132718Skan				INP_WUNLOCK(inp);
237169689Skan			return;
238169689Skan		}
239169689Skan	}
24018334Speter	tw->tw_inpcb = inp;
24190075Sobrien
24290075Sobrien	/*
24390075Sobrien	 * Recover last window size sent.
24490075Sobrien	 */
24590075Sobrien	tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
24690075Sobrien
24790075Sobrien	/*
24850397Sobrien	 * Set t_recent if timestamps are used on the connection.
24990075Sobrien	 */
250169689Skan	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
251169689Skan	    (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
25218334Speter		tw->t_recent = tp->ts_recent;
25390075Sobrien		tw->ts_offset = tp->ts_offset;
25490075Sobrien	} else {
25550397Sobrien		tw->t_recent = 0;
25690075Sobrien		tw->ts_offset = 0;
25718334Speter	}
25890075Sobrien
259169689Skan	tw->snd_nxt = tp->snd_nxt;
260169689Skan	tw->rcv_nxt = tp->rcv_nxt;
261169689Skan	tw->iss     = tp->iss;
26290075Sobrien	tw->irs     = tp->irs;
26318334Speter	tw->t_starttime = tp->t_starttime;
26418334Speter	tw->tw_time = 0;
26590075Sobrien
26690075Sobrien/* XXX
26790075Sobrien * If this code will
268169689Skan * be used for fin-wait-2 state also, then we may need
269169689Skan * a ts_recent from the last segment.
270169689Skan */
271169689Skan	acknow = tp->t_flags & TF_ACKNOW;
272169689Skan
273169689Skan	/*
274169689Skan	 * First, discard tcpcb state, which includes stopping its timers and
275169689Skan	 * freeing it.  tcp_discardcb() used to also release the inpcb, but
276169689Skan	 * that work is now done in the caller.
277169689Skan	 *
27890075Sobrien	 * Note: soisdisconnected() call used to be made in tcp_discardcb(),
27918334Speter	 * and might not be needed here any longer.
280169689Skan	 */
28190075Sobrien	tcp_discardcb(tp);
28290075Sobrien	so = inp->inp_socket;
28318334Speter	soisdisconnected(so);
28418334Speter	tw->tw_cred = crhold(so->so_cred);
285117395Skan	SOCK_LOCK(so);
286117395Skan	tw->tw_so_options = so->so_options;
287117395Skan	SOCK_UNLOCK(so);
288117395Skan	if (acknow)
289169689Skan		tcp_twrespond(tw, TH_ACK);
29090075Sobrien	inp->inp_ppcb = tw;
291169689Skan	inp->inp_flags |= INP_TIMEWAIT;
29296263Sobrien	tcp_tw_2msl_reset(tw, 0);
293169689Skan
294169689Skan	/*
295169689Skan	 * If the inpcb owns the sole reference to the socket, then we can
296169689Skan	 * detach and free the socket as it is not needed in time wait.
297117395Skan	 */
29818334Speter	if (inp->inp_flags & INP_SOCKREF) {
299169689Skan		KASSERT(so->so_state & SS_PROTOREF,
300169689Skan		    ("tcp_twstart: !SS_PROTOREF"));
301169689Skan		inp->inp_flags &= ~INP_SOCKREF;
302169689Skan		INP_WUNLOCK(inp);
303169689Skan		ACCEPT_LOCK();
304169689Skan		SOCK_LOCK(so);
305169689Skan		so->so_state &= ~SS_PROTOREF;
306169689Skan		sofree(so);
307169689Skan	} else
308169689Skan		INP_WUNLOCK(inp);
309169689Skan}
310169689Skan
311169689Skan#if 0
312169689Skan/*
 * The approximate rate of ISN increase of Microsoft TCP stacks;
314169689Skan * the actual rate is slightly higher due to the addition of
315169689Skan * random positive increments.
316169689Skan *
317169689Skan * Most other new OSes use semi-randomized ISN values, so we
318169689Skan * do not need to worry about them.
319169689Skan */
320169689Skan#define MS_ISN_BYTES_PER_SECOND		250000
321169689Skan
322169689Skan/*
323169689Skan * Determine if the ISN we will generate has advanced beyond the last
324169689Skan * sequence number used by the previous connection.  If so, indicate
325169689Skan * that it is safe to recycle this tw socket by returning 1.
326169689Skan */
327169689Skanint
328169689Skantcp_twrecycleable(struct tcptw *tw)
329169689Skan{
330169689Skan	tcp_seq new_iss = tw->iss;
331169689Skan	tcp_seq new_irs = tw->irs;
332169689Skan
333169689Skan	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
334169689Skan	new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
335169689Skan	new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
336169689Skan
337169689Skan	if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
338169689Skan		return (1);
339169689Skan	else
340169689Skan		return (0);
341169689Skan}
342169689Skan#endif
343169689Skan
344169689Skan/*
345169689Skan * Returns 1 if the TIME_WAIT state was killed and we should start over,
346169689Skan * looking for a pcb in the listen state.  Returns 0 otherwise.
347169689Skan */
34890075Sobrienint
349169689Skantcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
350169689Skan    struct mbuf *m, int tlen)
351117395Skan{
352117395Skan	struct tcptw *tw;
353117395Skan	int thflags;
354117395Skan	tcp_seq seq;
355169689Skan
356169689Skan	/* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
357169689Skan	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
358169689Skan	INP_WLOCK_ASSERT(inp);
359169689Skan
360169689Skan	/*
361169689Skan	 * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
362169689Skan	 * still present.  This is undesirable, but temporarily necessary
363169689Skan	 * until we work out how to handle inpcb's who's timewait state has
364117395Skan	 * been removed.
365117395Skan	 */
366117395Skan	tw = intotw(inp);
367117395Skan	if (tw == NULL)
368169689Skan		goto drop;
369169689Skan
370169689Skan	thflags = th->th_flags;
371169689Skan
37290075Sobrien	/*
37390075Sobrien	 * NOTE: for FIN_WAIT_2 (to be added later),
37490075Sobrien	 * must validate sequence number before accepting RST
375169689Skan	 */
376169689Skan
377117395Skan	/*
378132718Skan	 * If the segment contains RST:
379169689Skan	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
380132718Skan	 *      RFC 1337.
38190075Sobrien	 */
38290075Sobrien	if (thflags & TH_RST)
383169689Skan		goto drop;
38496263Sobrien
38596263Sobrien#if 0
38696263Sobrien/* PAWS not needed at the moment */
38796263Sobrien	/*
38896263Sobrien	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
389169689Skan	 * and it's less than ts_recent, drop it.
390169689Skan	 */
391169689Skan	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
392169689Skan	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
393169689Skan		if ((thflags & TH_ACK) == 0)
394169689Skan			goto drop;
39518334Speter		goto ack;
39690075Sobrien	}
39718334Speter	/*
39818334Speter	 * ts_recent is never updated because we never accept new segments.
39990075Sobrien	 */
40090075Sobrien#endif
40190075Sobrien
40290075Sobrien	/*
40390075Sobrien	 * If a new connection request is received
404169689Skan	 * while in TIME_WAIT, drop the old connection
40590075Sobrien	 * and start over if the sequence numbers
40618334Speter	 * are above the previous ones.
40790075Sobrien	 */
408169689Skan	if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
40990075Sobrien		tcp_twclose(tw, 0);
41090075Sobrien		return (1);
411169689Skan	}
41290075Sobrien
41390075Sobrien	/*
41418334Speter	 * Drop the segment if it does not contain an ACK.
415169689Skan	 */
41690075Sobrien	if ((thflags & TH_ACK) == 0)
417169689Skan		goto drop;
418169689Skan
419169689Skan	/*
42096263Sobrien	 * Reset the 2MSL timer if this is a duplicate FIN.
42190075Sobrien	 */
42218334Speter	if (thflags & TH_FIN) {
42318334Speter		seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
424169689Skan		if (seq + 1 == tw->rcv_nxt)
425169689Skan			tcp_tw_2msl_reset(tw, 1);
426169689Skan	}
427117395Skan
428169689Skan	/*
429169689Skan	 * Acknowledge the segment if it has data or is not a duplicate ACK.
430169689Skan	 */
431169689Skan	if (thflags != TH_ACK || tlen != 0 ||
432169689Skan	    th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
433169689Skan		tcp_twrespond(tw, TH_ACK);
434169689Skandrop:
435169689Skan	INP_WUNLOCK(inp);
436169689Skan	m_freem(m);
437169689Skan	return (0);
438169689Skan}
439169689Skan
440169689Skanvoid
441169689Skantcp_twclose(struct tcptw *tw, int reuse)
442169689Skan{
443169689Skan	struct socket *so;
444169689Skan	struct inpcb *inp;
445169689Skan
446169689Skan	/*
447169689Skan	 * At this point, we are in one of two situations:
448169689Skan	 *
449169689Skan	 * (1) We have no socket, just an inpcb<->twtcp pair.  We can free
450169689Skan	 *     all state.
451169689Skan	 *
452169689Skan	 * (2) We have a socket -- if we own a reference, release it and
453169689Skan	 *     notify the socket layer.
454169689Skan	 */
455169689Skan	inp = tw->tw_inpcb;
456169689Skan	KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
457169689Skan	KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
458169689Skan	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* tcp_tw_2msl_stop(). */
459169689Skan	INP_WLOCK_ASSERT(inp);
460169689Skan
461169689Skan	tw->tw_inpcb = NULL;
462169689Skan	tcp_tw_2msl_stop(tw);
463169689Skan	inp->inp_ppcb = NULL;
464169689Skan	in_pcbdrop(inp);
465169689Skan
466169689Skan	so = inp->inp_socket;
467169689Skan	if (so != NULL) {
468169689Skan		/*
469169689Skan		 * If there's a socket, handle two cases: first, we own a
470169689Skan		 * strong reference, which we will now release, or we don't
471169689Skan		 * in which case another reference exists (XXXRW: think
472169689Skan		 * about this more), and we don't need to take action.
473169689Skan		 */
474169689Skan		if (inp->inp_flags & INP_SOCKREF) {
475169689Skan			inp->inp_flags &= ~INP_SOCKREF;
476169689Skan			INP_WUNLOCK(inp);
477169689Skan			ACCEPT_LOCK();
478169689Skan			SOCK_LOCK(so);
479169689Skan			KASSERT(so->so_state & SS_PROTOREF,
480169689Skan			    ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
481169689Skan			so->so_state &= ~SS_PROTOREF;
482169689Skan			sofree(so);
483169689Skan		} else {
484169689Skan			/*
485169689Skan			 * If we don't own the only reference, the socket and
486169689Skan			 * inpcb need to be left around to be handled by
487169689Skan			 * tcp_usr_detach() later.
488169689Skan			 */
489169689Skan			INP_WUNLOCK(inp);
490169689Skan		}
491169689Skan	} else
492169689Skan		in_pcbfree(inp);
493169689Skan	TCPSTAT_INC(tcps_closed);
494117395Skan	crfree(tw->tw_cred);
495169689Skan	tw->tw_cred = NULL;
496117395Skan	if (reuse)
497117395Skan		return;
498169689Skan	uma_zfree(V_tcptw_zone, tw);
499117395Skan}
500169689Skan
501169689Skanint
502169689Skantcp_twrespond(struct tcptw *tw, int flags)
503169689Skan{
504169689Skan	struct inpcb *inp = tw->tw_inpcb;
505169689Skan#if defined(INET6) || defined(INET)
506169689Skan	struct tcphdr *th = NULL;
507169689Skan#endif
508169689Skan	struct mbuf *m;
509169689Skan#ifdef INET
510117395Skan	struct ip *ip = NULL;
511117395Skan#endif
512117395Skan	u_int hdrlen, optlen;
513117395Skan	int error = 0;			/* Keep compiler happy */
514169689Skan	struct tcpopt to;
515117395Skan#ifdef INET6
516117395Skan	struct ip6_hdr *ip6 = NULL;
517169689Skan	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
518169689Skan#endif
519119256Skan
520119256Skan	INP_WLOCK_ASSERT(inp);
521119256Skan
522119256Skan	m = m_gethdr(M_DONTWAIT, MT_DATA);
523119256Skan	if (m == NULL)
524119256Skan		return (ENOBUFS);
525119256Skan	m->m_data += max_linkhdr;
526119256Skan
527119256Skan#ifdef MAC
528169689Skan	mac_inpcb_create_mbuf(inp, m);
529119256Skan#endif
530119256Skan
531119256Skan#ifdef INET6
532119256Skan	if (isipv6) {
533119256Skan		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
534119256Skan		ip6 = mtod(m, struct ip6_hdr *);
535169689Skan		th = (struct tcphdr *)(ip6 + 1);
536169689Skan		tcpip_fillheaders(inp, ip6, th);
537119256Skan	}
538119256Skan#endif
539119256Skan#if defined(INET6) && defined(INET)
540119256Skan	else
541119256Skan#endif
542119256Skan#ifdef INET
543119256Skan	{
54490075Sobrien		hdrlen = sizeof(struct tcpiphdr);
545169689Skan		ip = mtod(m, struct ip *);
546169689Skan		th = (struct tcphdr *)(ip + 1);
547169689Skan		tcpip_fillheaders(inp, ip, th);
548169689Skan	}
549169689Skan#endif
550169689Skan	to.to_flags = 0;
551169689Skan
552169689Skan	/*
553169689Skan	 * Send a timestamp and echo-reply if both our side and our peer
554169689Skan	 * have sent timestamps in our SYN's and this is not a RST.
555169689Skan	 */
556169689Skan	if (tw->t_recent && flags == TH_ACK) {
557169689Skan		to.to_flags |= TOF_TS;
558169689Skan		to.to_tsval = ticks + tw->ts_offset;
559169689Skan		to.to_tsecr = tw->t_recent;
560169689Skan	}
561169689Skan	optlen = tcp_addoptions(&to, (u_char *)(th + 1));
562169689Skan
563169689Skan	m->m_len = hdrlen + optlen;
564169689Skan	m->m_pkthdr.len = m->m_len;
565169689Skan
566169689Skan	KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
567169689Skan
568169689Skan	th->th_seq = htonl(tw->snd_nxt);
569169689Skan	th->th_ack = htonl(tw->rcv_nxt);
570169689Skan	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
571169689Skan	th->th_flags = flags;
572169689Skan	th->th_win = htons(tw->last_win);
573169689Skan
574169689Skan#ifdef INET6
57518334Speter	if (isipv6) {
57690075Sobrien		th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
57790075Sobrien		    sizeof(struct tcphdr) + optlen);
57890075Sobrien		ip6->ip6_hlim = in6_selecthlim(inp, NULL);
57950397Sobrien		error = ip6_output(m, inp->in6p_outputopts, NULL,
58090075Sobrien		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
581132718Skan	}
58218334Speter#endif
583117395Skan#if defined(INET6) && defined(INET)
584117395Skan	else
585117395Skan#endif
586117395Skan#ifdef INET
587117395Skan	{
588117395Skan		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
589117395Skan		    htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
590117395Skan		m->m_pkthdr.csum_flags = CSUM_TCP;
591117395Skan		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
592132718Skan		ip->ip_len = m->m_pkthdr.len;
59318334Speter		if (V_path_mtu_discovery)
594117395Skan			ip->ip_off |= IP_DF;
59518334Speter		error = ip_output(m, inp->inp_options, NULL,
596117395Skan		    ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
597169689Skan		    NULL, inp);
598117395Skan	}
599169689Skan#endif
600117395Skan	if (flags & TH_ACK)
60118334Speter		TCPSTAT_INC(tcps_sndacks);
602117395Skan	else
603132718Skan		TCPSTAT_INC(tcps_sndctrl);
604169689Skan	TCPSTAT_INC(tcps_sndtotal);
60518334Speter	return (error);
60652284Sobrien}
60718334Speter
608169689Skanstatic void
609169689Skantcp_tw_2msl_reset(struct tcptw *tw, int rearm)
61018334Speter{
61150397Sobrien
61250397Sobrien	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
61318334Speter	INP_WLOCK_ASSERT(tw->tw_inpcb);
61490075Sobrien	if (rearm)
615132718Skan		TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
61690075Sobrien	tw->tw_time = ticks + 2 * tcp_msl;
61790075Sobrien	TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
61850397Sobrien}
61990075Sobrien
62090075Sobrienstatic void
62190075Sobrientcp_tw_2msl_stop(struct tcptw *tw)
622169689Skan{
623169689Skan
62490075Sobrien	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
62550397Sobrien	TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
626169689Skan}
62750397Sobrien
628169689Skanstruct tcptw *
62950397Sobrientcp_tw_2msl_scan(int reuse)
630169689Skan{
631169689Skan	struct tcptw *tw;
63290075Sobrien
63390075Sobrien	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
63490075Sobrien	for (;;) {
63590075Sobrien		tw = TAILQ_FIRST(&V_twq_2msl);
63690075Sobrien		if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0))
63750397Sobrien			break;
638169689Skan		INP_WLOCK(tw->tw_inpcb);
639169689Skan		tcp_twclose(tw, reuse);
640169689Skan		if (reuse)
64190075Sobrien			return (tw);
64218334Speter	}
64318334Speter	return (NULL);
64418334Speter}
64518334Speter