tcp_syncache.c revision 106696
186764Sjlemon/*-
292275Srwatson * Copyright (c) 2001 Networks Associates Technology, Inc.
386764Sjlemon * All rights reserved.
486764Sjlemon *
586764Sjlemon * This software was developed for the FreeBSD Project by Jonathan Lemon
686764Sjlemon * and NAI Labs, the Security Research Division of Network Associates, Inc.
786764Sjlemon * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
886764Sjlemon * DARPA CHATS research program.
986764Sjlemon *
1086764Sjlemon * Redistribution and use in source and binary forms, with or without
1186764Sjlemon * modification, are permitted provided that the following conditions
1286764Sjlemon * are met:
1386764Sjlemon * 1. Redistributions of source code must retain the above copyright
1486764Sjlemon *    notice, this list of conditions and the following disclaimer.
1586764Sjlemon * 2. Redistributions in binary form must reproduce the above copyright
1686764Sjlemon *    notice, this list of conditions and the following disclaimer in the
1786764Sjlemon *    documentation and/or other materials provided with the distribution.
1886764Sjlemon * 3. The name of the author may not be used to endorse or promote
1986764Sjlemon *    products derived from this software without specific prior written
2086764Sjlemon *    permission.
2186764Sjlemon *
2286764Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2386764Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2486764Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2586764Sjlemon * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2686764Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2786764Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2886764Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2986764Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3086764Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3186764Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3286764Sjlemon * SUCH DAMAGE.
3386764Sjlemon *
3486764Sjlemon * $FreeBSD: head/sys/netinet/tcp_syncache.c 106696 2002-11-09 12:55:07Z alfred $
3586764Sjlemon */
3686764Sjlemon
3786764Sjlemon#include "opt_inet6.h"
3886764Sjlemon#include "opt_ipsec.h"
39101106Srwatson#include "opt_mac.h"
4086764Sjlemon
4186764Sjlemon#include <sys/param.h>
4286764Sjlemon#include <sys/systm.h>
4386764Sjlemon#include <sys/kernel.h>
4486764Sjlemon#include <sys/sysctl.h>
4586764Sjlemon#include <sys/malloc.h>
46101106Srwatson#include <sys/mac.h>
4786764Sjlemon#include <sys/mbuf.h>
4886764Sjlemon#include <sys/md5.h>
4986764Sjlemon#include <sys/proc.h>		/* for proc0 declaration */
5086764Sjlemon#include <sys/random.h>
5186764Sjlemon#include <sys/socket.h>
5286764Sjlemon#include <sys/socketvar.h>
5386764Sjlemon
5486764Sjlemon#include <net/if.h>
5586764Sjlemon#include <net/route.h>
5686764Sjlemon
5786764Sjlemon#include <netinet/in.h>
5886764Sjlemon#include <netinet/in_systm.h>
5986764Sjlemon#include <netinet/ip.h>
6086764Sjlemon#include <netinet/in_var.h>
6186764Sjlemon#include <netinet/in_pcb.h>
6286764Sjlemon#include <netinet/ip_var.h>
6386764Sjlemon#ifdef INET6
6486764Sjlemon#include <netinet/ip6.h>
6586764Sjlemon#include <netinet/icmp6.h>
6686764Sjlemon#include <netinet6/nd6.h>
6786764Sjlemon#include <netinet6/ip6_var.h>
6886764Sjlemon#include <netinet6/in6_pcb.h>
6986764Sjlemon#endif
7086764Sjlemon#include <netinet/tcp.h>
7186764Sjlemon#include <netinet/tcp_fsm.h>
7286764Sjlemon#include <netinet/tcp_seq.h>
7386764Sjlemon#include <netinet/tcp_timer.h>
7486764Sjlemon#include <netinet/tcp_var.h>
7586764Sjlemon#ifdef INET6
7686764Sjlemon#include <netinet6/tcp6_var.h>
7786764Sjlemon#endif
7886764Sjlemon
7986764Sjlemon#ifdef IPSEC
8086764Sjlemon#include <netinet6/ipsec.h>
8186764Sjlemon#ifdef INET6
8286764Sjlemon#include <netinet6/ipsec6.h>
8386764Sjlemon#endif
8486764Sjlemon#endif /*IPSEC*/
8586764Sjlemon
86105199Ssam#ifdef FAST_IPSEC
87105199Ssam#include <netipsec/ipsec.h>
88105199Ssam#ifdef INET6
89105199Ssam#include <netipsec/ipsec6.h>
90105199Ssam#endif
91105199Ssam#include <netipsec/key.h>
92105199Ssam#define	IPSEC
93105199Ssam#endif /*FAST_IPSEC*/
94105199Ssam
9586764Sjlemon#include <machine/in_cksum.h>
9692760Sjeff#include <vm/uma.h>
9786764Sjlemon
9888180Sjlemonstatic int tcp_syncookies = 1;
9988180SjlemonSYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
10088180Sjlemon    &tcp_syncookies, 0,
10188180Sjlemon    "Use TCP SYN cookies if the syncache overflows");
10288180Sjlemon
10386764Sjlemonstatic void	 syncache_drop(struct syncache *, struct syncache_head *);
10486764Sjlemonstatic void	 syncache_free(struct syncache *);
10588180Sjlemonstatic void	 syncache_insert(struct syncache *, struct syncache_head *);
10686764Sjlemonstruct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
10786764Sjlemonstatic int	 syncache_respond(struct syncache *, struct mbuf *);
10896602Srwatsonstatic struct 	 socket *syncache_socket(struct syncache *, struct socket *,
10996602Srwatson		    struct mbuf *m);
11086764Sjlemonstatic void	 syncache_timer(void *);
11188180Sjlemonstatic u_int32_t syncookie_generate(struct syncache *);
11288180Sjlemonstatic struct syncache *syncookie_lookup(struct in_conninfo *,
11388180Sjlemon		    struct tcphdr *, struct socket *);
11486764Sjlemon
11586764Sjlemon/*
11686764Sjlemon * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
11786764Sjlemon * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds,
11886764Sjlemon * the odds are that the user has given up attempting to connect by then.
11986764Sjlemon */
12086764Sjlemon#define SYNCACHE_MAXREXMTS		3
12186764Sjlemon
12286764Sjlemon/* Arbitrary values */
12386764Sjlemon#define TCP_SYNCACHE_HASHSIZE		512
12486764Sjlemon#define TCP_SYNCACHE_BUCKETLIMIT	30
12586764Sjlemon
12686764Sjlemonstruct tcp_syncache {
12786764Sjlemon	struct	syncache_head *hashbase;
12892760Sjeff	uma_zone_t zone;
12986764Sjlemon	u_int	hashsize;
13086764Sjlemon	u_int	hashmask;
13186764Sjlemon	u_int	bucket_limit;
13286764Sjlemon	u_int	cache_count;
13386764Sjlemon	u_int	cache_limit;
13486764Sjlemon	u_int	rexmt_limit;
13586764Sjlemon	u_int	hash_secret;
13686764Sjlemon	u_int	next_reseed;
13786764Sjlemon	TAILQ_HEAD(, syncache) timerq[SYNCACHE_MAXREXMTS + 1];
13886764Sjlemon	struct	callout tt_timerq[SYNCACHE_MAXREXMTS + 1];
13986764Sjlemon};
14086764Sjlemonstatic struct tcp_syncache tcp_syncache;
14186764Sjlemon
14286764SjlemonSYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");
14386764Sjlemon
14486764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RD,
14586764Sjlemon     &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");
14686764Sjlemon
14786764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RD,
14886764Sjlemon     &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");
14986764Sjlemon
15086764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
15186764Sjlemon     &tcp_syncache.cache_count, 0, "Current number of entries in syncache");
15286764Sjlemon
15386764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RD,
15486764Sjlemon     &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");
15586764Sjlemon
15686764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
15786764Sjlemon     &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
15886764Sjlemon
15986764Sjlemonstatic MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
16086764Sjlemon
/*
 * Hash an IPv4 connection tuple into a bucket index.  The per-boot
 * secret is mixed in with the foreign address and both ports, and the
 * result is reduced with `mask'.
 */
#define SYNCACHE_HASH(inc, mask) 					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc_faddr.s_addr ^					\
	  ((inc)->inc_faddr.s_addr >> 16) ^ 				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/*
 * IPv6 flavour of SYNCACHE_HASH: uses the first and last 32-bit words
 * of the foreign address.
 */
#define SYNCACHE_HASH6(inc, mask) 					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc6_faddr.s6_addr32[0] ^ 				\
	  (inc)->inc6_faddr.s6_addr32[3] ^ 				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/* True when two IPv4 endpoint pairs name the same connection. */
#define ENDPTS_EQ(a, b) (						\
	(a)->ie_fport == (b)->ie_fport &&				\
	(a)->ie_lport == (b)->ie_lport &&				\
	(a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr &&			\
	(a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr			\
)

/*
 * True when two endpoint structures are bytewise identical.  The
 * arguments are parenthesized so pointer expressions such as `p + 1'
 * are sized and compared correctly.
 */
#define ENDPTS6_EQ(a, b) (memcmp((a), (b), sizeof(*(a))) == 0)

/*
 * Place entry `sc' on the timer queue for retransmit slot `slot':
 * record the slot and the absolute expiry time, append the entry to the
 * slot's queue, and arm the slot's callout if it is not already active.
 * Note that `sc' and `slot' are evaluated more than once, so neither
 * argument may have side effects.
 */
#define SYNCACHE_TIMEOUT(sc, slot) do {				\
	(sc)->sc_rxtslot = (slot);					\
	(sc)->sc_rxttime = ticks + TCPTV_RTOBASE * tcp_backoff[(slot)];	\
	TAILQ_INSERT_TAIL(&tcp_syncache.timerq[(slot)], (sc), sc_timerq); \
	if (!callout_active(&tcp_syncache.tt_timerq[(slot)]))		\
		callout_reset(&tcp_syncache.tt_timerq[(slot)],		\
		    TCPTV_RTOBASE * tcp_backoff[(slot)],		\
		    syncache_timer, (void *)((intptr_t)(slot)));	\
} while (0)
19186764Sjlemon
19286764Sjlemonstatic void
19386764Sjlemonsyncache_free(struct syncache *sc)
19486764Sjlemon{
19586764Sjlemon	struct rtentry *rt;
19686764Sjlemon
19786764Sjlemon	if (sc->sc_ipopts)
19886764Sjlemon		(void) m_free(sc->sc_ipopts);
19986764Sjlemon#ifdef INET6
20086764Sjlemon	if (sc->sc_inc.inc_isipv6)
20186764Sjlemon		rt = sc->sc_route6.ro_rt;
20286764Sjlemon	else
20386764Sjlemon#endif
20486764Sjlemon		rt = sc->sc_route.ro_rt;
20586764Sjlemon	if (rt != NULL) {
20686764Sjlemon		/*
20786764Sjlemon		 * If this is the only reference to a protocol cloned
20886764Sjlemon		 * route, remove it immediately.
20986764Sjlemon		 */
21086764Sjlemon		if (rt->rt_flags & RTF_WASCLONED &&
21186764Sjlemon		    (sc->sc_flags & SCF_KEEPROUTE) == 0 &&
21286764Sjlemon		    rt->rt_refcnt == 1)
21386764Sjlemon			rtrequest(RTM_DELETE, rt_key(rt),
21486764Sjlemon			    rt->rt_gateway, rt_mask(rt),
21586764Sjlemon			    rt->rt_flags, NULL);
21686764Sjlemon		RTFREE(rt);
21786764Sjlemon	}
21892760Sjeff	uma_zfree(tcp_syncache.zone, sc);
21986764Sjlemon}
22086764Sjlemon
22186764Sjlemonvoid
22286764Sjlemonsyncache_init(void)
22386764Sjlemon{
22486764Sjlemon	int i;
22586764Sjlemon
22686764Sjlemon	tcp_syncache.cache_count = 0;
22786764Sjlemon	tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
22886764Sjlemon	tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
22986764Sjlemon	tcp_syncache.cache_limit =
23086764Sjlemon	    tcp_syncache.hashsize * tcp_syncache.bucket_limit;
23186764Sjlemon	tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
23286764Sjlemon	tcp_syncache.next_reseed = 0;
23386764Sjlemon	tcp_syncache.hash_secret = arc4random();
23486764Sjlemon
23586764Sjlemon        TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
23686764Sjlemon	    &tcp_syncache.hashsize);
23786764Sjlemon        TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
23886764Sjlemon	    &tcp_syncache.cache_limit);
23986764Sjlemon        TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
24086764Sjlemon	    &tcp_syncache.bucket_limit);
24186764Sjlemon	if (!powerof2(tcp_syncache.hashsize)) {
24286764Sjlemon                printf("WARNING: syncache hash size is not a power of 2.\n");
24386764Sjlemon		tcp_syncache.hashsize = 512;	/* safe default */
24486764Sjlemon        }
24586764Sjlemon	tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
24686764Sjlemon
24786764Sjlemon	/* Allocate the hash table. */
24886764Sjlemon	MALLOC(tcp_syncache.hashbase, struct syncache_head *,
24986764Sjlemon	    tcp_syncache.hashsize * sizeof(struct syncache_head),
25098108Shsu	    M_SYNCACHE, M_WAITOK);
25186764Sjlemon
25286764Sjlemon	/* Initialize the hash buckets. */
25386764Sjlemon	for (i = 0; i < tcp_syncache.hashsize; i++) {
25486764Sjlemon		TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
25586764Sjlemon		tcp_syncache.hashbase[i].sch_length = 0;
25686764Sjlemon	}
25786764Sjlemon
25886764Sjlemon	/* Initialize the timer queues. */
25986814Sbde	for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) {
26086764Sjlemon		TAILQ_INIT(&tcp_syncache.timerq[i]);
26186764Sjlemon		callout_init(&tcp_syncache.tt_timerq[i], 0);
26286764Sjlemon	}
26386764Sjlemon
26486764Sjlemon	/*
26586764Sjlemon	 * Allocate the syncache entries.  Allow the zone to allocate one
26686764Sjlemon	 * more entry than cache limit, so a new entry can bump out an
26786764Sjlemon	 * older one.
26886764Sjlemon	 */
26986764Sjlemon	tcp_syncache.cache_limit -= 1;
27092760Sjeff	tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
27192760Sjeff	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
27292760Sjeff	uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
27386764Sjlemon}
27486764Sjlemon
27588180Sjlemonstatic void
27686764Sjlemonsyncache_insert(sc, sch)
27786764Sjlemon	struct syncache *sc;
27886764Sjlemon	struct syncache_head *sch;
27986764Sjlemon{
28086764Sjlemon	struct syncache *sc2;
28186764Sjlemon	int s, i;
28286764Sjlemon
28386764Sjlemon	/*
28486764Sjlemon	 * Make sure that we don't overflow the per-bucket
28586764Sjlemon	 * limit or the total cache size limit.
28686764Sjlemon	 */
28786764Sjlemon	s = splnet();
28886764Sjlemon	if (sch->sch_length >= tcp_syncache.bucket_limit) {
28986764Sjlemon		/*
29086764Sjlemon		 * The bucket is full, toss the oldest element.
29186764Sjlemon		 */
29286764Sjlemon		sc2 = TAILQ_FIRST(&sch->sch_bucket);
29388180Sjlemon		sc2->sc_tp->ts_recent = ticks;
29486764Sjlemon		syncache_drop(sc2, sch);
29586764Sjlemon		tcpstat.tcps_sc_bucketoverflow++;
29686764Sjlemon	} else if (tcp_syncache.cache_count >= tcp_syncache.cache_limit) {
29786764Sjlemon		/*
29886764Sjlemon		 * The cache is full.  Toss the oldest entry in the
29986764Sjlemon		 * entire cache.  This is the front entry in the
30086764Sjlemon		 * first non-empty timer queue with the largest
30186764Sjlemon		 * timeout value.
30286764Sjlemon		 */
30386764Sjlemon		for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
30486764Sjlemon			sc2 = TAILQ_FIRST(&tcp_syncache.timerq[i]);
30586764Sjlemon			if (sc2 != NULL)
30686764Sjlemon				break;
30786764Sjlemon		}
30888180Sjlemon		sc2->sc_tp->ts_recent = ticks;
30986764Sjlemon		syncache_drop(sc2, NULL);
31086764Sjlemon		tcpstat.tcps_sc_cacheoverflow++;
31186764Sjlemon	}
31286764Sjlemon
31386764Sjlemon	/* Initialize the entry's timer. */
31486764Sjlemon	SYNCACHE_TIMEOUT(sc, 0);
31586764Sjlemon
31686764Sjlemon	/* Put it into the bucket. */
31786764Sjlemon	TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash);
31886764Sjlemon	sch->sch_length++;
31986764Sjlemon	tcp_syncache.cache_count++;
32086764Sjlemon	tcpstat.tcps_sc_added++;
32186764Sjlemon	splx(s);
32286764Sjlemon}
32386764Sjlemon
32486764Sjlemonstatic void
32586764Sjlemonsyncache_drop(sc, sch)
32686764Sjlemon	struct syncache *sc;
32786764Sjlemon	struct syncache_head *sch;
32886764Sjlemon{
32986764Sjlemon	int s;
33086764Sjlemon
33186764Sjlemon	if (sch == NULL) {
33286764Sjlemon#ifdef INET6
33386764Sjlemon		if (sc->sc_inc.inc_isipv6) {
33486764Sjlemon			sch = &tcp_syncache.hashbase[
33586764Sjlemon			    SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)];
33686764Sjlemon		} else
33786764Sjlemon#endif
33886764Sjlemon		{
33986764Sjlemon			sch = &tcp_syncache.hashbase[
34086764Sjlemon			    SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)];
34186764Sjlemon		}
34286764Sjlemon	}
34386764Sjlemon
34486764Sjlemon	s = splnet();
34586764Sjlemon
34686764Sjlemon	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
34786764Sjlemon	sch->sch_length--;
34886764Sjlemon	tcp_syncache.cache_count--;
34986764Sjlemon
35086764Sjlemon	TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot], sc, sc_timerq);
35186764Sjlemon	if (TAILQ_EMPTY(&tcp_syncache.timerq[sc->sc_rxtslot]))
35286764Sjlemon		callout_stop(&tcp_syncache.tt_timerq[sc->sc_rxtslot]);
35386764Sjlemon	splx(s);
35486764Sjlemon
35586764Sjlemon	syncache_free(sc);
35686764Sjlemon}
35786764Sjlemon
35886764Sjlemon/*
35986764Sjlemon * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
36086764Sjlemon * If we have retransmitted an entry the maximum number of times, expire it.
36186764Sjlemon */
36286764Sjlemonstatic void
36386764Sjlemonsyncache_timer(xslot)
36486764Sjlemon	void *xslot;
36586764Sjlemon{
36688195Sjlemon	intptr_t slot = (intptr_t)xslot;
36786764Sjlemon	struct syncache *sc, *nsc;
36886764Sjlemon	struct inpcb *inp;
36986764Sjlemon	int s;
37086764Sjlemon
37186764Sjlemon	s = splnet();
37286764Sjlemon        if (callout_pending(&tcp_syncache.tt_timerq[slot]) ||
37386764Sjlemon            !callout_active(&tcp_syncache.tt_timerq[slot])) {
37486764Sjlemon                splx(s);
37586764Sjlemon                return;
37686764Sjlemon        }
37786764Sjlemon        callout_deactivate(&tcp_syncache.tt_timerq[slot]);
37886764Sjlemon
37986764Sjlemon        nsc = TAILQ_FIRST(&tcp_syncache.timerq[slot]);
38098102Shsu	INP_INFO_RLOCK(&tcbinfo);
38186764Sjlemon	while (nsc != NULL) {
38286764Sjlemon		if (ticks < nsc->sc_rxttime)
38386764Sjlemon			break;
38486764Sjlemon		sc = nsc;
38586764Sjlemon		inp = sc->sc_tp->t_inpcb;
38698102Shsu		INP_LOCK(inp);
38786764Sjlemon		if (slot == SYNCACHE_MAXREXMTS ||
38886764Sjlemon		    slot >= tcp_syncache.rexmt_limit ||
38986764Sjlemon		    inp->inp_gencnt != sc->sc_inp_gencnt) {
39098982Sjlemon			nsc = TAILQ_NEXT(sc, sc_timerq);
39186764Sjlemon			syncache_drop(sc, NULL);
39286764Sjlemon			tcpstat.tcps_sc_stale++;
39398102Shsu			INP_UNLOCK(inp);
39486764Sjlemon			continue;
39586764Sjlemon		}
39698982Sjlemon		/*
39798982Sjlemon		 * syncache_respond() may call back into the syncache to
39898982Sjlemon		 * to modify another entry, so do not obtain the next
39998982Sjlemon		 * entry on the timer chain until it has completed.
40098982Sjlemon		 */
40186764Sjlemon		(void) syncache_respond(sc, NULL);
40298102Shsu		INP_UNLOCK(inp);
40398982Sjlemon		nsc = TAILQ_NEXT(sc, sc_timerq);
40486764Sjlemon		tcpstat.tcps_sc_retransmitted++;
40586764Sjlemon		TAILQ_REMOVE(&tcp_syncache.timerq[slot], sc, sc_timerq);
40686764Sjlemon		SYNCACHE_TIMEOUT(sc, slot + 1);
40786764Sjlemon	}
40898102Shsu	INP_INFO_RUNLOCK(&tcbinfo);
40986764Sjlemon	if (nsc != NULL)
41086764Sjlemon		callout_reset(&tcp_syncache.tt_timerq[slot],
41186764Sjlemon		    nsc->sc_rxttime - ticks, syncache_timer, (void *)(slot));
41286764Sjlemon	splx(s);
41386764Sjlemon}
41486764Sjlemon
41586764Sjlemon/*
41686764Sjlemon * Find an entry in the syncache.
41786764Sjlemon */
41886764Sjlemonstruct syncache *
41986764Sjlemonsyncache_lookup(inc, schp)
42086764Sjlemon	struct in_conninfo *inc;
42186764Sjlemon	struct syncache_head **schp;
42286764Sjlemon{
42386764Sjlemon	struct syncache *sc;
42486764Sjlemon	struct syncache_head *sch;
42586764Sjlemon	int s;
42686764Sjlemon
42786764Sjlemon#ifdef INET6
42886764Sjlemon	if (inc->inc_isipv6) {
42986764Sjlemon		sch = &tcp_syncache.hashbase[
43086764Sjlemon		    SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
43186764Sjlemon		*schp = sch;
43286764Sjlemon		s = splnet();
43386764Sjlemon		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
43486764Sjlemon			if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) {
43586764Sjlemon				splx(s);
43686764Sjlemon				return (sc);
43786764Sjlemon			}
43886764Sjlemon		}
43986764Sjlemon		splx(s);
44086764Sjlemon	} else
44186764Sjlemon#endif
44286764Sjlemon	{
44386764Sjlemon		sch = &tcp_syncache.hashbase[
44486764Sjlemon		    SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
44586764Sjlemon		*schp = sch;
44686764Sjlemon		s = splnet();
44786764Sjlemon		TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
44886764Sjlemon#ifdef INET6
44986764Sjlemon			if (sc->sc_inc.inc_isipv6)
45086764Sjlemon				continue;
45186764Sjlemon#endif
45286764Sjlemon			if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) {
45386764Sjlemon				splx(s);
45486764Sjlemon				return (sc);
45586764Sjlemon			}
45686764Sjlemon		}
45786764Sjlemon		splx(s);
45886764Sjlemon	}
45986764Sjlemon	return (NULL);
46086764Sjlemon}
46186764Sjlemon
46286764Sjlemon/*
46386764Sjlemon * This function is called when we get a RST for a
46486764Sjlemon * non-existent connection, so that we can see if the
46586764Sjlemon * connection is in the syn cache.  If it is, zap it.
46686764Sjlemon */
46786764Sjlemonvoid
46886764Sjlemonsyncache_chkrst(inc, th)
46986764Sjlemon	struct in_conninfo *inc;
47086764Sjlemon	struct tcphdr *th;
47186764Sjlemon{
47286764Sjlemon	struct syncache *sc;
47386764Sjlemon	struct syncache_head *sch;
47486764Sjlemon
47586764Sjlemon	sc = syncache_lookup(inc, &sch);
47686764Sjlemon	if (sc == NULL)
47786764Sjlemon		return;
47886764Sjlemon	/*
47986764Sjlemon	 * If the RST bit is set, check the sequence number to see
48086764Sjlemon	 * if this is a valid reset segment.
48186764Sjlemon	 * RFC 793 page 37:
48286764Sjlemon	 *   In all states except SYN-SENT, all reset (RST) segments
48386764Sjlemon	 *   are validated by checking their SEQ-fields.  A reset is
48486764Sjlemon	 *   valid if its sequence number is in the window.
48586764Sjlemon	 *
48686764Sjlemon	 *   The sequence number in the reset segment is normally an
48786764Sjlemon	 *   echo of our outgoing acknowlegement numbers, but some hosts
48886764Sjlemon	 *   send a reset with the sequence number at the rightmost edge
48986764Sjlemon	 *   of our receive window, and we have to handle this case.
49086764Sjlemon	 */
49186764Sjlemon	if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
49286764Sjlemon	    SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
49386764Sjlemon		syncache_drop(sc, sch);
49486764Sjlemon		tcpstat.tcps_sc_reset++;
49586764Sjlemon	}
49686764Sjlemon}
49786764Sjlemon
49886764Sjlemonvoid
49986764Sjlemonsyncache_badack(inc)
50086764Sjlemon	struct in_conninfo *inc;
50186764Sjlemon{
50286764Sjlemon	struct syncache *sc;
50386764Sjlemon	struct syncache_head *sch;
50486764Sjlemon
50586764Sjlemon	sc = syncache_lookup(inc, &sch);
50686764Sjlemon	if (sc != NULL) {
50786764Sjlemon		syncache_drop(sc, sch);
50886764Sjlemon		tcpstat.tcps_sc_badack++;
50986764Sjlemon	}
51086764Sjlemon}
51186764Sjlemon
51286764Sjlemonvoid
51386764Sjlemonsyncache_unreach(inc, th)
51486764Sjlemon	struct in_conninfo *inc;
51586764Sjlemon	struct tcphdr *th;
51686764Sjlemon{
51786764Sjlemon	struct syncache *sc;
51886764Sjlemon	struct syncache_head *sch;
51986764Sjlemon
52086764Sjlemon	/* we are called at splnet() here */
52186764Sjlemon	sc = syncache_lookup(inc, &sch);
52286764Sjlemon	if (sc == NULL)
52386764Sjlemon		return;
52486764Sjlemon
52586764Sjlemon	/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
52686764Sjlemon	if (ntohl(th->th_seq) != sc->sc_iss)
52786764Sjlemon		return;
52886764Sjlemon
52986764Sjlemon	/*
53086764Sjlemon	 * If we've rertransmitted 3 times and this is our second error,
53186764Sjlemon	 * we remove the entry.  Otherwise, we allow it to continue on.
53286764Sjlemon	 * This prevents us from incorrectly nuking an entry during a
53386764Sjlemon	 * spurious network outage.
53486764Sjlemon	 *
53586764Sjlemon	 * See tcp_notify().
53686764Sjlemon	 */
53786764Sjlemon	if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) {
53886764Sjlemon		sc->sc_flags |= SCF_UNREACH;
53986764Sjlemon		return;
54086764Sjlemon	}
54186764Sjlemon	syncache_drop(sc, sch);
54286764Sjlemon	tcpstat.tcps_sc_unreach++;
54386764Sjlemon}
54486764Sjlemon
54586764Sjlemon/*
54686764Sjlemon * Build a new TCP socket structure from a syncache entry.
54786764Sjlemon */
54886764Sjlemonstatic struct socket *
54996602Srwatsonsyncache_socket(sc, lso, m)
55086764Sjlemon	struct syncache *sc;
55186764Sjlemon	struct socket *lso;
55296602Srwatson	struct mbuf *m;
55386764Sjlemon{
55486764Sjlemon	struct inpcb *inp = NULL;
55586764Sjlemon	struct socket *so;
55686764Sjlemon	struct tcpcb *tp;
55786764Sjlemon
55886764Sjlemon	/*
55986764Sjlemon	 * Ok, create the full blown connection, and set things up
56086764Sjlemon	 * as they would have been set up if we had created the
56186764Sjlemon	 * connection when the SYN arrived.  If we can't create
56286764Sjlemon	 * the connection, abort it.
56386764Sjlemon	 */
56486764Sjlemon	so = sonewconn(lso, SS_ISCONNECTED);
56586764Sjlemon	if (so == NULL) {
56686764Sjlemon		/*
56786764Sjlemon		 * Drop the connection; we will send a RST if the peer
56886764Sjlemon		 * retransmits the ACK,
56986764Sjlemon		 */
57086764Sjlemon		tcpstat.tcps_listendrop++;
57186764Sjlemon		goto abort;
57286764Sjlemon	}
573101106Srwatson#ifdef MAC
574101106Srwatson	mac_set_socket_peer_from_mbuf(m, so);
575101106Srwatson#endif
57686764Sjlemon
57786764Sjlemon	inp = sotoinpcb(so);
57886764Sjlemon
57986764Sjlemon	/*
58086764Sjlemon	 * Insert new socket into hash list.
58186764Sjlemon	 */
58291492Sume	inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6;
58386764Sjlemon#ifdef INET6
58486764Sjlemon	if (sc->sc_inc.inc_isipv6) {
58586764Sjlemon		inp->in6p_laddr = sc->sc_inc.inc6_laddr;
58686764Sjlemon	} else {
58786764Sjlemon		inp->inp_vflag &= ~INP_IPV6;
58886764Sjlemon		inp->inp_vflag |= INP_IPV4;
58986764Sjlemon#endif
59086764Sjlemon		inp->inp_laddr = sc->sc_inc.inc_laddr;
59186764Sjlemon#ifdef INET6
59286764Sjlemon	}
59386764Sjlemon#endif
59486764Sjlemon	inp->inp_lport = sc->sc_inc.inc_lport;
59586764Sjlemon	if (in_pcbinshash(inp) != 0) {
59686764Sjlemon		/*
59786764Sjlemon		 * Undo the assignments above if we failed to
59886764Sjlemon		 * put the PCB on the hash lists.
59986764Sjlemon		 */
60086764Sjlemon#ifdef INET6
60186764Sjlemon		if (sc->sc_inc.inc_isipv6)
60286764Sjlemon			inp->in6p_laddr = in6addr_any;
60386764Sjlemon       		else
60486764Sjlemon#endif
60586764Sjlemon			inp->inp_laddr.s_addr = INADDR_ANY;
60686764Sjlemon		inp->inp_lport = 0;
60786764Sjlemon		goto abort;
60886764Sjlemon	}
60986764Sjlemon#ifdef IPSEC
61086764Sjlemon	/* copy old policy into new socket's */
61186764Sjlemon	if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
61286764Sjlemon		printf("syncache_expand: could not copy policy\n");
61386764Sjlemon#endif
61486764Sjlemon#ifdef INET6
61586764Sjlemon	if (sc->sc_inc.inc_isipv6) {
61686764Sjlemon		struct inpcb *oinp = sotoinpcb(lso);
61786764Sjlemon		struct in6_addr laddr6;
61886764Sjlemon		struct sockaddr_in6 *sin6;
61986764Sjlemon		/*
62086764Sjlemon		 * Inherit socket options from the listening socket.
62186764Sjlemon		 * Note that in6p_inputopts are not (and should not be)
62286764Sjlemon		 * copied, since it stores previously received options and is
62386764Sjlemon		 * used to detect if each new option is different than the
62486764Sjlemon		 * previous one and hence should be passed to a user.
62586764Sjlemon                 * If we copied in6p_inputopts, a user would not be able to
62686764Sjlemon		 * receive options just after calling the accept system call.
62786764Sjlemon		 */
62886764Sjlemon		inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
62986764Sjlemon		if (oinp->in6p_outputopts)
63086764Sjlemon			inp->in6p_outputopts =
63186764Sjlemon			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
63286764Sjlemon		inp->in6p_route = sc->sc_route6;
63386764Sjlemon		sc->sc_route6.ro_rt = NULL;
63486764Sjlemon
63586764Sjlemon		MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
63686958Stanimura		    M_SONAME, M_NOWAIT | M_ZERO);
63786764Sjlemon		if (sin6 == NULL)
63886764Sjlemon			goto abort;
63986764Sjlemon		sin6->sin6_family = AF_INET6;
64086764Sjlemon		sin6->sin6_len = sizeof(*sin6);
64186764Sjlemon		sin6->sin6_addr = sc->sc_inc.inc6_faddr;
64286764Sjlemon		sin6->sin6_port = sc->sc_inc.inc_fport;
64386764Sjlemon		laddr6 = inp->in6p_laddr;
64486764Sjlemon		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
64586764Sjlemon			inp->in6p_laddr = sc->sc_inc.inc6_laddr;
64690361Sjulian		if (in6_pcbconnect(inp, (struct sockaddr *)sin6, &thread0)) {
64786764Sjlemon			inp->in6p_laddr = laddr6;
64886764Sjlemon			FREE(sin6, M_SONAME);
64986764Sjlemon			goto abort;
65086764Sjlemon		}
65186764Sjlemon		FREE(sin6, M_SONAME);
65286764Sjlemon	} else
65386764Sjlemon#endif
65486764Sjlemon	{
65586764Sjlemon		struct in_addr laddr;
65686764Sjlemon		struct sockaddr_in *sin;
65786764Sjlemon
65886764Sjlemon		inp->inp_options = ip_srcroute();
65986764Sjlemon		if (inp->inp_options == NULL) {
66086764Sjlemon			inp->inp_options = sc->sc_ipopts;
66186764Sjlemon			sc->sc_ipopts = NULL;
66286764Sjlemon		}
66386764Sjlemon		inp->inp_route = sc->sc_route;
66486764Sjlemon		sc->sc_route.ro_rt = NULL;
66586764Sjlemon
66686764Sjlemon		MALLOC(sin, struct sockaddr_in *, sizeof *sin,
66786958Stanimura		    M_SONAME, M_NOWAIT | M_ZERO);
66886764Sjlemon		if (sin == NULL)
66986764Sjlemon			goto abort;
67086764Sjlemon		sin->sin_family = AF_INET;
67186764Sjlemon		sin->sin_len = sizeof(*sin);
67286764Sjlemon		sin->sin_addr = sc->sc_inc.inc_faddr;
67386764Sjlemon		sin->sin_port = sc->sc_inc.inc_fport;
67486764Sjlemon		bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
67586764Sjlemon		laddr = inp->inp_laddr;
67686764Sjlemon		if (inp->inp_laddr.s_addr == INADDR_ANY)
67786764Sjlemon			inp->inp_laddr = sc->sc_inc.inc_laddr;
67890361Sjulian		if (in_pcbconnect(inp, (struct sockaddr *)sin, &thread0)) {
67986764Sjlemon			inp->inp_laddr = laddr;
68086764Sjlemon			FREE(sin, M_SONAME);
68186764Sjlemon			goto abort;
68286764Sjlemon		}
68386764Sjlemon		FREE(sin, M_SONAME);
68486764Sjlemon	}
68586764Sjlemon
68686764Sjlemon	tp = intotcpcb(inp);
68786764Sjlemon	tp->t_state = TCPS_SYN_RECEIVED;
68886764Sjlemon	tp->iss = sc->sc_iss;
68986764Sjlemon	tp->irs = sc->sc_irs;
69086764Sjlemon	tcp_rcvseqinit(tp);
69186764Sjlemon	tcp_sendseqinit(tp);
69286764Sjlemon	tp->snd_wl1 = sc->sc_irs;
69386764Sjlemon	tp->rcv_up = sc->sc_irs + 1;
69486764Sjlemon	tp->rcv_wnd = sc->sc_wnd;
69586764Sjlemon	tp->rcv_adv += tp->rcv_wnd;
69686764Sjlemon
69790982Sjlemon	tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
69886764Sjlemon	if (sc->sc_flags & SCF_NOOPT)
69986764Sjlemon		tp->t_flags |= TF_NOOPT;
70086764Sjlemon	if (sc->sc_flags & SCF_WINSCALE) {
70186764Sjlemon		tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
70286764Sjlemon		tp->requested_s_scale = sc->sc_requested_s_scale;
70386764Sjlemon		tp->request_r_scale = sc->sc_request_r_scale;
70486764Sjlemon	}
70586764Sjlemon	if (sc->sc_flags & SCF_TIMESTAMP) {
70686764Sjlemon		tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
70786764Sjlemon		tp->ts_recent = sc->sc_tsrecent;
70886764Sjlemon		tp->ts_recent_age = ticks;
70986764Sjlemon	}
71086764Sjlemon	if (sc->sc_flags & SCF_CC) {
71186764Sjlemon		/*
71286764Sjlemon		 * Initialization of the tcpcb for transaction;
71386764Sjlemon		 *   set SND.WND = SEG.WND,
71486764Sjlemon		 *   initialize CCsend and CCrecv.
71586764Sjlemon		 */
71686764Sjlemon		tp->t_flags |= TF_REQ_CC|TF_RCVD_CC;
71786764Sjlemon		tp->cc_send = sc->sc_cc_send;
71886764Sjlemon		tp->cc_recv = sc->sc_cc_recv;
71986764Sjlemon	}
72086764Sjlemon
72186764Sjlemon	tcp_mss(tp, sc->sc_peer_mss);
72286764Sjlemon
72386764Sjlemon	/*
72486764Sjlemon	 * If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
72586764Sjlemon	 */
72686764Sjlemon	if (sc->sc_rxtslot != 0)
72786764Sjlemon                tp->snd_cwnd = tp->t_maxseg;
72886764Sjlemon	callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
72986764Sjlemon
73086764Sjlemon	tcpstat.tcps_accepts++;
73186764Sjlemon	return (so);
73286764Sjlemon
73386764Sjlemonabort:
73486764Sjlemon	if (so != NULL)
73586764Sjlemon		(void) soabort(so);
73686764Sjlemon	return (NULL);
73786764Sjlemon}
73886764Sjlemon
73986764Sjlemon/*
74086764Sjlemon * This function gets called when we receive an ACK for a
74186764Sjlemon * socket in the LISTEN state.  We look up the connection
74286764Sjlemon * in the syncache, and if it's there, we pull it out of
74386764Sjlemon * the cache and turn it into a full-blown connection in
74486764Sjlemon * the SYN-RECEIVED state.
74586764Sjlemon */
74686764Sjlemonint
74786764Sjlemonsyncache_expand(inc, th, sop, m)
74886764Sjlemon	struct in_conninfo *inc;
74986764Sjlemon	struct tcphdr *th;
75086764Sjlemon	struct socket **sop;
75186764Sjlemon	struct mbuf *m;
75286764Sjlemon{
75386764Sjlemon	struct syncache *sc;
75486764Sjlemon	struct syncache_head *sch;
75586764Sjlemon	struct socket *so;
75686764Sjlemon
75786764Sjlemon	sc = syncache_lookup(inc, &sch);
75888180Sjlemon	if (sc == NULL) {
75988180Sjlemon		/*
76088180Sjlemon		 * There is no syncache entry, so see if this ACK is
76188180Sjlemon		 * a returning syncookie.  To do this, first:
76288180Sjlemon		 *  A. See if this socket has had a syncache entry dropped in
76388180Sjlemon		 *     the past.  We don't want to accept a bogus syncookie
76488180Sjlemon 		 *     if we've never received a SYN.
76588180Sjlemon		 *  B. check that the syncookie is valid.  If it is, then
76688180Sjlemon		 *     cobble up a fake syncache entry, and return.
76788180Sjlemon		 */
76888180Sjlemon		if (!tcp_syncookies)
76988180Sjlemon			return (0);
77088180Sjlemon		sc = syncookie_lookup(inc, th, *sop);
77188180Sjlemon		if (sc == NULL)
77288180Sjlemon			return (0);
77388180Sjlemon		sch = NULL;
77488180Sjlemon		tcpstat.tcps_sc_recvcookie++;
77588180Sjlemon	}
77686764Sjlemon
77786764Sjlemon	/*
77886764Sjlemon	 * If seg contains an ACK, but not for our SYN/ACK, send a RST.
77986764Sjlemon	 */
78086764Sjlemon	if (th->th_ack != sc->sc_iss + 1)
78186764Sjlemon		return (0);
78286764Sjlemon
78396602Srwatson	so = syncache_socket(sc, *sop, m);
78486764Sjlemon	if (so == NULL) {
78586764Sjlemon#if 0
78686764Sjlemonresetandabort:
78786764Sjlemon		/* XXXjlemon check this - is this correct? */
78886764Sjlemon		(void) tcp_respond(NULL, m, m, th,
78986764Sjlemon		    th->th_seq + tlen, (tcp_seq)0, TH_RST|TH_ACK);
79086764Sjlemon#endif
79186764Sjlemon		m_freem(m);			/* XXX only needed for above */
79286764Sjlemon		tcpstat.tcps_sc_aborted++;
79386764Sjlemon	} else {
79486764Sjlemon		sc->sc_flags |= SCF_KEEPROUTE;
79586764Sjlemon		tcpstat.tcps_sc_completed++;
79686764Sjlemon	}
79786764Sjlemon	if (sch == NULL)
79886764Sjlemon		syncache_free(sc);
79986764Sjlemon	else
80086764Sjlemon		syncache_drop(sc, sch);
80186764Sjlemon	*sop = so;
80286764Sjlemon	return (1);
80386764Sjlemon}
80486764Sjlemon
80586764Sjlemon/*
80686764Sjlemon * Given a LISTEN socket and an inbound SYN request, add
80786764Sjlemon * this to the syn cache, and send back a segment:
80886764Sjlemon *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
80986764Sjlemon * to the source.
81086764Sjlemon *
81186764Sjlemon * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
81286764Sjlemon * Doing so would require that we hold onto the data and deliver it
81386764Sjlemon * to the application.  However, if we are the target of a SYN-flood
81486764Sjlemon * DoS attack, an attacker could send data which would eventually
81586764Sjlemon * consume all available buffer space if it were ACKed.  By not ACKing
81686764Sjlemon * the data, we avoid this DoS scenario.
81786764Sjlemon */
81886764Sjlemonint
81986764Sjlemonsyncache_add(inc, to, th, sop, m)
82086764Sjlemon	struct in_conninfo *inc;
82186764Sjlemon	struct tcpopt *to;
82286764Sjlemon	struct tcphdr *th;
82386764Sjlemon	struct socket **sop;
82486764Sjlemon	struct mbuf *m;
82586764Sjlemon{
82686764Sjlemon	struct tcpcb *tp;
82786764Sjlemon	struct socket *so;
82886764Sjlemon	struct syncache *sc = NULL;
82986764Sjlemon	struct syncache_head *sch;
83086764Sjlemon	struct mbuf *ipopts = NULL;
83186764Sjlemon	struct rmxp_tao *taop;
83286764Sjlemon	int i, s, win;
83386764Sjlemon
83486764Sjlemon	so = *sop;
83586764Sjlemon	tp = sototcpcb(so);
83686764Sjlemon
83786764Sjlemon	/*
83886764Sjlemon	 * Remember the IP options, if any.
83986764Sjlemon	 */
84086764Sjlemon#ifdef INET6
84186764Sjlemon	if (!inc->inc_isipv6)
84286764Sjlemon#endif
84386764Sjlemon		ipopts = ip_srcroute();
84486764Sjlemon
84586764Sjlemon	/*
84686764Sjlemon	 * See if we already have an entry for this connection.
84786764Sjlemon	 * If we do, resend the SYN,ACK, and reset the retransmit timer.
84886764Sjlemon	 *
84986764Sjlemon	 * XXX
85086764Sjlemon	 * should the syncache be re-initialized with the contents
85186764Sjlemon	 * of the new SYN here (which may have different options?)
85286764Sjlemon	 */
85386764Sjlemon	sc = syncache_lookup(inc, &sch);
85486764Sjlemon	if (sc != NULL) {
85586764Sjlemon		tcpstat.tcps_sc_dupsyn++;
85686764Sjlemon		if (ipopts) {
85786764Sjlemon			/*
85886764Sjlemon			 * If we were remembering a previous source route,
85986764Sjlemon			 * forget it and use the new one we've been given.
86086764Sjlemon			 */
86186764Sjlemon			if (sc->sc_ipopts)
86286764Sjlemon				(void) m_free(sc->sc_ipopts);
86386764Sjlemon			sc->sc_ipopts = ipopts;
86486764Sjlemon		}
86586764Sjlemon		/*
86686764Sjlemon		 * Update timestamp if present.
86786764Sjlemon		 */
86886764Sjlemon		if (sc->sc_flags & SCF_TIMESTAMP)
86986764Sjlemon			sc->sc_tsrecent = to->to_tsval;
87090556Sjlemon		/*
87190556Sjlemon		 * PCB may have changed, pick up new values.
87290556Sjlemon		 */
87390556Sjlemon		sc->sc_tp = tp;
87490556Sjlemon		sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
87586764Sjlemon		if (syncache_respond(sc, m) == 0) {
87686764Sjlemon		        s = splnet();
87786764Sjlemon			TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot],
87886764Sjlemon			    sc, sc_timerq);
87986764Sjlemon			SYNCACHE_TIMEOUT(sc, sc->sc_rxtslot);
88086764Sjlemon		        splx(s);
88186764Sjlemon		 	tcpstat.tcps_sndacks++;
88286764Sjlemon			tcpstat.tcps_sndtotal++;
88386764Sjlemon		}
88486764Sjlemon		*sop = NULL;
88586764Sjlemon		return (1);
88686764Sjlemon	}
88786764Sjlemon
88892760Sjeff	sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT);
88986764Sjlemon	if (sc == NULL) {
89086764Sjlemon		/*
89186764Sjlemon		 * The zone allocator couldn't provide more entries.
89286764Sjlemon		 * Treat this as if the cache was full; drop the oldest
89386764Sjlemon		 * entry and insert the new one.
89486764Sjlemon		 */
89586764Sjlemon		s = splnet();
89686764Sjlemon		for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
89786764Sjlemon			sc = TAILQ_FIRST(&tcp_syncache.timerq[i]);
89886764Sjlemon			if (sc != NULL)
89986764Sjlemon				break;
90086764Sjlemon		}
90188180Sjlemon		sc->sc_tp->ts_recent = ticks;
90286764Sjlemon		syncache_drop(sc, NULL);
90386764Sjlemon		splx(s);
90486764Sjlemon		tcpstat.tcps_sc_zonefail++;
90592760Sjeff		sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT);
90686764Sjlemon		if (sc == NULL) {
90786764Sjlemon			if (ipopts)
90886764Sjlemon				(void) m_free(ipopts);
90986764Sjlemon			return (0);
91086764Sjlemon		}
91186764Sjlemon	}
91286764Sjlemon
91386764Sjlemon	/*
91486764Sjlemon	 * Fill in the syncache values.
91586764Sjlemon	 */
91686958Stanimura	bzero(sc, sizeof(*sc));
91786764Sjlemon	sc->sc_tp = tp;
91886764Sjlemon	sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
91986764Sjlemon	sc->sc_ipopts = ipopts;
92086764Sjlemon	sc->sc_inc.inc_fport = inc->inc_fport;
92186764Sjlemon	sc->sc_inc.inc_lport = inc->inc_lport;
92286764Sjlemon#ifdef INET6
92386764Sjlemon	sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
92486764Sjlemon	if (inc->inc_isipv6) {
92586764Sjlemon		sc->sc_inc.inc6_faddr = inc->inc6_faddr;
92686764Sjlemon		sc->sc_inc.inc6_laddr = inc->inc6_laddr;
92786764Sjlemon		sc->sc_route6.ro_rt = NULL;
92886764Sjlemon	} else
92986764Sjlemon#endif
93086764Sjlemon	{
93186764Sjlemon		sc->sc_inc.inc_faddr = inc->inc_faddr;
93286764Sjlemon		sc->sc_inc.inc_laddr = inc->inc_laddr;
93386764Sjlemon		sc->sc_route.ro_rt = NULL;
93486764Sjlemon	}
93586764Sjlemon	sc->sc_irs = th->th_seq;
93688330Sjlemon	if (tcp_syncookies)
93788330Sjlemon		sc->sc_iss = syncookie_generate(sc);
93888330Sjlemon	else
93988330Sjlemon		sc->sc_iss = arc4random();
94086764Sjlemon
94186764Sjlemon	/* Initial receive window: clip sbspace to [0 .. TCP_MAXWIN] */
94286764Sjlemon	win = sbspace(&so->so_rcv);
94386764Sjlemon	win = imax(win, 0);
94486764Sjlemon	win = imin(win, TCP_MAXWIN);
94586764Sjlemon	sc->sc_wnd = win;
94686764Sjlemon
94786764Sjlemon	sc->sc_flags = 0;
94886764Sjlemon	sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0;
94986764Sjlemon	if (tcp_do_rfc1323) {
95086764Sjlemon		/*
95186764Sjlemon		 * A timestamp received in a SYN makes
95286764Sjlemon		 * it ok to send timestamp requests and replies.
95386764Sjlemon		 */
95486764Sjlemon		if (to->to_flags & TOF_TS) {
95586764Sjlemon			sc->sc_tsrecent = to->to_tsval;
95686764Sjlemon			sc->sc_flags |= SCF_TIMESTAMP;
95786764Sjlemon		}
95886764Sjlemon		if (to->to_flags & TOF_SCALE) {
95986764Sjlemon			int wscale = 0;
96086764Sjlemon
96186764Sjlemon			/* Compute proper scaling value from buffer space */
96286764Sjlemon			while (wscale < TCP_MAX_WINSHIFT &&
96386764Sjlemon			    (TCP_MAXWIN << wscale) < so->so_rcv.sb_hiwat)
96486764Sjlemon				wscale++;
96586764Sjlemon			sc->sc_request_r_scale = wscale;
96686764Sjlemon			sc->sc_requested_s_scale = to->to_requested_s_scale;
96786764Sjlemon			sc->sc_flags |= SCF_WINSCALE;
96886764Sjlemon		}
96986764Sjlemon	}
97086764Sjlemon	if (tcp_do_rfc1644) {
97186764Sjlemon		/*
97286764Sjlemon		 * A CC or CC.new option received in a SYN makes
97386764Sjlemon		 * it ok to send CC in subsequent segments.
97486764Sjlemon		 */
97586764Sjlemon		if (to->to_flags & (TOF_CC|TOF_CCNEW)) {
97686764Sjlemon			sc->sc_cc_recv = to->to_cc;
97786764Sjlemon			sc->sc_cc_send = CC_INC(tcp_ccgen);
97886764Sjlemon			sc->sc_flags |= SCF_CC;
97986764Sjlemon		}
98086764Sjlemon	}
98186764Sjlemon	if (tp->t_flags & TF_NOOPT)
98286764Sjlemon		sc->sc_flags = SCF_NOOPT;
98386764Sjlemon
98486764Sjlemon	/*
98586764Sjlemon	 * XXX
98686764Sjlemon	 * We have the option here of not doing TAO (even if the segment
98786764Sjlemon	 * qualifies) and instead fall back to a normal 3WHS via the syncache.
98886764Sjlemon	 * This allows us to apply synflood protection to TAO-qualifying SYNs
98986764Sjlemon	 * also. However, there should be a hueristic to determine when to
99086764Sjlemon	 * do this, and is not present at the moment.
99186764Sjlemon	 */
99286764Sjlemon
99386764Sjlemon	/*
99486764Sjlemon	 * Perform TAO test on incoming CC (SEG.CC) option, if any.
99586764Sjlemon	 * - compare SEG.CC against cached CC from the same host, if any.
99686764Sjlemon	 * - if SEG.CC > chached value, SYN must be new and is accepted
99786764Sjlemon	 *	immediately: save new CC in the cache, mark the socket
99886764Sjlemon	 *	connected, enter ESTABLISHED state, turn on flag to
99986764Sjlemon	 *	send a SYN in the next segment.
100086764Sjlemon	 *	A virtual advertised window is set in rcv_adv to
100186764Sjlemon	 *	initialize SWS prevention.  Then enter normal segment
100286764Sjlemon	 *	processing: drop SYN, process data and FIN.
100386764Sjlemon	 * - otherwise do a normal 3-way handshake.
100486764Sjlemon	 */
100586764Sjlemon	taop = tcp_gettaocache(&sc->sc_inc);
100686764Sjlemon	if ((to->to_flags & TOF_CC) != 0) {
100786764Sjlemon		if (((tp->t_flags & TF_NOPUSH) != 0) &&
100886764Sjlemon		    sc->sc_flags & SCF_CC &&
100986764Sjlemon		    taop != NULL && taop->tao_cc != 0 &&
101086764Sjlemon		    CC_GT(to->to_cc, taop->tao_cc)) {
101186764Sjlemon			sc->sc_rxtslot = 0;
101296602Srwatson			so = syncache_socket(sc, *sop, m);
101386764Sjlemon			if (so != NULL) {
101486764Sjlemon				sc->sc_flags |= SCF_KEEPROUTE;
101586764Sjlemon				taop->tao_cc = to->to_cc;
101686764Sjlemon				*sop = so;
101786764Sjlemon			}
101886764Sjlemon			syncache_free(sc);
101986764Sjlemon			return (so != NULL);
102086764Sjlemon		}
102186764Sjlemon	} else {
102286764Sjlemon		/*
102386764Sjlemon		 * No CC option, but maybe CC.NEW: invalidate cached value.
102486764Sjlemon		 */
102586764Sjlemon		if (taop != NULL)
102686764Sjlemon			taop->tao_cc = 0;
102786764Sjlemon	}
102886764Sjlemon	/*
102986764Sjlemon	 * TAO test failed or there was no CC option,
103086764Sjlemon	 *    do a standard 3-way handshake.
103186764Sjlemon	 */
103288180Sjlemon	if (syncache_respond(sc, m) == 0) {
103388180Sjlemon		syncache_insert(sc, sch);
103488180Sjlemon		tcpstat.tcps_sndacks++;
103588180Sjlemon		tcpstat.tcps_sndtotal++;
103686764Sjlemon	} else {
103786764Sjlemon		syncache_free(sc);
103888180Sjlemon		tcpstat.tcps_sc_dropped++;
103986764Sjlemon	}
104086764Sjlemon	*sop = NULL;
104186764Sjlemon	return (1);
104286764Sjlemon}
104386764Sjlemon
104486764Sjlemonstatic int
104586764Sjlemonsyncache_respond(sc, m)
104686764Sjlemon	struct syncache *sc;
104786764Sjlemon	struct mbuf *m;
104886764Sjlemon{
104986764Sjlemon	u_int8_t *optp;
105086764Sjlemon	int optlen, error;
105186764Sjlemon	u_int16_t tlen, hlen, mssopt;
105286764Sjlemon	struct ip *ip = NULL;
105386764Sjlemon	struct rtentry *rt;
105486764Sjlemon	struct tcphdr *th;
105586764Sjlemon#ifdef INET6
105686764Sjlemon	struct ip6_hdr *ip6 = NULL;
105786764Sjlemon#endif
105886764Sjlemon
105986764Sjlemon#ifdef INET6
106086764Sjlemon	if (sc->sc_inc.inc_isipv6) {
106186764Sjlemon		rt = tcp_rtlookup6(&sc->sc_inc);
106286764Sjlemon		if (rt != NULL)
106386764Sjlemon			mssopt = rt->rt_ifp->if_mtu -
106486764Sjlemon			     (sizeof(struct ip6_hdr) + sizeof(struct tcphdr));
106586764Sjlemon		else
106686764Sjlemon			mssopt = tcp_v6mssdflt;
106786764Sjlemon		hlen = sizeof(struct ip6_hdr);
106886764Sjlemon	} else
106986764Sjlemon#endif
107086764Sjlemon	{
107186764Sjlemon		rt = tcp_rtlookup(&sc->sc_inc);
107286764Sjlemon		if (rt != NULL)
107386764Sjlemon			mssopt = rt->rt_ifp->if_mtu -
107486764Sjlemon			     (sizeof(struct ip) + sizeof(struct tcphdr));
107586764Sjlemon		else
107686764Sjlemon			mssopt = tcp_mssdflt;
107786764Sjlemon		hlen = sizeof(struct ip);
107886764Sjlemon	}
107986764Sjlemon
108086764Sjlemon	/* Compute the size of the TCP options. */
108186764Sjlemon	if (sc->sc_flags & SCF_NOOPT) {
108286764Sjlemon		optlen = 0;
108386764Sjlemon	} else {
108486764Sjlemon		optlen = TCPOLEN_MAXSEG +
108586764Sjlemon		    ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
108686764Sjlemon		    ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0) +
108786764Sjlemon		    ((sc->sc_flags & SCF_CC) ? TCPOLEN_CC_APPA * 2 : 0);
108886764Sjlemon	}
108986764Sjlemon	tlen = hlen + sizeof(struct tcphdr) + optlen;
109086764Sjlemon
109186764Sjlemon	/*
109286764Sjlemon	 * XXX
109386764Sjlemon	 * assume that the entire packet will fit in a header mbuf
109486764Sjlemon	 */
109586764Sjlemon	KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small"));
109686764Sjlemon
109786764Sjlemon	/*
109886764Sjlemon	 * XXX shouldn't this reuse the mbuf if possible ?
109986764Sjlemon	 * Create the IP+TCP header from scratch.
110086764Sjlemon	 */
110186764Sjlemon	if (m)
110286764Sjlemon		m_freem(m);
110386764Sjlemon
110486764Sjlemon	m = m_gethdr(M_DONTWAIT, MT_HEADER);
110586764Sjlemon	if (m == NULL)
110686764Sjlemon		return (ENOBUFS);
110786764Sjlemon	m->m_data += max_linkhdr;
110886764Sjlemon	m->m_len = tlen;
110986764Sjlemon	m->m_pkthdr.len = tlen;
111086764Sjlemon	m->m_pkthdr.rcvif = NULL;
1111101106Srwatson#ifdef MAC
1112101106Srwatson	mac_create_mbuf_from_socket(sc->sc_tp->t_inpcb->inp_socket, m);
1113101106Srwatson#endif
111486764Sjlemon
111586764Sjlemon#ifdef INET6
111686764Sjlemon	if (sc->sc_inc.inc_isipv6) {
111786764Sjlemon		ip6 = mtod(m, struct ip6_hdr *);
111886764Sjlemon		ip6->ip6_vfc = IPV6_VERSION;
111986764Sjlemon		ip6->ip6_nxt = IPPROTO_TCP;
112086764Sjlemon		ip6->ip6_src = sc->sc_inc.inc6_laddr;
112186764Sjlemon		ip6->ip6_dst = sc->sc_inc.inc6_faddr;
112286764Sjlemon		ip6->ip6_plen = htons(tlen - hlen);
112386764Sjlemon		/* ip6_hlim is set after checksum */
112486764Sjlemon		/* ip6_flow = ??? */
112586764Sjlemon
112686764Sjlemon		th = (struct tcphdr *)(ip6 + 1);
112786764Sjlemon	} else
112886764Sjlemon#endif
112986764Sjlemon	{
113086764Sjlemon		ip = mtod(m, struct ip *);
113186764Sjlemon		ip->ip_v = IPVERSION;
113286764Sjlemon		ip->ip_hl = sizeof(struct ip) >> 2;
113386764Sjlemon		ip->ip_len = tlen;
113486764Sjlemon		ip->ip_id = 0;
113586764Sjlemon		ip->ip_off = 0;
113686764Sjlemon		ip->ip_sum = 0;
113786764Sjlemon		ip->ip_p = IPPROTO_TCP;
113886764Sjlemon		ip->ip_src = sc->sc_inc.inc_laddr;
113986764Sjlemon		ip->ip_dst = sc->sc_inc.inc_faddr;
114098204Ssilby		ip->ip_ttl = sc->sc_tp->t_inpcb->inp_ip_ttl;   /* XXX */
114198204Ssilby		ip->ip_tos = sc->sc_tp->t_inpcb->inp_ip_tos;   /* XXX */
114286764Sjlemon
114398204Ssilby		/*
1144101405Ssilby		 * See if we should do MTU discovery.  Route lookups are expensive,
1145101405Ssilby		 * so we will only unset the DF bit if:
1146101405Ssilby		 *
1147101405Ssilby		 *	1) path_mtu_discovery is disabled
1148101405Ssilby		 *	2) the SCF_UNREACH flag has been set
114998204Ssilby		 */
115098204Ssilby		if (path_mtu_discovery
1151101405Ssilby		    && ((sc->sc_flags & SCF_UNREACH) == 0)) {
115298204Ssilby		       ip->ip_off |= IP_DF;
115398204Ssilby		}
115498204Ssilby
115586764Sjlemon		th = (struct tcphdr *)(ip + 1);
115686764Sjlemon	}
115786764Sjlemon	th->th_sport = sc->sc_inc.inc_lport;
115886764Sjlemon	th->th_dport = sc->sc_inc.inc_fport;
115986764Sjlemon
116086764Sjlemon	th->th_seq = htonl(sc->sc_iss);
116186764Sjlemon	th->th_ack = htonl(sc->sc_irs + 1);
116286764Sjlemon	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
116386764Sjlemon	th->th_x2 = 0;
116486764Sjlemon	th->th_flags = TH_SYN|TH_ACK;
116586764Sjlemon	th->th_win = htons(sc->sc_wnd);
116686764Sjlemon	th->th_urp = 0;
116786764Sjlemon
116886764Sjlemon	/* Tack on the TCP options. */
116986764Sjlemon	if (optlen == 0)
117086764Sjlemon		goto no_options;
117186764Sjlemon	optp = (u_int8_t *)(th + 1);
117286764Sjlemon	*optp++ = TCPOPT_MAXSEG;
117386764Sjlemon	*optp++ = TCPOLEN_MAXSEG;
117486764Sjlemon	*optp++ = (mssopt >> 8) & 0xff;
117586764Sjlemon	*optp++ = mssopt & 0xff;
117686764Sjlemon
117786764Sjlemon	if (sc->sc_flags & SCF_WINSCALE) {
117886764Sjlemon		*((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
117986764Sjlemon		    TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
118086764Sjlemon		    sc->sc_request_r_scale);
118186764Sjlemon		optp += 4;
118286764Sjlemon	}
118386764Sjlemon
118486764Sjlemon	if (sc->sc_flags & SCF_TIMESTAMP) {
118586764Sjlemon		u_int32_t *lp = (u_int32_t *)(optp);
118686764Sjlemon
118786764Sjlemon		/* Form timestamp option as shown in appendix A of RFC 1323. */
118886764Sjlemon		*lp++ = htonl(TCPOPT_TSTAMP_HDR);
118986764Sjlemon		*lp++ = htonl(ticks);
119086764Sjlemon		*lp   = htonl(sc->sc_tsrecent);
119186764Sjlemon		optp += TCPOLEN_TSTAMP_APPA;
119286764Sjlemon	}
119386764Sjlemon
119486764Sjlemon	/*
119586764Sjlemon         * Send CC and CC.echo if we received CC from our peer.
119686764Sjlemon         */
119786764Sjlemon        if (sc->sc_flags & SCF_CC) {
119886764Sjlemon		u_int32_t *lp = (u_int32_t *)(optp);
119986764Sjlemon
120086764Sjlemon		*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC));
120186764Sjlemon		*lp++ = htonl(sc->sc_cc_send);
120286764Sjlemon		*lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CCECHO));
120386764Sjlemon		*lp   = htonl(sc->sc_cc_recv);
120486764Sjlemon		optp += TCPOLEN_CC_APPA * 2;
120586764Sjlemon	}
120686764Sjlemonno_options:
120786764Sjlemon
120886764Sjlemon#ifdef INET6
120986764Sjlemon	if (sc->sc_inc.inc_isipv6) {
121086764Sjlemon		struct route_in6 *ro6 = &sc->sc_route6;
121186764Sjlemon
121286764Sjlemon		th->th_sum = 0;
121386764Sjlemon		th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
121486764Sjlemon		ip6->ip6_hlim = in6_selecthlim(NULL,
121586764Sjlemon		    ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL);
1216105194Ssam		error = ip6_output(m, NULL, ro6, 0, NULL, NULL,
1217105194Ssam				sc->sc_tp->t_inpcb);
121886764Sjlemon	} else
121986764Sjlemon#endif
122086764Sjlemon	{
122186764Sjlemon        	th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
122286764Sjlemon		    htons(tlen - hlen + IPPROTO_TCP));
122386764Sjlemon		m->m_pkthdr.csum_flags = CSUM_TCP;
122486764Sjlemon		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
1225105194Ssam		error = ip_output(m, sc->sc_ipopts, &sc->sc_route, 0, NULL,
1226105194Ssam				sc->sc_tp->t_inpcb);
122786764Sjlemon	}
122886764Sjlemon	return (error);
122986764Sjlemon}
123088180Sjlemon
123188180Sjlemon/*
123288180Sjlemon * cookie layers:
123388180Sjlemon *
123488180Sjlemon *	|. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .|
123588180Sjlemon *	| peer iss                                                      |
123688180Sjlemon *	| MD5(laddr,faddr,lport,fport,secret)             |. . . . . . .|
123788180Sjlemon *	|                     0                       |(A)|             |
123888180Sjlemon * (A): peer mss index
123988180Sjlemon */
124088180Sjlemon
124188180Sjlemon/*
124288180Sjlemon * The values below are chosen to minimize the size of the tcp_secret
124388180Sjlemon * table, as well as providing roughly a 4 second lifetime for the cookie.
124488180Sjlemon */
124588180Sjlemon
124688180Sjlemon#define SYNCOOKIE_HASHSHIFT	2	/* log2(# of 32bit words from hash) */
124788180Sjlemon#define SYNCOOKIE_WNDBITS	7	/* exposed bits for window indexing */
124888180Sjlemon#define SYNCOOKIE_TIMESHIFT	5	/* scale ticks to window time units */
124988180Sjlemon
125088180Sjlemon#define SYNCOOKIE_HASHMASK	((1 << SYNCOOKIE_HASHSHIFT) - 1)
125188180Sjlemon#define SYNCOOKIE_WNDMASK	((1 << SYNCOOKIE_WNDBITS) - 1)
125288180Sjlemon#define SYNCOOKIE_NSECRETS	(1 << (SYNCOOKIE_WNDBITS - SYNCOOKIE_HASHSHIFT))
125388180Sjlemon#define SYNCOOKIE_TIMEOUT \
125488180Sjlemon    (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT))
125588180Sjlemon#define SYNCOOKIE_DATAMASK 	((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK)
125688180Sjlemon
125788180Sjlemonstatic struct {
125888180Sjlemon	u_int32_t	ts_secbits;
125988180Sjlemon	u_int		ts_expire;
126088180Sjlemon} tcp_secret[SYNCOOKIE_NSECRETS];
126188180Sjlemon
126288180Sjlemonstatic int tcp_msstab[] = { 0, 536, 1460, 8960 };
126388180Sjlemon
126488180Sjlemonstatic MD5_CTX syn_ctx;
126588180Sjlemon
126688180Sjlemon#define MD5Add(v)	MD5Update(&syn_ctx, (u_char *)&v, sizeof(v))
126788180Sjlemon
126888180Sjlemon/*
126988180Sjlemon * Consider the problem of a recreated (and retransmitted) cookie.  If the
127088180Sjlemon * original SYN was accepted, the connection is established.  The second
127188180Sjlemon * SYN is inflight, and if it arrives with an ISN that falls within the
127288180Sjlemon * receive window, the connection is killed.
127388180Sjlemon *
127488180Sjlemon * However, since cookies have other problems, this may not be worth
127588180Sjlemon * worrying about.
127688180Sjlemon */
127788180Sjlemon
127888180Sjlemonstatic u_int32_t
127988180Sjlemonsyncookie_generate(struct syncache *sc)
128088180Sjlemon{
128188180Sjlemon	u_int32_t md5_buffer[4];
128288180Sjlemon	u_int32_t data;
128388180Sjlemon	int wnd, idx;
128488180Sjlemon
128588180Sjlemon	wnd = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK;
128688180Sjlemon	idx = wnd >> SYNCOOKIE_HASHSHIFT;
128788180Sjlemon	if (tcp_secret[idx].ts_expire < ticks) {
128888180Sjlemon		tcp_secret[idx].ts_secbits = arc4random();
128988180Sjlemon		tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT;
129088180Sjlemon	}
129188180Sjlemon	for (data = sizeof(tcp_msstab) / sizeof(int) - 1; data > 0; data--)
129288180Sjlemon		if (tcp_msstab[data] <= sc->sc_peer_mss)
129388180Sjlemon			break;
129488180Sjlemon	data = (data << SYNCOOKIE_WNDBITS) | wnd;
129588180Sjlemon	data ^= sc->sc_irs;				/* peer's iss */
129688180Sjlemon	MD5Init(&syn_ctx);
129788180Sjlemon#ifdef INET6
129888180Sjlemon	if (sc->sc_inc.inc_isipv6) {
129988180Sjlemon		MD5Add(sc->sc_inc.inc6_laddr);
130088180Sjlemon		MD5Add(sc->sc_inc.inc6_faddr);
130188180Sjlemon	} else
130288180Sjlemon#endif
130388180Sjlemon	{
130488180Sjlemon		MD5Add(sc->sc_inc.inc_laddr);
130588180Sjlemon		MD5Add(sc->sc_inc.inc_faddr);
130688180Sjlemon	}
130788180Sjlemon	MD5Add(sc->sc_inc.inc_lport);
130888180Sjlemon	MD5Add(sc->sc_inc.inc_fport);
130988180Sjlemon	MD5Add(tcp_secret[idx].ts_secbits);
131088180Sjlemon	MD5Final((u_char *)&md5_buffer, &syn_ctx);
131188180Sjlemon	data ^= (md5_buffer[wnd & SYNCOOKIE_HASHMASK] & ~SYNCOOKIE_WNDMASK);
131288180Sjlemon	return (data);
131388180Sjlemon}
131488180Sjlemon
131588180Sjlemonstatic struct syncache *
131688180Sjlemonsyncookie_lookup(inc, th, so)
131788180Sjlemon	struct in_conninfo *inc;
131888180Sjlemon	struct tcphdr *th;
131988180Sjlemon	struct socket *so;
132088180Sjlemon{
132188180Sjlemon	u_int32_t md5_buffer[4];
132288180Sjlemon	struct syncache *sc;
132388180Sjlemon	u_int32_t data;
132488180Sjlemon	int wnd, idx;
132588180Sjlemon
132688180Sjlemon	data = (th->th_ack - 1) ^ (th->th_seq - 1);	/* remove ISS */
132788180Sjlemon	wnd = data & SYNCOOKIE_WNDMASK;
132888180Sjlemon	idx = wnd >> SYNCOOKIE_HASHSHIFT;
132988180Sjlemon	if (tcp_secret[idx].ts_expire < ticks ||
133088180Sjlemon	    sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks)
133188180Sjlemon		return (NULL);
133288180Sjlemon	MD5Init(&syn_ctx);
133388180Sjlemon#ifdef INET6
133488180Sjlemon	if (inc->inc_isipv6) {
133588180Sjlemon		MD5Add(inc->inc6_laddr);
133688180Sjlemon		MD5Add(inc->inc6_faddr);
133788180Sjlemon	} else
133888180Sjlemon#endif
133988180Sjlemon	{
134088180Sjlemon		MD5Add(inc->inc_laddr);
134188180Sjlemon		MD5Add(inc->inc_faddr);
134288180Sjlemon	}
134388180Sjlemon	MD5Add(inc->inc_lport);
134488180Sjlemon	MD5Add(inc->inc_fport);
134588180Sjlemon	MD5Add(tcp_secret[idx].ts_secbits);
134688180Sjlemon	MD5Final((u_char *)&md5_buffer, &syn_ctx);
134788180Sjlemon	data ^= md5_buffer[wnd & SYNCOOKIE_HASHMASK];
134888180Sjlemon	if ((data & ~SYNCOOKIE_DATAMASK) != 0)
134988180Sjlemon		return (NULL);
135088180Sjlemon	data = data >> SYNCOOKIE_WNDBITS;
135188180Sjlemon
135292760Sjeff	sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT);
135388180Sjlemon	if (sc == NULL)
135488180Sjlemon		return (NULL);
135588180Sjlemon	/*
135688180Sjlemon	 * Fill in the syncache values.
135788180Sjlemon	 * XXX duplicate code from syncache_add
135888180Sjlemon	 */
135988180Sjlemon	sc->sc_ipopts = NULL;
136088180Sjlemon	sc->sc_inc.inc_fport = inc->inc_fport;
136188180Sjlemon	sc->sc_inc.inc_lport = inc->inc_lport;
136288180Sjlemon#ifdef INET6
136388180Sjlemon	sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
136488180Sjlemon	if (inc->inc_isipv6) {
136588180Sjlemon		sc->sc_inc.inc6_faddr = inc->inc6_faddr;
136688180Sjlemon		sc->sc_inc.inc6_laddr = inc->inc6_laddr;
136788180Sjlemon		sc->sc_route6.ro_rt = NULL;
136888180Sjlemon	} else
136988180Sjlemon#endif
137088180Sjlemon	{
137188180Sjlemon		sc->sc_inc.inc_faddr = inc->inc_faddr;
137288180Sjlemon		sc->sc_inc.inc_laddr = inc->inc_laddr;
137388180Sjlemon		sc->sc_route.ro_rt = NULL;
137488180Sjlemon	}
137588180Sjlemon	sc->sc_irs = th->th_seq - 1;
137688180Sjlemon	sc->sc_iss = th->th_ack - 1;
137788180Sjlemon	wnd = sbspace(&so->so_rcv);
137888180Sjlemon	wnd = imax(wnd, 0);
137988180Sjlemon	wnd = imin(wnd, TCP_MAXWIN);
138088180Sjlemon	sc->sc_wnd = wnd;
138188180Sjlemon	sc->sc_flags = 0;
138288180Sjlemon	sc->sc_rxtslot = 0;
138388180Sjlemon	sc->sc_peer_mss = tcp_msstab[data];
138488180Sjlemon	return (sc);
138588180Sjlemon}
1386