tcp_syncache.c revision 163606
186764Sjlemon/*- 2141063Srwatson * Copyright (c) 2001 McAfee, Inc. 3159695Sandre * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG 486764Sjlemon * All rights reserved. 586764Sjlemon * 686764Sjlemon * This software was developed for the FreeBSD Project by Jonathan Lemon 7141063Srwatson * and McAfee Research, the Security Research Division of McAfee, Inc. under 8141063Srwatson * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the 986764Sjlemon * DARPA CHATS research program. 1086764Sjlemon * 1186764Sjlemon * Redistribution and use in source and binary forms, with or without 1286764Sjlemon * modification, are permitted provided that the following conditions 1386764Sjlemon * are met: 1486764Sjlemon * 1. Redistributions of source code must retain the above copyright 1586764Sjlemon * notice, this list of conditions and the following disclaimer. 1686764Sjlemon * 2. Redistributions in binary form must reproduce the above copyright 1786764Sjlemon * notice, this list of conditions and the following disclaimer in the 1886764Sjlemon * documentation and/or other materials provided with the distribution. 1986764Sjlemon * 2086764Sjlemon * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 2186764Sjlemon * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2286764Sjlemon * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2386764Sjlemon * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2486764Sjlemon * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2586764Sjlemon * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2686764Sjlemon * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2786764Sjlemon * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2886764Sjlemon * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2986764Sjlemon * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3086764Sjlemon * SUCH DAMAGE. 3186764Sjlemon * 3286764Sjlemon * $FreeBSD: head/sys/netinet/tcp_syncache.c 163606 2006-10-22 11:52:19Z rwatson $ 3386764Sjlemon */ 3486764Sjlemon 35125680Sbms#include "opt_inet.h" 3686764Sjlemon#include "opt_inet6.h" 3786764Sjlemon#include "opt_ipsec.h" 38101106Srwatson#include "opt_mac.h" 3986764Sjlemon 4086764Sjlemon#include <sys/param.h> 4186764Sjlemon#include <sys/systm.h> 4286764Sjlemon#include <sys/kernel.h> 4386764Sjlemon#include <sys/sysctl.h> 44159695Sandre#include <sys/lock.h> 45159695Sandre#include <sys/mutex.h> 4686764Sjlemon#include <sys/malloc.h> 4786764Sjlemon#include <sys/mbuf.h> 4886764Sjlemon#include <sys/md5.h> 4986764Sjlemon#include <sys/proc.h> /* for proc0 declaration */ 5086764Sjlemon#include <sys/random.h> 5186764Sjlemon#include <sys/socket.h> 5286764Sjlemon#include <sys/socketvar.h> 5386764Sjlemon 54162278Sandre#include <vm/uma.h> 55162278Sandre 5686764Sjlemon#include <net/if.h> 5786764Sjlemon#include <net/route.h> 5886764Sjlemon 5986764Sjlemon#include <netinet/in.h> 6086764Sjlemon#include <netinet/in_systm.h> 6186764Sjlemon#include <netinet/ip.h> 6286764Sjlemon#include <netinet/in_var.h> 6386764Sjlemon#include <netinet/in_pcb.h> 6486764Sjlemon#include <netinet/ip_var.h> 65152592Sandre#include <netinet/ip_options.h> 6686764Sjlemon#ifdef INET6 6786764Sjlemon#include <netinet/ip6.h> 6886764Sjlemon#include <netinet/icmp6.h> 6986764Sjlemon#include <netinet6/nd6.h> 7086764Sjlemon#include <netinet6/ip6_var.h> 7186764Sjlemon#include <netinet6/in6_pcb.h> 7286764Sjlemon#endif 7386764Sjlemon#include <netinet/tcp.h> 7486764Sjlemon#include <netinet/tcp_fsm.h> 7586764Sjlemon#include <netinet/tcp_seq.h> 7686764Sjlemon#include <netinet/tcp_timer.h> 7786764Sjlemon#include <netinet/tcp_var.h> 7886764Sjlemon#ifdef INET6 7986764Sjlemon#include <netinet6/tcp6_var.h> 8086764Sjlemon#endif 8186764Sjlemon 8286764Sjlemon#ifdef IPSEC 8386764Sjlemon#include <netinet6/ipsec.h> 8486764Sjlemon#ifdef INET6 8586764Sjlemon#include <netinet6/ipsec6.h> 8686764Sjlemon#endif 8786764Sjlemon#endif /*IPSEC*/ 8886764Sjlemon 89105199Ssam#ifdef FAST_IPSEC 90105199Ssam#include <netipsec/ipsec.h> 91105199Ssam#ifdef INET6 92105199Ssam#include <netipsec/ipsec6.h> 93105199Ssam#endif 94105199Ssam#include <netipsec/key.h> 95105199Ssam#endif /*FAST_IPSEC*/ 96105199Ssam 9786764Sjlemon#include <machine/in_cksum.h> 9886764Sjlemon 99163606Srwatson#include <security/mac/mac_framework.h> 100163606Srwatson 10188180Sjlemonstatic int tcp_syncookies = 1; 10288180SjlemonSYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW, 103133874Srwatson &tcp_syncookies, 0, 10488180Sjlemon "Use TCP SYN cookies if the syncache overflows"); 10588180Sjlemon 106162277Sandrestatic int tcp_syncookiesonly = 0; 107162277SandreSYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW, 108162277Sandre &tcp_syncookiesonly, 0, 109162277Sandre "Use only TCP SYN cookies"); 110162277Sandre 111162277Sandre#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */ 112162277Sandre#define SYNCOOKIE_LIFETIME 16 /* seconds */ 113162277Sandre 114159725Sandrestruct syncache { 115159725Sandre TAILQ_ENTRY(syncache) sc_hash; 116159725Sandre struct in_conninfo sc_inc; /* addresses */ 117159725Sandre u_long sc_rxttime; /* retransmit time */ 118159725Sandre u_int16_t sc_rxmits; /* retransmit counter */ 119159725Sandre 120159950Sandre u_int32_t sc_tsreflect; /* timestamp to reflect */ 121162277Sandre u_int32_t sc_ts; /* our timestamp to send */ 122162277Sandre u_int32_t sc_tsoff; /* ts offset w/ syncookies */ 123159725Sandre u_int32_t sc_flowlabel; /* IPv6 flowlabel */ 124159725Sandre tcp_seq sc_irs; /* seq from peer */ 125159725Sandre tcp_seq sc_iss; /* our ISS */ 126159725Sandre struct mbuf *sc_ipopts; /* source route */ 127159725Sandre 128159725Sandre u_int16_t sc_peer_mss; /* peer's MSS */ 129159725Sandre u_int16_t sc_wnd; /* advertised window */ 130159725Sandre u_int8_t sc_ip_ttl; /* IPv4 TTL */ 131159725Sandre u_int8_t sc_ip_tos; /* IPv4 TOS */ 132159725Sandre u_int8_t sc_requested_s_scale:4, 133159950Sandre sc_requested_r_scale:4; 134159725Sandre u_int8_t sc_flags; 135159725Sandre#define SCF_NOOPT 0x01 /* no TCP options */ 136159725Sandre#define SCF_WINSCALE 0x02 /* negotiated window scaling */ 137159725Sandre#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */ 138159950Sandre /* MSS is implicit */ 139159725Sandre#define SCF_UNREACH 0x10 /* icmp unreachable received */ 140159725Sandre#define SCF_SIGNATURE 0x20 /* send MD5 digests */ 141159725Sandre#define SCF_SACK 0x80 /* send SACK option */ 142159725Sandre}; 143159725Sandre 144159725Sandrestruct syncache_head { 145159725Sandre struct mtx sch_mtx; 146159725Sandre TAILQ_HEAD(sch_head, syncache) sch_bucket; 147159725Sandre struct callout sch_timer; 148159725Sandre int sch_nextc; 149159725Sandre u_int sch_length; 150162277Sandre u_int sch_oddeven; 151162277Sandre u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE]; 152162277Sandre u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE]; 153162277Sandre u_int sch_reseed; /* time_uptime, seconds */ 154159725Sandre}; 155159725Sandre 15686764Sjlemonstatic void syncache_drop(struct syncache *, struct syncache_head *); 15786764Sjlemonstatic void syncache_free(struct syncache *); 15888180Sjlemonstatic void syncache_insert(struct syncache *, struct syncache_head *); 15986764Sjlemonstruct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **); 16086764Sjlemonstatic int syncache_respond(struct syncache *, struct mbuf *); 161133874Srwatsonstatic struct socket *syncache_socket(struct syncache *, struct socket *, 16296602Srwatson struct mbuf *m); 16386764Sjlemonstatic void syncache_timer(void *); 164162277Sandrestatic void syncookie_generate(struct syncache_head *, struct syncache *, 165162277Sandre u_int32_t *); 166159697Sandrestatic struct syncache 167162277Sandre *syncookie_lookup(struct in_conninfo *, struct syncache_head *, 168162277Sandre struct syncache *, struct tcpopt *, struct tcphdr *, 169159697Sandre struct socket *); 17086764Sjlemon 17186764Sjlemon/* 17286764Sjlemon * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies. 17386764Sjlemon * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds, 17486764Sjlemon * the odds are that the user has given up attempting to connect by then. 17586764Sjlemon */ 17686764Sjlemon#define SYNCACHE_MAXREXMTS 3 17786764Sjlemon 17886764Sjlemon/* Arbitrary values */ 17986764Sjlemon#define TCP_SYNCACHE_HASHSIZE 512 18086764Sjlemon#define TCP_SYNCACHE_BUCKETLIMIT 30 18186764Sjlemon 18286764Sjlemonstruct tcp_syncache { 18386764Sjlemon struct syncache_head *hashbase; 18492760Sjeff uma_zone_t zone; 18586764Sjlemon u_int hashsize; 18686764Sjlemon u_int hashmask; 18786764Sjlemon u_int bucket_limit; 188159695Sandre u_int cache_count; /* XXX: unprotected */ 18986764Sjlemon u_int cache_limit; 19086764Sjlemon u_int rexmt_limit; 19186764Sjlemon u_int hash_secret; 19286764Sjlemon}; 19386764Sjlemonstatic struct tcp_syncache tcp_syncache; 19486764Sjlemon 19586764SjlemonSYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); 19686764Sjlemon 197121307SsilbySYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, 19886764Sjlemon &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache"); 19986764Sjlemon 200121307SsilbySYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, 20186764Sjlemon &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache"); 20286764Sjlemon 20386764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD, 20486764Sjlemon &tcp_syncache.cache_count, 0, "Current number of entries in syncache"); 20586764Sjlemon 206121307SsilbySYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, 20786764Sjlemon &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable"); 20886764Sjlemon 20986764SjlemonSYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, 21086764Sjlemon &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); 21186764Sjlemon 21286764Sjlemonstatic MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); 21386764Sjlemon 214133874Srwatson#define SYNCACHE_HASH(inc, mask) \ 21586764Sjlemon ((tcp_syncache.hash_secret ^ \ 21686764Sjlemon (inc)->inc_faddr.s_addr ^ \ 217133874Srwatson ((inc)->inc_faddr.s_addr >> 16) ^ \ 21886764Sjlemon (inc)->inc_fport ^ (inc)->inc_lport) & mask) 21986764Sjlemon 220133874Srwatson#define SYNCACHE_HASH6(inc, mask) \ 22186764Sjlemon ((tcp_syncache.hash_secret ^ \ 222133874Srwatson (inc)->inc6_faddr.s6_addr32[0] ^ \ 223133874Srwatson (inc)->inc6_faddr.s6_addr32[3] ^ \ 22486764Sjlemon (inc)->inc_fport ^ (inc)->inc_lport) & mask) 22586764Sjlemon 22686764Sjlemon#define ENDPTS_EQ(a, b) ( \ 22789667Sjlemon (a)->ie_fport == (b)->ie_fport && \ 22886764Sjlemon (a)->ie_lport == (b)->ie_lport && \ 22986764Sjlemon (a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr && \ 23086764Sjlemon (a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr \ 23186764Sjlemon) 23286764Sjlemon 23386764Sjlemon#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0) 23486764Sjlemon 235159695Sandre#define SYNCACHE_TIMEOUT(sc, sch, co) do { \ 236159695Sandre (sc)->sc_rxmits++; \ 237159695Sandre (sc)->sc_rxttime = ticks + \ 238159695Sandre TCPTV_RTOBASE * tcp_backoff[(sc)->sc_rxmits - 1]; \ 239159695Sandre if ((sch)->sch_nextc > (sc)->sc_rxttime) \ 240159695Sandre (sch)->sch_nextc = (sc)->sc_rxttime; \ 241159695Sandre if (!TAILQ_EMPTY(&(sch)->sch_bucket) && !(co)) \ 242159695Sandre callout_reset(&(sch)->sch_timer, \ 243159695Sandre (sch)->sch_nextc - ticks, \ 244159695Sandre syncache_timer, (void *)(sch)); \ 24586764Sjlemon} while (0) 24686764Sjlemon 247159695Sandre#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx) 248159695Sandre#define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx) 249159695Sandre#define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED) 250159695Sandre 251159695Sandre/* 252159695Sandre * Requires the syncache entry to be already removed from the bucket list. 253159695Sandre */ 25486764Sjlemonstatic void 25586764Sjlemonsyncache_free(struct syncache *sc) 25686764Sjlemon{ 25786764Sjlemon if (sc->sc_ipopts) 25886764Sjlemon (void) m_free(sc->sc_ipopts); 259122922Sandre 26092760Sjeff uma_zfree(tcp_syncache.zone, sc); 26186764Sjlemon} 26286764Sjlemon 26386764Sjlemonvoid 26486764Sjlemonsyncache_init(void) 26586764Sjlemon{ 26686764Sjlemon int i; 26786764Sjlemon 26886764Sjlemon tcp_syncache.cache_count = 0; 26986764Sjlemon tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; 27086764Sjlemon tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; 27186764Sjlemon tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; 27286764Sjlemon tcp_syncache.hash_secret = arc4random(); 27386764Sjlemon 274133874Srwatson TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", 27586764Sjlemon &tcp_syncache.hashsize); 276133874Srwatson TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", 27786764Sjlemon &tcp_syncache.bucket_limit); 278149455Sglebius if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) { 279133874Srwatson printf("WARNING: syncache hash size is not a power of 2.\n"); 280149455Sglebius tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; 281133874Srwatson } 28286764Sjlemon tcp_syncache.hashmask = tcp_syncache.hashsize - 1; 28386764Sjlemon 284159695Sandre /* Set limits. */ 285159695Sandre tcp_syncache.cache_limit = 286159695Sandre tcp_syncache.hashsize * tcp_syncache.bucket_limit; 287159695Sandre TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", 288159695Sandre &tcp_syncache.cache_limit); 289159695Sandre 29086764Sjlemon /* Allocate the hash table. */ 29186764Sjlemon MALLOC(tcp_syncache.hashbase, struct syncache_head *, 29286764Sjlemon tcp_syncache.hashsize * sizeof(struct syncache_head), 293159787Sandre M_SYNCACHE, M_WAITOK | M_ZERO); 29486764Sjlemon 29586764Sjlemon /* Initialize the hash buckets. */ 29686764Sjlemon for (i = 0; i < tcp_syncache.hashsize; i++) { 29786764Sjlemon TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket); 298159695Sandre mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head", 299159695Sandre NULL, MTX_DEF); 300159695Sandre callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer, 301159695Sandre &tcp_syncache.hashbase[i].sch_mtx, 0); 30286764Sjlemon tcp_syncache.hashbase[i].sch_length = 0; 30386764Sjlemon } 30486764Sjlemon 305159695Sandre /* Create the syncache entry zone. */ 30692760Sjeff tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), 307159695Sandre NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 30892760Sjeff uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit); 30986764Sjlemon} 31086764Sjlemon 311159695Sandre/* 312159695Sandre * Inserts a syncache entry into the specified bucket row. 313159695Sandre * Locks and unlocks the syncache_head autonomously. 314159695Sandre */ 31588180Sjlemonstatic void 316159697Sandresyncache_insert(struct syncache *sc, struct syncache_head *sch) 31786764Sjlemon{ 31886764Sjlemon struct syncache *sc2; 31986764Sjlemon 320159695Sandre SCH_LOCK(sch); 321122496Ssam 32286764Sjlemon /* 323159695Sandre * Make sure that we don't overflow the per-bucket limit. 324159695Sandre * If the bucket is full, toss the oldest element. 32586764Sjlemon */ 32686764Sjlemon if (sch->sch_length >= tcp_syncache.bucket_limit) { 327159695Sandre KASSERT(!TAILQ_EMPTY(&sch->sch_bucket), 328159695Sandre ("sch->sch_length incorrect")); 329159695Sandre sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head); 33086764Sjlemon syncache_drop(sc2, sch); 33186764Sjlemon tcpstat.tcps_sc_bucketoverflow++; 33286764Sjlemon } 33386764Sjlemon 33486764Sjlemon /* Put it into the bucket. */ 335159695Sandre TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash); 33686764Sjlemon sch->sch_length++; 337159695Sandre 338159695Sandre /* Reinitialize the bucket row's timer. */ 339159695Sandre SYNCACHE_TIMEOUT(sc, sch, 1); 340159695Sandre 341159695Sandre SCH_UNLOCK(sch); 342159695Sandre 34386764Sjlemon tcp_syncache.cache_count++; 34486764Sjlemon tcpstat.tcps_sc_added++; 34586764Sjlemon} 34686764Sjlemon 347159695Sandre/* 348159695Sandre * Remove and free entry from syncache bucket row. 349159695Sandre * Expects locked syncache head. 350159695Sandre */ 35186764Sjlemonstatic void 352159697Sandresyncache_drop(struct syncache *sc, struct syncache_head *sch) 35386764Sjlemon{ 35486764Sjlemon 355159695Sandre SCH_LOCK_ASSERT(sch); 35686764Sjlemon 35786764Sjlemon TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); 35886764Sjlemon sch->sch_length--; 35986764Sjlemon 36086764Sjlemon syncache_free(sc); 361159695Sandre tcp_syncache.cache_count--; 36286764Sjlemon} 36386764Sjlemon 36486764Sjlemon/* 36586764Sjlemon * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. 36686764Sjlemon * If we have retransmitted an entry the maximum number of times, expire it. 367159695Sandre * One separate timer for each bucket row. 36886764Sjlemon */ 36986764Sjlemonstatic void 370159697Sandresyncache_timer(void *xsch) 37186764Sjlemon{ 372159695Sandre struct syncache_head *sch = (struct syncache_head *)xsch; 37386764Sjlemon struct syncache *sc, *nsc; 374159695Sandre int tick = ticks; 37586764Sjlemon 376159695Sandre /* NB: syncache_head has already been locked by the callout. */ 377159695Sandre SCH_LOCK_ASSERT(sch); 37886764Sjlemon 379159695Sandre TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) { 380159695Sandre /* 381159695Sandre * We do not check if the listen socket still exists 382159695Sandre * and accept the case where the listen socket may be 383159695Sandre * gone by the time we resend the SYN/ACK. We do 384159695Sandre * not expect this to happens often. If it does, 385159695Sandre * then the RST will be sent by the time the remote 386159695Sandre * host does the SYN/ACK->ACK. 387159695Sandre */ 388159695Sandre if (sc->sc_rxttime >= tick) { 389159695Sandre if (sc->sc_rxttime < sch->sch_nextc) 390159695Sandre sch->sch_nextc = sc->sc_rxttime; 391159695Sandre continue; 392159695Sandre } 393159695Sandre 394159695Sandre if (sc->sc_rxmits > tcp_syncache.rexmt_limit) { 395159695Sandre syncache_drop(sc, sch); 39686764Sjlemon tcpstat.tcps_sc_stale++; 39786764Sjlemon continue; 39886764Sjlemon } 399159695Sandre 40086764Sjlemon (void) syncache_respond(sc, NULL); 40186764Sjlemon tcpstat.tcps_sc_retransmitted++; 402159695Sandre SYNCACHE_TIMEOUT(sc, sch, 0); 40386764Sjlemon } 404159695Sandre if (!TAILQ_EMPTY(&(sch)->sch_bucket)) 405159695Sandre callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick, 406159695Sandre syncache_timer, (void *)(sch)); 40786764Sjlemon} 40886764Sjlemon 40986764Sjlemon/* 41086764Sjlemon * Find an entry in the syncache. 411159695Sandre * Returns always with locked syncache_head plus a matching entry or NULL. 41286764Sjlemon */ 41386764Sjlemonstruct syncache * 414159697Sandresyncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) 41586764Sjlemon{ 41686764Sjlemon struct syncache *sc; 41786764Sjlemon struct syncache_head *sch; 41886764Sjlemon 41986764Sjlemon#ifdef INET6 42086764Sjlemon if (inc->inc_isipv6) { 42186764Sjlemon sch = &tcp_syncache.hashbase[ 42286764Sjlemon SYNCACHE_HASH6(inc, tcp_syncache.hashmask)]; 42386764Sjlemon *schp = sch; 424159695Sandre 425159695Sandre SCH_LOCK(sch); 426159695Sandre 427159695Sandre /* Circle through bucket row to find matching entry. */ 42886764Sjlemon TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { 429122496Ssam if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) 43086764Sjlemon return (sc); 43186764Sjlemon } 43286764Sjlemon } else 43386764Sjlemon#endif 43486764Sjlemon { 43586764Sjlemon sch = &tcp_syncache.hashbase[ 43686764Sjlemon SYNCACHE_HASH(inc, tcp_syncache.hashmask)]; 43786764Sjlemon *schp = sch; 438159695Sandre 439159695Sandre SCH_LOCK(sch); 440159695Sandre 441159695Sandre /* Circle through bucket row to find matching entry. */ 44286764Sjlemon TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { 44386764Sjlemon#ifdef INET6 44486764Sjlemon if (sc->sc_inc.inc_isipv6) 44586764Sjlemon continue; 44686764Sjlemon#endif 447122496Ssam if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) 44886764Sjlemon return (sc); 44986764Sjlemon } 45086764Sjlemon } 451159695Sandre SCH_LOCK_ASSERT(*schp); 452159695Sandre return (NULL); /* always returns with locked sch */ 45386764Sjlemon} 45486764Sjlemon 45586764Sjlemon/* 45686764Sjlemon * This function is called when we get a RST for a 45786764Sjlemon * non-existent connection, so that we can see if the 45886764Sjlemon * connection is in the syn cache. If it is, zap it. 45986764Sjlemon */ 46086764Sjlemonvoid 461159697Sandresyncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) 46286764Sjlemon{ 46386764Sjlemon struct syncache *sc; 46486764Sjlemon struct syncache_head *sch; 46586764Sjlemon 466159695Sandre sc = syncache_lookup(inc, &sch); /* returns locked sch */ 467159695Sandre SCH_LOCK_ASSERT(sch); 468159695Sandre if (sc == NULL) 469159695Sandre goto done; 470122496Ssam 47186764Sjlemon /* 47286764Sjlemon * If the RST bit is set, check the sequence number to see 47386764Sjlemon * if this is a valid reset segment. 47486764Sjlemon * RFC 793 page 37: 47586764Sjlemon * In all states except SYN-SENT, all reset (RST) segments 47686764Sjlemon * are validated by checking their SEQ-fields. A reset is 47786764Sjlemon * valid if its sequence number is in the window. 47886764Sjlemon * 47986764Sjlemon * The sequence number in the reset segment is normally an 48086764Sjlemon * echo of our outgoing acknowlegement numbers, but some hosts 48186764Sjlemon * send a reset with the sequence number at the rightmost edge 48286764Sjlemon * of our receive window, and we have to handle this case. 48386764Sjlemon */ 48486764Sjlemon if (SEQ_GEQ(th->th_seq, sc->sc_irs) && 48586764Sjlemon SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { 48686764Sjlemon syncache_drop(sc, sch); 48786764Sjlemon tcpstat.tcps_sc_reset++; 48886764Sjlemon } 489159695Sandredone: 490159695Sandre SCH_UNLOCK(sch); 49186764Sjlemon} 49286764Sjlemon 49386764Sjlemonvoid 494159697Sandresyncache_badack(struct in_conninfo *inc) 49586764Sjlemon{ 49686764Sjlemon struct syncache *sc; 49786764Sjlemon struct syncache_head *sch; 49886764Sjlemon 499159695Sandre sc = syncache_lookup(inc, &sch); /* returns locked sch */ 500159695Sandre SCH_LOCK_ASSERT(sch); 50186764Sjlemon if (sc != NULL) { 50286764Sjlemon syncache_drop(sc, sch); 50386764Sjlemon tcpstat.tcps_sc_badack++; 50486764Sjlemon } 505159695Sandre SCH_UNLOCK(sch); 50686764Sjlemon} 50786764Sjlemon 50886764Sjlemonvoid 509159697Sandresyncache_unreach(struct in_conninfo *inc, struct tcphdr *th) 51086764Sjlemon{ 51186764Sjlemon struct syncache *sc; 51286764Sjlemon struct syncache_head *sch; 51386764Sjlemon 514159695Sandre sc = syncache_lookup(inc, &sch); /* returns locked sch */ 515159695Sandre SCH_LOCK_ASSERT(sch); 51686764Sjlemon if (sc == NULL) 517159695Sandre goto done; 51886764Sjlemon 51986764Sjlemon /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ 52086764Sjlemon if (ntohl(th->th_seq) != sc->sc_iss) 521159695Sandre goto done; 52286764Sjlemon 52386764Sjlemon /* 52486764Sjlemon * If we've rertransmitted 3 times and this is our second error, 52586764Sjlemon * we remove the entry. Otherwise, we allow it to continue on. 52686764Sjlemon * This prevents us from incorrectly nuking an entry during a 52786764Sjlemon * spurious network outage. 52886764Sjlemon * 52986764Sjlemon * See tcp_notify(). 53086764Sjlemon */ 531159695Sandre if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) { 53286764Sjlemon sc->sc_flags |= SCF_UNREACH; 533159695Sandre goto done; 53486764Sjlemon } 53586764Sjlemon syncache_drop(sc, sch); 53686764Sjlemon tcpstat.tcps_sc_unreach++; 537159695Sandredone: 538159695Sandre SCH_UNLOCK(sch); 53986764Sjlemon} 54086764Sjlemon 54186764Sjlemon/* 54286764Sjlemon * Build a new TCP socket structure from a syncache entry. 54386764Sjlemon */ 54486764Sjlemonstatic struct socket * 545159697Sandresyncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) 54686764Sjlemon{ 54786764Sjlemon struct inpcb *inp = NULL; 54886764Sjlemon struct socket *so; 54986764Sjlemon struct tcpcb *tp; 55086764Sjlemon 551130555Srwatson NET_ASSERT_GIANT(); 552122496Ssam INP_INFO_WLOCK_ASSERT(&tcbinfo); 553122496Ssam 55486764Sjlemon /* 55586764Sjlemon * Ok, create the full blown connection, and set things up 55686764Sjlemon * as they would have been set up if we had created the 55786764Sjlemon * connection when the SYN arrived. If we can't create 55886764Sjlemon * the connection, abort it. 55986764Sjlemon */ 56086764Sjlemon so = sonewconn(lso, SS_ISCONNECTED); 56186764Sjlemon if (so == NULL) { 56286764Sjlemon /* 56386764Sjlemon * Drop the connection; we will send a RST if the peer 56486764Sjlemon * retransmits the ACK, 56586764Sjlemon */ 56686764Sjlemon tcpstat.tcps_listendrop++; 567122496Ssam goto abort2; 56886764Sjlemon } 569101106Srwatson#ifdef MAC 570130398Srwatson SOCK_LOCK(so); 571101106Srwatson mac_set_socket_peer_from_mbuf(m, so); 572130398Srwatson SOCK_UNLOCK(so); 573101106Srwatson#endif 57486764Sjlemon 57586764Sjlemon inp = sotoinpcb(so); 576122496Ssam INP_LOCK(inp); 57786764Sjlemon 578159695Sandre /* Insert new socket into PCB hash list. */ 57991492Sume inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6; 58086764Sjlemon#ifdef INET6 58186764Sjlemon if (sc->sc_inc.inc_isipv6) { 58286764Sjlemon inp->in6p_laddr = sc->sc_inc.inc6_laddr; 58386764Sjlemon } else { 58486764Sjlemon inp->inp_vflag &= ~INP_IPV6; 58586764Sjlemon inp->inp_vflag |= INP_IPV4; 58686764Sjlemon#endif 58786764Sjlemon inp->inp_laddr = sc->sc_inc.inc_laddr; 58886764Sjlemon#ifdef INET6 58986764Sjlemon } 59086764Sjlemon#endif 59186764Sjlemon inp->inp_lport = sc->sc_inc.inc_lport; 59286764Sjlemon if (in_pcbinshash(inp) != 0) { 59386764Sjlemon /* 59486764Sjlemon * Undo the assignments above if we failed to 59586764Sjlemon * put the PCB on the hash lists. 59686764Sjlemon */ 59786764Sjlemon#ifdef INET6 59886764Sjlemon if (sc->sc_inc.inc_isipv6) 59986764Sjlemon inp->in6p_laddr = in6addr_any; 600133874Srwatson else 60186764Sjlemon#endif 60286764Sjlemon inp->inp_laddr.s_addr = INADDR_ANY; 60386764Sjlemon inp->inp_lport = 0; 60486764Sjlemon goto abort; 60586764Sjlemon } 60686764Sjlemon#ifdef IPSEC 607159697Sandre /* Copy old policy into new socket's. */ 608122062Sume if (ipsec_copy_pcbpolicy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) 609159950Sandre printf("syncache_socket: could not copy policy\n"); 610122062Sume#endif 611122062Sume#ifdef FAST_IPSEC 612159697Sandre /* Copy old policy into new socket's. */ 61386764Sjlemon if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) 614159950Sandre printf("syncache_socket: could not copy policy\n"); 61586764Sjlemon#endif 61686764Sjlemon#ifdef INET6 61786764Sjlemon if (sc->sc_inc.inc_isipv6) { 61886764Sjlemon struct inpcb *oinp = sotoinpcb(lso); 61986764Sjlemon struct in6_addr laddr6; 620124847Sandre struct sockaddr_in6 sin6; 62186764Sjlemon /* 62286764Sjlemon * Inherit socket options from the listening socket. 62386764Sjlemon * Note that in6p_inputopts are not (and should not be) 62486764Sjlemon * copied, since it stores previously received options and is 62586764Sjlemon * used to detect if each new option is different than the 62686764Sjlemon * previous one and hence should be passed to a user. 627133874Srwatson * If we copied in6p_inputopts, a user would not be able to 62886764Sjlemon * receive options just after calling the accept system call. 62986764Sjlemon */ 63086764Sjlemon inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS; 63186764Sjlemon if (oinp->in6p_outputopts) 63286764Sjlemon inp->in6p_outputopts = 63386764Sjlemon ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT); 63486764Sjlemon 635124847Sandre sin6.sin6_family = AF_INET6; 636124847Sandre sin6.sin6_len = sizeof(sin6); 637124847Sandre sin6.sin6_addr = sc->sc_inc.inc6_faddr; 638124847Sandre sin6.sin6_port = sc->sc_inc.inc_fport; 639124847Sandre sin6.sin6_flowinfo = sin6.sin6_scope_id = 0; 64086764Sjlemon laddr6 = inp->in6p_laddr; 64186764Sjlemon if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 64286764Sjlemon inp->in6p_laddr = sc->sc_inc.inc6_laddr; 643127505Spjd if (in6_pcbconnect(inp, (struct sockaddr *)&sin6, 644127505Spjd thread0.td_ucred)) { 64586764Sjlemon inp->in6p_laddr = laddr6; 64686764Sjlemon goto abort; 64786764Sjlemon } 648132307Sdwmalone /* Override flowlabel from in6_pcbconnect. */ 649132307Sdwmalone inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 650132307Sdwmalone inp->in6p_flowinfo |= sc->sc_flowlabel; 65186764Sjlemon } else 65286764Sjlemon#endif 65386764Sjlemon { 65486764Sjlemon struct in_addr laddr; 655124847Sandre struct sockaddr_in sin; 65686764Sjlemon 657135274Sandre inp->inp_options = ip_srcroute(m); 65886764Sjlemon if (inp->inp_options == NULL) { 65986764Sjlemon inp->inp_options = sc->sc_ipopts; 66086764Sjlemon sc->sc_ipopts = NULL; 66186764Sjlemon } 66286764Sjlemon 663124847Sandre sin.sin_family = AF_INET; 664124847Sandre sin.sin_len = sizeof(sin); 665124847Sandre sin.sin_addr = sc->sc_inc.inc_faddr; 666124847Sandre sin.sin_port = sc->sc_inc.inc_fport; 667124847Sandre bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero)); 66886764Sjlemon laddr = inp->inp_laddr; 66986764Sjlemon if (inp->inp_laddr.s_addr == INADDR_ANY) 67086764Sjlemon inp->inp_laddr = sc->sc_inc.inc_laddr; 671127505Spjd if (in_pcbconnect(inp, (struct sockaddr *)&sin, 672127505Spjd thread0.td_ucred)) { 67386764Sjlemon inp->inp_laddr = laddr; 67486764Sjlemon goto abort; 67586764Sjlemon } 67686764Sjlemon } 67786764Sjlemon tp = intotcpcb(inp); 67886764Sjlemon tp->t_state = TCPS_SYN_RECEIVED; 67986764Sjlemon tp->iss = sc->sc_iss; 68086764Sjlemon tp->irs = sc->sc_irs; 68186764Sjlemon tcp_rcvseqinit(tp); 68286764Sjlemon tcp_sendseqinit(tp); 68386764Sjlemon tp->snd_wl1 = sc->sc_irs; 68486764Sjlemon tp->rcv_up = sc->sc_irs + 1; 68586764Sjlemon tp->rcv_wnd = sc->sc_wnd; 68686764Sjlemon tp->rcv_adv += tp->rcv_wnd; 68786764Sjlemon 68890982Sjlemon tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY); 68986764Sjlemon if (sc->sc_flags & SCF_NOOPT) 69086764Sjlemon tp->t_flags |= TF_NOOPT; 691159950Sandre else { 692159950Sandre if (sc->sc_flags & SCF_WINSCALE) { 693159950Sandre tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE; 694159950Sandre tp->snd_scale = sc->sc_requested_s_scale; 695159950Sandre tp->request_r_scale = sc->sc_requested_r_scale; 696159950Sandre } 697159950Sandre if (sc->sc_flags & SCF_TIMESTAMP) { 698159950Sandre tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP; 699159950Sandre tp->ts_recent = sc->sc_tsreflect; 700159950Sandre tp->ts_recent_age = ticks; 701162277Sandre tp->ts_offset = sc->sc_tsoff; 702159950Sandre } 703125680Sbms#ifdef TCP_SIGNATURE 704159950Sandre if (sc->sc_flags & SCF_SIGNATURE) 705159950Sandre tp->t_flags |= TF_SIGNATURE; 706125783Sbms#endif 707159950Sandre if (sc->sc_flags & SCF_SACK) { 708159950Sandre tp->sack_enable = 1; 709159950Sandre tp->t_flags |= TF_SACK_PERMIT; 710159950Sandre } 711130989Sps } 712159695Sandre 713122922Sandre /* 714122922Sandre * Set up MSS and get cached values from tcp_hostcache. 715122922Sandre * This might overwrite some of the defaults we just set. 716122922Sandre */ 71786764Sjlemon tcp_mss(tp, sc->sc_peer_mss); 71886764Sjlemon 71986764Sjlemon /* 72086764Sjlemon * If the SYN,ACK was retransmitted, reset cwnd to 1 segment. 72186764Sjlemon */ 722159695Sandre if (sc->sc_rxmits > 1) 723133874Srwatson tp->snd_cwnd = tp->t_maxseg; 72486764Sjlemon callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); 72586764Sjlemon 726122496Ssam INP_UNLOCK(inp); 727122496Ssam 72886764Sjlemon tcpstat.tcps_accepts++; 72986764Sjlemon return (so); 73086764Sjlemon 73186764Sjlemonabort: 732122496Ssam INP_UNLOCK(inp); 733122496Ssamabort2: 73486764Sjlemon if (so != NULL) 735156763Srwatson soabort(so); 73686764Sjlemon return (NULL); 73786764Sjlemon} 73886764Sjlemon 73986764Sjlemon/* 74086764Sjlemon * This function gets called when we receive an ACK for a 74186764Sjlemon * socket in the LISTEN state. We look up the connection 74286764Sjlemon * in the syncache, and if its there, we pull it out of 74386764Sjlemon * the cache and turn it into a full-blown connection in 74486764Sjlemon * the SYN-RECEIVED state. 74586764Sjlemon */ 74686764Sjlemonint 747162277Sandresyncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, 748159697Sandre struct socket **lsop, struct mbuf *m) 74986764Sjlemon{ 75086764Sjlemon struct syncache *sc; 75186764Sjlemon struct syncache_head *sch; 75286764Sjlemon struct socket *so; 753162277Sandre struct syncache scs; 75486764Sjlemon 755159695Sandre /* 756159695Sandre * Global TCP locks are held because we manipulate the PCB lists 757159695Sandre * and create a new socket. 758159695Sandre */ 759122496Ssam INP_INFO_WLOCK_ASSERT(&tcbinfo); 760122496Ssam 761159695Sandre sc = syncache_lookup(inc, &sch); /* returns locked sch */ 762159695Sandre SCH_LOCK_ASSERT(sch); 76388180Sjlemon if (sc == NULL) { 76488180Sjlemon /* 765133874Srwatson * There is no syncache entry, so see if this ACK is 76688180Sjlemon * a returning syncookie. To do this, first: 76788180Sjlemon * A. See if this socket has had a syncache entry dropped in 76888180Sjlemon * the past. We don't want to accept a bogus syncookie 769133874Srwatson * if we've never received a SYN. 77088180Sjlemon * B. check that the syncookie is valid. If it is, then 77188180Sjlemon * cobble up a fake syncache entry, and return. 77288180Sjlemon */ 773162277Sandre if (!tcp_syncookies) { 774162277Sandre SCH_UNLOCK(sch); 775162277Sandre goto failed; 776162277Sandre } 777162277Sandre bzero(&scs, sizeof(scs)); 778162277Sandre sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop); 779159695Sandre SCH_UNLOCK(sch); 78088180Sjlemon if (sc == NULL) 781159695Sandre goto failed; 78288180Sjlemon tcpstat.tcps_sc_recvcookie++; 783159695Sandre } else { 784159695Sandre /* Pull out the entry to unlock the bucket row. */ 785159695Sandre TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); 786159695Sandre sch->sch_length--; 787159922Sandre tcp_syncache.cache_count--; 788159695Sandre SCH_UNLOCK(sch); 78988180Sjlemon } 79086764Sjlemon 79186764Sjlemon /* 79286764Sjlemon * If seg contains an ACK, but not for our SYN/ACK, send a RST. 79386764Sjlemon */ 794159695Sandre if (th->th_ack != sc->sc_iss + 1) 795159695Sandre goto failed; 79686764Sjlemon 797159695Sandre so = syncache_socket(sc, *lsop, m); 798159695Sandre 79986764Sjlemon if (so == NULL) { 80086764Sjlemon#if 0 80186764Sjlemonresetandabort: 80286764Sjlemon /* XXXjlemon check this - is this correct? */ 80386764Sjlemon (void) tcp_respond(NULL, m, m, th, 80486764Sjlemon th->th_seq + tlen, (tcp_seq)0, TH_RST|TH_ACK); 80586764Sjlemon#endif 806159695Sandre m_freem(m); /* XXX: only needed for above */ 80786764Sjlemon tcpstat.tcps_sc_aborted++; 808162277Sandre if (sc != &scs) { 809159695Sandre syncache_insert(sc, sch); /* try again later */ 810159695Sandre sc = NULL; 811159695Sandre } 812159695Sandre goto failed; 813122922Sandre } else 81486764Sjlemon tcpstat.tcps_sc_completed++; 815159695Sandre *lsop = so; 816122922Sandre 817162277Sandre if (sc != &scs) 818162277Sandre syncache_free(sc); 819159695Sandre return (1); 820159695Sandrefailed: 821162277Sandre if (sc != NULL && sc != &scs) 82286764Sjlemon syncache_free(sc); 823159695Sandre return (0); 82486764Sjlemon} 82586764Sjlemon 82686764Sjlemon/* 82786764Sjlemon * Given a LISTEN socket and an inbound SYN request, add 82886764Sjlemon * this to the syn cache, and send back a segment: 82986764Sjlemon * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 83086764Sjlemon * to the source. 83186764Sjlemon * 83286764Sjlemon * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. 83386764Sjlemon * Doing so would require that we hold onto the data and deliver it 83486764Sjlemon * to the application. However, if we are the target of a SYN-flood 83586764Sjlemon * DoS attack, an attacker could send data which would eventually 83686764Sjlemon * consume all available buffer space if it were ACKed. By not ACKing 83786764Sjlemon * the data, we avoid this DoS scenario. 83886764Sjlemon */ 83986764Sjlemonint 840159697Sandresyncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, 841159697Sandre struct inpcb *inp, struct socket **lsop, struct mbuf *m) 84286764Sjlemon{ 84386764Sjlemon struct tcpcb *tp; 84486764Sjlemon struct socket *so; 84586764Sjlemon struct syncache *sc = NULL; 84686764Sjlemon struct syncache_head *sch; 84786764Sjlemon struct mbuf *ipopts = NULL; 848132307Sdwmalone u_int32_t flowtmp; 849159727Sandre int win, sb_hiwat, ip_ttl, ip_tos, noopt; 850159701Sandre#ifdef INET6 851159701Sandre int autoflowlabel = 0; 852159701Sandre#endif 853162277Sandre struct syncache scs; 85486764Sjlemon 855122496Ssam INP_INFO_WLOCK_ASSERT(&tcbinfo); 856159695Sandre INP_LOCK_ASSERT(inp); /* listen socket */ 857122496Ssam 858159695Sandre /* 859159695Sandre * Combine all so/tp operations very early to drop the INP lock as 860159695Sandre * soon as possible. 861159695Sandre */ 862159695Sandre so = *lsop; 86386764Sjlemon tp = sototcpcb(so); 86486764Sjlemon 865159695Sandre#ifdef INET6 866159695Sandre if (inc->inc_isipv6 && 867159695Sandre (inp->in6p_flags & IN6P_AUTOFLOWLABEL)) 868159695Sandre autoflowlabel = 1; 869159695Sandre#endif 870159695Sandre ip_ttl = inp->inp_ip_ttl; 871159695Sandre ip_tos = inp->inp_ip_tos; 872159695Sandre win = sbspace(&so->so_rcv); 873159695Sandre sb_hiwat = so->so_rcv.sb_hiwat; 874159727Sandre noopt = (tp->t_flags & TF_NOOPT); 875159695Sandre 876159695Sandre so = NULL; 877159695Sandre tp = NULL; 878159695Sandre 879159695Sandre INP_UNLOCK(inp); 880159695Sandre INP_INFO_WUNLOCK(&tcbinfo); 881159695Sandre 88286764Sjlemon /* 88386764Sjlemon * Remember the IP options, if any. 88486764Sjlemon */ 88586764Sjlemon#ifdef INET6 88686764Sjlemon if (!inc->inc_isipv6) 88786764Sjlemon#endif 888135274Sandre ipopts = ip_srcroute(m); 88986764Sjlemon 89086764Sjlemon /* 89186764Sjlemon * See if we already have an entry for this connection. 89286764Sjlemon * If we do, resend the SYN,ACK, and reset the retransmit timer. 89386764Sjlemon * 894159697Sandre * XXX: should the syncache be re-initialized with the contents 89586764Sjlemon * of the new SYN here (which may have different options?) 89686764Sjlemon */ 897159695Sandre sc = syncache_lookup(inc, &sch); /* returns locked entry */ 898159695Sandre SCH_LOCK_ASSERT(sch); 89986764Sjlemon if (sc != NULL) { 90086764Sjlemon tcpstat.tcps_sc_dupsyn++; 90186764Sjlemon if (ipopts) { 90286764Sjlemon /* 90386764Sjlemon * If we were remembering a previous source route, 90486764Sjlemon * forget it and use the new one we've been given. 90586764Sjlemon */ 90686764Sjlemon if (sc->sc_ipopts) 90786764Sjlemon (void) m_free(sc->sc_ipopts); 90886764Sjlemon sc->sc_ipopts = ipopts; 90986764Sjlemon } 91086764Sjlemon /* 91186764Sjlemon * Update timestamp if present. 91286764Sjlemon */ 91386764Sjlemon if (sc->sc_flags & SCF_TIMESTAMP) 914159950Sandre sc->sc_tsreflect = to->to_tsval; 91586764Sjlemon if (syncache_respond(sc, m) == 0) { 916159695Sandre SYNCACHE_TIMEOUT(sc, sch, 1); 917133874Srwatson tcpstat.tcps_sndacks++; 91886764Sjlemon tcpstat.tcps_sndtotal++; 91986764Sjlemon } 920159695Sandre SCH_UNLOCK(sch); 921159695Sandre goto done; 92286764Sjlemon } 92386764Sjlemon 924155487Sqingli sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO); 92586764Sjlemon if (sc == NULL) { 92686764Sjlemon /* 92786764Sjlemon * The zone allocator couldn't provide more entries. 928133874Srwatson * Treat this as if the cache was full; drop the oldest 92986764Sjlemon * entry and insert the new one. 93086764Sjlemon */ 931154355Sglebius tcpstat.tcps_sc_zonefail++; 932159695Sandre sc = TAILQ_LAST(&sch->sch_bucket, sch_head); 933159695Sandre syncache_drop(sc, sch); 934155487Sqingli sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO); 93586764Sjlemon if (sc == NULL) { 936162277Sandre if (tcp_syncookies) { 937162277Sandre bzero(&scs, sizeof(scs)); 938162277Sandre sc = &scs; 939162277Sandre } else { 940162277Sandre SCH_UNLOCK(sch); 941162277Sandre if (ipopts) 942162277Sandre (void) m_free(ipopts); 943162277Sandre goto done; 944162277Sandre } 94586764Sjlemon } 946162277Sandre } 94786764Sjlemon 94886764Sjlemon /* 94986764Sjlemon * Fill in the syncache values. 95086764Sjlemon */ 95186764Sjlemon sc->sc_ipopts = ipopts; 952159950Sandre bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); 95386764Sjlemon#ifdef INET6 954159950Sandre if (!inc->inc_isipv6) 95586764Sjlemon#endif 95686764Sjlemon { 957159695Sandre sc->sc_ip_tos = ip_tos; 958159695Sandre sc->sc_ip_ttl = ip_ttl; 95986764Sjlemon } 960162277Sandre 96186764Sjlemon sc->sc_irs = th->th_seq; 962162277Sandre sc->sc_iss = arc4random(); 963110023Ssilby sc->sc_flags = 0; 964132307Sdwmalone sc->sc_flowlabel = 0; 96586764Sjlemon 966159695Sandre /* 967159695Sandre * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN]. 968159695Sandre * win was derived from socket earlier in the function. 969159695Sandre */ 97086764Sjlemon win = imax(win, 0); 97186764Sjlemon win = imin(win, TCP_MAXWIN); 97286764Sjlemon sc->sc_wnd = win; 97386764Sjlemon 97486764Sjlemon if (tcp_do_rfc1323) { 97586764Sjlemon /* 97686764Sjlemon * A timestamp received in a SYN makes 97786764Sjlemon * it ok to send timestamp requests and replies. 97886764Sjlemon */ 97986764Sjlemon if (to->to_flags & TOF_TS) { 980159950Sandre sc->sc_tsreflect = to->to_tsval; 98186764Sjlemon sc->sc_flags |= SCF_TIMESTAMP; 98286764Sjlemon } 98386764Sjlemon if (to->to_flags & TOF_SCALE) { 98486764Sjlemon int wscale = 0; 98586764Sjlemon 98686764Sjlemon /* Compute proper scaling value from buffer space */ 98786764Sjlemon while (wscale < TCP_MAX_WINSHIFT && 988159695Sandre (TCP_MAXWIN << wscale) < sb_hiwat) 98986764Sjlemon wscale++; 990159950Sandre sc->sc_requested_r_scale = wscale; 99186764Sjlemon sc->sc_requested_s_scale = to->to_requested_s_scale; 99286764Sjlemon sc->sc_flags |= SCF_WINSCALE; 99386764Sjlemon } 99486764Sjlemon } 995125680Sbms#ifdef TCP_SIGNATURE 996125680Sbms /* 997145371Sps * If listening socket requested TCP digests, and received SYN 998145371Sps * contains the option, flag this in the syncache so that 999145371Sps * syncache_respond() will do the right thing with the SYN+ACK. 1000159697Sandre * XXX: Currently we always record the option by default and will 1001145371Sps * attempt to use it in syncache_respond(). 1002125680Sbms */ 1003145371Sps if (to->to_flags & TOF_SIGNATURE) 1004150131Sandre sc->sc_flags |= SCF_SIGNATURE; 1005125783Sbms#endif 1006133874Srwatson if (to->to_flags & TOF_SACK) 1007130989Sps sc->sc_flags |= SCF_SACK; 1008162277Sandre if (to->to_flags & TOF_MSS) 1009162277Sandre sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ 1010159727Sandre if (noopt) 1011159727Sandre sc->sc_flags |= SCF_NOOPT; 1012130989Sps 1013162277Sandre if (tcp_syncookies) { 1014162277Sandre syncookie_generate(sch, sc, &flowtmp); 1015162277Sandre#ifdef INET6 1016162277Sandre if (autoflowlabel) 1017162277Sandre sc->sc_flowlabel = flowtmp; 1018162277Sandre#endif 1019162277Sandre } else { 1020162277Sandre#ifdef INET6 1021162277Sandre if (autoflowlabel) 1022162277Sandre sc->sc_flowlabel = 1023162277Sandre (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 1024162277Sandre#endif 1025162277Sandre } 1026162277Sandre SCH_UNLOCK(sch); 1027162277Sandre 102886764Sjlemon /* 1029137139Sandre * Do a standard 3-way handshake. 103086764Sjlemon */ 103188180Sjlemon if (syncache_respond(sc, m) == 0) { 1032162277Sandre if (tcp_syncookies && tcp_syncookiesonly && sc != &scs) 1033162277Sandre syncache_free(sc); 1034162277Sandre else if (sc != &scs) 1035162277Sandre syncache_insert(sc, sch); /* locks and unlocks sch */ 103688180Sjlemon tcpstat.tcps_sndacks++; 103788180Sjlemon tcpstat.tcps_sndtotal++; 103886764Sjlemon } else { 103986764Sjlemon syncache_free(sc); 104088180Sjlemon tcpstat.tcps_sc_dropped++; 104186764Sjlemon } 1042159695Sandre 1043159695Sandredone: 1044159695Sandre *lsop = NULL; 104586764Sjlemon return (1); 104686764Sjlemon} 104786764Sjlemon 104886764Sjlemonstatic int 1049159697Sandresyncache_respond(struct syncache *sc, struct mbuf *m) 105086764Sjlemon{ 1051159950Sandre struct ip *ip = NULL; 1052159950Sandre struct tcphdr *th; 105386764Sjlemon int optlen, error; 105486764Sjlemon u_int16_t tlen, hlen, mssopt; 1055159950Sandre u_int8_t *optp; 105686764Sjlemon#ifdef INET6 105786764Sjlemon struct ip6_hdr *ip6 = NULL; 105886764Sjlemon#endif 1059159695Sandre#ifdef MAC 1060159695Sandre struct inpcb *inp = NULL; 1061159695Sandre#endif 106286764Sjlemon 1063122922Sandre hlen = 106486764Sjlemon#ifdef INET6 1065133874Srwatson (sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) : 106686764Sjlemon#endif 1067122922Sandre sizeof(struct ip); 106886764Sjlemon 1069159697Sandre /* Determine MSS we advertize to other end of connection. */ 1070122922Sandre mssopt = tcp_mssopt(&sc->sc_inc); 1071159955Sandre if (sc->sc_peer_mss) 1072159955Sandre mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss); 1073122922Sandre 107486764Sjlemon /* Compute the size of the TCP options. */ 107586764Sjlemon if (sc->sc_flags & SCF_NOOPT) { 107686764Sjlemon optlen = 0; 107786764Sjlemon } else { 107886764Sjlemon optlen = TCPOLEN_MAXSEG + 107986764Sjlemon ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) + 1080137139Sandre ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0); 1081125680Sbms#ifdef TCP_SIGNATURE 1082145372Sps if (sc->sc_flags & SCF_SIGNATURE) 1083145372Sps optlen += TCPOLEN_SIGNATURE; 1084125783Sbms#endif 1085145372Sps if (sc->sc_flags & SCF_SACK) 1086145372Sps optlen += TCPOLEN_SACK_PERMITTED; 1087145372Sps optlen = roundup2(optlen, 4); 108886764Sjlemon } 108986764Sjlemon tlen = hlen + sizeof(struct tcphdr) + optlen; 109086764Sjlemon 109186764Sjlemon /* 1092159697Sandre * XXX: Assume that the entire packet will fit in a header mbuf. 109386764Sjlemon */ 109486764Sjlemon KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small")); 109586764Sjlemon 1096159695Sandre /* Create the IP+TCP header from scratch. */ 109786764Sjlemon if (m) 109886764Sjlemon m_freem(m); 109986764Sjlemon 1100151967Sandre m = m_gethdr(M_DONTWAIT, MT_DATA); 110186764Sjlemon if (m == NULL) 110286764Sjlemon return (ENOBUFS); 110386764Sjlemon m->m_data += max_linkhdr; 110486764Sjlemon m->m_len = tlen; 110586764Sjlemon m->m_pkthdr.len = tlen; 110686764Sjlemon m->m_pkthdr.rcvif = NULL; 1107159695Sandre 1108159695Sandre#ifdef MAC 1109159695Sandre /* 1110159695Sandre * For MAC look up the inpcb to get access to the label information. 1111159695Sandre * We don't store the inpcb pointer in struct syncache to make locking 1112159695Sandre * less complicated and to save locking operations. However for MAC 1113159695Sandre * this gives a slight overhead as we have to do a full pcblookup here. 1114159695Sandre */ 1115159695Sandre INP_INFO_RLOCK(&tcbinfo); 1116159695Sandre if (inp == NULL) { 1117159695Sandre#ifdef INET6 /* && MAC */ 1118159695Sandre if (sc->sc_inc.inc_isipv6) 1119159695Sandre inp = in6_pcblookup_hash(&tcbinfo, 1120159945Sandre &sc->sc_inc.inc6_faddr, sc->sc_inc.inc_fport, 1121159695Sandre &sc->sc_inc.inc6_laddr, sc->sc_inc.inc_lport, 1122159695Sandre 1, NULL); 1123159695Sandre else 1124159695Sandre#endif /* INET6 */ 1125159695Sandre inp = in_pcblookup_hash(&tcbinfo, 1126159945Sandre sc->sc_inc.inc_faddr, sc->sc_inc.inc_fport, 1127159695Sandre sc->sc_inc.inc_laddr, sc->sc_inc.inc_lport, 1128159695Sandre 1, NULL); 1129159695Sandre if (inp == NULL) { 1130159695Sandre m_freem(m); 1131159695Sandre INP_INFO_RUNLOCK(&tcbinfo); 1132159695Sandre return (ESHUTDOWN); 1133159695Sandre } 1134159695Sandre } 1135122496Ssam INP_LOCK(inp); 1136159695Sandre if (!inp->inp_socket->so_options & SO_ACCEPTCONN) { 1137159695Sandre m_freem(m); 1138159695Sandre INP_UNLOCK(inp); 1139159695Sandre INP_INFO_RUNLOCK(&tcbinfo); 1140159695Sandre return (ESHUTDOWN); 1141159695Sandre } 1142128905Srwatson mac_create_mbuf_from_inpcb(inp, m); 1143159695Sandre INP_UNLOCK(inp); 1144159695Sandre INP_INFO_RUNLOCK(&tcbinfo); 1145159695Sandre#endif /* MAC */ 114686764Sjlemon 114786764Sjlemon#ifdef INET6 114886764Sjlemon if (sc->sc_inc.inc_isipv6) { 114986764Sjlemon ip6 = mtod(m, struct ip6_hdr *); 115086764Sjlemon ip6->ip6_vfc = IPV6_VERSION; 115186764Sjlemon ip6->ip6_nxt = IPPROTO_TCP; 115286764Sjlemon ip6->ip6_src = sc->sc_inc.inc6_laddr; 115386764Sjlemon ip6->ip6_dst = sc->sc_inc.inc6_faddr; 115486764Sjlemon ip6->ip6_plen = htons(tlen - hlen); 115586764Sjlemon /* ip6_hlim is set after checksum */ 1156132307Sdwmalone ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK; 1157132307Sdwmalone ip6->ip6_flow |= sc->sc_flowlabel; 115886764Sjlemon 115986764Sjlemon th = (struct tcphdr *)(ip6 + 1); 116086764Sjlemon } else 116186764Sjlemon#endif 116286764Sjlemon { 116386764Sjlemon ip = mtod(m, struct ip *); 116486764Sjlemon ip->ip_v = IPVERSION; 116586764Sjlemon ip->ip_hl = sizeof(struct ip) >> 2; 116686764Sjlemon ip->ip_len = tlen; 116786764Sjlemon ip->ip_id = 0; 116886764Sjlemon ip->ip_off = 0; 116986764Sjlemon ip->ip_sum = 0; 117086764Sjlemon ip->ip_p = IPPROTO_TCP; 117186764Sjlemon ip->ip_src = sc->sc_inc.inc_laddr; 117286764Sjlemon ip->ip_dst = sc->sc_inc.inc_faddr; 1173159695Sandre ip->ip_ttl = sc->sc_ip_ttl; 1174159695Sandre ip->ip_tos = sc->sc_ip_tos; 117586764Sjlemon 117698204Ssilby /* 1177108125Shsu * See if we should do MTU discovery. Route lookups are 1178108125Shsu * expensive, so we will only unset the DF bit if: 1179101405Ssilby * 1180101405Ssilby * 1) path_mtu_discovery is disabled 1181101405Ssilby * 2) the SCF_UNREACH flag has been set 118298204Ssilby */ 1183108125Shsu if (path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) 118498204Ssilby ip->ip_off |= IP_DF; 118598204Ssilby 118686764Sjlemon th = (struct tcphdr *)(ip + 1); 118786764Sjlemon } 118886764Sjlemon th->th_sport = sc->sc_inc.inc_lport; 118986764Sjlemon th->th_dport = sc->sc_inc.inc_fport; 119086764Sjlemon 119186764Sjlemon th->th_seq = htonl(sc->sc_iss); 119286764Sjlemon th->th_ack = htonl(sc->sc_irs + 1); 119386764Sjlemon th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; 119486764Sjlemon th->th_x2 = 0; 119586764Sjlemon th->th_flags = TH_SYN|TH_ACK; 119686764Sjlemon th->th_win = htons(sc->sc_wnd); 119786764Sjlemon th->th_urp = 0; 119886764Sjlemon 119986764Sjlemon /* Tack on the TCP options. */ 1200108125Shsu if (optlen != 0) { 1201108125Shsu optp = (u_int8_t *)(th + 1); 1202108125Shsu *optp++ = TCPOPT_MAXSEG; 1203108125Shsu *optp++ = TCPOLEN_MAXSEG; 1204108125Shsu *optp++ = (mssopt >> 8) & 0xff; 1205108125Shsu *optp++ = mssopt & 0xff; 120686764Sjlemon 1207108125Shsu if (sc->sc_flags & SCF_WINSCALE) { 1208108125Shsu *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | 1209108125Shsu TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | 1210159950Sandre sc->sc_requested_r_scale); 1211108125Shsu optp += 4; 1212108125Shsu } 121386764Sjlemon 1214108125Shsu if (sc->sc_flags & SCF_TIMESTAMP) { 1215108125Shsu u_int32_t *lp = (u_int32_t *)(optp); 121686764Sjlemon 1217108125Shsu /* Form timestamp option per appendix A of RFC 1323. */ 1218108125Shsu *lp++ = htonl(TCPOPT_TSTAMP_HDR); 1219162277Sandre if (sc->sc_ts) 1220162277Sandre *lp++ = htonl(sc->sc_ts); 1221162277Sandre else 1222162277Sandre *lp++ = htonl(ticks); 1223159950Sandre *lp = htonl(sc->sc_tsreflect); 1224108125Shsu optp += TCPOLEN_TSTAMP_APPA; 1225108125Shsu } 122686764Sjlemon 1227125680Sbms#ifdef TCP_SIGNATURE 1228125680Sbms /* 1229125680Sbms * Handle TCP-MD5 passive opener response. 1230125680Sbms */ 1231125680Sbms if (sc->sc_flags & SCF_SIGNATURE) { 1232125680Sbms u_int8_t *bp = optp; 1233125680Sbms int i; 1234125680Sbms 1235125680Sbms *bp++ = TCPOPT_SIGNATURE; 1236125680Sbms *bp++ = TCPOLEN_SIGNATURE; 1237125680Sbms for (i = 0; i < TCP_SIGLEN; i++) 1238125680Sbms *bp++ = 0; 1239125783Sbms tcp_signature_compute(m, sizeof(struct ip), 0, optlen, 1240125680Sbms optp + 2, IPSEC_DIR_OUTBOUND); 1241145372Sps optp += TCPOLEN_SIGNATURE; 1242125680Sbms } 1243125680Sbms#endif /* TCP_SIGNATURE */ 1244130989Sps 1245145372Sps if (sc->sc_flags & SCF_SACK) { 1246145372Sps *optp++ = TCPOPT_SACK_PERMITTED; 1247145372Sps *optp++ = TCPOLEN_SACK_PERMITTED; 1248145372Sps } 1249145372Sps 1250145372Sps { 1251145372Sps /* Pad TCP options to a 4 byte boundary */ 1252145372Sps int padlen = optlen - (optp - (u_int8_t *)(th + 1)); 1253145372Sps while (padlen-- > 0) 1254145372Sps *optp++ = TCPOPT_EOL; 1255145372Sps } 125686764Sjlemon } 125786764Sjlemon 125886764Sjlemon#ifdef INET6 125986764Sjlemon if (sc->sc_inc.inc_isipv6) { 126086764Sjlemon th->th_sum = 0; 126186764Sjlemon th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); 1262122922Sandre ip6->ip6_hlim = in6_selecthlim(NULL, NULL); 1263159695Sandre error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); 126486764Sjlemon } else 126586764Sjlemon#endif 126686764Sjlemon { 1267133874Srwatson th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 126886764Sjlemon htons(tlen - hlen + IPPROTO_TCP)); 126986764Sjlemon m->m_pkthdr.csum_flags = CSUM_TCP; 127086764Sjlemon m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1271159695Sandre error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); 127286764Sjlemon } 127386764Sjlemon return (error); 127486764Sjlemon} 127588180Sjlemon 127688180Sjlemon/* 1277162277Sandre * The purpose of SYN cookies is to avoid keeping track of all SYN's we 1278162277Sandre * receive and to be able to handle SYN floods from bogus source addresses 1279162277Sandre * (where we will never receive any reply). SYN floods try to exhaust all 1280162277Sandre * our memory and available slots in the SYN cache table to cause a denial 1281162277Sandre * of service to legitimate users of the local host. 128288180Sjlemon * 1283162277Sandre * The idea of SYN cookies is to encode and include all necessary information 1284162277Sandre * about the connection setup state within the SYN-ACK we send back and thus 1285162277Sandre * to get along without keeping any local state until the ACK to the SYN-ACK 1286162277Sandre * arrives (if ever). Everything we need to know should be available from 1287162277Sandre * the information we encoded in the SYN-ACK. 1288162277Sandre * 1289162277Sandre * More information about the theory behind SYN cookies and its first 1290162277Sandre * discussion and specification can be found at: 1291162277Sandre * http://cr.yp.to/syncookies.html (overview) 1292162277Sandre * http://cr.yp.to/syncookies/archive (gory details) 1293162277Sandre * 1294162277Sandre * This implementation extends the orginal idea and first implementation 1295162277Sandre * of FreeBSD by using not only the initial sequence number field to store 1296162277Sandre * information but also the timestamp field if present. This way we can 1297162277Sandre * keep track of the entire state we need to know to recreate the session in 1298162277Sandre * its original form. Almost all TCP speakers implement RFC1323 timestamps 1299162277Sandre * these days. For those that do not we still have to live with the known 1300162277Sandre * shortcomings of the ISN only SYN cookies. 1301162277Sandre * 1302162277Sandre * Cookie layers: 1303162277Sandre * 1304162277Sandre * Initial sequence number we send: 1305162277Sandre * 31|................................|0 1306162277Sandre * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP 1307162277Sandre * D = MD5 Digest (first dword) 1308162277Sandre * M = MSS index 1309162277Sandre * R = Rotation of secret 1310162277Sandre * P = Odd or Even secret 1311162277Sandre * 1312162277Sandre * The MD5 Digest is computed with over following parameters: 1313162277Sandre * a) randomly rotated secret 1314162277Sandre * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6) 1315162277Sandre * c) the received initial sequence number from remote host 1316162277Sandre * d) the rotation offset and odd/even bit 1317162277Sandre * 1318162277Sandre * Timestamp we send: 1319162277Sandre * 31|................................|0 1320162277Sandre * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5 1321162277Sandre * D = MD5 Digest (third dword) (only as filler) 1322162277Sandre * S = Requested send window scale 1323162277Sandre * R = Requested receive window scale 1324162277Sandre * A = SACK allowed 1325162277Sandre * 5 = TCP-MD5 enabled (not implemented yet) 1326162277Sandre * XORed with MD5 Digest (forth dword) 1327162277Sandre * 1328162277Sandre * The timestamp isn't cryptographically secure and doesn't need to be. 1329162277Sandre * The double use of the MD5 digest dwords ties it to a specific remote/ 1330162277Sandre * local host/port, remote initial sequence number and our local time 1331162277Sandre * limited secret. A received timestamp is reverted (XORed) and then 1332162277Sandre * the contained MD5 dword is compared to the computed one to ensure the 1333162277Sandre * timestamp belongs to the SYN-ACK we sent. The other parameters may 1334162277Sandre * have been tampered with but this isn't different from supplying bogus 1335162277Sandre * values in the SYN in the first place. 1336162277Sandre * 1337162277Sandre * Some problems with SYN cookies remain however: 133888180Sjlemon * Consider the problem of a recreated (and retransmitted) cookie. If the 1339133874Srwatson * original SYN was accepted, the connection is established. The second 1340133874Srwatson * SYN is inflight, and if it arrives with an ISN that falls within the 1341133874Srwatson * receive window, the connection is killed. 134288180Sjlemon * 1343162277Sandre * Notes: 1344162277Sandre * A heuristic to determine when to accept syn cookies is not necessary. 1345162277Sandre * An ACK flood would cause the syncookie verification to be attempted, 1346162277Sandre * but a SYN flood causes syncookies to be generated. Both are of equal 1347162277Sandre * cost, so there's no point in trying to optimize the ACK flood case. 1348162277Sandre * Also, if you don't process certain ACKs for some reason, then all someone 1349162277Sandre * would have to do is launch a SYN and ACK flood at the same time, which 1350162277Sandre * would stop cookie verification and defeat the entire purpose of syncookies. 135188180Sjlemon */ 1352162277Sandrestatic int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 }; 135388180Sjlemon 1354159695Sandrestatic void 1355162277Sandresyncookie_generate(struct syncache_head *sch, struct syncache *sc, 1356162277Sandre u_int32_t *flowlabel) 1357162277Sandre{ 1358162277Sandre MD5_CTX ctx; 1359162277Sandre u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)]; 1360162277Sandre u_int32_t data; 1361162277Sandre u_int32_t *secbits; 1362162277Sandre u_int off, pmss, mss; 1363162277Sandre int i; 1364159695Sandre 1365162277Sandre SCH_LOCK_ASSERT(sch); 1366162277Sandre 1367162277Sandre /* Which of the two secrets to use. */ 1368162277Sandre secbits = sch->sch_oddeven ? 1369162277Sandre sch->sch_secbits_odd : sch->sch_secbits_even; 1370162277Sandre 1371162277Sandre /* Reseed secret if too old. */ 1372162277Sandre if (sch->sch_reseed < time_uptime) { 1373162277Sandre sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */ 1374162277Sandre secbits = sch->sch_oddeven ? 1375162277Sandre sch->sch_secbits_odd : sch->sch_secbits_even; 1376162277Sandre for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++) 1377162277Sandre secbits[i] = arc4random(); 1378162277Sandre sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME; 1379159695Sandre } 1380159695Sandre 1381162277Sandre /* Secret rotation offset. */ 1382162277Sandre off = sc->sc_iss & 0x7; /* iss was randomized before */ 138388180Sjlemon 1384162277Sandre /* Maximum segment size calculation. */ 1385162277Sandre pmss = max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), tcp_minmss); 1386162277Sandre for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--) 1387162277Sandre if (tcp_sc_msstab[mss] <= pmss) 138888180Sjlemon break; 1389162277Sandre 1390162277Sandre /* Fold parameters and MD5 digest into the ISN we will send. */ 1391162277Sandre data = sch->sch_oddeven;/* odd or even secret, 1 bit */ 1392162277Sandre data |= off << 1; /* secret offset, derived from iss, 3 bits */ 1393162277Sandre data |= mss << 4; /* mss, 3 bits */ 1394162277Sandre 1395162277Sandre MD5Init(&ctx); 1396162277Sandre MD5Update(&ctx, ((u_int8_t *)secbits) + off, 1397162277Sandre SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off); 1398162277Sandre MD5Update(&ctx, secbits, off); 1399162277Sandre MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc)); 1400162277Sandre MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs)); 1401162277Sandre MD5Update(&ctx, &data, sizeof(data)); 1402162277Sandre MD5Final((u_int8_t *)&md5_buffer, &ctx); 1403162277Sandre 1404162277Sandre data |= (md5_buffer[0] << 7); 1405162277Sandre sc->sc_iss = data; 1406162277Sandre 1407162306Sache#ifdef INET6 1408162277Sandre *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK; 1409162306Sache#endif 1410162277Sandre 1411162277Sandre /* Additional parameters are stored in the timestamp if present. */ 1412162277Sandre if (sc->sc_flags & SCF_TIMESTAMP) { 1413162277Sandre data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */ 1414162277Sandre data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */ 1415162277Sandre data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */ 1416162277Sandre data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */ 1417162277Sandre data |= md5_buffer[2] << 10; /* more digest bits */ 1418162277Sandre data ^= md5_buffer[3]; 1419162277Sandre sc->sc_ts = data; 1420162277Sandre sc->sc_tsoff = data - ticks; /* after XOR */ 142188180Sjlemon } else 1422162277Sandre sc->sc_ts = 0; 1423162277Sandre 1424162277Sandre return; 142588180Sjlemon} 142688180Sjlemon 142788180Sjlemonstatic struct syncache * 1428162277Sandresyncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, 1429162277Sandre struct syncache *sc, struct tcpopt *to, struct tcphdr *th, 1430162277Sandre struct socket *so) 143188180Sjlemon{ 1432162277Sandre MD5_CTX ctx; 1433162277Sandre u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)]; 1434162277Sandre u_int32_t data = 0; 1435162277Sandre u_int32_t *secbits; 1436162277Sandre tcp_seq ack, seq; 1437162277Sandre int off, mss, wnd, flags; 143888180Sjlemon 1439162277Sandre SCH_LOCK_ASSERT(sch); 1440162277Sandre 1441162277Sandre /* 1442162277Sandre * Pull information out of SYN-ACK/ACK and 1443162277Sandre * revert sequence number advances. 1444162277Sandre */ 1445162277Sandre ack = th->th_ack - 1; 1446162277Sandre seq = th->th_seq - 1; 1447162277Sandre off = (ack >> 1) & 0x7; 1448162277Sandre mss = (ack >> 4) & 0x7; 1449162277Sandre flags = ack & 0x7f; 1450162277Sandre 1451162277Sandre /* Which of the two secrets to use. */ 1452162277Sandre secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even; 1453162277Sandre 1454162277Sandre /* 1455162277Sandre * The secret wasn't updated for the lifetime of a syncookie, 1456162277Sandre * so this SYN-ACK/ACK is either too old (replay) or totally bogus. 1457162277Sandre */ 1458162277Sandre if (sch->sch_reseed < time_uptime) { 145988180Sjlemon return (NULL); 1460159695Sandre } 1461162277Sandre 1462162277Sandre /* Recompute the digest so we can compare it. */ 1463162277Sandre MD5Init(&ctx); 1464162277Sandre MD5Update(&ctx, ((u_int8_t *)secbits) + off, 1465162277Sandre SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off); 1466162277Sandre MD5Update(&ctx, secbits, off); 1467162277Sandre MD5Update(&ctx, inc, sizeof(*inc)); 1468162277Sandre MD5Update(&ctx, &seq, sizeof(seq)); 1469162277Sandre MD5Update(&ctx, &flags, sizeof(flags)); 1470162277Sandre MD5Final((u_int8_t *)&md5_buffer, &ctx); 1471162277Sandre 1472162277Sandre /* Does the digest part of or ACK'ed ISS match? */ 1473162277Sandre if ((ack & (~0x7f)) != (md5_buffer[0] << 7)) 147488180Sjlemon return (NULL); 147588180Sjlemon 1476162277Sandre /* Does the digest part of our reflected timestamp match? */ 1477162277Sandre if (to->to_flags & TOF_TS) { 1478162277Sandre data = md5_buffer[3] ^ to->to_tsecr; 1479162277Sandre if ((data & (~0x3ff)) != (md5_buffer[2] << 10)) 1480162277Sandre return (NULL); 1481162277Sandre } 1482162277Sandre 1483162277Sandre /* Fill in the syncache values. */ 1484162277Sandre bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); 148588180Sjlemon sc->sc_ipopts = NULL; 1486162277Sandre 1487162277Sandre sc->sc_irs = seq; 1488162277Sandre sc->sc_iss = ack; 1489162277Sandre 149088180Sjlemon#ifdef INET6 149188180Sjlemon if (inc->inc_isipv6) { 1492159695Sandre if (sotoinpcb(so)->in6p_flags & IN6P_AUTOFLOWLABEL) 1493132307Sdwmalone sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK; 149488180Sjlemon } else 149588180Sjlemon#endif 149688180Sjlemon { 1497159695Sandre sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl; 1498159695Sandre sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos; 149988180Sjlemon } 1500162277Sandre 1501162277Sandre /* Additional parameters that were encoded in the timestamp. */ 1502162277Sandre if (data) { 1503162277Sandre sc->sc_flags |= SCF_TIMESTAMP; 1504162277Sandre sc->sc_tsreflect = to->to_tsval; 1505162277Sandre sc->sc_tsoff = to->to_tsecr - ticks; 1506162277Sandre sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0; 1507162277Sandre sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0; 1508162277Sandre sc->sc_requested_s_scale = min((data >> 2) & 0xf, 1509162277Sandre TCP_MAX_WINSHIFT); 1510162277Sandre sc->sc_requested_r_scale = min((data >> 6) & 0xf, 1511162277Sandre TCP_MAX_WINSHIFT); 1512162277Sandre if (sc->sc_requested_s_scale || sc->sc_requested_r_scale) 1513162277Sandre sc->sc_flags |= SCF_WINSCALE; 1514162277Sandre } else 1515162277Sandre sc->sc_flags |= SCF_NOOPT; 1516162277Sandre 151788180Sjlemon wnd = sbspace(&so->so_rcv); 151888180Sjlemon wnd = imax(wnd, 0); 151988180Sjlemon wnd = imin(wnd, TCP_MAXWIN); 152088180Sjlemon sc->sc_wnd = wnd; 1521162277Sandre 1522159695Sandre sc->sc_rxmits = 0; 1523162277Sandre sc->sc_peer_mss = tcp_sc_msstab[mss]; 1524162277Sandre 152588180Sjlemon return (sc); 152688180Sjlemon} 1527