1/*-
2 * Copyright (c) 2015-2017 Patrick Kelsey
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/*
28 * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
29 * this code, add the following line to your kernel config:
30 *
31 * options TCP_RFC7413
32 *
33 *
34 * The generated TFO cookies are the 64-bit output of
35 * SipHash24(key=<16-byte-key>, msg=<client-ip>).  Multiple concurrent valid
36 * keys are supported so that time-based rolling cookie invalidation
37 * policies can be implemented in the system.  The default number of
38 * concurrent keys is 2.  This can be adjusted in the kernel config as
39 * follows:
40 *
41 * options TCP_RFC7413_MAX_KEYS=<num-keys>
42 *
43 *
44 * In addition to the facilities defined in RFC7413, this implementation
45 * supports a pre-shared key (PSK) mode of operation in which the TFO server
46 * requires the client to be in possession of a shared secret in order for
47 * the client to be able to successfully open TFO connections with the
48 * server.  This is useful, for example, in environments where TFO servers
49 * are exposed to both internal and external clients and only wish to allow
50 * TFO connections from internal clients.
51 *
52 * In the PSK mode of operation, the server generates and sends TFO cookies
53 * to requesting clients as usual.  However, when validating cookies
54 * received in TFO SYNs from clients, the server requires the
55 * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
56 * msg=<cookie-sent-to-client>).
57 *
58 * Multiple concurrent valid pre-shared keys are supported so that
59 * time-based rolling PSK invalidation policies can be implemented in the
60 * system.  The default number of concurrent pre-shared keys is 2.  This can
61 * be adjusted in the kernel config as follows:
62 *
63 * options TCP_RFC7413_MAX_PSKS=<num-psks>
64 *
65 *
66 * The following TFO-specific sysctls are defined:
67 *
68 * net.inet.tcp.fastopen.acceptany (RW, default 0)
69 *     When non-zero, all client-supplied TFO cookies will be considered to
70 *     be valid.
71 *
72 * net.inet.tcp.fastopen.autokey (RW, default 120)
73 *     When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
74 *     key will be automatically generated after this many seconds.
75 *
76 * net.inet.tcp.fastopen.ccache_bucket_limit
77 *                     (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
78 *     The maximum number of entries in a client cookie cache bucket.
79 *
80 * net.inet.tcp.fastopen.ccache_buckets
81 *                          (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
82 *     The number of client cookie cache buckets.
83 *
84 * net.inet.tcp.fastopen.ccache_list (RO)
85 *     Print the client cookie cache.
86 *
87 * net.inet.tcp.fastopen.client_enable (RW, default 1)
88 *     When zero, no new active (i.e., client) TFO connections can be
89 *     created.  On the transition from enabled to disabled, the client
90 *     cookie cache is cleared and disabled.  The transition from enabled to
91 *     disabled does not affect any active TFO connections in progress; it
92 *     only prevents new ones from being made.
93 *
94 * net.inet.tcp.fastopen.keylen (RD)
95 *     The key length in bytes.
96 *
97 * net.inet.tcp.fastopen.maxkeys (RD)
98 *     The maximum number of keys supported.
99 *
100 * net.inet.tcp.fastopen.maxpsks (RD)
101 *     The maximum number of pre-shared keys supported.
102 *
103 * net.inet.tcp.fastopen.numkeys (RD)
104 *     The current number of keys installed.
105 *
106 * net.inet.tcp.fastopen.numpsks (RD)
107 *     The current number of pre-shared keys installed.
108 *
109 * net.inet.tcp.fastopen.path_disable_time
110 *                          (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
111 *     When a failure occurs while trying to create a new active (i.e.,
112 *     client) TFO connection, new active connections on the same path, as
113 *     determined by the tuple {client_ip, server_ip, server_port}, will be
114 *     forced to be non-TFO for this many seconds.  Note that the path
115 *     disable mechanism relies on state stored in client cookie cache
116 *     entries, so it is possible for the disable time for a given path to
117 *     be reduced if the corresponding client cookie cache entry is reused
118 *     due to resource pressure before the disable period has elapsed.
119 *
120 * net.inet.tcp.fastopen.psk_enable (RW, default 0)
121 *     When non-zero, pre-shared key (PSK) mode is enabled for all TFO
122 *     servers.  On the transition from enabled to disabled, all installed
123 *     pre-shared keys are removed.
124 *
125 * net.inet.tcp.fastopen.server_enable (RW, default 0)
126 *     When zero, no new passive (i.e., server) TFO connections can be
127 *     created.  On the transition from enabled to disabled, all installed
128 *     keys and pre-shared keys are removed.  On the transition from
129 *     disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
130 *     there are no keys installed, a new key will be generated immediately.
131 *     The transition from enabled to disabled does not affect any passive
132 *     TFO connections in progress; it only prevents new ones from being
133 *     made.
134 *
135 * net.inet.tcp.fastopen.setkey (WR)
136 *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
137 *     this sysctl.
138 *
139 * net.inet.tcp.fastopen.setpsk (WR)
140 *     Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
141 *     bytes to this sysctl.
142 *
143 * In order for TFO connections to be created via a listen socket, that
144 * socket must have the TCP_FASTOPEN socket option set on it.  This option
145 * can be set on the socket either before or after the listen() is invoked.
146 * Clearing this option on a listen socket after it has been set has no
147 * effect on existing TFO connections or TFO connections in progress; it
148 * only prevents new TFO connections from being made.
149 *
150 * For passively-created sockets, the TCP_FASTOPEN socket option can be
151 * queried to determine whether the connection was established using TFO.
152 * Note that connections that are established via a TFO SYN, but that fall
153 * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
154 * set.
155 *
156 * Per the RFC, this implementation limits the number of TFO connections
157 * that can be in the SYN_RECEIVED state on a per listen-socket basis.
158 * Whenever this limit is exceeded, requests for new TFO connections are
159 * serviced as non-TFO requests.  Without such a limit, given a valid TFO
160 * cookie, an attacker could keep the listen queue in an overflow condition
161 * using a TFO SYN flood.  This implementation sets the limit at half the
162 * configured listen backlog.
163 *
164 */
165
166#include <sys/cdefs.h>
167__FBSDID("$FreeBSD$");
168
169#include "opt_inet.h"
170
171#include <sys/param.h>
172#include <sys/jail.h>
173#include <sys/kernel.h>
174#include <sys/hash.h>
175#include <sys/limits.h>
176#include <sys/lock.h>
177#include <sys/proc.h>
178#include <sys/rmlock.h>
179#include <sys/sbuf.h>
180#include <sys/socket.h>
181#include <sys/socketvar.h>
182#include <sys/sysctl.h>
183#include <sys/systm.h>
184
185#include <crypto/siphash/siphash.h>
186
187#include <net/vnet.h>
188
189#include <netinet/in.h>
190#include <netinet/in_pcb.h>
191#include <netinet/tcp_var.h>
192#include <netinet/tcp_fastopen.h>
193
194
/* TFO keys and pre-shared keys are both used as SipHash keys. */
#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH

#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
#endif

/*
 * A PSK-mode setsockopt() parks the PSK in tcpcb.t_tfo_cookie.client
 * until the connect occurs, so that buffer must be able to hold a PSK.
 */
#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
#endif

/* Client cookie cache geometry used when no tunable overrides it. */
#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT	16
#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT		2048 /* must be power of 2 */

#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT		900 /* seconds */

/*
 * Number of concurrently valid keys: the kernel-config value when it is
 * defined and at least 1, capped at 10; otherwise 2.
 */
#if defined(TCP_RFC7413_MAX_KEYS) && (TCP_RFC7413_MAX_KEYS > 10)
#define	TCP_FASTOPEN_MAX_KEYS	10
#elif defined(TCP_RFC7413_MAX_KEYS) && (TCP_RFC7413_MAX_KEYS >= 1)
#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
#else
#define	TCP_FASTOPEN_MAX_KEYS	2
#endif

/* Same policy for the number of concurrently valid pre-shared keys. */
#if defined(TCP_RFC7413_MAX_PSKS) && (TCP_RFC7413_MAX_PSKS > 10)
#define	TCP_FASTOPEN_MAX_PSKS	10
#elif defined(TCP_RFC7413_MAX_PSKS) && (TCP_RFC7413_MAX_PSKS >= 1)
#define	TCP_FASTOPEN_MAX_PSKS	TCP_RFC7413_MAX_PSKS
#else
#define	TCP_FASTOPEN_MAX_PSKS	2
#endif
235
236struct tcp_fastopen_keylist {
237	unsigned int newest;
238	unsigned int newest_psk;
239	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
240	uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
241};
242
243struct tcp_fastopen_callout {
244	struct callout c;
245	struct vnet *v;
246};
247
/*
 * Forward declarations of the client cookie cache helpers, so they can
 * be used ahead of their definitions.
 */
static struct tcp_fastopen_ccache_entry *
    tcp_fastopen_ccache_lookup(struct in_conninfo *,
    struct tcp_fastopen_ccache_bucket **);
static struct tcp_fastopen_ccache_entry *
    tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *,
    struct in_conninfo *, uint16_t, uint8_t, uint8_t *);
static void
    tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
    unsigned int);
static void
    tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
    struct tcp_fastopen_ccache_bucket *);
257
258SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
259
260VNET_DEFINE_STATIC(int, tcp_fastopen_acceptany) = 0;
261#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
262SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
263    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
264    "Accept any non-empty cookie");
265
266VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_autokey) = 120;
267#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
268static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
269SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
270    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
271    &sysctl_net_inet_tcp_fastopen_autokey, "IU",
272    "Number of seconds between auto-generation of a new key; zero disables");
273
274static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
275SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
276    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN, NULL, 0,
277    &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
278    "Max entries per bucket in client cookie cache");
279
280VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_ccache_buckets) =
281    TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
282#define	V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
283SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
284    CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
285    "Client cookie cache number of buckets (power of 2)");
286
287VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 1;
288static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
289SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
290    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
291    &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
292    "Enable/disable TCP Fast Open client functionality");
293
294SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
295    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
296    "Key length in bytes");
297
298SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
299    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
300    "Maximum number of keys supported");
301
302SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
303    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
304    "Maximum number of pre-shared keys supported");
305
306VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numkeys) = 0;
307#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
308SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
309    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
310    "Number of keys installed");
311
312VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_numpsks) = 0;
313#define	V_tcp_fastopen_numpsks	VNET(tcp_fastopen_numpsks)
314SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
315    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
316    "Number of pre-shared keys installed");
317
318VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_path_disable_time) =
319    TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
320#define	V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
321SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
322    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
323    "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");
324
325VNET_DEFINE_STATIC(unsigned int, tcp_fastopen_psk_enable) = 0;
326#define	V_tcp_fastopen_psk_enable	VNET(tcp_fastopen_psk_enable)
327static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
328SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
329    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
330    &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
331    "Enable/disable TCP Fast Open server pre-shared key mode");
332
333VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
334static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
335SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
336    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
337    &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
338    "Enable/disable TCP Fast Open server functionality");
339
340static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
341SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
342    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
343    &sysctl_net_inet_tcp_fastopen_setkey, "",
344    "Install a new key");
345
346static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
347SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
348    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
349    &sysctl_net_inet_tcp_fastopen_setpsk, "",
350    "Install a new pre-shared key");
351
352static int sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS);
353SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_list,
354    CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, NULL, 0,
355    sysctl_net_inet_tcp_fastopen_ccache_list, "A",
356    "List of all client cookie cache entries");
357
358VNET_DEFINE_STATIC(struct rmlock, tcp_fastopen_keylock);
359#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)
360
361#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
362#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
363#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
364#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)
365
366VNET_DEFINE_STATIC(struct tcp_fastopen_keylist, tcp_fastopen_keys);
367#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)
368
369VNET_DEFINE_STATIC(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
370#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)
371
372VNET_DEFINE_STATIC(uma_zone_t, counter_zone);
373#define	V_counter_zone			VNET(counter_zone)
374
375static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");
376
377VNET_DEFINE_STATIC(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
378#define V_tcp_fastopen_ccache	VNET(tcp_fastopen_ccache)
379
380#define	CCB_LOCK(ccb)		mtx_lock(&(ccb)->ccb_mtx)
381#define	CCB_UNLOCK(ccb)		mtx_unlock(&(ccb)->ccb_mtx)
382#define	CCB_LOCK_ASSERT(ccb)	mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
383
384
385void
386tcp_fastopen_init(void)
387{
388	unsigned int i;
389
390	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
391	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
392	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
393	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
394	    &V_tcp_fastopen_keylock, 0);
395	V_tcp_fastopen_autokey_ctx.v = curvnet;
396	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
397	V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
398
399	/* May already be non-zero if kernel tunable was set */
400	if (V_tcp_fastopen_ccache.bucket_limit == 0)
401		V_tcp_fastopen_ccache.bucket_limit =
402		    TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
403
404	/* May already be non-zero if kernel tunable was set */
405	if ((V_tcp_fastopen_ccache_buckets == 0) ||
406	    !powerof2(V_tcp_fastopen_ccache_buckets))
407		V_tcp_fastopen_ccache.buckets =
408			TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
409	else
410		V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
411
412	V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
413	V_tcp_fastopen_ccache.secret = arc4random();
414
415	V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
416	    sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
417	    M_WAITOK | M_ZERO);
418
419	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
420		TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
421		mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
422			 NULL, MTX_DEF);
423		if (V_tcp_fastopen_client_enable) {
424			/* enable bucket */
425			V_tcp_fastopen_ccache.base[i].ccb_num_entries = 0;
426		} else {
427			/* disable bucket */
428			V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1;
429		}
430		V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
431	}
432
433	/*
434	 * Note that while the total number of entries in the cookie cache
435	 * is limited by the table management logic to
436	 * V_tcp_fastopen_ccache.buckets *
437	 * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
438	 * this zone can exceed that amount by the number of CPUs in the
439	 * system times the maximum number of unallocated items that can be
440	 * present in each UMA per-CPU cache for this zone.
441	 */
442	V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
443	    sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
444	    UMA_ALIGN_CACHE, 0);
445}
446
447void
448tcp_fastopen_destroy(void)
449{
450	struct tcp_fastopen_ccache_bucket *ccb;
451	unsigned int i;
452
453	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
454		ccb = &V_tcp_fastopen_ccache.base[i];
455		tcp_fastopen_ccache_bucket_trim(ccb, 0);
456		mtx_destroy(&ccb->ccb_mtx);
457	}
458
459	KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
460	    ("%s: TFO ccache zone allocation count not 0", __func__));
461	uma_zdestroy(V_tcp_fastopen_ccache.zone);
462	free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
463
464	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
465	rm_destroy(&V_tcp_fastopen_keylock);
466	uma_zdestroy(V_counter_zone);
467}
468
469unsigned int *
470tcp_fastopen_alloc_counter(void)
471{
472	unsigned int *counter;
473	counter = uma_zalloc(V_counter_zone, M_NOWAIT);
474	if (counter)
475		*counter = 1;
476	return (counter);
477}
478
479void
480tcp_fastopen_decrement_counter(unsigned int *counter)
481{
482	if (*counter == 1)
483		uma_zfree(V_counter_zone, counter);
484	else
485		atomic_subtract_int(counter, 1);
486}
487
488static void
489tcp_fastopen_addkey_locked(uint8_t *key)
490{
491
492	V_tcp_fastopen_keys.newest++;
493	if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS)
494		V_tcp_fastopen_keys.newest = 0;
495	memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key,
496	    TCP_FASTOPEN_KEY_LEN);
497	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
498		V_tcp_fastopen_numkeys++;
499}
500
501static void
502tcp_fastopen_addpsk_locked(uint8_t *psk)
503{
504
505	V_tcp_fastopen_keys.newest_psk++;
506	if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
507		V_tcp_fastopen_keys.newest_psk = 0;
508	memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
509	    TCP_FASTOPEN_KEY_LEN);
510	if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
511		V_tcp_fastopen_numpsks++;
512}
513
514static void
515tcp_fastopen_autokey_locked(void)
516{
517	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
518
519	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
520	tcp_fastopen_addkey_locked(newkey);
521}
522
523static void
524tcp_fastopen_autokey_callout(void *arg)
525{
526	struct tcp_fastopen_callout *ctx = arg;
527
528	CURVNET_SET(ctx->v);
529	tcp_fastopen_autokey_locked();
530	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
531		      tcp_fastopen_autokey_callout, ctx);
532	CURVNET_RESTORE();
533}
534
535
536static uint64_t
537tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
538{
539	SIPHASH_CTX ctx;
540	uint64_t siphash;
541
542	SipHash24_Init(&ctx);
543	SipHash_SetKey(&ctx, key);
544	switch (inc->inc_flags & INC_ISIPV6) {
545#ifdef INET
546	case 0:
547		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
548		break;
549#endif
550#ifdef INET6
551	case INC_ISIPV6:
552		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
553		break;
554#endif
555	}
556	SipHash_Final((u_int8_t *)&siphash, &ctx);
557
558	return (siphash);
559}
560
561static uint64_t
562tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
563{
564	SIPHASH_CTX ctx;
565	uint64_t psk_cookie;
566
567	SipHash24_Init(&ctx);
568	SipHash_SetKey(&ctx, psk);
569	SipHash_Update(&ctx, cookie, cookie_len);
570	SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
571
572	return (psk_cookie);
573}
574
575static int
576tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
577{
578	unsigned int i, psk_index;
579	uint64_t psk_cookie;
580
581	if (V_tcp_fastopen_psk_enable) {
582		psk_index = V_tcp_fastopen_keys.newest_psk;
583		for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
584			psk_cookie =
585			    tcp_fastopen_make_psk_cookie(
586				 V_tcp_fastopen_keys.psk[psk_index],
587				 (uint8_t *)cur_cookie,
588				 TCP_FASTOPEN_COOKIE_LEN);
589
590			if (memcmp(wire_cookie, &psk_cookie,
591				   TCP_FASTOPEN_COOKIE_LEN) == 0)
592				return (1);
593
594			if (psk_index == 0)
595				psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
596			else
597				psk_index--;
598		}
599	} else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
600		return (1);
601
602	return (0);
603}
604
605/*
606 * Return values:
607 *	-1	the cookie is invalid and no valid cookie is available
608 *	 0	the cookie is invalid and the latest cookie has been returned
609 *	 1	the cookie is valid and the latest cookie has been returned
610 */
611int
612tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
613    unsigned int len, uint64_t *latest_cookie)
614{
615	struct rm_priotracker tracker;
616	unsigned int i, key_index;
617	int rv;
618	uint64_t cur_cookie;
619
620	if (V_tcp_fastopen_acceptany) {
621		*latest_cookie = 0;
622		return (1);
623	}
624
625	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
626	if (len != TCP_FASTOPEN_COOKIE_LEN) {
627		if (V_tcp_fastopen_numkeys > 0) {
628			*latest_cookie =
629			    tcp_fastopen_make_cookie(
630				V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest],
631				inc);
632			rv = 0;
633		} else
634			rv = -1;
635		goto out;
636	}
637
638	/*
639	 * Check against each available key, from newest to oldest.
640	 */
641	key_index = V_tcp_fastopen_keys.newest;
642	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
643		cur_cookie =
644		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
645			inc);
646		if (i == 0)
647			*latest_cookie = cur_cookie;
648		rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
649		if (rv)
650			goto out;
651		if (key_index == 0)
652			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
653		else
654			key_index--;
655	}
656	rv = 0;
657
658 out:
659	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
660	return (rv);
661}
662
663static int
664sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
665{
666	int error;
667	unsigned int new;
668
669	new = V_tcp_fastopen_autokey;
670	error = sysctl_handle_int(oidp, &new, 0, req);
671	if (error == 0 && req->newptr) {
672		if (new > (INT_MAX / hz))
673			return (EINVAL);
674
675		TCP_FASTOPEN_KEYS_WLOCK();
676		if (V_tcp_fastopen_server_enable) {
677			if (V_tcp_fastopen_autokey && !new)
678				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
679			else if (new)
680				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
681				    new * hz, tcp_fastopen_autokey_callout,
682				    &V_tcp_fastopen_autokey_ctx);
683		}
684		V_tcp_fastopen_autokey = new;
685		TCP_FASTOPEN_KEYS_WUNLOCK();
686	}
687
688	return (error);
689}
690
691static int
692sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
693{
694	int error;
695	unsigned int new;
696
697	new = V_tcp_fastopen_psk_enable;
698	error = sysctl_handle_int(oidp, &new, 0, req);
699	if (error == 0 && req->newptr) {
700		if (V_tcp_fastopen_psk_enable && !new) {
701			/* enabled -> disabled */
702			TCP_FASTOPEN_KEYS_WLOCK();
703			V_tcp_fastopen_numpsks = 0;
704			V_tcp_fastopen_keys.newest_psk =
705			    TCP_FASTOPEN_MAX_PSKS - 1;
706			V_tcp_fastopen_psk_enable = 0;
707			TCP_FASTOPEN_KEYS_WUNLOCK();
708		} else if (!V_tcp_fastopen_psk_enable && new) {
709			/* disabled -> enabled */
710			TCP_FASTOPEN_KEYS_WLOCK();
711			V_tcp_fastopen_psk_enable = 1;
712			TCP_FASTOPEN_KEYS_WUNLOCK();
713		}
714	}
715	return (error);
716}
717
718static int
719sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
720{
721	int error;
722	unsigned int new;
723
724	new = V_tcp_fastopen_server_enable;
725	error = sysctl_handle_int(oidp, &new, 0, req);
726	if (error == 0 && req->newptr) {
727		if (V_tcp_fastopen_server_enable && !new) {
728			/* enabled -> disabled */
729			TCP_FASTOPEN_KEYS_WLOCK();
730			V_tcp_fastopen_numkeys = 0;
731			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
732			if (V_tcp_fastopen_autokey)
733				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
734			V_tcp_fastopen_numpsks = 0;
735			V_tcp_fastopen_keys.newest_psk =
736			    TCP_FASTOPEN_MAX_PSKS - 1;
737			V_tcp_fastopen_server_enable = 0;
738			TCP_FASTOPEN_KEYS_WUNLOCK();
739		} else if (!V_tcp_fastopen_server_enable && new) {
740			/* disabled -> enabled */
741			TCP_FASTOPEN_KEYS_WLOCK();
742			if (V_tcp_fastopen_autokey &&
743			    (V_tcp_fastopen_numkeys == 0)) {
744				tcp_fastopen_autokey_locked();
745				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
746				    V_tcp_fastopen_autokey * hz,
747				    tcp_fastopen_autokey_callout,
748				    &V_tcp_fastopen_autokey_ctx);
749			}
750			V_tcp_fastopen_server_enable = 1;
751			TCP_FASTOPEN_KEYS_WUNLOCK();
752		}
753	}
754	return (error);
755}
756
757static int
758sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
759{
760	int error;
761	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
762
763	if (req->oldptr != NULL || req->oldlen != 0)
764		return (EINVAL);
765	if (req->newptr == NULL)
766		return (EPERM);
767	if (req->newlen != sizeof(newkey))
768		return (EINVAL);
769	error = SYSCTL_IN(req, newkey, sizeof(newkey));
770	if (error)
771		return (error);
772
773	TCP_FASTOPEN_KEYS_WLOCK();
774	tcp_fastopen_addkey_locked(newkey);
775	TCP_FASTOPEN_KEYS_WUNLOCK();
776
777	return (0);
778}
779
780static int
781sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
782{
783	int error;
784	uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
785
786	if (req->oldptr != NULL || req->oldlen != 0)
787		return (EINVAL);
788	if (req->newptr == NULL)
789		return (EPERM);
790	if (req->newlen != sizeof(newpsk))
791		return (EINVAL);
792	error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
793	if (error)
794		return (error);
795
796	TCP_FASTOPEN_KEYS_WLOCK();
797	tcp_fastopen_addpsk_locked(newpsk);
798	TCP_FASTOPEN_KEYS_WUNLOCK();
799
800	return (0);
801}
802
803static int
804sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
805{
806	struct tcp_fastopen_ccache_bucket *ccb;
807	int error;
808	unsigned int new;
809	unsigned int i;
810
811	new = V_tcp_fastopen_ccache.bucket_limit;
812	error = sysctl_handle_int(oidp, &new, 0, req);
813	if (error == 0 && req->newptr) {
814		if ((new == 0) || (new > INT_MAX))
815			error = EINVAL;
816		else {
817			if (new < V_tcp_fastopen_ccache.bucket_limit) {
818				for (i = 0; i < V_tcp_fastopen_ccache.buckets;
819				     i++) {
820					ccb = &V_tcp_fastopen_ccache.base[i];
821					tcp_fastopen_ccache_bucket_trim(ccb, new);
822				}
823			}
824			V_tcp_fastopen_ccache.bucket_limit = new;
825		}
826
827	}
828	return (error);
829}
830
831static int
832sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
833{
834	struct tcp_fastopen_ccache_bucket *ccb;
835	int error;
836	unsigned int new, i;
837
838	new = V_tcp_fastopen_client_enable;
839	error = sysctl_handle_int(oidp, &new, 0, req);
840	if (error == 0 && req->newptr) {
841		if (V_tcp_fastopen_client_enable && !new) {
842			/* enabled -> disabled */
843			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
844				ccb = &V_tcp_fastopen_ccache.base[i];
845				KASSERT(ccb->ccb_num_entries > -1,
846				    ("%s: ccb->ccb_num_entries %d is negative",
847					__func__, ccb->ccb_num_entries));
848				tcp_fastopen_ccache_bucket_trim(ccb, 0);
849			}
850			V_tcp_fastopen_client_enable = 0;
851		} else if (!V_tcp_fastopen_client_enable && new) {
852			/* disabled -> enabled */
853			for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
854				ccb = &V_tcp_fastopen_ccache.base[i];
855				CCB_LOCK(ccb);
856				KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
857				    ("%s: ccb->ccb_entries not empty", __func__));
858				KASSERT(ccb->ccb_num_entries == -1,
859				    ("%s: ccb->ccb_num_entries %d not -1", __func__,
860					ccb->ccb_num_entries));
861				ccb->ccb_num_entries = 0; /* enable bucket */
862				CCB_UNLOCK(ccb);
863			}
864			V_tcp_fastopen_client_enable = 1;
865		}
866	}
867	return (error);
868}
869
870void
871tcp_fastopen_connect(struct tcpcb *tp)
872{
873	struct inpcb *inp;
874	struct tcp_fastopen_ccache_bucket *ccb;
875	struct tcp_fastopen_ccache_entry *cce;
876	sbintime_t now;
877	uint16_t server_mss;
878	uint64_t psk_cookie;
879
880	psk_cookie = 0;
881	inp = tp->t_inpcb;
882	cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
883	if (cce) {
884		if (cce->disable_time == 0) {
885			if ((cce->cookie_len > 0) &&
886			    (tp->t_tfo_client_cookie_len ==
887			     TCP_FASTOPEN_PSK_LEN)) {
888				psk_cookie =
889				    tcp_fastopen_make_psk_cookie(
890					tp->t_tfo_cookie.client,
891					cce->cookie, cce->cookie_len);
892			} else {
893				tp->t_tfo_client_cookie_len = cce->cookie_len;
894				memcpy(tp->t_tfo_cookie.client, cce->cookie,
895				    cce->cookie_len);
896			}
897			server_mss = cce->server_mss;
898			CCB_UNLOCK(ccb);
899			if (tp->t_tfo_client_cookie_len ==
900			    TCP_FASTOPEN_PSK_LEN && psk_cookie) {
901				tp->t_tfo_client_cookie_len =
902				    TCP_FASTOPEN_COOKIE_LEN;
903				memcpy(tp->t_tfo_cookie.client, &psk_cookie,
904				    TCP_FASTOPEN_COOKIE_LEN);
905			}
906			tcp_mss(tp, server_mss ? server_mss : -1);
907			tp->snd_wnd = tp->t_maxseg;
908		} else {
909			/*
910			 * The path is disabled.  Check the time and
911			 * possibly re-enable.
912			 */
913			now = getsbinuptime();
914			if (now - cce->disable_time >
915			    ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
916				/*
917				 * Re-enable path.  Force a TFO cookie
918				 * request.  Forget the old MSS as it may be
919				 * bogus now, and we will rediscover it in
920				 * the SYN|ACK.
921				 */
922				cce->disable_time = 0;
923				cce->server_mss = 0;
924				cce->cookie_len = 0;
925				/*
926				 * tp->t_tfo... cookie details are already
927				 * zero from the tcpcb init.
928				 */
929			} else {
930				/*
931				 * Path is disabled, so disable TFO on this
932				 * connection.
933				 */
934				tp->t_flags &= ~TF_FASTOPEN;
935			}
936			CCB_UNLOCK(ccb);
937			tcp_mss(tp, -1);
938			/*
939			 * snd_wnd is irrelevant since we are either forcing
940			 * a TFO cookie request or disabling TFO - either
941			 * way, no data with the SYN.
942			 */
943		}
944	} else {
945		/*
946		 * A new entry for this path will be created when a SYN|ACK
947		 * comes back, or the attempt otherwise fails.
948		 */
949		CCB_UNLOCK(ccb);
950		tcp_mss(tp, -1);
951		/*
952		 * snd_wnd is irrelevant since we are forcing a TFO cookie
953		 * request.
954		 */
955	}
956}
957
958void
959tcp_fastopen_disable_path(struct tcpcb *tp)
960{
961	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
962	struct tcp_fastopen_ccache_bucket *ccb;
963	struct tcp_fastopen_ccache_entry *cce;
964
965	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
966	if (cce) {
967		cce->server_mss = 0;
968		cce->cookie_len = 0;
969		/*
970		 * Preserve the existing disable time if it is already
971		 * disabled.
972		 */
973		if (cce->disable_time == 0)
974			cce->disable_time = getsbinuptime();
975	} else /* use invalid cookie len to create disabled entry */
976		tcp_fastopen_ccache_create(ccb, inc, 0,
977	   	    TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
978
979	CCB_UNLOCK(ccb);
980	tp->t_flags &= ~TF_FASTOPEN;
981}
982
983void
984tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
985    uint8_t cookie_len, uint8_t *cookie)
986{
987	struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
988	struct tcp_fastopen_ccache_bucket *ccb;
989	struct tcp_fastopen_ccache_entry *cce;
990
991	cce = tcp_fastopen_ccache_lookup(inc, &ccb);
992	if (cce) {
993		if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
994		    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
995		    ((cookie_len & 0x1) == 0)) {
996			cce->server_mss = mss;
997			cce->cookie_len = cookie_len;
998			memcpy(cce->cookie, cookie, cookie_len);
999			cce->disable_time = 0;
1000		} else {
1001			/* invalid cookie length, disable entry */
1002			cce->server_mss = 0;
1003			cce->cookie_len = 0;
1004			/*
1005			 * Preserve the existing disable time if it is
1006			 * already disabled.
1007			 */
1008			if (cce->disable_time == 0)
1009				cce->disable_time = getsbinuptime();
1010		}
1011	} else
1012		tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
1013
1014	CCB_UNLOCK(ccb);
1015}
1016
/*
 * Find the client cookie cache entry matching the connection info 'inc'.
 *
 * The bucket is selected by hashing the local address storage, the foreign
 * address storage and the foreign port of 'inc', seeded with the cache
 * secret, and masking the result with the cache's bucket mask.  The
 * selected bucket is stored in *ccbp and locked.
 *
 * Returns the matching entry, or NULL if the bucket holds no match.  In
 * BOTH cases the bucket is still locked on return; the caller is
 * responsible for CCB_UNLOCK(*ccbp).
 */
static struct tcp_fastopen_ccache_entry *
tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
    struct tcp_fastopen_ccache_bucket **ccbp)
{
	struct tcp_fastopen_ccache_bucket *ccb;
	struct tcp_fastopen_ccache_entry *cce;
	uint32_t last_word;
	uint32_t hash;

	/*
	 * Chain three hash steps: local address, foreign address, then the
	 * foreign port widened into a 32-bit word.
	 * NOTE(review): the length argument 4 presumably counts 32-bit
	 * words, i.e. the full address storage is hashed for both IPv4
	 * and IPv6 - confirm against jenkins_hash32().
	 */
	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
	    V_tcp_fastopen_ccache.secret);
	hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
	    hash);
	last_word = inc->inc_fport;
	hash = jenkins_hash32(&last_word, 1, hash);
	ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
	*ccbp = ccb;
	CCB_LOCK(ccb);

	/*
	 * Always returns with locked bucket.
	 *
	 * An entry matches when its address family agrees with the
	 * INC_ISIPV6 flag of 'inc', its server port equals the foreign
	 * port, and its client/server addresses equal the local/foreign
	 * addresses of 'inc' for that family.  If the loop runs to
	 * completion without a match, cce is NULL.
	 */
	TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
		if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
		    (cce->server_port == inc->inc_ie.ie_fport) &&
		    (((cce->af == AF_INET) &&
		      (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
		      (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
		     ((cce->af == AF_INET6) &&
		      IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
		      IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
			break;

	return (cce);
}
1052
1053static struct tcp_fastopen_ccache_entry *
1054tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
1055    struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
1056{
1057	struct tcp_fastopen_ccache_entry *cce;
1058
1059	/*
1060	 * 1. Create a new entry, or
1061	 * 2. Reclaim an existing entry, or
1062	 * 3. Fail
1063	 */
1064
1065	CCB_LOCK_ASSERT(ccb);
1066
1067	cce = NULL;
1068	if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
1069		cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
1070
1071	if (cce == NULL) {
1072		/*
1073		 * At bucket limit, or out of memory - reclaim last
1074		 * entry in bucket.
1075		 */
1076		cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
1077		if (cce == NULL) {
1078			/* XXX count this event */
1079			return (NULL);
1080		}
1081
1082		TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1083	} else
1084		ccb->ccb_num_entries++;
1085
1086	TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
1087	cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
1088	if (cce->af == AF_INET) {
1089		cce->cce_client_ip.v4 = inc->inc_laddr;
1090		cce->cce_server_ip.v4 = inc->inc_faddr;
1091	} else {
1092		cce->cce_client_ip.v6 = inc->inc6_laddr;
1093		cce->cce_server_ip.v6 = inc->inc6_faddr;
1094	}
1095	cce->server_port = inc->inc_fport;
1096	if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
1097	    (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
1098	    ((cookie_len & 0x1) == 0)) {
1099		cce->server_mss = mss;
1100		cce->cookie_len = cookie_len;
1101		memcpy(cce->cookie, cookie, cookie_len);
1102		cce->disable_time = 0;
1103	} else {
1104		/* invalid cookie length, disable cce */
1105		cce->server_mss = 0;
1106		cce->cookie_len = 0;
1107		cce->disable_time = getsbinuptime();
1108	}
1109
1110	return (cce);
1111}
1112
1113static void
1114tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
1115    unsigned int limit)
1116{
1117	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
1118	unsigned int entries;
1119
1120	CCB_LOCK(ccb);
1121	entries = 0;
1122	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
1123		entries++;
1124		if (entries > limit)
1125			tcp_fastopen_ccache_entry_drop(cce, ccb);
1126	}
1127	KASSERT(ccb->ccb_num_entries <= (int)limit,
1128	    ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
1129		ccb->ccb_num_entries, limit));
1130	if (limit == 0) {
1131		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
1132		    ("%s: ccb->ccb_entries not empty", __func__));
1133		ccb->ccb_num_entries = -1; /* disable bucket */
1134	}
1135	CCB_UNLOCK(ccb);
1136}
1137
1138static void
1139tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
1140    struct tcp_fastopen_ccache_bucket *ccb)
1141{
1142
1143	CCB_LOCK_ASSERT(ccb);
1144
1145	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
1146	ccb->ccb_num_entries--;
1147	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
1148}
1149
1150static int
1151sysctl_net_inet_tcp_fastopen_ccache_list(SYSCTL_HANDLER_ARGS)
1152{
1153	struct sbuf sb;
1154	struct tcp_fastopen_ccache_bucket *ccb;
1155	struct tcp_fastopen_ccache_entry *cce;
1156	sbintime_t now, duration, limit;
1157	const int linesize = 128;
1158	int i, error, num_entries;
1159	unsigned int j;
1160#ifdef INET6
1161	char clt_buf[INET6_ADDRSTRLEN], srv_buf[INET6_ADDRSTRLEN];
1162#else
1163	char clt_buf[INET_ADDRSTRLEN], srv_buf[INET_ADDRSTRLEN];
1164#endif
1165
1166	if (jailed_without_vnet(curthread->td_ucred) != 0)
1167		return (EPERM);
1168
1169	/* Only allow root to read the client cookie cache */
1170	if (curthread->td_ucred->cr_uid != 0)
1171		return (EPERM);
1172
1173	num_entries = 0;
1174	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1175		ccb = &V_tcp_fastopen_ccache.base[i];
1176		CCB_LOCK(ccb);
1177		if (ccb->ccb_num_entries > 0)
1178			num_entries += ccb->ccb_num_entries;
1179		CCB_UNLOCK(ccb);
1180	}
1181	sbuf_new(&sb, NULL, linesize * (num_entries + 1), SBUF_INCLUDENUL);
1182
1183	sbuf_printf(&sb,
1184	            "\nLocal IP address     Remote IP address     Port   MSS"
1185	            " Disabled Cookie\n");
1186
1187	now = getsbinuptime();
1188	limit = (sbintime_t)V_tcp_fastopen_path_disable_time << 32;
1189	for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
1190		ccb = &V_tcp_fastopen_ccache.base[i];
1191		CCB_LOCK(ccb);
1192		TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) {
1193			if (cce->disable_time != 0) {
1194				duration = now - cce->disable_time;
1195				if (limit >= duration)
1196					duration = limit - duration;
1197				else
1198					duration = 0;
1199			} else
1200				duration = 0;
1201			sbuf_printf(&sb,
1202			            "%-20s %-20s %5u %5u ",
1203			            inet_ntop(cce->af, &cce->cce_client_ip,
1204			                clt_buf, sizeof(clt_buf)),
1205			            inet_ntop(cce->af, &cce->cce_server_ip,
1206			                srv_buf, sizeof(srv_buf)),
1207			            ntohs(cce->server_port),
1208			            cce->server_mss);
1209			if (duration > 0)
1210				sbuf_printf(&sb, "%7ds ", sbintime_getsec(duration));
1211			else
1212				sbuf_printf(&sb, "%8s ", "No");
1213			for (j = 0; j < cce->cookie_len; j++)
1214				sbuf_printf(&sb, "%02x", cce->cookie[j]);
1215			sbuf_putc(&sb, '\n');
1216		}
1217		CCB_UNLOCK(ccb);
1218	}
1219	error = sbuf_finish(&sb);
1220	if (error == 0)
1221		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
1222	sbuf_delete(&sb);
1223	return (error);
1224}
1225
1226