/*	$OpenBSD: ip_ipsp.c,v 1.278 2023/12/03 10:50:25 mvs Exp $	*/
/*
 * The authors of this code are John Ioannidis (ji@tla.org),
 * Angelos D. Keromytis (kermit@csd.uch.gr),
 * Niels Provos (provos@physnet.uni-hamburg.de) and
 * Niklas Hallqvist (niklas@appli.se).
 *
 * The original version of this code was written by John Ioannidis
 * for BSD/OS in Athens, Greece, in November 1995.
 *
 * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
 * by Angelos D. Keromytis.
 *
 * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
 * and Niels Provos.
 *
 * Additional features in 1999 by Angelos D. Keromytis and Niklas Hallqvist.
 *
 * Copyright (c) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
 * Angelos D. Keromytis and Niels Provos.
 * Copyright (c) 1999 Niklas Hallqvist.
 * Copyright (c) 2001, Angelos D. Keromytis.
 *
 * Permission to use, copy, and modify this software with or without fee
 * is hereby granted, provided that this entire notice is included in
 * all copies of any software which is or includes a copy or
 * modification of this software.
 * You may use this code under the GNU public license if you so wish. Please
 * contribute changes back to the authors under this freer than GPL license
 * so that we may further the use of strong encryption without limitations to
 * all.
 *
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
 * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
 * PURPOSE.
 */

#include "pf.h"
#include "pfsync.h"
#include "sec.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/timeout.h>
#include <sys/pool.h>
#include <sys/atomic.h>
#include <sys/mutex.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_ipip.h>

#if NPF > 0
#include <net/pfvar.h>
#endif

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif

#if NSEC > 0
#include <net/if_sec.h>
#endif

#include <netinet/ip_ipsp.h>
#include <net/pfkeyv2.h>

#ifdef DDB
#include <ddb/db_output.h>
void tdb_hashstats(void);
#endif

#ifdef ENCDEBUG
#define DPRINTF(fmt, args...)						\
	do {								\
		if (encdebug)						\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#else
#define DPRINTF(fmt, args...)						\
	do { } while (0)
#endif

/*
 * Locks used to protect global data and struct members:
 *	D	tdb_sadb_mtx		SA database global mutex
 *	F	ipsec_flows_mtx
 */

struct mutex ipsec_flows_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

int		tdb_rehash(void);
void		tdb_timeout(void *);
void		tdb_firstuse(void *);
void		tdb_soft_timeout(void *);
void		tdb_soft_firstuse(void *);
int		tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);

int ipsec_in_use = 0;
u_int64_t ipsec_last_added = 0;
int ipsec_ids_idle = 100;		/* keep free ids for 100s */

struct pool tdb_pool;

u_int32_t ipsec_ids_next_flow = 1;		/* [F] may not be zero */
struct ipsec_ids_tree ipsec_ids_tree;		/* [F] */
struct ipsec_ids_flows ipsec_ids_flows;		/* [F] */
/* Protected by the NET_LOCK(). */
struct ipsec_policy_head ipsec_policy_head =
    TAILQ_HEAD_INITIALIZER(ipsec_policy_head);

void ipsp_ids_gc(void *);

LIST_HEAD(, ipsec_ids) ipsp_ids_gc_list =
    LIST_HEAD_INITIALIZER(ipsp_ids_gc_list);	/* [F] */
struct timeout ipsp_ids_gc_timeout =
    TIMEOUT_INITIALIZER_FLAGS(ipsp_ids_gc, NULL, KCLOCK_NONE,
    TIMEOUT_PROC | TIMEOUT_MPSAFE);

static inline int ipsp_ids_cmp(const struct ipsec_ids *,
    const struct ipsec_ids *);
static inline int ipsp_ids_flow_cmp(const struct ipsec_ids *,
    const struct ipsec_ids *);
RBT_PROTOTYPE(ipsec_ids_tree, ipsec_ids, id_node_flow, ipsp_ids_cmp);
RBT_PROTOTYPE(ipsec_ids_flows, ipsec_ids, id_node_id, ipsp_ids_flow_cmp);
RBT_GENERATE(ipsec_ids_tree, ipsec_ids, id_node_flow, ipsp_ids_cmp);
RBT_GENERATE(ipsec_ids_flows, ipsec_ids, id_node_id, ipsp_ids_flow_cmp);

/*
 * This is the proper place to define the various encapsulation transforms.
 */

const struct xformsw xformsw[] = {
#ifdef IPSEC
{
  .xf_type	= XF_IP4,
  .xf_flags	= 0,
  .xf_name	= "IPv4 Simple Encapsulation",
  .xf_attach	= ipe4_attach,
  .xf_init	= ipe4_init,
  .xf_zeroize	= ipe4_zeroize,
  .xf_input	= ipe4_input,
  .xf_output	= NULL,
},
{
  .xf_type	= XF_AH,
  .xf_flags	= XFT_AUTH,
  .xf_name	= "IPsec AH",
  .xf_attach	= ah_attach,
  .xf_init	= ah_init,
  .xf_zeroize	= ah_zeroize,
  .xf_input	= ah_input,
  .xf_output	= ah_output,
},
{
  .xf_type	= XF_ESP,
  .xf_flags	= XFT_CONF|XFT_AUTH,
  .xf_name	= "IPsec ESP",
  .xf_attach	= esp_attach,
  .xf_init	= esp_init,
  .xf_zeroize	= esp_zeroize,
  .xf_input	= esp_input,
  .xf_output	= esp_output,
},
{
  .xf_type	= XF_IPCOMP,
  .xf_flags	= XFT_COMP,
  .xf_name	= "IPcomp",
  .xf_attach	= ipcomp_attach,
  .xf_init	= ipcomp_init,
  .xf_zeroize	= ipcomp_zeroize,
  .xf_input	= ipcomp_input,
  .xf_output	= ipcomp_output,
},
#endif /* IPSEC */
#ifdef TCP_SIGNATURE
{
  .xf_type	= XF_TCPSIGNATURE,
  .xf_flags	= XFT_AUTH,
  .xf_name	= "TCP MD5 Signature Option, RFC 2385",
  .xf_attach	= tcp_signature_tdb_attach,
  .xf_init	= tcp_signature_tdb_init,
  .xf_zeroize	= tcp_signature_tdb_zeroize,
  .xf_input	= tcp_signature_tdb_input,
  .xf_output	= tcp_signature_tdb_output,
}
#endif /* TCP_SIGNATURE */
};

const struct xformsw *const xformswNXFORMSW = &xformsw[nitems(xformsw)];
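
/*
 * xformswNXFORMSW points one past the last entry of xformsw[], so the
 * table can be scanned with a plain pointer loop; tdb_init() below
 * dispatches on the transform type exactly this way:
 *
 *	for (xsp = xformsw; xsp < xformswNXFORMSW; xsp++)
 *		if (xsp->xf_type == alg)
 *			return (*xsp->xf_init)(tdbp, xsp, ii);
 */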

#define	TDB_HASHSIZE_INIT	32

struct mutex tdb_sadb_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
static SIPHASH_KEY tdbkey;				/* [D] */
static struct tdb **tdbh;				/* [D] */
static struct tdb **tdbdst;				/* [D] */
static struct tdb **tdbsrc;				/* [D] */
static u_int tdb_hashmask = TDB_HASHSIZE_INIT - 1;	/* [D] */
static int tdb_count;					/* [D] */

void
ipsp_init(void)
{
	pool_init(&tdb_pool, sizeof(struct tdb), 0, IPL_SOFTNET, 0,
	    "tdb", NULL);

	arc4random_buf(&tdbkey, sizeof(tdbkey));
	tdbh = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_WAITOK | M_ZERO);
	tdbdst = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_WAITOK | M_ZERO);
	tdbsrc = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_WAITOK | M_ZERO);
}

/*
 * Our hash function mixes in a random SipHash key, so an attacker cannot
 * force collisions (and thus DoS us) by choosing the data to hash.
 */
int
tdb_hash(u_int32_t spi, union sockaddr_union *dst,
    u_int8_t proto)
{
	SIPHASH_CTX ctx;

	MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);

	SipHash24_Init(&ctx, &tdbkey);
	SipHash24_Update(&ctx, &spi, sizeof(spi));
	SipHash24_Update(&ctx, &proto, sizeof(proto));
	SipHash24_Update(&ctx, dst, dst->sa.sa_len);

	return (SipHash24_End(&ctx) & tdb_hashmask);
}
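
/*
 * The result is already masked with tdb_hashmask, so callers can index
 * the hash arrays directly, e.g.:
 *
 *	tdbp = tdbh[tdb_hash(spi, dst, proto)];
 *
 * Both the key and the mask change on rehash, so hash values must not
 * be cached across a release of tdb_sadb_mtx.
 */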

/*
 * Reserve an SPI; the SA is not valid yet though.  We use 0 as
 * an error return value.
 */
u_int32_t
reserve_spi(u_int rdomain, u_int32_t sspi, u_int32_t tspi,
    union sockaddr_union *src, union sockaddr_union *dst,
    u_int8_t sproto, int *errval)
{
	struct tdb *tdbp, *exists;
	u_int32_t spi;
	int nums;

	/* Don't accept ranges only encompassing reserved SPIs. */
	if (sproto != IPPROTO_IPCOMP &&
	    (tspi < sspi || tspi <= SPI_RESERVED_MAX)) {
		(*errval) = EINVAL;
		return 0;
	}
	if (sproto == IPPROTO_IPCOMP && (tspi < sspi ||
	    tspi <= CPI_RESERVED_MAX ||
	    tspi >= CPI_PRIVATE_MIN)) {
		(*errval) = EINVAL;
		return 0;
	}

	/* Limit the range to not include reserved areas. */
	if (sspi <= SPI_RESERVED_MAX)
		sspi = SPI_RESERVED_MAX + 1;

	/* For IPCOMP the CPI is only 16 bits long, what a good idea.... */

	if (sproto == IPPROTO_IPCOMP) {
		u_int32_t t;
		if (sspi >= 0x10000)
			sspi = 0xffff;
		if (tspi >= 0x10000)
			tspi = 0xffff;
		if (sspi > tspi) {
			t = sspi; sspi = tspi; tspi = t;
		}
	}

	if (sspi == tspi)   /* Asking for a specific SPI. */
		nums = 1;
	else
		nums = 100;  /* Arbitrarily chosen */
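
	/*
	 * With up to 100 random probes an allocation only fails if the
	 * requested range is almost entirely in use; for a sparsely
	 * used range the first probe nearly always succeeds.
	 */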

	/* allocate ahead of time to avoid potential sleeping race in loop */
	tdbp = tdb_alloc(rdomain);

	while (nums--) {
		if (sspi == tspi)  /* Specific SPI asked. */
			spi = tspi;
		else    /* Range specified */
			spi = sspi + arc4random_uniform(tspi - sspi);

		/* Don't allocate reserved SPIs.  */
		if (spi >= SPI_RESERVED_MIN && spi <= SPI_RESERVED_MAX)
			continue;
		else
			spi = htonl(spi);

		/* Check whether we're using this SPI already. */
		exists = gettdb(rdomain, spi, dst, sproto);
		if (exists != NULL) {
			tdb_unref(exists);
			continue;
		}

		tdbp->tdb_spi = spi;
		memcpy(&tdbp->tdb_dst.sa, &dst->sa, dst->sa.sa_len);
		memcpy(&tdbp->tdb_src.sa, &src->sa, src->sa.sa_len);
		tdbp->tdb_sproto = sproto;
		tdbp->tdb_flags |= TDBF_INVALID; /* Mark SA invalid for now. */
		tdbp->tdb_satype = SADB_SATYPE_UNSPEC;
		puttdb(tdbp);

#ifdef IPSEC
		/* Setup a "silent" expiration (since TDBF_INVALID's set). */
		if (ipsec_keep_invalid > 0) {
			mtx_enter(&tdbp->tdb_mtx);
			tdbp->tdb_flags |= TDBF_TIMER;
			tdbp->tdb_exp_timeout = ipsec_keep_invalid;
			if (timeout_add_sec(&tdbp->tdb_timer_tmo,
			    ipsec_keep_invalid))
				tdb_ref(tdbp);
			mtx_leave(&tdbp->tdb_mtx);
		}
#endif

		return spi;
	}

	(*errval) = EEXIST;
	tdb_unref(tdbp);
	return 0;
}

/*
 * An IPSP SAID is really the concatenation of the SPI found in the
 * packet, the destination address of the packet and the IPsec protocol.
 * When we receive an IPSP packet, we need to look up its tunnel descriptor
 * block, based on the SPI in the packet and the destination address (which
 * is really one of our addresses if we received the packet!).
 */
struct tdb *
gettdb_dir(u_int rdomain, u_int32_t spi, union sockaddr_union *dst,
    u_int8_t proto, int reverse)
{
	u_int32_t hashval;
	struct tdb *tdbp;

	NET_ASSERT_LOCKED();

	mtx_enter(&tdb_sadb_mtx);
	hashval = tdb_hash(spi, dst, proto);

	for (tdbp = tdbh[hashval]; tdbp != NULL; tdbp = tdbp->tdb_hnext)
		if ((tdbp->tdb_spi == spi) && (tdbp->tdb_sproto == proto) &&
		    ((!reverse && tdbp->tdb_rdomain == rdomain) ||
		    (reverse && tdbp->tdb_rdomain_post == rdomain)) &&
		    !memcmp(&tdbp->tdb_dst, dst, dst->sa.sa_len))
			break;

	tdb_ref(tdbp);
	mtx_leave(&tdb_sadb_mtx);
	return tdbp;
}
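
/*
 * Inbound lookups normally go through gettdb(), which is assumed here to
 * be a wrapper around gettdb_dir() with reverse == 0 (see its use in
 * reserve_spi() above).  A hypothetical ESP input path would do:
 *
 *	tdbp = gettdb(rdomain, spi, &dst, IPPROTO_ESP);
 *	if (tdbp == NULL)
 *		goto drop;
 *	...
 *	tdb_unref(tdbp);
 *
 * The returned tdb is referenced and must be released with tdb_unref().
 */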

/*
 * Same as gettdb() but compare SRC as well, so we
 * use the tdbsrc[] hash table.  Setting spi to 0
 * matches all SPIs.
 */
struct tdb *
gettdbbysrcdst_dir(u_int rdomain, u_int32_t spi, union sockaddr_union *src,
    union sockaddr_union *dst, u_int8_t proto, int reverse)
{
	u_int32_t hashval;
	struct tdb *tdbp;
	union sockaddr_union su_null;

	mtx_enter(&tdb_sadb_mtx);
	hashval = tdb_hash(0, src, proto);

	for (tdbp = tdbsrc[hashval]; tdbp != NULL; tdbp = tdbp->tdb_snext) {
		if (tdbp->tdb_sproto == proto &&
		    (spi == 0 || tdbp->tdb_spi == spi) &&
		    ((!reverse && tdbp->tdb_rdomain == rdomain) ||
		    (reverse && tdbp->tdb_rdomain_post == rdomain)) &&
		    ((tdbp->tdb_flags & TDBF_INVALID) == 0) &&
		    (tdbp->tdb_dst.sa.sa_family == AF_UNSPEC ||
		    !memcmp(&tdbp->tdb_dst, dst, dst->sa.sa_len)) &&
		    !memcmp(&tdbp->tdb_src, src, src->sa.sa_len))
			break;
	}
	if (tdbp != NULL) {
		tdb_ref(tdbp);
		mtx_leave(&tdb_sadb_mtx);
		return tdbp;
	}

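	/*
	 * No SA matched the exact source address.  Fall back to SAs
	 * installed with a wildcard (AF_UNSPEC) source, which are hashed
	 * under an all-zero sockaddr.
	 */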
	memset(&su_null, 0, sizeof(su_null));
	su_null.sa.sa_len = sizeof(struct sockaddr);
	hashval = tdb_hash(0, &su_null, proto);

	for (tdbp = tdbsrc[hashval]; tdbp != NULL; tdbp = tdbp->tdb_snext) {
		if (tdbp->tdb_sproto == proto &&
		    (spi == 0 || tdbp->tdb_spi == spi) &&
		    ((!reverse && tdbp->tdb_rdomain == rdomain) ||
		    (reverse && tdbp->tdb_rdomain_post == rdomain)) &&
		    ((tdbp->tdb_flags & TDBF_INVALID) == 0) &&
		    (tdbp->tdb_dst.sa.sa_family == AF_UNSPEC ||
		    !memcmp(&tdbp->tdb_dst, dst, dst->sa.sa_len)) &&
		    tdbp->tdb_src.sa.sa_family == AF_UNSPEC)
			break;
	}
	tdb_ref(tdbp);
	mtx_leave(&tdb_sadb_mtx);
	return tdbp;
}

/*
 * Check that IDs match.  Return true if so.  The ids argument and the
 * pfilter/pfiltermask pair carry information from policies or already
 * established TDBs, which is matched against the given TDB.
 */
int
ipsp_aux_match(struct tdb *tdb,
    struct ipsec_ids *ids,
    struct sockaddr_encap *pfilter,
    struct sockaddr_encap *pfiltermask)
{
	if (ids != NULL)
		if (tdb->tdb_ids == NULL ||
		    !ipsp_ids_match(tdb->tdb_ids, ids))
			return 0;

	/* Check for filter matches. */
	if (pfilter != NULL && pfiltermask != NULL &&
	    tdb->tdb_filter.sen_type) {
		/*
		 * XXX We should really be doing a subnet-check (see
		 * whether the TDB-associated filter is a subset
		 * of the policy's).  For now, an exact match will solve
		 * most problems (all this will do is make every
		 * policy get its own SAs).
		 */
		if (memcmp(&tdb->tdb_filter, pfilter,
		    sizeof(struct sockaddr_encap)) ||
		    memcmp(&tdb->tdb_filtermask, pfiltermask,
		    sizeof(struct sockaddr_encap)))
			return 0;
	}

	return 1;
}

/*
 * Get an SA given the remote address, the security protocol type, and
 * the desired IDs.
 */
struct tdb *
gettdbbydst(u_int rdomain, union sockaddr_union *dst, u_int8_t sproto,
    struct ipsec_ids *ids,
    struct sockaddr_encap *filter, struct sockaddr_encap *filtermask)
{
	u_int32_t hashval;
	struct tdb *tdbp;

	mtx_enter(&tdb_sadb_mtx);
	hashval = tdb_hash(0, dst, sproto);

	for (tdbp = tdbdst[hashval]; tdbp != NULL; tdbp = tdbp->tdb_dnext)
		if ((tdbp->tdb_sproto == sproto) &&
		    (tdbp->tdb_rdomain == rdomain) &&
		    ((tdbp->tdb_flags & TDBF_INVALID) == 0) &&
		    (!memcmp(&tdbp->tdb_dst, dst, dst->sa.sa_len))) {
			/* Check whether IDs match */
			if (!ipsp_aux_match(tdbp, ids, filter, filtermask))
				continue;
			break;
		}

	tdb_ref(tdbp);
	mtx_leave(&tdb_sadb_mtx);
	return tdbp;
}

/*
 * Get an SA given the source address, the security protocol type, and
 * the desired IDs.
 */
struct tdb *
gettdbbysrc(u_int rdomain, union sockaddr_union *src, u_int8_t sproto,
    struct ipsec_ids *ids,
    struct sockaddr_encap *filter, struct sockaddr_encap *filtermask)
{
	u_int32_t hashval;
	struct tdb *tdbp;

	mtx_enter(&tdb_sadb_mtx);
	hashval = tdb_hash(0, src, sproto);

	for (tdbp = tdbsrc[hashval]; tdbp != NULL; tdbp = tdbp->tdb_snext) {
		if ((tdbp->tdb_sproto == sproto) &&
		    (tdbp->tdb_rdomain == rdomain) &&
		    ((tdbp->tdb_flags & TDBF_INVALID) == 0) &&
		    (!memcmp(&tdbp->tdb_src, src, src->sa.sa_len))) {
			/* Check whether IDs match */
			if (!ipsp_aux_match(tdbp, ids, filter, filtermask))
				continue;
			break;
		}
	}
	tdb_ref(tdbp);
	mtx_leave(&tdb_sadb_mtx);
	return tdbp;
}

#ifdef DDB

#define NBUCKETS 16
void
tdb_hashstats(void)
{
	int i, cnt, buckets[NBUCKETS];
	struct tdb *tdbp;

	if (tdbh == NULL) {
		db_printf("no tdb hash table\n");
		return;
	}

	memset(buckets, 0, sizeof(buckets));
	for (i = 0; i <= tdb_hashmask; i++) {
		cnt = 0;
		for (tdbp = tdbh[i]; cnt < NBUCKETS - 1 && tdbp != NULL;
		    tdbp = tdbp->tdb_hnext)
			cnt++;
		buckets[cnt]++;
	}

	db_printf("tdb cnt\t\tbucket cnt\n");
	for (i = 0; i < NBUCKETS; i++)
		if (buckets[i] > 0)
			db_printf("%d%s\t\t%d\n", i, i == NBUCKETS - 1 ?
			    "+" : "", buckets[i]);
}

#define DUMP(m, f) pr("%18s: " f "\n", #m, tdb->tdb_##m)
void
tdb_printit(void *addr, int full, int (*pr)(const char *, ...))
{
	struct tdb *tdb = addr;
	char buf[INET6_ADDRSTRLEN];

	if (full) {
		pr("tdb at %p\n", tdb);
		DUMP(hnext, "%p");
		DUMP(dnext, "%p");
		DUMP(snext, "%p");
		DUMP(inext, "%p");
		DUMP(onext, "%p");
		DUMP(xform, "%p");
		pr("%18s: %d\n", "refcnt", tdb->tdb_refcnt.r_refs);
		DUMP(encalgxform, "%p");
		DUMP(authalgxform, "%p");
		DUMP(compalgxform, "%p");
		pr("%18s: %b\n", "flags", tdb->tdb_flags, TDBF_BITS);
		/* tdb_XXX_tmo */
		DUMP(seq, "%d");
		DUMP(exp_allocations, "%d");
		DUMP(soft_allocations, "%d");
		DUMP(cur_allocations, "%d");
		DUMP(exp_bytes, "%lld");
		DUMP(soft_bytes, "%lld");
		DUMP(cur_bytes, "%lld");
		DUMP(exp_timeout, "%lld");
		DUMP(soft_timeout, "%lld");
		DUMP(established, "%lld");
		DUMP(first_use, "%lld");
		DUMP(soft_first_use, "%lld");
		DUMP(exp_first_use, "%lld");
		DUMP(last_used, "%lld");
		DUMP(last_marked, "%lld");
		/* tdb_data */
		DUMP(cryptoid, "%lld");
		pr("%18s: %08x\n", "tdb_spi", ntohl(tdb->tdb_spi));
		DUMP(amxkeylen, "%d");
		DUMP(emxkeylen, "%d");
		DUMP(ivlen, "%d");
		DUMP(sproto, "%d");
		DUMP(wnd, "%d");
		DUMP(satype, "%d");
		DUMP(updates, "%d");
		pr("%18s: %s\n", "dst",
		    ipsp_address(&tdb->tdb_dst, buf, sizeof(buf)));
		pr("%18s: %s\n", "src",
		    ipsp_address(&tdb->tdb_src, buf, sizeof(buf)));
		DUMP(amxkey, "%p");
		DUMP(emxkey, "%p");
		DUMP(rpl, "%lld");
		/* tdb_seen */
		/* tdb_iv */
		DUMP(ids, "%p");
		DUMP(ids_swapped, "%d");
		DUMP(mtu, "%d");
		DUMP(mtutimeout, "%lld");
		pr("%18s: %d\n", "udpencap_port",
		    ntohs(tdb->tdb_udpencap_port));
		DUMP(tag, "%d");
		DUMP(tap, "%d");
		DUMP(rdomain, "%d");
		DUMP(rdomain_post, "%d");
		/* tdb_filter */
		/* tdb_filtermask */
		/* tdb_policy_head */
		/* tdb_sync_entry */
	} else {
		pr("%p:", tdb);
		pr(" %08x", ntohl(tdb->tdb_spi));
		pr(" %s", ipsp_address(&tdb->tdb_src, buf, sizeof(buf)));
		pr("->%s", ipsp_address(&tdb->tdb_dst, buf, sizeof(buf)));
		pr(":%d", tdb->tdb_sproto);
		pr(" #%d", tdb->tdb_refcnt.r_refs);
		pr(" %08x\n", tdb->tdb_flags);
	}
}
#undef DUMP
#endif	/* DDB */

int
tdb_walk(u_int rdomain, int (*walker)(struct tdb *, void *, int), void *arg)
{
	SIMPLEQ_HEAD(, tdb) tdblist;
	struct tdb *tdbp;
	int i, rval;

	/*
	 * The walker may sleep.  So we cannot hold the tdb_sadb_mtx while
	 * traversing the tdb_hnext list.  Create a new tdb_walk list with
	 * exclusive netlock protection.
	 */
	NET_ASSERT_LOCKED_EXCLUSIVE();
	SIMPLEQ_INIT(&tdblist);

	mtx_enter(&tdb_sadb_mtx);
	for (i = 0; i <= tdb_hashmask; i++) {
		for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbp->tdb_hnext) {
			if (rdomain != tdbp->tdb_rdomain)
				continue;
			tdb_ref(tdbp);
			SIMPLEQ_INSERT_TAIL(&tdblist, tdbp, tdb_walk);
		}
	}
	mtx_leave(&tdb_sadb_mtx);

	rval = 0;
	while ((tdbp = SIMPLEQ_FIRST(&tdblist)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&tdblist, tdb_walk);
		if (rval == 0)
			rval = walker(tdbp, arg, SIMPLEQ_EMPTY(&tdblist));
		tdb_unref(tdbp);
	}

	return rval;
}

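/*
 * Expiration timers and reference counts: a successful timeout_add_sec()
 * is paired with a tdb_ref() so the TDB stays alive until the handler
 * has run.  Each handler below drops that reference on its way out, and
 * tdb_deltimeouts() drops it whenever timeout_del() cancels a pending
 * timeout.
 */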
void
tdb_timeout(void *v)
{
	struct tdb *tdb = v;

	NET_LOCK();
	if (tdb->tdb_flags & TDBF_TIMER) {
		/* If it's an "invalid" TDB do a silent expiration. */
		if (!(tdb->tdb_flags & TDBF_INVALID)) {
#ifdef IPSEC
			ipsecstat_inc(ipsec_exctdb);
#endif /* IPSEC */
			pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_HARD);
		}
		tdb_delete(tdb);
	}
	/* decrement refcount of the timeout argument */
	tdb_unref(tdb);
	NET_UNLOCK();
}

void
tdb_firstuse(void *v)
{
	struct tdb *tdb = v;

	NET_LOCK();
	if (tdb->tdb_flags & TDBF_FIRSTUSE) {
		/* If the TDB hasn't been used, don't renew it. */
		if (tdb->tdb_first_use != 0) {
#ifdef IPSEC
			ipsecstat_inc(ipsec_exctdb);
#endif /* IPSEC */
			pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_HARD);
		}
		tdb_delete(tdb);
	}
	/* decrement refcount of the timeout argument */
	tdb_unref(tdb);
	NET_UNLOCK();
}

void
tdb_addtimeouts(struct tdb *tdbp)
{
	mtx_enter(&tdbp->tdb_mtx);
	if (tdbp->tdb_flags & TDBF_TIMER) {
		if (timeout_add_sec(&tdbp->tdb_timer_tmo,
		    tdbp->tdb_exp_timeout))
			tdb_ref(tdbp);
	}
	if (tdbp->tdb_flags & TDBF_SOFT_TIMER) {
		if (timeout_add_sec(&tdbp->tdb_stimer_tmo,
		    tdbp->tdb_soft_timeout))
			tdb_ref(tdbp);
	}
	mtx_leave(&tdbp->tdb_mtx);
}

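/*
 * Soft expirations only notify userland: the SOFT lifetime message asks
 * the key manager to negotiate a replacement SA while the current one
 * keeps working.  Only the hard expirations above tdb_delete() the SA.
 */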
void
tdb_soft_timeout(void *v)
{
	struct tdb *tdb = v;

	NET_LOCK();
	mtx_enter(&tdb->tdb_mtx);
	if (tdb->tdb_flags & TDBF_SOFT_TIMER) {
		tdb->tdb_flags &= ~TDBF_SOFT_TIMER;
		mtx_leave(&tdb->tdb_mtx);
		/* Soft expirations. */
		pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_SOFT);
	} else
		mtx_leave(&tdb->tdb_mtx);
	/* decrement refcount of the timeout argument */
	tdb_unref(tdb);
	NET_UNLOCK();
}

void
tdb_soft_firstuse(void *v)
{
	struct tdb *tdb = v;

	NET_LOCK();
	mtx_enter(&tdb->tdb_mtx);
	if (tdb->tdb_flags & TDBF_SOFT_FIRSTUSE) {
		tdb->tdb_flags &= ~TDBF_SOFT_FIRSTUSE;
		mtx_leave(&tdb->tdb_mtx);
		/* If the TDB hasn't been used, don't renew it. */
		if (tdb->tdb_first_use != 0)
			pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_SOFT);
	} else
		mtx_leave(&tdb->tdb_mtx);
	/* decrement refcount of the timeout argument */
	tdb_unref(tdb);
	NET_UNLOCK();
}

int
tdb_rehash(void)
{
	struct tdb **new_tdbh, **new_tdbdst, **new_srcaddr, *tdbp, *tdbnp;
	u_int i, old_hashmask;
	u_int32_t hashval;

	MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);

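	/*
	 * Grow by doubling: (mask << 1) | 1 keeps the mask of the form
	 * 2^n - 1.  A fresh SipHash key is generated as well, so the
	 * entries moved below land in new, unpredictable buckets.
	 */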
	old_hashmask = tdb_hashmask;
	tdb_hashmask = (tdb_hashmask << 1) | 1;

	arc4random_buf(&tdbkey, sizeof(tdbkey));
	new_tdbh = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_NOWAIT | M_ZERO);
	new_tdbdst = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_NOWAIT | M_ZERO);
	new_srcaddr = mallocarray(tdb_hashmask + 1, sizeof(struct tdb *), M_TDB,
	    M_NOWAIT | M_ZERO);
	if (new_tdbh == NULL ||
	    new_tdbdst == NULL ||
	    new_srcaddr == NULL) {
		free(new_tdbh, M_TDB, 0);
		free(new_tdbdst, M_TDB, 0);
		free(new_srcaddr, M_TDB, 0);
		return (ENOMEM);
	}

	for (i = 0; i <= old_hashmask; i++) {
		for (tdbp = tdbh[i]; tdbp != NULL; tdbp = tdbnp) {
			tdbnp = tdbp->tdb_hnext;
			hashval = tdb_hash(tdbp->tdb_spi, &tdbp->tdb_dst,
			    tdbp->tdb_sproto);
			tdbp->tdb_hnext = new_tdbh[hashval];
			new_tdbh[hashval] = tdbp;
		}

		for (tdbp = tdbdst[i]; tdbp != NULL; tdbp = tdbnp) {
			tdbnp = tdbp->tdb_dnext;
			hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto);
			tdbp->tdb_dnext = new_tdbdst[hashval];
			new_tdbdst[hashval] = tdbp;
		}

		for (tdbp = tdbsrc[i]; tdbp != NULL; tdbp = tdbnp) {
			tdbnp = tdbp->tdb_snext;
			hashval = tdb_hash(0, &tdbp->tdb_src, tdbp->tdb_sproto);
			tdbp->tdb_snext = new_srcaddr[hashval];
			new_srcaddr[hashval] = tdbp;
		}
	}

	free(tdbh, M_TDB, 0);
	tdbh = new_tdbh;

	free(tdbdst, M_TDB, 0);
	tdbdst = new_tdbdst;

	free(tdbsrc, M_TDB, 0);
	tdbsrc = new_srcaddr;

	return 0;
}

/*
 * Add a TDB to the hash table.
 */
void
puttdb(struct tdb *tdbp)
{
	mtx_enter(&tdb_sadb_mtx);
	puttdb_locked(tdbp);
	mtx_leave(&tdb_sadb_mtx);
}

void
puttdb_locked(struct tdb *tdbp)
{
	u_int32_t hashval;

	MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);

	hashval = tdb_hash(tdbp->tdb_spi, &tdbp->tdb_dst, tdbp->tdb_sproto);

	/*
	 * Rehash if this tdb would cause a bucket to have more than
	 * two items and if the number of tdbs exceeds 10% of the
	 * bucket count.  This number is arbitrarily chosen and is
	 * just a measure to not keep rehashing when adding and
	 * removing tdbs that happen to always end up in the same
	 * bucket, which is not uncommon when doing manual keying.
	 */
	if (tdbh[hashval] != NULL && tdbh[hashval]->tdb_hnext != NULL &&
	    tdb_count * 10 > tdb_hashmask + 1) {
		if (tdb_rehash() == 0)
			hashval = tdb_hash(tdbp->tdb_spi, &tdbp->tdb_dst,
			    tdbp->tdb_sproto);
	}

	tdbp->tdb_hnext = tdbh[hashval];
	tdbh[hashval] = tdbp;

	tdb_count++;
#ifdef IPSEC
	if ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_TUNNELING)) == TDBF_TUNNELING)
		ipsecstat_inc(ipsec_tunnels);
#endif /* IPSEC */

	ipsec_last_added = getuptime();

	if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) {
#if NSEC > 0
		sec_tdb_insert(tdbp);
#endif
		return;
	}

	hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto);
	tdbp->tdb_dnext = tdbdst[hashval];
	tdbdst[hashval] = tdbp;

	hashval = tdb_hash(0, &tdbp->tdb_src, tdbp->tdb_sproto);
	tdbp->tdb_snext = tdbsrc[hashval];
	tdbsrc[hashval] = tdbp;
}

void
tdb_unlink(struct tdb *tdbp)
{
	mtx_enter(&tdb_sadb_mtx);
	tdb_unlink_locked(tdbp);
	mtx_leave(&tdb_sadb_mtx);
}

void
tdb_unlink_locked(struct tdb *tdbp)
{
	struct tdb *tdbpp;
	u_int32_t hashval;

	MUTEX_ASSERT_LOCKED(&tdb_sadb_mtx);

	hashval = tdb_hash(tdbp->tdb_spi, &tdbp->tdb_dst, tdbp->tdb_sproto);

	if (tdbh[hashval] == tdbp) {
		tdbh[hashval] = tdbp->tdb_hnext;
	} else {
		for (tdbpp = tdbh[hashval]; tdbpp != NULL;
		    tdbpp = tdbpp->tdb_hnext) {
			if (tdbpp->tdb_hnext == tdbp) {
				tdbpp->tdb_hnext = tdbp->tdb_hnext;
				break;
			}
		}
	}

	tdbp->tdb_hnext = NULL;

	tdb_count--;
#ifdef IPSEC
	if ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_TUNNELING)) ==
	    TDBF_TUNNELING) {
		ipsecstat_dec(ipsec_tunnels);
		ipsecstat_inc(ipsec_prevtunnels);
	}
#endif /* IPSEC */

	if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) {
#if NSEC > 0
		sec_tdb_remove(tdbp);
#endif
		return;
	}

	hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto);

	if (tdbdst[hashval] == tdbp) {
		tdbdst[hashval] = tdbp->tdb_dnext;
	} else {
		for (tdbpp = tdbdst[hashval]; tdbpp != NULL;
		    tdbpp = tdbpp->tdb_dnext) {
			if (tdbpp->tdb_dnext == tdbp) {
				tdbpp->tdb_dnext = tdbp->tdb_dnext;
				break;
			}
		}
	}

	tdbp->tdb_dnext = NULL;

	hashval = tdb_hash(0, &tdbp->tdb_src, tdbp->tdb_sproto);

	if (tdbsrc[hashval] == tdbp) {
		tdbsrc[hashval] = tdbp->tdb_snext;
	} else {
		for (tdbpp = tdbsrc[hashval]; tdbpp != NULL;
		    tdbpp = tdbpp->tdb_snext) {
			if (tdbpp->tdb_snext == tdbp) {
				tdbpp->tdb_snext = tdbp->tdb_snext;
				break;
			}
		}
	}

	tdbp->tdb_snext = NULL;
}

void
tdb_cleanspd(struct tdb *tdbp)
{
	struct ipsec_policy *ipo;

	mtx_enter(&ipo_tdb_mtx);
	while ((ipo = TAILQ_FIRST(&tdbp->tdb_policy_head)) != NULL) {
		TAILQ_REMOVE(&tdbp->tdb_policy_head, ipo, ipo_tdb_next);
		tdb_unref(ipo->ipo_tdb);
		ipo->ipo_tdb = NULL;
		ipo->ipo_last_searched = 0; /* Force a re-search. */
	}
	mtx_leave(&ipo_tdb_mtx);
}

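/*
 * A TDB bundle chains SAs that are applied in sequence via tdb_onext on
 * output and tdb_inext on input, e.g. ESP followed by AH.  Each link
 * holds a reference in both directions; both sides are dropped here.
 */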
void
tdb_unbundle(struct tdb *tdbp)
{
	if (tdbp->tdb_onext != NULL) {
		if (tdbp->tdb_onext->tdb_inext == tdbp) {
			tdb_unref(tdbp);	/* to us */
			tdbp->tdb_onext->tdb_inext = NULL;
		}
		tdb_unref(tdbp->tdb_onext);	/* to other */
		tdbp->tdb_onext = NULL;
	}
	if (tdbp->tdb_inext != NULL) {
		if (tdbp->tdb_inext->tdb_onext == tdbp) {
			tdb_unref(tdbp);	/* to us */
			tdbp->tdb_inext->tdb_onext = NULL;
		}
		tdb_unref(tdbp->tdb_inext);	/* to other */
		tdbp->tdb_inext = NULL;
	}
}

void
tdb_deltimeouts(struct tdb *tdbp)
{
	mtx_enter(&tdbp->tdb_mtx);
	tdbp->tdb_flags &= ~(TDBF_FIRSTUSE | TDBF_SOFT_FIRSTUSE | TDBF_TIMER |
	    TDBF_SOFT_TIMER);
	if (timeout_del(&tdbp->tdb_timer_tmo))
		tdb_unref(tdbp);
	if (timeout_del(&tdbp->tdb_first_tmo))
		tdb_unref(tdbp);
	if (timeout_del(&tdbp->tdb_stimer_tmo))
		tdb_unref(tdbp);
	if (timeout_del(&tdbp->tdb_sfirst_tmo))
		tdb_unref(tdbp);
	mtx_leave(&tdbp->tdb_mtx);
}

struct tdb *
tdb_ref(struct tdb *tdb)
{
	if (tdb == NULL)
		return NULL;
	refcnt_take(&tdb->tdb_refcnt);
	return tdb;
}

void
tdb_unref(struct tdb *tdb)
{
	if (tdb == NULL)
		return;
	if (refcnt_rele(&tdb->tdb_refcnt) == 0)
		return;
	tdb_free(tdb);
}

void
tdb_delete(struct tdb *tdbp)
{
	NET_ASSERT_LOCKED();

	mtx_enter(&tdbp->tdb_mtx);
	if (tdbp->tdb_flags & TDBF_DELETED) {
		mtx_leave(&tdbp->tdb_mtx);
		return;
	}
	tdbp->tdb_flags |= TDBF_DELETED;
	mtx_leave(&tdbp->tdb_mtx);
	tdb_unlink(tdbp);

	/* cleanup SPD references */
	tdb_cleanspd(tdbp);
	/* release tdb_onext/tdb_inext references */
	tdb_unbundle(tdbp);
	/* delete timeouts and release references */
	tdb_deltimeouts(tdbp);
	/* release the reference for tdb_unlink() */
	tdb_unref(tdbp);
}

/*
 * Allocate a TDB and initialize a few basic fields.
 */
struct tdb *
tdb_alloc(u_int rdomain)
{
	struct tdb *tdbp;

	tdbp = pool_get(&tdb_pool, PR_WAITOK | PR_ZERO);

	refcnt_init_trace(&tdbp->tdb_refcnt, DT_REFCNT_IDX_TDB);
	mtx_init(&tdbp->tdb_mtx, IPL_SOFTNET);
	TAILQ_INIT(&tdbp->tdb_policy_head);

	/* Record establishment time. */
	tdbp->tdb_established = gettime();

	/* Save routing domain */
	tdbp->tdb_rdomain = rdomain;
	tdbp->tdb_rdomain_post = rdomain;

	/* Initialize counters. */
	tdbp->tdb_counters = counters_alloc(tdb_ncounters);

	/* Initialize timeouts. */
	timeout_set_proc(&tdbp->tdb_timer_tmo, tdb_timeout, tdbp);
	timeout_set_proc(&tdbp->tdb_first_tmo, tdb_firstuse, tdbp);
	timeout_set_proc(&tdbp->tdb_stimer_tmo, tdb_soft_timeout, tdbp);
	timeout_set_proc(&tdbp->tdb_sfirst_tmo, tdb_soft_firstuse, tdbp);

	return tdbp;
}

void
tdb_free(struct tdb *tdbp)
{
	NET_ASSERT_LOCKED();

	if (tdbp->tdb_xform) {
		(*(tdbp->tdb_xform->xf_zeroize))(tdbp);
		tdbp->tdb_xform = NULL;
	}

#if NPFSYNC > 0 && defined(IPSEC)
	/* Cleanup pfsync references */
	pfsync_delete_tdb(tdbp);
#endif

	KASSERT(TAILQ_EMPTY(&tdbp->tdb_policy_head));

	if (tdbp->tdb_ids) {
		ipsp_ids_free(tdbp->tdb_ids);
		tdbp->tdb_ids = NULL;
	}

#if NPF > 0
	if (tdbp->tdb_tag) {
		pf_tag_unref(tdbp->tdb_tag);
		tdbp->tdb_tag = 0;
	}
#endif

	counters_free(tdbp->tdb_counters, tdb_ncounters);

	KASSERT(tdbp->tdb_onext == NULL);
	KASSERT(tdbp->tdb_inext == NULL);

	/* Remove expiration timeouts. */
	KASSERT(timeout_pending(&tdbp->tdb_timer_tmo) == 0);
	KASSERT(timeout_pending(&tdbp->tdb_first_tmo) == 0);
	KASSERT(timeout_pending(&tdbp->tdb_stimer_tmo) == 0);
	KASSERT(timeout_pending(&tdbp->tdb_sfirst_tmo) == 0);

	pool_put(&tdb_pool, tdbp);
}

/*
 * Do further initializations of a TDB.
 */
int
tdb_init(struct tdb *tdbp, u_int16_t alg, struct ipsecinit *ii)
{
	const struct xformsw *xsp;
	int err;
#ifdef ENCDEBUG
	char buf[INET6_ADDRSTRLEN];
#endif

	for (xsp = xformsw; xsp < xformswNXFORMSW; xsp++) {
		if (xsp->xf_type == alg) {
			err = (*(xsp->xf_init))(tdbp, xsp, ii);
			return err;
		}
	}

	DPRINTF("no alg %d for spi %08x, addr %s, proto %d",
	    alg, ntohl(tdbp->tdb_spi),
	    ipsp_address(&tdbp->tdb_dst, buf, sizeof(buf)),
	    tdbp->tdb_sproto);

	return EINVAL;
}

#if defined(DDB) || defined(ENCDEBUG)
/* Return a printable string for the address. */
const char *
ipsp_address(union sockaddr_union *sa, char *buf, socklen_t size)
{
	switch (sa->sa.sa_family) {
	case AF_INET:
		return inet_ntop(AF_INET, &sa->sin.sin_addr,
		    buf, (size_t)size);

#ifdef INET6
	case AF_INET6:
		return inet_ntop(AF_INET6, &sa->sin6.sin6_addr,
		    buf, (size_t)size);
#endif /* INET6 */

	default:
		return "(unknown address family)";
	}
}
#endif /* DDB || ENCDEBUG */

/* Check whether an IP{4,6} address is unspecified. */
int
ipsp_is_unspecified(union sockaddr_union addr)
{
	switch (addr.sa.sa_family) {
	case AF_INET:
		if (addr.sin.sin_addr.s_addr == INADDR_ANY)
			return 1;
		else
			return 0;

#ifdef INET6
	case AF_INET6:
		if (IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr))
			return 1;
		else
			return 0;
#endif /* INET6 */

	case 0: /* No family set. */
	default:
		return 1;
	}
}

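/*
 * ipsec_ids structures are interned: ipsp_ids_insert() returns the
 * already-present tree entry when an equal one exists, so at most one
 * instance per distinct ID pair is live and pointer comparison suffices.
 */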
int
ipsp_ids_match(struct ipsec_ids *a, struct ipsec_ids *b)
{
	return a == b;
}

struct ipsec_ids *
ipsp_ids_insert(struct ipsec_ids *ids)
{
	struct ipsec_ids *found;
	u_int32_t start_flow;

	mtx_enter(&ipsec_flows_mtx);

	found = RBT_INSERT(ipsec_ids_tree, &ipsec_ids_tree, ids);
	if (found) {
		/* if refcount was zero, then timeout is running */
		if ((++found->id_refcount) == 1) {
			LIST_REMOVE(found, id_gc_list);

			if (LIST_EMPTY(&ipsp_ids_gc_list))
				timeout_del(&ipsp_ids_gc_timeout);
		}
		mtx_leave(&ipsec_flows_mtx);
		DPRINTF("ids %p count %d", found, found->id_refcount);
		return found;
	}

	ids->id_refcount = 1;
	ids->id_flow = start_flow = ipsec_ids_next_flow;

	if (++ipsec_ids_next_flow == 0)
		ipsec_ids_next_flow = 1;
	while (RBT_INSERT(ipsec_ids_flows, &ipsec_ids_flows, ids) != NULL) {
		ids->id_flow = ipsec_ids_next_flow;
		if (++ipsec_ids_next_flow == 0)
			ipsec_ids_next_flow = 1;
		if (ipsec_ids_next_flow == start_flow) {
			RBT_REMOVE(ipsec_ids_tree, &ipsec_ids_tree, ids);
			mtx_leave(&ipsec_flows_mtx);
			DPRINTF("ipsec_ids_next_flow exhausted %u",
			    start_flow);
			return NULL;
		}
	}
	mtx_leave(&ipsec_flows_mtx);
	DPRINTF("new ids %p flow %u", ids, ids->id_flow);
	return ids;
}

struct ipsec_ids *
ipsp_ids_lookup(u_int32_t ipsecflowinfo)
{
	struct ipsec_ids	key;
	struct ipsec_ids	*ids;

	key.id_flow = ipsecflowinfo;

	mtx_enter(&ipsec_flows_mtx);
	ids = RBT_FIND(ipsec_ids_flows, &ipsec_ids_flows, &key);
	if (ids != NULL) {
		if (ids->id_refcount != 0)
			ids->id_refcount++;
		else
			ids = NULL;
	}
	mtx_leave(&ipsec_flows_mtx);

	return ids;
}

/* free ids only from delayed timeout */
void
ipsp_ids_gc(void *arg)
{
	struct ipsec_ids *ids, *tids;

	mtx_enter(&ipsec_flows_mtx);

	LIST_FOREACH_SAFE(ids, &ipsp_ids_gc_list, id_gc_list, tids) {
		KASSERT(ids->id_refcount == 0);
		DPRINTF("ids %p count %d", ids, ids->id_refcount);

		if ((--ids->id_gc_ttl) > 0)
			continue;

		LIST_REMOVE(ids, id_gc_list);
		RBT_REMOVE(ipsec_ids_tree, &ipsec_ids_tree, ids);
		RBT_REMOVE(ipsec_ids_flows, &ipsec_ids_flows, ids);
		free(ids->id_local, M_CREDENTIALS, 0);
		free(ids->id_remote, M_CREDENTIALS, 0);
		free(ids, M_CREDENTIALS, 0);
	}

	if (!LIST_EMPTY(&ipsp_ids_gc_list))
		timeout_add_sec(&ipsp_ids_gc_timeout, 1);

	mtx_leave(&ipsec_flows_mtx);
}

/* decrements refcount, actual free happens in gc */
void
ipsp_ids_free(struct ipsec_ids *ids)
{
	if (ids == NULL)
		return;

	mtx_enter(&ipsec_flows_mtx);

	/*
	 * If the refcount becomes zero, then a timeout is started. This
	 * timeout must be cancelled if refcount is increased from zero.
	 */
	DPRINTF("ids %p count %d", ids, ids->id_refcount);
	KASSERT(ids->id_refcount > 0);

	if ((--ids->id_refcount) > 0) {
		mtx_leave(&ipsec_flows_mtx);
		return;
	}

	/*
	 * Add a second for the case where ipsp_ids_gc() is already
	 * running and awaits the netlock to be released.
	 */
	ids->id_gc_ttl = ipsec_ids_idle + 1;

	if (LIST_EMPTY(&ipsp_ids_gc_list))
		timeout_add_sec(&ipsp_ids_gc_timeout, 1);
	LIST_INSERT_HEAD(&ipsp_ids_gc_list, ids, id_gc_list);

	mtx_leave(&ipsec_flows_mtx);
}

static int
ipsp_id_cmp(struct ipsec_id *a, struct ipsec_id *b)
{
	if (a->type > b->type)
		return 1;
	if (a->type < b->type)
		return -1;
	if (a->len > b->len)
		return 1;
	if (a->len < b->len)
		return -1;
	return memcmp(a + 1, b + 1, a->len);
}

static inline int
ipsp_ids_cmp(const struct ipsec_ids *a, const struct ipsec_ids *b)
{
	int ret;

	ret = ipsp_id_cmp(a->id_remote, b->id_remote);
	if (ret != 0)
		return ret;
	return ipsp_id_cmp(a->id_local, b->id_local);
}

static inline int
ipsp_ids_flow_cmp(const struct ipsec_ids *a, const struct ipsec_ids *b)
{
	if (a->id_flow > b->id_flow)
		return 1;
	if (a->id_flow < b->id_flow)
		return -1;
	return 0;
}