1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/stream.h>
28#include <sys/stropts.h>
29#include <sys/strsubr.h>
30#include <sys/errno.h>
31#include <sys/ddi.h>
32#include <sys/debug.h>
33#include <sys/cmn_err.h>
34#include <sys/stream.h>
35#include <sys/strlog.h>
36#include <sys/kmem.h>
37#include <sys/sunddi.h>
38#include <sys/tihdr.h>
39#include <sys/atomic.h>
40#include <sys/socket.h>
41#include <sys/sysmacros.h>
42#include <sys/crypto/common.h>
43#include <sys/crypto/api.h>
44#include <sys/zone.h>
45#include <netinet/in.h>
46#include <net/if.h>
47#include <net/pfkeyv2.h>
48#include <net/pfpolicy.h>
49#include <inet/common.h>
50#include <netinet/ip6.h>
51#include <inet/ip.h>
52#include <inet/ip_ire.h>
53#include <inet/ip6.h>
54#include <inet/ipsec_info.h>
55#include <inet/tcp.h>
56#include <inet/sadb.h>
57#include <inet/ipsec_impl.h>
58#include <inet/ipsecah.h>
59#include <inet/ipsecesp.h>
60#include <sys/random.h>
61#include <sys/dlpi.h>
62#include <sys/strsun.h>
63#include <sys/strsubr.h>
64#include <inet/ip_if.h>
65#include <inet/ipdrop.h>
66#include <inet/ipclassifier.h>
67#include <inet/sctp_ip.h>
68#include <sys/tsol/tnet.h>
69
70/*
71 * This source file contains Security Association Database (SADB) common
72 * routines.  They are linked in with the AH module.  Since AH has no chance
73 * of falling under export control, it was safe to link it in there.
74 */
75
76static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
77    ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
78    netstack_t *);
79static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
80static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
81			    netstack_t *);
82static void sadb_destroy(sadb_t *, netstack_t *);
83static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
84static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
85static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);
86
87static time_t sadb_add_time(time_t, uint64_t);
88static void lifetime_fuzz(ipsa_t *);
89static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
90static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
91static void init_ipsa_pair(ipsap_t *);
92static void destroy_ipsa_pair(ipsap_t *);
93static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
94static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
95
/*
 * ipsacq_maxpackets is tunable from /etc/system.  It is only declared
 * here (extern); the definition lives elsewhere.
 */
100extern uint64_t ipsacq_maxpackets;
101
/*
 * SET_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is
 * non-zero, set ipsa_<exp> to ipsa_addtime plus that lifetime.  The sum
 * is computed by sadb_add_time(), which saturates at TIME_MAX.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
	}								\
}

/*
 * UPDATE_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is
 * non-zero, compute ipsa_usetime plus that lifetime.  An ipsa_<exp> of
 * zero is simply replaced by the new value; otherwise the smaller
 * (earlier) of the two is kept.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t ue_when = sadb_add_time((sa)->ipsa_usetime,	\
		    (sa)->ipsa_ ## delta);				\
		(sa)->ipsa_ ## exp = (((sa)->ipsa_ ## exp) == 0) ?	\
		    ue_when : MIN((sa)->ipsa_ ## exp, ue_when);		\
	}								\
}
120
121
122/* wrap the macro so we can pass it as a function pointer */
123void
124sadb_sa_refrele(void *target)
125{
126	IPSA_REFRELE(((ipsa_t *)target));
127}
128
129/*
130 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
131 * a signed type.
132 */
133#define	TIME_MAX LONG_MAX
134
135/*
136 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
137 * time_t is defined to be a signed type with the same range as
138 * "long".  On ILP32 systems, we thus run the risk of wrapping around
139 * at end of time, as well as "overwrapping" the clock back around
140 * into a seemingly valid but incorrect future date earlier than the
141 * desired expiration.
142 *
143 * In order to avoid odd behavior (either negative lifetimes or loss
144 * of high order bits) when someone asks for bizarrely long SA
145 * lifetimes, we do a saturating add for expire times.
146 *
147 * We presume that ILP32 systems will be past end of support life when
148 * the 32-bit time_t overflows (a dangerous assumption, mind you..).
149 *
150 * On LP64, 2^64 seconds are about 5.8e11 years, at which point we
151 * will hopefully have figured out clever ways to avoid the use of
152 * fixed-sized integers in computation.
153 */
154static time_t
155sadb_add_time(time_t base, uint64_t delta)
156{
157	time_t sum;
158
159	/*
160	 * Clip delta to the maximum possible time_t value to
161	 * prevent "overwrapping" back into a shorter-than-desired
162	 * future time.
163	 */
164	if (delta > TIME_MAX)
165		delta = TIME_MAX;
166	/*
167	 * This sum may still overflow.
168	 */
169	sum = base + delta;
170
171	/*
172	 * .. so if the result is less than the base, we overflowed.
173	 */
174	if (sum < base)
175		sum = TIME_MAX;
176
177	return (sum);
178}
179
180/*
181 * Callers of this function have already created a working security
182 * association, and have found the appropriate table & hash chain.  All this
183 * function does is check duplicates, and insert the SA.  The caller needs to
184 * hold the hash bucket lock and increment the refcnt before insertion.
185 *
186 * Return 0 if success, EEXIST if collision.
187 */
188#define	SA_UNIQUE_MATCH(sa1, sa2) \
189	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
190	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
191
192int
193sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
194{
195	ipsa_t **ptpn = NULL;
196	ipsa_t *walker;
197	boolean_t unspecsrc;
198
199	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
200
201	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
202
203	walker = bucket->isaf_ipsa;
204	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
205
206	/*
207	 * Find insertion point (pointed to with **ptpn).  Insert at the head
208	 * of the list unless there's an unspecified source address, then
209	 * insert it after the last SA with a specified source address.
210	 *
211	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
212	 * checking for collisions.
213	 */
214
215	while (walker != NULL) {
216		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
217		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
218			if (walker->ipsa_spi == ipsa->ipsa_spi)
219				return (EEXIST);
220
221			mutex_enter(&walker->ipsa_lock);
222			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
223			    (walker->ipsa_flags & IPSA_F_USED) &&
224			    SA_UNIQUE_MATCH(walker, ipsa)) {
225				walker->ipsa_flags |= IPSA_F_CINVALID;
226			}
227			mutex_exit(&walker->ipsa_lock);
228		}
229
230		if (ptpn == NULL && unspecsrc) {
231			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
232			    walker->ipsa_addrfam))
233				ptpn = walker->ipsa_ptpn;
234			else if (walker->ipsa_next == NULL)
235				ptpn = &walker->ipsa_next;
236		}
237
238		walker = walker->ipsa_next;
239	}
240
241	if (ptpn == NULL)
242		ptpn = &bucket->isaf_ipsa;
243	ipsa->ipsa_next = *ptpn;
244	ipsa->ipsa_ptpn = ptpn;
245	if (ipsa->ipsa_next != NULL)
246		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
247	*ptpn = ipsa;
248	ipsa->ipsa_linklock = &bucket->isaf_lock;
249
250	return (0);
251}
252#undef SA_UNIQUE_MATCH
253
254/*
255 * Free a security association.  Its reference count is 0, which means
256 * I must free it.  The SA must be unlocked and must not be linked into
257 * any fanout list.
258 */
259static void
260sadb_freeassoc(ipsa_t *ipsa)
261{
262	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
263	mblk_t		*asyncmp, *mp;
264
265	ASSERT(ipss != NULL);
266	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
267	ASSERT(ipsa->ipsa_refcnt == 0);
268	ASSERT(ipsa->ipsa_next == NULL);
269	ASSERT(ipsa->ipsa_ptpn == NULL);
270
271
272	asyncmp = sadb_clear_lpkt(ipsa);
273	if (asyncmp != NULL) {
274		mp = ip_recv_attr_free_mblk(asyncmp);
275		ip_drop_packet(mp, B_TRUE, NULL,
276		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
277		    &ipss->ipsec_sadb_dropper);
278	}
279	mutex_enter(&ipsa->ipsa_lock);
280
281	if (ipsa->ipsa_tsl != NULL) {
282		label_rele(ipsa->ipsa_tsl);
283		ipsa->ipsa_tsl = NULL;
284	}
285
286	if (ipsa->ipsa_otsl != NULL) {
287		label_rele(ipsa->ipsa_otsl);
288		ipsa->ipsa_otsl = NULL;
289	}
290
291	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
292	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
293	mutex_exit(&ipsa->ipsa_lock);
294
295	/* bzero() these fields for paranoia's sake. */
296	if (ipsa->ipsa_authkey != NULL) {
297		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
298		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
299	}
300	if (ipsa->ipsa_encrkey != NULL) {
301		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
302		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
303	}
304	if (ipsa->ipsa_nonce_buf != NULL) {
305		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
306		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
307	}
308	if (ipsa->ipsa_src_cid != NULL) {
309		IPSID_REFRELE(ipsa->ipsa_src_cid);
310	}
311	if (ipsa->ipsa_dst_cid != NULL) {
312		IPSID_REFRELE(ipsa->ipsa_dst_cid);
313	}
314	if (ipsa->ipsa_emech.cm_param != NULL)
315		kmem_free(ipsa->ipsa_emech.cm_param,
316		    ipsa->ipsa_emech.cm_param_len);
317
318	mutex_destroy(&ipsa->ipsa_lock);
319	kmem_free(ipsa, sizeof (*ipsa));
320}
321
322/*
323 * Unlink a security association from a hash bucket.  Assume the hash bucket
324 * lock is held, but the association's lock is not.
325 *
326 * Note that we do not bump the bucket's generation number here because
327 * we might not be making a visible change to the set of visible SA's.
328 * All callers MUST bump the bucket's generation number before they unlock
329 * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
330 * was present in the bucket at the time it was locked.
331 */
332void
333sadb_unlinkassoc(ipsa_t *ipsa)
334{
335	ASSERT(ipsa->ipsa_linklock != NULL);
336	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
337
338	/* These fields are protected by the link lock. */
339	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
340	if (ipsa->ipsa_next != NULL) {
341		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
342		ipsa->ipsa_next = NULL;
343	}
344
345	ipsa->ipsa_ptpn = NULL;
346
347	/* This may destroy the SA. */
348	IPSA_REFRELE(ipsa);
349}
350
351void
352sadb_delete_cluster(ipsa_t *assoc)
353{
354	uint8_t protocol;
355
356	if (cl_inet_deletespi &&
357	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
358	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
359		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
360		    IPPROTO_AH : IPPROTO_ESP;
361		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
362		    protocol, assoc->ipsa_spi, NULL);
363	}
364}
365
366/*
367 * Create a larval security association with the specified SPI.	 All other
368 * fields are zeroed.
369 */
370static ipsa_t *
371sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
372    netstack_t *ns)
373{
374	ipsa_t *newbie;
375
376	/*
377	 * Allocate...
378	 */
379
380	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
381	if (newbie == NULL) {
382		/* Can't make new larval SA. */
383		return (NULL);
384	}
385
386	/* Assigned requested SPI, assume caller does SPI allocation magic. */
387	newbie->ipsa_spi = spi;
388	newbie->ipsa_netstack = ns;	/* No netstack_hold */
389
390	/*
391	 * Copy addresses...
392	 */
393
394	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
395	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
396
397	newbie->ipsa_addrfam = addrfam;
398
399	/*
400	 * Set common initialization values, including refcnt.
401	 */
402	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
403	newbie->ipsa_state = IPSA_STATE_LARVAL;
404	newbie->ipsa_refcnt = 1;
405	newbie->ipsa_freefunc = sadb_freeassoc;
406
407	/*
408	 * There aren't a lot of other common initialization values, as
409	 * they are copied in from the PF_KEY message.
410	 */
411
412	return (newbie);
413}
414
415/*
416 * Call me to initialize a security association fanout.
417 */
418static int
419sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
420{
421	isaf_t *table;
422	int i;
423
424	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
425	*tablep = table;
426
427	if (table == NULL)
428		return (ENOMEM);
429
430	for (i = 0; i < size; i++) {
431		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
432		table[i].isaf_ipsa = NULL;
433		table[i].isaf_gen = 0;
434	}
435
436	return (0);
437}
438
439/*
440 * Call me to initialize an acquire fanout
441 */
442static int
443sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
444{
445	iacqf_t *table;
446	int i;
447
448	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
449	*tablep = table;
450
451	if (table == NULL)
452		return (ENOMEM);
453
454	for (i = 0; i < size; i++) {
455		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
456		table[i].iacqf_ipsacq = NULL;
457	}
458
459	return (0);
460}
461
462/*
463 * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
464 * caller must clean up partial allocations.
465 */
466static int
467sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
468{
469	ASSERT(sp->sdb_of == NULL);
470	ASSERT(sp->sdb_if == NULL);
471	ASSERT(sp->sdb_acq == NULL);
472
473	sp->sdb_hashsize = size;
474	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
475		return (ENOMEM);
476	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
477		return (ENOMEM);
478	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
479		return (ENOMEM);
480
481	return (0);
482}
483
484/*
485 * Call me to initialize an SADB instance; fall back to default size on failure.
486 */
487static void
488sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
489    netstack_t *ns)
490{
491	ASSERT(sp->sdb_of == NULL);
492	ASSERT(sp->sdb_if == NULL);
493	ASSERT(sp->sdb_acq == NULL);
494
495	if (size < IPSEC_DEFAULT_HASH_SIZE)
496		size = IPSEC_DEFAULT_HASH_SIZE;
497
498	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
499
500		cmn_err(CE_WARN,
501		    "Unable to allocate %u entry IPv%u %s SADB hash table",
502		    size, ver, name);
503
504		sadb_destroy(sp, ns);
505		size = IPSEC_DEFAULT_HASH_SIZE;
506		cmn_err(CE_WARN, "Falling back to %d entries", size);
507		(void) sadb_init_trial(sp, size, KM_SLEEP);
508	}
509}
510
511
512/*
513 * Initialize an SADB-pair.
514 */
515void
516sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
517{
518	sadb_init(name, &sp->s_v4, size, 4, ns);
519	sadb_init(name, &sp->s_v6, size, 6, ns);
520
521	sp->s_satype = type;
522
523	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
524	if (type == SADB_SATYPE_AH) {
525		ipsec_stack_t	*ipss = ns->netstack_ipsec;
526
527		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
528		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
529		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
530	} else {
531		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
532		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
533	}
534}
535
536/*
537 * Deliver a single SADB_DUMP message representing a single SA.  This is
538 * called many times by sadb_dump().
539 *
540 * If the return value of this is ENOBUFS (not the same as ENOMEM), then
541 * the caller should take that as a hint that dupb() on the "original answer"
542 * failed, and that perhaps the caller should try again with a copyb()ed
543 * "original answer".
544 */
545static int
546sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
547    sadb_msg_t *samsg)
548{
549	mblk_t *answer;
550
551	answer = dupb(original_answer);
552	if (answer == NULL)
553		return (ENOBUFS);
554	answer->b_cont = sadb_sa2msg(ipsa, samsg);
555	if (answer->b_cont == NULL) {
556		freeb(answer);
557		return (ENOMEM);
558	}
559
560	/* Just do a putnext, and let keysock deal with flow control. */
561	putnext(pfkey_q, answer);
562	return (0);
563}
564
565/*
566 * Common function to allocate and prepare a keysock_out_t M_CTL message.
567 */
568mblk_t *
569sadb_keysock_out(minor_t serial)
570{
571	mblk_t *mp;
572	keysock_out_t *kso;
573
574	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
575	if (mp != NULL) {
576		mp->b_datap->db_type = M_CTL;
577		mp->b_wptr += sizeof (ipsec_info_t);
578		kso = (keysock_out_t *)mp->b_rptr;
579		kso->ks_out_type = KEYSOCK_OUT;
580		kso->ks_out_len = sizeof (*kso);
581		kso->ks_out_serial = serial;
582	}
583
584	return (mp);
585}
586
587/*
588 * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
589 * to keysock.
590 */
591static int
592sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
593    int num_entries, boolean_t do_peers, time_t active_time)
594{
595	int i, error = 0;
596	mblk_t *original_answer;
597	ipsa_t *walker;
598	sadb_msg_t *samsg;
599	time_t	current;
600
601	/*
602	 * For each IPSA hash bucket do:
603	 *	- Hold the mutex
604	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
605	 */
606	ASSERT(mp->b_cont != NULL);
607	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
608
609	original_answer = sadb_keysock_out(serial);
610	if (original_answer == NULL)
611		return (ENOMEM);
612
613	current = gethrestime_sec();
614	for (i = 0; i < num_entries; i++) {
615		mutex_enter(&fanout[i].isaf_lock);
616		for (walker = fanout[i].isaf_ipsa; walker != NULL;
617		    walker = walker->ipsa_next) {
618			if (!do_peers && walker->ipsa_haspeer)
619				continue;
620			if ((active_time != 0) &&
621			    ((current - walker->ipsa_lastuse) > active_time))
622				continue;
623			error = sadb_dump_deliver(pfkey_q, original_answer,
624			    walker, samsg);
625			if (error == ENOBUFS) {
626				mblk_t *new_original_answer;
627
628				/* Ran out of dupb's.  Try a copyb. */
629				new_original_answer = copyb(original_answer);
630				if (new_original_answer == NULL) {
631					error = ENOMEM;
632				} else {
633					freeb(original_answer);
634					original_answer = new_original_answer;
635					error = sadb_dump_deliver(pfkey_q,
636					    original_answer, walker, samsg);
637				}
638			}
639			if (error != 0)
640				break;	/* out of for loop. */
641		}
642		mutex_exit(&fanout[i].isaf_lock);
643		if (error != 0)
644			break;	/* out of for loop. */
645	}
646
647	freeb(original_answer);
648	return (error);
649}
650
651/*
652 * Dump an entire SADB; outbound first, then inbound.
653 */
654
655int
656sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
657{
658	int error;
659	time_t	active_time = 0;
660	sadb_x_edump_t	*edump =
661	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
662
663	if (edump != NULL) {
664		active_time = edump->sadb_x_edump_timeout;
665	}
666
667	/* Dump outbound */
668	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
669	    sp->sdb_hashsize, B_TRUE, active_time);
670	if (error)
671		return (error);
672
673	/* Dump inbound */
674	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
675	    sp->sdb_hashsize, B_FALSE, active_time);
676}
677
678/*
679 * Generic sadb table walker.
680 *
681 * Call "walkfn" for each SA in each bucket in "table"; pass the
682 * bucket, the entry and "cookie" to the callback function.
683 * Take care to ensure that walkfn can delete the SA without screwing
684 * up our traverse.
685 *
686 * The bucket is locked for the duration of the callback, both so that the
687 * callback can just call sadb_unlinkassoc() when it wants to delete something,
688 * and so that no new entries are added while we're walking the list.
689 */
690static void
691sadb_walker(isaf_t *table, uint_t numentries,
692    void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
693    void *cookie)
694{
695	int i;
696	for (i = 0; i < numentries; i++) {
697		ipsa_t *entry, *next;
698
699		mutex_enter(&table[i].isaf_lock);
700
701		for (entry = table[i].isaf_ipsa; entry != NULL;
702		    entry = next) {
703			next = entry->ipsa_next;
704			(*walkfn)(&table[i], entry, cookie);
705		}
706		mutex_exit(&table[i].isaf_lock);
707	}
708}
709
710/*
711 * Call me to free up a security association fanout.  Use the forever
712 * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
713 * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
714 * when a module is unloaded).
715 */
716static void
717sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
718    boolean_t inbound)
719{
720	int i;
721	isaf_t *table = *tablep;
722	uint8_t protocol;
723	ipsa_t *sa;
724	netstackid_t sid;
725
726	if (table == NULL)
727		return;
728
729	for (i = 0; i < numentries; i++) {
730		mutex_enter(&table[i].isaf_lock);
731		while ((sa = table[i].isaf_ipsa) != NULL) {
732			if (inbound && cl_inet_deletespi &&
733			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
734			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
735				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
736				    IPPROTO_AH : IPPROTO_ESP;
737				sid = sa->ipsa_netstack->netstack_stackid;
738				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
739				    NULL);
740			}
741			sadb_unlinkassoc(sa);
742		}
743		table[i].isaf_gen++;
744		mutex_exit(&table[i].isaf_lock);
745		if (forever)
746			mutex_destroy(&(table[i].isaf_lock));
747	}
748
749	if (forever) {
750		*tablep = NULL;
751		kmem_free(table, numentries * sizeof (*table));
752	}
753}
754
755/*
756 * Entry points to sadb_destroyer().
757 */
758static void
759sadb_flush(sadb_t *sp, netstack_t *ns)
760{
761	/*
762	 * Flush out each bucket, one at a time.  Were it not for keysock's
763	 * enforcement, there would be a subtlety where I could add on the
764	 * heels of a flush.  With keysock's enforcement, however, this
765	 * makes ESP's job easy.
766	 */
767	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
768	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
769
770	/* For each acquire, destroy it; leave the bucket mutex alone. */
771	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
772}
773
774static void
775sadb_destroy(sadb_t *sp, netstack_t *ns)
776{
777	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
778	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
779
780	/* For each acquire, destroy it, including the bucket mutex. */
781	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
782
783	ASSERT(sp->sdb_of == NULL);
784	ASSERT(sp->sdb_if == NULL);
785	ASSERT(sp->sdb_acq == NULL);
786}
787
788void
789sadbp_flush(sadbp_t *spp, netstack_t *ns)
790{
791	sadb_flush(&spp->s_v4, ns);
792	sadb_flush(&spp->s_v6, ns);
793}
794
795void
796sadbp_destroy(sadbp_t *spp, netstack_t *ns)
797{
798	sadb_destroy(&spp->s_v4, ns);
799	sadb_destroy(&spp->s_v6, ns);
800
801	if (spp->s_satype == SADB_SATYPE_AH) {
802		ipsec_stack_t	*ipss = ns->netstack_ipsec;
803
804		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
805	}
806}
807
808
809/*
810 * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
811 * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
812 * EINVAL.
813 */
814int
815sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
816    sadb_lifetime_t *idle)
817{
818	if (hard == NULL || soft == NULL)
819		return (0);
820
821	if (hard->sadb_lifetime_allocations != 0 &&
822	    soft->sadb_lifetime_allocations != 0 &&
823	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
824		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
825
826	if (hard->sadb_lifetime_bytes != 0 &&
827	    soft->sadb_lifetime_bytes != 0 &&
828	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
829		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
830
831	if (hard->sadb_lifetime_addtime != 0 &&
832	    soft->sadb_lifetime_addtime != 0 &&
833	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
834		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
835
836	if (hard->sadb_lifetime_usetime != 0 &&
837	    soft->sadb_lifetime_usetime != 0 &&
838	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
839		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
840
841	if (idle != NULL) {
842		if (hard->sadb_lifetime_addtime != 0 &&
843		    idle->sadb_lifetime_addtime != 0 &&
844		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
845			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
846
847		if (soft->sadb_lifetime_addtime != 0 &&
848		    idle->sadb_lifetime_addtime != 0 &&
849		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
850			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
851
852		if (hard->sadb_lifetime_usetime != 0 &&
853		    idle->sadb_lifetime_usetime != 0 &&
854		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
855			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
856
857		if (soft->sadb_lifetime_usetime != 0 &&
858		    idle->sadb_lifetime_usetime != 0 &&
859		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
860			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
861	}
862
863	return (0);
864}
865
866/*
867 * Sanity check sensitivity labels.
868 *
869 * For now, just reject labels on unlabeled systems.
870 */
871int
872sadb_labelchk(keysock_in_t *ksi)
873{
874	if (!is_system_labeled()) {
875		if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
876			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
877
878		if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
879			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
880	}
881
882	return (0);
883}
884
885/*
886 * Clone a security association for the purposes of inserting a single SA
887 * into inbound and outbound tables respectively. This function should only
888 * be called from sadb_common_add().
889 */
890static ipsa_t *
891sadb_cloneassoc(ipsa_t *ipsa)
892{
893	ipsa_t *newbie;
894	boolean_t error = B_FALSE;
895
896	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
897
898	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
899	if (newbie == NULL)
900		return (NULL);
901
902	/* Copy over what we can. */
903	*newbie = *ipsa;
904
905	/* bzero and initialize locks, in case *_init() allocates... */
906	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
907
908	if (newbie->ipsa_tsl != NULL)
909		label_hold(newbie->ipsa_tsl);
910
911	if (newbie->ipsa_otsl != NULL)
912		label_hold(newbie->ipsa_otsl);
913
914	/*
915	 * While somewhat dain-bramaged, the most graceful way to
916	 * recover from errors is to keep plowing through the
917	 * allocations, and getting what I can.  It's easier to call
918	 * sadb_freeassoc() on the stillborn clone when all the
919	 * pointers aren't pointing to the parent's data.
920	 */
921
922	if (ipsa->ipsa_authkey != NULL) {
923		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
924		    KM_NOSLEEP);
925		if (newbie->ipsa_authkey == NULL) {
926			error = B_TRUE;
927		} else {
928			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
929			    newbie->ipsa_authkeylen);
930
931			newbie->ipsa_kcfauthkey.ck_data =
932			    newbie->ipsa_authkey;
933		}
934
935		if (newbie->ipsa_amech.cm_param != NULL) {
936			newbie->ipsa_amech.cm_param =
937			    (char *)&newbie->ipsa_mac_len;
938		}
939	}
940
941	if (ipsa->ipsa_encrkey != NULL) {
942		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
943		    KM_NOSLEEP);
944		if (newbie->ipsa_encrkey == NULL) {
945			error = B_TRUE;
946		} else {
947			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
948			    newbie->ipsa_encrkeylen);
949
950			newbie->ipsa_kcfencrkey.ck_data =
951			    newbie->ipsa_encrkey;
952		}
953	}
954
955	newbie->ipsa_authtmpl = NULL;
956	newbie->ipsa_encrtmpl = NULL;
957	newbie->ipsa_haspeer = B_TRUE;
958
959	if (ipsa->ipsa_src_cid != NULL) {
960		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
961		IPSID_REFHOLD(ipsa->ipsa_src_cid);
962	}
963
964	if (ipsa->ipsa_dst_cid != NULL) {
965		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
966		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
967	}
968
969	if (error) {
970		sadb_freeassoc(newbie);
971		return (NULL);
972	}
973
974	return (newbie);
975}
976
977/*
978 * Initialize a SADB address extension at the address specified by addrext.
979 * Return a pointer to the end of the new address extension.
980 */
981static uint8_t *
982sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
983    sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
984{
985	struct sockaddr_in *sin;
986	struct sockaddr_in6 *sin6;
987	uint8_t *cur = start;
988	int addrext_len;
989	int sin_len;
990	sadb_address_t *addrext	= (sadb_address_t *)cur;
991
992	if (cur == NULL)
993		return (NULL);
994
995	cur += sizeof (*addrext);
996	if (cur > end)
997		return (NULL);
998
999	addrext->sadb_address_proto = proto;
1000	addrext->sadb_address_prefixlen = prefix;
1001	addrext->sadb_address_reserved = 0;
1002	addrext->sadb_address_exttype = exttype;
1003
1004	switch (af) {
1005	case AF_INET:
1006		sin = (struct sockaddr_in *)cur;
1007		sin_len = sizeof (*sin);
1008		cur += sin_len;
1009		if (cur > end)
1010			return (NULL);
1011
1012		sin->sin_family = af;
1013		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1014		sin->sin_port = port;
1015		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1016		break;
1017	case AF_INET6:
1018		sin6 = (struct sockaddr_in6 *)cur;
1019		sin_len = sizeof (*sin6);
1020		cur += sin_len;
1021		if (cur > end)
1022			return (NULL);
1023
1024		bzero(sin6, sizeof (*sin6));
1025		sin6->sin6_family = af;
1026		sin6->sin6_port = port;
1027		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1028		break;
1029	}
1030
1031	addrext_len = roundup(cur - start, sizeof (uint64_t));
1032	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1033
1034	cur = start + addrext_len;
1035	if (cur > end)
1036		cur = NULL;
1037
1038	return (cur);
1039}
1040
1041/*
1042 * Construct a key management cookie extension.
1043 */
1044
1045static uint8_t *
1046sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1047{
1048	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1049
1050	if (cur == NULL)
1051		return (NULL);
1052
1053	cur += sizeof (*kmcext);
1054
1055	if (cur > end)
1056		return (NULL);
1057
1058	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1059	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1060	kmcext->sadb_x_kmc_proto = kmp;
1061	kmcext->sadb_x_kmc_cookie = kmc;
1062	kmcext->sadb_x_kmc_reserved = 0;
1063
1064	return (cur);
1065}
1066
/*
 * Build a newly allocated mblk holding a complete PF_KEY message that
 * describes the SA "ipsa", with all of the relevant extensions.
 * This is mostly used for SADB_GET, and SADB_DUMP.
 *
 * The base header is copied from "samsg"; its sadb_msg_len and
 * sadb_msg_satype are then overwritten to match the new message and the SA.
 *
 * The function works in two passes:
 *   1. A sizing pass that adds up the length of every extension that will
 *	be emitted and records which optional ones are present.  This pass
 *	reads SA fields before ipsa_lock is taken.
 *   2. A fill pass, done while holding ipsa->ipsa_lock, that writes the
 *	extensions in order.  Any extension emitted here must have been
 *	accounted for in pass 1, or the writes run past the allocation.
 *
 * Returns the new mblk, or NULL if the SA's address family is neither
 * AF_INET nor AF_INET6, if allocb() fails, or if one of the address or
 * cookie extensions does not fit (in which case the mblk is freed).
 */
static mblk_t *
sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
{
	/*
	 * Sizes in bytes.  Apart from alloclen and addrsize, each one is
	 * only assigned when the matching extension is present, so each is
	 * only meaningful when its presence flag below is B_TRUE.
	 */
	int alloclen, addrsize, paddrsize, authsize, encrsize;
	int srcidsize, dstidsize, senslen, osenslen;
	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
				/* src/dst and proxy sockaddrs. */
	/*
	 * The following are pointers into the PF_KEY message this
	 * function creates.
	 */
	sadb_msg_t *newsamsg;
	sadb_sa_t *assoc;
	sadb_lifetime_t *lt;
	sadb_key_t *key;
	sadb_ident_t *ident;
	sadb_sens_t *sens;
	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
	sadb_x_replay_ctr_t *repl_ctr;
	sadb_x_pair_t *pair_ext;

	mblk_t *mp;
	uint8_t *cur, *end;
	/* These indicate the presence of the above extension fields. */
	boolean_t soft = B_FALSE, hard = B_FALSE;
	boolean_t isrc = B_FALSE, idst = B_FALSE;
	boolean_t auth = B_FALSE, encr = B_FALSE;
	boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
	boolean_t srcid = B_FALSE, dstid = B_FALSE;
	boolean_t idle;
	boolean_t paired;
	uint32_t otherspi;	/* Only assigned when paired is B_TRUE. */

	/* First off, figure out the allocation length for this message. */
	/*
	 * Constant stuff.  This includes base, SA, address (src, dst),
	 * and lifetime (current).
	 */
	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
	    sizeof (sadb_lifetime_t);

	/* Each address extension is header + sockaddr, padded to 8 bytes. */
	fam = ipsa->ipsa_addrfam;
	switch (fam) {
	case AF_INET:
		addrsize = roundup(sizeof (struct sockaddr_in) +
		    sizeof (sadb_address_t), sizeof (uint64_t));
		break;
	case AF_INET6:
		addrsize = roundup(sizeof (struct sockaddr_in6) +
		    sizeof (sadb_address_t), sizeof (uint64_t));
		break;
	default:
		/* Unsupported family; nothing has been allocated yet. */
		return (NULL);
	}
	/*
	 * Allocate TWO address extensions, for source and destination.
	 * (Thus, the * 2.)
	 */
	alloclen += addrsize * 2;
	/* NAT-T addresses use the same (outer) family, hence addrsize. */
	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
		alloclen += addrsize;
	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
		alloclen += addrsize;

	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
		paired = B_TRUE;
		alloclen += sizeof (sadb_x_pair_t);
		otherspi = ipsa->ipsa_otherspi;
	} else {
		paired = B_FALSE;
	}

	/* How 'bout other lifetimes? */
	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
		alloclen += sizeof (sadb_lifetime_t);
		soft = B_TRUE;
	}

	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
		alloclen += sizeof (sadb_lifetime_t);
		hard = B_TRUE;
	}

	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
		alloclen += sizeof (sadb_lifetime_t);
		idle = B_TRUE;
	} else {
		idle = B_FALSE;
	}

	/* Inner addresses. */
	if (ipsa->ipsa_innerfam != 0) {
		pfam = ipsa->ipsa_innerfam;
		switch (pfam) {
		case AF_INET6:
			paddrsize = roundup(sizeof (struct sockaddr_in6) +
			    sizeof (sadb_address_t), sizeof (uint64_t));
			break;
		case AF_INET:
			paddrsize = roundup(sizeof (struct sockaddr_in) +
			    sizeof (sadb_address_t), sizeof (uint64_t));
			break;
		default:
			/* An SA with any other inner family is fatal. */
			cmn_err(CE_PANIC,
			    "IPsec SADB: Proxy length failure.\n");
			break;
		}
		/* Inner source and destination are always emitted together. */
		isrc = B_TRUE;
		idst = B_TRUE;
		alloclen += 2 * paddrsize;
	}

	/* For the following fields, assume that length != 0 ==> stuff */
	if (ipsa->ipsa_authkeylen != 0) {
		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
		    sizeof (uint64_t));
		alloclen += authsize;
		auth = B_TRUE;
	}

	if (ipsa->ipsa_encrkeylen != 0) {
		/* Room for the key itself plus ipsa_nonce_len extra bytes. */
		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
		alloclen += encrsize;
		encr = B_TRUE;
	} else {
		encr = B_FALSE;
	}

	if (ipsa->ipsa_tsl != NULL) {
		senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
		alloclen += senslen;
		sensinteg = B_TRUE;
	}

	if (ipsa->ipsa_otsl != NULL) {
		osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
		alloclen += osenslen;
		osensinteg = B_TRUE;
	}

	/*
	 * Must use strlen() here for lengths.  Identities use NULL
	 * pointers to indicate their nonexistence.  The "+ 1" leaves room
	 * for the terminating NUL that strcpy() writes in the fill pass.
	 */
	if (ipsa->ipsa_src_cid != NULL) {
		srcidsize = roundup(sizeof (sadb_ident_t) +
		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
		    sizeof (uint64_t));
		alloclen += srcidsize;
		srcid = B_TRUE;
	}

	if (ipsa->ipsa_dst_cid != NULL) {
		dstidsize = roundup(sizeof (sadb_ident_t) +
		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
		    sizeof (uint64_t));
		alloclen += dstidsize;
		dstid = B_TRUE;
	}

	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
		alloclen += sizeof (sadb_x_kmc_t);

	if (ipsa->ipsa_replay != 0) {
		alloclen += sizeof (sadb_x_replay_ctr_t);
	}

	/* Make sure the allocation length is a multiple of 8 bytes. */
	ASSERT((alloclen & 0x7) == 0);

	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
	mp = allocb(alloclen, BPRI_HI);
	if (mp == NULL)
		return (NULL);
	/* Zero everything so padding and unset fields are never garbage. */
	bzero(mp->b_rptr, alloclen);

	mp->b_wptr += alloclen;
	end = mp->b_wptr;
	/* Start from the caller's base header, then fix up the length. */
	newsamsg = (sadb_msg_t *)mp->b_rptr;
	*newsamsg = *samsg;
	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);

	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */

	newsamsg->sadb_msg_satype = ipsa->ipsa_type;

	/* The SA extension immediately follows the base header. */
	assoc = (sadb_sa_t *)(newsamsg + 1);
	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
	assoc->sadb_sa_exttype = SADB_EXT_SA;
	assoc->sadb_sa_spi = ipsa->ipsa_spi;
	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
	assoc->sadb_sa_state = ipsa->ipsa_state;
	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
	assoc->sadb_sa_flags = ipsa->ipsa_flags;

	/* The current lifetime is always present. */
	lt = (sadb_lifetime_t *)(assoc + 1);
	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
	/* We do not support the concept. */
	lt->sadb_lifetime_allocations = 0;
	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;

	/*
	 * Optional lifetimes are packed back to back after the current one;
	 * "lt" is left pointing at the last lifetime written.
	 */
	if (hard) {
		lt++;
		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
	}

	if (soft) {
		lt++;
		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
	}

	if (idle) {
		/* Allocations and bytes stay zero from the bzero() above. */
		lt++;
		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
	}

	cur = (uint8_t *)(lt + 1);

	/*
	 * From here through the cookie extension, the helpers bounds-check
	 * against "end" and return NULL on overflow; every failure frees
	 * the message and leaves through "bail" so the lock is dropped.
	 */

	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
	    SA_PROTO(ipsa), 0);
	if (cur == NULL) {
		freemsg(mp);
		mp = NULL;
		goto bail;
	}

	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
	    SA_PROTO(ipsa), 0);
	if (cur == NULL) {
		freemsg(mp);
		mp = NULL;
		goto bail;
	}

	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
		    IPPROTO_UDP, 0);
		if (cur == NULL) {
			freemsg(mp);
			mp = NULL;
			goto bail;
		}
	}

	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
		    IPPROTO_UDP, 0);
		if (cur == NULL) {
			freemsg(mp);
			mp = NULL;
			goto bail;
		}
	}

	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
	if (isrc) {
		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
		if (cur == NULL) {
			freemsg(mp);
			mp = NULL;
			goto bail;
		}
	}

	if (idst) {
		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
		if (cur == NULL) {
			freemsg(mp);
			mp = NULL;
			goto bail;
		}
	}

	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
		cur = sadb_make_kmc_ext(cur, end,
		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
		if (cur == NULL) {
			freemsg(mp);
			mp = NULL;
			goto bail;
		}
	}

	/*
	 * The remaining extensions are written without further bounds
	 * checks; they rely on the sizing pass above.  After each one,
	 * "walker" is advanced by the extension's own length, which is
	 * stored in 64-bit units (hence the uint64_t pointer arithmetic)
	 * and is read back through the generic sadb_ext_t view.
	 */
	walker = (sadb_ext_t *)cur;
	if (auth) {
		key = (sadb_key_t *)walker;
		key->sadb_key_len = SADB_8TO64(authsize);
		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
		key->sadb_key_bits = ipsa->ipsa_authkeybits;
		key->sadb_key_reserved = 0;
		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (encr) {
		uint8_t *buf_ptr;
		key = (sadb_key_t *)walker;
		key->sadb_key_len = SADB_8TO64(encrsize);
		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
		/* The reserved field carries the salt length in bits. */
		key->sadb_key_reserved = ipsa->ipsa_saltbits;
		buf_ptr = (uint8_t *)(key + 1);
		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
		/*
		 * The salt, if any, directly follows the key bytes.
		 * NOTE(review): the sizing pass reserved ipsa_nonce_len
		 * bytes but ipsa_saltlen bytes are copied here -- confirm
		 * that ipsa_saltlen never exceeds ipsa_nonce_len.
		 */
		if (ipsa->ipsa_salt != NULL) {
			buf_ptr += ipsa->ipsa_encrkeylen;
			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
		}
		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (srcid) {
		ident = (sadb_ident_t *)walker;
		ident->sadb_ident_len = SADB_8TO64(srcidsize);
		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
		ident->sadb_ident_id = 0;
		ident->sadb_ident_reserved = 0;
		/* Safe: srcidsize was computed from strlen() of this string. */
		(void) strcpy((char *)(ident + 1),
		    ipsa->ipsa_src_cid->ipsid_cid);
		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (dstid) {
		ident = (sadb_ident_t *)walker;
		ident->sadb_ident_len = SADB_8TO64(dstidsize);
		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
		ident->sadb_ident_id = 0;
		ident->sadb_ident_reserved = 0;
		/* Safe: dstidsize was computed from strlen() of this string. */
		(void) strcpy((char *)(ident + 1),
		    ipsa->ipsa_dst_cid->ipsid_cid);
		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (sensinteg) {
		sens = (sadb_sens_t *)walker;
		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
		    ipsa->ipsa_tsl, senslen);

		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (osensinteg) {
		sens = (sadb_sens_t *)walker;

		sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
		    ipsa->ipsa_otsl, osenslen);
		/* Overrides whatever flags sadb_sens_from_label() stored. */
		if (ipsa->ipsa_mac_exempt)
			sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;

		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (paired) {
		pair_ext = (sadb_x_pair_t *)walker;

		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
		/* SPI captured during the sizing pass above. */
		pair_ext->sadb_x_pair_spi = otherspi;

		walker = (sadb_ext_t *)((uint64_t *)walker +
		    walker->sadb_ext_len);
	}

	if (ipsa->ipsa_replay != 0) {
		repl_ctr = (sadb_x_replay_ctr_t *)walker;
		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
		repl_ctr->sadb_x_rc_replay64 = 0;
		/* Last extension; walker is not read again after this. */
		walker = (sadb_ext_t *)(repl_ctr + 1);
	}

bail:
	/* Pardon any delays... */
	mutex_exit(&ipsa->ipsa_lock);

	/* mp is NULL here if one of the bounds-checked helpers failed. */
	return (mp);
}
1486
1487/*
1488 * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1489 * and adjust base message accordingly.
1490 *
1491 * Assume message is pulled up in one piece of contiguous memory.
1492 *
1493 * Say if we start off with:
1494 *
1495 * +------+----+-------------+-----------+---------------+---------------+
1496 * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1497 * +------+----+-------------+-----------+---------------+---------------+
1498 *
1499 * we will end up with
1500 *
1501 * +------+----+-------------+-----------+---------------+
1502 * | base | SA | source addr | dest addr | soft lifetime |
1503 * +------+----+-------------+-----------+---------------+
1504 */
1505static void
1506sadb_strip(sadb_msg_t *samsg)
1507{
1508	sadb_ext_t *ext;
1509	uint8_t *target = NULL;
1510	uint8_t *msgend;
1511	int sofar = SADB_8TO64(sizeof (*samsg));
1512	int copylen;
1513
1514	ext = (sadb_ext_t *)(samsg + 1);
1515	msgend = (uint8_t *)samsg;
1516	msgend += SADB_64TO8(samsg->sadb_msg_len);
1517	while ((uint8_t *)ext < msgend) {
1518		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1519		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1520		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1521		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1522			/*
1523			 * Aha!	 I found a header to be erased.
1524			 */
1525
1526			if (target != NULL) {
1527				/*
1528				 * If I had a previous header to be erased,
1529				 * copy over it.  I can get away with just
1530				 * copying backwards because the target will
1531				 * always be 8 bytes behind the source.
1532				 */
1533				copylen = ((uint8_t *)ext) - (target +
1534				    SADB_64TO8(
1535				    ((sadb_ext_t *)target)->sadb_ext_len));
1536				ovbcopy(((uint8_t *)ext - copylen), target,
1537				    copylen);
1538				target += copylen;
1539				((sadb_ext_t *)target)->sadb_ext_len =
1540				    SADB_8TO64(((uint8_t *)ext) - target +
1541				    SADB_64TO8(ext->sadb_ext_len));
1542			} else {
1543				target = (uint8_t *)ext;
1544			}
1545		} else {
1546			sofar += ext->sadb_ext_len;
1547		}
1548
1549		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1550	}
1551
1552	ASSERT((uint8_t *)ext == msgend);
1553
1554	if (target != NULL) {
1555		copylen = ((uint8_t *)ext) - (target +
1556		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1557		if (copylen != 0)
1558			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1559	}
1560
1561	/* Adjust samsg. */
1562	samsg->sadb_msg_len = (uint16_t)sofar;
1563
1564	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1565}
1566
1567/*
1568 * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1569 * followed by an M_DATA with a PF_KEY message in it.  The serial of
1570 * the sending keysock instance is included.
1571 */
1572void
1573sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1574    uint_t serial)
1575{
1576	mblk_t *msg = mp->b_cont;
1577	sadb_msg_t *samsg;
1578	keysock_out_t *kso;
1579
1580	/*
1581	 * Enough functions call this to merit a NULL queue check.
1582	 */
1583	if (pfkey_q == NULL) {
1584		freemsg(mp);
1585		return;
1586	}
1587
1588	ASSERT(msg != NULL);
1589	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1590	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1591	samsg = (sadb_msg_t *)msg->b_rptr;
1592	kso = (keysock_out_t *)mp->b_rptr;
1593
1594	kso->ks_out_type = KEYSOCK_OUT;
1595	kso->ks_out_len = sizeof (*kso);
1596	kso->ks_out_serial = serial;
1597
1598	/*
1599	 * Only send the base message up in the event of an error.
1600	 * Don't worry about bzero()-ing, because it was probably bogus
1601	 * anyway.
1602	 */
1603	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1604	samsg = (sadb_msg_t *)msg->b_rptr;
1605	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1606	samsg->sadb_msg_errno = (uint8_t)error;
1607	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1608		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1609
1610	putnext(pfkey_q, mp);
1611}
1612
1613/*
1614 * Send a successful return packet back to keysock via the queue in pfkey_q.
1615 *
1616 * Often, an SA is associated with the reply message, it's passed in if needed,
1617 * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1618 * and the caller will release said refcnt.
1619 */
1620void
1621sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1622    keysock_in_t *ksi, ipsa_t *ipsa)
1623{
1624	keysock_out_t *kso;
1625	mblk_t *mp1;
1626	sadb_msg_t *newsamsg;
1627	uint8_t *oldend;
1628
1629	ASSERT((mp->b_cont != NULL) &&
1630	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1631	    ((void *)mp->b_rptr == (void *)ksi));
1632
1633	switch (samsg->sadb_msg_type) {
1634	case SADB_ADD:
1635	case SADB_UPDATE:
1636	case SADB_X_UPDATEPAIR:
1637	case SADB_X_DELPAIR_STATE:
1638	case SADB_FLUSH:
1639	case SADB_DUMP:
1640		/*
1641		 * I have all of the message already.  I just need to strip
1642		 * out the keying material and echo the message back.
1643		 *
1644		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1645		 * work.  When DUMP reaches here, it should only be a base
1646		 * message.
1647		 */
1648	justecho:
1649		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1650		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1651		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1652			sadb_strip(samsg);
1653			/* Assume PF_KEY message is contiguous. */
1654			ASSERT(mp->b_cont->b_cont == NULL);
1655			oldend = mp->b_cont->b_wptr;
1656			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1657			    SADB_64TO8(samsg->sadb_msg_len);
1658			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1659		}
1660		break;
1661	case SADB_GET:
1662		/*
1663		 * Do a lot of work here, because of the ipsa I just found.
1664		 * First construct the new PF_KEY message, then abandon
1665		 * the old one.
1666		 */
1667		mp1 = sadb_sa2msg(ipsa, samsg);
1668		if (mp1 == NULL) {
1669			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1670			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1671			return;
1672		}
1673		freemsg(mp->b_cont);
1674		mp->b_cont = mp1;
1675		break;
1676	case SADB_DELETE:
1677	case SADB_X_DELPAIR:
1678		if (ipsa == NULL)
1679			goto justecho;
1680		/*
1681		 * Because listening KMds may require more info, treat
1682		 * DELETE like a special case of GET.
1683		 */
1684		mp1 = sadb_sa2msg(ipsa, samsg);
1685		if (mp1 == NULL) {
1686			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1687			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1688			return;
1689		}
1690		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1691		sadb_strip(newsamsg);
1692		oldend = mp1->b_wptr;
1693		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1694		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1695		freemsg(mp->b_cont);
1696		mp->b_cont = mp1;
1697		break;
1698	default:
1699		if (mp != NULL)
1700			freemsg(mp);
1701		return;
1702	}
1703
1704	/* ksi is now null and void. */
1705	kso = (keysock_out_t *)ksi;
1706	kso->ks_out_type = KEYSOCK_OUT;
1707	kso->ks_out_len = sizeof (*kso);
1708	kso->ks_out_serial = ksi->ks_in_serial;
1709	/* We're ready to send... */
1710	putnext(pfkey_q, mp);
1711}
1712
1713/*
1714 * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1715 */
1716void
1717sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1718    void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1719{
1720	keysock_hello_ack_t *kha;
1721	queue_t *oldq;
1722
1723	ASSERT(OTHERQ(q) != NULL);
1724
1725	/*
1726	 * First, check atomically that I'm the first and only keysock
1727	 * instance.
1728	 *
1729	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1730	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1731	 * messages.
1732	 */
1733
1734	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1735	if (oldq != NULL) {
1736		ASSERT(oldq != q);
1737		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1738		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1739		freemsg(mp);
1740		return;
1741	}
1742
1743	kha = (keysock_hello_ack_t *)mp->b_rptr;
1744	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1745	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1746	kha->ks_hello_satype = (uint8_t)satype;
1747
1748	/*
1749	 * If we made it past the casptr, then we have "exclusive" access
1750	 * to the timeout handle.  Fire it off after the default ager
1751	 * interval.
1752	 */
1753	*top = qtimeout(*pfkey_qp, ager, agerarg,
1754	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1755
1756	putnext(*pfkey_qp, mp);
1757}
1758
1759/*
1760 * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1761 *
1762 * Check addresses themselves for wildcard or multicast.
1763 * Check ire table for local/non-local/broadcast.
1764 */
1765int
1766sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1767    netstack_t *ns)
1768{
1769	sadb_address_t *addr = (sadb_address_t *)ext;
1770	struct sockaddr_in *sin;
1771	struct sockaddr_in6 *sin6;
1772	int diagnostic, type;
1773	boolean_t normalized = B_FALSE;
1774
1775	ASSERT(ext != NULL);
1776	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1777	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1778	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1779	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1780	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1781	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1782
1783	/* Assign both sockaddrs, the compiler will do the right thing. */
1784	sin = (struct sockaddr_in *)(addr + 1);
1785	sin6 = (struct sockaddr_in6 *)(addr + 1);
1786
1787	if (sin6->sin6_family == AF_INET6) {
1788		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1789			/*
1790			 * Convert to an AF_INET sockaddr.  This means the
1791			 * return messages will have the extra space, but have
1792			 * AF_INET sockaddrs instead of AF_INET6.
1793			 *
1794			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1795			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1796			 * equal to AF_INET <v4>, it shouldnt be a huge
1797			 * problem.
1798			 */
1799			sin->sin_family = AF_INET;
1800			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1801			    &sin->sin_addr);
1802			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1803			normalized = B_TRUE;
1804		}
1805	} else if (sin->sin_family != AF_INET) {
1806		switch (ext->sadb_ext_type) {
1807		case SADB_EXT_ADDRESS_SRC:
1808			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1809			break;
1810		case SADB_EXT_ADDRESS_DST:
1811			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1812			break;
1813		case SADB_X_EXT_ADDRESS_INNER_SRC:
1814			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1815			break;
1816		case SADB_X_EXT_ADDRESS_INNER_DST:
1817			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1818			break;
1819		case SADB_X_EXT_ADDRESS_NATT_LOC:
1820			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1821			break;
1822		case SADB_X_EXT_ADDRESS_NATT_REM:
1823			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1824			break;
1825			/* There is no default, see above ASSERT. */
1826		}
1827bail:
1828		if (pfkey_q != NULL) {
1829			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1830			    serial);
1831		} else {
1832			/*
1833			 * Scribble in sadb_msg that we got passed in.
1834			 * Overload "mp" to be an sadb_msg pointer.
1835			 */
1836			sadb_msg_t *samsg = (sadb_msg_t *)mp;
1837
1838			samsg->sadb_msg_errno = EINVAL;
1839			samsg->sadb_x_msg_diagnostic = diagnostic;
1840		}
1841		return (KS_IN_ADDR_UNKNOWN);
1842	}
1843
1844	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1845	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1846		/*
1847		 * We need only check for prefix issues.
1848		 */
1849
1850		/* Set diagnostic now, in case we need it later. */
1851		diagnostic =
1852		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1853		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1854		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1855
1856		if (normalized)
1857			addr->sadb_address_prefixlen -= 96;
1858
1859		/*
1860		 * Verify and mask out inner-addresses based on prefix length.
1861		 */
1862		if (sin->sin_family == AF_INET) {
1863			if (addr->sadb_address_prefixlen > 32)
1864				goto bail;
1865			sin->sin_addr.s_addr &=
1866			    ip_plen_to_mask(addr->sadb_address_prefixlen);
1867		} else {
1868			in6_addr_t mask;
1869
1870			ASSERT(sin->sin_family == AF_INET6);
1871			/*
1872			 * ip_plen_to_mask_v6() returns NULL if the value in
1873			 * question is out of range.
1874			 */
1875			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1876			    &mask) == NULL)
1877				goto bail;
1878			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1879			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1880			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1881			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1882		}
1883
1884		/* We don't care in these cases. */
1885		return (KS_IN_ADDR_DONTCARE);
1886	}
1887
1888	if (sin->sin_family == AF_INET6) {
1889		/* Check the easy ones now. */
1890		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1891			return (KS_IN_ADDR_MBCAST);
1892		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1893			return (KS_IN_ADDR_UNSPEC);
1894		/*
1895		 * At this point, we're a unicast IPv6 address.
1896		 *
1897		 * XXX Zones alert -> me/notme decision needs to be tempered
1898		 * by what zone we're in when we go to zone-aware IPsec.
1899		 */
1900		if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1901		    IRE_LOCAL) {
1902			/* Hey hey, it's local. */
1903			return (KS_IN_ADDR_ME);
1904		}
1905	} else {
1906		ASSERT(sin->sin_family == AF_INET);
1907		if (sin->sin_addr.s_addr == INADDR_ANY)
1908			return (KS_IN_ADDR_UNSPEC);
1909		if (CLASSD(sin->sin_addr.s_addr))
1910			return (KS_IN_ADDR_MBCAST);
1911		/*
1912		 * At this point we're a unicast or broadcast IPv4 address.
1913		 *
1914		 * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1915		 *
1916		 * XXX Zones alert -> me/notme decision needs to be tempered
1917		 * by what zone we're in when we go to zone-aware IPsec.
1918		 */
1919		type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1920		switch (type) {
1921		case IRE_LOCAL:
1922			return (KS_IN_ADDR_ME);
1923		case IRE_BROADCAST:
1924			return (KS_IN_ADDR_MBCAST);
1925		}
1926	}
1927
1928	return (KS_IN_ADDR_NOTME);
1929}
1930
1931/*
1932 * Address normalizations and reality checks for inbound PF_KEY messages.
1933 *
1934 * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1935 * the source to AF_INET.  Do the same for the inner sources.
1936 */
1937boolean_t
1938sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1939{
1940	struct sockaddr_in *src, *isrc;
1941	struct sockaddr_in6 *dst, *idst;
1942	sadb_address_t *srcext, *dstext;
1943	uint16_t sport;
1944	sadb_ext_t **extv = ksi->ks_in_extv;
1945	int rc;
1946
1947	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1948		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1949		    ksi->ks_in_serial, ns);
1950		if (rc == KS_IN_ADDR_UNKNOWN)
1951			return (B_FALSE);
1952		if (rc == KS_IN_ADDR_MBCAST) {
1953			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1954			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1955			return (B_FALSE);
1956		}
1957		ksi->ks_in_srctype = rc;
1958	}
1959
1960	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1961		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1962		    ksi->ks_in_serial, ns);
1963		if (rc == KS_IN_ADDR_UNKNOWN)
1964			return (B_FALSE);
1965		if (rc == KS_IN_ADDR_UNSPEC) {
1966			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1967			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1968			return (B_FALSE);
1969		}
1970		ksi->ks_in_dsttype = rc;
1971	}
1972
1973	/*
1974	 * NAT-Traversal addrs are simple enough to not require all of
1975	 * the checks in sadb_addrcheck().  Just normalize or reject if not
1976	 * AF_INET.
1977	 */
1978	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1979		rc = sadb_addrcheck(pfkey_q, mp,
1980		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1981
1982		/*
1983		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
1984		 * always be NOTME, or UNSPEC (to handle both tunnel mode
1985		 * AND local-port flexibility).
1986		 */
1987		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1988			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1989			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1990			    ksi->ks_in_serial);
1991			return (B_FALSE);
1992		}
1993		src = (struct sockaddr_in *)
1994		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1995		if (src->sin_family != AF_INET) {
1996			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1997			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
1998			    ksi->ks_in_serial);
1999			return (B_FALSE);
2000		}
2001	}
2002
2003	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2004		rc = sadb_addrcheck(pfkey_q, mp,
2005		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2006
2007		/*
2008		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2009		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2010		 */
2011		if (rc != KS_IN_ADDR_NOTME &&
2012		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2013		    rc == KS_IN_ADDR_UNSPEC)) {
2014			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2015			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2016			    ksi->ks_in_serial);
2017			return (B_FALSE);
2018		}
2019		src = (struct sockaddr_in *)
2020		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2021		if (src->sin_family != AF_INET) {
2022			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2023			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2024			    ksi->ks_in_serial);
2025			return (B_FALSE);
2026		}
2027	}
2028
2029	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2030		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2031			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2032			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2033			    ksi->ks_in_serial);
2034			return (B_FALSE);
2035		}
2036
2037		if (sadb_addrcheck(pfkey_q, mp,
2038		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2039		    == KS_IN_ADDR_UNKNOWN ||
2040		    sadb_addrcheck(pfkey_q, mp,
2041		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2042		    == KS_IN_ADDR_UNKNOWN)
2043			return (B_FALSE);
2044
2045		isrc = (struct sockaddr_in *)
2046		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2047		    1);
2048		idst = (struct sockaddr_in6 *)
2049		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2050		    1);
2051		if (isrc->sin_family != idst->sin6_family) {
2052			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2053			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2054			    ksi->ks_in_serial);
2055			return (B_FALSE);
2056		}
2057	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2058			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2059			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2060			    ksi->ks_in_serial);
2061			return (B_FALSE);
2062	} else {
2063		isrc = NULL;	/* For inner/outer port check below. */
2064	}
2065
2066	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2067	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2068
2069	if (dstext == NULL || srcext == NULL)
2070		return (B_TRUE);
2071
2072	dst = (struct sockaddr_in6 *)(dstext + 1);
2073	src = (struct sockaddr_in *)(srcext + 1);
2074
2075	if (isrc != NULL &&
2076	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2077	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2078		/* Can't set inner and outer ports in one SA. */
2079		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2080		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2081		    ksi->ks_in_serial);
2082		return (B_FALSE);
2083	}
2084
2085	if (dst->sin6_family == src->sin_family)
2086		return (B_TRUE);
2087
2088	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2089		if (srcext->sadb_address_proto == 0) {
2090			srcext->sadb_address_proto = dstext->sadb_address_proto;
2091		} else if (dstext->sadb_address_proto == 0) {
2092			dstext->sadb_address_proto = srcext->sadb_address_proto;
2093		} else {
2094			/* Inequal protocols, neither were 0.  Report error. */
2095			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2096			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2097			    ksi->ks_in_serial);
2098			return (B_FALSE);
2099		}
2100	}
2101
2102	/*
2103	 * With the exception of an unspec IPv6 source and an IPv4
	 * destination, address families MUST be matched.
2105	 */
2106	if (src->sin_family == AF_INET ||
2107	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2108		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2109		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2110		return (B_FALSE);
2111	}
2112
2113	/*
2114	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2115	 * in the same place for sockaddr_in and sockaddr_in6.
2116	 */
2117	sport = src->sin_port;
2118	bzero(src, sizeof (*src));
2119	src->sin_family = AF_INET;
2120	src->sin_port = sport;
2121
2122	return (B_TRUE);
2123}
2124
2125/*
2126 * Set the results in "addrtype", given an IRE as requested by
2127 * sadb_addrcheck().
2128 */
2129int
2130sadb_addrset(ire_t *ire)
2131{
2132	if ((ire->ire_type & IRE_BROADCAST) ||
2133	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2134	    (ire->ire_ipversion == IPV6_VERSION &&
2135	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2136		return (KS_IN_ADDR_MBCAST);
2137	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2138		return (KS_IN_ADDR_ME);
2139	return (KS_IN_ADDR_NOTME);
2140}
2141
2142/*
2143 * Match primitives..
2144 * !!! TODO: short term: inner selectors
2145 *		ipv6 scope id (ifindex)
2146 * longer term:  zone id.  sensitivity label. uid.
2147 */
2148boolean_t
2149sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2150{
2151	return (sq->spi == sa->ipsa_spi);
2152}
2153
2154boolean_t
2155sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2156{
2157	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2158}
2159
2160boolean_t
2161sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2162{
2163	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2164}
2165
2166boolean_t
2167sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2168{
2169	return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2170}
2171
2172boolean_t
2173sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2174{
2175	return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2176}
2177
2178boolean_t
2179sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2180{
2181	return ((sa->ipsa_dst_cid != NULL) &&
2182	    (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2183	    (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2184
2185}
2186boolean_t
2187sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2188{
2189	return ((sa->ipsa_src_cid != NULL) &&
2190	    (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2191	    (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2192}
2193
2194boolean_t
2195sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2196{
2197#define	M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2198
2199	return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2200
2201#undef M
2202}
2203
2204/*
2205 * Common function which extracts several PF_KEY extensions for ease of
2206 * SADB matching.
2207 *
2208 * XXX TODO: weed out ipsa_query_t fields not used during matching
2209 * or afterwards?
2210 */
2211int
2212sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2213    ipsa_query_t *sq, int *diagnostic)
2214{
2215	int i;
2216	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2217
2218	for (i = 0; i < IPSA_NMATCH; i++)
2219		sq->matchers[i] = NULL;
2220
2221	ASSERT((req & ~match) == 0);
2222
2223	sq->req = req;
2224	sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2225	sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2226	sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2227
2228	if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2229		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2230		return (EINVAL);
2231	}
2232	if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2233		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2234		return (EINVAL);
2235	}
2236	if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2237		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2238		return (EINVAL);
2239	}
2240
2241	if (match & IPSA_Q_SA) {
2242		*mfpp++ = sadb_match_spi;
2243		sq->spi = sq->assoc->sadb_sa_spi;
2244	}
2245
2246	if (sq->dstext != NULL)
2247		sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2248	else {
2249		sq->dst = NULL;
2250		sq->dst6 = NULL;
2251		sq->dstaddr = NULL;
2252	}
2253
2254	if (sq->srcext != NULL)
2255		sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2256	else {
2257		sq->src = NULL;
2258		sq->src6 = NULL;
2259		sq->srcaddr = NULL;
2260	}
2261
2262	if (sq->dst != NULL)
2263		sq->af = sq->dst->sin_family;
2264	else if (sq->src != NULL)
2265		sq->af = sq->src->sin_family;
2266	else
2267		sq->af = AF_INET;
2268
2269	if (sq->af == AF_INET6) {
2270		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2271			*mfpp++ = sadb_match_dst_v6;
2272			sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2273			sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2274		} else {
2275			match &= ~IPSA_Q_DST;
2276			sq->dstaddr = ALL_ZEROES_PTR;
2277		}
2278
2279		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2280			sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2281			sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2282			if (sq->src6->sin6_family != AF_INET6) {
2283				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2284				return (EINVAL);
2285			}
2286			*mfpp++ = sadb_match_src_v6;
2287		} else {
2288			match &= ~IPSA_Q_SRC;
2289			sq->srcaddr = ALL_ZEROES_PTR;
2290		}
2291	} else {
2292		sq->src6 = sq->dst6 = NULL;
2293		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2294			*mfpp++ = sadb_match_dst_v4;
2295			sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2296		} else {
2297			match &= ~IPSA_Q_DST;
2298			sq->dstaddr = ALL_ZEROES_PTR;
2299		}
2300		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2301			sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2302			if (sq->src->sin_family != AF_INET) {
2303				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2304				return (EINVAL);
2305			}
2306			*mfpp++ = sadb_match_src_v4;
2307		} else {
2308			match &= ~IPSA_Q_SRC;
2309			sq->srcaddr = ALL_ZEROES_PTR;
2310		}
2311	}
2312
2313	sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2314	if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2315		sq->didstr = (char *)(sq->dstid + 1);
2316		sq->didtype = sq->dstid->sadb_ident_type;
2317		*mfpp++ = sadb_match_dstid;
2318	}
2319
2320	sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2321
2322	if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2323		sq->sidstr = (char *)(sq->srcid + 1);
2324		sq->sidtype = sq->srcid->sadb_ident_type;
2325		*mfpp++ = sadb_match_srcid;
2326	}
2327
2328	sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2329	sq->kmc = 0;
2330	sq->kmp = 0;
2331
2332	if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2333		sq->kmc = sq->kmcext->sadb_x_kmc_cookie;
2334		sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2335		*mfpp++ = sadb_match_kmc;
2336	}
2337
2338	if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2339		if (sq->af == AF_INET6)
2340			sq->sp = &sq->spp->s_v6;
2341		else
2342			sq->sp = &sq->spp->s_v4;
2343	} else {
2344		sq->sp = NULL;
2345	}
2346
2347	if (match & IPSA_Q_INBOUND) {
2348		sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2349		sq->inbound = &sq->sp->sdb_if[sq->inhash];
2350	} else {
2351		sq->inhash = 0;
2352		sq->inbound = NULL;
2353	}
2354
2355	if (match & IPSA_Q_OUTBOUND) {
2356		if (sq->af == AF_INET6) {
2357			sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2358		} else {
2359			sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2360		}
2361		sq->outbound = &sq->sp->sdb_of[sq->outhash];
2362	} else {
2363		sq->outhash = 0;
2364		sq->outbound = NULL;
2365	}
2366	sq->match = match;
2367	return (0);
2368}
2369
2370/*
2371 * Match an initialized query structure with a security association;
2372 * return B_TRUE on a match, B_FALSE on a miss.
2373 * Applies match functions set up by sadb_form_query() until one returns false.
2374 */
2375boolean_t
2376sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2377{
2378	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2379	ipsa_match_fn_t mfp;
2380
2381	for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2382		if (!mfp(sq, sa))
2383			return (B_FALSE);
2384	}
2385	return (B_TRUE);
2386}
2387
2388/*
2389 * Walker callback function to delete sa's based on src/dst address.
2390 * Assumes that we're called with *head locked, no other locks held;
2391 * Conveniently, and not coincidentally, this is both what sadb_walker
2392 * gives us and also what sadb_unlinkassoc expects.
2393 */
struct sadb_purge_state
{
	ipsa_query_t sq;	/* query filled in by sadb_form_query() */
	boolean_t inbnd;	/* B_TRUE while walking the inbound fanout */
	uint8_t sadb_sa_state;	/* state an SA must be in to be deleted */
};
2400
2401static void
2402sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2403{
2404	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2405
2406	ASSERT(MUTEX_HELD(&head->isaf_lock));
2407
2408	mutex_enter(&entry->ipsa_lock);
2409
2410	if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2411	    !sadb_match_query(&ps->sq, entry)) {
2412		mutex_exit(&entry->ipsa_lock);
2413		return;
2414	}
2415
2416	if (ps->inbnd) {
2417		sadb_delete_cluster(entry);
2418	}
2419	entry->ipsa_state = IPSA_STATE_DEAD;
2420	(void) sadb_torch_assoc(head, entry);
2421}
2422
2423/*
2424 * Common code to purge an SA with a matching src or dst address.
2425 * Don't kill larval SA's in such a purge.
2426 */
2427int
2428sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2429	int *diagnostic, queue_t *pfkey_q)
2430{
2431	struct sadb_purge_state ps;
2432	int error = sadb_form_query(ksi, 0,
2433	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2434	    &ps.sq, diagnostic);
2435
2436	if (error != 0)
2437		return (error);
2438
2439	/*
2440	 * This is simple, crude, and effective.
2441	 * Unimplemented optimizations (TBD):
2442	 * - we can limit how many places we search based on where we
2443	 * think the SA is filed.
2444	 * - if we get a dst address, we can hash based on dst addr to find
2445	 * the correct bucket in the outbound table.
2446	 */
2447	ps.inbnd = B_TRUE;
2448	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2449	ps.inbnd = B_FALSE;
2450	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2451
2452	ASSERT(mp->b_cont != NULL);
2453	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2454	    NULL);
2455	return (0);
2456}
2457
2458static void
2459sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
2460{
2461	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2462	isaf_t  *inbound_bucket;
2463	ipsa_t *peer_assoc;
2464	ipsa_query_t *sq = &ps->sq;
2465
2466	ASSERT(MUTEX_HELD(&head->isaf_lock));
2467
2468	mutex_enter(&entry->ipsa_lock);
2469
2470	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2471	    ((sq->srcaddr != NULL) &&
2472	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
2473		mutex_exit(&entry->ipsa_lock);
2474		return;
2475	}
2476
2477	/*
2478	 * The isaf_t *, which is passed in , is always an outbound bucket,
2479	 * and we are preserving the outbound-then-inbound hash-bucket lock
2480	 * ordering. The sadb_walker() which triggers this function is called
2481	 * only on the outbound fanout, and the corresponding inbound bucket
2482	 * lock is safe to acquire here.
2483	 */
2484
2485	if (entry->ipsa_haspeer) {
2486		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
2487		mutex_enter(&inbound_bucket->isaf_lock);
2488		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2489		    entry->ipsa_spi, entry->ipsa_srcaddr,
2490		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2491	} else {
2492		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
2493		mutex_enter(&inbound_bucket->isaf_lock);
2494		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2495		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2496		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2497	}
2498
2499	entry->ipsa_state = IPSA_STATE_DEAD;
2500	(void) sadb_torch_assoc(head, entry);
2501	if (peer_assoc != NULL) {
2502		mutex_enter(&peer_assoc->ipsa_lock);
2503		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2504		(void) sadb_torch_assoc(inbound_bucket, peer_assoc);
2505	}
2506	mutex_exit(&inbound_bucket->isaf_lock);
2507}
2508
2509static int
2510sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2511    int *diagnostic, queue_t *pfkey_q)
2512{
2513	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2514	struct sadb_purge_state ps;
2515	int error;
2516
2517	ps.sq.spp = spp;		/* XXX param */
2518
2519	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2520	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2521	    &ps.sq, diagnostic);
2522	if (error != 0)
2523		return (error);
2524
2525	ps.inbnd = B_FALSE;
2526	ps.sadb_sa_state = assoc->sadb_sa_state;
2527	sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2528	    sadb_delpair_state_one, &ps);
2529
2530	ASSERT(mp->b_cont != NULL);
2531	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2532	    ksi, NULL);
2533	return (0);
2534}
2535
2536/*
2537 * Common code to delete/get an SA.
2538 */
2539int
2540sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2541    int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2542{
2543	ipsa_query_t sq;
2544	ipsa_t *echo_target = NULL;
2545	ipsap_t ipsapp;
2546	uint_t	error = 0;
2547
2548	if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2549		return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2550
2551	sq.spp = spp;		/* XXX param */
2552	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2553	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2554	    &sq, diagnostic);
2555	if (error != 0)
2556		return (error);
2557
2558	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2559	if (error != 0) {
2560		return (error);
2561	}
2562
2563	echo_target = ipsapp.ipsap_sa_ptr;
2564	if (echo_target == NULL)
2565		echo_target = ipsapp.ipsap_psa_ptr;
2566
2567	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2568		/*
2569		 * Bucket locks will be required if SA is actually unlinked.
2570		 * get_ipsa_pair() returns valid hash bucket pointers even
2571		 * if it can't find a pair SA pointer. To prevent a potential
2572		 * deadlock, always lock the outbound bucket before the inbound.
2573		 */
2574		if (ipsapp.in_inbound_table) {
2575			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2576			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2577		} else {
2578			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2579			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2580		}
2581
2582		if (ipsapp.ipsap_sa_ptr != NULL) {
2583			mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2584			if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2585				sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2586			}
2587			ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2588			(void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2589			    ipsapp.ipsap_sa_ptr);
2590			/*
2591			 * sadb_torch_assoc() releases the ipsa_lock
2592			 * and calls sadb_unlinkassoc() which does a
2593			 * IPSA_REFRELE.
2594			 */
2595		}
2596		if (ipsapp.ipsap_psa_ptr != NULL) {
2597			mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2598			if (sadb_msg_type == SADB_X_DELPAIR ||
2599			    ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2600				if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2601				    IPSA_F_INBOUND) {
2602					sadb_delete_cluster
2603					    (ipsapp.ipsap_psa_ptr);
2604				}
2605				ipsapp.ipsap_psa_ptr->ipsa_state =
2606				    IPSA_STATE_DEAD;
2607				(void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2608				    ipsapp.ipsap_psa_ptr);
2609			} else {
2610				/*
2611				 * Only half of the "pair" has been deleted.
2612				 * Update the remaining SA and remove references
2613				 * to its pair SA, which is now gone.
2614				 */
2615				ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2616				ipsapp.ipsap_psa_ptr->ipsa_flags &=
2617				    ~IPSA_F_PAIRED;
2618				mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2619			}
2620		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2621			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2622			error = ESRCH;
2623		}
2624		mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2625		mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2626	}
2627
2628	ASSERT(mp->b_cont != NULL);
2629
2630	if (error == 0)
2631		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2632		    mp->b_cont->b_rptr, ksi, echo_target);
2633
2634	destroy_ipsa_pair(&ipsapp);
2635
2636	return (error);
2637}
2638
2639/*
2640 * This function takes a sadb_sa_t and finds the ipsa_t structure
 * and the isaf_t (hash bucket) that it's stored under. If the security
2642 * association has a peer, the ipsa_t structure and bucket for that security
2643 * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2644 * are returned as a ipsap_t.
2645 *
2646 * The hash buckets are returned for convenience, if the calling function
2647 * needs to use the hash bucket locks, say to remove the SA's, it should
2648 * take care to observe the convention of locking outbound bucket then
2649 * inbound bucket. The flag in_inbound_table provides direction.
2650 *
2651 * Note that a "pair" is defined as one (but not both) of the following:
2652 *
2653 * A security association which has a soft reference to another security
2654 * association via its SPI.
2655 *
2656 * A security association that is not obviously "inbound" or "outbound" so
2657 * it appears in both hash tables, the "peer" being the same security
2658 * association in the other hash table.
2659 *
 * This function returns ESRCH, and sets *diagnostic to
 * SADB_X_DIAGNOSTIC_SA_NOTFOUND, if the ipsa_t can't be found in the
 * inbound or outbound hash tables; otherwise it returns 0. If only one
 * ipsa_t is found, the pair ipsa_t will be NULL. Both isaf_t values are
 * valid provided at least one ipsa_t is found.
2664 */
static int
get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
{
	/* Copies of the found SA's addresses and other SPI, taken under lock. */
	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_spi;

	init_ipsa_pair(ipsapp);

	ipsapp->in_inbound_table = B_FALSE;

	/* Lock down both buckets. */
	mutex_enter(&sq->outbound->isaf_lock);
	mutex_enter(&sq->inbound->isaf_lock);

	/*
	 * Search the table suggested by the direction flag first, then fall
	 * back to the other one.  ipsap_bucket is the bucket that was
	 * searched last (where the SA is, if found); ipsap_pbucket is the
	 * opposite bucket.
	 */
	if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = sq->inbound;
			ipsapp->ipsap_pbucket = sq->outbound;
			ipsapp->in_inbound_table = B_TRUE;
		} else {
			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
			    sq->af);
			ipsapp->ipsap_bucket = sq->outbound;
			ipsapp->ipsap_pbucket = sq->inbound;
		}
	} else {
		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
		ipsapp->ipsap_sa_ptr =
		    ipsec_getassocbyspi(sq->outbound,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = sq->outbound;
			ipsapp->ipsap_pbucket = sq->inbound;
		} else {
			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
			    sq->af);
			ipsapp->ipsap_bucket = sq->inbound;
			ipsapp->ipsap_pbucket = sq->outbound;
			if (ipsapp->ipsap_sa_ptr != NULL)
				ipsapp->in_inbound_table = B_TRUE;
		}
	}

	/* Not in either table: report "SA not found". */
	if (ipsapp->ipsap_sa_ptr == NULL) {
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
		return (ESRCH);
	}

	/* For a larval SA in the inbound table, no pair is looked up. */
	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
	    ipsapp->in_inbound_table) {
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		return (0);
	}

	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
		/*
		 * haspeer implies no sa_pairing, look for same spi
		 * in other hashtable.
		 */
		ipsapp->ipsap_psa_ptr =
		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		return (0);
	}
	/*
	 * Snapshot what is needed to find the paired SA while the SA lock is
	 * still held, then drop every lock before searching for the pair.
	 */
	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
	IPSA_COPY_ADDR(&pair_srcaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
	IPSA_COPY_ADDR(&pair_dstaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	mutex_exit(&sq->inbound->isaf_lock);
	mutex_exit(&sq->outbound->isaf_lock);

	/* An other-SPI of zero means this SA is not paired. */
	if (pair_spi == 0) {
		ASSERT(ipsapp->ipsap_bucket != NULL);
		ASSERT(ipsapp->ipsap_pbucket != NULL);
		return (0);
	}

	/* found sa in outbound sadb, peer should be inbound */

	/*
	 * Recompute the pair's bucket.  The pair is looked up below with
	 * this SA's addresses swapped, so an outbound pair is hashed on this
	 * SA's source address, while an inbound pair is hashed on its SPI.
	 */
	if (ipsapp->in_inbound_table) {
		/* Found SA in inbound table, pair will be in outbound. */
		if (sq->af == AF_INET6) {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
			    *(uint32_t *)pair_srcaddr);
		} else {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
			    *(uint32_t *)pair_srcaddr);
		}
	} else {
		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
	}
	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
	/* Source and destination are swapped for the opposite direction. */
	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
	    pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
	ASSERT(ipsapp->ipsap_bucket != NULL);
	ASSERT(ipsapp->ipsap_pbucket != NULL);
	return (0);
}
2778
2779/*
2780 * Perform NAT-traversal cached checksum offset calculations here.
2781 */
2782static void
2783sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2784    sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2785    uint32_t *dst_addr_ptr)
2786{
2787	struct sockaddr_in *natt_loc, *natt_rem;
2788	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2789	uint32_t running_sum = 0;
2790
2791#define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2792
2793	if (natt_rem_ext != NULL) {
2794		uint32_t l_src;
2795		uint32_t l_rem;
2796
2797		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2798
2799		/* Ensured by sadb_addrfix(). */
2800		ASSERT(natt_rem->sin_family == AF_INET);
2801
2802		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2803		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2804		l_src = *src_addr_ptr;
2805		l_rem = *natt_rem_ptr;
2806
2807		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2808		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2809
2810		l_src = ntohl(l_src);
2811		DOWN_SUM(l_src);
2812		DOWN_SUM(l_src);
2813		l_rem = ntohl(l_rem);
2814		DOWN_SUM(l_rem);
2815		DOWN_SUM(l_rem);
2816
2817		/*
2818		 * We're 1's complement for checksums, so check for wraparound
2819		 * here.
2820		 */
2821		if (l_rem > l_src)
2822			l_src--;
2823
2824		running_sum += l_src - l_rem;
2825
2826		DOWN_SUM(running_sum);
2827		DOWN_SUM(running_sum);
2828	}
2829
2830	if (natt_loc_ext != NULL) {
2831		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2832
2833		/* Ensured by sadb_addrfix(). */
2834		ASSERT(natt_loc->sin_family == AF_INET);
2835
2836		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2837		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2838
2839		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2840		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2841
2842		/*
2843		 * NAT-T port agility means we may have natt_loc_ext, but
2844		 * only for a local-port change.
2845		 */
2846		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2847			uint32_t l_dst = ntohl(*dst_addr_ptr);
2848			uint32_t l_loc = ntohl(*natt_loc_ptr);
2849
2850			DOWN_SUM(l_loc);
2851			DOWN_SUM(l_loc);
2852			DOWN_SUM(l_dst);
2853			DOWN_SUM(l_dst);
2854
2855			/*
2856			 * We're 1's complement for checksums, so check for
2857			 * wraparound here.
2858			 */
2859			if (l_loc > l_dst)
2860				l_dst--;
2861
2862			running_sum += l_dst - l_loc;
2863			DOWN_SUM(running_sum);
2864			DOWN_SUM(running_sum);
2865		}
2866	}
2867
2868	newbie->ipsa_inbound_cksum = running_sum;
2869#undef DOWN_SUM
2870}
2871
2872/*
2873 * This function is called from consumers that need to insert a fully-grown
2874 * security association into its tables.  This function takes into account that
2875 * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2876 * hash bucket parameters are set in order of what the SA will be most of the
2877 * time.  (For example, an SA with an unspecified source, and a multicast
2878 * destination will primarily be an outbound SA.  OTOH, if that destination
2879 * is unicast for this node, then the SA will primarily be inbound.)
2880 *
2881 * It takes a lot of parameters because even if clone is B_FALSE, this needs
2882 * to check both buckets for purposes of collision.
2883 *
2884 * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2885 * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2886 * with additional diagnostic information because there is at least one EINVAL
2887 * case here.
2888 */
2889int
2890sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2891    keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2892    ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2893    netstack_t *ns, sadbp_t *spp)
2894{
2895	ipsa_t *newbie_clone = NULL, *scratch;
2896	ipsap_t ipsapp;
2897	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2898	sadb_address_t *srcext =
2899	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2900	sadb_address_t *dstext =
2901	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2902	sadb_address_t *isrcext =
2903	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2904	sadb_address_t *idstext =
2905	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2906	sadb_x_kmc_t *kmcext =
2907	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2908	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2909	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2910	sadb_sens_t *sens =
2911	    (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2912	sadb_sens_t *osens =
2913	    (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2914	sadb_x_pair_t *pair_ext =
2915	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2916	sadb_x_replay_ctr_t *replayext =
2917	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2918	uint8_t protocol =
2919	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2920	int salt_offset;
2921	uint8_t *buf_ptr;
2922	struct sockaddr_in *src, *dst, *isrc, *idst;
2923	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2924	sadb_lifetime_t *soft =
2925	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2926	sadb_lifetime_t *hard =
2927	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2928	sadb_lifetime_t	*idle =
2929	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2930	sa_family_t af;
2931	int error = 0;
2932	boolean_t isupdate = (newbie != NULL);
2933	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2934	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2935	ip_stack_t 	*ipst = ns->netstack_ip;
2936	ipsec_alginfo_t *alg;
2937	int		rcode;
2938	boolean_t	async = B_FALSE;
2939
2940	init_ipsa_pair(&ipsapp);
2941
2942	if (srcext == NULL) {
2943		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2944		return (EINVAL);
2945	}
2946	if (dstext == NULL) {
2947		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2948		return (EINVAL);
2949	}
2950	if (assoc == NULL) {
2951		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2952		return (EINVAL);
2953	}
2954
2955	src = (struct sockaddr_in *)(srcext + 1);
2956	src6 = (struct sockaddr_in6 *)(srcext + 1);
2957	dst = (struct sockaddr_in *)(dstext + 1);
2958	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2959	if (isrcext != NULL) {
2960		isrc = (struct sockaddr_in *)(isrcext + 1);
2961		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2962		ASSERT(idstext != NULL);
2963		idst = (struct sockaddr_in *)(idstext + 1);
2964		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2965	} else {
2966		isrc = NULL;
2967		isrc6 = NULL;
2968	}
2969
2970	af = src->sin_family;
2971
2972	if (af == AF_INET) {
2973		src_addr_ptr = (uint32_t *)&src->sin_addr;
2974		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2975	} else {
2976		ASSERT(af == AF_INET6);
2977		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2978		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2979	}
2980
2981	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2982	    cl_inet_checkspi &&
2983	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
2984		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
2985		    assoc->sadb_sa_spi, NULL);
2986		if (rcode == -1) {
2987			return (EEXIST);
2988		}
2989	}
2990
2991	/*
2992	 * Check to see if the new SA will be cloned AND paired. The
2993	 * reason a SA will be cloned is the source or destination addresses
2994	 * are not specific enough to determine if the SA goes in the outbound
2995	 * or the inbound hash table, so its cloned and put in both. If
2996	 * the SA is paired, it's soft linked to another SA for the other
2997	 * direction. Keeping track and looking up SA's that are direction
2998	 * unspecific and linked is too hard.
2999	 */
3000	if (clone && (pair_ext != NULL)) {
3001		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3002		return (EINVAL);
3003	}
3004
3005	if (!isupdate) {
3006		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3007		    src_addr_ptr, dst_addr_ptr, af, ns);
3008		if (newbie == NULL)
3009			return (ENOMEM);
3010	}
3011
3012	mutex_enter(&newbie->ipsa_lock);
3013
3014	if (isrc != NULL) {
3015		if (isrc->sin_family == AF_INET) {
3016			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3017				if (srcext->sadb_address_proto != 0) {
3018					/*
3019					 * Mismatched outer-packet protocol
3020					 * and inner-packet address family.
3021					 */
3022					mutex_exit(&newbie->ipsa_lock);
3023					error = EPROTOTYPE;
3024					*diagnostic =
3025					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3026					goto error;
3027				} else {
3028					/* Fill in with explicit protocol. */
3029					srcext->sadb_address_proto =
3030					    IPPROTO_ENCAP;
3031					dstext->sadb_address_proto =
3032					    IPPROTO_ENCAP;
3033				}
3034			}
3035			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3036			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3037		} else {
3038			ASSERT(isrc->sin_family == AF_INET6);
3039			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3040				if (srcext->sadb_address_proto != 0) {
3041					/*
3042					 * Mismatched outer-packet protocol
3043					 * and inner-packet address family.
3044					 */
3045					mutex_exit(&newbie->ipsa_lock);
3046					error = EPROTOTYPE;
3047					*diagnostic =
3048					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3049					goto error;
3050				} else {
3051					/* Fill in with explicit protocol. */
3052					srcext->sadb_address_proto =
3053					    IPPROTO_IPV6;
3054					dstext->sadb_address_proto =
3055					    IPPROTO_IPV6;
3056				}
3057			}
3058			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3059			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3060		}
3061		newbie->ipsa_innerfam = isrc->sin_family;
3062
3063		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3064		    newbie->ipsa_innerfam);
3065		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3066		    newbie->ipsa_innerfam);
3067		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3068		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3069
3070		/* Unique value uses inner-ports for Tunnel Mode... */
3071		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3072		    idst->sin_port, dstext->sadb_address_proto,
3073		    idstext->sadb_address_proto);
3074		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3075		    idst->sin_port, dstext->sadb_address_proto,
3076		    idstext->sadb_address_proto);
3077	} else {
3078		/* ... and outer-ports for Transport Mode. */
3079		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3080		    dst->sin_port, dstext->sadb_address_proto, 0);
3081		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3082		    dst->sin_port, dstext->sadb_address_proto, 0);
3083	}
3084	if (newbie->ipsa_unique_mask != (uint64_t)0)
3085		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3086
3087	sadb_nat_calculations(newbie,
3088	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3089	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3090	    src_addr_ptr, dst_addr_ptr);
3091
3092	newbie->ipsa_type = samsg->sadb_msg_satype;
3093
3094	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3095	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3096	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3097	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3098
3099	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3100	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3101	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3102		mutex_exit(&newbie->ipsa_lock);
3103		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3104		error = EINVAL;
3105		goto error;
3106	}
3107	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3108	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3109		mutex_exit(&newbie->ipsa_lock);
3110		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3111		error = EINVAL;
3112		goto error;
3113	}
3114	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3115	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3116		mutex_exit(&newbie->ipsa_lock);
3117		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3118		error = EINVAL;
3119		goto error;
3120	}
3121	/*
3122	 * If unspecified source address, force replay_wsize to 0.
3123	 * This is because an SA that has multiple sources of secure
3124	 * traffic cannot enforce a replay counter w/o synchronizing the
3125	 * senders.
3126	 */
3127	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3128		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3129	else
3130		newbie->ipsa_replay_wsize = 0;
3131
3132	newbie->ipsa_addtime = gethrestime_sec();
3133
3134	if (kmcext != NULL) {
3135		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3136		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3137	}
3138
3139	/*
3140	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3141	 * The spec says that one can update current lifetimes, but
3142	 * that seems impractical, especially in the larval-to-mature
3143	 * update that this function performs.
3144	 */
3145	if (soft != NULL) {
3146		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3147		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3148		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3149		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3150		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3151	}
3152	if (hard != NULL) {
3153		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3154		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3155		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3156		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3157		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3158	}
3159	if (idle != NULL) {
3160		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3161		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3162		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3163		    newbie->ipsa_idleaddlt;
3164		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3165	}
3166
3167	newbie->ipsa_authtmpl = NULL;
3168	newbie->ipsa_encrtmpl = NULL;
3169
3170#ifdef IPSEC_LATENCY_TEST
3171	if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3172#else
3173	if (akey != NULL) {
3174#endif
3175		async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3176		    IPSEC_ALGS_EXEC_ASYNC);
3177
3178		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3179		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3180		/* In case we have to round up to the next byte... */
3181		if ((akey->sadb_key_bits & 0x7) != 0)
3182			newbie->ipsa_authkeylen++;
3183		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3184		    KM_NOSLEEP);
3185		if (newbie->ipsa_authkey == NULL) {
3186			error = ENOMEM;
3187			mutex_exit(&newbie->ipsa_lock);
3188			goto error;
3189		}
3190		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3191		bzero(akey + 1, newbie->ipsa_authkeylen);
3192
3193		/*
3194		 * Pre-initialize the kernel crypto framework key
3195		 * structure.
3196		 */
3197		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3198		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3199		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3200
3201		mutex_enter(&ipss->ipsec_alg_lock);
3202		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3203		    [newbie->ipsa_auth_alg];
3204		if (alg != NULL && ALG_VALID(alg)) {
3205			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3206			newbie->ipsa_amech.cm_param =
3207			    (char *)&newbie->ipsa_mac_len;
3208			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3209			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3210		} else {
3211			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3212		}
3213		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3214		mutex_exit(&ipss->ipsec_alg_lock);
3215		if (error != 0) {
3216			mutex_exit(&newbie->ipsa_lock);
3217			/*
3218			 * An error here indicates that alg is the wrong type
3219			 * (IE: not authentication) or its not in the alg tables
3220			 * created by ipsecalgs(1m), or Kcf does not like the
3221			 * parameters passed in with this algorithm, which is
3222			 * probably a coding error!
3223			 */
3224			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3225
3226			goto error;
3227		}
3228	}
3229
3230	if (ekey != NULL) {
3231		mutex_enter(&ipss->ipsec_alg_lock);
3232		async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3233		    IPSEC_ALGS_EXEC_ASYNC);
3234		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3235		    [newbie->ipsa_encr_alg];
3236
3237		if (alg != NULL && ALG_VALID(alg)) {
3238			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3239			newbie->ipsa_datalen = alg->alg_datalen;
3240			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3241				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3242
3243			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3244				newbie->ipsa_flags |= IPSA_F_COMBINED;
3245				newbie->ipsa_mac_len =  alg->alg_icvlen;
3246			}
3247
3248			if (alg->alg_flags & ALG_FLAG_CCM)
3249				newbie->ipsa_noncefunc = ccm_params_init;
3250			else if (alg->alg_flags & ALG_FLAG_GCM)
3251				newbie->ipsa_noncefunc = gcm_params_init;
3252			else newbie->ipsa_noncefunc = cbc_params_init;
3253
3254			newbie->ipsa_saltlen = alg->alg_saltlen;
3255			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3256			newbie->ipsa_iv_len = alg->alg_ivlen;
3257			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3258			    newbie->ipsa_iv_len;
3259			newbie->ipsa_emech.cm_param = NULL;
3260			newbie->ipsa_emech.cm_param_len = 0;
3261		} else {
3262			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3263		}
3264		mutex_exit(&ipss->ipsec_alg_lock);
3265
3266		/*
3267		 * The byte stream following the sadb_key_t is made up of:
3268		 * key bytes, [salt bytes], [IV initial value]
3269		 * All of these have variable length. The IV is typically
3270		 * randomly generated by this function and not passed in.
3271		 * By supporting the injection of a known IV, the whole
3272		 * IPsec subsystem and the underlying crypto subsystem
3273		 * can be tested with known test vectors.
3274		 *
3275		 * The keying material has been checked by ext_check()
3276		 * and ipsec_valid_key_size(), after removing salt/IV
3277		 * bits, whats left is the encryption key. If this is too
3278		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3279		 * won't get created.
3280		 *
3281		 * set ipsa_encrkeylen to length of key only.
3282		 */
3283		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3284		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3285		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3286		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3287
3288		/* In case we have to round up to the next byte... */
3289		if ((ekey->sadb_key_bits & 0x7) != 0)
3290			newbie->ipsa_encrkeylen++;
3291
3292		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3293		    KM_NOSLEEP);
3294		if (newbie->ipsa_encrkey == NULL) {
3295			error = ENOMEM;
3296			mutex_exit(&newbie->ipsa_lock);
3297			goto error;
3298		}
3299
3300		buf_ptr = (uint8_t *)(ekey + 1);
3301		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3302
3303		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3304			/*
3305			 * Combined mode algs need a nonce. Copy the salt and
3306			 * IV into a buffer. The ipsa_nonce is a pointer into
3307			 * this buffer, some bytes at the start of the buffer
3308			 * may be unused, depends on the salt length. The IV
3309			 * is 64 bit aligned so it can be incremented as a
3310			 * uint64_t. Zero out key in samsg_t before freeing.
3311			 */
3312
3313			newbie->ipsa_nonce_buf = kmem_alloc(
3314			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3315			if (newbie->ipsa_nonce_buf == NULL) {
3316				error = ENOMEM;
3317				mutex_exit(&newbie->ipsa_lock);
3318				goto error;
3319			}
3320			/*
3321			 * Initialize nonce and salt pointers to point
3322			 * to the nonce buffer. This is just in case we get
3323			 * bad data, the pointers will be valid, the data
3324			 * won't be.
3325			 *
3326			 * See sadb.h for layout of nonce.
3327			 */
3328			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3329			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3330			newbie->ipsa_nonce = newbie->ipsa_salt;
3331			if (newbie->ipsa_saltlen != 0) {
3332				salt_offset = MAXSALTSIZE -
3333				    newbie->ipsa_saltlen;
3334				newbie->ipsa_salt = (uint8_t *)
3335				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3336				newbie->ipsa_nonce = newbie->ipsa_salt;
3337				buf_ptr += newbie->ipsa_encrkeylen;
3338				bcopy(buf_ptr, newbie->ipsa_salt,
3339				    newbie->ipsa_saltlen);
3340			}
3341			/*
3342			 * The IV for CCM/GCM mode increments, it should not
3343			 * repeat. Get a random value for the IV, make a
3344			 * copy, the SA will expire when/if the IV ever
3345			 * wraps back to the initial value. If an Initial IV
3346			 * is passed in via PF_KEY, save this in the SA.
3347			 * Initialising IV for inbound is pointless as its
3348			 * taken from the inbound packet.
3349			 */
3350			if (!is_inbound) {
3351				if (ekey->sadb_key_reserved != 0) {
3352					buf_ptr += newbie->ipsa_saltlen;
3353					bcopy(buf_ptr, (uint8_t *)newbie->
3354					    ipsa_iv, SADB_1TO8(ekey->
3355					    sadb_key_reserved));
3356				} else {
3357					(void) random_get_pseudo_bytes(
3358					    (uint8_t *)newbie->ipsa_iv,
3359					    newbie->ipsa_iv_len);
3360				}
3361				newbie->ipsa_iv_softexpire =
3362				    (*newbie->ipsa_iv) << 9;
3363				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3364			}
3365		}
3366		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3367
3368		/*
3369		 * Pre-initialize the kernel crypto framework key
3370		 * structure.
3371		 */
3372		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3373		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3374		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3375
3376		mutex_enter(&ipss->ipsec_alg_lock);
3377		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3378		mutex_exit(&ipss->ipsec_alg_lock);
3379		if (error != 0) {
3380			mutex_exit(&newbie->ipsa_lock);
3381			/* See above for error explanation. */
3382			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3383			goto error;
3384		}
3385	}
3386
3387	if (async)
3388		newbie->ipsa_flags |= IPSA_F_ASYNC;
3389
3390	/*
3391	 * Ptrs to processing functions.
3392	 */
3393	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3394		ipsecesp_init_funcs(newbie);
3395	else
3396		ipsecah_init_funcs(newbie);
3397	ASSERT(newbie->ipsa_output_func != NULL &&
3398	    newbie->ipsa_input_func != NULL);
3399
3400	/*
3401	 * Certificate ID stuff.
3402	 */
3403	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3404		sadb_ident_t *id =
3405		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3406
3407		/*
3408		 * Can assume strlen() will return okay because ext_check() in
3409		 * keysock.c prepares the string for us.
3410		 */
3411		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3412		    (char *)(id+1), ns);
3413		if (newbie->ipsa_src_cid == NULL) {
3414			error = ENOMEM;
3415			mutex_exit(&newbie->ipsa_lock);
3416			goto error;
3417		}
3418	}
3419
3420	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3421		sadb_ident_t *id =
3422		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3423
3424		/*
3425		 * Can assume strlen() will return okay because ext_check() in
3426		 * keysock.c prepares the string for us.
3427		 */
3428		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3429		    (char *)(id+1), ns);
3430		if (newbie->ipsa_dst_cid == NULL) {
3431			error = ENOMEM;
3432			mutex_exit(&newbie->ipsa_lock);
3433			goto error;
3434		}
3435	}
3436
3437	/*
3438	 * sensitivity label handling code:
3439	 * Convert sens + bitmap into cred_t, and associate it
3440	 * with the new SA.
3441	 */
3442	if (sens != NULL) {
3443		uint64_t *bitmap = (uint64_t *)(sens + 1);
3444
3445		newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3446	}
3447
3448	/*
3449	 * Likewise for outer sensitivity.
3450	 */
3451	if (osens != NULL) {
3452		uint64_t *bitmap = (uint64_t *)(osens + 1);
3453		ts_label_t *tsl, *effective_tsl;
3454		uint32_t *peer_addr_ptr;
3455		zoneid_t zoneid = GLOBAL_ZONEID;
3456		zone_t *zone;
3457
3458		peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3459
3460		tsl = sadb_label_from_sens(osens, bitmap);
3461		newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3462
3463		if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3464			newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3465		}
3466
3467		error = tsol_check_dest(tsl, peer_addr_ptr,
3468		    (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3469		    newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3470		if (error != 0) {
3471			label_rele(tsl);
3472			mutex_exit(&newbie->ipsa_lock);
3473			goto error;
3474		}
3475
3476		if (effective_tsl != NULL) {
3477			label_rele(tsl);
3478			tsl = effective_tsl;
3479		}
3480
3481		newbie->ipsa_otsl = tsl;
3482
3483		zone = zone_find_by_label(tsl);
3484		if (zone != NULL) {
3485			zoneid = zone->zone_id;
3486			zone_rele(zone);
3487		}
3488		/*
3489		 * For exclusive stacks we set the zoneid to zero to operate
3490		 * as if in the global zone for tsol_compute_label_v4/v6
3491		 */
3492		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3493			zoneid = GLOBAL_ZONEID;
3494
3495		if (af == AF_INET6) {
3496			error = tsol_compute_label_v6(tsl, zoneid,
3497			    (in6_addr_t *)peer_addr_ptr,
3498			    newbie->ipsa_opt_storage, ipst);
3499		} else {
3500			error = tsol_compute_label_v4(tsl, zoneid,
3501			    *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3502		}
3503		if (error != 0) {
3504			mutex_exit(&newbie->ipsa_lock);
3505			goto error;
3506		}
3507	}
3508
3509
3510	if (replayext != NULL) {
3511		if ((replayext->sadb_x_rc_replay32 == 0) &&
3512		    (replayext->sadb_x_rc_replay64 != 0)) {
3513			error = EOPNOTSUPP;
3514			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3515			mutex_exit(&newbie->ipsa_lock);
3516			goto error;
3517		}
3518		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3519	}
3520
3521	/* now that the SA has been updated, set its new state */
3522	newbie->ipsa_state = assoc->sadb_sa_state;
3523
3524	if (clone) {
3525		newbie->ipsa_haspeer = B_TRUE;
3526	} else {
3527		if (!is_inbound) {
3528			lifetime_fuzz(newbie);
3529		}
3530	}
3531	/*
3532	 * The less locks I hold when doing an insertion and possible cloning,
3533	 * the better!
3534	 */
3535	mutex_exit(&newbie->ipsa_lock);
3536
3537	if (clone) {
3538		newbie_clone = sadb_cloneassoc(newbie);
3539
3540		if (newbie_clone == NULL) {
3541			error = ENOMEM;
3542			goto error;
3543		}
3544	}
3545
3546	/*
3547	 * Enter the bucket locks.  The order of entry is outbound,
3548	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3549	 * based on the destination address type.  If the destination address
3550	 * type is for a node that isn't mine (or potentially mine), the
3551	 * "primary" bucket is the outbound one.
3552	 */
3553	if (!is_inbound) {
3554		/* primary == outbound */
3555		mutex_enter(&primary->isaf_lock);
3556		mutex_enter(&secondary->isaf_lock);
3557	} else {
3558		/* primary == inbound */
3559		mutex_enter(&secondary->isaf_lock);
3560		mutex_enter(&primary->isaf_lock);
3561	}
3562
3563	/*
3564	 * sadb_insertassoc() doesn't increment the reference
3565	 * count.  We therefore have to increment the
3566	 * reference count one more time to reflect the
3567	 * pointers of the table that reference this SA.
3568	 */
3569	IPSA_REFHOLD(newbie);
3570
3571	if (isupdate) {
3572		/*
3573		 * Unlink from larval holding cell in the "inbound" fanout.
3574		 */
3575		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3576		    newbie->ipsa_linklock == &secondary->isaf_lock);
3577		sadb_unlinkassoc(newbie);
3578	}
3579
3580	mutex_enter(&newbie->ipsa_lock);
3581	error = sadb_insertassoc(newbie, primary);
3582	mutex_exit(&newbie->ipsa_lock);
3583
3584	if (error != 0) {
3585		/*
3586		 * Since sadb_insertassoc() failed, we must decrement the
3587		 * refcount again so the cleanup code will actually free
3588		 * the offending SA.
3589		 */
3590		IPSA_REFRELE(newbie);
3591		goto error_unlock;
3592	}
3593
3594	if (newbie_clone != NULL) {
3595		mutex_enter(&newbie_clone->ipsa_lock);
3596		error = sadb_insertassoc(newbie_clone, secondary);
3597		mutex_exit(&newbie_clone->ipsa_lock);
3598		if (error != 0) {
3599			/* Collision in secondary table. */
3600			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3601			goto error_unlock;
3602		}
3603		IPSA_REFHOLD(newbie_clone);
3604	} else {
3605		ASSERT(primary != secondary);
3606		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3607		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3608		if (scratch != NULL) {
3609			/* Collision in secondary table. */
3610			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3611			/* Set the error, since ipsec_getassocbyspi() can't. */
3612			error = EEXIST;
3613			goto error_unlock;
3614		}
3615	}
3616
3617	/* OKAY!  So let's do some reality check assertions. */
3618
3619	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3620	ASSERT(newbie_clone == NULL ||
3621	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3622
3623error_unlock:
3624
3625	/*
3626	 * We can exit the locks in any order.	Only entrance needs to
3627	 * follow any protocol.
3628	 */
3629	mutex_exit(&secondary->isaf_lock);
3630	mutex_exit(&primary->isaf_lock);
3631
3632	if (pair_ext != NULL && error == 0) {
3633		/* update pair_spi if it exists. */
3634		ipsa_query_t sq;
3635
3636		sq.spp = spp;		/* XXX param */
3637		error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3638		    IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3639		if (error)
3640			return (error);
3641
3642		error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3643
3644		if (error != 0)
3645			goto error;
3646
3647		if (ipsapp.ipsap_psa_ptr != NULL) {
3648			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3649			error = EINVAL;
3650		} else {
3651			/* update_pairing() sets diagnostic */
3652			error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3653		}
3654	}
3655	/* Common error point for this routine. */
3656error:
3657	if (newbie != NULL) {
3658		if (error != 0) {
3659			/* This SA is broken, let the reaper clean up. */
3660			mutex_enter(&newbie->ipsa_lock);
3661			newbie->ipsa_state = IPSA_STATE_DEAD;
3662			newbie->ipsa_hardexpiretime = 1;
3663			mutex_exit(&newbie->ipsa_lock);
3664		}
3665		IPSA_REFRELE(newbie);
3666	}
3667	if (newbie_clone != NULL) {
3668		IPSA_REFRELE(newbie_clone);
3669	}
3670
3671	if (error == 0) {
3672		/*
3673		 * Construct favorable PF_KEY return message and send to
3674		 * keysock. Update the flags in the original keysock message
3675		 * to reflect the actual flags in the new SA.
3676		 *  (Q:  Do I need to pass "newbie"?  If I do,
3677		 * make sure to REFHOLD, call, then REFRELE.)
3678		 */
3679		assoc->sadb_sa_flags = newbie->ipsa_flags;
3680		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3681	}
3682
3683	destroy_ipsa_pair(&ipsapp);
3684	return (error);
3685}
3686
3687/*
3688 * Set the time of first use for a security association.  Update any
3689 * expiration times as a result.
3690 */
3691void
3692sadb_set_usetime(ipsa_t *assoc)
3693{
3694	time_t snapshot = gethrestime_sec();
3695
3696	mutex_enter(&assoc->ipsa_lock);
3697	assoc->ipsa_lastuse = snapshot;
3698	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3699
3700	/*
3701	 * Caller does check usetime before calling me usually, and
3702	 * double-checking is better than a mutex_enter/exit hit.
3703	 */
3704	if (assoc->ipsa_usetime == 0) {
3705		/*
3706		 * This is redundant for outbound SA's, as
3707		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3708		 * Inbound SAs, however, have no such protection.
3709		 */
3710		assoc->ipsa_flags |= IPSA_F_USED;
3711		assoc->ipsa_usetime = snapshot;
3712
3713		/*
3714		 * After setting the use time, see if we have a use lifetime
3715		 * that would cause the actual SA expiration time to shorten.
3716		 */
3717		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3718		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3719	}
3720	mutex_exit(&assoc->ipsa_lock);
3721}
3722
3723/*
3724 * Send up a PF_KEY expire message for this association.
3725 */
3726static void
3727sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3728{
3729	mblk_t *mp, *mp1;
3730	int alloclen, af;
3731	sadb_msg_t *samsg;
3732	sadb_lifetime_t *current, *expire;
3733	sadb_sa_t *saext;
3734	uint8_t *end;
3735	boolean_t tunnel_mode;
3736
3737	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3738
3739	/* Don't bother sending if there's no queue. */
3740	if (pfkey_q == NULL)
3741		return;
3742
3743	mp = sadb_keysock_out(0);
3744	if (mp == NULL) {
3745		/* cmn_err(CE_WARN, */
3746		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3747		return;
3748	}
3749
3750	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3751	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3752
3753	af = assoc->ipsa_addrfam;
3754	switch (af) {
3755	case AF_INET:
3756		alloclen += 2 * sizeof (struct sockaddr_in);
3757		break;
3758	case AF_INET6:
3759		alloclen += 2 * sizeof (struct sockaddr_in6);
3760		break;
3761	default:
3762		/* Won't happen unless there's a kernel bug. */
3763		freeb(mp);
3764		cmn_err(CE_WARN,
3765		    "sadb_expire_assoc: Unknown address length.\n");
3766		return;
3767	}
3768
3769	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3770	if (tunnel_mode) {
3771		alloclen += 2 * sizeof (sadb_address_t);
3772		switch (assoc->ipsa_innerfam) {
3773		case AF_INET:
3774			alloclen += 2 * sizeof (struct sockaddr_in);
3775			break;
3776		case AF_INET6:
3777			alloclen += 2 * sizeof (struct sockaddr_in6);
3778			break;
3779		default:
3780			/* Won't happen unless there's a kernel bug. */
3781			freeb(mp);
3782			cmn_err(CE_WARN, "sadb_expire_assoc: "
3783			    "Unknown inner address length.\n");
3784			return;
3785		}
3786	}
3787
3788	mp->b_cont = allocb(alloclen, BPRI_HI);
3789	if (mp->b_cont == NULL) {
3790		freeb(mp);
3791		/* cmn_err(CE_WARN, */
3792		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3793		return;
3794	}
3795
3796	mp1 = mp;
3797	mp = mp->b_cont;
3798	end = mp->b_wptr + alloclen;
3799
3800	samsg = (sadb_msg_t *)mp->b_wptr;
3801	mp->b_wptr += sizeof (*samsg);
3802	samsg->sadb_msg_version = PF_KEY_V2;
3803	samsg->sadb_msg_type = SADB_EXPIRE;
3804	samsg->sadb_msg_errno = 0;
3805	samsg->sadb_msg_satype = assoc->ipsa_type;
3806	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3807	samsg->sadb_msg_reserved = 0;
3808	samsg->sadb_msg_seq = 0;
3809	samsg->sadb_msg_pid = 0;
3810
3811	saext = (sadb_sa_t *)mp->b_wptr;
3812	mp->b_wptr += sizeof (*saext);
3813	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3814	saext->sadb_sa_exttype = SADB_EXT_SA;
3815	saext->sadb_sa_spi = assoc->ipsa_spi;
3816	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3817	saext->sadb_sa_state = assoc->ipsa_state;
3818	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3819	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3820	saext->sadb_sa_flags = assoc->ipsa_flags;
3821
3822	current = (sadb_lifetime_t *)mp->b_wptr;
3823	mp->b_wptr += sizeof (sadb_lifetime_t);
3824	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3825	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3826	/* We do not support the concept. */
3827	current->sadb_lifetime_allocations = 0;
3828	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3829	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3830	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3831
3832	expire = (sadb_lifetime_t *)mp->b_wptr;
3833	mp->b_wptr += sizeof (*expire);
3834	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3835
3836	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3837		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3838		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3839		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3840		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3841		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3842	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3843		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3844		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3845		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3846		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3847		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3848	} else {
3849		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3850		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3851		expire->sadb_lifetime_allocations = 0;
3852		expire->sadb_lifetime_bytes = 0;
3853		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3854		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3855	}
3856
3857	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3858	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3859	    SA_PROTO(assoc), 0);
3860	ASSERT(mp->b_wptr != NULL);
3861
3862	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3863	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3864	    SA_PROTO(assoc), 0);
3865	ASSERT(mp->b_wptr != NULL);
3866
3867	if (tunnel_mode) {
3868		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3869		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3870		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3871		    assoc->ipsa_innersrcpfx);
3872		ASSERT(mp->b_wptr != NULL);
3873		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3874		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3875		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3876		    assoc->ipsa_innerdstpfx);
3877		ASSERT(mp->b_wptr != NULL);
3878	}
3879
3880	/* Can just putnext, we're ready to go! */
3881	putnext(pfkey_q, mp1);
3882}
3883
3884/*
3885 * "Age" the SA with the number of bytes that was used to protect traffic.
3886 * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3887 * enough "charge" left in the SA to protect the data.	Return B_FALSE
3888 * otherwise.  (If B_FALSE is returned, the association either was, or became
3889 * DEAD.)
3890 */
3891boolean_t
3892sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3893    boolean_t sendmsg)
3894{
3895	boolean_t rc = B_TRUE;
3896	uint64_t newtotal;
3897
3898	mutex_enter(&assoc->ipsa_lock);
3899	newtotal = assoc->ipsa_bytes + bytes;
3900	if (assoc->ipsa_hardbyteslt != 0 &&
3901	    newtotal >= assoc->ipsa_hardbyteslt) {
3902		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3903			sadb_delete_cluster(assoc);
3904			/*
3905			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3906			 * this off on another non-interrupt thread.  Also
3907			 * unlink this SA immediately.
3908			 */
3909			assoc->ipsa_state = IPSA_STATE_DEAD;
3910			if (sendmsg)
3911				sadb_expire_assoc(pfkey_q, assoc);
3912			/*
3913			 * Set non-zero expiration time so sadb_age_assoc()
3914			 * will work when reaping.
3915			 */
3916			assoc->ipsa_hardexpiretime = (time_t)1;
3917		} /* Else someone beat me to it! */
3918		rc = B_FALSE;
3919	} else if (assoc->ipsa_softbyteslt != 0 &&
3920	    (newtotal >= assoc->ipsa_softbyteslt)) {
3921		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3922			/*
3923			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3924			 * this off on another non-interrupt thread.
3925			 */
3926			assoc->ipsa_state = IPSA_STATE_DYING;
3927			assoc->ipsa_bytes = newtotal;
3928			if (sendmsg)
3929				sadb_expire_assoc(pfkey_q, assoc);
3930		} /* Else someone beat me to it! */
3931	}
3932	if (rc == B_TRUE)
3933		assoc->ipsa_bytes = newtotal;
3934	mutex_exit(&assoc->ipsa_lock);
3935	return (rc);
3936}
3937
3938/*
3939 * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3940 *     sadb_age_assoc().
3941 */
3942static ipsa_t *
3943sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3944{
3945	ASSERT(MUTEX_HELD(&head->isaf_lock));
3946	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3947	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3948
3949	/*
3950	 * Force cached SAs to be revalidated..
3951	 */
3952	head->isaf_gen++;
3953
3954	mutex_exit(&sa->ipsa_lock);
3955	sadb_unlinkassoc(sa);
3956
3957	return (NULL);
3958}
3959
3960/*
3961 * Do various SA-is-idle activities depending on delta (the number of idle
3962 * seconds on the SA) and/or other properties of the SA.
3963 *
3964 * Return B_TRUE if I've sent a packet, because I have to drop the
3965 * association's mutex before sending a packet out the wire.
3966 */
3967/* ARGSUSED */
3968static boolean_t
3969sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3970{
3971	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3972	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3973
3974	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3975
3976	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3977	    delta >= nat_t_interval &&
3978	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3979		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3980		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3981		mutex_exit(&assoc->ipsa_lock);
3982		ipsecesp_send_keepalive(assoc);
3983		return (B_TRUE);
3984	}
3985	return (B_FALSE);
3986}
3987
3988/*
3989 * Return "assoc" if haspeer is true and I send an expire.  This allows
3990 * the consumers' aging functions to tidy up an expired SA's peer.
3991 */
3992static ipsa_t *
3993sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3994    time_t current, int reap_delay, boolean_t inbound)
3995{
3996	ipsa_t *retval = NULL;
3997	boolean_t dropped_mutex = B_FALSE;
3998
3999	ASSERT(MUTEX_HELD(&head->isaf_lock));
4000
4001	mutex_enter(&assoc->ipsa_lock);
4002
4003	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4004	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4005	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4006	    (assoc->ipsa_hardexpiretime != 0))) &&
4007	    (assoc->ipsa_hardexpiretime <= current)) {
4008		assoc->ipsa_state = IPSA_STATE_DEAD;
4009		return (sadb_torch_assoc(head, assoc));
4010	}
4011
4012	/*
4013	 * Check lifetimes.  Fortunately, SA setup is done
4014	 * such that there are only two times to look at,
4015	 * softexpiretime, and hardexpiretime.
4016	 *
4017	 * Check hard first.
4018	 */
4019
4020	if (assoc->ipsa_hardexpiretime != 0 &&
4021	    assoc->ipsa_hardexpiretime <= current) {
4022		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4023			return (sadb_torch_assoc(head, assoc));
4024
4025		if (inbound) {
4026			sadb_delete_cluster(assoc);
4027		}
4028
4029		/*
4030		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4031		 */
4032		assoc->ipsa_state = IPSA_STATE_DEAD;
4033		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4034			/*
4035			 * If the SA is paired or peered with another, put
4036			 * a copy on a list which can be processed later, the
4037			 * pair/peer SA needs to be updated so the both die
4038			 * at the same time.
4039			 *
4040			 * If I return assoc, I have to bump up its reference
4041			 * count to keep with the ipsa_t reference count
4042			 * semantics.
4043			 */
4044			IPSA_REFHOLD(assoc);
4045			retval = assoc;
4046		}
4047		sadb_expire_assoc(pfkey_q, assoc);
4048		assoc->ipsa_hardexpiretime = current + reap_delay;
4049	} else if (assoc->ipsa_softexpiretime != 0 &&
4050	    assoc->ipsa_softexpiretime <= current &&
4051	    assoc->ipsa_state < IPSA_STATE_DYING) {
4052		/*
4053		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4054		 * this off on another non-interrupt thread.
4055		 */
4056		assoc->ipsa_state = IPSA_STATE_DYING;
4057		if (assoc->ipsa_haspeer) {
4058			/*
4059			 * If the SA has a peer, update the peer's state
4060			 * on SOFT_EXPIRE, this is mostly to prevent two
4061			 * expire messages from effectively the same SA.
4062			 *
4063			 * Don't care about paired SA's, then can (and should)
4064			 * be able to soft expire at different times.
4065			 *
4066			 * If I return assoc, I have to bump up its
4067			 * reference count to keep with the ipsa_t reference
4068			 * count semantics.
4069			 */
4070			IPSA_REFHOLD(assoc);
4071			retval = assoc;
4072		}
4073		sadb_expire_assoc(pfkey_q, assoc);
4074	} else if (assoc->ipsa_idletime != 0 &&
4075	    assoc->ipsa_idleexpiretime <= current) {
4076		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4077			assoc->ipsa_state = IPSA_STATE_IDLE;
4078		}
4079
4080		/*
4081		 * Need to handle Mature case
4082		 */
4083		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4084			sadb_expire_assoc(pfkey_q, assoc);
4085		}
4086	} else {
4087		/* Check idle time activities. */
4088		dropped_mutex = sadb_idle_activities(assoc,
4089		    current - assoc->ipsa_lastuse, inbound);
4090	}
4091
4092	if (!dropped_mutex)
4093		mutex_exit(&assoc->ipsa_lock);
4094	return (retval);
4095}
4096
4097/*
4098 * Called by a consumer protocol to do ther dirty work of reaping dead
4099 * Security Associations.
4100 *
4101 * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4102 * SA's that are already marked DEAD, so expired SA's are only reaped
4103 * the second time sadb_ager() runs.
4104 */
4105void
4106sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4107{
4108	int i;
4109	isaf_t *bucket;
4110	ipsa_t *assoc, *spare;
4111	iacqf_t *acqlist;
4112	ipsacq_t *acqrec, *spareacq;
4113	templist_t *haspeerlist, *newbie;
4114	/* Snapshot current time now. */
4115	time_t current = gethrestime_sec();
4116	haspeerlist = NULL;
4117
4118	/*
4119	 * Do my dirty work.  This includes aging real entries, aging
4120	 * larvals, and aging outstanding ACQUIREs.
4121	 *
4122	 * I hope I don't tie up resources for too long.
4123	 */
4124
4125	/* Age acquires. */
4126
4127	for (i = 0; i < sp->sdb_hashsize; i++) {
4128		acqlist = &sp->sdb_acq[i];
4129		mutex_enter(&acqlist->iacqf_lock);
4130		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4131		    acqrec = spareacq) {
4132			spareacq = acqrec->ipsacq_next;
4133			if (current > acqrec->ipsacq_expire)
4134				sadb_destroy_acquire(acqrec, ns);
4135		}
4136		mutex_exit(&acqlist->iacqf_lock);
4137	}
4138
4139	/* Age inbound associations. */
4140	for (i = 0; i < sp->sdb_hashsize; i++) {
4141		bucket = &(sp->sdb_if[i]);
4142		mutex_enter(&bucket->isaf_lock);
4143		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4144		    assoc = spare) {
4145			spare = assoc->ipsa_next;
4146			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4147			    reap_delay, B_TRUE) != NULL) {
4148				/*
4149				 * Put SA's which have a peer or SA's which
4150				 * are paired on a list for processing after
4151				 * all the hash tables have been walked.
4152				 *
4153				 * sadb_age_assoc() increments the refcnt,
4154				 * effectively doing an IPSA_REFHOLD().
4155				 */
4156				newbie = kmem_alloc(sizeof (*newbie),
4157				    KM_NOSLEEP);
4158				if (newbie == NULL) {
4159					/*
4160					 * Don't forget to REFRELE().
4161					 */
4162					IPSA_REFRELE(assoc);
4163					continue;	/* for loop... */
4164				}
4165				newbie->next = haspeerlist;
4166				newbie->ipsa = assoc;
4167				haspeerlist = newbie;
4168			}
4169		}
4170		mutex_exit(&bucket->isaf_lock);
4171	}
4172
4173	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4174	haspeerlist = NULL;
4175
4176	/* Age outbound associations. */
4177	for (i = 0; i < sp->sdb_hashsize; i++) {
4178		bucket = &(sp->sdb_of[i]);
4179		mutex_enter(&bucket->isaf_lock);
4180		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4181		    assoc = spare) {
4182			spare = assoc->ipsa_next;
4183			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4184			    reap_delay, B_FALSE) != NULL) {
4185				/*
4186				 * sadb_age_assoc() increments the refcnt,
4187				 * effectively doing an IPSA_REFHOLD().
4188				 */
4189				newbie = kmem_alloc(sizeof (*newbie),
4190				    KM_NOSLEEP);
4191				if (newbie == NULL) {
4192					/*
4193					 * Don't forget to REFRELE().
4194					 */
4195					IPSA_REFRELE(assoc);
4196					continue;	/* for loop... */
4197				}
4198				newbie->next = haspeerlist;
4199				newbie->ipsa = assoc;
4200				haspeerlist = newbie;
4201			}
4202		}
4203		mutex_exit(&bucket->isaf_lock);
4204	}
4205
4206	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4207
4208	/*
4209	 * Run a GC pass to clean out dead identities.
4210	 */
4211	ipsid_gc(ns);
4212}
4213
4214/*
4215 * Figure out when to reschedule the ager.
4216 */
4217timeout_id_t
4218sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4219    void *agerarg, uint_t *intp, uint_t intmax, short mid)
4220{
4221	hrtime_t end = gethrtime();
4222	uint_t interval = *intp;
4223
4224	/*
4225	 * See how long this took.  If it took too long, increase the
4226	 * aging interval.
4227	 */
4228	if ((end - begin) > (hrtime_t)interval * (hrtime_t)1000000) {
4229		if (interval >= intmax) {
4230			/* XXX Rate limit this?  Or recommend flush? */
4231			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4232			    "Too many SA's to age out in %d msec.\n",
4233			    intmax);
4234		} else {
4235			/* Double by shifting by one bit. */
4236			interval <<= 1;
4237			interval = min(interval, intmax);
4238		}
4239	} else if ((end - begin) <= (hrtime_t)interval * (hrtime_t)500000 &&
4240	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4241		/*
4242		 * If I took less than half of the interval, then I should
4243		 * ratchet the interval back down.  Never automatically
4244		 * shift below the default aging interval.
4245		 *
4246		 * NOTE:This even overrides manual setting of the age
4247		 *	interval using NDD to lower the setting past the
4248		 *	default.  In other words, if you set the interval
4249		 *	lower than the default, and your SADB gets too big,
4250		 *	the interval will only self-lower back to the default.
4251		 */
4252		/* Halve by shifting one bit. */
4253		interval >>= 1;
4254		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4255	}
4256	*intp = interval;
4257	return (qtimeout(pfkey_q, ager, agerarg,
4258	    drv_usectohz(interval * 1000)));
4259}
4260
4261
4262/*
4263 * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4264 * message takes when updating a MATURE or DYING SA.
4265 */
4266static void
4267sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4268    sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4269{
4270	mutex_enter(&assoc->ipsa_lock);
4271
4272	/*
4273	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4274	 * passed in during an update message.	We currently don't handle
4275	 * these.
4276	 */
4277
4278	if (hard != NULL) {
4279		if (hard->sadb_lifetime_bytes != 0)
4280			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4281		if (hard->sadb_lifetime_usetime != 0)
4282			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4283		if (hard->sadb_lifetime_addtime != 0)
4284			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4285		if (assoc->ipsa_hardaddlt != 0) {
4286			assoc->ipsa_hardexpiretime =
4287			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4288		}
4289		if (assoc->ipsa_harduselt != 0 &&
4290		    assoc->ipsa_flags & IPSA_F_USED) {
4291			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4292		}
4293		if (hard->sadb_lifetime_allocations != 0)
4294			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4295	}
4296
4297	if (soft != NULL) {
4298		if (soft->sadb_lifetime_bytes != 0) {
4299			if (soft->sadb_lifetime_bytes >
4300			    assoc->ipsa_hardbyteslt) {
4301				assoc->ipsa_softbyteslt =
4302				    assoc->ipsa_hardbyteslt;
4303			} else {
4304				assoc->ipsa_softbyteslt =
4305				    soft->sadb_lifetime_bytes;
4306			}
4307		}
4308		if (soft->sadb_lifetime_usetime != 0) {
4309			if (soft->sadb_lifetime_usetime >
4310			    assoc->ipsa_harduselt) {
4311				assoc->ipsa_softuselt =
4312				    assoc->ipsa_harduselt;
4313			} else {
4314				assoc->ipsa_softuselt =
4315				    soft->sadb_lifetime_usetime;
4316			}
4317		}
4318		if (soft->sadb_lifetime_addtime != 0) {
4319			if (soft->sadb_lifetime_addtime >
4320			    assoc->ipsa_hardexpiretime) {
4321				assoc->ipsa_softexpiretime =
4322				    assoc->ipsa_hardexpiretime;
4323			} else {
4324				assoc->ipsa_softaddlt =
4325				    soft->sadb_lifetime_addtime;
4326			}
4327		}
4328		if (assoc->ipsa_softaddlt != 0) {
4329			assoc->ipsa_softexpiretime =
4330			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4331		}
4332		if (assoc->ipsa_softuselt != 0 &&
4333		    assoc->ipsa_flags & IPSA_F_USED) {
4334			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4335		}
4336		if (outbound && assoc->ipsa_softexpiretime != 0) {
4337			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4338				lifetime_fuzz(assoc);
4339		}
4340
4341		if (soft->sadb_lifetime_allocations != 0)
4342			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4343	}
4344
4345	if (idle != NULL) {
4346		time_t current = gethrestime_sec();
4347		if ((assoc->ipsa_idleexpiretime <= current) &&
4348		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4349			assoc->ipsa_idleexpiretime =
4350			    current + assoc->ipsa_idleaddlt;
4351		}
4352		if (idle->sadb_lifetime_addtime != 0)
4353			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4354		if (idle->sadb_lifetime_usetime != 0)
4355			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4356		if (assoc->ipsa_idleaddlt != 0) {
4357			assoc->ipsa_idleexpiretime =
4358			    current + idle->sadb_lifetime_addtime;
4359			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4360		}
4361		if (assoc->ipsa_idleuselt != 0) {
4362			if (assoc->ipsa_idletime != 0) {
4363				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4364				    assoc->ipsa_idleuselt);
4365			assoc->ipsa_idleexpiretime =
4366			    current + assoc->ipsa_idletime;
4367			} else {
4368				assoc->ipsa_idleexpiretime =
4369				    current + assoc->ipsa_idleuselt;
4370				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4371			}
4372		}
4373	}
4374	mutex_exit(&assoc->ipsa_lock);
4375}
4376
4377static int
4378sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4379{
4380	int rcode = 0;
4381	time_t current = gethrestime_sec();
4382
4383	mutex_enter(&assoc->ipsa_lock);
4384
4385	switch (new_state) {
4386	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4387		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4388			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4389			assoc->ipsa_idleexpiretime =
4390			    current + assoc->ipsa_idletime;
4391		}
4392		break;
4393	case SADB_X_SASTATE_IDLE:
4394		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4395			assoc->ipsa_state = IPSA_STATE_IDLE;
4396			assoc->ipsa_idleexpiretime =
4397			    current + assoc->ipsa_idletime;
4398		} else {
4399			rcode = EINVAL;
4400		}
4401		break;
4402
4403	case SADB_X_SASTATE_ACTIVE:
4404		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4405			rcode = EINVAL;
4406			break;
4407		}
4408		assoc->ipsa_state = IPSA_STATE_MATURE;
4409		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4410
4411		if (ipkt_lst == NULL) {
4412			break;
4413		}
4414
4415		if (assoc->ipsa_bpkt_head != NULL) {
4416			*ipkt_lst = assoc->ipsa_bpkt_head;
4417			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4418			assoc->ipsa_mblkcnt = 0;
4419		} else {
4420			*ipkt_lst = NULL;
4421		}
4422		break;
4423	default:
4424		rcode = EINVAL;
4425		break;
4426	}
4427
4428	mutex_exit(&assoc->ipsa_lock);
4429	return (rcode);
4430}
4431
4432/*
4433 * Check a proposed KMC update for sanity.
4434 */
4435static int
4436sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4437{
4438	uint32_t kmp = sq->kmp;
4439	uint32_t kmc = sq->kmc;
4440
4441	if (sa == NULL)
4442		return (0);
4443
4444	if (sa->ipsa_state == IPSA_STATE_DEAD)
4445		return (ESRCH);	/* DEAD == Not there, in this case. */
4446
4447	if ((kmp != 0) && ((sa->ipsa_kmp != 0) || (sa->ipsa_kmp != kmp))) {
4448		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4449		return (EINVAL);
4450	}
4451
4452	if ((kmc != 0) && ((sa->ipsa_kmc != 0) || (sa->ipsa_kmc != kmc))) {
4453		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4454		return (EINVAL);
4455	}
4456
4457	return (0);
4458}
4459
4460/*
4461 * Actually update the KMC info.
4462 */
4463static void
4464sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4465{
4466	uint32_t kmp = sq->kmp;
4467	uint32_t kmc = sq->kmc;
4468
4469	if (kmp != 0)
4470		sa->ipsa_kmp = kmp;
4471	if (kmc != 0)
4472		sa->ipsa_kmc = kmc;
4473}
4474
4475/*
4476 * Common code to update an SA.
4477 */
4478
4479int
4480sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4481    sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4482    int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4483    netstack_t *ns, uint8_t sadb_msg_type)
4484{
4485	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4486	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4487	sadb_x_replay_ctr_t *replext =
4488	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4489	sadb_lifetime_t *soft =
4490	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4491	sadb_lifetime_t *hard =
4492	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4493	sadb_lifetime_t *idle =
4494	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4495	sadb_x_pair_t *pair_ext =
4496	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4497	ipsa_t *echo_target = NULL;
4498	ipsap_t ipsapp;
4499	ipsa_query_t sq;
4500	time_t current = gethrestime_sec();
4501
4502	sq.spp = spp;		/* XXX param */
4503	int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4504	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
4505	    &sq, diagnostic);
4506
4507	if (error != 0)
4508		return (error);
4509
4510	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4511	if (error != 0)
4512		return (error);
4513
4514	if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4515		if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4516			/*
4517			 * REFRELE the target and let the add_sa_func()
4518			 * deal with updating a larval SA.
4519			 */
4520			destroy_ipsa_pair(&ipsapp);
4521			return (add_sa_func(mp, ksi, diagnostic, ns));
4522		}
4523	}
4524
4525	/*
4526	 * At this point we have an UPDATE to a MATURE SA. There should
4527	 * not be any keying material present.
4528	 */
4529	if (akey != NULL) {
4530		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4531		error = EINVAL;
4532		goto bail;
4533	}
4534	if (ekey != NULL) {
4535		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4536		error = EINVAL;
4537		goto bail;
4538	}
4539
4540	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4541		if (ipsapp.ipsap_sa_ptr != NULL &&
4542		    ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4543			if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4544			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4545				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4546				goto bail;
4547			}
4548		}
4549		if (ipsapp.ipsap_psa_ptr != NULL &&
4550		    ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4551			if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4552			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4553				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4554				goto bail;
4555			}
4556		}
4557	}
4558	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4559		if (ipsapp.ipsap_sa_ptr != NULL) {
4560			error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4561			    sq.assoc->sadb_sa_state,
4562			    (ipsapp.ipsap_sa_ptr->ipsa_flags &
4563			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4564			if (error) {
4565				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4566				goto bail;
4567			}
4568		}
4569		if (ipsapp.ipsap_psa_ptr != NULL) {
4570			error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4571			    sq.assoc->sadb_sa_state,
4572			    (ipsapp.ipsap_psa_ptr->ipsa_flags &
4573			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4574			if (error) {
4575				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4576				goto bail;
4577			}
4578		}
4579		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4580		    ksi, echo_target);
4581		goto bail;
4582	}
4583
4584	/*
4585	 * Reality checks for updates of active associations.
4586	 * Sundry first-pass UPDATE-specific reality checks.
4587	 * Have to do the checks here, because it's after the add_sa code.
4588	 * XXX STATS : logging/stats here?
4589	 */
4590
4591	if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4592	    (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4593		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4594		error = EINVAL;
4595		goto bail;
4596	}
4597	if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4598		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4599		error = EINVAL;
4600		goto bail;
4601	}
4602	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4603		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4604		error = EOPNOTSUPP;
4605		goto bail;
4606	}
4607
4608	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4609		error = EINVAL;
4610		goto bail;
4611	}
4612
4613	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4614		return (EINVAL);
4615
4616	error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4617	if (error != 0)
4618		goto bail;
4619
4620	error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4621	if (error != 0)
4622		goto bail;
4623
4624
4625	if (ipsapp.ipsap_sa_ptr != NULL) {
4626		/*
4627		 * Do not allow replay value change for MATURE or LARVAL SA.
4628		 */
4629
4630		if ((replext != NULL) &&
4631		    ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4632		    (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4633			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4634			error = EINVAL;
4635			goto bail;
4636		}
4637	}
4638
4639
4640	if (ipsapp.ipsap_sa_ptr != NULL) {
4641		sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4642		    idle, B_TRUE);
4643		sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4644		if ((replext != NULL) &&
4645		    (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4646			/*
4647			 * If an inbound SA, update the replay counter
4648			 * and check off all the other sequence number
4649			 */
4650			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4651				if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4652				    replext->sadb_x_rc_replay32)) {
4653					*diagnostic =
4654					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4655					error = EINVAL;
4656					goto bail;
4657				}
4658				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4659				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4660				    current +
4661				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4662				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4663			} else {
4664				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4665				ipsapp.ipsap_sa_ptr->ipsa_replay =
4666				    replext->sadb_x_rc_replay32;
4667				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4668				    current +
4669				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4670				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4671			}
4672		}
4673	}
4674
4675	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4676		if (ipsapp.ipsap_psa_ptr != NULL) {
4677			sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4678			    idle, B_FALSE);
4679			sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4680		} else {
4681			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4682			error = ESRCH;
4683			goto bail;
4684		}
4685	}
4686
4687	if (pair_ext != NULL)
4688		error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4689
4690	if (error == 0)
4691		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4692		    ksi, echo_target);
4693bail:
4694
4695	destroy_ipsa_pair(&ipsapp);
4696
4697	return (error);
4698}
4699
4700
4701static int
4702update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4703    int *diagnostic)
4704{
4705	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4706	sadb_x_pair_t *pair_ext =
4707	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4708	int error = 0;
4709	ipsap_t oipsapp;
4710	boolean_t undo_pair = B_FALSE;
4711	uint32_t ipsa_flags;
4712
4713	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4714	    assoc->sadb_sa_spi) {
4715		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4716		return (EINVAL);
4717	}
4718
4719	/*
4720	 * Assume for now that the spi value provided in the SADB_UPDATE
4721	 * message was valid, update the SA with its pair spi value.
4722	 * If the spi turns out to be bogus or the SA no longer exists
4723	 * then this will be detected when the reverse update is made
4724	 * below.
4725	 */
4726	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4727	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4728	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4729	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4730
4731	/*
4732	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4733	 * should now return pointers to the SA *AND* its pair, if this is not
4734	 * the case, the "otherspi" either did not exist or was deleted. Also
4735	 * check that "otherspi" is not already paired. If everything looks
4736	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4737	 * after this update to ensure its not deleted until we are done.
4738	 */
4739	error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4740	if (error != 0) {
4741		/*
4742		 * This should never happen, calling function still has
4743		 * IPSA_REFHELD on the SA we just updated.
4744		 */
4745		return (error);	/* XXX EINVAL instead of ESRCH? */
4746	}
4747
4748	if (oipsapp.ipsap_psa_ptr == NULL) {
4749		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4750		error = EINVAL;
4751		undo_pair = B_TRUE;
4752	} else {
4753		ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4754		if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4755		    (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4756			/* Its dead Jim! */
4757			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4758			undo_pair = B_TRUE;
4759		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4760		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4761			/* This SA is in both hashtables. */
4762			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4763			undo_pair = B_TRUE;
4764		} else if (ipsa_flags & IPSA_F_PAIRED) {
4765			/* This SA is already paired with another. */
4766			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4767			undo_pair = B_TRUE;
4768		}
4769	}
4770
4771	if (undo_pair) {
4772		/* The pair SA does not exist. */
4773		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4774		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4775		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4776		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4777	} else {
4778		mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4779		oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4780		oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4781		mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4782	}
4783
4784	destroy_ipsa_pair(&oipsapp);
4785	return (error);
4786}
4787
4788/*
4789 * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4790 * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4791 * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4792 * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4793 * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4794 * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4795 * other direction's SA.
4796 */
4797
4798/*
4799 * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4800 * grab it, lock it, and return it.  Otherwise return NULL.
4801 *
4802 * XXX MLS number of arguments getting unwieldy here
4803 */
4804static ipsacq_t *
4805sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4806    uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4807    uint64_t unique_id, ts_label_t *tsl)
4808{
4809	ipsacq_t *walker;
4810	sa_family_t fam;
4811	uint32_t blank_address[4] = {0, 0, 0, 0};
4812
4813	if (isrc == NULL) {
4814		ASSERT(idst == NULL);
4815		isrc = idst = blank_address;
4816	}
4817
4818	/*
4819	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4820	 *
4821	 * XXX May need search for duplicates based on other things too!
4822	 */
4823	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4824	    walker = walker->ipsacq_next) {
4825		mutex_enter(&walker->ipsacq_lock);
4826		fam = walker->ipsacq_addrfam;
4827		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4828		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4829		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4830		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4831		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4832		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4833		    (ap == walker->ipsacq_act) &&
4834		    (pp == walker->ipsacq_policy) &&
4835		    /* XXX do deep compares of ap/pp? */
4836		    (unique_id == walker->ipsacq_unique_id) &&
4837		    (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4838			break;			/* everything matched */
4839		mutex_exit(&walker->ipsacq_lock);
4840	}
4841
4842	return (walker);
4843}
4844
4845/*
4846 * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4847 * of all of the same length.  Give up (and drop) if memory
4848 * cannot be allocated for a new one; otherwise, invoke callback to
 * send the acquire up.
4850 *
4851 * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4852 * list.  The ah_add_sa_finish() routines can look at the packet's attached
4853 * attributes and handle this case specially.
4854 */
4855void
4856sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
4857    boolean_t need_esp)
4858{
4859	mblk_t	*asyncmp;
4860	sadbp_t *spp;
4861	sadb_t *sp;
4862	ipsacq_t *newbie;
4863	iacqf_t *bucket;
4864	mblk_t *extended;
4865	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4866	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4867	uint32_t *src, *dst, *isrc, *idst;
4868	ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
4869	ipsec_action_t *ap = ixa->ixa_ipsec_action;
4870	sa_family_t af;
4871	int hashoffset;
4872	uint32_t seq;
4873	uint64_t unique_id = 0;
4874	ipsec_selector_t sel;
4875	boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
4876	ts_label_t 	*tsl = NULL;
4877	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
4878	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4879	sadb_sens_t 	*sens = NULL;
4880	int 		sens_len;
4881
4882	ASSERT((pp != NULL) || (ap != NULL));
4883
4884	ASSERT(need_ah != NULL || need_esp != NULL);
4885
4886	/* Assign sadb pointers */
4887	if (need_esp) { /* ESP for AH+ESP */
4888		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4889
4890		spp = &espstack->esp_sadb;
4891	} else {
4892		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4893
4894		spp = &ahstack->ah_sadb;
4895	}
4896	sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
4897
4898	if (is_system_labeled())
4899		tsl = ixa->ixa_tsl;
4900
4901	if (ap == NULL)
4902		ap = pp->ipsp_act;
4903
4904	ASSERT(ap != NULL);
4905
4906	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4907		unique_id = SA_FORM_UNIQUE_ID(ixa);
4908
4909	/*
4910	 * Set up an ACQUIRE record.
4911	 *
4912	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4913	 * below the lowest point allowed in the kernel.  (In other words,
4914	 * make sure the high bit on the sequence number is set.)
4915	 */
4916
4917	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4918
4919	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4920		src = (uint32_t *)&ipha->ipha_src;
4921		dst = (uint32_t *)&ipha->ipha_dst;
4922		af = AF_INET;
4923		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4924		ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
4925	} else {
4926		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4927		src = (uint32_t *)&ip6h->ip6_src;
4928		dst = (uint32_t *)&ip6h->ip6_dst;
4929		af = AF_INET6;
4930		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4931		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
4932	}
4933
4934	if (tunnel_mode) {
4935		if (pp == NULL) {
4936			/*
4937			 * Tunnel mode with no policy pointer means this is a
4938			 * reflected ICMP (like a ECHO REQUEST) that came in
4939			 * with self-encapsulated protection.  Until we better
4940			 * support this, drop the packet.
4941			 */
4942			ip_drop_packet(datamp, B_FALSE, NULL,
4943			    DROPPER(ipss, ipds_spd_got_selfencap),
4944			    &ipss->ipsec_spd_dropper);
4945			return;
4946		}
4947		/* Snag inner addresses. */
4948		isrc = ixa->ixa_ipsec_insrc;
4949		idst = ixa->ixa_ipsec_indst;
4950	} else {
4951		isrc = idst = NULL;
4952	}
4953
4954	/*
4955	 * Check buckets to see if there is an existing entry.  If so,
4956	 * grab it.  sadb_checkacquire locks newbie if found.
4957	 */
4958	bucket = &(sp->sdb_acq[hashoffset]);
4959	mutex_enter(&bucket->iacqf_lock);
4960	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4961	    unique_id, tsl);
4962
4963	if (newbie == NULL) {
4964		/*
4965		 * Otherwise, allocate a new one.
4966		 */
4967		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4968		if (newbie == NULL) {
4969			mutex_exit(&bucket->iacqf_lock);
4970			ip_drop_packet(datamp, B_FALSE, NULL,
4971			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4972			    &ipss->ipsec_sadb_dropper);
4973			return;
4974		}
4975		newbie->ipsacq_policy = pp;
4976		if (pp != NULL) {
4977			IPPOL_REFHOLD(pp);
4978		}
4979		IPACT_REFHOLD(ap);
4980		newbie->ipsacq_act = ap;
4981		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4982		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4983		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4984		if (newbie->ipsacq_next != NULL)
4985			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4986
4987		bucket->iacqf_ipsacq = newbie;
4988		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4989		mutex_enter(&newbie->ipsacq_lock);
4990	}
4991
4992	/*
4993	 * XXX MLS does it actually help us to drop the bucket lock here?
4994	 * we have inserted a half-built, locked acquire record into the
4995	 * bucket.  any competing thread will now be able to lock the bucket
4996	 * to scan it, but will immediately pile up on the new acquire
4997	 * record's lock; I don't think we gain anything here other than to
4998	 * disperse blame for lock contention.
4999	 *
5000	 * we might be able to dispense with acquire record locks entirely..
5001	 * just use the bucket locks..
5002	 */
5003
5004	mutex_exit(&bucket->iacqf_lock);
5005
5006	/*
5007	 * This assert looks silly for now, but we may need to enter newbie's
5008	 * mutex during a search.
5009	 */
5010	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5011
5012	/*
5013	 * Make the ip_xmit_attr_t into something we can queue.
5014	 * If no memory it frees datamp.
5015	 */
5016	asyncmp = ip_xmit_attr_to_mblk(ixa);
5017	if (asyncmp != NULL)
5018		linkb(asyncmp, datamp);
5019
5020	/* Queue up packet.  Use b_next. */
5021
5022	if (asyncmp == NULL) {
5023		/* Statistics for allocation failure */
5024		if (ixa->ixa_flags & IXAF_IS_IPV4) {
5025			BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5026			    ipIfStatsOutDiscards);
5027		} else {
5028			BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5029			    ipIfStatsOutDiscards);
5030		}
5031		ip_drop_output("No memory for asyncmp", datamp, NULL);
5032		freemsg(datamp);
5033	} else if (newbie->ipsacq_numpackets == 0) {
5034		/* First one. */
5035		newbie->ipsacq_mp = asyncmp;
5036		newbie->ipsacq_numpackets = 1;
5037		newbie->ipsacq_expire = gethrestime_sec();
5038		/*
5039		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5040		 * value.
5041		 */
5042		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5043		newbie->ipsacq_seq = seq;
5044		newbie->ipsacq_addrfam = af;
5045
5046		newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5047		newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5048		newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5049		newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5050		if (tunnel_mode) {
5051			newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5052			newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5053			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5054			newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5055			newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5056			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5057			    ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5058			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5059			    ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5060		} else {
5061			newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5062		}
5063		newbie->ipsacq_unique_id = unique_id;
5064
5065		if (ixa->ixa_tsl != NULL) {
5066			label_hold(ixa->ixa_tsl);
5067			newbie->ipsacq_tsl = ixa->ixa_tsl;
5068		}
5069	} else {
5070		/* Scan to the end of the list & insert. */
5071		mblk_t *lastone = newbie->ipsacq_mp;
5072
5073		while (lastone->b_next != NULL)
5074			lastone = lastone->b_next;
5075		lastone->b_next = asyncmp;
5076		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5077			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5078			lastone = newbie->ipsacq_mp;
5079			newbie->ipsacq_mp = lastone->b_next;
5080			lastone->b_next = NULL;
5081
5082			/* Freeing the async message */
5083			lastone = ip_xmit_attr_free_mblk(lastone);
5084			ip_drop_packet(lastone, B_FALSE, NULL,
5085			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5086			    &ipss->ipsec_sadb_dropper);
5087		} else {
5088			IP_ACQUIRE_STAT(ipss, qhiwater,
5089			    newbie->ipsacq_numpackets);
5090		}
5091	}
5092
5093	/*
5094	 * Reset addresses.  Set them to the most recently added mblk chain,
5095	 * so that the address pointers in the acquire record will point
5096	 * at an mblk still attached to the acquire list.
5097	 */
5098
5099	newbie->ipsacq_srcaddr = src;
5100	newbie->ipsacq_dstaddr = dst;
5101
5102	/*
5103	 * If the acquire record has more than one queued packet, we've
5104	 * already sent an ACQUIRE, and don't need to repeat ourself.
5105	 */
5106	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5107		/* I have an acquire outstanding already! */
5108		mutex_exit(&newbie->ipsacq_lock);
5109		return;
5110	}
5111
5112	if (!keysock_extended_reg(ns))
5113		goto punt_extended;
5114	/*
5115	 * Construct an extended ACQUIRE.  There are logging
5116	 * opportunities here in failure cases.
5117	 */
5118	bzero(&sel, sizeof (sel));
5119	sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5120	if (tunnel_mode) {
5121		sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5122		    IPPROTO_ENCAP : IPPROTO_IPV6;
5123	} else {
5124		sel.ips_protocol = ixa->ixa_ipsec_proto;
5125		sel.ips_local_port = ixa->ixa_ipsec_src_port;
5126		sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5127	}
5128	sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
5129	sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5130	sel.ips_is_icmp_inv_acq = 0;
5131	if (af == AF_INET) {
5132		sel.ips_local_addr_v4 = ipha->ipha_src;
5133		sel.ips_remote_addr_v4 = ipha->ipha_dst;
5134	} else {
5135		sel.ips_local_addr_v6 = ip6h->ip6_src;
5136		sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5137	}
5138
5139	extended = sadb_keysock_out(0);
5140	if (extended == NULL)
5141		goto punt_extended;
5142
5143	if (ixa->ixa_tsl != NULL) {
5144		/*
5145		 * XXX MLS correct condition here?
5146		 * XXX MLS other credential attributes in acquire?
5147		 * XXX malloc failure?  don't fall back to original?
5148		 */
5149		sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
5150
5151		if (sens == NULL) {
5152			freeb(extended);
5153			goto punt_extended;
5154		}
5155	}
5156
5157	extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
5158	    seq, 0, sens, ns);
5159
5160	if (sens != NULL)
5161		kmem_free(sens, sens_len);
5162
5163	if (extended->b_cont == NULL) {
5164		freeb(extended);
5165		goto punt_extended;
5166	}
5167
5168	/*
5169	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5170	 * this new record.  The send-acquire callback assumes that acqrec is
5171	 * already locked.
5172	 */
5173	(*spp->s_acqfn)(newbie, extended, ns);
5174	return;
5175
5176punt_extended:
5177	(*spp->s_acqfn)(newbie, NULL, ns);
5178}
5179
5180/*
5181 * Unlink and free an acquire record.
5182 */
5183void
5184sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5185{
5186	mblk_t		*mp;
5187	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5188
5189	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5190
5191	if (acqrec->ipsacq_policy != NULL) {
5192		IPPOL_REFRELE(acqrec->ipsacq_policy);
5193	}
5194	if (acqrec->ipsacq_act != NULL) {
5195		IPACT_REFRELE(acqrec->ipsacq_act);
5196	}
5197
5198	/* Unlink */
5199	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5200	if (acqrec->ipsacq_next != NULL)
5201		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5202
5203	if (acqrec->ipsacq_tsl != NULL) {
5204		label_rele(acqrec->ipsacq_tsl);
5205		acqrec->ipsacq_tsl = NULL;
5206	}
5207
5208	/*
5209	 * Free hanging mp's.
5210	 *
5211	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5212	 */
5213
5214	mutex_enter(&acqrec->ipsacq_lock);
5215	while (acqrec->ipsacq_mp != NULL) {
5216		mp = acqrec->ipsacq_mp;
5217		acqrec->ipsacq_mp = mp->b_next;
5218		mp->b_next = NULL;
5219		/* Freeing the async message */
5220		mp = ip_xmit_attr_free_mblk(mp);
5221		ip_drop_packet(mp, B_FALSE, NULL,
5222		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5223		    &ipss->ipsec_sadb_dropper);
5224	}
5225	mutex_exit(&acqrec->ipsacq_lock);
5226
5227	/* Free */
5228	mutex_destroy(&acqrec->ipsacq_lock);
5229	kmem_free(acqrec, sizeof (*acqrec));
5230}
5231
5232/*
5233 * Destroy an acquire list fanout.
5234 */
5235static void
5236sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5237    netstack_t *ns)
5238{
5239	int i;
5240	iacqf_t *list = *listp;
5241
5242	if (list == NULL)
5243		return;
5244
5245	for (i = 0; i < numentries; i++) {
5246		mutex_enter(&(list[i].iacqf_lock));
5247		while (list[i].iacqf_ipsacq != NULL)
5248			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5249		mutex_exit(&(list[i].iacqf_lock));
5250		if (forever)
5251			mutex_destroy(&(list[i].iacqf_lock));
5252	}
5253
5254	if (forever) {
5255		*listp = NULL;
5256		kmem_free(list, numentries * sizeof (*list));
5257	}
5258}
5259
5260/*
5261 * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5262 * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5263 */
5264static uint8_t *
5265sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5266    sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5267    uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5268{
5269	uint8_t *cur = start;
5270	ipsec_alginfo_t *algp;
5271	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5272
5273	cur += sizeof (*algdesc);
5274	if (cur >= limit)
5275		return (NULL);
5276
5277	ecomb->sadb_x_ecomb_numalgs++;
5278
5279	/*
5280	 * Normalize vs. crypto framework's limits.  This way, you can specify
5281	 * a stronger policy, and when the framework loads a stronger version,
5282	 * you can just keep plowing w/o rewhacking your SPD.
5283	 */
5284	mutex_enter(&ipss->ipsec_alg_lock);
5285	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5286	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5287	if (algp == NULL) {
5288		mutex_exit(&ipss->ipsec_alg_lock);
5289		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5290	}
5291	if (minbits < algp->alg_ef_minbits)
5292		minbits = algp->alg_ef_minbits;
5293	if (maxbits > algp->alg_ef_maxbits)
5294		maxbits = algp->alg_ef_maxbits;
5295	mutex_exit(&ipss->ipsec_alg_lock);
5296
5297	algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5298	algdesc->sadb_x_algdesc_satype = satype;
5299	algdesc->sadb_x_algdesc_algtype = algtype;
5300	algdesc->sadb_x_algdesc_alg = alg;
5301	algdesc->sadb_x_algdesc_minbits = minbits;
5302	algdesc->sadb_x_algdesc_maxbits = maxbits;
5303
5304	return (cur);
5305}
5306
5307/*
5308 * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5309 * which must fit before *limit
5310 *
5311 * return NULL if we ran out of room or a pointer to the end of the ecomb.
5312 */
5313static uint8_t *
5314sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5315    netstack_t *ns)
5316{
5317	uint8_t *cur = start;
5318	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5319	ipsec_prot_t *ipp;
5320	ipsec_stack_t *ipss = ns->netstack_ipsec;
5321
5322	cur += sizeof (*ecomb);
5323	if (cur >= limit)
5324		return (NULL);
5325
5326	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5327
5328	ipp = &act->ipa_act.ipa_apply;
5329
5330	ecomb->sadb_x_ecomb_numalgs = 0;
5331	ecomb->sadb_x_ecomb_reserved = 0;
5332	ecomb->sadb_x_ecomb_reserved2 = 0;
5333	/*
5334	 * No limits on allocations, since we really don't support that
5335	 * concept currently.
5336	 */
5337	ecomb->sadb_x_ecomb_soft_allocations = 0;
5338	ecomb->sadb_x_ecomb_hard_allocations = 0;
5339
5340	/*
5341	 * XXX TBD: Policy or global parameters will eventually be
5342	 * able to fill in some of these.
5343	 */
5344	ecomb->sadb_x_ecomb_flags = 0;
5345	ecomb->sadb_x_ecomb_soft_bytes = 0;
5346	ecomb->sadb_x_ecomb_hard_bytes = 0;
5347	ecomb->sadb_x_ecomb_soft_addtime = 0;
5348	ecomb->sadb_x_ecomb_hard_addtime = 0;
5349	ecomb->sadb_x_ecomb_soft_usetime = 0;
5350	ecomb->sadb_x_ecomb_hard_usetime = 0;
5351
5352	if (ipp->ipp_use_ah) {
5353		cur = sadb_new_algdesc(cur, limit, ecomb,
5354		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5355		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5356		if (cur == NULL)
5357			return (NULL);
5358		ipsecah_fill_defs(ecomb, ns);
5359	}
5360
5361	if (ipp->ipp_use_esp) {
5362		if (ipp->ipp_use_espa) {
5363			cur = sadb_new_algdesc(cur, limit, ecomb,
5364			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5365			    ipp->ipp_esp_auth_alg,
5366			    ipp->ipp_espa_minbits,
5367			    ipp->ipp_espa_maxbits, ipss);
5368			if (cur == NULL)
5369				return (NULL);
5370		}
5371
5372		cur = sadb_new_algdesc(cur, limit, ecomb,
5373		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5374		    ipp->ipp_encr_alg,
5375		    ipp->ipp_espe_minbits,
5376		    ipp->ipp_espe_maxbits, ipss);
5377		if (cur == NULL)
5378			return (NULL);
5379		/* Fill in lifetimes if and only if AH didn't already... */
5380		if (!ipp->ipp_use_ah)
5381			ipsecesp_fill_defs(ecomb, ns);
5382	}
5383
5384	return (cur);
5385}
5386
5387#include <sys/tsol/label_macro.h> /* XXX should not need this */
5388
5389/*
5390 * From a cred_t, construct a sensitivity label extension
5391 *
5392 * We send up a fixed-size sensitivity label bitmap, and are perhaps
5393 * overly chummy with the underlying data structures here.
5394 */
5395
5396/* ARGSUSED */
5397int
5398sadb_sens_len_from_label(ts_label_t *tsl)
5399{
5400	int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
5401	return (roundup(baselen, sizeof (uint64_t)));
5402}
5403
5404void
5405sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
5406    int senslen)
5407{
5408	uint8_t *bitmap;
5409	bslabel_t *sl;
5410
5411	/* LINTED */
5412	ASSERT((_C_LEN & 1) == 0);
5413	ASSERT((senslen & 7) == 0);
5414
5415	sl = label2bslabel(tsl);
5416
5417	sens->sadb_sens_exttype = exttype;
5418	sens->sadb_sens_len = SADB_8TO64(senslen);
5419
5420	sens->sadb_sens_dpd = tsl->tsl_doi;
5421	sens->sadb_sens_sens_level = LCLASS(sl);
5422	sens->sadb_sens_integ_level = 0; /* TBD */
5423	sens->sadb_sens_sens_len = _C_LEN >> 1;
5424	sens->sadb_sens_integ_len = 0; /* TBD */
5425	sens->sadb_x_sens_flags = 0;
5426
5427	bitmap = (uint8_t *)(sens + 1);
5428	bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
5429}
5430
5431static sadb_sens_t *
5432sadb_make_sens_ext(ts_label_t *tsl, int *len)
5433{
5434	/* XXX allocation failure? */
5435	int sens_len = sadb_sens_len_from_label(tsl);
5436
5437	sadb_sens_t *sens = kmem_alloc(sens_len, KM_SLEEP);
5438
5439	sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, tsl, sens_len);
5440
5441	*len = sens_len;
5442
5443	return (sens);
5444}
5445
5446/*
5447 * Okay, how do we report errors/invalid labels from this?
5448 * With a special designated "not a label" cred_t ?
5449 */
5450/* ARGSUSED */
5451ts_label_t *
5452sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
5453{
5454	int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
5455	bslabel_t sl;
5456	ts_label_t *tsl;
5457
5458	if (sens->sadb_sens_integ_level != 0)
5459		return (NULL);
5460	if (sens->sadb_sens_integ_len != 0)
5461		return (NULL);
5462	if (bitmap_len > _C_LEN * 4)
5463		return (NULL);
5464
5465	bsllow(&sl);
5466	LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
5467	bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
5468	    bitmap_len);
5469
5470	tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
5471	if (tsl == NULL)
5472		return (NULL);
5473
5474	if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
5475		tsl->tsl_flags |= TSLF_UNLABELED;
5476	return (tsl);
5477}
5478
5479/* End XXX label-library-leakage */
5480
5481/*
5482 * Construct an extended ACQUIRE message based on a selector and the resulting
5483 * IPsec action.
5484 *
5485 * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5486 * generation. As a consequence, expect this function to evolve
5487 * rapidly.
5488 */
5489static mblk_t *
5490sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5491    ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5492    sadb_sens_t *sens, netstack_t *ns)
5493{
5494	mblk_t *mp;
5495	sadb_msg_t *samsg;
5496	uint8_t *start, *cur, *end;
5497	uint32_t *saddrptr, *daddrptr;
5498	sa_family_t af;
5499	sadb_prop_t *eprop;
5500	ipsec_action_t *ap, *an;
5501	ipsec_selkey_t *ipsl;
5502	uint8_t proto, pfxlen;
5503	uint16_t lport, rport;
5504	uint32_t kmp, kmc;
5505
5506	/*
5507	 * Find the action we want sooner rather than later..
5508	 */
5509	an = NULL;
5510	if (pol == NULL) {
5511		ap = act;
5512	} else {
5513		ap = pol->ipsp_act;
5514
5515		if (ap != NULL)
5516			an = ap->ipa_next;
5517	}
5518
5519	/*
5520	 * Just take a swag for the allocation for now.	 We can always
5521	 * alter it later.
5522	 */
5523#define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5524	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5525	if (mp == NULL)
5526		return (NULL);
5527
5528	start = mp->b_rptr;
5529	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5530
5531	cur = start;
5532
5533	samsg = (sadb_msg_t *)cur;
5534	cur += sizeof (*samsg);
5535
5536	samsg->sadb_msg_version = PF_KEY_V2;
5537	samsg->sadb_msg_type = SADB_ACQUIRE;
5538	samsg->sadb_msg_errno = 0;
5539	samsg->sadb_msg_reserved = 0;
5540	samsg->sadb_msg_satype = 0;
5541	samsg->sadb_msg_seq = seq;
5542	samsg->sadb_msg_pid = pid;
5543
5544	if (tunnel_mode) {
5545		/*
5546		 * Form inner address extensions based NOT on the inner
5547		 * selectors (i.e. the packet data), but on the policy's
5548		 * selector key (i.e. the policy's selector information).
5549		 *
5550		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5551		 * same in ipsec_selkey_t (unless the compiler does very
5552		 * strange things with unions, consult your local C language
5553		 * lawyer for details).
5554		 */
5555		ASSERT(pol != NULL);
5556
5557		ipsl = &(pol->ipsp_sel->ipsl_key);
5558		if (ipsl->ipsl_valid & IPSL_IPV4) {
5559			af = AF_INET;
5560			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5561			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5562		} else {
5563			af = AF_INET6;
5564			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5565			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5566		}
5567
5568		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5569			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5570			pfxlen = ipsl->ipsl_local_pfxlen;
5571		} else {
5572			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5573			pfxlen = 0;
5574		}
5575		/* XXX What about ICMP type/code? */
5576		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5577		    ipsl->ipsl_lport : 0;
5578		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5579		    ipsl->ipsl_proto : 0;
5580
5581		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5582		    af, saddrptr, lport, proto, pfxlen);
5583		if (cur == NULL) {
5584			freeb(mp);
5585			return (NULL);
5586		}
5587
5588		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5589			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5590			pfxlen = ipsl->ipsl_remote_pfxlen;
5591		} else {
5592			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5593			pfxlen = 0;
5594		}
5595		/* XXX What about ICMP type/code? */
5596		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5597		    ipsl->ipsl_rport : 0;
5598
5599		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5600		    af, daddrptr, rport, proto, pfxlen);
5601		if (cur == NULL) {
5602			freeb(mp);
5603			return (NULL);
5604		}
5605		/*
5606		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5607		 * _with_ inner-packet address selectors, we'll need to further
5608		 * distinguish tunnel mode here.  For now, having inner
5609		 * addresses and/or ports is sufficient.
5610		 *
5611		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5612		 * outer addresses.
5613		 */
5614		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5615		lport = rport = 0;
5616	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5617		proto = 0;
5618		lport = 0;
5619		rport = 0;
5620		if (pol != NULL) {
5621			ipsl = &(pol->ipsp_sel->ipsl_key);
5622			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5623				proto = ipsl->ipsl_proto;
5624			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5625				rport = ipsl->ipsl_rport;
5626			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5627				lport = ipsl->ipsl_lport;
5628		}
5629	} else {
5630		proto = sel->ips_protocol;
5631		lport = sel->ips_local_port;
5632		rport = sel->ips_remote_port;
5633	}
5634
5635	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5636
5637	/*
5638	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5639	 * ipsec_selector_t.
5640	 */
5641	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5642	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5643
5644	if (cur == NULL) {
5645		freeb(mp);
5646		return (NULL);
5647	}
5648
5649	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5650	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5651
5652	if (cur == NULL) {
5653		freeb(mp);
5654		return (NULL);
5655	}
5656
5657	if (sens != NULL) {
5658		uint8_t *sensext = cur;
5659		int senslen = SADB_64TO8(sens->sadb_sens_len);
5660
5661		cur += senslen;
5662		if (cur > end) {
5663			freeb(mp);
5664			return (NULL);
5665		}
5666		bcopy(sens, sensext, senslen);
5667	}
5668
5669	/*
5670	 * This section will change a lot as policy evolves.
5671	 * For now, it'll be relatively simple.
5672	 */
5673	eprop = (sadb_prop_t *)cur;
5674	cur += sizeof (*eprop);
5675	if (cur > end) {
5676		/* no space left */
5677		freeb(mp);
5678		return (NULL);
5679	}
5680
5681	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5682	eprop->sadb_x_prop_ereserved = 0;
5683	eprop->sadb_x_prop_numecombs = 0;
5684	eprop->sadb_prop_replay = 32;	/* default */
5685
5686	kmc = kmp = 0;
5687
5688	for (; ap != NULL; ap = an) {
5689		an = (pol != NULL) ? ap->ipa_next : NULL;
5690
5691		/*
5692		 * Skip non-IPsec policies
5693		 */
5694		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5695			continue;
5696
5697		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5698			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5699		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5700			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5701		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5702			eprop->sadb_prop_replay =
5703			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5704		}
5705
5706		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5707		if (cur == NULL) { /* no space */
5708			freeb(mp);
5709			return (NULL);
5710		}
5711		eprop->sadb_x_prop_numecombs++;
5712	}
5713
5714	if (eprop->sadb_x_prop_numecombs == 0) {
5715		/*
5716		 * This will happen if we fail to find a policy
5717		 * allowing for IPsec processing.
5718		 * Construct an error message.
5719		 */
5720		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5721		samsg->sadb_msg_errno = ENOENT;
5722		samsg->sadb_x_msg_diagnostic = 0;
5723		return (mp);
5724	}
5725
5726	if ((kmp != 0) || (kmc != 0)) {
5727		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5728		if (cur == NULL) {
5729			freeb(mp);
5730			return (NULL);
5731		}
5732	}
5733
5734	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5735	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5736	mp->b_wptr = cur;
5737
5738	return (mp);
5739}
5740
5741/*
5742 * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5743 *
5744 * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5745 * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5746 * maximize code consolidation while preventing algorithm changes from messing
5747 * with the callers finishing touches on the ACQUIRE itself.
5748 */
5749mblk_t *
5750sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5751{
5752	uint_t allocsize;
5753	mblk_t *pfkeymp, *msgmp;
5754	sa_family_t af;
5755	uint8_t *cur, *end;
5756	sadb_msg_t *samsg;
5757	uint16_t sport_typecode;
5758	uint16_t dport_typecode;
5759	uint8_t check_proto;
5760	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5761
5762	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5763
5764	pfkeymp = sadb_keysock_out(0);
5765	if (pfkeymp == NULL)
5766		return (NULL);
5767
5768	/*
5769	 * First, allocate a basic ACQUIRE message
5770	 */
5771	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5772	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5773
5774	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5775	allocsize += 2 * sizeof (struct sockaddr_in6);
5776
5777	mutex_enter(&ipss->ipsec_alg_lock);
5778	/* NOTE:  The lock is now held through to this function's return. */
5779	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5780	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5781
5782	if (tunnel_mode) {
5783		/* Tunnel mode! */
5784		allocsize += 2 * sizeof (sadb_address_t);
5785		/* Enough to cover both AF_INET and AF_INET6. */
5786		allocsize += 2 * sizeof (struct sockaddr_in6);
5787	}
5788
5789	msgmp = allocb(allocsize, BPRI_HI);
5790	if (msgmp == NULL) {
5791		freeb(pfkeymp);
5792		mutex_exit(&ipss->ipsec_alg_lock);
5793		return (NULL);
5794	}
5795
5796	pfkeymp->b_cont = msgmp;
5797	cur = msgmp->b_rptr;
5798	end = cur + allocsize;
5799	samsg = (sadb_msg_t *)cur;
5800	cur += sizeof (sadb_msg_t);
5801
5802	af = acqrec->ipsacq_addrfam;
5803	switch (af) {
5804	case AF_INET:
5805		check_proto = IPPROTO_ICMP;
5806		break;
5807	case AF_INET6:
5808		check_proto = IPPROTO_ICMPV6;
5809		break;
5810	default:
5811		/* This should never happen unless we have kernel bugs. */
5812		cmn_err(CE_WARN,
5813		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5814		ASSERT(0);
5815		mutex_exit(&ipss->ipsec_alg_lock);
5816		return (NULL);
5817	}
5818
5819	samsg->sadb_msg_version = PF_KEY_V2;
5820	samsg->sadb_msg_type = SADB_ACQUIRE;
5821	samsg->sadb_msg_satype = satype;
5822	samsg->sadb_msg_errno = 0;
5823	samsg->sadb_msg_pid = 0;
5824	samsg->sadb_msg_reserved = 0;
5825	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5826
5827	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5828
5829	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5830		sport_typecode = dport_typecode = 0;
5831	} else {
5832		sport_typecode = acqrec->ipsacq_srcport;
5833		dport_typecode = acqrec->ipsacq_dstport;
5834	}
5835
5836	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5837	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5838
5839	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5840	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5841
5842	if (tunnel_mode) {
5843		sport_typecode = acqrec->ipsacq_srcport;
5844		dport_typecode = acqrec->ipsacq_dstport;
5845		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5846		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5847		    sport_typecode, acqrec->ipsacq_inner_proto,
5848		    acqrec->ipsacq_innersrcpfx);
5849		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5850		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5851		    dport_typecode, acqrec->ipsacq_inner_proto,
5852		    acqrec->ipsacq_innerdstpfx);
5853	}
5854
5855	/* XXX Insert identity information here. */
5856
5857	/* XXXMLS Insert sensitivity information here. */
5858
5859	if (cur != NULL)
5860		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5861	else
5862		mutex_exit(&ipss->ipsec_alg_lock);
5863
5864	return (pfkeymp);
5865}
5866
5867/*
5868 * Given an SADB_GETSPI message, find an appropriately ranged SA and
5869 * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5870 * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5871 * (ipsa_t *)-1).
5872 *
5873 * master_spi is passed in host order.
5874 */
5875ipsa_t *
5876sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5877    netstack_t *ns, uint_t sa_type)
5878{
5879	sadb_address_t *src =
5880	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5881	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5882	sadb_spirange_t *range =
5883	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5884	struct sockaddr_in *ssa, *dsa;
5885	struct sockaddr_in6 *ssa6, *dsa6;
5886	uint32_t *srcaddr, *dstaddr;
5887	sa_family_t af;
5888	uint32_t add, min, max;
5889	uint8_t protocol =
5890	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5891
5892	if (src == NULL) {
5893		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5894		return ((ipsa_t *)-1);
5895	}
5896	if (dst == NULL) {
5897		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5898		return ((ipsa_t *)-1);
5899	}
5900	if (range == NULL) {
5901		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5902		return ((ipsa_t *)-1);
5903	}
5904
5905	min = ntohl(range->sadb_spirange_min);
5906	max = ntohl(range->sadb_spirange_max);
5907	dsa = (struct sockaddr_in *)(dst + 1);
5908	dsa6 = (struct sockaddr_in6 *)dsa;
5909
5910	ssa = (struct sockaddr_in *)(src + 1);
5911	ssa6 = (struct sockaddr_in6 *)ssa;
5912	ASSERT(dsa->sin_family == ssa->sin_family);
5913
5914	srcaddr = ALL_ZEROES_PTR;
5915	af = dsa->sin_family;
5916	switch (af) {
5917	case AF_INET:
5918		if (src != NULL)
5919			srcaddr = (uint32_t *)(&ssa->sin_addr);
5920		dstaddr = (uint32_t *)(&dsa->sin_addr);
5921		break;
5922	case AF_INET6:
5923		if (src != NULL)
5924			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5925		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5926		break;
5927	default:
5928		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5929		return ((ipsa_t *)-1);
5930	}
5931
5932	if (master_spi < min || master_spi > max) {
5933		/* Return a random value in the range. */
5934		if (cl_inet_getspi) {
5935			cl_inet_getspi(ns->netstack_stackid, protocol,
5936			    (uint8_t *)&add, sizeof (add), NULL);
5937		} else {
5938			(void) random_get_pseudo_bytes((uint8_t *)&add,
5939			    sizeof (add));
5940		}
5941		master_spi = min + (add % (max - min + 1));
5942	}
5943
5944	/*
5945	 * Since master_spi is passed in host order, we need to htonl() it
5946	 * for the purposes of creating a new SA.
5947	 */
5948	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5949	    ns));
5950}
5951
5952/*
5953 *
5954 * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5955 * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5956 * and scan for the sequence number in question.  I may wish to accept an
5957 * address pair with it, for easier searching.
5958 *
5959 * Caller frees the message, so we don't have to here.
5960 *
5961 * NOTE:	The pfkey_q parameter may be used in the future for ACQUIRE
5962 *		failures.
5963 */
5964/* ARGSUSED */
5965void
5966sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
5967    netstack_t *ns)
5968{
5969	int i;
5970	ipsacq_t *acqrec;
5971	iacqf_t *bucket;
5972
5973	/*
5974	 * I only accept the base header for this!
5975	 * Though to be honest, requiring the dst address would help
5976	 * immensely.
5977	 *
5978	 * XXX	There are already cases where I can get the dst address.
5979	 */
5980	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5981		return;
5982
5983	/*
5984	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5985	 * (and in the future send a message to IP with the appropriate error
5986	 * number).
5987	 *
5988	 * Q: Do I want to reject if pid != 0?
5989	 */
5990
5991	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5992		bucket = &sp->s_v4.sdb_acq[i];
5993		mutex_enter(&bucket->iacqf_lock);
5994		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5995		    acqrec = acqrec->ipsacq_next) {
5996			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5997				break;	/* for acqrec... loop. */
5998		}
5999		if (acqrec != NULL)
6000			break;	/* for i = 0... loop. */
6001
6002		mutex_exit(&bucket->iacqf_lock);
6003	}
6004
6005	if (acqrec == NULL) {
6006		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6007			bucket = &sp->s_v6.sdb_acq[i];
6008			mutex_enter(&bucket->iacqf_lock);
6009			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6010			    acqrec = acqrec->ipsacq_next) {
6011				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6012					break;	/* for acqrec... loop. */
6013			}
6014			if (acqrec != NULL)
6015				break;	/* for i = 0... loop. */
6016
6017			mutex_exit(&bucket->iacqf_lock);
6018		}
6019	}
6020
6021
6022	if (acqrec == NULL)
6023		return;
6024
6025	/*
6026	 * What do I do with the errno and IP?	I may need mp's services a
6027	 * little more.	 See sadb_destroy_acquire() for future directions
6028	 * beyond free the mblk chain on the acquire record.
6029	 */
6030
6031	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6032	sadb_destroy_acquire(acqrec, ns);
6033	/* Have to exit mutex here, because of breaking out of for loop. */
6034	mutex_exit(&bucket->iacqf_lock);
6035}
6036
6037/*
6038 * The following functions work with the replay windows of an SA.  They assume
6039 * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6040 * represents the highest sequence number packet received, and back
6041 * (ipsa->ipsa_replay_wsize) packets.
6042 */
6043
6044/*
6045 * Is the replay bit set?
6046 */
6047static boolean_t
6048ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6049{
6050	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6051
6052	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6053}
6054
6055/*
6056 * Shift the bits of the replay window over.
6057 */
6058static void
6059ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6060{
6061	int i;
6062	int jump = ((shift - 1) >> 6) + 1;
6063
6064	if (shift == 0)
6065		return;
6066
6067	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6068		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6069			ipsa->ipsa_replay_arr[i + jump] |=
6070			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6071		}
6072		ipsa->ipsa_replay_arr[i] <<= shift;
6073	}
6074}
6075
6076/*
6077 * Set a bit in the bit vector.
6078 */
6079static void
6080ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6081{
6082	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6083
6084	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6085}
6086
6087#define	SADB_MAX_REPLAY_VALUE 0xffffffff
6088
6089/*
6090 * Assume caller has NOT done ntohl() already on seq.  Check to see
6091 * if replay sequence number "seq" has been seen already.
6092 */
6093boolean_t
6094sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6095{
6096	boolean_t rc;
6097	uint32_t diff;
6098
6099	if (ipsa->ipsa_replay_wsize == 0)
6100		return (B_TRUE);
6101
6102	/*
6103	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6104	 */
6105
6106	/* Convert sequence number into host order before holding the mutex. */
6107	seq = ntohl(seq);
6108
6109	mutex_enter(&ipsa->ipsa_lock);
6110
6111	/* Initialize inbound SA's ipsa_replay field to last one received. */
6112	if (ipsa->ipsa_replay == 0)
6113		ipsa->ipsa_replay = 1;
6114
6115	if (seq > ipsa->ipsa_replay) {
6116		/*
6117		 * I have received a new "highest value received".  Shift
6118		 * the replay window over.
6119		 */
6120		diff = seq - ipsa->ipsa_replay;
6121		if (diff < ipsa->ipsa_replay_wsize) {
6122			/* In replay window, shift bits over. */
6123			ipsa_shift_replay(ipsa, diff);
6124		} else {
6125			/* WAY FAR AHEAD, clear bits and start again. */
6126			bzero(ipsa->ipsa_replay_arr,
6127			    sizeof (ipsa->ipsa_replay_arr));
6128		}
6129		ipsa_set_replay(ipsa, 0);
6130		ipsa->ipsa_replay = seq;
6131		rc = B_TRUE;
6132		goto done;
6133	}
6134	diff = ipsa->ipsa_replay - seq;
6135	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6136		rc = B_FALSE;
6137		goto done;
6138	}
6139	/* Set this packet as seen. */
6140	ipsa_set_replay(ipsa, diff);
6141
6142	rc = B_TRUE;
6143done:
6144	mutex_exit(&ipsa->ipsa_lock);
6145	return (rc);
6146}
6147
6148/*
6149 * "Peek" and see if we should even bother going through the effort of
6150 * running an authentication check on the sequence number passed in.
6151 * this takes into account packets that are below the replay window,
6152 * and collisions with already replayed packets.  Return B_TRUE if it
6153 * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6154 * Assume same byte-ordering as sadb_replay_check.
6155 */
6156boolean_t
6157sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6158{
6159	boolean_t rc = B_FALSE;
6160	uint32_t diff;
6161
6162	if (ipsa->ipsa_replay_wsize == 0)
6163		return (B_TRUE);
6164
6165	/*
6166	 * 0 is 0, regardless of byte order... :)
6167	 *
6168	 * If I get 0 on the wire (and there is a replay window) then the
6169	 * sender most likely wrapped.	This ipsa may need to be marked or
6170	 * something.
6171	 */
6172	if (seq == 0)
6173		return (B_FALSE);
6174
6175	seq = ntohl(seq);
6176	mutex_enter(&ipsa->ipsa_lock);
6177	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6178	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6179		goto done;
6180
6181	/*
6182	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6183	 * bother with formalities.  I'm not accepting any more packets
6184	 * on this SA.
6185	 */
6186	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6187		/*
6188		 * Since we're already holding the lock, update the
6189		 * expire time ala. sadb_replay_delete() and return.
6190		 */
6191		ipsa->ipsa_hardexpiretime = (time_t)1;
6192		goto done;
6193	}
6194
6195	if (seq <= ipsa->ipsa_replay) {
6196		/*
6197		 * This seq is in the replay window.  I'm not below it,
6198		 * because I already checked for that above!
6199		 */
6200		diff = ipsa->ipsa_replay - seq;
6201		if (ipsa_is_replay_set(ipsa, diff))
6202			goto done;
6203	}
6204	/* Else return B_TRUE, I'm going to advance the window. */
6205
6206	rc = B_TRUE;
6207done:
6208	mutex_exit(&ipsa->ipsa_lock);
6209	return (rc);
6210}
6211
6212/*
6213 * Delete a single SA.
6214 *
6215 * For now, use the quick-and-dirty trick of making the association's
6216 * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6217 */
6218void
6219sadb_replay_delete(ipsa_t *assoc)
6220{
6221	mutex_enter(&assoc->ipsa_lock);
6222	assoc->ipsa_hardexpiretime = (time_t)1;
6223	mutex_exit(&assoc->ipsa_lock);
6224}
6225
6226/*
6227 * Special front-end to ipsec_rl_strlog() dealing with SA failure.
 * This is designed to take only a format string with "* %x * %s *", so
 * that "spi" is printed first, then "addr" is converted using inet_ntop().
 *
 * This is abstracted out to save the stack space for only when inet_ntop()
6232 * is called.  Make sure "spi" is in network order; it usually is when this
6233 * would get called.
6234 */
6235void
6236ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6237    uint32_t spi, void *addr, int af, netstack_t *ns)
6238{
6239	char buf[INET6_ADDRSTRLEN];
6240
6241	ASSERT(af == AF_INET6 || af == AF_INET);
6242
6243	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6244	    inet_ntop(af, addr, buf, sizeof (buf)));
6245}
6246
6247/*
6248 * Fills in a reference to the policy, if any, from the conn, in *ppp
6249 */
6250static void
6251ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6252{
6253	ipsec_policy_t	*pp;
6254	ipsec_latch_t	*ipl = connp->conn_latch;
6255
6256	if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6257		pp = connp->conn_ixa->ixa_ipsec_policy;
6258		IPPOL_REFHOLD(pp);
6259	} else {
6260		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6261		    connp->conn_netstack);
6262	}
6263	*ppp = pp;
6264}
6265
6266/*
6267 * The following functions scan through active conn_t structures
6268 * and return a reference to the best-matching policy it can find.
6269 * Caller must release the reference.
6270 */
6271static void
6272ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6273{
6274	connf_t *connfp;
6275	conn_t *connp = NULL;
6276	ipsec_selector_t portonly;
6277
6278	bzero((void *)&portonly, sizeof (portonly));
6279
6280	if (sel->ips_local_port == 0)
6281		return;
6282
6283	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6284	    ipst)];
6285	mutex_enter(&connfp->connf_lock);
6286
6287	if (sel->ips_isv4) {
6288		connp = connfp->connf_head;
6289		while (connp != NULL) {
6290			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6291			    sel->ips_local_addr_v4, sel->ips_remote_port,
6292			    sel->ips_remote_addr_v4))
6293				break;
6294			connp = connp->conn_next;
6295		}
6296
6297		if (connp == NULL) {
6298			/* Try port-only match in IPv6. */
6299			portonly.ips_local_port = sel->ips_local_port;
6300			sel = &portonly;
6301		}
6302	}
6303
6304	if (connp == NULL) {
6305		connp = connfp->connf_head;
6306		while (connp != NULL) {
6307			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6308			    sel->ips_local_addr_v6, sel->ips_remote_port,
6309			    sel->ips_remote_addr_v6))
6310				break;
6311			connp = connp->conn_next;
6312		}
6313
6314		if (connp == NULL) {
6315			mutex_exit(&connfp->connf_lock);
6316			return;
6317		}
6318	}
6319
6320	CONN_INC_REF(connp);
6321	mutex_exit(&connfp->connf_lock);
6322
6323	ipsec_conn_pol(sel, connp, ppp);
6324	CONN_DEC_REF(connp);
6325}
6326
6327static conn_t *
6328ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6329{
6330	connf_t *connfp;
6331	conn_t *connp = NULL;
6332	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6333
6334	if (sel->ips_local_port == 0)
6335		return (NULL);
6336
6337	connfp = &ipst->ips_ipcl_bind_fanout[
6338	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6339	mutex_enter(&connfp->connf_lock);
6340
6341	if (sel->ips_isv4) {
6342		connp = connfp->connf_head;
6343		while (connp != NULL) {
6344			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6345			    sel->ips_local_addr_v4, pptr[1]))
6346				break;
6347			connp = connp->conn_next;
6348		}
6349
6350		if (connp == NULL) {
6351			/* Match to all-zeroes. */
6352			v6addrmatch = &ipv6_all_zeros;
6353		}
6354	}
6355
6356	if (connp == NULL) {
6357		connp = connfp->connf_head;
6358		while (connp != NULL) {
6359			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6360			    *v6addrmatch, pptr[1]))
6361				break;
6362			connp = connp->conn_next;
6363		}
6364
6365		if (connp == NULL) {
6366			mutex_exit(&connfp->connf_lock);
6367			return (NULL);
6368		}
6369	}
6370
6371	CONN_INC_REF(connp);
6372	mutex_exit(&connfp->connf_lock);
6373	return (connp);
6374}
6375
6376static void
6377ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6378{
6379	connf_t 	*connfp;
6380	conn_t		*connp;
6381	uint32_t	ports;
6382	uint16_t	*pptr = (uint16_t *)&ports;
6383
6384	/*
6385	 * Find TCP state in the following order:
6386	 * 1.) Connected conns.
6387	 * 2.) Listeners.
6388	 *
6389	 * Even though #2 will be the common case for inbound traffic, only
6390	 * following this order insures correctness.
6391	 */
6392
6393	if (sel->ips_local_port == 0)
6394		return;
6395
6396	/*
6397	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6398	 * See ipsec_construct_inverse_acquire() for details.
6399	 */
6400	pptr[0] = sel->ips_remote_port;
6401	pptr[1] = sel->ips_local_port;
6402
6403	connfp = &ipst->ips_ipcl_conn_fanout[
6404	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6405	mutex_enter(&connfp->connf_lock);
6406	connp = connfp->connf_head;
6407
6408	if (sel->ips_isv4) {
6409		while (connp != NULL) {
6410			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6411			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6412			    ports))
6413				break;
6414			connp = connp->conn_next;
6415		}
6416	} else {
6417		while (connp != NULL) {
6418			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6419			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6420			    ports))
6421				break;
6422			connp = connp->conn_next;
6423		}
6424	}
6425
6426	if (connp != NULL) {
6427		CONN_INC_REF(connp);
6428		mutex_exit(&connfp->connf_lock);
6429	} else {
6430		mutex_exit(&connfp->connf_lock);
6431
6432		/* Try the listen hash. */
6433		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6434			return;
6435	}
6436
6437	ipsec_conn_pol(sel, connp, ppp);
6438	CONN_DEC_REF(connp);
6439}
6440
6441static void
6442ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6443    ip_stack_t *ipst)
6444{
6445	conn_t		*connp;
6446	uint32_t	ports;
6447	uint16_t	*pptr = (uint16_t *)&ports;
6448
6449	/*
6450	 * Find SCP state in the following order:
6451	 * 1.) Connected conns.
6452	 * 2.) Listeners.
6453	 *
6454	 * Even though #2 will be the common case for inbound traffic, only
6455	 * following this order insures correctness.
6456	 */
6457
6458	if (sel->ips_local_port == 0)
6459		return;
6460
6461	/*
6462	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6463	 * See ipsec_construct_inverse_acquire() for details.
6464	 */
6465	pptr[0] = sel->ips_remote_port;
6466	pptr[1] = sel->ips_local_port;
6467
6468	/*
6469	 * For labeled systems, there's no need to check the
6470	 * label here.  It's known to be good as we checked
6471	 * before allowing the connection to become bound.
6472	 */
6473	if (sel->ips_isv4) {
6474		in6_addr_t	src, dst;
6475
6476		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6477		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6478		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6479		    0, ipst->ips_netstack->netstack_sctp);
6480	} else {
6481		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6482		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6483		    0, ipst->ips_netstack->netstack_sctp);
6484	}
6485	if (connp == NULL)
6486		return;
6487	ipsec_conn_pol(sel, connp, ppp);
6488	CONN_DEC_REF(connp);
6489}
6490
6491/*
6492 * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6493 * Returns 0 or errno, and always sets *diagnostic to something appropriate
6494 * to PF_KEY.
6495 *
6496 * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6497 * ignore prefix lengths in the address extension.  Since we match on first-
6498 * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6499 * set addresses to mask out the lower bits, we should get a suitable search
6500 * key for the SPD anyway.  This is the function to change if the assumption
6501 * about suitable search keys is wrong.
6502 */
6503static int
6504ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6505    sadb_address_t *dstext, int *diagnostic)
6506{
6507	struct sockaddr_in *src, *dst;
6508	struct sockaddr_in6 *src6, *dst6;
6509
6510	*diagnostic = 0;
6511
6512	bzero(sel, sizeof (*sel));
6513	sel->ips_protocol = srcext->sadb_address_proto;
6514	dst = (struct sockaddr_in *)(dstext + 1);
6515	if (dst->sin_family == AF_INET6) {
6516		dst6 = (struct sockaddr_in6 *)dst;
6517		src6 = (struct sockaddr_in6 *)(srcext + 1);
6518		if (src6->sin6_family != AF_INET6) {
6519			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6520			return (EINVAL);
6521		}
6522		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6523		sel->ips_local_addr_v6 = src6->sin6_addr;
6524		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6525			sel->ips_is_icmp_inv_acq = 1;
6526		} else {
6527			sel->ips_remote_port = dst6->sin6_port;
6528			sel->ips_local_port = src6->sin6_port;
6529		}
6530		sel->ips_isv4 = B_FALSE;
6531	} else {
6532		src = (struct sockaddr_in *)(srcext + 1);
6533		if (src->sin_family != AF_INET) {
6534			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6535			return (EINVAL);
6536		}
6537		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6538		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6539		if (sel->ips_protocol == IPPROTO_ICMP) {
6540			sel->ips_is_icmp_inv_acq = 1;
6541		} else {
6542			sel->ips_remote_port = dst->sin_port;
6543			sel->ips_local_port = src->sin_port;
6544		}
6545		sel->ips_isv4 = B_TRUE;
6546	}
6547	return (0);
6548}
6549
6550/*
6551 * We have encapsulation.
6552 * - Lookup tun_t by address and look for an associated
6553 *   tunnel policy
6554 * - If there are inner selectors
6555 *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6556 *   - Look up tunnel policy based on selectors
6557 * - Else
 *   - Sanity check the negotiation
6559 *   - If appropriate, fall through to global policy
6560 */
6561static int
6562ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6563    sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6564    int *diagnostic)
6565{
6566	int err;
6567	ipsec_policy_head_t *polhead;
6568
6569	*diagnostic = 0;
6570
6571	/* Check for inner selectors and act appropriately */
6572
6573	if (innsrcext != NULL) {
6574		/* Inner selectors present */
6575		ASSERT(inndstext != NULL);
6576		if ((itp == NULL) ||
6577		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6578		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6579			/*
6580			 * If inner packet selectors, we must have negotiate
6581			 * tunnel and active policy.  If the tunnel has
6582			 * transport-mode policy set on it, or has no policy,
6583			 * fail.
6584			 */
6585			return (ENOENT);
6586		} else {
6587			/*
6588			 * Reset "sel" to indicate inner selectors.  Pass
6589			 * inner PF_KEY address extensions for this to happen.
6590			 */
6591			if ((err = ipsec_get_inverse_acquire_sel(sel,
6592			    innsrcext, inndstext, diagnostic)) != 0)
6593				return (err);
6594			/*
6595			 * Now look for a tunnel policy based on those inner
6596			 * selectors.  (Common code is below.)
6597			 */
6598		}
6599	} else {
6600		/* No inner selectors present */
6601		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6602			/*
6603			 * Transport mode negotiation with no tunnel policy
6604			 * configured - return to indicate a global policy
6605			 * check is needed.
6606			 */
6607			return (0);
6608		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6609			/* Tunnel mode set with no inner selectors. */
6610			return (ENOENT);
6611		}
6612		/*
6613		 * Else, this is a tunnel policy configured with ifconfig(1m)
6614		 * or "negotiate transport" with ipsecconf(1m).  We have an
6615		 * itp with policy set based on any match, so don't bother
6616		 * changing fields in "sel".
6617		 */
6618	}
6619
6620	ASSERT(itp != NULL);
6621	polhead = itp->itp_policy;
6622	ASSERT(polhead != NULL);
6623	rw_enter(&polhead->iph_lock, RW_READER);
6624	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6625	rw_exit(&polhead->iph_lock);
6626
6627	/*
6628	 * Don't default to global if we didn't find a matching policy entry.
6629	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6630	 */
6631	if (*ppp == NULL)
6632		return (ENOENT);
6633
6634	return (0);
6635}
6636
6637/*
6638 * For sctp conn_faddr is the primary address, hence this is of limited
6639 * use for sctp.
6640 */
6641static void
6642ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6643    ip_stack_t *ipst)
6644{
6645	boolean_t	isv4 = sel->ips_isv4;
6646	connf_t		*connfp;
6647	conn_t		*connp;
6648
6649	if (isv4) {
6650		connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6651	} else {
6652		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6653	}
6654
6655	mutex_enter(&connfp->connf_lock);
6656	for (connp = connfp->connf_head; connp != NULL;
6657	    connp = connp->conn_next) {
6658		if (isv4) {
6659			if ((connp->conn_laddr_v4 == INADDR_ANY ||
6660			    connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6661			    (connp->conn_faddr_v4 == INADDR_ANY ||
6662			    connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6663				break;
6664		} else {
6665			if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6666			    IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6667			    &sel->ips_local_addr_v6)) &&
6668			    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6669			    IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6670			    &sel->ips_remote_addr_v6)))
6671				break;
6672		}
6673	}
6674	if (connp == NULL) {
6675		mutex_exit(&connfp->connf_lock);
6676		return;
6677	}
6678
6679	CONN_INC_REF(connp);
6680	mutex_exit(&connfp->connf_lock);
6681
6682	ipsec_conn_pol(sel, connp, ppp);
6683	CONN_DEC_REF(connp);
6684}
6685
6686/*
6687 * Construct an inverse ACQUIRE reply based on:
6688 *
6689 * 1.) Current global policy.
 * 2.) A conn_t match depending on what all was passed in the extv[].
6691 * 3.) A tunnel's policy head.
6692 * ...
6693 * N.) Other stuff TBD (e.g. identities)
6694 *
 * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
 * in this function so the caller can extract them where appropriate.
6697 *
6698 * The SRC address is the local one - just like an outbound ACQUIRE message.
6699 *
6700 * XXX MLS: key management supplies a label which we just reflect back up
6701 * again.  clearly we need to involve the label in the rest of the checks.
6702 */
mblk_t *
ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
    netstack_t *ns)
{
	int err;
	int diagnostic;
	/* Outer and (optional) inner address extensions pulled from extv[]. */
	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
	/* Passed through unchanged to sadb_extended_acquire() below. */
	sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
	struct sockaddr_in6 *src, *dst;
	struct sockaddr_in6 *isrc, *idst;
	ipsec_tun_pol_t *itp = NULL;
	ipsec_policy_t *pp = NULL;
	ipsec_selector_t sel, isel;
	mblk_t *retmp = NULL;
	ip_stack_t	*ipst = ns->netstack_ip;


	/* Normalize addresses */
	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
	    == KS_IN_ADDR_UNKNOWN) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
		goto bail;
	}
	src = (struct sockaddr_in6 *)(srcext + 1);
	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
	    == KS_IN_ADDR_UNKNOWN) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
		goto bail;
	}
	dst = (struct sockaddr_in6 *)(dstext + 1);
	/* Outer source and destination must be of the same address family. */
	if (src->sin6_family != dst->sin6_family) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
		goto bail;
	}

	/* Check for tunnel mode and act appropriately */
	/* Inner addresses must come as a pair: both present or both absent. */
	if (innsrcext != NULL) {
		if (inndstext == NULL) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
			goto bail;
		}
		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
			goto bail;
		}
		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
			goto bail;
		}
		idst = (struct sockaddr_in6 *)(inndstext + 1);
		if (isrc->sin6_family != idst->sin6_family) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
			goto bail;
		}
		if (isrc->sin6_family != AF_INET &&
		    isrc->sin6_family != AF_INET6) {
			err = EINVAL;
			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
			goto bail;
		}
	} else if (inndstext != NULL) {
		err = EINVAL;
		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
		goto bail;
	}

	/* Get selectors first, based on outer addresses */
	/* This call also resets diagnostic to 0 before doing anything else. */
	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
	if (err != 0)
		goto bail;

	/* Check for tunnel mode mismatches. */
	/*
	 * With inner addresses, the outer protocol must be 0 or the
	 * encapsulation protocol matching the inner family (IPPROTO_ENCAP
	 * for AF_INET, IPPROTO_IPV6 for AF_INET6).  isrc is only read when
	 * innsrcext != NULL, the case in which it was assigned above.
	 */
	if (innsrcext != NULL &&
	    ((isrc->sin6_family == AF_INET &&
	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
	    (isrc->sin6_family == AF_INET6 &&
	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
		err = EPROTOTYPE;
		goto bail;
	}

	/*
	 * Okay, we have the addresses and other selector information.
	 * Let's first find a conn...
	 */
	pp = NULL;
	switch (sel.ips_protocol) {
	case IPPROTO_TCP:
		ipsec_tcp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_UDP:
		ipsec_udp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_SCTP:
		ipsec_sctp_pol(&sel, &pp, ipst);
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		/*
		 * Assume sel.ips_remote_addr_* has the right address at
		 * that exact position.
		 */
		/* A non-NULL itp is released at the bail label below. */
		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
		    ipst);

		if (innsrcext == NULL) {
			/*
			 * Transport-mode tunnel, make sure we fake out isel
			 * to contain something based on the outer protocol.
			 */
			bzero(&isel, sizeof (isel));
			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
		} /* Else isel is initialized by ipsec_tun_pol(). */
		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
		    &diagnostic);
		/*
		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
		 * may be.
		 */
		if (err != 0)
			goto bail;
		break;
	default:
		ipsec_oth_pol(&sel, &pp, ipst);
		break;
	}

	/*
	 * If we didn't find a matching conn_t or other policy head, take a
	 * look in the global policy.
	 */
	if (pp == NULL) {
		/* Note: the lookup uses the outer selector (sel), not isel. */
		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
		if (pp == NULL) {
			/* There's no global policy. */
			err = ENOENT;
			diagnostic = 0;
			goto bail;
		}
	}

	/*
	 * Now that we have a policy entry/widget, construct an ACQUIRE
	 * message based on that, fix fields where appropriate,
	 * and return the message.
	 */
	retmp = sadb_extended_acquire(&sel, pp, NULL,
	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
	/* Drop the policy reference obtained by the lookups above. */
	if (pp != NULL) {
		IPPOL_REFRELE(pp);
	}
	ASSERT(err == 0 && diagnostic == 0);
	/* A NULL message from sadb_extended_acquire() is reported as ENOMEM. */
	if (retmp == NULL)
		err = ENOMEM;
bail:
	/*
	 * Common exit for success and failure: release the tunnel policy if
	 * one was looked up, and write err/diagnostic back into the request
	 * header so the caller can read them.  retmp is still NULL on every
	 * error path.
	 */
	if (itp != NULL) {
		ITP_REFRELE(itp, ns);
	}
	samsg->sadb_msg_errno = (uint8_t)err;
	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
	return (retmp);
}
6880
6881/*
6882 * ipsa_lpkt is a one-element queue, only manipulated by the next two
6883 * functions.  They have to hold the ipsa_lock because of potential races
6884 * between key management using SADB_UPDATE, and inbound packets that may
6885 * queue up on the larval SA (hence the 'l' in "lpkt").
6886 */
6887
6888/*
6889 * sadb_set_lpkt:
6890 *
6891 * Returns the passed-in packet if the SA is no longer larval.
6892 *
6893 * Returns NULL if the SA is larval, and needs to be swapped into the SA for
6894 * processing after an SADB_UPDATE.
6895 */
6896mblk_t *
6897sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
6898{
6899	mblk_t		*opkt;
6900
6901	mutex_enter(&ipsa->ipsa_lock);
6902	opkt = ipsa->ipsa_lpkt;
6903	if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
6904		/*
6905		 * Consume npkt and place it in the LARVAL SA's inbound
6906		 * packet slot.
6907		 */
6908		mblk_t	*attrmp;
6909
6910		attrmp = ip_recv_attr_to_mblk(ira);
6911		if (attrmp == NULL) {
6912			ill_t *ill = ira->ira_ill;
6913
6914			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
6915			ip_drop_input("ipIfStatsInDiscards", npkt, ill);
6916			freemsg(npkt);
6917			opkt = NULL;
6918		} else {
6919			ASSERT(attrmp->b_cont == NULL);
6920			attrmp->b_cont = npkt;
6921			ipsa->ipsa_lpkt = attrmp;
6922		}
6923		npkt = NULL;
6924	} else {
6925		/*
6926		 * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
6927		 * have been non-NULL in the non-larval case, because of
6928		 * inbound packets arriving prior to sadb_common_add()
6929		 * transferring the SA completely out of larval state, but
6930		 * after lpkt was grabbed by the AH/ESP-specific add routines.
6931		 * We should clear the old ipsa_lpkt in this case to make sure
6932		 * that it doesn't linger on the now-MATURE IPsec SA, or get
6933		 * picked up as an out-of-order packet.
6934		 */
6935		ipsa->ipsa_lpkt = NULL;
6936	}
6937	mutex_exit(&ipsa->ipsa_lock);
6938
6939	if (opkt != NULL) {
6940		ipsec_stack_t	*ipss;
6941
6942		ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
6943		opkt = ip_recv_attr_free_mblk(opkt);
6944		ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
6945		    DROPPER(ipss, ipds_sadb_inlarval_replace),
6946		    &ipss->ipsec_sadb_dropper);
6947	}
6948	return (npkt);
6949}
6950
6951/*
6952 * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6953 * previous value.
6954 */
6955mblk_t *
6956sadb_clear_lpkt(ipsa_t *ipsa)
6957{
6958	mblk_t *opkt;
6959
6960	mutex_enter(&ipsa->ipsa_lock);
6961	opkt = ipsa->ipsa_lpkt;
6962	ipsa->ipsa_lpkt = NULL;
6963	mutex_exit(&ipsa->ipsa_lock);
6964	return (opkt);
6965}
6966
6967/*
6968 * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6969 */
6970void
6971sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, ip_recv_attr_t *ira)
6972{
6973	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
6974	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6975	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6976	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6977	mblk_t		*mp;
6978
6979	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6980
6981	if (cl_inet_idlesa == NULL) {
6982		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6983		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6984		    &ipss->ipsec_sadb_dropper);
6985		return;
6986	}
6987
6988	cl_inet_idlesa(ns->netstack_stackid,
6989	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6990	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6991
6992	mp = ip_recv_attr_to_mblk(ira);
6993	if (mp == NULL) {
6994		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6995		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6996		    &ipss->ipsec_sadb_dropper);
6997		return;
6998	}
6999	linkb(mp, bpkt);
7000
7001	mutex_enter(&ipsa->ipsa_lock);
7002	ipsa->ipsa_mblkcnt++;
7003	if (ipsa->ipsa_bpkt_head == NULL) {
7004		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7005	} else {
7006		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7007		ipsa->ipsa_bpkt_tail = bpkt;
7008		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7009			mblk_t *tmp;
7010
7011			tmp = ipsa->ipsa_bpkt_head;
7012			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7013			tmp = ip_recv_attr_free_mblk(tmp);
7014			ip_drop_packet(tmp, B_TRUE, NULL,
7015			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7016			    &ipss->ipsec_sadb_dropper);
7017			ipsa->ipsa_mblkcnt --;
7018		}
7019	}
7020	mutex_exit(&ipsa->ipsa_lock);
7021}
7022
7023/*
7024 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7025 * and put into STREAMS again.
7026 */
7027void
7028sadb_clear_buf_pkt(void *ipkt)
7029{
7030	mblk_t	*tmp, *buf_pkt;
7031	ip_recv_attr_t	iras;
7032
7033	buf_pkt = (mblk_t *)ipkt;
7034
7035	while (buf_pkt != NULL) {
7036		mblk_t *data_mp;
7037
7038		tmp = buf_pkt->b_next;
7039		buf_pkt->b_next = NULL;
7040
7041		data_mp = buf_pkt->b_cont;
7042		buf_pkt->b_cont = NULL;
7043		if (!ip_recv_attr_from_mblk(buf_pkt, &iras)) {
7044			/* The ill or ip_stack_t disappeared on us. */
7045			ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
7046			freemsg(data_mp);
7047		} else {
7048			ip_input_post_ipsec(data_mp, &iras);
7049		}
7050		ira_cleanup(&iras, B_TRUE);
7051		buf_pkt = tmp;
7052	}
7053}
7054/*
7055 * Walker callback used by sadb_alg_update() to free/create crypto
7056 * context template when a crypto software provider is removed or
7057 * added.
7058 */
7059
7060struct sadb_update_alg_state {
7061	ipsec_algtype_t alg_type;
7062	uint8_t alg_id;
7063	boolean_t is_added;
7064	boolean_t async_auth;
7065	boolean_t async_encr;
7066};
7067
7068static void
7069sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7070{
7071	struct sadb_update_alg_state *update_state =
7072	    (struct sadb_update_alg_state *)cookie;
7073	crypto_ctx_template_t *ctx_tmpl = NULL;
7074
7075	ASSERT(MUTEX_HELD(&head->isaf_lock));
7076
7077	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7078		return;
7079
7080	mutex_enter(&entry->ipsa_lock);
7081
7082	if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
7083	    SADB_EALG_NULL && update_state->async_encr) ||
7084	    (entry->ipsa_auth_alg != SADB_AALG_NONE &&
7085	    update_state->async_auth)) {
7086		entry->ipsa_flags |= IPSA_F_ASYNC;
7087	} else {
7088		entry->ipsa_flags &= ~IPSA_F_ASYNC;
7089	}
7090
7091	switch (update_state->alg_type) {
7092	case IPSEC_ALG_AUTH:
7093		if (entry->ipsa_auth_alg == update_state->alg_id)
7094			ctx_tmpl = &entry->ipsa_authtmpl;
7095		break;
7096	case IPSEC_ALG_ENCR:
7097		if (entry->ipsa_encr_alg == update_state->alg_id)
7098			ctx_tmpl = &entry->ipsa_encrtmpl;
7099		break;
7100	default:
7101		ctx_tmpl = NULL;
7102	}
7103
7104	if (ctx_tmpl == NULL) {
7105		mutex_exit(&entry->ipsa_lock);
7106		return;
7107	}
7108
7109	/*
7110	 * The context template of the SA may be affected by the change
7111	 * of crypto provider.
7112	 */
7113	if (update_state->is_added) {
7114		/* create the context template if not already done */
7115		if (*ctx_tmpl == NULL) {
7116			(void) ipsec_create_ctx_tmpl(entry,
7117			    update_state->alg_type);
7118		}
7119	} else {
7120		/*
7121		 * The crypto provider was removed. If the context template
7122		 * exists but it is no longer valid, free it.
7123		 */
7124		if (*ctx_tmpl != NULL)
7125			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7126	}
7127
7128	mutex_exit(&entry->ipsa_lock);
7129}
7130
7131/*
7132 * Invoked by IP when an software crypto provider has been updated, or if
7133 * the crypto synchrony changes.  The type and id of the corresponding
7134 * algorithm is passed as argument.  The type is set to ALL in the case of
7135 * a synchrony change.
7136 *
7137 * is_added is B_TRUE if the provider was added, B_FALSE if it was
7138 * removed. The function updates the SADB and free/creates the
7139 * context templates associated with SAs if needed.
7140 */
7141
7142#define	SADB_ALG_UPDATE_WALK(sadb, table) \
7143    sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7144	&update_state)
7145
7146void
7147sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7148    netstack_t *ns)
7149{
7150	struct sadb_update_alg_state update_state;
7151	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7152	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7153	ipsec_stack_t *ipss = ns->netstack_ipsec;
7154
7155	update_state.alg_type = alg_type;
7156	update_state.alg_id = alg_id;
7157	update_state.is_added = is_added;
7158	update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
7159	    IPSEC_ALGS_EXEC_ASYNC;
7160	update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
7161	    IPSEC_ALGS_EXEC_ASYNC;
7162
7163	if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
7164		/* walk the AH tables only for auth. algorithm changes */
7165		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7166		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7167		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7168		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7169	}
7170
7171	/* walk the ESP tables */
7172	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7173	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7174	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7175	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7176}
7177
7178/*
7179 * Creates a context template for the specified SA. This function
7180 * is called when an SA is created and when a context template needs
7181 * to be created due to a change of software provider.
7182 */
7183int
7184ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7185{
7186	ipsec_alginfo_t *alg;
7187	crypto_mechanism_t mech;
7188	crypto_key_t *key;
7189	crypto_ctx_template_t *sa_tmpl;
7190	int rv;
7191	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7192
7193	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7194	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7195
7196	/* get pointers to the algorithm info, context template, and key */
7197	switch (alg_type) {
7198	case IPSEC_ALG_AUTH:
7199		key = &sa->ipsa_kcfauthkey;
7200		sa_tmpl = &sa->ipsa_authtmpl;
7201		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7202		break;
7203	case IPSEC_ALG_ENCR:
7204		key = &sa->ipsa_kcfencrkey;
7205		sa_tmpl = &sa->ipsa_encrtmpl;
7206		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7207		break;
7208	default:
7209		alg = NULL;
7210	}
7211
7212	if (alg == NULL || !ALG_VALID(alg))
7213		return (EINVAL);
7214
7215	/* initialize the mech info structure for the framework */
7216	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7217	mech.cm_type = alg->alg_mech_type;
7218	mech.cm_param = NULL;
7219	mech.cm_param_len = 0;
7220
7221	/* create a new context template */
7222	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7223
7224	/*
7225	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7226	 * providers are available for that mechanism. In that case
7227	 * we don't fail, and will generate the context template from
7228	 * the framework callback when a software provider for that
7229	 * mechanism registers.
7230	 *
7231	 * The context template is assigned the special value
7232	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7233	 * lack of memory. No attempt will be made to use
7234	 * the context template if it is set to this value.
7235	 */
7236	if (rv == CRYPTO_HOST_MEMORY) {
7237		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7238	} else if (rv != CRYPTO_SUCCESS) {
7239		*sa_tmpl = NULL;
7240		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7241			return (EINVAL);
7242	}
7243
7244	return (0);
7245}
7246
7247/*
7248 * Destroy the context template of the specified algorithm type
7249 * of the specified SA. Must be called while holding the SA lock.
7250 */
7251void
7252ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7253{
7254	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7255
7256	if (alg_type == IPSEC_ALG_AUTH) {
7257		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7258			sa->ipsa_authtmpl = NULL;
7259		else if (sa->ipsa_authtmpl != NULL) {
7260			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7261			sa->ipsa_authtmpl = NULL;
7262		}
7263	} else {
7264		ASSERT(alg_type == IPSEC_ALG_ENCR);
7265		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7266			sa->ipsa_encrtmpl = NULL;
7267		else if (sa->ipsa_encrtmpl != NULL) {
7268			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7269			sa->ipsa_encrtmpl = NULL;
7270		}
7271	}
7272}
7273
7274/*
7275 * Use the kernel crypto framework to check the validity of a key received
7276 * via keysock. Returns 0 if the key is OK, -1 otherwise.
7277 */
7278int
7279ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7280    boolean_t is_auth, int *diag)
7281{
7282	crypto_mechanism_t mech;
7283	crypto_key_t crypto_key;
7284	int crypto_rc;
7285
7286	mech.cm_type = mech_type;
7287	mech.cm_param = NULL;
7288	mech.cm_param_len = 0;
7289
7290	crypto_key.ck_format = CRYPTO_KEY_RAW;
7291	crypto_key.ck_data = sadb_key + 1;
7292	crypto_key.ck_length = sadb_key->sadb_key_bits;
7293
7294	crypto_rc = crypto_key_check(&mech, &crypto_key);
7295
7296	switch (crypto_rc) {
7297	case CRYPTO_SUCCESS:
7298		return (0);
7299	case CRYPTO_MECHANISM_INVALID:
7300	case CRYPTO_MECH_NOT_SUPPORTED:
7301		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7302		    SADB_X_DIAGNOSTIC_BAD_EALG;
7303		break;
7304	case CRYPTO_KEY_SIZE_RANGE:
7305		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7306		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7307		break;
7308	case CRYPTO_WEAK_KEY:
7309		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7310		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7311		break;
7312	}
7313
7314	return (-1);
7315}
7316
7317/*
7318 * Whack options in the outer IP header when ipsec changes the outer label
7319 *
7320 * This is inelegant and really could use refactoring.
7321 */
7322mblk_t *
7323sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7324    ipdropper_t *dropper)
7325{
7326	int delta;
7327	int plen;
7328	dblk_t *db;
7329	int hlen;
7330	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7331	ipha_t *ipha = (ipha_t *)mp->b_rptr;
7332
7333	plen = ntohs(ipha->ipha_length);
7334
7335	delta = tsol_remove_secopt(ipha, MBLKL(mp));
7336	mp->b_wptr += delta;
7337	plen += delta;
7338
7339	/* XXX XXX code copied from tsol_check_label */
7340
7341	/* Make sure we have room for the worst-case addition */
7342	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
7343	hlen = (hlen + 3) & ~3;
7344	if (hlen > IP_MAX_HDR_LENGTH)
7345		hlen = IP_MAX_HDR_LENGTH;
7346	hlen -= IPH_HDR_LENGTH(ipha);
7347
7348	db = mp->b_datap;
7349	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7350		int copylen;
7351		mblk_t *new_mp;
7352
7353		/* allocate enough to be meaningful, but not *too* much */
7354		copylen = MBLKL(mp);
7355		if (copylen > 256)
7356			copylen = 256;
7357		new_mp = allocb_tmpl(hlen + copylen +
7358		    (mp->b_rptr - mp->b_datap->db_base), mp);
7359
7360		if (new_mp == NULL) {
7361			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7362			return (NULL);
7363		}
7364
7365		/* keep the bias */
7366		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7367		new_mp->b_wptr = new_mp->b_rptr + copylen;
7368		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7369		new_mp->b_cont = mp;
7370		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7371			new_mp->b_cont = mp->b_cont;
7372			freeb(mp);
7373		}
7374		mp = new_mp;
7375		ipha = (ipha_t *)mp->b_rptr;
7376	}
7377
7378	delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));
7379
7380	ASSERT(delta != -1);
7381
7382	plen += delta;
7383	mp->b_wptr += delta;
7384
7385	/*
7386	 * Paranoia
7387	 */
7388	db = mp->b_datap;
7389
7390	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7391	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7392
7393	ASSERT3P(mp->b_wptr, >=, db->db_base);
7394	ASSERT3P(mp->b_rptr, >=, db->db_base);
7395	/* End paranoia */
7396
7397	ipha->ipha_length = htons(plen);
7398
7399	return (mp);
7400}
7401
7402mblk_t *
7403sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7404    ipdropper_t *dropper)
7405{
7406	int delta;
7407	int plen;
7408	dblk_t *db;
7409	int hlen;
7410	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7411	uint_t sec_opt_len; /* label option length not including type, len */
7412	ip6_t *ip6h = (ip6_t *)mp->b_rptr;
7413
7414	plen = ntohs(ip6h->ip6_plen);
7415
7416	delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
7417	mp->b_wptr += delta;
7418	plen += delta;
7419
7420	/* XXX XXX code copied from tsol_check_label_v6 */
7421	/*
7422	 * Make sure we have room for the worst-case addition. Add 2 bytes for
7423	 * the hop-by-hop ext header's next header and length fields. Add
7424	 * another 2 bytes for the label option type, len and then round
7425	 * up to the next 8-byte multiple.
7426	 */
7427	sec_opt_len = opt_storage[1];
7428
7429	db = mp->b_datap;
7430	hlen = (4 + sec_opt_len + 7) & ~7;
7431
7432	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7433		int copylen;
7434		mblk_t *new_mp;
7435		uint16_t hdr_len;
7436
7437		hdr_len = ip_hdr_length_v6(mp, ip6h);
7438		/*
7439		 * Allocate enough to be meaningful, but not *too* much.
7440		 * Also all the IPv6 extension headers must be in the same mblk
7441		 */
7442		copylen = MBLKL(mp);
7443		if (copylen > 256)
7444			copylen = 256;
7445		if (copylen < hdr_len)
7446			copylen = hdr_len;
7447		new_mp = allocb_tmpl(hlen + copylen +
7448		    (mp->b_rptr - mp->b_datap->db_base), mp);
7449		if (new_mp == NULL) {
7450			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7451			return (NULL);
7452		}
7453
7454		/* keep the bias */
7455		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7456		new_mp->b_wptr = new_mp->b_rptr + copylen;
7457		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7458		new_mp->b_cont = mp;
7459		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7460			new_mp->b_cont = mp->b_cont;
7461			freeb(mp);
7462		}
7463		mp = new_mp;
7464		ip6h = (ip6_t *)mp->b_rptr;
7465	}
7466
7467	delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
7468	    ip6h, MBLKL(mp));
7469
7470	ASSERT(delta != -1);
7471
7472	plen += delta;
7473	mp->b_wptr += delta;
7474
7475	/*
7476	 * Paranoia
7477	 */
7478	db = mp->b_datap;
7479
7480	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7481	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7482
7483	ASSERT3P(mp->b_wptr, >=, db->db_base);
7484	ASSERT3P(mp->b_rptr, >=, db->db_base);
7485	/* End paranoia */
7486
7487	ip6h->ip6_plen = htons(plen);
7488
7489	return (mp);
7490}
7491
7492/* Whack the labels and update ip_xmit_attr_t as needed */
7493mblk_t *
7494sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7495    kstat_named_t *counter, ipdropper_t *dropper)
7496{
7497	int adjust;
7498	int iplen;
7499
7500	if (ixa->ixa_flags & IXAF_IS_IPV4) {
7501		ipha_t		*ipha = (ipha_t *)mp->b_rptr;
7502
7503		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7504		iplen = ntohs(ipha->ipha_length);
7505		mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7506		if (mp == NULL)
7507			return (NULL);
7508
7509		ipha = (ipha_t *)mp->b_rptr;
7510		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7511		adjust = (int)ntohs(ipha->ipha_length) - iplen;
7512	} else {
7513		ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
7514
7515		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7516		iplen = ntohs(ip6h->ip6_plen);
7517		mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7518		if (mp == NULL)
7519			return (NULL);
7520
7521		ip6h = (ip6_t *)mp->b_rptr;
7522		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7523		adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7524	}
7525	ixa->ixa_pktlen += adjust;
7526	ixa->ixa_ip_hdr_length += adjust;
7527	return (mp);
7528}
7529
7530/*
7531 * If this is an outgoing SA then add some fuzz to the
7532 * SOFT EXPIRE time. The reason for this is to stop
7533 * peers trying to renegotiate SOFT expiring SA's at
7534 * the same time. The amount of fuzz needs to be at
7535 * least 8 seconds which is the typical interval
7536 * sadb_ager(), although this is only a guide as it
7537 * selftunes.
7538 */
7539static void
7540lifetime_fuzz(ipsa_t *assoc)
7541{
7542	uint8_t rnd;
7543
7544	if (assoc->ipsa_softaddlt == 0)
7545		return;
7546
7547	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7548	rnd = (rnd & 0xF) + 8;
7549	assoc->ipsa_softexpiretime -= rnd;
7550	assoc->ipsa_softaddlt -= rnd;
7551}
7552
7553static void
7554destroy_ipsa_pair(ipsap_t *ipsapp)
7555{
7556	/*
7557	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7558	 * them in { }.
7559	 */
7560	if (ipsapp->ipsap_sa_ptr != NULL) {
7561		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7562	}
7563	if (ipsapp->ipsap_psa_ptr != NULL) {
7564		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7565	}
7566	init_ipsa_pair(ipsapp);
7567}
7568
7569static void
7570init_ipsa_pair(ipsap_t *ipsapp)
7571{
7572	ipsapp->ipsap_bucket = NULL;
7573	ipsapp->ipsap_sa_ptr = NULL;
7574	ipsapp->ipsap_pbucket = NULL;
7575	ipsapp->ipsap_psa_ptr = NULL;
7576}
7577
7578/*
7579 * The sadb_ager() function walks through the hash tables of SA's and ages
7580 * them, if the SA expires as a result, its marked as DEAD and will be reaped
7581 * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7582 * SA appears in both the inbound and outbound tables because its not possible
7583 * to determine its direction) are placed on a list when they expire. This is
7584 * to ensure that pair/peer SA's are reaped at the same time, even if they
7585 * expire at different times.
7586 *
7587 * This function is called twice by sadb_ager(), one after processing the
7588 * inbound table, then again after processing the outbound table.
7589 */
7590void
7591age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7592{
7593	templist_t *listptr;
7594	int outhash;
7595	isaf_t *bucket;
7596	boolean_t haspeer;
7597	ipsa_t *peer_assoc, *dying;
7598	/*
7599	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7600	 * is address independent.
7601	 */
7602	while (haspeerlist != NULL) {
7603		/* "dying" contains the SA that has a peer. */
7604		dying = haspeerlist->ipsa;
7605		haspeer = (dying->ipsa_haspeer);
7606		listptr = haspeerlist;
7607		haspeerlist = listptr->next;
7608		kmem_free(listptr, sizeof (*listptr));
7609		/*
7610		 * Pick peer bucket based on addrfam.
7611		 */
7612		if (outbound) {
7613			if (haspeer)
7614				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7615			else
7616				bucket = INBOUND_BUCKET(sp,
7617				    dying->ipsa_otherspi);
7618		} else { /* inbound */
7619			if (haspeer) {
7620				if (dying->ipsa_addrfam == AF_INET6) {
7621					outhash = OUTBOUND_HASH_V6(sp,
7622					    *((in6_addr_t *)&dying->
7623					    ipsa_dstaddr));
7624				} else {
7625					outhash = OUTBOUND_HASH_V4(sp,
7626					    *((ipaddr_t *)&dying->
7627					    ipsa_dstaddr));
7628				}
7629			} else if (dying->ipsa_addrfam == AF_INET6) {
7630				outhash = OUTBOUND_HASH_V6(sp,
7631				    *((in6_addr_t *)&dying->
7632				    ipsa_srcaddr));
7633			} else {
7634				outhash = OUTBOUND_HASH_V4(sp,
7635				    *((ipaddr_t *)&dying->
7636				    ipsa_srcaddr));
7637			}
7638			bucket = &(sp->sdb_of[outhash]);
7639		}
7640
7641		mutex_enter(&bucket->isaf_lock);
7642		/*
7643		 * "haspeer" SA's have the same src/dst address ordering,
7644		 * "paired" SA's have the src/dst addresses reversed.
7645		 */
7646		if (haspeer) {
7647			peer_assoc = ipsec_getassocbyspi(bucket,
7648			    dying->ipsa_spi, dying->ipsa_srcaddr,
7649			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7650		} else {
7651			peer_assoc = ipsec_getassocbyspi(bucket,
7652			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7653			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7654		}
7655
7656		mutex_exit(&bucket->isaf_lock);
7657		if (peer_assoc != NULL) {
7658			mutex_enter(&peer_assoc->ipsa_lock);
7659			mutex_enter(&dying->ipsa_lock);
7660			if (!haspeer) {
7661				/*
7662				 * Only SA's which have a "peer" or are
7663				 * "paired" end up on this list, so this
7664				 * must be a "paired" SA, update the flags
7665				 * to break the pair.
7666				 */
7667				peer_assoc->ipsa_otherspi = 0;
7668				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7669				dying->ipsa_otherspi = 0;
7670				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7671			}
7672			if (haspeer || outbound) {
7673				/*
7674				 * Update the state of the "inbound" SA when
7675				 * the "outbound" SA has expired. Don't update
7676				 * the "outbound" SA when the "inbound" SA
7677				 * SA expires because setting the hard_addtime
7678				 * below will cause this to happen.
7679				 */
7680				peer_assoc->ipsa_state = dying->ipsa_state;
7681			}
7682			if (dying->ipsa_state == IPSA_STATE_DEAD)
7683				peer_assoc->ipsa_hardexpiretime = 1;
7684
7685			mutex_exit(&dying->ipsa_lock);
7686			mutex_exit(&peer_assoc->ipsa_lock);
7687			IPSA_REFRELE(peer_assoc);
7688		}
7689		IPSA_REFRELE(dying);
7690	}
7691}
7692
7693/*
7694 * Ensure that the IV used for CCM mode never repeats. The IV should
7695 * only be updated by this function. Also check to see if the IV
7696 * is about to wrap and generate a SOFT Expire. This function is only
7697 * called for outgoing packets, the IV for incomming packets is taken
7698 * from the wire. If the outgoing SA needs to be expired, update
7699 * the matching incomming SA.
7700 */
7701boolean_t
7702update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7703    ipsecesp_stack_t *espstack)
7704{
7705	boolean_t rc = B_TRUE;
7706	isaf_t *inbound_bucket;
7707	sadb_t *sp;
7708	ipsa_t *pair_sa = NULL;
7709	int sa_new_state = 0;
7710
7711	/* For non counter modes, the IV is random data. */
7712	if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7713		(void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7714		return (rc);
7715	}
7716
7717	mutex_enter(&assoc->ipsa_lock);
7718
7719	(*assoc->ipsa_iv)++;
7720
7721	if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7722		sa_new_state = IPSA_STATE_DEAD;
7723		rc = B_FALSE;
7724	} else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7725		if (assoc->ipsa_state != IPSA_STATE_DYING) {
7726			/*
7727			 * This SA may have already been expired when its
7728			 * PAIR_SA expired.
7729			 */
7730			sa_new_state = IPSA_STATE_DYING;
7731		}
7732	}
7733	if (sa_new_state) {
7734		/*
7735		 * If there is a state change, we need to update this SA
7736		 * and its "pair", we can find the bucket for the "pair" SA
7737		 * while holding the ipsa_t mutex, but we won't actually
7738		 * update anything untill the ipsa_t mutex has been released
7739		 * for _this_ SA.
7740		 */
7741		assoc->ipsa_state = sa_new_state;
7742		if (assoc->ipsa_addrfam == AF_INET6) {
7743			sp = &espstack->esp_sadb.s_v6;
7744		} else {
7745			sp = &espstack->esp_sadb.s_v4;
7746		}
7747		inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7748		sadb_expire_assoc(pfkey_q, assoc);
7749	}
7750	if (rc == B_TRUE)
7751		bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7752
7753	mutex_exit(&assoc->ipsa_lock);
7754
7755	if (sa_new_state) {
7756		/* Find the inbound SA, need to lock hash bucket. */
7757		mutex_enter(&inbound_bucket->isaf_lock);
7758		pair_sa = ipsec_getassocbyspi(inbound_bucket,
7759		    assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
7760		    assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
7761		mutex_exit(&inbound_bucket->isaf_lock);
7762		if (pair_sa != NULL) {
7763			mutex_enter(&pair_sa->ipsa_lock);
7764			pair_sa->ipsa_state = sa_new_state;
7765			mutex_exit(&pair_sa->ipsa_lock);
7766			IPSA_REFRELE(pair_sa);
7767		}
7768	}
7769
7770	return (rc);
7771}
7772
7773void
7774ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7775    ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7776{
7777	uchar_t *nonce;
7778	crypto_mechanism_t *combined_mech;
7779	CK_AES_CCM_PARAMS *params;
7780
7781	combined_mech = (crypto_mechanism_t *)cm_mech;
7782	params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
7783	nonce = (uchar_t *)(params + 1);
7784	params->ulMACSize = assoc->ipsa_mac_len;
7785	params->ulNonceSize = assoc->ipsa_nonce_len;
7786	params->ulAuthDataSize = sizeof (esph_t);
7787	params->ulDataSize = data_len;
7788	params->nonce = nonce;
7789	params->authData = esph;
7790
7791	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7792	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
7793	cm_mech->combined_mech.cm_param = (caddr_t)params;
7794	/* See gcm_params_init() for comments. */
7795	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7796	nonce += assoc->ipsa_saltlen;
7797	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7798	crypto_data->cd_miscdata = NULL;
7799}
7800
7801/* ARGSUSED */
7802void
7803cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7804    ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7805{
7806	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7807	cm_mech->combined_mech.cm_param_len = 0;
7808	cm_mech->combined_mech.cm_param = NULL;
7809	crypto_data->cd_miscdata = (char *)iv_ptr;
7810}
7811
7812/* ARGSUSED */
7813void
7814gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7815    ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7816{
7817	uchar_t *nonce;
7818	crypto_mechanism_t *combined_mech;
7819	CK_AES_GCM_PARAMS *params;
7820
7821	combined_mech = (crypto_mechanism_t *)cm_mech;
7822	params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
7823	nonce = (uchar_t *)(params + 1);
7824
7825	params->pIv = nonce;
7826	params->ulIvLen = assoc->ipsa_nonce_len;
7827	params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
7828	params->pAAD = esph;
7829	params->ulAADLen = sizeof (esph_t);
7830	params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
7831
7832	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7833	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
7834	cm_mech->combined_mech.cm_param = (caddr_t)params;
7835	/*
7836	 * Create the nonce, which is made up of the salt and the IV.
7837	 * Copy the salt from the SA and the IV from the packet.
7838	 * For inbound packets we copy the IV from the packet because it
7839	 * was set by the sending system, for outbound packets we copy the IV
7840	 * from the packet because the IV in the SA may be changed by another
7841	 * thread, the IV in the packet was created while holding a mutex.
7842	 */
7843	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7844	nonce += assoc->ipsa_saltlen;
7845	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7846	crypto_data->cd_miscdata = NULL;
7847}
7848