1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/stream.h>
28#include <sys/stropts.h>
29#include <sys/errno.h>
30#include <sys/strlog.h>
31#include <sys/tihdr.h>
32#include <sys/socket.h>
33#include <sys/ddi.h>
34#include <sys/sunddi.h>
35#include <sys/mkdev.h>
36#include <sys/kmem.h>
37#include <sys/zone.h>
38#include <sys/sysmacros.h>
39#include <sys/cmn_err.h>
40#include <sys/vtrace.h>
41#include <sys/debug.h>
42#include <sys/atomic.h>
43#include <sys/strsun.h>
44#include <sys/random.h>
45#include <netinet/in.h>
46#include <net/if.h>
47#include <netinet/ip6.h>
48#include <netinet/icmp6.h>
49#include <net/pfkeyv2.h>
50#include <net/pfpolicy.h>
51
52#include <inet/common.h>
53#include <inet/mi.h>
54#include <inet/ip.h>
55#include <inet/ip6.h>
56#include <inet/nd.h>
57#include <inet/ip_if.h>
58#include <inet/ip_ndp.h>
59#include <inet/ipsec_info.h>
60#include <inet/ipsec_impl.h>
61#include <inet/sadb.h>
62#include <inet/ipsecah.h>
63#include <inet/ipsec_impl.h>
64#include <inet/ipdrop.h>
65#include <sys/taskq.h>
66#include <sys/policy.h>
67#include <sys/strsun.h>
68
69#include <sys/crypto/common.h>
70#include <sys/crypto/api.h>
71#include <sys/kstat.h>
72#include <sys/strsubr.h>
73
74#include <sys/tsol/tnet.h>
75
/*
 * Table of ND variables supported by ipsecah.  This is a template:
 * ipsecah_stack_init() copies it into each stack's ipsecah_params and
 * registers the copy with that stack's ipsecah_g_nd through
 * ipsecah_param_register().
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The accessor macros that follow the table index ipsecah_params by
 * position, so the entries must stay in the same order as those indices.
 */
static	ipsecahparam_t	lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecah_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT,	"ipsecah_age_interval"},
	{ 1,	10,			1,	"ipsecah_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecah_replay_size"},
	{ 1,	300,			15,	"ipsecah_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecah_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_bytes"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_bytes"},
	{ 0,	0xffffffffU,		24000,	"ipsecah_default_soft_addtime"},
	{ 0,	0xffffffffU,		28800,	"ipsecah_default_hard_addtime"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_soft_usetime"},
	{ 0,	0xffffffffU,		0,	"ipsecah_default_hard_usetime"},
	{ 0,	1,			0,	"ipsecah_log_unknown_spi"},
};
/*
 * Positional accessors, used as "ahstack->ipsecah_<name>".  Each index must
 * match the position of the corresponding entry in lcl_param_arr[] above.
 */
#define	ipsecah_debug			ipsecah_params[0].ipsecah_param_value
#define	ipsecah_age_interval		ipsecah_params[1].ipsecah_param_value
#define	ipsecah_age_int_max		ipsecah_params[1].ipsecah_param_max
#define	ipsecah_reap_delay		ipsecah_params[2].ipsecah_param_value
#define	ipsecah_replay_size		ipsecah_params[3].ipsecah_param_value
#define	ipsecah_acquire_timeout		ipsecah_params[4].ipsecah_param_value
#define	ipsecah_larval_timeout		ipsecah_params[5].ipsecah_param_value
#define	ipsecah_default_soft_bytes	ipsecah_params[6].ipsecah_param_value
#define	ipsecah_default_hard_bytes	ipsecah_params[7].ipsecah_param_value
#define	ipsecah_default_soft_addtime	ipsecah_params[8].ipsecah_param_value
#define	ipsecah_default_hard_addtime	ipsecah_params[9].ipsecah_param_value
#define	ipsecah_default_soft_usetime	ipsecah_params[10].ipsecah_param_value
#define	ipsecah_default_hard_usetime	ipsecah_params[11].ipsecah_param_value
#define	ipsecah_log_unknown_spi		ipsecah_params[12].ipsecah_param_value
112
/*
 * Debug output helpers.  ah0dbg() always prints; ah1dbg(), ah2dbg() and
 * ah3dbg() print only when the stack's ipsecah_debug value is at least
 * 1, 2 or 3 respectively.  The "a" argument is a complete, parenthesized
 * printf() argument list, e.g. ah1dbg(ahstack, ("x = %d\n", x)).
 *
 * The conditional forms expand to a single void expression rather than a
 * bare "if" statement, so they cannot capture a following "else" when used
 * as the body of an unbraced if/else.  The stack argument is parenthesized
 * so that any pointer-valued expression may be passed.
 */
#define	ah0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ah1dbg(ahstack, a)	\
	((void)(((ahstack)->ipsecah_debug != 0) ? (printf a, 0) : 0))
#define	ah2dbg(ahstack, a)	\
	((void)(((ahstack)->ipsecah_debug > 1) ? (printf a, 0) : 0))
#define	ah3dbg(ahstack, a)	\
	((void)(((ahstack)->ipsecah_debug > 2) ? (printf a, 0) : 0))
118
/*
 * XXX This is broken. Padding should be determined dynamically
 * depending on the ICV size and IP version number so that the
 * total AH header size is a multiple of 32 bits or 64 bits
 * for V4 and V6 respectively. For 96-bit ICVs we have no problems.
 * For anything different from that, we need to fix our code.
 */
#define	IPV4_PADDING_ALIGN	0x04	/* Multiple of 32 bits */
#define	IPV6_PADDING_ALIGN	0x04	/* Multiple of 32 bits */

/*
 * Helper macro. Avoids a call to msgdsize if there is only one
 * mblk in the chain.
 *
 * NOTE: mp is evaluated more than once, so callers must not pass an
 * expression with side effects.
 */
#define	AH_MSGSIZE(mp) ((mp)->b_cont != NULL ? msgdsize(mp) : MBLKL(mp))
134
135
/* Forward declarations for functions local to this file. */

/* Inbound/outbound packet processing. */
static mblk_t *ah_auth_out_done(mblk_t *, ip_xmit_attr_t *, ipsec_crypto_t *);
static mblk_t *ah_auth_in_done(mblk_t *, ip_recv_attr_t *, ipsec_crypto_t *);
static mblk_t *ah_process_ip_options_v4(mblk_t *, ipsa_t *, int *, uint_t,
    boolean_t, ipsecah_stack_t *);
static mblk_t *ah_process_ip_options_v6(mblk_t *, ipsa_t *, int *, uint_t,
    boolean_t, ipsecah_stack_t *);
static void ah_getspi(mblk_t *, keysock_in_t *, ipsecah_stack_t *);
static void ah_inbound_restart(mblk_t *, ip_recv_attr_t *);

static mblk_t *ah_outbound(mblk_t *, ip_xmit_attr_t *);
static void ah_outbound_finish(mblk_t *, ip_xmit_attr_t *);

/* STREAMS entry points, PF_KEY helpers and per-netstack hooks. */
static int ipsecah_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecah_close(queue_t *);
static void ipsecah_wput(queue_t *, mblk_t *);
static void ah_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
static boolean_t ah_register_out(uint32_t, uint32_t, uint_t, ipsecah_stack_t *,
    cred_t *);
static void	*ipsecah_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecah_stack_fini(netstackid_t stackid, void *arg);
156
/*
 * Settable in /etc/system.  Passed to sadbp_init() as the hash size when
 * each stack's AH SADB is created in ipsecah_stack_init().
 */
uint32_t ah_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/*
 * Task queue created in ipsecah_ddi_init() and destroyed in
 * ipsecah_ddi_destroy().  ah_add_sa_finish() dispatches inbound_task()
 * on it.
 */
static taskq_t *ah_taskq;

/*
 * STREAMS module information, shared by the read and write sides.
 * ah_ager() also hands info.mi_idnum to sadb_retimeout().
 */
static struct module_info info = {
	5136, "ipsecah", 0, INFPSZ, 65536, 1024
};

/* Read-side queue initialization; the first slot is putnext(). */
static struct qinit rinit = {
	(pfi_t)putnext, NULL, ipsecah_open, ipsecah_close, NULL, &info,
	NULL
};

/* Write-side queue initialization; the first slot is ipsecah_wput(). */
static struct qinit winit = {
	(pfi_t)ipsecah_wput, NULL, ipsecah_open, ipsecah_close, NULL, &info,
	NULL
};

/* The module's streamtab, tying the read and write qinit tables together. */
struct streamtab ipsecahinfo = {
	&rinit, &winit, NULL, NULL
};

static int ah_kstat_update(kstat_t *, int);

/* Handed to IP_ACQUIRE_STAT() as the "maxpackets" value in ah_kstat_init(). */
uint64_t ipsacq_maxpackets = IPSACQ_MAXPACKETS;
183
184static boolean_t
185ah_kstat_init(ipsecah_stack_t *ahstack, netstackid_t stackid)
186{
187	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
188
189	ahstack->ah_ksp = kstat_create_netstack("ipsecah", 0, "ah_stat", "net",
190	    KSTAT_TYPE_NAMED, sizeof (ah_kstats_t) / sizeof (kstat_named_t),
191	    KSTAT_FLAG_PERSISTENT, stackid);
192
193	if (ahstack->ah_ksp == NULL || ahstack->ah_ksp->ks_data == NULL)
194		return (B_FALSE);
195
196	ahstack->ah_kstats = ahstack->ah_ksp->ks_data;
197
198	ahstack->ah_ksp->ks_update = ah_kstat_update;
199	ahstack->ah_ksp->ks_private = (void *)(uintptr_t)stackid;
200
201#define	K64 KSTAT_DATA_UINT64
202#define	KI(x) kstat_named_init(&(ahstack->ah_kstats->ah_stat_##x), #x, K64)
203
204	KI(num_aalgs);
205	KI(good_auth);
206	KI(bad_auth);
207	KI(replay_failures);
208	KI(replay_early_failures);
209	KI(keysock_in);
210	KI(out_requests);
211	KI(acquire_requests);
212	KI(bytes_expired);
213	KI(out_discards);
214	KI(crypto_sync);
215	KI(crypto_async);
216	KI(crypto_failures);
217
218#undef KI
219#undef K64
220
221	kstat_install(ahstack->ah_ksp);
222	IP_ACQUIRE_STAT(ipss, maxpackets, ipsacq_maxpackets);
223	return (B_TRUE);
224}
225
226static int
227ah_kstat_update(kstat_t *kp, int rw)
228{
229	ah_kstats_t	*ekp;
230	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
231	netstack_t	*ns;
232	ipsec_stack_t	*ipss;
233
234	if ((kp == NULL) || (kp->ks_data == NULL))
235		return (EIO);
236
237	if (rw == KSTAT_WRITE)
238		return (EACCES);
239
240	ns = netstack_find_by_stackid(stackid);
241	if (ns == NULL)
242		return (-1);
243	ipss = ns->netstack_ipsec;
244	if (ipss == NULL) {
245		netstack_rele(ns);
246		return (-1);
247	}
248	ekp = (ah_kstats_t *)kp->ks_data;
249
250	mutex_enter(&ipss->ipsec_alg_lock);
251	ekp->ah_stat_num_aalgs.value.ui64 = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
252	mutex_exit(&ipss->ipsec_alg_lock);
253
254	netstack_rele(ns);
255	return (0);
256}
257
/*
 * Periodic aging callback for one stack's AH SADB.  Runs sadb_ager() over
 * the IPv4 and IPv6 tables and then re-arms itself through sadb_retimeout(),
 * saving the returned id in ah_event.
 *
 * Don't have to lock ipsecah_age_interval, as only one thread will access
 * it at a time, because I control the one function that does a qtimeout()
 * on ah_pfkey_q.
 */
static void
ah_ager(void *arg)
{
	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;
	netstack_t	*ns = ahstack->ipsecah_netstack;
	/* Start time of this pass, handed to sadb_retimeout(). */
	hrtime_t begin = gethrtime();

	sadb_ager(&ahstack->ah_sadb.s_v4, ahstack->ah_pfkey_q,
	    ahstack->ipsecah_reap_delay, ns);
	sadb_ager(&ahstack->ah_sadb.s_v6, ahstack->ah_pfkey_q,
	    ahstack->ipsecah_reap_delay, ns);

	ahstack->ah_event = sadb_retimeout(begin, ahstack->ah_pfkey_q,
	    ah_ager, ahstack,
	    &ahstack->ipsecah_age_interval, ahstack->ipsecah_age_int_max,
	    info.mi_idnum);
}
280
281/*
282 * Get an AH NDD parameter.
283 */
284/* ARGSUSED */
285static int
286ipsecah_param_get(q, mp, cp, cr)
287	queue_t	*q;
288	mblk_t	*mp;
289	caddr_t	cp;
290	cred_t *cr;
291{
292	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
293	uint_t value;
294	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
295
296	mutex_enter(&ahstack->ipsecah_param_lock);
297	value = ipsecahpa->ipsecah_param_value;
298	mutex_exit(&ahstack->ipsecah_param_lock);
299
300	(void) mi_mpprintf(mp, "%u", value);
301	return (0);
302}
303
304/*
305 * This routine sets an NDD variable in a ipsecahparam_t structure.
306 */
307/* ARGSUSED */
308static int
309ipsecah_param_set(q, mp, value, cp, cr)
310	queue_t	*q;
311	mblk_t	*mp;
312	char	*value;
313	caddr_t	cp;
314	cred_t *cr;
315{
316	ulong_t	new_value;
317	ipsecahparam_t	*ipsecahpa = (ipsecahparam_t *)cp;
318	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
319
320	/*
321	 * Fail the request if the new value does not lie within the
322	 * required bounds.
323	 */
324	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
325	    new_value < ipsecahpa->ipsecah_param_min ||
326	    new_value > ipsecahpa->ipsecah_param_max) {
327		    return (EINVAL);
328	}
329
330	/* Set the new value */
331	mutex_enter(&ahstack->ipsecah_param_lock);
332	ipsecahpa->ipsecah_param_value = new_value;
333	mutex_exit(&ahstack->ipsecah_param_lock);
334	return (0);
335}
336
337/*
338 * Using lifetime NDD variables, fill in an extended combination's
339 * lifetime information.
340 */
341void
342ipsecah_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
343{
344	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
345
346	ecomb->sadb_x_ecomb_soft_bytes = ahstack->ipsecah_default_soft_bytes;
347	ecomb->sadb_x_ecomb_hard_bytes = ahstack->ipsecah_default_hard_bytes;
348	ecomb->sadb_x_ecomb_soft_addtime =
349	    ahstack->ipsecah_default_soft_addtime;
350	ecomb->sadb_x_ecomb_hard_addtime =
351	    ahstack->ipsecah_default_hard_addtime;
352	ecomb->sadb_x_ecomb_soft_usetime =
353	    ahstack->ipsecah_default_soft_usetime;
354	ecomb->sadb_x_ecomb_hard_usetime =
355	    ahstack->ipsecah_default_hard_usetime;
356}
357
/*
 * Initialize things for AH at module load time: create the AH task queue
 * and register the per-netstack callbacks.  Always returns B_TRUE.
 */
boolean_t
ipsecah_ddi_init(void)
{
	/* Torn down again in ipsecah_ddi_destroy(). */
	ah_taskq = taskq_create("ah_taskq", 1, minclsyspri,
	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsecah_stack_t's.
	 */
	netstack_register(NS_IPSECAH, ipsecah_stack_init, NULL,
	    ipsecah_stack_fini);

	return (B_TRUE);
}
377
378/*
379 * Walk through the param array specified registering each element with the
380 * named dispatch handler.
381 */
382static boolean_t
383ipsecah_param_register(IDP *ndp, ipsecahparam_t *ahp, int cnt)
384{
385	for (; cnt-- > 0; ahp++) {
386		if (ahp->ipsecah_param_name != NULL &&
387		    ahp->ipsecah_param_name[0]) {
388			if (!nd_load(ndp,
389			    ahp->ipsecah_param_name,
390			    ipsecah_param_get, ipsecah_param_set,
391			    (caddr_t)ahp)) {
392				nd_free(ndp);
393				return (B_FALSE);
394			}
395		}
396	}
397	return (B_TRUE);
398}
399
/*
 * Initialize things for AH for each stack instance.
 *
 * Allocates the per-stack ipsecah_stack_t, gives it a private copy of
 * lcl_param_arr registered as ND variables, creates the kstats, and
 * initializes the AH SADB, the parameter lock and the packet dropper.
 * Returns the new ipsecah_stack_t; ipsecah_stack_fini() undoes all of this.
 */
static void *
ipsecah_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsecah_stack_t	*ahstack;
	ipsecahparam_t	*ahp;

	ahstack = (ipsecah_stack_t *)kmem_zalloc(sizeof (*ahstack), KM_SLEEP);
	ahstack->ipsecah_netstack = ns;

	/* Per-stack, writable copy of the tunables template. */
	ahp = (ipsecahparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
	ahstack->ipsecah_params = ahp;
	bcopy(lcl_param_arr, ahp, sizeof (lcl_param_arr));

	/*
	 * Registration and kstat failures are deliberately ignored here.
	 * NOTE(review): the get/set handlers take ipsecah_param_lock, which
	 * is only initialized further down — presumably nothing can invoke
	 * them before this function returns; confirm.
	 */
	(void) ipsecah_param_register(&ahstack->ipsecah_g_nd, ahp,
	    A_CNT(lcl_param_arr));

	(void) ah_kstat_init(ahstack, stackid);

	ahstack->ah_sadb.s_acquire_timeout = &ahstack->ipsecah_acquire_timeout;
	ahstack->ah_sadb.s_acqfn = ah_send_acquire;
	sadbp_init("AH", &ahstack->ah_sadb, SADB_SATYPE_AH, ah_hash_size,
	    ahstack->ipsecah_netstack);

	mutex_init(&ahstack->ipsecah_param_lock, NULL, MUTEX_DEFAULT, 0);

	ip_drop_register(&ahstack->ah_dropper, "IPsec AH");
	return (ahstack);
}
431
/*
 * Destroy things for AH at module unload time.  Undoes ipsecah_ddi_init():
 * the netstack callbacks are unregistered first, then the task queue is
 * destroyed.
 */
void
ipsecah_ddi_destroy(void)
{
	netstack_unregister(NS_IPSECAH);
	taskq_destroy(ah_taskq);
}
441
/*
 * Destroy things for AH for one stack... Never called?
 *
 * This is the last callback handed to netstack_register() in
 * ipsecah_ddi_init().  It releases everything ipsecah_stack_init() set up:
 * the pending ager timeout (if a keysock queue exists), the SADB, the
 * dropper, the parameter lock and ND table, the parameter copy, the
 * kstats, and finally the ipsecah_stack_t itself (arg).
 */
static void
ipsecah_stack_fini(netstackid_t stackid, void *arg)
{
	ipsecah_stack_t *ahstack = (ipsecah_stack_t *)arg;

	/* The ager timeout is tied to the keysock queue. */
	if (ahstack->ah_pfkey_q != NULL) {
		(void) quntimeout(ahstack->ah_pfkey_q, ahstack->ah_event);
	}
	ahstack->ah_sadb.s_acqfn = NULL;
	ahstack->ah_sadb.s_acquire_timeout = NULL;
	sadbp_destroy(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
	ip_drop_unregister(&ahstack->ah_dropper);
	mutex_destroy(&ahstack->ipsecah_param_lock);
	nd_free(&ahstack->ipsecah_g_nd);

	/* Size must match the kmem_alloc() in ipsecah_stack_init(). */
	kmem_free(ahstack->ipsecah_params, sizeof (lcl_param_arr));
	ahstack->ipsecah_params = NULL;
	kstat_delete_netstack(ahstack->ah_ksp, stackid);
	ahstack->ah_ksp = NULL;
	ahstack->ah_kstats = NULL;

	kmem_free(ahstack, sizeof (*ahstack));
}
468
/*
 * AH module open routine, which is here for keysock plumbing.
 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 * Days of export control, and fears that ESP would not be allowed
 * to be shipped at all by default.  Eventually, keysock should
 * either access AH and ESP via modstubs or krtld dependencies, or
 * perhaps be folded in with AH and ESP into a single IPsec/netsec
 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 *
 * Returns EPERM if secpolicy_ip_config() rejects the caller, EINVAL if
 * this is not a module open (sflag != MODOPEN), and 0 otherwise.  On a
 * first open both queues' q_ptr are pointed at the caller's
 * ipsecah_stack_t.
 */
/* ARGSUSED */
static int
ipsecah_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	netstack_t	*ns;
	ipsecah_stack_t	*ahstack;

	if (secpolicy_ip_config(credp, B_FALSE) != 0)
		return (EPERM);

	if (q->q_ptr != NULL)
		return (0);  /* Re-open of an already open instance. */

	if (sflag != MODOPEN)
		return (EINVAL);

	/*
	 * Presumably this returns with a hold on the netstack; the matching
	 * netstack_rele() is in ipsecah_close().
	 */
	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	ahstack = ns->netstack_ipsecah;
	ASSERT(ahstack != NULL);

	q->q_ptr = ahstack;
	WR(q)->q_ptr = q->q_ptr;

	qprocson(q);
	return (0);
}
505
/*
 * AH module close routine.
 *
 * Turns off queue processing, detaches the stack from keysock (and cancels
 * the ager timeout) if this queue was the stack's ah_pfkey_q, and releases
 * the netstack.  Always returns 0.
 */
static int
ipsecah_close(queue_t *q)
{
	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;

	/*
	 * Clean up q_ptr, if needed.
	 */
	qprocsoff(q);

	/* Keysock queue check is safe, because of OCEXCL perimeter. */

	if (q == ahstack->ah_pfkey_q) {
		ah1dbg(ahstack,
		    ("ipsecah_close:  Ummm... keysock is closing AH.\n"));
		ahstack->ah_pfkey_q = NULL;
		/* Detach qtimeouts. */
		(void) quntimeout(q, ahstack->ah_event);
	}

	/* Pairs with the netstack lookup done in ipsecah_open(). */
	netstack_rele(ahstack->ipsecah_netstack);
	return (0);
}
532
/*
 * Construct an SADB_REGISTER message with the current algorithms and send
 * it up the keysock queue (ah_pfkey_q).
 *
 * sequence, pid - copied into sadb_msg_seq and sadb_msg_pid.
 * serial        - passed to sadb_keysock_out() for the KEYSOCK_OUT mblk.
 * ahstack       - per-stack AH state; supplies the algorithm table and
 *                 ah_pfkey_q.
 * cr            - optional credential; on a labeled system its label, if
 *                 any, is appended as a SADB_EXT_SENSITIVITY extension.
 *
 * Returns B_TRUE if the message was handed to putnext(), B_FALSE if an
 * allocation failed or there is no ah_pfkey_q to send it to.
 */
static boolean_t
ah_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
    ipsecah_stack_t *ahstack, cred_t *cr)
{
	mblk_t *mp;
	boolean_t rc = B_TRUE;
	sadb_msg_t *samsg;
	sadb_supported_t *sasupp;
	sadb_alg_t *saalg;
	uint_t allocsize = sizeof (*samsg);
	uint_t i, numalgs_snap;
	ipsec_alginfo_t **authalgs;
	uint_t num_aalgs;
	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
	sadb_sens_t *sens;
	size_t sens_len = 0;
	sadb_ext_t *nextext;
	ts_label_t *sens_tsl = NULL;

	/* Allocate the KEYSOCK_OUT. */
	mp = sadb_keysock_out(serial);
	if (mp == NULL) {
		ah0dbg(("ah_register_out: couldn't allocate mblk.\n"));
		return (B_FALSE);
	}

	/* Reserve room for a sensitivity extension if the caller has a label. */
	if (is_system_labeled() && (cr != NULL)) {
		sens_tsl = crgetlabel(cr);
		if (sens_tsl != NULL) {
			sens_len = sadb_sens_len_from_label(sens_tsl);
			allocsize += sens_len;
		}
	}

	/*
	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
	 * The algorithm lock (ipsec_alg_lock) needs to be held while
	 * allocating the variable part (i.e. the algorithms) of the message.
	 */

	mutex_enter(&ipss->ipsec_alg_lock);

	/*
	 * Return only valid algorithms, so the number of algorithms
	 * to send up may be less than the number of algorithm entries
	 * in the table.
	 */
	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
			num_aalgs++;

	/*
	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
	 * down the lock while filling it.
	 */
	if (num_aalgs != 0) {
		allocsize += (num_aalgs * sizeof (*saalg));
		allocsize += sizeof (*sasupp);
	}
	mp->b_cont = allocb(allocsize, BPRI_HI);
	if (mp->b_cont == NULL) {
		mutex_exit(&ipss->ipsec_alg_lock);
		freemsg(mp);
		return (B_FALSE);
	}

	/* The whole buffer is claimed up front; extensions follow the header. */
	mp->b_cont->b_wptr += allocsize;
	nextext = (sadb_ext_t *)(mp->b_cont->b_rptr + sizeof (*samsg));

	if (num_aalgs != 0) {

		/* Descriptors start after the supported-algorithms header. */
		saalg = (sadb_alg_t *)(((uint8_t *)nextext) + sizeof (*sasupp));
		ASSERT(((ulong_t)saalg & 0x7) == 0);

		numalgs_snap = 0;
		for (i = 0;
		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
		    i++) {
			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
				continue;

			saalg->sadb_alg_id = authalgs[i]->alg_id;
			saalg->sadb_alg_ivlen = 0;
			saalg->sadb_alg_minbits = authalgs[i]->alg_ef_minbits;
			saalg->sadb_alg_maxbits = authalgs[i]->alg_ef_maxbits;
			saalg->sadb_x_alg_increment =
			    authalgs[i]->alg_increment;
			/* For now, salt is meaningless in AH. */
			ASSERT(authalgs[i]->alg_saltlen == 0);
			saalg->sadb_x_alg_saltbits =
			    SADB_8TO1(authalgs[i]->alg_saltlen);
			numalgs_snap++;
			saalg++;
		}
		ASSERT(numalgs_snap == num_aalgs);
#ifdef DEBUG
		/*
		 * Reality check to make sure I snagged all of the
		 * algorithms.
		 */
		for (; i < IPSEC_MAX_ALGS; i++)
			if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
				cmn_err(CE_PANIC,
				    "ah_register_out()!  Missed #%d.\n", i);
#endif /* DEBUG */
		nextext = (sadb_ext_t *)saalg;
	}

	mutex_exit(&ipss->ipsec_alg_lock);

	/* The sensitivity extension, if any, goes after the algorithms. */
	if (sens_tsl != NULL) {
		sens = (sadb_sens_t *)nextext;
		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
		    sens_tsl, sens_len);

		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
	}

	/* Now fill the rest of the SADB_REGISTER message. */

	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_REGISTER;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = SADB_SATYPE_AH;
	samsg->sadb_msg_len = SADB_8TO64(allocsize);
	samsg->sadb_msg_reserved = 0;
	/*
	 * Assume caller has sufficient sequence/pid number info.  If it's one
	 * from me over a new alg., I could give two hoots about sequence.
	 */
	samsg->sadb_msg_seq = sequence;
	samsg->sadb_msg_pid = pid;

	if (num_aalgs != 0) {
		sasupp = (sadb_supported_t *)(samsg + 1);
		sasupp->sadb_supported_len = SADB_8TO64(
		    sizeof (*sasupp) + sizeof (*saalg) * num_aalgs);
		sasupp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
		sasupp->sadb_supported_reserved = 0;
	}

	/* Without a keysock queue there is nobody to send the message to. */
	if (ahstack->ah_pfkey_q != NULL)
		putnext(ahstack->ah_pfkey_q, mp);
	else {
		rc = B_FALSE;
		freemsg(mp);
	}

	return (rc);
}
688
/*
 * Invoked when the algorithm table changes. Causes SADB_REGISTER
 * messages containing the current list of algorithms to be
 * sent up to the AH listeners.
 */
void
ipsecah_algs_changed(netstack_t *ns)
{
	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;

	/*
	 * Time to send a PF_KEY SADB_REGISTER message to AH listeners
	 * everywhere.  (The function itself checks for NULL ah_pfkey_q.)
	 * The result is ignored, and no credential is supplied.
	 */
	(void) ah_register_out(0, 0, 0, ahstack, NULL);
}
705
706/*
707 * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
708 * and send it into AH and IP again.
709 */
710static void
711inbound_task(void *arg)
712{
713	mblk_t		*mp = (mblk_t *)arg;
714	mblk_t		*async_mp;
715	ip_recv_attr_t	iras;
716
717	async_mp = mp;
718	mp = async_mp->b_cont;
719	async_mp->b_cont = NULL;
720	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
721		/* The ill or ip_stack_t disappeared on us */
722		ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
723		freemsg(mp);
724		goto done;
725	}
726
727	ah_inbound_restart(mp, &iras);
728done:
729	ira_cleanup(&iras, B_TRUE);
730}
731
732/*
733 * Restart ESP after the SA has been added.
734 */
735static void
736ah_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
737{
738	ah_t		*ah;
739	netstack_t	*ns;
740	ipsecah_stack_t	*ahstack;
741
742	ns = ira->ira_ill->ill_ipst->ips_netstack;
743	ahstack = ns->netstack_ipsecah;
744
745	ASSERT(ahstack != NULL);
746	mp = ipsec_inbound_ah_sa(mp, ira, &ah);
747	if (mp == NULL)
748		return;
749
750	ASSERT(ah != NULL);
751	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
752	ASSERT(ira->ira_ipsec_ah_sa != NULL);
753
754	mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
755	if (mp == NULL) {
756		/*
757		 * Either it failed or is pending. In the former case
758		 * ipIfStatsInDiscards was increased.
759		 */
760		return;
761	}
762	ip_input_post_ipsec(mp, ira);
763}
764
/*
 * Now that weak-key passed, actually ADD the security association, and
 * send back a reply ADD message.
 *
 * mp         - the PF_KEY message, handed on to sadb_common_add().
 * samsg      - the message's sadb_msg_t header (type and sequence are read).
 * ksi        - parsed keysock input; supplies the extensions and the
 *              source/destination address classifications.
 * diagnostic - set to a SADB_X_DIAGNOSTIC_* value on some failures.
 * ahstack    - per-stack AH state.
 *
 * Besides adding the SA this also (a) claims a matching ACQUIRE record and
 * sends or drops the packets queued on it, and (b) for an SADB_UPDATE of a
 * larval SA, re-dispatches the packet that was parked on the larval SA.
 *
 * Returns 0 on success and non-zero on failure (an errno from the helpers,
 * EINVAL/ESRCH from here, or 1 if the taskq dispatch fails).
 */
static int
ah_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
    int *diagnostic, ipsecah_stack_t *ahstack)
{
	isaf_t *primary = NULL, *secondary;
	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
	ipsa_t *larval;
	ipsacq_t *acqrec;
	iacqf_t *acq_bucket;
	mblk_t *acq_msgs = NULL;
	mblk_t *lpkt;
	int rc;
	ipsa_query_t sq;
	int error;
	netstack_t	*ns = ahstack->ipsecah_netstack;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	/*
	 * Locate the appropriate table(s).
	 */

	sq.spp = &ahstack->ah_sadb;
	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
	    &sq, diagnostic);
	if (error)
		return (error);

	/*
	 * Use the direction flags provided by the KMD to determine
	 * if the inbound or outbound table should be the primary
	 * for this SA. If these flags were absent then make this
	 * decision based on the addresses.
	 */
	if (assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		primary = sq.inbound;
		secondary = sq.outbound;
		is_inbound = B_TRUE;
		if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
			clone = B_TRUE;
	} else {
		if (assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
			primary = sq.outbound;
			secondary = sq.inbound;
		}
	}
	if (primary == NULL) {
		/*
		 * The KMD did not set a direction flag, determine which
		 * table to insert the SA into based on addresses.
		 */
		switch (ksi->ks_in_dsttype) {
		case KS_IN_ADDR_MBCAST:
			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
			assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			/* FALLTHRU */
		/*
		 * If the source address is either one of mine, or unspecified
		 * (which is best summed up by saying "not 'not mine'"),
		 * then the association is potentially bi-directional,
		 * in that it can be used for inbound traffic and outbound
		 * traffic.  The best example of such an SA is a multicast
		 * SA (which allows me to receive the outbound traffic).
		 */
		case KS_IN_ADDR_ME:
			assoc->sadb_sa_flags |= IPSA_F_INBOUND;
			primary = sq.inbound;
			secondary = sq.outbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
				clone = B_TRUE;
			is_inbound = B_TRUE;
			break;

		/*
		 * If the source address is literally not mine (either
		 * unspecified or not mine), then this SA may have an
		 * address that WILL be mine after some configuration.
		 * We pay the price for this by making it a bi-directional
		 * SA.
		 */
		case KS_IN_ADDR_NOTME:
			assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
			primary = sq.outbound;
			secondary = sq.inbound;
			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
				assoc->sadb_sa_flags |= IPSA_F_INBOUND;
				clone = B_TRUE;
			}
			break;
		default:
			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
			return (EINVAL);
		}
	}

	/*
	 * Find an ACQUIRE list entry if possible.  If we've added an SA that
	 * suits the needs of an ACQUIRE list entry, we can eliminate the
	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
	 * high-bit of the sequence number to queue it.  Key off destination
	 * addr, and change acqrec's state.
	 */

	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
		mutex_enter(&acq_bucket->iacqf_lock);
		/* On a match the loop exits with acqrec's lock still held. */
		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
		    acqrec = acqrec->ipsacq_next) {
			mutex_enter(&acqrec->ipsacq_lock);
			/*
			 * Q:  I only check sequence.  Should I check dst?
			 * A: Yes, check dest because those are the packets
			 *    that are queued up.
			 */
			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
				break;
			mutex_exit(&acqrec->ipsacq_lock);
		}
		if (acqrec != NULL) {
			/*
			 * AHA!  I found an ACQUIRE record for this SA.
			 * Grab the msg list, and free the acquire record.
			 * I already am holding the lock for this record,
			 * so all I have to do is free it.
			 */
			acq_msgs = acqrec->ipsacq_mp;
			acqrec->ipsacq_mp = NULL;
			mutex_exit(&acqrec->ipsacq_lock);
			sadb_destroy_acquire(acqrec, ns);
		}
		mutex_exit(&acq_bucket->iacqf_lock);
	}

	/*
	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
	 * in larval list (if there).
	 */

	larval = NULL;

	if (samsg->sadb_msg_type == SADB_UPDATE) {
		mutex_enter(&sq.inbound->isaf_lock);
		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
		mutex_exit(&sq.inbound->isaf_lock);

		if ((larval == NULL) ||
		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
			if (larval != NULL) {
				IPSA_REFRELE(larval);
			}
			ah0dbg(("Larval update, but larval disappeared.\n"));
			return (ESRCH);
		} /* Else sadb_common_add unlinks it for me! */
	}

	if (larval != NULL) {
		/*
		 * Hold again, because sadb_common_add() consumes a reference,
		 * and we don't want to clear_lpkt() without a reference.
		 */
		IPSA_REFHOLD(larval);
	}

	rc = sadb_common_add(ahstack->ah_pfkey_q, mp,
	    samsg, ksi, primary, secondary, larval, clone, is_inbound,
	    diagnostic, ns, &ahstack->ah_sadb);

	if (larval != NULL) {
		if (rc == 0) {
			/*
			 * Hand any packet parked on the larval SA to
			 * inbound_task().  A failed dispatch turns into a
			 * non-zero rc.
			 * NOTE(review): lpkt does not appear to be freed
			 * here when the dispatch fails — confirm whether
			 * that leaks the packet.
			 */
			lpkt = sadb_clear_lpkt(larval);
			if (lpkt != NULL) {
				rc = !taskq_dispatch(ah_taskq, inbound_task,
				    lpkt, TQ_NOSLEEP);
			}
		}
		/* Drops the extra hold taken before sadb_common_add(). */
		IPSA_REFRELE(larval);
	}

	/*
	 * How much more stack will I create with all of these
	 * ah_outbound_*() calls?
	 */

	/* Handle the packets queued waiting for the SA */
	while (acq_msgs != NULL) {
		mblk_t		*asyncmp;
		mblk_t		*data_mp;
		ip_xmit_attr_t	ixas;
		ill_t		*ill;

		/* Unlink the head of the b_next chain. */
		asyncmp = acq_msgs;
		acq_msgs = acq_msgs->b_next;
		asyncmp->b_next = NULL;

		/*
		 * Extract the ip_xmit_attr_t from the first mblk.
		 * Verifies that the netstack and ill is still around; could
		 * have vanished while iked was doing its work.
		 * On successful return we have a nce_t and the ill/ipst can't
		 * disappear until we do the nce_refrele in ixa_cleanup.
		 */
		data_mp = asyncmp->b_cont;
		asyncmp->b_cont = NULL;
		if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
			/* Attributes are gone; drop without an ill. */
			AH_BUMP_STAT(ahstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, NULL,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &ahstack->ah_dropper);
		} else if (rc != 0) {
			/* The add failed, so the queued packets are dropped. */
			ill = ixas.ixa_nce->nce_ill;
			AH_BUMP_STAT(ahstack, out_discards);
			ip_drop_packet(data_mp, B_FALSE, ill,
			    DROPPER(ipss, ipds_sadb_acquire_timeout),
			    &ahstack->ah_dropper);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
		} else {
			ah_outbound_finish(data_mp, &ixas);
		}
		/* Runs on all three paths above. */
		ixa_cleanup(&ixas);
	}

	return (rc);
}
997
998
999/*
1000 * Process one of the queued messages (from ipsacq_mp) once the SA
1001 * has been added.
1002 */
1003static void
1004ah_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
1005{
1006	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
1007	ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
1008	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1009	ill_t		*ill = ixa->ixa_nce->nce_ill;
1010
1011	if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
1012		AH_BUMP_STAT(ahstack, out_discards);
1013		ip_drop_packet(data_mp, B_FALSE, ill,
1014		    DROPPER(ipss, ipds_sadb_acquire_timeout),
1015		    &ahstack->ah_dropper);
1016		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
1017		return;
1018	}
1019
1020	data_mp = ah_outbound(data_mp, ixa);
1021	if (data_mp == NULL)
1022		return;
1023
1024	(void) ip_output_post_ipsec(data_mp, ixa);
1025}
1026
1027/*
1028 * Add new AH security association.  This may become a generic AH/ESP
1029 * routine eventually.
1030 */
1031static int
1032ah_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
1033{
1034	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1035	sadb_address_t *srcext =
1036	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1037	sadb_address_t *dstext =
1038	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1039	sadb_address_t *isrcext =
1040	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
1041	sadb_address_t *idstext =
1042	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
1043	sadb_key_t *key = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
1044	struct sockaddr_in *src, *dst;
1045	/* We don't need sockaddr_in6 for now. */
1046	sadb_lifetime_t *soft =
1047	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
1048	sadb_lifetime_t *hard =
1049	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
1050	sadb_lifetime_t *idle =
1051	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
1052	ipsec_alginfo_t *aalg;
1053	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1054	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1055
1056	/* I need certain extensions present for an ADD message. */
1057	if (srcext == NULL) {
1058		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
1059		return (EINVAL);
1060	}
1061	if (dstext == NULL) {
1062		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1063		return (EINVAL);
1064	}
1065	if (isrcext == NULL && idstext != NULL) {
1066		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
1067		return (EINVAL);
1068	}
1069	if (isrcext != NULL && idstext == NULL) {
1070		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
1071		return (EINVAL);
1072	}
1073	if (assoc == NULL) {
1074		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1075		return (EINVAL);
1076	}
1077	if (key == NULL) {
1078		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_AKEY;
1079		return (EINVAL);
1080	}
1081
1082	src = (struct sockaddr_in *)(srcext + 1);
1083	dst = (struct sockaddr_in *)(dstext + 1);
1084
1085	/* Sundry ADD-specific reality checks. */
1086	/* XXX STATS : Logging/stats here? */
1087
1088	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
1089	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
1090		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
1091		return (EINVAL);
1092	}
1093	if (assoc->sadb_sa_encrypt != SADB_EALG_NONE) {
1094		*diagnostic = SADB_X_DIAGNOSTIC_ENCR_NOTSUPP;
1095		return (EINVAL);
1096	}
1097	if (assoc->sadb_sa_flags & ~ahstack->ah_sadb.s_addflags) {
1098		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
1099		return (EINVAL);
1100	}
1101	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0)
1102		return (EINVAL);
1103
1104	ASSERT(src->sin_family == dst->sin_family);
1105
1106	/* Stuff I don't support, for now.  XXX Diagnostic? */
1107	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
1108		return (EOPNOTSUPP);
1109
1110	if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL) {
1111		if (!is_system_labeled())
1112			return (EOPNOTSUPP);
1113	}
1114
1115	if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL) {
1116		if (!is_system_labeled())
1117			return (EOPNOTSUPP);
1118	}
1119	/*
1120	 * XXX Policy : I'm not checking identities at this time, but
1121	 * if I did, I'd do them here, before I sent the weak key
1122	 * check up to the algorithm.
1123	 */
1124
1125	/* verify that there is a mapping for the specified algorithm */
1126	mutex_enter(&ipss->ipsec_alg_lock);
1127	aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH][assoc->sadb_sa_auth];
1128	if (aalg == NULL || !ALG_VALID(aalg)) {
1129		mutex_exit(&ipss->ipsec_alg_lock);
1130		ah1dbg(ahstack, ("Couldn't find auth alg #%d.\n",
1131		    assoc->sadb_sa_auth));
1132		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
1133		return (EINVAL);
1134	}
1135	ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
1136
1137	/* sanity check key sizes */
1138	if (!ipsec_valid_key_size(key->sadb_key_bits, aalg)) {
1139		mutex_exit(&ipss->ipsec_alg_lock);
1140		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
1141		return (EINVAL);
1142	}
1143
1144	/* check key and fix parity if needed */
1145	if (ipsec_check_key(aalg->alg_mech_type, key, B_TRUE,
1146	    diagnostic) != 0) {
1147		mutex_exit(&ipss->ipsec_alg_lock);
1148		return (EINVAL);
1149	}
1150
1151	mutex_exit(&ipss->ipsec_alg_lock);
1152
1153	return (ah_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
1154	    diagnostic, ahstack));
1155}
1156
1157/* Refactor me */
1158/*
1159 * Update a security association.  Updates come in two varieties.  The first
1160 * is an update of lifetimes on a non-larval SA.  The second is an update of
1161 * a larval SA, which ends up looking a lot more like an add.
1162 */
1163static int
1164ah_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1165    ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1166{
1167	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1168	sadb_address_t *dstext =
1169	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1170	mblk_t	*buf_pkt;
1171	int rcode;
1172
1173	if (dstext == NULL) {
1174		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
1175		return (EINVAL);
1176	}
1177
1178	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &ahstack->ah_sadb,
1179	    diagnostic, ahstack->ah_pfkey_q, ah_add_sa,
1180	    ahstack->ipsecah_netstack, sadb_msg_type);
1181
1182	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
1183	    (rcode != 0)) {
1184		return (rcode);
1185	}
1186
1187	HANDLE_BUF_PKT(ah_taskq, ahstack->ipsecah_netstack->netstack_ipsec,
1188	    ahstack->ah_dropper, buf_pkt);
1189
1190	return (rcode);
1191}
1192
1193/* Refactor me */
1194/*
1195 * Delete a security association.  This is REALLY likely to be code common to
1196 * both AH and ESP.  Find the association, then unlink it.
1197 */
1198static int
1199ah_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
1200    ipsecah_stack_t *ahstack, uint8_t sadb_msg_type)
1201{
1202	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
1203	sadb_address_t *dstext =
1204	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
1205	sadb_address_t *srcext =
1206	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
1207	struct sockaddr_in *sin;
1208
1209	if (assoc == NULL) {
1210		if (dstext != NULL)
1211			sin = (struct sockaddr_in *)(dstext + 1);
1212		else if (srcext != NULL)
1213			sin = (struct sockaddr_in *)(srcext + 1);
1214		else {
1215			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
1216			return (EINVAL);
1217		}
1218		return (sadb_purge_sa(mp, ksi,
1219		    (sin->sin_family == AF_INET6) ? &ahstack->ah_sadb.s_v6 :
1220		    &ahstack->ah_sadb.s_v4, diagnostic, ahstack->ah_pfkey_q));
1221	}
1222
1223	return (sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, diagnostic,
1224	    ahstack->ah_pfkey_q, sadb_msg_type));
1225}
1226
1227/* Refactor me */
1228/*
1229 * Convert the entire contents of all of AH's SA tables into PF_KEY SADB_DUMP
1230 * messages.
1231 */
1232static void
1233ah_dump(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
1234{
1235	int error;
1236	sadb_msg_t *samsg;
1237
1238	/*
1239	 * Dump each fanout, bailing if error is non-zero.
1240	 */
1241
1242	error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v4);
1243	if (error != 0)
1244		goto bail;
1245
1246	error = sadb_dump(ahstack->ah_pfkey_q, mp, ksi, &ahstack->ah_sadb.s_v6);
1247bail:
1248	ASSERT(mp->b_cont != NULL);
1249	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1250	samsg->sadb_msg_errno = (uint8_t)error;
1251	sadb_pfkey_echo(ahstack->ah_pfkey_q, mp,
1252	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
1253}
1254
1255/*
1256 * First-cut reality check for an inbound PF_KEY message.
1257 */
1258static boolean_t
1259ah_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
1260    ipsecah_stack_t *ahstack)
1261{
1262	int diagnostic;
1263
1264	if (mp->b_cont == NULL) {
1265		freemsg(mp);
1266		return (B_TRUE);
1267	}
1268
1269	if (ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1270		diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
1271		goto badmsg;
1272	}
1273	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
1274		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
1275		goto badmsg;
1276	}
1277	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
1278	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
1279		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
1280		goto badmsg;
1281	}
1282	return (B_FALSE);	/* False ==> no failures */
1283
1284badmsg:
1285	sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1286	    diagnostic, ksi->ks_in_serial);
1287	return (B_TRUE);	/* True ==> failures */
1288}
1289
1290/*
1291 * AH parsing of PF_KEY messages.  Keysock did most of the really silly
1292 * error cases.  What I receive is a fully-formed, syntactically legal
1293 * PF_KEY message.  I then need to check semantics...
1294 *
1295 * This code may become common to AH and ESP.  Stay tuned.
1296 *
1297 * I also make the assumption that db_ref's are cool.  If this assumption
1298 * is wrong, this means that someone other than keysock or me has been
1299 * mucking with PF_KEY messages.
1300 */
1301static void
1302ah_parse_pfkey(mblk_t *mp, ipsecah_stack_t *ahstack)
1303{
1304	mblk_t *msg = mp->b_cont;
1305	sadb_msg_t *samsg;
1306	keysock_in_t *ksi;
1307	int error;
1308	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
1309
1310	ASSERT(msg != NULL);
1311
1312	samsg = (sadb_msg_t *)msg->b_rptr;
1313	ksi = (keysock_in_t *)mp->b_rptr;
1314
1315	/*
1316	 * If applicable, convert unspecified AF_INET6 to unspecified
1317	 * AF_INET.
1318	 */
1319	if (!sadb_addrfix(ksi, ahstack->ah_pfkey_q, mp,
1320	    ahstack->ipsecah_netstack) ||
1321	    ah_pfkey_reality_failures(mp, ksi, ahstack)) {
1322		return;
1323	}
1324
1325	switch (samsg->sadb_msg_type) {
1326	case SADB_ADD:
1327		error = ah_add_sa(mp, ksi, &diagnostic,
1328		    ahstack->ipsecah_netstack);
1329		if (error != 0) {
1330			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1331			    diagnostic, ksi->ks_in_serial);
1332		}
1333		/* else ah_add_sa() took care of things. */
1334		break;
1335	case SADB_DELETE:
1336	case SADB_X_DELPAIR:
1337	case SADB_X_DELPAIR_STATE:
1338		error = ah_del_sa(mp, ksi, &diagnostic, ahstack,
1339		    samsg->sadb_msg_type);
1340		if (error != 0) {
1341			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1342			    diagnostic, ksi->ks_in_serial);
1343		}
1344		/* Else ah_del_sa() took care of things. */
1345		break;
1346	case SADB_GET:
1347		error = sadb_delget_sa(mp, ksi, &ahstack->ah_sadb, &diagnostic,
1348		    ahstack->ah_pfkey_q, samsg->sadb_msg_type);
1349		if (error != 0) {
1350			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1351			    diagnostic, ksi->ks_in_serial);
1352		}
1353		/* Else sadb_get_sa() took care of things. */
1354		break;
1355	case SADB_FLUSH:
1356		sadbp_flush(&ahstack->ah_sadb, ahstack->ipsecah_netstack);
1357		sadb_pfkey_echo(ahstack->ah_pfkey_q, mp, samsg, ksi, NULL);
1358		break;
1359	case SADB_REGISTER:
1360		/*
1361		 * Hmmm, let's do it!  Check for extensions (there should
1362		 * be none), extract the fields, call ah_register_out(),
1363		 * then either free or report an error.
1364		 *
1365		 * Keysock takes care of the PF_KEY bookkeeping for this.
1366		 */
1367		if (ah_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
1368		    ksi->ks_in_serial, ahstack, msg_getcred(mp, NULL))) {
1369			freemsg(mp);
1370		} else {
1371			/*
1372			 * Only way this path hits is if there is a memory
1373			 * failure.  It will not return B_FALSE because of
1374			 * lack of ah_pfkey_q if I am in wput().
1375			 */
1376			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM,
1377			    diagnostic, ksi->ks_in_serial);
1378		}
1379		break;
1380	case SADB_UPDATE:
1381	case SADB_X_UPDATEPAIR:
1382		/*
1383		 * Find a larval, if not there, find a full one and get
1384		 * strict.
1385		 */
1386		error = ah_update_sa(mp, ksi, &diagnostic, ahstack,
1387		    samsg->sadb_msg_type);
1388		if (error != 0) {
1389			sadb_pfkey_error(ahstack->ah_pfkey_q, mp, error,
1390			    diagnostic, ksi->ks_in_serial);
1391		}
1392		/* else ah_update_sa() took care of things. */
1393		break;
1394	case SADB_GETSPI:
1395		/*
1396		 * Reserve a new larval entry.
1397		 */
1398		ah_getspi(mp, ksi, ahstack);
1399		break;
1400	case SADB_ACQUIRE:
1401		/*
1402		 * Find larval and/or ACQUIRE record and kill it (them), I'm
1403		 * most likely an error.  Inbound ACQUIRE messages should only
1404		 * have the base header.
1405		 */
1406		sadb_in_acquire(samsg, &ahstack->ah_sadb, ahstack->ah_pfkey_q,
1407		    ahstack->ipsecah_netstack);
1408		freemsg(mp);
1409		break;
1410	case SADB_DUMP:
1411		/*
1412		 * Dump all entries.
1413		 */
1414		ah_dump(mp, ksi, ahstack);
1415		/* ah_dump will take care of the return message, etc. */
1416		break;
1417	case SADB_EXPIRE:
1418		/* Should never reach me. */
1419		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EOPNOTSUPP,
1420		    diagnostic, ksi->ks_in_serial);
1421		break;
1422	default:
1423		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL,
1424		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
1425		break;
1426	}
1427}
1428
1429/*
1430 * Handle case where PF_KEY says it can't find a keysock for one of my
1431 * ACQUIRE messages.
1432 */
1433static void
1434ah_keysock_no_socket(mblk_t *mp, ipsecah_stack_t *ahstack)
1435{
1436	sadb_msg_t *samsg;
1437	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
1438
1439	if (mp->b_cont == NULL) {
1440		freemsg(mp);
1441		return;
1442	}
1443	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
1444
1445	/*
1446	 * If keysock can't find any registered, delete the acquire record
1447	 * immediately, and handle errors.
1448	 */
1449	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
1450		samsg->sadb_msg_errno = kse->ks_err_errno;
1451		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1452		/*
1453		 * Use the write-side of the ah_pfkey_q
1454		 */
1455		sadb_in_acquire(samsg, &ahstack->ah_sadb,
1456		    WR(ahstack->ah_pfkey_q), ahstack->ipsecah_netstack);
1457	}
1458
1459	freemsg(mp);
1460}
1461
1462/*
1463 * AH module write put routine.
1464 */
1465static void
1466ipsecah_wput(queue_t *q, mblk_t *mp)
1467{
1468	ipsec_info_t *ii;
1469	struct iocblk *iocp;
1470	ipsecah_stack_t	*ahstack = (ipsecah_stack_t *)q->q_ptr;
1471
1472	ah3dbg(ahstack, ("In ah_wput().\n"));
1473
1474	/* NOTE:  Each case must take care of freeing or passing mp. */
1475	switch (mp->b_datap->db_type) {
1476	case M_CTL:
1477		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
1478			/* Not big enough message. */
1479			freemsg(mp);
1480			break;
1481		}
1482		ii = (ipsec_info_t *)mp->b_rptr;
1483
1484		switch (ii->ipsec_info_type) {
1485		case KEYSOCK_OUT_ERR:
1486			ah1dbg(ahstack, ("Got KEYSOCK_OUT_ERR message.\n"));
1487			ah_keysock_no_socket(mp, ahstack);
1488			break;
1489		case KEYSOCK_IN:
1490			AH_BUMP_STAT(ahstack, keysock_in);
1491			ah3dbg(ahstack, ("Got KEYSOCK_IN message.\n"));
1492
1493			/* Parse the message. */
1494			ah_parse_pfkey(mp, ahstack);
1495			break;
1496		case KEYSOCK_HELLO:
1497			sadb_keysock_hello(&ahstack->ah_pfkey_q, q, mp,
1498			    ah_ager, (void *)ahstack, &ahstack->ah_event,
1499			    SADB_SATYPE_AH);
1500			break;
1501		default:
1502			ah1dbg(ahstack, ("Got M_CTL from above of 0x%x.\n",
1503			    ii->ipsec_info_type));
1504			freemsg(mp);
1505			break;
1506		}
1507		break;
1508	case M_IOCTL:
1509		iocp = (struct iocblk *)mp->b_rptr;
1510		switch (iocp->ioc_cmd) {
1511		case ND_SET:
1512		case ND_GET:
1513			if (nd_getset(q, ahstack->ipsecah_g_nd, mp)) {
1514				qreply(q, mp);
1515				return;
1516			} else {
1517				iocp->ioc_error = ENOENT;
1518			}
1519			/* FALLTHRU */
1520		default:
1521			/* We really don't support any other ioctls, do we? */
1522
1523			/* Return EINVAL */
1524			if (iocp->ioc_error != ENOENT)
1525				iocp->ioc_error = EINVAL;
1526			iocp->ioc_count = 0;
1527			mp->b_datap->db_type = M_IOCACK;
1528			qreply(q, mp);
1529			return;
1530		}
1531	default:
1532		ah3dbg(ahstack,
1533		    ("Got default message, type %d, passing to IP.\n",
1534		    mp->b_datap->db_type));
1535		putnext(q, mp);
1536	}
1537}
1538
1539/* Refactor me */
1540/*
1541 * Updating use times can be tricky business if the ipsa_haspeer flag is
1542 * set.  This function is called once in an SA's lifetime.
1543 *
1544 * Caller has to REFRELE "assoc" which is passed in.  This function has
1545 * to REFRELE any peer SA that is obtained.
1546 */
1547static void
1548ah_set_usetime(ipsa_t *assoc, boolean_t inbound)
1549{
1550	ipsa_t *inassoc, *outassoc;
1551	isaf_t *bucket;
1552	sadb_t *sp;
1553	int outhash;
1554	boolean_t isv6;
1555	netstack_t	*ns = assoc->ipsa_netstack;
1556	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1557
1558	/* No peer?  No problem! */
1559	if (!assoc->ipsa_haspeer) {
1560		sadb_set_usetime(assoc);
1561		return;
1562	}
1563
1564	/*
1565	 * Otherwise, we want to grab both the original assoc and its peer.
1566	 * There might be a race for this, but if it's a real race, the times
1567	 * will be out-of-synch by at most a second, and since our time
1568	 * granularity is a second, this won't be a problem.
1569	 *
1570	 * If we need tight synchronization on the peer SA, then we need to
1571	 * reconsider.
1572	 */
1573
1574	/* Use address family to select IPv6/IPv4 */
1575	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1576	if (isv6) {
1577		sp = &ahstack->ah_sadb.s_v6;
1578	} else {
1579		sp = &ahstack->ah_sadb.s_v4;
1580		ASSERT(assoc->ipsa_addrfam == AF_INET);
1581	}
1582	if (inbound) {
1583		inassoc = assoc;
1584		if (isv6)
1585			outhash = OUTBOUND_HASH_V6(sp,
1586			    *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1587		else
1588			outhash = OUTBOUND_HASH_V4(sp,
1589			    *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1590		bucket = &sp->sdb_of[outhash];
1591
1592		mutex_enter(&bucket->isaf_lock);
1593		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1594		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1595		    inassoc->ipsa_addrfam);
1596		mutex_exit(&bucket->isaf_lock);
1597		if (outassoc == NULL) {
1598			/* Q: Do we wish to set haspeer == B_FALSE? */
1599			ah0dbg(("ah_set_usetime: "
1600			    "can't find peer for inbound.\n"));
1601			sadb_set_usetime(inassoc);
1602			return;
1603		}
1604	} else {
1605		outassoc = assoc;
1606		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1607		mutex_enter(&bucket->isaf_lock);
1608		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1609		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1610		    outassoc->ipsa_addrfam);
1611		mutex_exit(&bucket->isaf_lock);
1612		if (inassoc == NULL) {
1613			/* Q: Do we wish to set haspeer == B_FALSE? */
1614			ah0dbg(("ah_set_usetime: "
1615			    "can't find peer for outbound.\n"));
1616			sadb_set_usetime(outassoc);
1617			return;
1618		}
1619	}
1620
1621	/* Update usetime on both. */
1622	sadb_set_usetime(inassoc);
1623	sadb_set_usetime(outassoc);
1624
1625	/*
1626	 * REFRELE any peer SA.
1627	 *
1628	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1629	 * them in { }.
1630	 */
1631	if (inbound) {
1632		IPSA_REFRELE(outassoc);
1633	} else {
1634		IPSA_REFRELE(inassoc);
1635	}
1636}
1637
1638/* Refactor me */
1639/*
1640 * Add a number of bytes to what the SA has protected so far.  Return
1641 * B_TRUE if the SA can still protect that many bytes.
1642 *
1643 * Caller must REFRELE the passed-in assoc.  This function must REFRELE
1644 * any obtained peer SA.
1645 */
1646static boolean_t
1647ah_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
1648{
1649	ipsa_t *inassoc, *outassoc;
1650	isaf_t *bucket;
1651	boolean_t inrc, outrc, isv6;
1652	sadb_t *sp;
1653	int outhash;
1654	netstack_t	*ns = assoc->ipsa_netstack;
1655	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1656
1657	/* No peer?  No problem! */
1658	if (!assoc->ipsa_haspeer) {
1659		return (sadb_age_bytes(ahstack->ah_pfkey_q, assoc, bytes,
1660		    B_TRUE));
1661	}
1662
1663	/*
1664	 * Otherwise, we want to grab both the original assoc and its peer.
1665	 * There might be a race for this, but if it's a real race, two
1666	 * expire messages may occur.  We limit this by only sending the
1667	 * expire message on one of the peers, we'll pick the inbound
1668	 * arbitrarily.
1669	 *
1670	 * If we need tight synchronization on the peer SA, then we need to
1671	 * reconsider.
1672	 */
1673
1674	/* Pick v4/v6 bucket based on addrfam. */
1675	isv6 = (assoc->ipsa_addrfam == AF_INET6);
1676	if (isv6) {
1677		sp = &ahstack->ah_sadb.s_v6;
1678	} else {
1679		sp = &ahstack->ah_sadb.s_v4;
1680		ASSERT(assoc->ipsa_addrfam == AF_INET);
1681	}
1682	if (inbound) {
1683		inassoc = assoc;
1684		if (isv6)
1685			outhash = OUTBOUND_HASH_V6(sp,
1686			    *((in6_addr_t *)&inassoc->ipsa_dstaddr));
1687		else
1688			outhash = OUTBOUND_HASH_V4(sp,
1689			    *((ipaddr_t *)&inassoc->ipsa_dstaddr));
1690		bucket = &sp->sdb_of[outhash];
1691		mutex_enter(&bucket->isaf_lock);
1692		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
1693		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
1694		    inassoc->ipsa_addrfam);
1695		mutex_exit(&bucket->isaf_lock);
1696		if (outassoc == NULL) {
1697			/* Q: Do we wish to set haspeer == B_FALSE? */
1698			ah0dbg(("ah_age_bytes: "
1699			    "can't find peer for inbound.\n"));
1700			return (sadb_age_bytes(ahstack->ah_pfkey_q, inassoc,
1701			    bytes, B_TRUE));
1702		}
1703	} else {
1704		outassoc = assoc;
1705		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
1706		mutex_enter(&bucket->isaf_lock);
1707		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
1708		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
1709		    outassoc->ipsa_addrfam);
1710		mutex_exit(&bucket->isaf_lock);
1711		if (inassoc == NULL) {
1712			/* Q: Do we wish to set haspeer == B_FALSE? */
1713			ah0dbg(("ah_age_bytes: "
1714			    "can't find peer for outbound.\n"));
1715			return (sadb_age_bytes(ahstack->ah_pfkey_q, outassoc,
1716			    bytes, B_TRUE));
1717		}
1718	}
1719
1720	inrc = sadb_age_bytes(ahstack->ah_pfkey_q, inassoc, bytes, B_TRUE);
1721	outrc = sadb_age_bytes(ahstack->ah_pfkey_q, outassoc, bytes, B_FALSE);
1722
1723	/*
1724	 * REFRELE any peer SA.
1725	 *
1726	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
1727	 * them in { }.
1728	 */
1729	if (inbound) {
1730		IPSA_REFRELE(outassoc);
1731	} else {
1732		IPSA_REFRELE(inassoc);
1733	}
1734
1735	return (inrc && outrc);
1736}
1737
1738/*
1739 * Perform the really difficult work of inserting the proposed situation.
1740 * Called while holding the algorithm lock.
1741 */
1742static void
1743ah_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs,
1744    netstack_t *ns)
1745{
1746	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
1747	ipsec_action_t *ap;
1748	ipsec_prot_t *prot;
1749	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1750	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1751
1752	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1753
1754	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
1755	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
1756	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
1757
1758	prop->sadb_prop_replay = ahstack->ipsecah_replay_size;
1759
1760	/*
1761	 * Based upon algorithm properties, and what-not, prioritize a
1762	 * proposal, based on the ordering of the AH algorithms in the
1763	 * alternatives in the policy rule or socket that was placed
1764	 * in the acquire record.
1765	 */
1766
1767	for (ap = acqrec->ipsacq_act; ap != NULL;
1768	    ap = ap->ipa_next) {
1769		ipsec_alginfo_t *aalg;
1770
1771		if ((ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY) ||
1772		    (!ap->ipa_act.ipa_apply.ipp_use_ah))
1773			continue;
1774
1775		prot = &ap->ipa_act.ipa_apply;
1776
1777		ASSERT(prot->ipp_auth_alg > 0);
1778
1779		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
1780		    [prot->ipp_auth_alg];
1781		if (aalg == NULL || !ALG_VALID(aalg))
1782			continue;
1783
1784		/* XXX check aalg for duplicates??.. */
1785
1786		comb->sadb_comb_flags = 0;
1787		comb->sadb_comb_reserved = 0;
1788		comb->sadb_comb_encrypt = 0;
1789		comb->sadb_comb_encrypt_minbits = 0;
1790		comb->sadb_comb_encrypt_maxbits = 0;
1791
1792		comb->sadb_comb_auth = aalg->alg_id;
1793		comb->sadb_comb_auth_minbits =
1794		    MAX(prot->ipp_ah_minbits, aalg->alg_ef_minbits);
1795		comb->sadb_comb_auth_maxbits =
1796		    MIN(prot->ipp_ah_maxbits, aalg->alg_ef_maxbits);
1797
1798		/*
1799		 * The following may be based on algorithm
1800		 * properties, but in the meantime, we just pick
1801		 * some good, sensible numbers.  Key mgmt. can
1802		 * (and perhaps should) be the place to finalize
1803		 * such decisions.
1804		 */
1805
1806		/*
1807		 * No limits on allocations, since we really don't
1808		 * support that concept currently.
1809		 */
1810		comb->sadb_comb_soft_allocations = 0;
1811		comb->sadb_comb_hard_allocations = 0;
1812
1813		/*
1814		 * These may want to come from policy rule..
1815		 */
1816		comb->sadb_comb_soft_bytes =
1817		    ahstack->ipsecah_default_soft_bytes;
1818		comb->sadb_comb_hard_bytes =
1819		    ahstack->ipsecah_default_hard_bytes;
1820		comb->sadb_comb_soft_addtime =
1821		    ahstack->ipsecah_default_soft_addtime;
1822		comb->sadb_comb_hard_addtime =
1823		    ahstack->ipsecah_default_hard_addtime;
1824		comb->sadb_comb_soft_usetime =
1825		    ahstack->ipsecah_default_soft_usetime;
1826		comb->sadb_comb_hard_usetime =
1827		    ahstack->ipsecah_default_hard_usetime;
1828
1829		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
1830		if (--combs == 0)
1831			return;	/* out of space.. */
1832		comb++;
1833	}
1834}
1835
1836/*
1837 * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
1838 */
1839static void
1840ah_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
1841{
1842	uint_t combs;
1843	sadb_msg_t *samsg;
1844	sadb_prop_t *prop;
1845	mblk_t *pfkeymp, *msgmp;
1846	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
1847	ipsec_stack_t	*ipss = ns->netstack_ipsec;
1848
1849	AH_BUMP_STAT(ahstack, acquire_requests);
1850
1851	if (ahstack->ah_pfkey_q == NULL) {
1852		mutex_exit(&acqrec->ipsacq_lock);
1853		return;
1854	}
1855
1856	/* Set up ACQUIRE. */
1857	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_AH,
1858	    ns->netstack_ipsec);
1859	if (pfkeymp == NULL) {
1860		ah0dbg(("sadb_setup_acquire failed.\n"));
1861		mutex_exit(&acqrec->ipsacq_lock);
1862		return;
1863	}
1864	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
1865	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
1866	msgmp = pfkeymp->b_cont;
1867	samsg = (sadb_msg_t *)(msgmp->b_rptr);
1868
1869	/* Insert proposal here. */
1870
1871	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
1872	ah_insert_prop(prop, acqrec, combs, ns);
1873	samsg->sadb_msg_len += prop->sadb_prop_len;
1874	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
1875
1876	mutex_exit(&ipss->ipsec_alg_lock);
1877
1878	/*
1879	 * Must mutex_exit() before sending PF_KEY message up, in
1880	 * order to avoid recursive mutex_enter() if there are no registered
1881	 * listeners.
1882	 *
1883	 * Once I've sent the message, I'm cool anyway.
1884	 */
1885	mutex_exit(&acqrec->ipsacq_lock);
1886	if (extended != NULL) {
1887		putnext(ahstack->ah_pfkey_q, extended);
1888	}
1889	putnext(ahstack->ah_pfkey_q, pfkeymp);
1890}
1891
/* Refactor me */
/*
 * Handle the SADB_GETSPI message.  Create a larval SA.
 *
 * A candidate SPI is generated and handed to sadb_getspi(), which builds
 * the new SA ("newbie").  The SA is then checked for collisions against
 * both the outbound and inbound buckets and, if unique, inserted into the
 * inbound bucket with a hard expiry of now + ipsecah_larval_timeout.
 *
 * On success the incoming message is rewritten in place into the reply
 * (the SPI range extension becomes an SA extension, KEYSOCK_IN becomes
 * KEYSOCK_OUT) and sent back up ah_pfkey_q.  On any failure the message is
 * passed to sadb_pfkey_error() instead.
 */
static void
ah_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecah_stack_t *ahstack)
{
	ipsa_t *newbie, *target;
	isaf_t *outbound, *inbound;
	/*
	 * NOTE(review): diagnostic is not initialized here; presumably
	 * sadb_getspi() always sets it before returning a failure -- confirm.
	 */
	int rc, diagnostic;
	sadb_sa_t *assoc;
	keysock_out_t *kso;
	uint32_t newspi;

	/*
	 * Randomly generate a proposed SPI value.  The cl_inet_getspi hook
	 * is used when it is set, otherwise the kernel pseudo-random source.
	 */
	if (cl_inet_getspi != NULL) {
		cl_inet_getspi(ahstack->ipsecah_netstack->netstack_stackid,
		    IPPROTO_AH, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
	} else {
		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
		    sizeof (uint32_t));
	}
	newbie = sadb_getspi(ksi, newspi, &diagnostic,
	    ahstack->ipsecah_netstack, IPPROTO_AH);

	/*
	 * Two distinct failure values: NULL is reported as ENOMEM and
	 * (ipsa_t *)-1 as EINVAL.
	 */
	if (newbie == NULL) {
		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, ENOMEM, diagnostic,
		    ksi->ks_in_serial);
		return;
	} else if (newbie == (ipsa_t *)-1) {
		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, EINVAL, diagnostic,
		    ksi->ks_in_serial);
		return;
	}

	/*
	 * XXX - We may randomly collide.  We really should recover from this.
	 *	 Unfortunately, that could require spending way-too-much-time
	 *	 in here.  For now, let the user retry.
	 */

	/* Pick the outbound and inbound buckets for the SA's family. */
	if (newbie->ipsa_addrfam == AF_INET6) {
		outbound = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v6,
		    newbie->ipsa_spi);
	} else {
		outbound = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4,
		    *(uint32_t *)(newbie->ipsa_dstaddr));
		inbound = INBOUND_BUCKET(&ahstack->ah_sadb.s_v4,
		    newbie->ipsa_spi);
	}

	/* Lock order used here: outbound bucket first, then inbound. */
	mutex_enter(&outbound->isaf_lock);
	mutex_enter(&inbound->isaf_lock);

	/*
	 * Check for collisions (i.e. did sadb_getspi() return with something
	 * that already exists?).
	 *
	 * Try outbound first.  Even though SADB_GETSPI is traditionally
	 * for inbound SAs, you never know what a user might do.
	 */
	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
	if (target == NULL) {
		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
		    newbie->ipsa_addrfam);
	}

	/*
	 * I don't have collisions elsewhere!
	 * (Nor will I because I'm still holding inbound/outbound locks.)
	 */

	if (target != NULL) {
		/* Collision: drop the reference the lookup handed back. */
		rc = EEXIST;
		IPSA_REFRELE(target);
	} else {
		/*
		 * sadb_insertassoc() also checks for collisions, so
		 * if there's a colliding larval entry, rc will be set
		 * to EEXIST.
		 */
		rc = sadb_insertassoc(newbie, inbound);
		/* Larval SAs hard-expire ipsecah_larval_timeout from now. */
		newbie->ipsa_hardexpiretime = gethrestime_sec();
		newbie->ipsa_hardexpiretime += ahstack->ipsecah_larval_timeout;
	}

	/*
	 * Can exit outbound mutex.  Hold inbound until we're done with
	 * newbie.
	 */
	mutex_exit(&outbound->isaf_lock);

	if (rc != 0) {
		mutex_exit(&inbound->isaf_lock);
		IPSA_REFRELE(newbie);
		sadb_pfkey_error(ahstack->ah_pfkey_q, mp, rc,
		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
		return;
	}

	/* Can write here because I'm still holding the bucket lock. */
	newbie->ipsa_type = SADB_SATYPE_AH;

	/*
	 * Construct successful return message.  We have one thing going
	 * for us in PF_KEY v2.  That's the fact that
	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
	 * so the SPI range extension is overwritten in place as an SA
	 * extension carrying the new SPI.
	 */
	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
	assoc->sadb_sa_exttype = SADB_EXT_SA;
	assoc->sadb_sa_spi = newbie->ipsa_spi;
	/* One 64-bit store zeroes everything from sadb_sa_replay onward. */
	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
	mutex_exit(&inbound->isaf_lock);

	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
	kso = (keysock_out_t *)ksi;
	kso->ks_out_len = sizeof (*kso);
	kso->ks_out_serial = ksi->ks_in_serial;
	kso->ks_out_type = KEYSOCK_OUT;

	/*
	 * Can safely putnext() to ah_pfkey_q, because this is a turnaround
	 * from the ah_pfkey_q.
	 */
	putnext(ahstack->ah_pfkey_q, mp);
}
2024
2025/*
2026 * IPv6 sends up the ICMP errors for validation and the removal of the AH
2027 * header.
 * If successful, the mp has been modified to not include the AH header so
2029 * that the caller can fanout to the ULP's icmp error handler.
2030 */
2031static mblk_t *
2032ah_icmp_error_v6(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
2033{
2034	ip6_t *ip6h, *oip6h;
2035	uint16_t hdr_length, ah_length;
2036	uint8_t *nexthdrp;
2037	ah_t *ah;
2038	icmp6_t *icmp6;
2039	isaf_t *isaf;
2040	ipsa_t *assoc;
2041	uint8_t *post_ah_ptr;
2042	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2043
2044	/*
2045	 * Eat the cost of a pullupmsg() for now.  It makes the rest of this
2046	 * code far less convoluted.
2047	 */
2048	if (!pullupmsg(mp, -1) ||
2049	    !ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, &hdr_length,
2050	    &nexthdrp) ||
2051	    mp->b_rptr + hdr_length + sizeof (icmp6_t) + sizeof (ip6_t) +
2052	    sizeof (ah_t) > mp->b_wptr) {
2053		IP_AH_BUMP_STAT(ipss, in_discards);
2054		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2055		    DROPPER(ipss, ipds_ah_nomem),
2056		    &ahstack->ah_dropper);
2057		return (NULL);
2058	}
2059
2060	oip6h = (ip6_t *)mp->b_rptr;
2061	icmp6 = (icmp6_t *)((uint8_t *)oip6h + hdr_length);
2062	ip6h = (ip6_t *)(icmp6 + 1);
2063	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
2064		IP_AH_BUMP_STAT(ipss, in_discards);
2065		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2066		    DROPPER(ipss, ipds_ah_bad_v6_hdrs),
2067		    &ahstack->ah_dropper);
2068		return (NULL);
2069	}
2070	ah = (ah_t *)((uint8_t *)ip6h + hdr_length);
2071
2072	isaf = OUTBOUND_BUCKET_V6(&ahstack->ah_sadb.s_v6, ip6h->ip6_dst);
2073	mutex_enter(&isaf->isaf_lock);
2074	assoc = ipsec_getassocbyspi(isaf, ah->ah_spi,
2075	    (uint32_t *)&ip6h->ip6_src, (uint32_t *)&ip6h->ip6_dst, AF_INET6);
2076	mutex_exit(&isaf->isaf_lock);
2077
2078	if (assoc == NULL) {
2079		IP_AH_BUMP_STAT(ipss, lookup_failure);
2080		IP_AH_BUMP_STAT(ipss, in_discards);
2081		if (ahstack->ipsecah_log_unknown_spi) {
2082			ipsec_assocfailure(info.mi_idnum, 0, 0,
2083			    SL_CONSOLE | SL_WARN | SL_ERROR,
2084			    "Bad ICMP message - No association for the "
2085			    "attached AH header whose spi is 0x%x, "
2086			    "sender is 0x%x\n",
2087			    ah->ah_spi, &oip6h->ip6_src, AF_INET6,
2088			    ahstack->ipsecah_netstack);
2089		}
2090		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2091		    DROPPER(ipss, ipds_ah_no_sa),
2092		    &ahstack->ah_dropper);
2093		return (NULL);
2094	}
2095
2096	IPSA_REFRELE(assoc);
2097
2098	/*
2099	 * There seems to be a valid association. If there is enough of AH
2100	 * header remove it, otherwise bail.  One could check whether it has
2101	 * complete AH header plus 8 bytes but it does not make sense if an
2102	 * icmp error is returned for ICMP messages e.g ICMP time exceeded,
2103	 * that are being sent up. Let the caller figure out.
2104	 *
2105	 * NOTE: ah_length is the number of 32 bit words minus 2.
2106	 */
2107	ah_length = (ah->ah_length << 2) + 8;
2108	post_ah_ptr = (uint8_t *)ah + ah_length;
2109
2110	if (post_ah_ptr > mp->b_wptr) {
2111		IP_AH_BUMP_STAT(ipss, in_discards);
2112		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2113		    DROPPER(ipss, ipds_ah_bad_length),
2114		    &ahstack->ah_dropper);
2115		return (NULL);
2116	}
2117
2118	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - ah_length);
2119	*nexthdrp = ah->ah_nexthdr;
2120	ovbcopy(post_ah_ptr, ah,
2121	    (size_t)((uintptr_t)mp->b_wptr - (uintptr_t)post_ah_ptr));
2122	mp->b_wptr -= ah_length;
2123
2124	return (mp);
2125}
2126
2127/*
2128 * IP sends up the ICMP errors for validation and the removal of
2129 * the AH header.
2130 * If succesful, the mp has been modified to not include the AH header so
2131 * that the caller can fanout to the ULP's icmp error handler.
2132 */
2133static mblk_t *
2134ah_icmp_error_v4(mblk_t *mp, ip_recv_attr_t *ira, ipsecah_stack_t *ahstack)
2135{
2136	mblk_t *mp1;
2137	icmph_t *icmph;
2138	int iph_hdr_length;
2139	int hdr_length;
2140	isaf_t *hptr;
2141	ipsa_t *assoc;
2142	int ah_length;
2143	ipha_t *ipha;
2144	ipha_t *oipha;
2145	ah_t *ah;
2146	uint32_t length;
2147	int alloc_size;
2148	uint8_t nexthdr;
2149	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2150
2151	oipha = ipha = (ipha_t *)mp->b_rptr;
2152	iph_hdr_length = IPH_HDR_LENGTH(ipha);
2153	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2154
2155	ipha = (ipha_t *)&icmph[1];
2156	hdr_length = IPH_HDR_LENGTH(ipha);
2157
2158	/*
2159	 * See if we have enough to locate the SPI
2160	 */
2161	if ((uchar_t *)ipha + hdr_length + 8 > mp->b_wptr) {
2162		if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length + 8 -
2163		    mp->b_rptr)) {
2164			ipsec_rl_strlog(ahstack->ipsecah_netstack,
2165			    info.mi_idnum, 0, 0,
2166			    SL_WARN | SL_ERROR,
2167			    "ICMP error: Small AH header\n");
2168			IP_AH_BUMP_STAT(ipss, in_discards);
2169			ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2170			    DROPPER(ipss, ipds_ah_bad_length),
2171			    &ahstack->ah_dropper);
2172			return (NULL);
2173		}
2174		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2175		ipha = (ipha_t *)&icmph[1];
2176	}
2177
2178	ah = (ah_t *)((uint8_t *)ipha + hdr_length);
2179	nexthdr = ah->ah_nexthdr;
2180
2181	hptr = OUTBOUND_BUCKET_V4(&ahstack->ah_sadb.s_v4, ipha->ipha_dst);
2182	mutex_enter(&hptr->isaf_lock);
2183	assoc = ipsec_getassocbyspi(hptr, ah->ah_spi,
2184	    (uint32_t *)&ipha->ipha_src, (uint32_t *)&ipha->ipha_dst, AF_INET);
2185	mutex_exit(&hptr->isaf_lock);
2186
2187	if (assoc == NULL) {
2188		IP_AH_BUMP_STAT(ipss, lookup_failure);
2189		IP_AH_BUMP_STAT(ipss, in_discards);
2190		if (ahstack->ipsecah_log_unknown_spi) {
2191			ipsec_assocfailure(info.mi_idnum, 0, 0,
2192			    SL_CONSOLE | SL_WARN | SL_ERROR,
2193			    "Bad ICMP message - No association for the "
2194			    "attached AH header whose spi is 0x%x, "
2195			    "sender is 0x%x\n",
2196			    ah->ah_spi, &oipha->ipha_src, AF_INET,
2197			    ahstack->ipsecah_netstack);
2198		}
2199		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2200		    DROPPER(ipss, ipds_ah_no_sa),
2201		    &ahstack->ah_dropper);
2202		return (NULL);
2203	}
2204
2205	IPSA_REFRELE(assoc);
2206	/*
2207	 * There seems to be a valid association. If there
2208	 * is enough of AH header remove it, otherwise remove
2209	 * as much as possible and send it back. One could check
2210	 * whether it has complete AH header plus 8 bytes but it
2211	 * does not make sense if an icmp error is returned for
2212	 * ICMP messages e.g ICMP time exceeded, that are being
2213	 * sent up. Let the caller figure out.
2214	 *
2215	 * NOTE: ah_length is the number of 32 bit words minus 2.
2216	 */
2217	ah_length = (ah->ah_length << 2) + 8;
2218
2219	if ((uchar_t *)ipha + hdr_length + ah_length > mp->b_wptr) {
2220		if (mp->b_cont == NULL) {
2221			/*
2222			 * There is nothing to pullup. Just remove as
2223			 * much as possible. This is a common case for
2224			 * IPV4.
2225			 */
2226			ah_length = (mp->b_wptr - ((uchar_t *)ipha +
2227			    hdr_length));
2228			goto done;
2229		}
2230		/* Pullup the full ah header */
2231		if (!pullupmsg(mp, (uchar_t *)ah + ah_length - mp->b_rptr)) {
2232			/*
2233			 * pullupmsg could have failed if there was not
2234			 * enough to pullup or memory allocation failed.
2235			 * We tried hard, give up now.
2236			 */
2237			IP_AH_BUMP_STAT(ipss, in_discards);
2238			ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2239			    DROPPER(ipss, ipds_ah_nomem),
2240			    &ahstack->ah_dropper);
2241			return (NULL);
2242		}
2243		icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2244		ipha = (ipha_t *)&icmph[1];
2245	}
2246done:
2247	/*
2248	 * Remove the AH header and change the protocol.
2249	 * Don't update the spi fields in the ip_recv_attr_t
2250	 * as we are called just to validate the
2251	 * message attached to the ICMP message.
2252	 *
2253	 * If we never pulled up since all of the message
2254	 * is in one single mblk, we can't remove the AH header
2255	 * by just setting the b_wptr to the beginning of the
2256	 * AH header. We need to allocate a mblk that can hold
2257	 * up until the inner IP header and copy them.
2258	 */
2259	alloc_size = iph_hdr_length + sizeof (icmph_t) + hdr_length;
2260
2261	if ((mp1 = allocb(alloc_size, BPRI_LO)) == NULL) {
2262		IP_AH_BUMP_STAT(ipss, in_discards);
2263		ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2264		    DROPPER(ipss, ipds_ah_nomem),
2265		    &ahstack->ah_dropper);
2266		return (NULL);
2267	}
2268	bcopy(mp->b_rptr, mp1->b_rptr, alloc_size);
2269	mp1->b_wptr += alloc_size;
2270
2271	/*
2272	 * Skip whatever we have copied and as much of AH header
2273	 * possible. If we still have something left in the original
2274	 * message, tag on.
2275	 */
2276	mp->b_rptr = (uchar_t *)ipha + hdr_length + ah_length;
2277
2278	if (mp->b_rptr != mp->b_wptr) {
2279		mp1->b_cont = mp;
2280	} else {
2281		if (mp->b_cont != NULL)
2282			mp1->b_cont = mp->b_cont;
2283		freeb(mp);
2284	}
2285
2286	ipha = (ipha_t *)(mp1->b_rptr + iph_hdr_length + sizeof (icmph_t));
2287	ipha->ipha_protocol = nexthdr;
2288	length = ntohs(ipha->ipha_length);
2289	length -= ah_length;
2290	ipha->ipha_length = htons((uint16_t)length);
2291	ipha->ipha_hdr_checksum = 0;
2292	ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
2293
2294	return (mp1);
2295}
2296
2297/*
2298 * IP calls this to validate the ICMP errors that
2299 * we got from the network.
2300 */
2301mblk_t *
2302ipsecah_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
2303{
2304	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
2305	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2306
2307	if (ira->ira_flags & IRAF_IS_IPV4)
2308		return (ah_icmp_error_v4(data_mp, ira, ahstack));
2309	else
2310		return (ah_icmp_error_v6(data_mp, ira, ahstack));
2311}
2312
2313static int
2314ah_fix_tlv_options_v6(uint8_t *oi_opt, uint8_t *pi_opt, uint_t ehdrlen,
2315    uint8_t hdr_type, boolean_t copy_always)
2316{
2317	uint8_t opt_type;
2318	uint_t optlen;
2319
2320	ASSERT(hdr_type == IPPROTO_DSTOPTS || hdr_type == IPPROTO_HOPOPTS);
2321
2322	/*
2323	 * Copy the next header and hdr ext. len of the HOP-by-HOP
2324	 * and Destination option.
2325	 */
2326	*pi_opt++ = *oi_opt++;
2327	*pi_opt++ = *oi_opt++;
2328	ehdrlen -= 2;
2329
2330	/*
2331	 * Now handle all the TLV encoded options.
2332	 */
2333	while (ehdrlen != 0) {
2334		opt_type = *oi_opt;
2335
2336		if (opt_type == IP6OPT_PAD1) {
2337			optlen = 1;
2338		} else {
2339			if (ehdrlen < 2)
2340				goto bad_opt;
2341			optlen = 2 + oi_opt[1];
2342			if (optlen > ehdrlen)
2343				goto bad_opt;
2344		}
2345		if (copy_always || !(opt_type & IP6OPT_MUTABLE)) {
2346			bcopy(oi_opt, pi_opt, optlen);
2347		} else {
2348			if (optlen == 1) {
2349				*pi_opt = 0;
2350			} else {
2351				/*
2352				 * Copy the type and data length fields.
2353				 * Zero the option data by skipping
2354				 * option type and option data len
2355				 * fields.
2356				 */
2357				*pi_opt = *oi_opt;
2358				*(pi_opt + 1) = *(oi_opt + 1);
2359				bzero(pi_opt + 2, optlen - 2);
2360			}
2361		}
2362		ehdrlen -= optlen;
2363		oi_opt += optlen;
2364		pi_opt += optlen;
2365	}
2366	return (0);
2367bad_opt:
2368	return (-1);
2369}
2370
/*
 * Construct a pseudo header for AH, processing all the options.
 *
 * oip6h is the IPv6 header of the incoming or outgoing packet.
 * ip6h is the pointer to the pseudo headers IPV6 header. All
 * the space needed for the options have been allocated including
 * the AH header.
 *
 * outbound selects outbound handling: the routing header is rearranged to
 * its at-the-receiver form, and AH is inserted at the first header that is
 * not a hop-by-hop, routing or (non-terminal) destination options header.
 * For inbound packets the walk stops at the existing AH header.
 *
 * If copy_always is set, all the options that appear before AH are copied
 * blindly without checking for IP6OPT_MUTABLE. This is used by
 * ah_auth_out_done().  Please refer to that function for details.
 *
 * Returns the offset of the AH header from the start of the pseudo
 * header, or 0 if an options header could not be parsed.
 *
 * NOTE :
 *
 * *  AH header is never copied in this function even if copy_always
 *    is set. It just returns the ah_offset - offset of the AH header
 *    and the caller needs to do the copying. This is done so that we
 *    don't have to pass extra arguments e.g. SA etc. and also,
 *    it is not needed when ah_auth_out_done is calling this function.
 */
static uint_t
ah_fix_phdr_v6(ip6_t *ip6h, ip6_t *oip6h, boolean_t outbound,
    boolean_t copy_always)
{
	uint8_t	*oi_opt;
	uint8_t	*pi_opt;
	uint8_t nexthdr;
	uint8_t *prev_nexthdr;
	ip6_hbh_t *hbhhdr;
	ip6_dest_t *dsthdr = NULL;
	ip6_rthdr0_t *rthdr;
	int ehdrlen;
	ah_t *ah;
	int ret;

	/*
	 * In the outbound case for source route, ULP has already moved
	 * the first hop, which is now in ip6_dst. We need to re-arrange
	 * the header to make it look like how it would appear in the
	 * receiver i.e
	 *
	 * Because of ip_massage_options_v6 the header looks like
	 * this :
	 *
	 * ip6_src = S, ip6_dst = I1. followed by I2,I3,D.
	 *
	 * When it reaches the receiver, it would look like
	 *
	 * ip6_src = S, ip6_dst = D. followed by I1,I2,I3.
	 *
	 * NOTE : We assume that there are no problems with the options
	 * as IP should have already checked this.
	 */

	/* Both cursors start just past the fixed IPv6 header. */
	oi_opt = (uchar_t *)&oip6h[1];
	pi_opt = (uchar_t *)&ip6h[1];

	/*
	 * We set the prev_nexthdr properly in the pseudo header.
	 * After we finish authentication and come back from the
	 * algorithm module, pseudo header will become the real
	 * IP header.
	 */
	prev_nexthdr = (uint8_t *)&ip6h->ip6_nxt;
	nexthdr = oip6h->ip6_nxt;
	/* Assume IP has already stripped it */
	ASSERT(nexthdr != IPPROTO_FRAGMENT);
	ah = NULL;
	dsthdr = NULL;
	/*
	 * Walk the extension header chain.  Each case that handles a header
	 * sets ehdrlen; the cursors are advanced by it at the bottom of the
	 * loop.  The loop is left only through one of the return statements.
	 */
	for (;;) {
		switch (nexthdr) {
		case IPPROTO_HOPOPTS:
			hbhhdr = (ip6_hbh_t *)oi_opt;
			nexthdr = hbhhdr->ip6h_nxt;
			/* Length field is in 8-byte units, minus the first. */
			ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
			ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
			    IPPROTO_HOPOPTS, copy_always);
			/*
			 * Return a zero offset indicating error if there
			 * was error.
			 */
			if (ret == -1)
				return (0);
			/* From here on, track the copy in the pseudo header. */
			hbhhdr = (ip6_hbh_t *)pi_opt;
			prev_nexthdr = (uint8_t *)&hbhhdr->ip6h_nxt;
			break;
		case IPPROTO_ROUTING:
			rthdr = (ip6_rthdr0_t *)oi_opt;
			nexthdr = rthdr->ip6r0_nxt;
			ehdrlen = 8 * (rthdr->ip6r0_len + 1);
			if (!copy_always && outbound) {
				int i, left;
				ip6_rthdr0_t *prthdr;
				in6_addr_t *ap, *pap;

				left = rthdr->ip6r0_segleft;
				prthdr = (ip6_rthdr0_t *)pi_opt;
				pap = (in6_addr_t *)(prthdr + 1);
				ap = (in6_addr_t *)(rthdr + 1);
				/*
				 * First eight bytes except seg_left
				 * does not change en route.
				 */
				bcopy(oi_opt, pi_opt, 8);
				prthdr->ip6r0_segleft = 0;
				/*
				 * First address has been moved to
				 * the destination address of the
				 * ip header by ip_massage_options_v6.
				 * And the real destination address is
				 * in the last address part of the
				 * option.
				 */
				*pap = oip6h->ip6_dst;
				/*
				 * NOTE(review): for left >= 2 this loop
				 * writes pap[1] .. pap[left - 2] only, so
				 * pap[left - 1] is not written here even
				 * though the layout described at the top of
				 * this function has `left' addresses after
				 * the rearrangement.  Looks like the bound
				 * may have been meant to be `i < left' --
				 * confirm against ip_massage_options_v6().
				 *
				 * NOTE(review): left == 0 is not guarded;
				 * ap + left - 1 would then point before the
				 * first address.  Presumably excluded by the
				 * caller -- verify.
				 */
				for (i = 1; i < left - 1; i++)
					pap[i] = ap[i - 1];
				ip6h->ip6_dst = *(ap + left - 1);
			} else {
				bcopy(oi_opt, pi_opt, ehdrlen);
			}
			rthdr = (ip6_rthdr0_t *)pi_opt;
			prev_nexthdr = (uint8_t *)&rthdr->ip6r0_nxt;
			break;
		case IPPROTO_DSTOPTS:
			/*
			 * Destination options are tricky.  If there is
			 * a terminal (e.g. non-IPv6-extension) header
			 * following the destination options, don't
			 * reset prev_nexthdr or advance the AH insertion
			 * point and just treat this as a terminal header.
			 *
			 * If this is an inbound packet, just deal with
			 * it as is.
			 */
			dsthdr = (ip6_dest_t *)oi_opt;
			/*
			 * XXX I hope common-subexpression elimination
			 * saves us the double-evaluate.
			 */
			if (outbound && dsthdr->ip6d_nxt != IPPROTO_ROUTING &&
			    dsthdr->ip6d_nxt != IPPROTO_HOPOPTS)
				goto terminal_hdr;
			nexthdr = dsthdr->ip6d_nxt;
			ehdrlen = 8 * (dsthdr->ip6d_len + 1);
			ret = ah_fix_tlv_options_v6(oi_opt, pi_opt, ehdrlen,
			    IPPROTO_DSTOPTS, copy_always);
			/*
			 * Return a zero offset indicating error if there
			 * was error.
			 */
			if (ret == -1)
				return (0);
			/* Unlike the cases above, prev_nexthdr is left alone. */
			break;
		case IPPROTO_AH:
			/*
			 * Be conservative in what you send.  We shouldn't
			 * see two same-scoped AH's in one packet.
			 * (Inner-IP-scoped AH will be hit by terminal
			 * header of IP or IPv6.)
			 */
			ASSERT(!outbound);
			return ((uint_t)(pi_opt - (uint8_t *)ip6h));
		default:
			ASSERT(outbound);
terminal_hdr:
			/*
			 * AH goes here: point the previous header at AH and
			 * have AH point at the header it now precedes.
			 */
			*prev_nexthdr = IPPROTO_AH;
			ah = (ah_t *)pi_opt;
			ah->ah_nexthdr = nexthdr;
			return ((uint_t)(pi_opt - (uint8_t *)ip6h));
		}
		pi_opt += ehdrlen;
		oi_opt += ehdrlen;
	}
	/* NOTREACHED */
}
2546
2547static boolean_t
2548ah_finish_up(ah_t *phdr_ah, ah_t *inbound_ah, ipsa_t *assoc,
2549    int ah_data_sz, int ah_align_sz, ipsecah_stack_t *ahstack)
2550{
2551	int i;
2552
2553	/*
2554	 * Padding :
2555	 *
2556	 * 1) Authentication data may have to be padded
2557	 * before ICV calculation if ICV is not a multiple
2558	 * of 64 bits. This padding is arbitrary and transmitted
2559	 * with the packet at the end of the authentication data.
2560	 * Payload length should include the padding bytes.
2561	 *
2562	 * 2) Explicit padding of the whole datagram may be
2563	 * required by the algorithm which need not be
2564	 * transmitted. It is assumed that this will be taken
2565	 * care by the algorithm module.
2566	 */
2567	bzero(phdr_ah + 1, ah_data_sz);	/* Zero out ICV for pseudo-hdr. */
2568
2569	if (inbound_ah == NULL) {
2570		/* Outbound AH datagram. */
2571
2572		phdr_ah->ah_length = (ah_align_sz >> 2) + 1;
2573		phdr_ah->ah_reserved = 0;
2574		phdr_ah->ah_spi = assoc->ipsa_spi;
2575
2576		phdr_ah->ah_replay =
2577		    htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
2578		if (phdr_ah->ah_replay == 0 && assoc->ipsa_replay_wsize != 0) {
2579			/*
2580			 * XXX We have replay counter wrapping.  We probably
2581			 * want to nuke this SA (and its peer).
2582			 */
2583			ipsec_assocfailure(info.mi_idnum, 0, 0,
2584			    SL_ERROR | SL_CONSOLE | SL_WARN,
2585			    "Outbound AH SA (0x%x), dst %s has wrapped "
2586			    "sequence.\n", phdr_ah->ah_spi,
2587			    assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
2588			    ahstack->ipsecah_netstack);
2589
2590			sadb_replay_delete(assoc);
2591			/* Caller will free phdr_mp and return NULL. */
2592			return (B_FALSE);
2593		}
2594
2595		if (ah_data_sz != ah_align_sz) {
2596			uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2597			    ah_data_sz);
2598
2599			for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2600				pad[i] = (uchar_t)i;	/* Fill the padding */
2601			}
2602		}
2603	} else {
2604		/* Inbound AH datagram. */
2605		phdr_ah->ah_nexthdr = inbound_ah->ah_nexthdr;
2606		phdr_ah->ah_length = inbound_ah->ah_length;
2607		phdr_ah->ah_reserved = 0;
2608		ASSERT(inbound_ah->ah_spi == assoc->ipsa_spi);
2609		phdr_ah->ah_spi = inbound_ah->ah_spi;
2610		phdr_ah->ah_replay = inbound_ah->ah_replay;
2611
2612		if (ah_data_sz != ah_align_sz) {
2613			uchar_t *opad = ((uchar_t *)inbound_ah +
2614			    sizeof (ah_t) + ah_data_sz);
2615			uchar_t *pad = ((uchar_t *)phdr_ah + sizeof (ah_t) +
2616			    ah_data_sz);
2617
2618			for (i = 0; i < (ah_align_sz - ah_data_sz); i++) {
2619				pad[i] = opad[i];	/* Copy the padding */
2620			}
2621		}
2622	}
2623
2624	return (B_TRUE);
2625}
2626
2627/*
2628 * Called upon failing the inbound ICV check. The message passed as
2629 * argument is freed.
2630 */
2631static void
2632ah_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
2633{
2634	boolean_t	isv4 = (ira->ira_flags & IRAF_IS_IPV4);
2635	ipsa_t		*assoc = ira->ira_ipsec_ah_sa;
2636	int		af;
2637	void		*addr;
2638	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
2639	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
2640	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2641
2642	ASSERT(mp->b_datap->db_type == M_DATA);
2643
2644	mp->b_rptr -= ic->ic_skip_len;
2645
2646	if (isv4) {
2647		ipha_t *ipha = (ipha_t *)mp->b_rptr;
2648		addr = &ipha->ipha_dst;
2649		af = AF_INET;
2650	} else {
2651		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
2652		addr = &ip6h->ip6_dst;
2653		af = AF_INET6;
2654	}
2655
2656	/*
2657	 * Log the event. Don't print to the console, block
2658	 * potential denial-of-service attack.
2659	 */
2660	AH_BUMP_STAT(ahstack, bad_auth);
2661
2662	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
2663	    "AH Authentication failed spi %x, dst_addr %s",
2664	    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
2665
2666	IP_AH_BUMP_STAT(ipss, in_discards);
2667	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
2668	    DROPPER(ipss, ipds_ah_bad_auth),
2669	    &ahstack->ah_dropper);
2670}
2671
2672/*
2673 * Kernel crypto framework callback invoked after completion of async
2674 * crypto requests for outbound packets.
2675 */
2676static void
2677ah_kcf_callback_outbound(void *arg, int status)
2678{
2679	mblk_t		*mp = (mblk_t *)arg;
2680	mblk_t		*async_mp;
2681	netstack_t	*ns;
2682	ipsec_stack_t	*ipss;
2683	ipsecah_stack_t	*ahstack;
2684	mblk_t		*data_mp;
2685	ip_xmit_attr_t	ixas;
2686	ipsec_crypto_t	*ic;
2687	ill_t		*ill;
2688
2689	/*
2690	 * First remove the ipsec_crypto_t mblk
2691	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2692	 */
2693	async_mp = ipsec_remove_crypto_data(mp, &ic);
2694	ASSERT(async_mp != NULL);
2695
2696	/*
2697	 * Extract the ip_xmit_attr_t from the first mblk.
2698	 * Verifies that the netstack and ill is still around; could
2699	 * have vanished while kEf was doing its work.
2700	 * On succesful return we have a nce_t and the ill/ipst can't
2701	 * disappear until we do the nce_refrele in ixa_cleanup.
2702	 */
2703	data_mp = async_mp->b_cont;
2704	async_mp->b_cont = NULL;
2705	if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
2706		/* Disappeared on us - no ill/ipst for MIB */
2707		if (ixas.ixa_nce != NULL) {
2708			ill = ixas.ixa_nce->nce_ill;
2709			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2710			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
2711		}
2712		freemsg(data_mp);
2713		goto done;
2714	}
2715	ns = ixas.ixa_ipst->ips_netstack;
2716	ahstack = ns->netstack_ipsecah;
2717	ipss = ns->netstack_ipsec;
2718	ill = ixas.ixa_nce->nce_ill;
2719
2720	if (status == CRYPTO_SUCCESS) {
2721		data_mp = ah_auth_out_done(data_mp, &ixas, ic);
2722		if (data_mp == NULL)
2723			goto done;
2724
2725		(void) ip_output_post_ipsec(data_mp, &ixas);
2726	} else {
2727		/* Outbound shouldn't see invalid MAC */
2728		ASSERT(status != CRYPTO_INVALID_MAC);
2729
2730		ah1dbg(ahstack,
2731		    ("ah_kcf_callback_outbound: crypto failed with 0x%x\n",
2732		    status));
2733		AH_BUMP_STAT(ahstack, crypto_failures);
2734		AH_BUMP_STAT(ahstack, out_discards);
2735
2736		ip_drop_packet(data_mp, B_FALSE, ill,
2737		    DROPPER(ipss, ipds_ah_crypto_failed),
2738		    &ahstack->ah_dropper);
2739		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2740	}
2741done:
2742	ixa_cleanup(&ixas);
2743	(void) ipsec_free_crypto_data(mp);
2744}
2745
2746/*
2747 * Kernel crypto framework callback invoked after completion of async
2748 * crypto requests for inbound packets.
2749 */
2750static void
2751ah_kcf_callback_inbound(void *arg, int status)
2752{
2753	mblk_t		*mp = (mblk_t *)arg;
2754	mblk_t		*async_mp;
2755	netstack_t	*ns;
2756	ipsec_stack_t	*ipss;
2757	ipsecah_stack_t	*ahstack;
2758	mblk_t		*data_mp;
2759	ip_recv_attr_t	iras;
2760	ipsec_crypto_t	*ic;
2761
2762	/*
2763	 * First remove the ipsec_crypto_t mblk
2764	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
2765	 */
2766	async_mp = ipsec_remove_crypto_data(mp, &ic);
2767	ASSERT(async_mp != NULL);
2768
2769	/*
2770	 * Extract the ip_xmit_attr_t from the first mblk.
2771	 * Verifies that the netstack and ill is still around; could
2772	 * have vanished while kEf was doing its work.
2773	 */
2774	data_mp = async_mp->b_cont;
2775	async_mp->b_cont = NULL;
2776	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
2777		/* The ill or ip_stack_t disappeared on us */
2778		ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
2779		freemsg(data_mp);
2780		goto done;
2781	}
2782	ns = iras.ira_ill->ill_ipst->ips_netstack;
2783	ahstack = ns->netstack_ipsecah;
2784	ipss = ns->netstack_ipsec;
2785
2786	if (status == CRYPTO_SUCCESS) {
2787		data_mp = ah_auth_in_done(data_mp, &iras, ic);
2788		if (data_mp == NULL)
2789			goto done;
2790
2791		/* finish IPsec processing */
2792		ip_input_post_ipsec(data_mp, &iras);
2793
2794	} else if (status == CRYPTO_INVALID_MAC) {
2795		ah_log_bad_auth(data_mp, &iras, ic);
2796	} else {
2797		ah1dbg(ahstack,
2798		    ("ah_kcf_callback_inbound: crypto failed with 0x%x\n",
2799		    status));
2800		AH_BUMP_STAT(ahstack, crypto_failures);
2801		IP_AH_BUMP_STAT(ipss, in_discards);
2802		ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
2803		    DROPPER(ipss, ipds_ah_crypto_failed),
2804		    &ahstack->ah_dropper);
2805		BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2806	}
2807done:
2808	ira_cleanup(&iras, B_TRUE);
2809	(void) ipsec_free_crypto_data(mp);
2810}
2811
2812/*
2813 * Invoked on kernel crypto failure during inbound and outbound processing.
2814 */
2815static void
2816ah_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
2817    ill_t *ill, ipsecah_stack_t *ahstack)
2818{
2819	ipsec_stack_t	*ipss = ahstack->ipsecah_netstack->netstack_ipsec;
2820
2821	ah1dbg(ahstack, ("crypto failed for %s AH with 0x%x\n",
2822	    is_inbound ? "inbound" : "outbound", kef_rc));
2823	ip_drop_packet(data_mp, is_inbound, ill,
2824	    DROPPER(ipss, ipds_ah_crypto_failed),
2825	    &ahstack->ah_dropper);
2826	AH_BUMP_STAT(ahstack, crypto_failures);
2827	if (is_inbound)
2828		IP_AH_BUMP_STAT(ipss, in_discards);
2829	else
2830		AH_BUMP_STAT(ahstack, out_discards);
2831}
2832
/*
 * Helper macros for the ah_submit_req_{inbound,outbound}() functions.
 *
 * Each macro expands to a single statement (do { ... } while (0)), so it
 * can be used anywhere a statement is allowed, including as the unbraced
 * body of an if/else.  Every argument is parenthesized in the expansion.
 * The first argument of each macro is evaluated more than once and must
 * therefore be free of side effects.
 */

/*
 * Set up an asynchronous call request.  _cr MUST point to a modifiable
 * crypto_call_req_t; _mp is handed back to _callback on completion.
 */
#define	AH_INIT_CALLREQ(_cr, _mp, _callback) do {			\
	(_cr)->cr_flag = CRYPTO_SKIP_REQID | CRYPTO_ALWAYS_QUEUE;	\
	(_cr)->cr_callback_arg = (_mp);					\
	(_cr)->cr_callback_func = (_callback);				\
	/* CONSTCOND */							\
} while (0)

/*
 * Describe the data to be authenticated: msglen bytes starting at the
 * beginning of the mblk chain mblk.
 */
#define	AH_INIT_CRYPTO_DATA(data, msglen, mblk) do {			\
	(data)->cd_format = CRYPTO_DATA_MBLK;				\
	(data)->cd_mp = (mblk);						\
	(data)->cd_offset = 0;						\
	(data)->cd_length = (msglen);					\
	/* CONSTCOND */							\
} while (0)

/*
 * Describe the ICV buffer: icvlen bytes of raw memory at icvbuf.
 */
#define	AH_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) do {			\
	(mac)->cd_format = CRYPTO_DATA_RAW;				\
	(mac)->cd_offset = 0;						\
	(mac)->cd_length = (icvlen);					\
	(mac)->cd_raw.iov_base = (icvbuf);				\
	(mac)->cd_raw.iov_len = (icvlen);				\
	/* CONSTCOND */							\
} while (0)
2860
2861/*
2862 * Submit an inbound packet for processing by the crypto framework.
2863 */
2864static mblk_t *
2865ah_submit_req_inbound(mblk_t *phdr_mp, ip_recv_attr_t *ira,
2866    size_t skip_len, uint32_t ah_offset, ipsa_t *assoc)
2867{
2868	int kef_rc;
2869	mblk_t *mp;
2870	crypto_call_req_t call_req, *callrp;
2871	uint_t icv_len = assoc->ipsa_mac_len;
2872	crypto_ctx_template_t ctx_tmpl;
2873	ipsecah_stack_t	*ahstack;
2874	ipsec_crypto_t	*ic, icstack;
2875	boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2876
2877	ahstack = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsecah;
2878
2879	ASSERT(phdr_mp != NULL);
2880	ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2881
2882	if (force) {
2883		/* We are doing asynch; allocate mblks to hold state */
2884		if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
2885		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2886			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2887			ip_drop_input("ipIfStatsInDiscards", phdr_mp,
2888			    ira->ira_ill);
2889			freemsg(phdr_mp);
2890			return (NULL);
2891		}
2892
2893		linkb(mp, phdr_mp);
2894		callrp = &call_req;
2895		AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_inbound);
2896	} else {
2897		/*
2898		 * If we know we are going to do sync then ipsec_crypto_t
2899		 * should be on the stack.
2900		 */
2901		ic = &icstack;
2902		bzero(ic, sizeof (*ic));
2903		callrp = NULL;
2904	}
2905
2906	/* init arguments for the crypto framework */
2907	AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
2908	    phdr_mp);
2909
2910	AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
2911	    (char *)phdr_mp->b_cont->b_rptr - skip_len + ah_offset +
2912	    sizeof (ah_t));
2913
2914	ic->ic_skip_len = skip_len;
2915
2916	IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH, ctx_tmpl);
2917
2918	/* call KEF to do the MAC operation */
2919	kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
2920	    &ic->ic_crypto_data, &assoc->ipsa_kcfauthkey, ctx_tmpl,
2921	    &ic->ic_crypto_mac, callrp);
2922
2923	switch (kef_rc) {
2924	case CRYPTO_SUCCESS:
2925		AH_BUMP_STAT(ahstack, crypto_sync);
2926		phdr_mp = ah_auth_in_done(phdr_mp, ira, ic);
2927		if (force) {
2928			/* Free mp after we are done with ic */
2929			mp = ipsec_free_crypto_data(mp);
2930			(void) ip_recv_attr_free_mblk(mp);
2931		}
2932		return (phdr_mp);
2933	case CRYPTO_QUEUED:
2934		/* ah_kcf_callback_inbound() will be invoked on completion */
2935		AH_BUMP_STAT(ahstack, crypto_async);
2936		return (NULL);
2937	case CRYPTO_INVALID_MAC:
2938		/* Free mp after we are done with ic */
2939		AH_BUMP_STAT(ahstack, crypto_sync);
2940		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2941		ah_log_bad_auth(phdr_mp, ira, ic);
2942		/* phdr_mp was passed to ip_drop_packet */
2943		if (force) {
2944			mp = ipsec_free_crypto_data(mp);
2945			(void) ip_recv_attr_free_mblk(mp);
2946		}
2947		return (NULL);
2948	}
2949
2950	if (force) {
2951		mp = ipsec_free_crypto_data(mp);
2952		phdr_mp = ip_recv_attr_free_mblk(mp);
2953	}
2954	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
2955	ah_crypto_failed(phdr_mp, B_TRUE, kef_rc, ira->ira_ill, ahstack);
2956	/* phdr_mp was passed to ip_drop_packet */
2957	return (NULL);
2958}
2959
2960/*
2961 * Submit an outbound packet for processing by the crypto framework.
2962 */
2963static mblk_t *
2964ah_submit_req_outbound(mblk_t *phdr_mp, ip_xmit_attr_t *ixa,
2965    size_t skip_len, ipsa_t *assoc)
2966{
2967	int kef_rc;
2968	mblk_t *mp;
2969	crypto_call_req_t call_req, *callrp;
2970	uint_t icv_len = assoc->ipsa_mac_len;
2971	ipsecah_stack_t	*ahstack;
2972	ipsec_crypto_t	*ic, icstack;
2973	ill_t		*ill = ixa->ixa_nce->nce_ill;
2974	boolean_t force = (assoc->ipsa_flags & IPSA_F_ASYNC);
2975
2976	ahstack = ill->ill_ipst->ips_netstack->netstack_ipsecah;
2977
2978	ASSERT(phdr_mp != NULL);
2979	ASSERT(phdr_mp->b_datap->db_type == M_DATA);
2980
2981	if (force) {
2982		/* We are doing asynch; allocate mblks to hold state */
2983		if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
2984		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
2985			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
2986			ip_drop_output("ipIfStatsOutDiscards", phdr_mp, ill);
2987			freemsg(phdr_mp);
2988			return (NULL);
2989		}
2990		linkb(mp, phdr_mp);
2991		callrp = &call_req;
2992		AH_INIT_CALLREQ(callrp, mp, ah_kcf_callback_outbound);
2993	} else {
2994		/*
2995		 * If we know we are going to do sync then ipsec_crypto_t
2996		 * should be on the stack.
2997		 */
2998		ic = &icstack;
2999		bzero(ic, sizeof (*ic));
3000		callrp = NULL;
3001	}
3002
3003	/* init arguments for the crypto framework */
3004	AH_INIT_CRYPTO_DATA(&ic->ic_crypto_data, AH_MSGSIZE(phdr_mp),
3005	    phdr_mp);
3006
3007	AH_INIT_CRYPTO_MAC(&ic->ic_crypto_mac, icv_len,
3008	    (char *)phdr_mp->b_wptr);
3009
3010	ic->ic_skip_len = skip_len;
3011
3012	ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
3013
3014	/* call KEF to do the MAC operation */
3015	kef_rc = crypto_mac(&assoc->ipsa_amech, &ic->ic_crypto_data,
3016	    &assoc->ipsa_kcfauthkey, assoc->ipsa_authtmpl,
3017	    &ic->ic_crypto_mac, callrp);
3018
3019	switch (kef_rc) {
3020	case CRYPTO_SUCCESS:
3021		AH_BUMP_STAT(ahstack, crypto_sync);
3022		phdr_mp = ah_auth_out_done(phdr_mp, ixa, ic);
3023		if (force) {
3024			/* Free mp after we are done with ic */
3025			mp = ipsec_free_crypto_data(mp);
3026			(void) ip_xmit_attr_free_mblk(mp);
3027		}
3028		return (phdr_mp);
3029	case CRYPTO_QUEUED:
3030		/* ah_kcf_callback_outbound() will be invoked on completion */
3031		AH_BUMP_STAT(ahstack, crypto_async);
3032		return (NULL);
3033	}
3034
3035	if (force) {
3036		mp = ipsec_free_crypto_data(mp);
3037		phdr_mp = ip_xmit_attr_free_mblk(mp);
3038	}
3039	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3040	ah_crypto_failed(phdr_mp, B_FALSE, kef_rc, NULL, ahstack);
3041	/* phdr_mp was passed to ip_drop_packet */
3042	return (NULL);
3043}
3044
3045/*
3046 * This function constructs a pseudo header by looking at the IP header
3047 * and options if any. This is called for both outbound and inbound,
3048 * before computing the ICV.
3049 */
3050static mblk_t *
3051ah_process_ip_options_v6(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
3052    uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
3053{
3054	ip6_t	*ip6h;
3055	ip6_t	*oip6h;
3056	mblk_t 	*phdr_mp;
3057	int option_length;
3058	uint_t	ah_align_sz;
3059	uint_t ah_offset;
3060	int hdr_size;
3061
3062	/*
3063	 * Allocate space for the authentication data also. It is
3064	 * useful both during the ICV calculation where we need to
3065	 * feed in zeroes and while sending the datagram back to IP
3066	 * where we will be using the same space.
3067	 *
3068	 * We need to allocate space for padding bytes if it is not
3069	 * a multiple of IPV6_PADDING_ALIGN.
3070	 *
3071	 * In addition, we allocate space for the ICV computed by
3072	 * the kernel crypto framework, saving us a separate kmem
3073	 * allocation down the road.
3074	 */
3075
3076	ah_align_sz = P2ALIGN(ah_data_sz + IPV6_PADDING_ALIGN - 1,
3077	    IPV6_PADDING_ALIGN);
3078
3079	ASSERT(ah_align_sz >= ah_data_sz);
3080
3081	hdr_size = ipsec_ah_get_hdr_size_v6(mp, B_FALSE);
3082	option_length = hdr_size - IPV6_HDR_LEN;
3083
3084	/* This was not included in ipsec_ah_get_hdr_size_v6() */
3085	hdr_size += (sizeof (ah_t) + ah_align_sz);
3086
3087	if (!outbound && (MBLKL(mp) < hdr_size)) {
3088		/*
3089		 * We have post-AH header options in a separate mblk,
3090		 * a pullup is required.
3091		 */
3092		if (!pullupmsg(mp, hdr_size))
3093			return (NULL);
3094	}
3095
3096	if ((phdr_mp = allocb_tmpl(hdr_size + ah_data_sz, mp)) == NULL) {
3097		return (NULL);
3098	}
3099
3100	oip6h = (ip6_t *)mp->b_rptr;
3101
3102	/*
3103	 * Form the basic IP header first. Zero out the header
3104	 * so that the mutable fields are zeroed out.
3105	 */
3106	ip6h = (ip6_t *)phdr_mp->b_rptr;
3107	bzero(ip6h, sizeof (ip6_t));
3108	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
3109
3110	if (outbound) {
3111		/*
3112		 * Include the size of AH and authentication data.
3113		 * This is how our recipient would compute the
3114		 * authentication data. Look at what we do in the
3115		 * inbound case below.
3116		 */
3117		ip6h->ip6_plen = htons(ntohs(oip6h->ip6_plen) +
3118		    sizeof (ah_t) + ah_align_sz);
3119	} else {
3120		ip6h->ip6_plen = oip6h->ip6_plen;
3121	}
3122
3123	ip6h->ip6_src = oip6h->ip6_src;
3124	ip6h->ip6_dst = oip6h->ip6_dst;
3125
3126	*length_to_skip = IPV6_HDR_LEN;
3127	if (option_length == 0) {
3128		/* Form the AH header */
3129		ip6h->ip6_nxt = IPPROTO_AH;
3130		((ah_t *)(ip6h + 1))->ah_nexthdr = oip6h->ip6_nxt;
3131		ah_offset = *length_to_skip;
3132	} else {
3133		ip6h->ip6_nxt = oip6h->ip6_nxt;
3134		/* option_length does not include the AH header's size */
3135		*length_to_skip += option_length;
3136
3137		ah_offset = ah_fix_phdr_v6(ip6h, oip6h, outbound, B_FALSE);
3138		if (ah_offset == 0) {
3139			return (NULL);
3140		}
3141	}
3142
3143	if (!ah_finish_up(((ah_t *)((uint8_t *)ip6h + ah_offset)),
3144	    (outbound ? NULL : ((ah_t *)((uint8_t *)oip6h + ah_offset))),
3145	    assoc, ah_data_sz, ah_align_sz, ahstack)) {
3146		freeb(phdr_mp);
3147		/*
3148		 * Returning NULL will tell the caller to
3149		 * IPSA_REFELE(), free the memory, etc.
3150		 */
3151		return (NULL);
3152	}
3153
3154	phdr_mp->b_wptr = ((uint8_t *)ip6h + ah_offset + sizeof (ah_t) +
3155	    ah_align_sz);
3156	if (!outbound)
3157		*length_to_skip += sizeof (ah_t) + ah_align_sz;
3158	return (phdr_mp);
3159}
3160
3161/*
3162 * This function constructs a pseudo header by looking at the IP header
3163 * and options if any. This is called for both outbound and inbound,
3164 * before computing the ICV.
3165 */
3166static mblk_t *
3167ah_process_ip_options_v4(mblk_t *mp, ipsa_t *assoc, int *length_to_skip,
3168    uint_t ah_data_sz, boolean_t outbound, ipsecah_stack_t *ahstack)
3169{
3170	ipoptp_t opts;
3171	uint32_t option_length;
3172	ipha_t	*ipha;
3173	ipha_t	*oipha;
3174	mblk_t 	*phdr_mp;
3175	int	 size;
3176	uchar_t	*optptr;
3177	uint8_t optval;
3178	uint8_t optlen;
3179	ipaddr_t dst;
3180	uint32_t v_hlen_tos_len;
3181	int ip_hdr_length;
3182	uint_t	ah_align_sz;
3183	uint32_t off;
3184
3185#ifdef	_BIG_ENDIAN
3186#define	V_HLEN	(v_hlen_tos_len >> 24)
3187#else
3188#define	V_HLEN	(v_hlen_tos_len & 0xFF)
3189#endif
3190
3191	oipha = (ipha_t *)mp->b_rptr;
3192	v_hlen_tos_len = ((uint32_t *)oipha)[0];
3193
3194	/*
3195	 * Allocate space for the authentication data also. It is
3196	 * useful both during the ICV calculation where we need to
3197	 * feed in zeroes and while sending the datagram back to IP
3198	 * where we will be using the same space.
3199	 *
3200	 * We need to allocate space for padding bytes if it is not
3201	 * a multiple of IPV4_PADDING_ALIGN.
3202	 *
3203	 * In addition, we allocate space for the ICV computed by
3204	 * the kernel crypto framework, saving us a separate kmem
3205	 * allocation down the road.
3206	 */
3207
3208	ah_align_sz = P2ALIGN(ah_data_sz + IPV4_PADDING_ALIGN - 1,
3209	    IPV4_PADDING_ALIGN);
3210
3211	ASSERT(ah_align_sz >= ah_data_sz);
3212
3213	size = IP_SIMPLE_HDR_LENGTH + sizeof (ah_t) + ah_align_sz +
3214	    ah_data_sz;
3215
3216	if (V_HLEN != IP_SIMPLE_HDR_VERSION) {
3217		option_length = oipha->ipha_version_and_hdr_length -
3218		    (uint8_t)((IP_VERSION << 4) +
3219		    IP_SIMPLE_HDR_LENGTH_IN_WORDS);
3220		option_length <<= 2;
3221		size += option_length;
3222	}
3223
3224	if ((phdr_mp = allocb_tmpl(size, mp)) == NULL) {
3225		return (NULL);
3226	}
3227
3228	/*
3229	 * Form the basic IP header first.
3230	 */
3231	ipha = (ipha_t *)phdr_mp->b_rptr;
3232	ipha->ipha_version_and_hdr_length = oipha->ipha_version_and_hdr_length;
3233	ipha->ipha_type_of_service = 0;
3234
3235	if (outbound) {
3236		/*
3237		 * Include the size of AH and authentication data.
3238		 * This is how our recipient would compute the
3239		 * authentication data. Look at what we do in the
3240		 * inbound case below.
3241		 */
3242		ipha->ipha_length = ntohs(htons(oipha->ipha_length) +
3243		    sizeof (ah_t) + ah_align_sz);
3244	} else {
3245		ipha->ipha_length = oipha->ipha_length;
3246	}
3247
3248	ipha->ipha_ident = oipha->ipha_ident;
3249	ipha->ipha_fragment_offset_and_flags = 0;
3250	ipha->ipha_ttl = 0;
3251	ipha->ipha_protocol = IPPROTO_AH;
3252	ipha->ipha_hdr_checksum = 0;
3253	ipha->ipha_src = oipha->ipha_src;
3254	ipha->ipha_dst = dst = oipha->ipha_dst;
3255
3256	/*
3257	 * If there is no option to process return now.
3258	 */
3259	ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
3260
3261	if (V_HLEN == IP_SIMPLE_HDR_VERSION) {
3262		/* Form the AH header */
3263		goto ah_hdr;
3264	}
3265
3266	ip_hdr_length += option_length;
3267
3268	/*
3269	 * We have options. In the outbound case for source route,
3270	 * ULP has already moved the first hop, which is now in
3271	 * ipha_dst. We need the final destination for the calculation
3272	 * of authentication data. And also make sure that mutable
3273	 * and experimental fields are zeroed out in the IP options.
3274	 */
3275
3276	bcopy(&oipha[1], &ipha[1], option_length);
3277
3278	for (optval = ipoptp_first(&opts, ipha);
3279	    optval != IPOPT_EOL;
3280	    optval = ipoptp_next(&opts)) {
3281		optptr = opts.ipoptp_cur;
3282		optlen = opts.ipoptp_len;
3283		switch (optval) {
3284		case IPOPT_EXTSEC:
3285		case IPOPT_COMSEC:
3286		case IPOPT_RA:
3287		case IPOPT_SDMDD:
3288		case IPOPT_SECURITY:
3289			/*
3290			 * These options are Immutable, leave them as-is.
3291			 * Note that IPOPT_NOP is also Immutable, but it
3292			 * was skipped by ipoptp_next() and thus remains
3293			 * intact in the header.
3294			 */
3295			break;
3296		case IPOPT_SSRR:
3297		case IPOPT_LSRR:
3298			if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0)
3299				goto bad_ipv4opt;
3300			/*
3301			 * These two are mutable and will be zeroed, but
3302			 * first get the final destination.
3303			 */
3304			off = optptr[IPOPT_OFFSET];
3305			/*
3306			 * If one of the conditions is true, it means
3307			 * end of options and dst already has the right
3308			 * value. So, just fall through.
3309			 */
3310			if (!(optlen < IP_ADDR_LEN || off > optlen - 3)) {
3311				off = optlen - IP_ADDR_LEN;
3312				bcopy(&optptr[off], &dst, IP_ADDR_LEN);
3313			}
3314			/* FALLTHRU */
3315		case IPOPT_RR:
3316		case IPOPT_TS:
3317		case IPOPT_SATID:
3318		default:
3319			/*
3320			 * optlen should include from the beginning of an
3321			 * option.
3322			 * NOTE : Stream Identifier Option (SID): RFC 791
3323			 * shows the bit pattern of optlen as 2 and documents
3324			 * the length as 4. We assume it to be 2 here.
3325			 */
3326			bzero(optptr, optlen);
3327			break;
3328		}
3329	}
3330
3331	if ((opts.ipoptp_flags & IPOPTP_ERROR) != 0) {
3332bad_ipv4opt:
3333		ah1dbg(ahstack, ("AH : bad IPv4 option"));
3334		freeb(phdr_mp);
3335		return (NULL);
3336	}
3337
3338	/*
3339	 * Don't change ipha_dst for an inbound datagram as it points
3340	 * to the right value. Only for the outbound with LSRR/SSRR,
3341	 * because of ip_massage_options called by the ULP, ipha_dst
3342	 * points to the first hop and we need to use the final
3343	 * destination for computing the ICV.
3344	 */
3345
3346	if (outbound)
3347		ipha->ipha_dst = dst;
3348ah_hdr:
3349	((ah_t *)((uint8_t *)ipha + ip_hdr_length))->ah_nexthdr =
3350	    oipha->ipha_protocol;
3351	if (!ah_finish_up(((ah_t *)((uint8_t *)ipha + ip_hdr_length)),
3352	    (outbound ? NULL : ((ah_t *)((uint8_t *)oipha + ip_hdr_length))),
3353	    assoc, ah_data_sz, ah_align_sz, ahstack)) {
3354		freeb(phdr_mp);
3355		/*
3356		 * Returning NULL will tell the caller to IPSA_REFELE(), free
3357		 * the memory, etc.
3358		 */
3359		return (NULL);
3360	}
3361
3362	phdr_mp->b_wptr = ((uchar_t *)ipha + ip_hdr_length +
3363	    sizeof (ah_t) + ah_align_sz);
3364
3365	ASSERT(phdr_mp->b_wptr <= phdr_mp->b_datap->db_lim);
3366	if (outbound)
3367		*length_to_skip = ip_hdr_length;
3368	else
3369		*length_to_skip = ip_hdr_length + sizeof (ah_t) + ah_align_sz;
3370	return (phdr_mp);
3371}
3372
3373/*
3374 * Authenticate an outbound datagram. This function is called
3375 * whenever IP sends an outbound datagram that needs authentication.
3376 * Returns a modified packet if done. Returns NULL if error or queued.
3377 * If error return then ipIfStatsOutDiscards has been increased.
3378 */
3379static mblk_t *
3380ah_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
3381{
3382	mblk_t *phdr_mp;
3383	ipsa_t *assoc;
3384	int length_to_skip;
3385	uint_t ah_align_sz;
3386	uint_t age_bytes;
3387	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
3388	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3389	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3390	ill_t		*ill = ixa->ixa_nce->nce_ill;
3391	boolean_t	need_refrele = B_FALSE;
3392
3393	/*
3394	 * Construct the chain of mblks
3395	 *
3396	 * PSEUDO_HDR->DATA
3397	 *
3398	 * one by one.
3399	 */
3400
3401	AH_BUMP_STAT(ahstack, out_requests);
3402
3403	ASSERT(data_mp->b_datap->db_type == M_DATA);
3404
3405	assoc = ixa->ixa_ipsec_ah_sa;
3406	ASSERT(assoc != NULL);
3407
3408
3409	/*
3410	 * Get the outer IP header in shape to escape this system..
3411	 */
3412	if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
3413		/*
3414		 * Need to update packet with any CIPSO option and update
3415		 * ixa_tsl to capture the new label.
3416		 * We allocate a separate ixa for that purpose.
3417		 */
3418		ixa = ip_xmit_attr_duplicate(ixa);
3419		if (ixa == NULL) {
3420			ip_drop_packet(data_mp, B_FALSE, ill,
3421			    DROPPER(ipss, ipds_ah_nomem),
3422			    &ahstack->ah_dropper);
3423			return (NULL);
3424		}
3425		need_refrele = B_TRUE;
3426
3427		label_hold(assoc->ipsa_otsl);
3428		ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
3429
3430		data_mp = sadb_whack_label(data_mp, assoc, ixa,
3431		    DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3432		if (data_mp == NULL) {
3433			/* Packet dropped by sadb_whack_label */
3434			ixa_refrele(ixa);
3435			return (NULL);
3436		}
3437	}
3438
3439	/*
3440	 * Age SA according to number of bytes that will be sent after
3441	 * adding the AH header, ICV, and padding to the packet.
3442	 */
3443
3444	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3445		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
3446		ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3447		    IPV4_PADDING_ALIGN - 1, IPV4_PADDING_ALIGN);
3448		age_bytes = ntohs(ipha->ipha_length) + sizeof (ah_t) +
3449		    ah_align_sz;
3450	} else {
3451		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
3452		ah_align_sz = P2ALIGN(assoc->ipsa_mac_len +
3453		    IPV6_PADDING_ALIGN - 1, IPV6_PADDING_ALIGN);
3454		age_bytes = sizeof (ip6_t) + ntohs(ip6h->ip6_plen) +
3455		    sizeof (ah_t) + ah_align_sz;
3456	}
3457
3458	if (!ah_age_bytes(assoc, age_bytes, B_FALSE)) {
3459		/* rig things as if ipsec_getassocbyconn() failed */
3460		ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
3461		    "AH association 0x%x, dst %s had bytes expire.\n",
3462		    ntohl(assoc->ipsa_spi), assoc->ipsa_dstaddr, AF_INET,
3463		    ahstack->ipsecah_netstack);
3464		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3465		ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
3466		freemsg(data_mp);
3467		if (need_refrele)
3468			ixa_refrele(ixa);
3469		return (NULL);
3470	}
3471
3472	/*
3473	 * XXX We need to have fixed up the outer label before we get here.
3474	 * (AH is computing the checksum over the outer label).
3475	 */
3476
3477	/*
3478	 * Insert pseudo header:
3479	 * [IP, ULP] => [IP, AH, ICV] -> ULP
3480	 */
3481
3482	if (ixa->ixa_flags & IXAF_IS_IPV4) {
3483		phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3484		    &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3485	} else {
3486		phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3487		    &length_to_skip, assoc->ipsa_mac_len, B_TRUE, ahstack);
3488	}
3489
3490	if (phdr_mp == NULL) {
3491		AH_BUMP_STAT(ahstack, out_discards);
3492		ip_drop_packet(data_mp, B_FALSE, ixa->ixa_nce->nce_ill,
3493		    DROPPER(ipss, ipds_ah_bad_v4_opts),
3494		    &ahstack->ah_dropper);
3495		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3496		if (need_refrele)
3497			ixa_refrele(ixa);
3498		return (NULL);
3499	}
3500
3501	phdr_mp->b_cont = data_mp;
3502	data_mp->b_rptr += length_to_skip;
3503	data_mp = phdr_mp;
3504
3505	/*
3506	 * At this point data_mp points to
3507	 * an mblk containing the pseudo header (IP header,
3508	 * AH header, and ICV with mutable fields zero'ed out).
3509	 * mp points to the mblk containing the ULP data. The original
3510	 * IP header is kept before the ULP data in data_mp.
3511	 */
3512
3513	/* submit MAC request to KCF */
3514	data_mp = ah_submit_req_outbound(data_mp, ixa, length_to_skip, assoc);
3515	if (need_refrele)
3516		ixa_refrele(ixa);
3517	return (data_mp);
3518}
3519
3520static mblk_t *
3521ah_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
3522{
3523	ah_t		*ah = (ah_t *)arg;
3524	ipsa_t		*assoc = ira->ira_ipsec_ah_sa;
3525	int		length_to_skip;
3526	int		ah_length;
3527	mblk_t		*phdr_mp;
3528	uint32_t	ah_offset;
3529	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
3530	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3531	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3532
3533	ASSERT(assoc != NULL);
3534
3535	/*
3536	 * We may wish to check replay in-range-only here as an optimization.
3537	 * Include the reality check of ipsa->ipsa_replay >
3538	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
3539	 * where N == ipsa->ipsa_replay_wsize.
3540	 *
3541	 * Another check that may come here later is the "collision" check.
3542	 * If legitimate packets flow quickly enough, this won't be a problem,
3543	 * but collisions may cause authentication algorithm crunching to
3544	 * take place when it doesn't need to.
3545	 */
3546	if (!sadb_replay_peek(assoc, ah->ah_replay)) {
3547		AH_BUMP_STAT(ahstack, replay_early_failures);
3548		IP_AH_BUMP_STAT(ipss, in_discards);
3549		ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3550		    DROPPER(ipss, ipds_ah_early_replay),
3551		    &ahstack->ah_dropper);
3552		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3553		return (NULL);
3554	}
3555
3556	/*
3557	 * The offset of the AH header can be computed from its pointer
3558	 * within the data mblk, which was pulled up until the AH header
3559	 * by ipsec_inbound_ah_sa() during SA selection.
3560	 */
3561	ah_offset = (uchar_t *)ah - data_mp->b_rptr;
3562
3563	/*
3564	 * We need to pullup until the ICV before we call
3565	 * ah_process_ip_options_v6.
3566	 */
3567	ah_length = (ah->ah_length << 2) + 8;
3568
3569	/*
3570	 * NOTE : If we want to use any field of IP/AH header, you need
3571	 * to re-assign following the pullup.
3572	 */
3573	if (((uchar_t *)ah + ah_length) > data_mp->b_wptr) {
3574		if (!pullupmsg(data_mp, (uchar_t *)ah + ah_length -
3575		    data_mp->b_rptr)) {
3576			(void) ipsec_rl_strlog(ns, info.mi_idnum, 0, 0,
3577			    SL_WARN | SL_ERROR,
3578			    "ah_inbound: Small AH header\n");
3579			IP_AH_BUMP_STAT(ipss, in_discards);
3580			ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3581			    DROPPER(ipss, ipds_ah_nomem),
3582			    &ahstack->ah_dropper);
3583			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3584			return (NULL);
3585		}
3586	}
3587
3588	/*
3589	 * Insert pseudo header:
3590	 * [IP, ULP] => [IP, AH, ICV] -> ULP
3591	 */
3592	if (ira->ira_flags & IRAF_IS_IPV4) {
3593		phdr_mp = ah_process_ip_options_v4(data_mp, assoc,
3594		    &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3595	} else {
3596		phdr_mp = ah_process_ip_options_v6(data_mp, assoc,
3597		    &length_to_skip, assoc->ipsa_mac_len, B_FALSE, ahstack);
3598	}
3599
3600	if (phdr_mp == NULL) {
3601		IP_AH_BUMP_STAT(ipss, in_discards);
3602		ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
3603		    ((ira->ira_flags & IRAF_IS_IPV4) ?
3604		    DROPPER(ipss, ipds_ah_bad_v4_opts) :
3605		    DROPPER(ipss, ipds_ah_bad_v6_hdrs)),
3606		    &ahstack->ah_dropper);
3607		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3608		return (NULL);
3609	}
3610
3611	phdr_mp->b_cont = data_mp;
3612	data_mp->b_rptr += length_to_skip;
3613	data_mp = phdr_mp;
3614
3615	/* submit request to KCF */
3616	return (ah_submit_req_inbound(data_mp, ira, length_to_skip, ah_offset,
3617	    assoc));
3618}
3619
3620/*
3621 * Invoked after processing of an inbound packet by the
3622 * kernel crypto framework. Called by ah_submit_req() for a sync request,
3623 * or by the kcf callback for an async request.
3624 * Returns NULL if the mblk chain is consumed.
3625 */
3626static mblk_t *
3627ah_auth_in_done(mblk_t *phdr_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
3628{
3629	ipha_t *ipha;
3630	uint_t ah_offset = 0;
3631	mblk_t *mp;
3632	int align_len, newpos;
3633	ah_t *ah;
3634	uint32_t length;
3635	uint32_t *dest32;
3636	uint8_t *dest;
3637	boolean_t isv4;
3638	ip6_t *ip6h;
3639	uint_t icv_len;
3640	ipsa_t *assoc;
3641	kstat_named_t *counter;
3642	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
3643	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3644	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3645
3646	isv4 = (ira->ira_flags & IRAF_IS_IPV4);
3647	assoc = ira->ira_ipsec_ah_sa;
3648	icv_len = (uint_t)ic->ic_crypto_mac.cd_raw.iov_len;
3649
3650	if (phdr_mp == NULL) {
3651		ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3652		    DROPPER(ipss, ipds_ah_nomem),
3653		    &ahstack->ah_dropper);
3654		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3655		return (NULL);
3656	}
3657
3658	mp = phdr_mp->b_cont;
3659	if (mp == NULL) {
3660		ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill,
3661		    DROPPER(ipss, ipds_ah_nomem),
3662		    &ahstack->ah_dropper);
3663		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3664		return (NULL);
3665	}
3666	mp->b_rptr -= ic->ic_skip_len;
3667
3668	ah_set_usetime(assoc, B_TRUE);
3669
3670	if (isv4) {
3671		ipha = (ipha_t *)mp->b_rptr;
3672		ah_offset = ipha->ipha_version_and_hdr_length -
3673		    (uint8_t)((IP_VERSION << 4));
3674		ah_offset <<= 2;
3675		align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3676		    IPV4_PADDING_ALIGN);
3677	} else {
3678		ip6h = (ip6_t *)mp->b_rptr;
3679		ah_offset = ipsec_ah_get_hdr_size_v6(mp, B_TRUE);
3680		ASSERT((mp->b_wptr - mp->b_rptr) >= ah_offset);
3681		align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3682		    IPV6_PADDING_ALIGN);
3683	}
3684
3685	ah = (ah_t *)(mp->b_rptr + ah_offset);
3686	newpos = sizeof (ah_t) + align_len;
3687
3688	/*
3689	 * We get here only when authentication passed.
3690	 */
3691
3692	ah3dbg(ahstack, ("AH succeeded, checking replay\n"));
3693	AH_BUMP_STAT(ahstack, good_auth);
3694
3695	if (!sadb_replay_check(assoc, ah->ah_replay)) {
3696		int af;
3697		void *addr;
3698
3699		if (isv4) {
3700			addr = &ipha->ipha_dst;
3701			af = AF_INET;
3702		} else {
3703			addr = &ip6h->ip6_dst;
3704			af = AF_INET6;
3705		}
3706
3707		/*
3708		 * Log the event. As of now we print out an event.
3709		 * Do not print the replay failure number, or else
3710		 * syslog cannot collate the error messages.  Printing
3711		 * the replay number that failed (or printing to the
3712		 * console) opens a denial-of-service attack.
3713		 */
3714		AH_BUMP_STAT(ahstack, replay_failures);
3715		ipsec_assocfailure(info.mi_idnum, 0, 0,
3716		    SL_ERROR | SL_WARN,
3717		    "Replay failed for AH spi %x, dst_addr %s",
3718		    assoc->ipsa_spi, addr, af, ahstack->ipsecah_netstack);
3719		counter = DROPPER(ipss, ipds_ah_replay);
3720		goto ah_in_discard;
3721	}
3722
3723	/*
3724	 * We need to remove the AH header from the original
3725	 * datagram. Best way to do this is to move the pre-AH headers
3726	 * forward in the (relatively simple) IPv4 case.  In IPv6, it's
3727	 * a bit more complicated because of IPv6's next-header chaining,
3728	 * but it's doable.
3729	 */
3730	if (isv4) {
3731		/*
3732		 * Assign the right protocol, adjust the length as we
3733		 * are removing the AH header and adjust the checksum to
3734		 * account for the protocol and length.
3735		 */
3736		length = ntohs(ipha->ipha_length);
3737		if (!ah_age_bytes(assoc, length, B_TRUE)) {
3738			/* The ipsa has hit hard expiration, LOG and AUDIT. */
3739			ipsec_assocfailure(info.mi_idnum, 0, 0,
3740			    SL_ERROR | SL_WARN,
3741			    "AH Association 0x%x, dst %s had bytes expire.\n",
3742			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
3743			    AF_INET, ahstack->ipsecah_netstack);
3744			AH_BUMP_STAT(ahstack, bytes_expired);
3745			counter = DROPPER(ipss, ipds_ah_bytes_expire);
3746			goto ah_in_discard;
3747		}
3748		ipha->ipha_protocol = ah->ah_nexthdr;
3749		length -= newpos;
3750
3751		ipha->ipha_length = htons((uint16_t)length);
3752		ipha->ipha_hdr_checksum = 0;
3753		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
3754	} else {
3755		uchar_t *whereptr;
3756		int hdrlen;
3757		uint8_t *nexthdr;
3758		ip6_hbh_t *hbhhdr;
3759		ip6_dest_t *dsthdr;
3760		ip6_rthdr0_t *rthdr;
3761
3762		/*
3763		 * Make phdr_mp hold until the AH header and make
3764		 * mp hold everything past AH header.
3765		 */
3766		length = ntohs(ip6h->ip6_plen);
3767		if (!ah_age_bytes(assoc, length + sizeof (ip6_t), B_TRUE)) {
3768			/* The ipsa has hit hard expiration, LOG and AUDIT. */
3769			ipsec_assocfailure(info.mi_idnum, 0, 0,
3770			    SL_ERROR | SL_WARN,
3771			    "AH Association 0x%x, dst %s had bytes "
3772			    "expire.\n", assoc->ipsa_spi, &ip6h->ip6_dst,
3773			    AF_INET6, ahstack->ipsecah_netstack);
3774			AH_BUMP_STAT(ahstack, bytes_expired);
3775			counter = DROPPER(ipss, ipds_ah_bytes_expire);
3776			goto ah_in_discard;
3777		}
3778
3779		/*
3780		 * Update the next header field of the header preceding
3781		 * AH with the next header field of AH. Start with the
3782		 * IPv6 header and proceed with the extension headers
3783		 * until we find what we're looking for.
3784		 */
3785		nexthdr = &ip6h->ip6_nxt;
3786		whereptr =  (uchar_t *)ip6h;
3787		hdrlen = sizeof (ip6_t);
3788
3789		while (*nexthdr != IPPROTO_AH) {
3790			whereptr += hdrlen;
3791			/* Assume IP has already stripped it */
3792			ASSERT(*nexthdr != IPPROTO_FRAGMENT);
3793			switch (*nexthdr) {
3794			case IPPROTO_HOPOPTS:
3795				hbhhdr = (ip6_hbh_t *)whereptr;
3796				nexthdr = &hbhhdr->ip6h_nxt;
3797				hdrlen = 8 * (hbhhdr->ip6h_len + 1);
3798				break;
3799			case IPPROTO_DSTOPTS:
3800				dsthdr = (ip6_dest_t *)whereptr;
3801				nexthdr = &dsthdr->ip6d_nxt;
3802				hdrlen = 8 * (dsthdr->ip6d_len + 1);
3803				break;
3804			case IPPROTO_ROUTING:
3805				rthdr = (ip6_rthdr0_t *)whereptr;
3806				nexthdr = &rthdr->ip6r0_nxt;
3807				hdrlen = 8 * (rthdr->ip6r0_len + 1);
3808				break;
3809			}
3810		}
3811		*nexthdr = ah->ah_nexthdr;
3812		length -= newpos;
3813		ip6h->ip6_plen = htons((uint16_t)length);
3814	}
3815
3816	/* Now that we've fixed the IP header, move it forward. */
3817	mp->b_rptr += newpos;
3818	if (IS_P2ALIGNED(mp->b_rptr, sizeof (uint32_t))) {
3819		dest32 = (uint32_t *)(mp->b_rptr + ah_offset);
3820		while (--dest32 >= (uint32_t *)mp->b_rptr)
3821			*dest32 = *(dest32 - (newpos >> 2));
3822	} else {
3823		dest = mp->b_rptr + ah_offset;
3824		while (--dest >= mp->b_rptr)
3825			*dest = *(dest - newpos);
3826	}
3827	freeb(phdr_mp);
3828
3829	/*
3830	 * If SA is labelled, use its label, else inherit the label
3831	 */
3832	if (is_system_labeled() && (assoc->ipsa_tsl != NULL)) {
3833		if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
3834			ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3835			    DROPPER(ipss, ipds_ah_nomem), &ahstack->ah_dropper);
3836			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3837			return (NULL);
3838		}
3839	}
3840
3841	if (assoc->ipsa_state == IPSA_STATE_IDLE) {
3842		/*
3843		 * Cluster buffering case.  Tell caller that we're
3844		 * handling the packet.
3845		 */
3846		sadb_buf_pkt(assoc, mp, ira);
3847		return (NULL);
3848	}
3849
3850	return (mp);
3851
3852ah_in_discard:
3853	IP_AH_BUMP_STAT(ipss, in_discards);
3854	ip_drop_packet(phdr_mp, B_TRUE, ira->ira_ill, counter,
3855	    &ahstack->ah_dropper);
3856	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
3857	return (NULL);
3858}
3859
3860/*
3861 * Invoked after processing of an outbound packet by the
3862 * kernel crypto framework, either by ah_submit_req() for a request
3863 * executed syncrhonously, or by the KEF callback for a request
3864 * executed asynchronously.
3865 */
3866static mblk_t *
3867ah_auth_out_done(mblk_t *phdr_mp, ip_xmit_attr_t *ixa, ipsec_crypto_t *ic)
3868{
3869	mblk_t *mp;
3870	int align_len;
3871	uint32_t hdrs_length;
3872	uchar_t *ptr;
3873	uint32_t length;
3874	boolean_t isv4;
3875	size_t icv_len;
3876	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
3877	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3878	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3879	ill_t		*ill = ixa->ixa_nce->nce_ill;
3880
3881	isv4 = (ixa->ixa_flags & IXAF_IS_IPV4);
3882	icv_len = ic->ic_crypto_mac.cd_raw.iov_len;
3883
3884	mp = phdr_mp->b_cont;
3885	if (mp == NULL) {
3886		ip_drop_packet(phdr_mp, B_FALSE, ill,
3887		    DROPPER(ipss, ipds_ah_nomem),
3888		    &ahstack->ah_dropper);
3889		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
3890		return (NULL);
3891	}
3892	mp->b_rptr -= ic->ic_skip_len;
3893
3894	ASSERT(ixa->ixa_flags & IXAF_IPSEC_SECURE);
3895	ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
3896	ah_set_usetime(ixa->ixa_ipsec_ah_sa, B_FALSE);
3897
3898	if (isv4) {
3899		ipha_t *ipha;
3900		ipha_t *nipha;
3901
3902		ipha = (ipha_t *)mp->b_rptr;
3903		hdrs_length = ipha->ipha_version_and_hdr_length -
3904		    (uint8_t)((IP_VERSION << 4));
3905		hdrs_length <<= 2;
3906		align_len = P2ALIGN(icv_len + IPV4_PADDING_ALIGN - 1,
3907		    IPV4_PADDING_ALIGN);
3908		/*
3909		 * phdr_mp must have the right amount of space for the
3910		 * combined IP and AH header. Copy the IP header and
3911		 * the ack_data onto AH. Note that the AH header was
3912		 * already formed before the ICV calculation and hence
3913		 * you don't have to copy it here.
3914		 */
3915		bcopy(mp->b_rptr, phdr_mp->b_rptr, hdrs_length);
3916
3917		ptr = phdr_mp->b_rptr + hdrs_length + sizeof (ah_t);
3918		bcopy(phdr_mp->b_wptr, ptr, icv_len);
3919
3920		/*
3921		 * Compute the new header checksum as we are assigning
3922		 * IPPROTO_AH and adjusting the length here.
3923		 */
3924		nipha = (ipha_t *)phdr_mp->b_rptr;
3925
3926		nipha->ipha_protocol = IPPROTO_AH;
3927		length = ntohs(nipha->ipha_length);
3928		length += (sizeof (ah_t) + align_len);
3929		nipha->ipha_length = htons((uint16_t)length);
3930		nipha->ipha_hdr_checksum = 0;
3931		nipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(nipha);
3932	} else {
3933		ip6_t *ip6h;
3934		ip6_t *nip6h;
3935		uint_t ah_offset;
3936
3937		ip6h = (ip6_t *)mp->b_rptr;
3938		nip6h = (ip6_t *)phdr_mp->b_rptr;
3939		align_len = P2ALIGN(icv_len + IPV6_PADDING_ALIGN - 1,
3940		    IPV6_PADDING_ALIGN);
3941		/*
3942		 * phdr_mp must have the right amount of space for the
3943		 * combined IP and AH header. Copy the IP header with
3944		 * options into the pseudo header. When we constructed
3945		 * a pseudo header, we did not copy some of the mutable
3946		 * fields. We do it now by calling ah_fix_phdr_v6()
3947		 * with the last argument B_TRUE. It returns the
3948		 * ah_offset into the pseudo header.
3949		 */
3950
3951		bcopy(ip6h, nip6h, IPV6_HDR_LEN);
3952		ah_offset = ah_fix_phdr_v6(nip6h, ip6h, B_TRUE, B_TRUE);
3953		ASSERT(ah_offset != 0);
3954		/*
3955		 * phdr_mp can hold exactly the whole IP header with options
3956		 * plus the AH header also. Thus subtracting the AH header's
3957		 * size should give exactly how much of the original header
3958		 * should be skipped.
3959		 */
3960		hdrs_length = (phdr_mp->b_wptr - phdr_mp->b_rptr) -
3961		    sizeof (ah_t) - icv_len;
3962		bcopy(phdr_mp->b_wptr, ((uint8_t *)nip6h + ah_offset +
3963		    sizeof (ah_t)), icv_len);
3964		length = ntohs(nip6h->ip6_plen);
3965		length += (sizeof (ah_t) + align_len);
3966		nip6h->ip6_plen = htons((uint16_t)length);
3967	}
3968
3969	/* Skip the original IP header */
3970	mp->b_rptr += hdrs_length;
3971	if (mp->b_rptr == mp->b_wptr) {
3972		phdr_mp->b_cont = mp->b_cont;
3973		freeb(mp);
3974	}
3975
3976	return (phdr_mp);
3977}
3978
3979/* Refactor me */
3980/*
3981 * Wrapper to allow IP to trigger an AH association failure message
3982 * during SA inbound selection.
3983 */
3984void
3985ipsecah_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
3986    uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
3987{
3988	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
3989	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
3990	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3991
3992	if (ahstack->ipsecah_log_unknown_spi) {
3993		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
3994		    addr, af, ahstack->ipsecah_netstack);
3995	}
3996
3997	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
3998	    DROPPER(ipss, ipds_ah_no_sa),
3999	    &ahstack->ah_dropper);
4000}
4001
4002/*
4003 * Initialize the AH input and output processing functions.
4004 */
4005void
4006ipsecah_init_funcs(ipsa_t *sa)
4007{
4008	if (sa->ipsa_output_func == NULL)
4009		sa->ipsa_output_func = ah_outbound;
4010	if (sa->ipsa_input_func == NULL)
4011		sa->ipsa_input_func = ah_inbound;
4012}
4013