dld_proto.c revision 1184:1c788f55a808
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * Data-Link Driver
31 */
32
33#include <sys/types.h>
34#include <sys/debug.h>
35#include <sys/sysmacros.h>
36#include <sys/stream.h>
37#include <sys/ddi.h>
38#include <sys/sunddi.h>
39#include <sys/strsun.h>
40#include <sys/cpuvar.h>
41#include <sys/dlpi.h>
42#include <netinet/in.h>
43#include <sys/sdt.h>
44#include <sys/strsubr.h>
45#include <sys/vlan.h>
46#include <sys/mac.h>
47#include <sys/dls.h>
48#include <sys/dld.h>
49#include <sys/dld_impl.h>
50#include <sys/dls_soft_ring.h>
51
52typedef boolean_t proto_reqfunc_t(dld_str_t *, union DL_primitives *, mblk_t *);
53
54static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
55    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
56    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
57    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
58    proto_notify_req, proto_unitdata_req, proto_passive_req;
59
60static void proto_poll_disable(dld_str_t *);
61static boolean_t proto_poll_enable(dld_str_t *, dl_capab_dls_t *);
62static boolean_t proto_capability_advertise(dld_str_t *, mblk_t *);
63
64static void proto_soft_ring_disable(dld_str_t *);
65static boolean_t proto_soft_ring_enable(dld_str_t *, dl_capab_dls_t *);
67static void proto_change_soft_ring_fanout(dld_str_t *, int);
68static void proto_stop_soft_ring_threads(void *);
69
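/*
 * True while a previous attach, detach, bind or unbind is still awaiting
 * its acknowledgement; most other primitives are rejected with
 * DL_OUTSTATE while the stream is in one of these transient states.
 */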
70#define	DL_ACK_PENDING(state) \
71	((state) == DL_ATTACH_PENDING || \
72	(state) == DL_DETACH_PENDING || \
73	(state) == DL_BIND_PENDING || \
74	(state) == DL_UNBIND_PENDING)
75
76/*
77 * Process a DLPI protocol message.
78 * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
79 * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
80 * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
81 * as 'passive' and forbids it from being subsequently made 'active'
82 * by the above primitives.
83 */
84void
85dld_proto(dld_str_t *dsp, mblk_t *mp)
86{
87	union DL_primitives	*udlp;
88	t_uscalar_t		prim;
89
90	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
91		freemsg(mp);
92		return;
93	}
94
95	udlp = (union DL_primitives *)mp->b_rptr;
96	prim = udlp->dl_primitive;
97
98	switch (prim) {
99	case DL_INFO_REQ:
100		(void) proto_info_req(dsp, udlp, mp);
101		break;
102	case DL_BIND_REQ:
103		(void) proto_bind_req(dsp, udlp, mp);
104		break;
105	case DL_UNBIND_REQ:
106		(void) proto_unbind_req(dsp, udlp, mp);
107		break;
108	case DL_UNITDATA_REQ:
109		(void) proto_unitdata_req(dsp, udlp, mp);
110		break;
111	case DL_UDQOS_REQ:
112		(void) proto_udqos_req(dsp, udlp, mp);
113		break;
114	case DL_ATTACH_REQ:
115		(void) proto_attach_req(dsp, udlp, mp);
116		break;
117	case DL_DETACH_REQ:
118		(void) proto_detach_req(dsp, udlp, mp);
119		break;
120	case DL_ENABMULTI_REQ:
121		(void) proto_enabmulti_req(dsp, udlp, mp);
122		break;
123	case DL_DISABMULTI_REQ:
124		(void) proto_disabmulti_req(dsp, udlp, mp);
125		break;
126	case DL_PROMISCON_REQ:
127		(void) proto_promiscon_req(dsp, udlp, mp);
128		break;
129	case DL_PROMISCOFF_REQ:
130		(void) proto_promiscoff_req(dsp, udlp, mp);
131		break;
132	case DL_PHYS_ADDR_REQ:
133		(void) proto_physaddr_req(dsp, udlp, mp);
134		break;
135	case DL_SET_PHYS_ADDR_REQ:
136		(void) proto_setphysaddr_req(dsp, udlp, mp);
137		break;
138	case DL_NOTIFY_REQ:
139		(void) proto_notify_req(dsp, udlp, mp);
140		break;
141	case DL_CAPABILITY_REQ:
142		(void) proto_capability_req(dsp, udlp, mp);
143		break;
144	case DL_PASSIVE_REQ:
145		(void) proto_passive_req(dsp, udlp, mp);
146		break;
147	default:
148		(void) proto_req(dsp, udlp, mp);
149		break;
150	}
151}
152
153/*
154 * Finish any pending operations.  At this moment we are single-threaded,
155 * hence there is no need to hold ds_lock as writer because we're already
156 * exclusive.
157 */
158void
159dld_finish_pending_ops(dld_str_t *dsp)
160{
161	ASSERT(MUTEX_HELD(&dsp->ds_thr_lock));
162	ASSERT(dsp->ds_thr == 0);
163
164	/* Pending DL_DETACH_REQ? */
165	if (dsp->ds_detach_req != NULL) {
166		mblk_t *mp;
167
168		ASSERT(dsp->ds_dlstate == DL_DETACH_PENDING);
169		dld_str_detach(dsp);
170
171		mp = dsp->ds_detach_req;
172		dsp->ds_detach_req = NULL;
173
174		mutex_exit(&dsp->ds_thr_lock);
175		dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
176	} else {
177		mutex_exit(&dsp->ds_thr_lock);
178	}
179}
180
181#define	NEG(x)	-(x)
182
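/*
 * Response template for DL_INFO_REQ: the fixed dl_info_ack_t followed by
 * space for the DLSAP address (MAC address plus 16-bit SAP), the broadcast
 * address and the optional QoS structures.
 */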
183typedef struct dl_info_ack_wrapper {
184	dl_info_ack_t		dl_info;
185	uint8_t			dl_addr[MAXADDRLEN + sizeof (uint16_t)];
186	uint8_t			dl_brdcst_addr[MAXADDRLEN];
187	dl_qos_cl_range1_t	dl_qos_range1;
188	dl_qos_cl_sel1_t	dl_qos_sel1;
189} dl_info_ack_wrapper_t;
190
191/*
192 * DL_INFO_REQ
193 */
194/*ARGSUSED*/
195static boolean_t
196proto_info_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
197{
198	dl_info_ack_wrapper_t	*dlwp;
199	dl_info_ack_t		*dlp;
200	dl_qos_cl_sel1_t	*selp;
201	dl_qos_cl_range1_t	*rangep;
202	uint8_t			*addr;
203	uint8_t			*brdcst_addr;
204	uint_t			addr_length;
205	uint_t			sap_length;
206	mac_info_t		minfo;
207	mac_info_t		*minfop;
208	queue_t			*q = dsp->ds_wq;
209
210	/*
211	 * Swap the request message for one large enough to contain the
212	 * wrapper structure defined above.
213	 */
214	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
215	    M_PCPROTO, 0)) == NULL)
216		return (B_FALSE);
217
218	rw_enter(&dsp->ds_lock, RW_READER);
219
220	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
221	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
222
223	dlp = &(dlwp->dl_info);
224	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
225
226	dlp->dl_primitive = DL_INFO_ACK;
227
228	/*
229	 * Set up the sub-structure pointers.
230	 */
231	addr = dlwp->dl_addr;
232	brdcst_addr = dlwp->dl_brdcst_addr;
233	rangep = &(dlwp->dl_qos_range1);
234	selp = &(dlwp->dl_qos_sel1);
235
236	/*
237	 * This driver supports only version 2 connectionless DLPI provider
238	 * nodes.
239	 */
240	dlp->dl_service_mode = DL_CLDLS;
241	dlp->dl_version = DL_VERSION_2;
242
243	/*
244	 * Set the style of the provider
245	 */
246	dlp->dl_provider_style = dsp->ds_style;
247	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
248	    dlp->dl_provider_style == DL_STYLE2);
249
250	/*
251	 * Set the current DLPI state.
252	 */
253	dlp->dl_current_state = dsp->ds_dlstate;
254
255	/*
256	 * Gratuitously set the media type. This is to deal with modules
257	 * that assume the media type is known prior to DL_ATTACH_REQ
258	 * being completed.
259	 */
260	dlp->dl_mac_type = DL_ETHER;
261
262	/*
263	 * If the stream is not at least attached we try to retrieve the
264	 * mac_info using mac_info_get()
265	 */
266	if (dsp->ds_dlstate == DL_UNATTACHED ||
267	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
268	    dsp->ds_dlstate == DL_DETACH_PENDING) {
269		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
270			/*
271			 * Cannot find mac_info. Giving up.
272			 */
273			goto done;
274		}
275		minfop = &minfo;
276	} else {
277		minfop = (mac_info_t *)dsp->ds_mip;
278	}
279
280	/*
281	 * Set the media type (properly this time).
282	 */
283	dlp->dl_mac_type = minfop->mi_media;
284
285	/*
286	 * Set the DLSAP length. We only support 16 bit values and they
287	 * appear after the MAC address portion of DLSAP addresses.
288	 */
289	sap_length = sizeof (uint16_t);
290	dlp->dl_sap_length = NEG(sap_length);
291
292	/*
293	 * Set the minimum and maximum payload sizes.
294	 */
295	dlp->dl_min_sdu = minfop->mi_sdu_min;
296	dlp->dl_max_sdu = minfop->mi_sdu_max;
297
298	addr_length = minfop->mi_addr_length;
299	ASSERT(addr_length != 0);
300
301	/*
302	 * Copy in the media broadcast address.
303	 */
304	dlp->dl_brdcst_addr_offset = (uintptr_t)brdcst_addr - (uintptr_t)dlp;
305	bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
306	dlp->dl_brdcst_addr_length = addr_length;
307
308	/*
309	 * We only support QoS information for VLAN interfaces.
310	 */
311	if (dsp->ds_vid != VLAN_ID_NONE) {
312		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
313		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
314
315		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
316		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
317		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
318		rangep->dl_protection.dl_min = DL_UNKNOWN;
319		rangep->dl_protection.dl_max = DL_UNKNOWN;
320		rangep->dl_residual_error = DL_UNKNOWN;
321
322		/*
323		 * Specify the supported range of priorities.
324		 */
325		rangep->dl_priority.dl_min = 0;
326		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
327
328		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
329		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
330
331		selp->dl_qos_type = DL_QOS_CL_SEL1;
332		selp->dl_trans_delay = DL_UNKNOWN;
333		selp->dl_protection = DL_UNKNOWN;
334		selp->dl_residual_error = DL_UNKNOWN;
335
336		/*
337		 * Specify the current priority (which can be changed by
338		 * the DL_UDQOS_REQ primitive).
339		 */
340		selp->dl_priority = dsp->ds_pri;
341	} else {
342		/*
343		 * Shorten the buffer to lose the unused QoS information
344		 * structures.
345		 */
346		mp->b_wptr = (uint8_t *)rangep;
347	}
348
349	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
350	if (dsp->ds_dlstate == DL_IDLE) {
351		/*
352		 * The stream is bound. Therefore we can formulate a valid
353		 * DLSAP address.
354		 */
355		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
356		bcopy(dsp->ds_curr_addr, addr, addr_length);
357		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
358	}
359
360done:
361	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
362	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
363	    dlp->dl_qos_range_length != 0));
364	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
365	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
366	    dlp->dl_brdcst_addr_length != 0));
367
368	rw_exit(&dsp->ds_lock);
369
370	qreply(q, mp);
371	return (B_TRUE);
372}
373
374/*
375 * DL_ATTACH_REQ
376 */
377static boolean_t
378proto_attach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
379{
380	dl_attach_req_t	*dlp = (dl_attach_req_t *)udlp;
381	int		err = 0;
382	t_uscalar_t	dl_err;
383	queue_t		*q = dsp->ds_wq;
384
385	rw_enter(&dsp->ds_lock, RW_WRITER);
386
387	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
388	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
389		dl_err = DL_BADPRIM;
390		goto failed;
391	}
392
393	if (dsp->ds_dlstate != DL_UNATTACHED) {
394		dl_err = DL_OUTSTATE;
395		goto failed;
396	}
397
398	dsp->ds_dlstate = DL_ATTACH_PENDING;
399
400	err = dld_str_attach(dsp, dlp->dl_ppa);
401	if (err != 0) {
402		switch (err) {
403		case ENOENT:
404			dl_err = DL_BADPPA;
405			err = 0;
406			break;
407		default:
408			dl_err = DL_SYSERR;
409			break;
410		}
411		dsp->ds_dlstate = DL_UNATTACHED;
412		goto failed;
413	}
414	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
415	rw_exit(&dsp->ds_lock);
416
417	dlokack(q, mp, DL_ATTACH_REQ);
418	return (B_TRUE);
419failed:
420	rw_exit(&dsp->ds_lock);
421	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
422	return (B_FALSE);
423}
424
425/*
426 * DL_DETACH_REQ
427 */
428/*ARGSUSED*/
429static boolean_t
430proto_detach_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
431{
432	queue_t		*q = dsp->ds_wq;
433	t_uscalar_t	dl_err;
434
435	rw_enter(&dsp->ds_lock, RW_WRITER);
436
437	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
438		dl_err = DL_BADPRIM;
439		goto failed;
440	}
441
442	if (dsp->ds_dlstate != DL_UNBOUND) {
443		dl_err = DL_OUTSTATE;
444		goto failed;
445	}
446
447	if (dsp->ds_style == DL_STYLE1) {
448		dl_err = DL_BADPRIM;
449		goto failed;
450	}
451
452	dsp->ds_dlstate = DL_DETACH_PENDING;
453
454	/*
455	 * Complete the detach when the driver is single-threaded.
456	 */
457	mutex_enter(&dsp->ds_thr_lock);
458	ASSERT(dsp->ds_detach_req == NULL);
459	dsp->ds_detach_req = mp;
460	mutex_exit(&dsp->ds_thr_lock);
461	rw_exit(&dsp->ds_lock);
462
463	return (B_TRUE);
464failed:
465	rw_exit(&dsp->ds_lock);
466	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
467	return (B_FALSE);
468}
469
470/*
471 * DL_BIND_REQ
472 */
473static boolean_t
474proto_bind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
475{
476	dl_bind_req_t	*dlp = (dl_bind_req_t *)udlp;
477	int		err = 0;
478	uint8_t		addr[MAXADDRLEN];
479	uint_t		addr_length;
480	t_uscalar_t	dl_err;
481	t_scalar_t	sap;
482	queue_t		*q = dsp->ds_wq;
483
484	rw_enter(&dsp->ds_lock, RW_WRITER);
485
486	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
487		dl_err = DL_BADPRIM;
488		goto failed;
489	}
490
491	if (dlp->dl_xidtest_flg != 0) {
492		dl_err = DL_NOAUTO;
493		goto failed;
494	}
495
496	if (dlp->dl_service_mode != DL_CLDLS) {
497		dl_err = DL_UNSUPPORTED;
498		goto failed;
499	}
500
501	if (dsp->ds_dlstate != DL_UNBOUND) {
502		dl_err = DL_OUTSTATE;
503		goto failed;
504	}
505
506	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
507	    !dls_active_set(dsp->ds_dc)) {
508		dl_err = DL_SYSERR;
509		err = EBUSY;
510		goto failed;
511	}
512
513	dsp->ds_dlstate = DL_BIND_PENDING;
514	/*
515	 * Set the receive callback.
516	 */
517	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_RAW) ?
518	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
519
520	/*
521	 * Bind the channel such that it can receive packets.
522	 */
523	sap = dsp->ds_sap = dlp->dl_sap;
524	err = dls_bind(dsp->ds_dc, dlp->dl_sap);
525	if (err != 0) {
526		switch (err) {
527		case EINVAL:
528			dl_err = DL_BADADDR;
529			err = 0;
530			break;
531		default:
532			dl_err = DL_SYSERR;
533			break;
534		}
535		dsp->ds_dlstate = DL_UNBOUND;
536		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
537			dls_active_clear(dsp->ds_dc);
538
539		goto failed;
540	}
541
542	/*
543	 * Copy in MAC address.
544	 */
545	addr_length = dsp->ds_mip->mi_addr_length;
546	bcopy(dsp->ds_curr_addr, addr, addr_length);
547
548	/*
549	 * Copy in the DLSAP.
550	 */
551	*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
552	addr_length += sizeof (uint16_t);
553
554	dsp->ds_dlstate = DL_IDLE;
555	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
556		dsp->ds_passivestate = DLD_ACTIVE;
557
558	rw_exit(&dsp->ds_lock);
559
560	dlbindack(q, mp, sap, (void *)addr, addr_length, 0, 0);
561	return (B_TRUE);
562failed:
563	rw_exit(&dsp->ds_lock);
564	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
565	return (B_FALSE);
566}
567
568/*
569 * DL_UNBIND_REQ
570 */
571/*ARGSUSED*/
572static boolean_t
573proto_unbind_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
574{
575	queue_t		*q = dsp->ds_wq;
576	t_uscalar_t	dl_err;
577
578	rw_enter(&dsp->ds_lock, RW_WRITER);
579
580	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
581		dl_err = DL_BADPRIM;
582		goto failed;
583	}
584
585	if (dsp->ds_dlstate != DL_IDLE) {
586		dl_err = DL_OUTSTATE;
587		goto failed;
588	}
589
590	dsp->ds_dlstate = DL_UNBIND_PENDING;
591
592	/*
593	 * Flush any remaining packets scheduled for transmission.
594	 */
595	dld_tx_flush(dsp);
596
597	/*
598	 * Unbind the channel to stop packets being received.
599	 */
600	dls_unbind(dsp->ds_dc);
601
602	/*
603	 * Disable polling mode, if it is enabled.
604	 */
605	proto_poll_disable(dsp);
606
607	/*
608	 * Clear the receive callback.
609	 */
610	dls_rx_set(dsp->ds_dc, NULL, NULL);
611
612	/*
613	 * Set the mode back to the default (unitdata).
614	 */
615	dsp->ds_mode = DLD_UNITDATA;
616
617	/*
618	 * If soft rings were enabled, the workers
619	 * should be quiesced. Start a task that will
620	 * get this in motion. We cannot check for
621	 * ds_soft_ring flag because
622	 * proto_soft_ring_disable() called from
623	 * proto_capability_req() would have reset it.
624	 */
625	if (dls_soft_ring_workers(dsp->ds_dc)) {
626		dsp->ds_unbind_req = mp;
627		dsp->ds_task_id = taskq_dispatch(system_taskq,
628		    proto_stop_soft_ring_threads, (void *)dsp, TQ_SLEEP);
629		rw_exit(&dsp->ds_lock);
630		return (B_TRUE);
631	}
632
633	dsp->ds_dlstate = DL_UNBOUND;
634	rw_exit(&dsp->ds_lock);
635
636	dlokack(q, mp, DL_UNBIND_REQ);
637	return (B_TRUE);
638failed:
639	rw_exit(&dsp->ds_lock);
640	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
641	return (B_FALSE);
642}
643
644/*
645 * DL_PROMISCON_REQ
646 */
647static boolean_t
648proto_promiscon_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
649{
650	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)udlp;
651	int		err = 0;
652	t_uscalar_t	dl_err;
653	uint32_t	promisc_saved;
654	queue_t		*q = dsp->ds_wq;
655
656	rw_enter(&dsp->ds_lock, RW_WRITER);
657
658	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
659		dl_err = DL_BADPRIM;
660		goto failed;
661	}
662
663	if (dsp->ds_dlstate == DL_UNATTACHED ||
664	    DL_ACK_PENDING(dsp->ds_dlstate)) {
665		dl_err = DL_OUTSTATE;
666		goto failed;
667	}
668
669	promisc_saved = dsp->ds_promisc;
670	switch (dlp->dl_level) {
671	case DL_PROMISC_SAP:
672		dsp->ds_promisc |= DLS_PROMISC_SAP;
673		break;
674
675	case DL_PROMISC_MULTI:
676		dsp->ds_promisc |= DLS_PROMISC_MULTI;
677		break;
678
679	case DL_PROMISC_PHYS:
680		dsp->ds_promisc |= DLS_PROMISC_PHYS;
681		break;
682
683	default:
684		dl_err = DL_NOTSUPPORTED;
685		goto failed;
686	}
687
688	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
689	    !dls_active_set(dsp->ds_dc)) {
690		dsp->ds_promisc = promisc_saved;
691		dl_err = DL_SYSERR;
692		err = EBUSY;
693		goto failed;
694	}
695
696	/*
697	 * Adjust channel promiscuity.
698	 */
699	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
700	if (err != 0) {
701		dl_err = DL_SYSERR;
702		dsp->ds_promisc = promisc_saved;
703		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
704			dls_active_clear(dsp->ds_dc);
705
706		goto failed;
707	}
708
709	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
710		dsp->ds_passivestate = DLD_ACTIVE;
711
712	rw_exit(&dsp->ds_lock);
713	dlokack(q, mp, DL_PROMISCON_REQ);
714	return (B_TRUE);
715failed:
716	rw_exit(&dsp->ds_lock);
717	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
718	return (B_FALSE);
719}
720
721/*
722 * DL_PROMISCOFF_REQ
723 */
724static boolean_t
725proto_promiscoff_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
726{
727	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)udlp;
728	int		err = 0;
729	t_uscalar_t	dl_err;
730	uint32_t	promisc_saved;
731	queue_t		*q = dsp->ds_wq;
732
733
734	rw_enter(&dsp->ds_lock, RW_WRITER);
735
736	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
737		dl_err = DL_BADPRIM;
738		goto failed;
739	}
740
741	if (dsp->ds_dlstate == DL_UNATTACHED ||
742	    DL_ACK_PENDING(dsp->ds_dlstate)) {
743		dl_err = DL_OUTSTATE;
744		goto failed;
745	}
746
747	promisc_saved = dsp->ds_promisc;
748	switch (dlp->dl_level) {
749	case DL_PROMISC_SAP:
750		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
751			dl_err = DL_NOTENAB;
752			goto failed;
753		}
754		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
755		break;
756
757	case DL_PROMISC_MULTI:
758		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
759			dl_err = DL_NOTENAB;
760			goto failed;
761		}
762		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
763		break;
764
765	case DL_PROMISC_PHYS:
766		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
767			dl_err = DL_NOTENAB;
768			goto failed;
769		}
770		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
771		break;
772
773	default:
774		dl_err = DL_NOTSUPPORTED;
775		goto failed;
776	}
777
778	/*
779	 * Adjust channel promiscuity.
780	 */
781	err = dls_promisc(dsp->ds_dc, dsp->ds_promisc);
782	if (err != 0) {
783		dsp->ds_promisc = promisc_saved;
784		dl_err = DL_SYSERR;
785		goto failed;
786	}
787
788	rw_exit(&dsp->ds_lock);
789	dlokack(q, mp, DL_PROMISCOFF_REQ);
790	return (B_TRUE);
791failed:
792	rw_exit(&dsp->ds_lock);
793	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
794	return (B_FALSE);
795}
796
797/*
798 * DL_ENABMULTI_REQ
799 */
800static boolean_t
801proto_enabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
802{
803	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)udlp;
804	int		err = 0;
805	t_uscalar_t	dl_err;
806	queue_t		*q = dsp->ds_wq;
807
808	rw_enter(&dsp->ds_lock, RW_WRITER);
809
810	if (dsp->ds_dlstate == DL_UNATTACHED ||
811	    DL_ACK_PENDING(dsp->ds_dlstate)) {
812		dl_err = DL_OUTSTATE;
813		goto failed;
814	}
815
816	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
817	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
818	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
819		dl_err = DL_BADPRIM;
820		goto failed;
821	}
822
823	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
824	    !dls_active_set(dsp->ds_dc)) {
825		dl_err = DL_SYSERR;
826		err = EBUSY;
827		goto failed;
828	}
829
830	err = dls_multicst_add(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
831	if (err != 0) {
832		switch (err) {
833		case EINVAL:
834			dl_err = DL_BADADDR;
835			err = 0;
836			break;
837		case ENOSPC:
838			dl_err = DL_TOOMANY;
839			err = 0;
840			break;
841		default:
842			dl_err = DL_SYSERR;
843			break;
844		}
845		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
846			dls_active_clear(dsp->ds_dc);
847
848		goto failed;
849	}
850
851	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
852		dsp->ds_passivestate = DLD_ACTIVE;
853
854	rw_exit(&dsp->ds_lock);
855	dlokack(q, mp, DL_ENABMULTI_REQ);
856	return (B_TRUE);
857failed:
858	rw_exit(&dsp->ds_lock);
859	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
860	return (B_FALSE);
861}
862
863/*
864 * DL_DISABMULTI_REQ
865 */
866static boolean_t
867proto_disabmulti_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
868{
869	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)udlp;
870	int		err = 0;
871	t_uscalar_t	dl_err;
872	queue_t		*q = dsp->ds_wq;
873
874	rw_enter(&dsp->ds_lock, RW_READER);
875
876	if (dsp->ds_dlstate == DL_UNATTACHED ||
877	    DL_ACK_PENDING(dsp->ds_dlstate)) {
878		dl_err = DL_OUTSTATE;
879		goto failed;
880	}
881
882	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
883	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
884	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
885		dl_err = DL_BADPRIM;
886		goto failed;
887	}
888
889	err = dls_multicst_remove(dsp->ds_dc, mp->b_rptr + dlp->dl_addr_offset);
890	if (err != 0) {
891		switch (err) {
892		case EINVAL:
893			dl_err = DL_BADADDR;
894			err = 0;
895			break;
896
897		case ENOENT:
898			dl_err = DL_NOTENAB;
899			err = 0;
900			break;
901
902		default:
903			dl_err = DL_SYSERR;
904			break;
905		}
906		goto failed;
907	}
908
909	rw_exit(&dsp->ds_lock);
910	dlokack(q, mp, DL_DISABMULTI_REQ);
911	return (B_TRUE);
912failed:
913	rw_exit(&dsp->ds_lock);
914	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
915	return (B_FALSE);
916}
917
918/*
919 * DL_PHYS_ADDR_REQ
920 */
921static boolean_t
922proto_physaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
923{
924	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)udlp;
925	queue_t		*q = dsp->ds_wq;
926	t_uscalar_t	dl_err;
927	char		*addr;
928	uint_t		addr_length;
929
930	rw_enter(&dsp->ds_lock, RW_READER);
931
932	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
933		dl_err = DL_BADPRIM;
934		goto failed;
935	}
936
937	if (dsp->ds_dlstate == DL_UNATTACHED ||
938	    DL_ACK_PENDING(dsp->ds_dlstate)) {
939		dl_err = DL_OUTSTATE;
940		goto failed;
941	}
942
943	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
944	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
945		dl_err = DL_UNSUPPORTED;
946		goto failed;
947	}
948
949	addr_length = dsp->ds_mip->mi_addr_length;
950	addr = kmem_alloc(addr_length, KM_NOSLEEP);
951	if (addr == NULL) {
952		rw_exit(&dsp->ds_lock);
953		merror(q, mp, ENOSR);
954		return (B_FALSE);
955	}
956
957	/*
958	 * Copy out the address before we drop the lock; we don't
959	 * want to call dlphysaddrack() while holding ds_lock.
960	 */
961	bcopy((dlp->dl_addr_type == DL_CURR_PHYS_ADDR) ?
962	    dsp->ds_curr_addr : dsp->ds_fact_addr, addr, addr_length);
963
964	rw_exit(&dsp->ds_lock);
965	dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
966	kmem_free(addr, addr_length);
967	return (B_TRUE);
968failed:
969	rw_exit(&dsp->ds_lock);
970	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
971	return (B_FALSE);
972}
973
974/*
975 * DL_SET_PHYS_ADDR_REQ
976 */
977static boolean_t
978proto_setphysaddr_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
979{
980	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)udlp;
981	int		err = 0;
982	t_uscalar_t	dl_err;
983	queue_t		*q = dsp->ds_wq;
984
985	rw_enter(&dsp->ds_lock, RW_WRITER);
986
987	if (dsp->ds_dlstate == DL_UNATTACHED ||
988	    DL_ACK_PENDING(dsp->ds_dlstate)) {
989		dl_err = DL_OUTSTATE;
990		goto failed;
991	}
992
993	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
994	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
995	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
996		dl_err = DL_BADPRIM;
997		goto failed;
998	}
999
1000	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
1001	    !dls_active_set(dsp->ds_dc)) {
1002		dl_err = DL_SYSERR;
1003		err = EBUSY;
1004		goto failed;
1005	}
1006
1007	err = mac_unicst_set(dsp->ds_mh, mp->b_rptr + dlp->dl_addr_offset);
1008	if (err != 0) {
1009		switch (err) {
1010		case EINVAL:
1011			dl_err = DL_BADADDR;
1012			err = 0;
1013			break;
1014
1015		default:
1016			dl_err = DL_SYSERR;
1017			break;
1018		}
1019		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1020			dls_active_clear(dsp->ds_dc);
1021
1022		goto failed;
1023	}
1024	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
1025		dsp->ds_passivestate = DLD_ACTIVE;
1026
1027	rw_exit(&dsp->ds_lock);
1028	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
1029	return (B_TRUE);
1030failed:
1031	rw_exit(&dsp->ds_lock);
1032	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
1033	return (B_FALSE);
1034}
1035
1036/*
1037 * DL_UDQOS_REQ
1038 */
1039static boolean_t
1040proto_udqos_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1041{
1042	dl_udqos_req_t *dlp = (dl_udqos_req_t *)udlp;
1043	dl_qos_cl_sel1_t *selp;
1044	int		off, len;
1045	t_uscalar_t	dl_err;
1046	queue_t		*q = dsp->ds_wq;
1047
1048	off = dlp->dl_qos_offset;
1049	len = dlp->dl_qos_length;
1050
1051	rw_enter(&dsp->ds_lock, RW_WRITER);
1052
1053	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
1054		dl_err = DL_BADPRIM;
1055		goto failed;
1056	}
1057
1058	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
1059	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
1060		dl_err = DL_BADQOSTYPE;
1061		goto failed;
1062	}
1063
1064	if (dsp->ds_vid == VLAN_ID_NONE ||
1065	    selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
1066	    selp->dl_priority < 0) {
1067		dl_err = DL_BADQOSPARAM;
1068		goto failed;
1069	}
1070
1071	dsp->ds_pri = selp->dl_priority;
1072
1073	rw_exit(&dsp->ds_lock);
1074	dlokack(q, mp, DL_UDQOS_REQ);
1075	return (B_TRUE);
1076failed:
1077	rw_exit(&dsp->ds_lock);
1078	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
1079	return (B_FALSE);
1080}
1081
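/*
 * Return B_TRUE if the module immediately above this stream is IP. The
 * polling and soft ring capabilities are only enabled in that case.
 */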
1082static boolean_t
1083check_ip_above(queue_t *q)
1084{
1085	queue_t		*next_q;
1086	boolean_t	ret = B_TRUE;
1087
1088	claimstr(q);
1089	next_q = q->q_next;
1090	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1091		ret = B_FALSE;
1092	releasestr(q);
1093	return (ret);
1094}
1095
1096/*
1097 * DL_CAPABILITY_REQ
1098 */
1099/*ARGSUSED*/
1100static boolean_t
1101proto_capability_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1102{
1103	dl_capability_req_t *dlp = (dl_capability_req_t *)udlp;
1104	dl_capability_sub_t *sp;
1105	size_t		size, len;
1106	offset_t	off, end;
1107	t_uscalar_t	dl_err;
1108	queue_t		*q = dsp->ds_wq;
1109	boolean_t	upgraded;
1110
1111	rw_enter(&dsp->ds_lock, RW_READER);
1112
1113	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1114		dl_err = DL_BADPRIM;
1115		goto failed;
1116	}
1117
1118	if (dsp->ds_dlstate == DL_UNATTACHED ||
1119	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1120		dl_err = DL_OUTSTATE;
1121		goto failed;
1122	}
1123
1124	/*
1125	 * This request is overloaded. If there are no requested capabilities
1126	 * then we just want to acknowledge with all the capabilities we
1127	 * support. Otherwise we enable the set of capabilities requested.
1128	 */
1129	if (dlp->dl_sub_length == 0) {
1130		/* callee drops lock */
1131		return (proto_capability_advertise(dsp, mp));
1132	}
1133
1134	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1135		dl_err = DL_BADPRIM;
1136		goto failed;
1137	}
1138
1139	dlp->dl_primitive = DL_CAPABILITY_ACK;
1140
1141	off = dlp->dl_sub_offset;
1142	len = dlp->dl_sub_length;
1143
1144	/*
1145	 * Walk the list of capabilities to be enabled.
1146	 */
1147	upgraded = B_FALSE;
1148	for (end = off + len; off < end; ) {
1149		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1150		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1151
1152		if (off + size > end ||
1153		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1154			dl_err = DL_BADPRIM;
1155			goto failed;
1156		}
1157
1158		switch (sp->dl_cap) {
1159		/*
1160		 * TCP/IP checksum offload to hardware.
1161		 */
1162		case DL_CAPAB_HCKSUM: {
1163			dl_capab_hcksum_t *hcksump;
1164			dl_capab_hcksum_t hcksum;
1165
1166			ASSERT(dsp->ds_mip->mi_cksum != 0);
1167
1168			hcksump = (dl_capab_hcksum_t *)&sp[1];
1169			/*
1170			 * Copy for alignment.
1171			 */
1172			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1173			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1174			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1175			break;
1176		}
1177
1178		/*
1179		 * IP polling interface.
1180		 */
1181		case DL_CAPAB_POLL: {
1182			dl_capab_dls_t *pollp;
1183			dl_capab_dls_t	poll;
1184
1185			pollp = (dl_capab_dls_t *)&sp[1];
1186			/*
1187			 * Copy for alignment.
1188			 */
1189			bcopy(pollp, &poll, sizeof (dl_capab_dls_t));
1190
1191			/*
1192			 * We need to become writer before enabling and/or
1193			 * disabling the polling interface.  If we couldn't
1194			 * upgrade, check state again after re-acquiring the
1195			 * lock to make sure we can proceed.
1196			 */
1197			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1198				rw_exit(&dsp->ds_lock);
1199				rw_enter(&dsp->ds_lock, RW_WRITER);
1200
1201				if (dsp->ds_dlstate == DL_UNATTACHED ||
1202				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1203					dl_err = DL_OUTSTATE;
1204					goto failed;
1205				}
1206			}
1207			upgraded = B_TRUE;
1208
1209			switch (poll.dls_flags) {
1210			default:
1211				/*FALLTHRU*/
1212			case POLL_DISABLE:
1213				proto_poll_disable(dsp);
1214				break;
1215
1216			case POLL_ENABLE:
1217				ASSERT(!(dld_opt & DLD_OPT_NO_POLL));
1218
1219				/*
1220				 * Make sure polling is disabled.
1221				 */
1222				proto_poll_disable(dsp);
1223
1224				/*
1225				 * Now attempt to enable it.
1226				 */
1227				if (check_ip_above(dsp->ds_rq) &&
1228				    proto_poll_enable(dsp, &poll)) {
1229					bzero(&poll, sizeof (dl_capab_dls_t));
1230					poll.dls_flags = POLL_ENABLE;
1231				}
1232				break;
1233			}
1234
1235			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1236			bcopy(&poll, pollp, sizeof (dl_capab_dls_t));
1237			break;
1238		}
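		/*
		 * Soft ring (receive fanout) interface.
		 */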
1239		case DL_CAPAB_SOFT_RING: {
1240			dl_capab_dls_t *soft_ringp;
1241			dl_capab_dls_t soft_ring;
1242
1243			soft_ringp = (dl_capab_dls_t *)&sp[1];
1244			/*
1245			 * Copy for alignment.
1246			 */
1247			bcopy(soft_ringp, &soft_ring,
1248			    sizeof (dl_capab_dls_t));
1249
1250			/*
1251			 * We need to become writer before enabling and/or
1252			 * disabling the soft_ring interface.  If we couldn't
1253			 * upgrade, check state again after re-acquiring the
1254			 * lock to make sure we can proceed.
1255			 */
1256			if (!upgraded && !rw_tryupgrade(&dsp->ds_lock)) {
1257				rw_exit(&dsp->ds_lock);
1258				rw_enter(&dsp->ds_lock, RW_WRITER);
1259
1260				if (dsp->ds_dlstate == DL_UNATTACHED ||
1261				    DL_ACK_PENDING(dsp->ds_dlstate)) {
1262					dl_err = DL_OUTSTATE;
1263					goto failed;
1264				}
1265			}
1266			upgraded = B_TRUE;
1267
1268			switch (soft_ring.dls_flags) {
1269			default:
1270				/*FALLTHRU*/
1271			case SOFT_RING_DISABLE:
1272				proto_soft_ring_disable(dsp);
1273				break;
1274
1275			case SOFT_RING_ENABLE:
1276				/*
1277				 * Make sure soft_ring is disabled.
1278				 */
1279				proto_soft_ring_disable(dsp);
1280
1281				/*
1282				 * Now attempt to enable it.
1283				 */
1284				if (check_ip_above(dsp->ds_rq) &&
1285				    proto_soft_ring_enable(dsp, &soft_ring)) {
1286					bzero(&soft_ring,
1287					    sizeof (dl_capab_dls_t));
1288					soft_ring.dls_flags =
1289					    SOFT_RING_ENABLE;
1290				} else {
1291					bzero(&soft_ring,
1292					    sizeof (dl_capab_dls_t));
1293					soft_ring.dls_flags =
1294					    SOFT_RING_DISABLE;
1295				}
1296				break;
1297			}
1298
1299			dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1300			bcopy(&soft_ring, soft_ringp,
1301			    sizeof (dl_capab_dls_t));
1302			break;
1303		}
1304		default:
1305			break;
1306		}
1307
1308		off += size;
1309	}
1310	rw_exit(&dsp->ds_lock);
1311	qreply(q, mp);
1312	return (B_TRUE);
1313failed:
1314	rw_exit(&dsp->ds_lock);
1315	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1316	return (B_FALSE);
1317}
1318
1319/*
1320 * DL_NOTIFY_REQ
1321 */
1322static boolean_t
1323proto_notify_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1324{
1325	dl_notify_req_t	*dlp = (dl_notify_req_t *)udlp;
1326	t_uscalar_t	dl_err;
1327	queue_t		*q = dsp->ds_wq;
1328	uint_t		note =
1329	    DL_NOTE_PROMISC_ON_PHYS |
1330	    DL_NOTE_PROMISC_OFF_PHYS |
1331	    DL_NOTE_PHYS_ADDR |
1332	    DL_NOTE_LINK_UP |
1333	    DL_NOTE_LINK_DOWN |
1334	    DL_NOTE_CAPAB_RENEG;
1335
1336	rw_enter(&dsp->ds_lock, RW_WRITER);
1337	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1338		dl_err = DL_BADPRIM;
1339		goto failed;
1340	}
1341
1342	if (dsp->ds_dlstate == DL_UNATTACHED ||
1343	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1344		dl_err = DL_OUTSTATE;
1345		goto failed;
1346	}
1347
1348	if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED])
1349		note |= DL_NOTE_SPEED;
1350
1351	/*
1352	 * Cache the notifications that are being enabled.
1353	 */
1354	dsp->ds_notifications = dlp->dl_notifications & note;
1355	rw_exit(&dsp->ds_lock);
1356	/*
1357	 * The ACK carries all notifications regardless of which set is
1358	 * being enabled.
1359	 */
1360	dlnotifyack(q, mp, note);
1361
1362	/*
1363	 * Solicit DL_NOTIFY_IND messages for each enabled notification.
1364	 */
1365	rw_enter(&dsp->ds_lock, RW_READER);
1366	if (dsp->ds_notifications != 0) {
1367		rw_exit(&dsp->ds_lock);
1368		dld_str_notify_ind(dsp);
1369	} else {
1370		rw_exit(&dsp->ds_lock);
1371	}
1372	return (B_TRUE);
1373failed:
1374	rw_exit(&dsp->ds_lock);
1375	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1376	return (B_FALSE);
1377}
1378
1379/*
1380 * DL_UNITDATA_REQ
1381 */
1382static boolean_t
1383proto_unitdata_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1384{
1385	queue_t			*q = dsp->ds_wq;
1386	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)udlp;
1387	off_t			off;
1388	size_t			len, size;
1389	const uint8_t		*addr;
1390	uint16_t		sap;
1391	uint_t			addr_length;
1392	mblk_t			*bp, *cont;
1393	uint32_t		start, stuff, end, value, flags;
1394	t_uscalar_t		dl_err;
1395
1396	rw_enter(&dsp->ds_lock, RW_READER);
1397
1398	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1399		dl_err = DL_BADPRIM;
1400		goto failed;
1401	}
1402
1403	if (dsp->ds_dlstate != DL_IDLE) {
1404		dl_err = DL_OUTSTATE;
1405		goto failed;
1406	}
1407	addr_length = dsp->ds_mip->mi_addr_length;
1408
1409	off = dlp->dl_dest_addr_offset;
1410	len = dlp->dl_dest_addr_length;
1411
1412	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1413		dl_err = DL_BADPRIM;
1414		goto failed;
1415	}
1416
1417	if (len != addr_length + sizeof (uint16_t)) {
1418		dl_err = DL_BADADDR;
1419		goto failed;
1420	}
1421
1422	addr = mp->b_rptr + off;
1423	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1424
1425	/*
1426	 * Check the length of the packet and the block types.
1427	 */
1428	size = 0;
1429	cont = mp->b_cont;
1430	for (bp = cont; bp != NULL; bp = bp->b_cont) {
1431		if (DB_TYPE(bp) != M_DATA)
1432			goto baddata;
1433
1434		size += MBLKL(bp);
1435	}
1436
1437	if (size > dsp->ds_mip->mi_sdu_max)
1438		goto baddata;
1439
1440	/*
1441	 * Build a packet header.
1442	 */
1443	if ((bp = dls_header(dsp->ds_dc, addr, sap, dsp->ds_pri)) == NULL) {
1444		dl_err = DL_BADADDR;
1445		goto failed;
1446	}
1447
1448	/*
1449	 * We no longer need the M_PROTO header, so free it.
1450	 */
1451	freeb(mp);
1452
1453	/*
1454	 * Transfer the checksum offload information if it is present.
1455	 */
1456	hcksum_retrieve(cont, NULL, NULL, &start, &stuff, &end, &value,
1457	    &flags);
1458	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags,
1459	    0);
1460
1461	/*
1462	 * Link the payload onto the new header.
1463	 */
1464	ASSERT(bp->b_cont == NULL);
1465	bp->b_cont = cont;
1466
1467	str_mdata_fastpath_put(dsp, bp);
1468	rw_exit(&dsp->ds_lock);
1469	return (B_TRUE);
1470failed:
1471	rw_exit(&dsp->ds_lock);
1472	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1473	return (B_FALSE);
1474
1475baddata:
1476	rw_exit(&dsp->ds_lock);
1477	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1478	return (B_FALSE);
1479}
1480
1481/*
1482 * DL_PASSIVE_REQ
1483 */
1484/* ARGSUSED */
1485static boolean_t
1486proto_passive_req(dld_str_t *dsp, union DL_primitives *udlp, mblk_t *mp)
1487{
1488	t_uscalar_t dl_err;
1489
1490	rw_enter(&dsp->ds_lock, RW_WRITER);
1491	/*
1492	 * If we've already become active by issuing an active primitive,
1493	 * then it's too late to try to become passive.
1494	 */
1495	if (dsp->ds_passivestate == DLD_ACTIVE) {
1496		dl_err = DL_OUTSTATE;
1497		goto failed;
1498	}
1499
1500	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1501		dl_err = DL_BADPRIM;
1502		goto failed;
1503	}
1504
1505	dsp->ds_passivestate = DLD_PASSIVE;
1506	rw_exit(&dsp->ds_lock);
1507	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1508	return (B_TRUE);
1509failed:
1510	rw_exit(&dsp->ds_lock);
1511	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1512	return (B_FALSE);
1513}
1514
1515
1516/*
1517 * Catch-all handler.
1518 */
1519static boolean_t
1520proto_req(dld_str_t *dsp, union DL_primitives *dlp, mblk_t *mp)
1521{
1522	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1523	return (B_FALSE);
1524}
1525
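/*
 * Disable the IP polling capability: detach the receive resource callback
 * from the MAC and restore the default STREAMS receive function.
 */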
1526static void
1527proto_poll_disable(dld_str_t *dsp)
1528{
1529	mac_handle_t	mh;
1530
1531	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1532
1533	if (!dsp->ds_polling)
1534		return;
1535
1536	/*
1537	 * It should be impossible to enable raw mode if polling is turned on.
1538	 */
1539	ASSERT(dsp->ds_mode != DLD_RAW);
1540
1541	/*
1542	 * Reset the resource_add callback.
1543	 */
1544	mh = dls_mac(dsp->ds_dc);
1545	mac_resource_set(mh, NULL, NULL);
1546	mac_resources(mh);
1547
1548	/*
1549	 * Set receive function back to default.
1550	 */
1551	dls_rx_set(dsp->ds_dc, (dsp->ds_mode == DLD_FASTPATH) ?
1552	    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1553
1554	/*
1555	 * Note that polling is disabled.
1556	 */
1557	dsp->ds_polling = B_FALSE;
1558}
1559
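/*
 * Enable the IP polling capability using the callbacks supplied by IP in
 * the DL_CAPAB_POLL negotiation. Fails if the stream is in raw mode.
 */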
1560static boolean_t
1561proto_poll_enable(dld_str_t *dsp, dl_capab_dls_t *pollp)
1562{
1563	mac_handle_t	mh;
1564
1565	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1566	ASSERT(!dsp->ds_polling);
1567
1568	/*
1569	 * We cannot enable polling if raw mode
1570	 * has been enabled.
1571	 */
1572	if (dsp->ds_mode == DLD_RAW)
1573		return (B_FALSE);
1574
1575	mh = dls_mac(dsp->ds_dc);
1576
1577	/*
1578	 * Register resources.
1579	 */
1580	mac_resource_set(mh, (mac_resource_add_t)pollp->dls_ring_add,
1581	    (void *)pollp->dls_rx_handle);
1582	mac_resources(mh);
1583
1584	/*
1585	 * Set the receive function.
1586	 */
1587	dls_rx_set(dsp->ds_dc, (dls_rx_t)pollp->dls_rx,
1588	    (void *)pollp->dls_rx_handle);
1589
1590	/*
1591	 * Note that polling is enabled. This prevents further DLIOCHDRINFO
1592	 * ioctls from overwriting the receive function pointer.
1593	 */
1594	dsp->ds_polling = B_TRUE;
1595	return (B_TRUE);
1596}
1597
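/*
 * Disable soft ring fanout and restore the default receive function.
 */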
1598static void
1599proto_soft_ring_disable(dld_str_t *dsp)
1600{
1601	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1602
1603	if (!dsp->ds_soft_ring)
1604		return;
1605
1606	/*
1607	 * It should be impossible to enable raw mode if soft_ring is turned on.
1608	 */
1609	ASSERT(dsp->ds_mode != DLD_RAW);
1610	proto_change_soft_ring_fanout(dsp, SOFT_RING_NONE);
1611	/*
1612	 * Note that fanout is disabled.
1613	 */
1614	dsp->ds_soft_ring = B_FALSE;
1615}
1616
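/*
 * Enable the soft ring capability via the DLS layer. Fails if the stream
 * is in raw mode or if dls_soft_ring_enable() fails.
 */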
1617static boolean_t
1618proto_soft_ring_enable(dld_str_t *dsp, dl_capab_dls_t *soft_ringp)
1619{
1620	ASSERT(RW_WRITE_HELD(&dsp->ds_lock));
1621	ASSERT(!dsp->ds_soft_ring);
1622
1623	/*
1624	 * We cannot enable soft_ring if raw mode
1625	 * has been enabled.
1626	 */
1627	if (dsp->ds_mode == DLD_RAW)
1628		return (B_FALSE);
1629
1630	if (dls_soft_ring_enable(dsp->ds_dc, soft_ringp) == B_FALSE)
1631		return (B_FALSE);
1632
1633	dsp->ds_soft_ring = B_TRUE;
1634	return (B_TRUE);
1635}
1636
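/*
 * Select the receive function handed to the soft ring layer: the normal
 * fastpath/unitdata routine when fanout is off (SOFT_RING_NONE), or the
 * soft ring fanout routine otherwise.
 */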
1637static void
1638proto_change_soft_ring_fanout(dld_str_t *dsp, int type)
1639{
1640	dls_rx_t	rx;
1641
1642	if (type == SOFT_RING_NONE) {
1643		rx = (dsp->ds_mode == DLD_FASTPATH) ?
1644		    dld_str_rx_fastpath : dld_str_rx_unitdata;
1645	} else {
1646		rx = (dls_rx_t)dls_ether_soft_ring_fanout;
1647	}
1648	dls_soft_ring_rx_set(dsp->ds_dc, rx, dsp, type);
1649}
1650
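/*
 * Taskq callback dispatched from proto_unbind_req(): quiesce the soft ring
 * worker threads, complete the transition to DL_UNBOUND and ack the
 * pending DL_UNBIND_REQ.
 */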
1651static void
1652proto_stop_soft_ring_threads(void *arg)
1653{
1654	dld_str_t	*dsp = (dld_str_t *)arg;
1655
1656	rw_enter(&dsp->ds_lock, RW_WRITER);
1657	dls_soft_ring_disable(dsp->ds_dc);
1658	dsp->ds_dlstate = DL_UNBOUND;
1659	rw_exit(&dsp->ds_lock);
1660	dlokack(dsp->ds_wq, dsp->ds_unbind_req, DL_UNBIND_REQ);
1661	rw_enter(&dsp->ds_lock, RW_WRITER);
1662	dsp->ds_task_id = NULL;
1663	rw_exit(&dsp->ds_lock);
1664}
1665
1666/*
1667 * DL_CAPABILITY_ACK/DL_ERROR_ACK
1668 */
1669static boolean_t
1670proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1671{
1672	dl_capability_ack_t	*dlap;
1673	dl_capability_sub_t	*dlsp;
1674	size_t			subsize;
1675	dl_capab_dls_t		poll;
1676	dl_capab_dls_t	soft_ring;
1677	dl_capab_hcksum_t	hcksum;
1678	dl_capab_zerocopy_t	zcopy;
1679	uint8_t			*ptr;
1680	uint32_t		cksum;
1681	boolean_t		poll_cap;
1682	queue_t			*q = dsp->ds_wq;
1683	mblk_t			*mp1;
1684
1685	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1686
1687	/*
1688	 * Initially assume no capabilities.
1689	 */
1690	subsize = 0;
1691
1692	/* Always advertise soft ring capability for GLDv3 drivers */
1693	subsize += sizeof (dl_capability_sub_t) + sizeof (dl_capab_dls_t);
1694
1695	/*
1696	 * Check if polling can be enabled on this interface.
1697	 * If advertising DL_CAPAB_POLL has not been explicitly disabled
1698	 * then reserve space for that capability.
1699	 */
1700	poll_cap = ((dsp->ds_mip->mi_poll & DL_CAPAB_POLL) &&
1701	    !(dld_opt & DLD_OPT_NO_POLL) && (dsp->ds_vid == VLAN_ID_NONE));
1702	if (poll_cap) {
1703		subsize += sizeof (dl_capability_sub_t) +
1704		    sizeof (dl_capab_dls_t);
1705	}
1706
1707	/*
1708	 * If the MAC interface supports checksum offload then reserve
1709	 * space for the DL_CAPAB_HCKSUM capability.
1710	 */
1711	if ((cksum = dsp->ds_mip->mi_cksum) != 0) {
1712		subsize += sizeof (dl_capability_sub_t) +
1713		    sizeof (dl_capab_hcksum_t);
1714	}
1715
1716	/*
1717	 * If DL_CAPAB_ZEROCOPY has not been explicitly disabled then
1718	 * reserve space for it.
1719	 */
1720	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1721		subsize += sizeof (dl_capability_sub_t) +
1722		    sizeof (dl_capab_zerocopy_t);
1723	}
1724
1725	/*
1726	 * If there are no capabilities to advertise or if we
1727	 * can't allocate a response, send a DL_ERROR_ACK.
1728	 */
1729	if ((mp1 = reallocb(mp,
1730	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1731		rw_exit(&dsp->ds_lock);
1732		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1733		return (B_FALSE);
1734	}
1735
1736	mp = mp1;
1737	DB_TYPE(mp) = M_PROTO;
1738	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1739	bzero(mp->b_rptr, MBLKL(mp));
1740	dlap = (dl_capability_ack_t *)mp->b_rptr;
1741	dlap->dl_primitive = DL_CAPABILITY_ACK;
1742	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1743	dlap->dl_sub_length = subsize;
1744	ptr = (uint8_t *)&dlap[1];
1745
1746	/*
1747	 * IP polling interface.
1748	 */
1749	if (poll_cap) {
1750		/*
1751		 * Attempt to disable just in case this is a re-negotiation;
1752		 * we need to become writer before doing so.
1753		 */
1754		if (!rw_tryupgrade(&dsp->ds_lock)) {
1755			rw_exit(&dsp->ds_lock);
1756			rw_enter(&dsp->ds_lock, RW_WRITER);
1757		}
1758
1759		/*
1760		 * Check if polling state has changed after we re-acquired
1761		 * the lock above, so that we don't mis-advertise it.
1762		 */
1763		poll_cap = ((dsp->ds_mip->mi_poll & DL_CAPAB_POLL) &&
1764		    !(dld_opt & DLD_OPT_NO_POLL) &&
1765		    (dsp->ds_vid == VLAN_ID_NONE));
1766
1767		if (!poll_cap) {
1768			int poll_capab_size;
1769
1770			rw_downgrade(&dsp->ds_lock);
1771
1772			poll_capab_size = sizeof (dl_capability_sub_t) +
1773			    sizeof (dl_capab_dls_t);
1774
1775			mp->b_wptr -= poll_capab_size;
1776			subsize -= poll_capab_size;
1777			dlap->dl_sub_length = subsize;
1778		} else {
1779			proto_poll_disable(dsp);
1780
1781			rw_downgrade(&dsp->ds_lock);
1782
1783			dlsp = (dl_capability_sub_t *)ptr;
1784
1785			dlsp->dl_cap = DL_CAPAB_POLL;
1786			dlsp->dl_length = sizeof (dl_capab_dls_t);
1787			ptr += sizeof (dl_capability_sub_t);
1788
1789			bzero(&poll, sizeof (dl_capab_dls_t));
1790			poll.dls_version = POLL_VERSION_1;
1791			poll.dls_flags = POLL_CAPABLE;
1792			poll.dls_tx_handle = (uintptr_t)dsp;
1793			poll.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1794
1795			dlcapabsetqid(&(poll.dls_mid), dsp->ds_rq);
1796			bcopy(&poll, ptr, sizeof (dl_capab_dls_t));
1797			ptr += sizeof (dl_capab_dls_t);
1798		}
1799	}
1800
1801	ASSERT(RW_READ_HELD(&dsp->ds_lock));
1802
1803	dlsp = (dl_capability_sub_t *)ptr;
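	/*
	 * Soft ring capability (advertised unconditionally, see above).
	 */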
1804
1805	dlsp->dl_cap = DL_CAPAB_SOFT_RING;
1806	dlsp->dl_length = sizeof (dl_capab_dls_t);
1807	ptr += sizeof (dl_capability_sub_t);
1808
1809	bzero(&soft_ring, sizeof (dl_capab_dls_t));
1810	soft_ring.dls_version = SOFT_RING_VERSION_1;
1811	soft_ring.dls_flags = SOFT_RING_CAPABLE;
1812	soft_ring.dls_tx_handle = (uintptr_t)dsp;
1813	soft_ring.dls_tx = (uintptr_t)str_mdata_fastpath_put;
1814	soft_ring.dls_ring_change_status =
1815	    (uintptr_t)proto_change_soft_ring_fanout;
1816	soft_ring.dls_ring_bind = (uintptr_t)soft_ring_bind;
1817	soft_ring.dls_ring_unbind = (uintptr_t)soft_ring_unbind;
1818
1819	dlcapabsetqid(&(soft_ring.dls_mid), dsp->ds_rq);
1820	bcopy(&soft_ring, ptr, sizeof (dl_capab_dls_t));
1821	ptr += sizeof (dl_capab_dls_t);
1822
1823	/*
1824	 * TCP/IP checksum offload.
1825	 */
1826	if (cksum != 0) {
1827		dlsp = (dl_capability_sub_t *)ptr;
1828
1829		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1830		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1831		ptr += sizeof (dl_capability_sub_t);
1832
1833		bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1834		hcksum.hcksum_version = HCKSUM_VERSION_1;
1835		hcksum.hcksum_txflags = cksum;
1836
1837		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1838		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1839		ptr += sizeof (dl_capab_hcksum_t);
1840	}
1841
1842	/*
1843	 * Zero copy
1844	 */
1845	if (!(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1846		dlsp = (dl_capability_sub_t *)ptr;
1847
1848		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1849		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1850		ptr += sizeof (dl_capability_sub_t);
1851
1852		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1853		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1854		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1855
1856		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1857		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1858		ptr += sizeof (dl_capab_zerocopy_t);
1859	}
1860
1861	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1862
1863	rw_exit(&dsp->ds_lock);
1864	qreply(q, mp);
1865	return (B_TRUE);
1866}
1867