dld_proto.c revision 8910:b30ab15b8ec0
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Data-Link Driver
28 */
29#include <sys/sysmacros.h>
30#include <sys/strsubr.h>
31#include <sys/strsun.h>
32#include <sys/vlan.h>
33#include <sys/dld_impl.h>
34#include <sys/mac_client.h>
35#include <sys/mac_client_impl.h>
36#include <sys/mac_client_priv.h>
37
/*
 * Signature shared by the DLPI primitive handlers in this file: the stream
 * the request arrived on and the message block carrying the request.
 */
typedef void proto_reqfunc_t(dld_str_t *, mblk_t *);

/* Forward declarations of the file-local primitive handlers. */
static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
    proto_notify_req, proto_passive_req;

/* Helpers defined later in the file. */
static void proto_capability_advertise(dld_str_t *, mblk_t *);
static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);

/*
 * Evaluates to true when 'state' is one of the transient DLPI states
 * (attach, detach, bind or unbind pending). Several handlers below answer
 * DL_OUTSTATE while the stream is in one of these states.
 */
#define	DL_ACK_PENDING(state) \
	((state) == DL_ATTACH_PENDING || \
	(state) == DL_DETACH_PENDING || \
	(state) == DL_BIND_PENDING || \
	(state) == DL_UNBIND_PENDING)
54
55/*
56 * Process a DLPI protocol message.
57 * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
58 * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
59 * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
60 * as 'passive' and forbids it from being subsequently made 'active'
61 * by the above primitives.
62 */
63void
64dld_proto(dld_str_t *dsp, mblk_t *mp)
65{
66	t_uscalar_t		prim;
67
68	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
69		freemsg(mp);
70		return;
71	}
72	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
73
74	switch (prim) {
75	case DL_INFO_REQ:
76		proto_info_req(dsp, mp);
77		break;
78	case DL_BIND_REQ:
79		proto_bind_req(dsp, mp);
80		break;
81	case DL_UNBIND_REQ:
82		proto_unbind_req(dsp, mp);
83		break;
84	case DL_UNITDATA_REQ:
85		proto_unitdata_req(dsp, mp);
86		break;
87	case DL_UDQOS_REQ:
88		proto_udqos_req(dsp, mp);
89		break;
90	case DL_ATTACH_REQ:
91		proto_attach_req(dsp, mp);
92		break;
93	case DL_DETACH_REQ:
94		proto_detach_req(dsp, mp);
95		break;
96	case DL_ENABMULTI_REQ:
97		proto_enabmulti_req(dsp, mp);
98		break;
99	case DL_DISABMULTI_REQ:
100		proto_disabmulti_req(dsp, mp);
101		break;
102	case DL_PROMISCON_REQ:
103		proto_promiscon_req(dsp, mp);
104		break;
105	case DL_PROMISCOFF_REQ:
106		proto_promiscoff_req(dsp, mp);
107		break;
108	case DL_PHYS_ADDR_REQ:
109		proto_physaddr_req(dsp, mp);
110		break;
111	case DL_SET_PHYS_ADDR_REQ:
112		proto_setphysaddr_req(dsp, mp);
113		break;
114	case DL_NOTIFY_REQ:
115		proto_notify_req(dsp, mp);
116		break;
117	case DL_CAPABILITY_REQ:
118		proto_capability_req(dsp, mp);
119		break;
120	case DL_PASSIVE_REQ:
121		proto_passive_req(dsp, mp);
122		break;
123	default:
124		proto_req(dsp, mp);
125		break;
126	}
127}
128
/*
 * Arithmetic negation of x. The whole expansion is parenthesized so the
 * macro behaves as a single operand wherever it is used.
 */
#define	NEG(x)	(-(x))
/*
 * Layout of the DL_INFO_ACK reply built by proto_info_req(): the fixed
 * dl_info_ack_t header followed by the variable parts it points to via
 * offsets computed relative to the start of the header.
 */
typedef struct dl_info_ack_wrapper {
	dl_info_ack_t		dl_info;	/* must stay first */
	/* DLSAP address: MAC address followed by a 16-bit SAP */
	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	/* media broadcast address */
	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
	/* supported QOS range */
	dl_qos_cl_range1_t	dl_qos_range1;
	/* currently selected QOS */
	dl_qos_cl_sel1_t	dl_qos_sel1;
} dl_info_ack_wrapper_t;
137
138/*
139 * DL_INFO_REQ
140 */
141static void
142proto_info_req(dld_str_t *dsp, mblk_t *mp)
143{
144	dl_info_ack_wrapper_t	*dlwp;
145	dl_info_ack_t		*dlp;
146	dl_qos_cl_sel1_t	*selp;
147	dl_qos_cl_range1_t	*rangep;
148	uint8_t			*addr;
149	uint8_t			*brdcst_addr;
150	uint_t			addr_length;
151	uint_t			sap_length;
152	mac_info_t		minfo;
153	mac_info_t		*minfop;
154	queue_t			*q = dsp->ds_wq;
155
156	/*
157	 * Swap the request message for one large enough to contain the
158	 * wrapper structure defined above.
159	 */
160	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
161	    M_PCPROTO, 0)) == NULL)
162		return;
163
164	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
165	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
166
167	dlp = &(dlwp->dl_info);
168	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
169
170	dlp->dl_primitive = DL_INFO_ACK;
171
172	/*
173	 * Set up the sub-structure pointers.
174	 */
175	addr = dlwp->dl_addr;
176	brdcst_addr = dlwp->dl_brdcst_addr;
177	rangep = &(dlwp->dl_qos_range1);
178	selp = &(dlwp->dl_qos_sel1);
179
180	/*
181	 * This driver supports only version 2 connectionless DLPI provider
182	 * nodes.
183	 */
184	dlp->dl_service_mode = DL_CLDLS;
185	dlp->dl_version = DL_VERSION_2;
186
187	/*
188	 * Set the style of the provider
189	 */
190	dlp->dl_provider_style = dsp->ds_style;
191	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
192	    dlp->dl_provider_style == DL_STYLE2);
193
194	/*
195	 * Set the current DLPI state.
196	 */
197	dlp->dl_current_state = dsp->ds_dlstate;
198
199	/*
200	 * Gratuitously set the media type. This is to deal with modules
201	 * that assume the media type is known prior to DL_ATTACH_REQ
202	 * being completed.
203	 */
204	dlp->dl_mac_type = DL_ETHER;
205
206	/*
207	 * If the stream is not at least attached we try to retrieve the
208	 * mac_info using mac_info_get()
209	 */
210	if (dsp->ds_dlstate == DL_UNATTACHED ||
211	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
212	    dsp->ds_dlstate == DL_DETACH_PENDING) {
213		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
214			/*
215			 * Cannot find mac_info. giving up.
216			 */
217			goto done;
218		}
219		minfop = &minfo;
220	} else {
221		minfop = (mac_info_t *)dsp->ds_mip;
222		/* We can only get the sdu if we're attached. */
223		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
224	}
225
226	/*
227	 * Set the media type (properly this time).
228	 */
229	if (dsp->ds_native)
230		dlp->dl_mac_type = minfop->mi_nativemedia;
231	else
232		dlp->dl_mac_type = minfop->mi_media;
233
234	/*
235	 * Set the DLSAP length. We only support 16 bit values and they
236	 * appear after the MAC address portion of DLSAP addresses.
237	 */
238	sap_length = sizeof (uint16_t);
239	dlp->dl_sap_length = NEG(sap_length);
240
241	addr_length = minfop->mi_addr_length;
242
243	/*
244	 * Copy in the media broadcast address.
245	 */
246	if (minfop->mi_brdcst_addr != NULL) {
247		dlp->dl_brdcst_addr_offset =
248		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
249		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
250		dlp->dl_brdcst_addr_length = addr_length;
251	}
252
253	/* Only VLAN links and links that have a normal tag mode support QOS. */
254	if (mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE ||
255	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL) {
256		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
257		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
258
259		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
260		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
261		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
262		rangep->dl_protection.dl_min = DL_UNKNOWN;
263		rangep->dl_protection.dl_max = DL_UNKNOWN;
264		rangep->dl_residual_error = DL_UNKNOWN;
265
266		/*
267		 * Specify the supported range of priorities.
268		 */
269		rangep->dl_priority.dl_min = 0;
270		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
271
272		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
273		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
274
275		selp->dl_qos_type = DL_QOS_CL_SEL1;
276		selp->dl_trans_delay = DL_UNKNOWN;
277		selp->dl_protection = DL_UNKNOWN;
278		selp->dl_residual_error = DL_UNKNOWN;
279
280		/*
281		 * Specify the current priority (which can be changed by
282		 * the DL_UDQOS_REQ primitive).
283		 */
284		selp->dl_priority = dsp->ds_pri;
285	}
286
287	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
288	if (dsp->ds_dlstate == DL_IDLE) {
289		/*
290		 * The stream is bound. Therefore we can formulate a valid
291		 * DLSAP address.
292		 */
293		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
294		if (addr_length > 0)
295			mac_unicast_primary_get(dsp->ds_mh, addr);
296
297		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
298	}
299
300done:
301	ASSERT(IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0));
302	ASSERT(IMPLY(dlp->dl_qos_range_offset != 0,
303	    dlp->dl_qos_range_length != 0));
304	ASSERT(IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0));
305	ASSERT(IMPLY(dlp->dl_brdcst_addr_offset != 0,
306	    dlp->dl_brdcst_addr_length != 0));
307
308	qreply(q, mp);
309}
310
311/*
312 * DL_ATTACH_REQ
313 */
314static void
315proto_attach_req(dld_str_t *dsp, mblk_t *mp)
316{
317	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
318	int		err = 0;
319	t_uscalar_t	dl_err;
320	queue_t		*q = dsp->ds_wq;
321
322	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
323	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
324		dl_err = DL_BADPRIM;
325		goto failed;
326	}
327
328	if (dsp->ds_dlstate != DL_UNATTACHED) {
329		dl_err = DL_OUTSTATE;
330		goto failed;
331	}
332
333	dsp->ds_dlstate = DL_ATTACH_PENDING;
334
335	err = dld_str_attach(dsp, dlp->dl_ppa);
336	if (err != 0) {
337		switch (err) {
338		case ENOENT:
339			dl_err = DL_BADPPA;
340			err = 0;
341			break;
342		default:
343			dl_err = DL_SYSERR;
344			break;
345		}
346		dsp->ds_dlstate = DL_UNATTACHED;
347		goto failed;
348	}
349	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
350	dlokack(q, mp, DL_ATTACH_REQ);
351	return;
352
353failed:
354	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
355}
356
357/*
358 * DL_DETACH_REQ
359 */
360static void
361proto_detach_req(dld_str_t *dsp, mblk_t *mp)
362{
363	queue_t		*q = dsp->ds_wq;
364	t_uscalar_t	dl_err;
365
366	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
367		dl_err = DL_BADPRIM;
368		goto failed;
369	}
370
371	if (dsp->ds_dlstate != DL_UNBOUND) {
372		dl_err = DL_OUTSTATE;
373		goto failed;
374	}
375
376	if (dsp->ds_style == DL_STYLE1) {
377		dl_err = DL_BADPRIM;
378		goto failed;
379	}
380
381	ASSERT(dsp->ds_datathr_cnt == 0);
382	dsp->ds_dlstate = DL_DETACH_PENDING;
383
384	dld_str_detach(dsp);
385	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
386	return;
387
388failed:
389	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
390}
391
/*
 * DL_BIND_REQ
 *
 * Bind the stream to the SAP named in the request so that it can receive
 * packets. On success the stream moves to DL_IDLE and a DL_BIND_ACK
 * carrying the DLSAP address (primary MAC address followed by the 16-bit
 * SAP) is sent. Failures are acknowledged with DL_BADPRIM, DL_NOAUTO,
 * DL_UNSUPPORTED, DL_OUTSTATE, DL_BADADDR (dls_bind() returned EINVAL) or
 * DL_SYSERR.
 */
static void
proto_bind_req(dld_str_t *dsp, mblk_t *mp)
{
	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
	int		err = 0;
	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint_t		dlsap_addr_length;
	t_uscalar_t	dl_err;
	t_scalar_t	sap;
	queue_t		*q = dsp->ds_wq;
	mac_perim_handle_t	mph;
	void		*mdip;
	int32_t		intr_cpu;

	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
		dl_err = DL_BADPRIM;
		goto failed;
	}

	/* Automatic XID/TEST handling is not offered. */
	if (dlp->dl_xidtest_flg != 0) {
		dl_err = DL_NOAUTO;
		goto failed;
	}

	/* Only connectionless service is provided. */
	if (dlp->dl_service_mode != DL_CLDLS) {
		dl_err = DL_UNSUPPORTED;
		goto failed;
	}

	if (dsp->ds_dlstate != DL_UNBOUND) {
		dl_err = DL_OUTSTATE;
		goto failed;
	}

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);

	/*
	 * A stream that is neither active nor passive yet becomes active
	 * with its first active primitive; the passive state itself is only
	 * committed once the bind has succeeded (see below).
	 */
	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
	    ((err = dls_active_set(dsp)) != 0)) {
		dl_err = DL_SYSERR;
		goto failed2;
	}

	dsp->ds_dlstate = DL_BIND_PENDING;
	/*
	 * Set the receive callback.
	 */
	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);

	/*
	 * Bind the channel such that it can receive packets.
	 */
	sap = dlp->dl_sap;
	err = dls_bind(dsp, sap);
	if (err != 0) {
		switch (err) {
		case EINVAL:
			/* DLPI-level error; no errno is reported with it. */
			dl_err = DL_BADADDR;
			err = 0;
			break;
		default:
			dl_err = DL_SYSERR;
			break;
		}

		/* Undo the state change and the activation done above. */
		dsp->ds_dlstate = DL_UNBOUND;
		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
			dls_active_clear(dsp);
		goto failed2;
	}

	/* Collect what is needed for the retarget before leaving the perim. */
	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
	mdip = mac_get_devinfo(dsp->ds_mh);
	mac_perim_exit(mph);

	/*
	 * We do this after we get out of the perim to avoid deadlocks
	 * etc. since part of mac_client_retarget_intr is to walk the
	 * device tree in order to find and retarget the interrupts.
	 */
	mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);

	/*
	 * Copy in MAC address.
	 */
	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);

	/*
	 * Copy in the SAP.
	 */
	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
	dlsap_addr_length += sizeof (uint16_t);

	dsp->ds_dlstate = DL_IDLE;
	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
		dsp->ds_passivestate = DLD_ACTIVE;

	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
	return;

failed2:
	/* Failure while holding the MAC perimeter. */
	mac_perim_exit(mph);
failed:
	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
}
501
502/*
503 * DL_UNBIND_REQ
504 */
505static void
506proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
507{
508	queue_t		*q = dsp->ds_wq;
509	t_uscalar_t	dl_err;
510	mac_perim_handle_t	mph;
511
512	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
513		dl_err = DL_BADPRIM;
514		goto failed;
515	}
516
517	if (dsp->ds_dlstate != DL_IDLE) {
518		dl_err = DL_OUTSTATE;
519		goto failed;
520	}
521
522	mutex_enter(&dsp->ds_lock);
523	while (dsp->ds_datathr_cnt != 0)
524		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
525
526	dsp->ds_dlstate = DL_UNBIND_PENDING;
527	mutex_exit(&dsp->ds_lock);
528
529	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
530	/*
531	 * Unbind the channel to stop packets being received.
532	 */
533	if (dls_unbind(dsp) != 0) {
534		dl_err = DL_OUTSTATE;
535		mac_perim_exit(mph);
536		goto failed;
537	}
538
539	/*
540	 * Disable polling mode, if it is enabled.
541	 */
542	(void) dld_capab_poll_disable(dsp, NULL);
543
544	/*
545	 * Clear LSO flags.
546	 */
547	dsp->ds_lso = B_FALSE;
548	dsp->ds_lso_max = 0;
549
550	/*
551	 * Clear the receive callback.
552	 */
553	dls_rx_set(dsp, NULL, NULL);
554	dsp->ds_direct = B_FALSE;
555
556	/*
557	 * Set the mode back to the default (unitdata).
558	 */
559	dsp->ds_mode = DLD_UNITDATA;
560	dsp->ds_dlstate = DL_UNBOUND;
561
562	mac_perim_exit(mph);
563	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
564	return;
565failed:
566	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
567}
568
569/*
570 * DL_PROMISCON_REQ
571 */
572static void
573proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
574{
575	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
576	int		err = 0;
577	t_uscalar_t	dl_err;
578	uint32_t	promisc_saved;
579	queue_t		*q = dsp->ds_wq;
580	mac_perim_handle_t	mph;
581
582	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
583		dl_err = DL_BADPRIM;
584		goto failed;
585	}
586
587	if (dsp->ds_dlstate == DL_UNATTACHED ||
588	    DL_ACK_PENDING(dsp->ds_dlstate)) {
589		dl_err = DL_OUTSTATE;
590		goto failed;
591	}
592
593	promisc_saved = dsp->ds_promisc;
594	switch (dlp->dl_level) {
595	case DL_PROMISC_SAP:
596		dsp->ds_promisc |= DLS_PROMISC_SAP;
597		break;
598
599	case DL_PROMISC_MULTI:
600		dsp->ds_promisc |= DLS_PROMISC_MULTI;
601		break;
602
603	case DL_PROMISC_PHYS:
604		dsp->ds_promisc |= DLS_PROMISC_PHYS;
605		break;
606
607	default:
608		dl_err = DL_NOTSUPPORTED;
609		goto failed;
610	}
611
612	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
613
614	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
615	    ((err = dls_active_set(dsp)) != 0)) {
616		dsp->ds_promisc = promisc_saved;
617		dl_err = DL_SYSERR;
618		goto failed2;
619	}
620
621	/*
622	 * Adjust channel promiscuity.
623	 */
624	err = dls_promisc(dsp, promisc_saved);
625
626	if (err != 0) {
627		dl_err = DL_SYSERR;
628		dsp->ds_promisc = promisc_saved;
629		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
630			dls_active_clear(dsp);
631		goto failed2;
632	}
633
634	mac_perim_exit(mph);
635
636	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
637		dsp->ds_passivestate = DLD_ACTIVE;
638	dlokack(q, mp, DL_PROMISCON_REQ);
639	return;
640
641failed2:
642	mac_perim_exit(mph);
643failed:
644	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
645}
646
647/*
648 * DL_PROMISCOFF_REQ
649 */
650static void
651proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
652{
653	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
654	int		err = 0;
655	t_uscalar_t	dl_err;
656	uint32_t	promisc_saved;
657	queue_t		*q = dsp->ds_wq;
658	mac_perim_handle_t	mph;
659
660	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
661		dl_err = DL_BADPRIM;
662		goto failed;
663	}
664
665	if (dsp->ds_dlstate == DL_UNATTACHED ||
666	    DL_ACK_PENDING(dsp->ds_dlstate)) {
667		dl_err = DL_OUTSTATE;
668		goto failed;
669	}
670
671	promisc_saved = dsp->ds_promisc;
672	switch (dlp->dl_level) {
673	case DL_PROMISC_SAP:
674		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
675			dl_err = DL_NOTENAB;
676			goto failed;
677		}
678		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
679		break;
680
681	case DL_PROMISC_MULTI:
682		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
683			dl_err = DL_NOTENAB;
684			goto failed;
685		}
686		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
687		break;
688
689	case DL_PROMISC_PHYS:
690		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
691			dl_err = DL_NOTENAB;
692			goto failed;
693		}
694		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
695		break;
696
697	default:
698		dl_err = DL_NOTSUPPORTED;
699		goto failed;
700	}
701
702	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
703	/*
704	 * Adjust channel promiscuity.
705	 */
706	err = dls_promisc(dsp, promisc_saved);
707	mac_perim_exit(mph);
708
709	if (err != 0) {
710		dl_err = DL_SYSERR;
711		goto failed;
712	}
713	dlokack(q, mp, DL_PROMISCOFF_REQ);
714	return;
715failed:
716	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
717}
718
719/*
720 * DL_ENABMULTI_REQ
721 */
722static void
723proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
724{
725	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
726	int		err = 0;
727	t_uscalar_t	dl_err;
728	queue_t		*q = dsp->ds_wq;
729	mac_perim_handle_t	mph;
730
731	if (dsp->ds_dlstate == DL_UNATTACHED ||
732	    DL_ACK_PENDING(dsp->ds_dlstate)) {
733		dl_err = DL_OUTSTATE;
734		goto failed;
735	}
736
737	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
738	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
739	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
740		dl_err = DL_BADPRIM;
741		goto failed;
742	}
743
744	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
745
746	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
747	    ((err = dls_active_set(dsp)) != 0)) {
748		dl_err = DL_SYSERR;
749		goto failed2;
750	}
751
752	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
753
754	if (err != 0) {
755		switch (err) {
756		case EINVAL:
757			dl_err = DL_BADADDR;
758			err = 0;
759			break;
760		case ENOSPC:
761			dl_err = DL_TOOMANY;
762			err = 0;
763			break;
764		default:
765			dl_err = DL_SYSERR;
766			break;
767		}
768		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
769			dls_active_clear(dsp);
770
771		goto failed2;
772	}
773
774	mac_perim_exit(mph);
775
776	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
777		dsp->ds_passivestate = DLD_ACTIVE;
778	dlokack(q, mp, DL_ENABMULTI_REQ);
779	return;
780
781failed2:
782	mac_perim_exit(mph);
783failed:
784	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
785}
786
787/*
788 * DL_DISABMULTI_REQ
789 */
790static void
791proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
792{
793	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
794	int		err = 0;
795	t_uscalar_t	dl_err;
796	queue_t		*q = dsp->ds_wq;
797	mac_perim_handle_t	mph;
798
799	if (dsp->ds_dlstate == DL_UNATTACHED ||
800	    DL_ACK_PENDING(dsp->ds_dlstate)) {
801		dl_err = DL_OUTSTATE;
802		goto failed;
803	}
804
805	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
806	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
807	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
808		dl_err = DL_BADPRIM;
809		goto failed;
810	}
811
812	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
813	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
814	mac_perim_exit(mph);
815
816	if (err != 0) {
817	switch (err) {
818		case EINVAL:
819			dl_err = DL_BADADDR;
820			err = 0;
821			break;
822
823		case ENOENT:
824			dl_err = DL_NOTENAB;
825			err = 0;
826			break;
827
828		default:
829			dl_err = DL_SYSERR;
830			break;
831		}
832		goto failed;
833	}
834	dlokack(q, mp, DL_DISABMULTI_REQ);
835	return;
836failed:
837	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
838}
839
840/*
841 * DL_PHYS_ADDR_REQ
842 */
843static void
844proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
845{
846	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
847	queue_t		*q = dsp->ds_wq;
848	t_uscalar_t	dl_err;
849	char		*addr;
850	uint_t		addr_length;
851
852	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
853		dl_err = DL_BADPRIM;
854		goto failed;
855	}
856
857	if (dsp->ds_dlstate == DL_UNATTACHED ||
858	    DL_ACK_PENDING(dsp->ds_dlstate)) {
859		dl_err = DL_OUTSTATE;
860		goto failed;
861	}
862
863	if (dlp->dl_addr_type != DL_CURR_PHYS_ADDR &&
864	    dlp->dl_addr_type != DL_FACT_PHYS_ADDR) {
865		dl_err = DL_UNSUPPORTED;
866		goto failed;
867	}
868
869	addr_length = dsp->ds_mip->mi_addr_length;
870	if (addr_length > 0) {
871		addr = kmem_alloc(addr_length, KM_SLEEP);
872		if (dlp->dl_addr_type == DL_CURR_PHYS_ADDR)
873			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
874		else
875			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
876
877		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
878		kmem_free(addr, addr_length);
879	} else {
880		dlphysaddrack(q, mp, NULL, 0);
881	}
882	return;
883failed:
884	dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
885}
886
887/*
888 * DL_SET_PHYS_ADDR_REQ
889 */
890static void
891proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
892{
893	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
894	int		err = 0;
895	t_uscalar_t	dl_err;
896	queue_t		*q = dsp->ds_wq;
897	mac_perim_handle_t	mph;
898
899	if (dsp->ds_dlstate == DL_UNATTACHED ||
900	    DL_ACK_PENDING(dsp->ds_dlstate)) {
901		dl_err = DL_OUTSTATE;
902		goto failed;
903	}
904
905	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
906	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
907	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
908		dl_err = DL_BADPRIM;
909		goto failed;
910	}
911
912	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
913
914	if (dsp->ds_passivestate == DLD_UNINITIALIZED &&
915	    ((err = dls_active_set(dsp)) != 0)) {
916		dl_err = DL_SYSERR;
917		goto failed2;
918	}
919
920	err = mac_unicast_primary_set(dsp->ds_mh,
921	    mp->b_rptr + dlp->dl_addr_offset);
922	if (err != 0) {
923		switch (err) {
924		case EINVAL:
925			dl_err = DL_BADADDR;
926			err = 0;
927			break;
928
929		default:
930			dl_err = DL_SYSERR;
931			break;
932		}
933		if (dsp->ds_passivestate == DLD_UNINITIALIZED)
934			dls_active_clear(dsp);
935
936		goto failed2;
937
938	}
939
940	mac_perim_exit(mph);
941
942	if (dsp->ds_passivestate == DLD_UNINITIALIZED)
943		dsp->ds_passivestate = DLD_ACTIVE;
944	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
945	return;
946
947failed2:
948	mac_perim_exit(mph);
949failed:
950	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
951}
952
953/*
954 * DL_UDQOS_REQ
955 */
956static void
957proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
958{
959	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
960	dl_qos_cl_sel1_t *selp;
961	int		off, len;
962	t_uscalar_t	dl_err;
963	queue_t		*q = dsp->ds_wq;
964
965	off = dlp->dl_qos_offset;
966	len = dlp->dl_qos_length;
967
968	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
969		dl_err = DL_BADPRIM;
970		goto failed;
971	}
972
973	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
974	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
975		dl_err = DL_BADQOSTYPE;
976		goto failed;
977	}
978
979	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
980	    selp->dl_priority < 0) {
981		dl_err = DL_BADQOSPARAM;
982		goto failed;
983	}
984
985	dsp->ds_pri = selp->dl_priority;
986	dlokack(q, mp, DL_UDQOS_REQ);
987	return;
988failed:
989	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
990}
991
992static boolean_t
993check_ip_above(queue_t *q)
994{
995	queue_t		*next_q;
996	boolean_t	ret = B_TRUE;
997
998	claimstr(q);
999	next_q = q->q_next;
1000	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, "ip") != 0)
1001		ret = B_FALSE;
1002	releasestr(q);
1003	return (ret);
1004}
1005
1006/*
1007 * DL_CAPABILITY_REQ
1008 */
1009static void
1010proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1011{
1012	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1013	dl_capability_sub_t *sp;
1014	size_t		size, len;
1015	offset_t	off, end;
1016	t_uscalar_t	dl_err;
1017	queue_t		*q = dsp->ds_wq;
1018
1019	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1020		dl_err = DL_BADPRIM;
1021		goto failed;
1022	}
1023
1024	if (dsp->ds_dlstate == DL_UNATTACHED ||
1025	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1026		dl_err = DL_OUTSTATE;
1027		goto failed;
1028	}
1029
1030	/*
1031	 * This request is overloaded. If there are no requested capabilities
1032	 * then we just want to acknowledge with all the capabilities we
1033	 * support. Otherwise we enable the set of capabilities requested.
1034	 */
1035	if (dlp->dl_sub_length == 0) {
1036		proto_capability_advertise(dsp, mp);
1037		return;
1038	}
1039
1040	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1041		dl_err = DL_BADPRIM;
1042		goto failed;
1043	}
1044
1045	dlp->dl_primitive = DL_CAPABILITY_ACK;
1046
1047	off = dlp->dl_sub_offset;
1048	len = dlp->dl_sub_length;
1049
1050	/*
1051	 * Walk the list of capabilities to be enabled.
1052	 */
1053	for (end = off + len; off < end; ) {
1054		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1055		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1056
1057		if (off + size > end ||
1058		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1059			dl_err = DL_BADPRIM;
1060			goto failed;
1061		}
1062
1063		switch (sp->dl_cap) {
1064		/*
1065		 * TCP/IP checksum offload to hardware.
1066		 */
1067		case DL_CAPAB_HCKSUM: {
1068			dl_capab_hcksum_t *hcksump;
1069			dl_capab_hcksum_t hcksum;
1070
1071			hcksump = (dl_capab_hcksum_t *)&sp[1];
1072			/*
1073			 * Copy for alignment.
1074			 */
1075			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1076			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1077			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1078			break;
1079		}
1080
1081		case DL_CAPAB_DLD: {
1082			dl_capab_dld_t	*dldp;
1083			dl_capab_dld_t	dld;
1084
1085			dldp = (dl_capab_dld_t *)&sp[1];
1086			/*
1087			 * Copy for alignment.
1088			 */
1089			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1090			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1091			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1092			break;
1093		}
1094		default:
1095			break;
1096		}
1097		off += size;
1098	}
1099	qreply(q, mp);
1100	return;
1101failed:
1102	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1103}
1104
1105/*
1106 * DL_NOTIFY_REQ
1107 */
1108static void
1109proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1110{
1111	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1112	t_uscalar_t	dl_err;
1113	queue_t		*q = dsp->ds_wq;
1114	uint_t		note =
1115	    DL_NOTE_PROMISC_ON_PHYS |
1116	    DL_NOTE_PROMISC_OFF_PHYS |
1117	    DL_NOTE_PHYS_ADDR |
1118	    DL_NOTE_LINK_UP |
1119	    DL_NOTE_LINK_DOWN |
1120	    DL_NOTE_CAPAB_RENEG |
1121	    DL_NOTE_FASTPATH_FLUSH |
1122	    DL_NOTE_SPEED;
1123
1124	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1125		dl_err = DL_BADPRIM;
1126		goto failed;
1127	}
1128
1129	if (dsp->ds_dlstate == DL_UNATTACHED ||
1130	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1131		dl_err = DL_OUTSTATE;
1132		goto failed;
1133	}
1134
1135	note &= ~(mac_no_notification(dsp->ds_mh));
1136
1137	/*
1138	 * Cache the notifications that are being enabled.
1139	 */
1140	dsp->ds_notifications = dlp->dl_notifications & note;
1141	/*
1142	 * The ACK carries all notifications regardless of which set is
1143	 * being enabled.
1144	 */
1145	dlnotifyack(q, mp, note);
1146
1147	/*
1148	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1149	 */
1150	if (dsp->ds_notifications != 0) {
1151		dld_str_notify_ind(dsp);
1152	}
1153	return;
1154failed:
1155	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1156}
1157
/*
 * DL_UNITDATA_REQ
 *
 * Transmit the payload chained behind the M_PROTO request to the DLSAP
 * address (MAC address followed by a 16-bit SAP) named in the request.
 * Malformed requests are answered with a DL_ERROR_ACK (DL_BADPRIM,
 * DL_OUTSTATE or DL_BADADDR); payload problems (a non-M_DATA block or a
 * payload larger than the maximum SDU) are answered with a DL_UDERROR_IND
 * carrying DL_BADDATA.
 */
void
proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
{
	queue_t			*q = dsp->ds_wq;
	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
	off_t			off;
	size_t			len, size;
	const uint8_t		*addr;
	uint16_t		sap;
	uint_t			addr_length;
	mblk_t			*bp, *payload;
	uint32_t		start, stuff, end, value, flags;
	t_uscalar_t		dl_err;
	uint_t			max_sdu;

	/* The request must be complete and must carry a payload. */
	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
		return;
	}

	/*
	 * Check the state and register as an active data thread under
	 * ds_lock. Every exit path below this point must be paired with
	 * DLD_DATATHR_DCR().
	 */
	mutex_enter(&dsp->ds_lock);
	if (dsp->ds_dlstate != DL_IDLE) {
		mutex_exit(&dsp->ds_lock);
		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
		return;
	}
	DLD_DATATHR_INC(dsp);
	mutex_exit(&dsp->ds_lock);

	addr_length = dsp->ds_mip->mi_addr_length;

	off = dlp->dl_dest_addr_offset;
	len = dlp->dl_dest_addr_length;

	/* The destination must lie within the message, 16-bit aligned. */
	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
		dl_err = DL_BADPRIM;
		goto failed;
	}

	/* A DLSAP address is exactly a MAC address plus a 16-bit SAP. */
	if (len != addr_length + sizeof (uint16_t)) {
		dl_err = DL_BADADDR;
		goto failed;
	}

	addr = mp->b_rptr + off;
	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);

	/*
	 * Check the length of the packet and the block types.
	 */
	size = 0;
	payload = mp->b_cont;
	for (bp = payload; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto baddata;

		size += MBLKL(bp);
	}

	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
	if (size > max_sdu)
		goto baddata;

	/*
	 * Build a packet header.
	 */
	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
	    &payload)) == NULL) {
		dl_err = DL_BADADDR;
		goto failed;
	}

	/*
	 * We no longer need the M_PROTO header, so free it.
	 */
	freeb(mp);

	/*
	 * Transfer the checksum offload information if it is present.
	 */
	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
	    &flags);
	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);

	/*
	 * Link the payload onto the new header.
	 */
	ASSERT(bp->b_cont == NULL);
	bp->b_cont = payload;

	/*
	 * No lock can be held across modules and putnext()'s,
	 * which can happen here with the call from DLD_TX().
	 */
	if (DLD_TX(dsp, bp, 0, 0) != NULL) {
		/* flow-controlled */
		DLD_SETQFULL(dsp);
	}
	DLD_DATATHR_DCR(dsp);
	return;

failed:
	/* Malformed request: answer with a DL_ERROR_ACK. */
	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
	DLD_DATATHR_DCR(dsp);
	return;

baddata:
	/* Unacceptable payload: answer with a DL_UDERROR_IND. */
	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
	DLD_DATATHR_DCR(dsp);
}
1271
1272/*
1273 * DL_PASSIVE_REQ
1274 */
1275static void
1276proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1277{
1278	t_uscalar_t dl_err;
1279
1280	/*
1281	 * If we've already become active by issuing an active primitive,
1282	 * then it's too late to try to become passive.
1283	 */
1284	if (dsp->ds_passivestate == DLD_ACTIVE) {
1285		dl_err = DL_OUTSTATE;
1286		goto failed;
1287	}
1288
1289	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1290		dl_err = DL_BADPRIM;
1291		goto failed;
1292	}
1293
1294	dsp->ds_passivestate = DLD_PASSIVE;
1295	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1296	return;
1297failed:
1298	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1299}
1300
1301
1302/*
1303 * Catch-all handler.
1304 */
1305static void
1306proto_req(dld_str_t *dsp, mblk_t *mp)
1307{
1308	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1309
1310	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1311}
1312
1313static int
1314dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1315{
1316	switch (flags) {
1317	case DLD_ENABLE:
1318		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1319		return (0);
1320
1321	case DLD_DISABLE:
1322		mac_perim_exit((mac_perim_handle_t)data);
1323		return (0);
1324
1325	case DLD_QUERY:
1326		return (mac_perim_held(dsp->ds_mh));
1327	}
1328	return (0);
1329}
1330
1331static int
1332dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1333{
1334	dld_capab_direct_t	*direct = data;
1335
1336	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1337
1338	switch (flags) {
1339	case DLD_ENABLE:
1340		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1341		    direct->di_rx_ch);
1342
1343		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1344		direct->di_tx_dh = dsp;
1345		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1346		direct->di_tx_cb_dh = dsp->ds_mch;
1347		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1348		direct->di_tx_fctl_dh = dsp->ds_mch;
1349
1350		dsp->ds_direct = B_TRUE;
1351
1352		return (0);
1353
1354	case DLD_DISABLE:
1355		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1356		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1357		dsp->ds_direct = B_FALSE;
1358
1359		return (0);
1360	}
1361	return (ENOTSUP);
1362}
1363
1364/*
1365 * dld_capab_poll_enable()
1366 *
1367 * This function is misnamed. All polling  and fanouts are run out of the
1368 * lower mac (in case of VNIC and the only mac in case of NICs). The
1369 * availability of Rx ring and promiscous mode is all taken care between
1370 * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1371 * fanout necessary is done by the soft rings that are part of the
1372 * mac_srs (by default mac_srs sends the packets up via a TCP and
1373 * non TCP soft ring).
1374 *
1375 * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1376 * (the cookie returned when they registered with IP during plumb) as their
1377 * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1378 * function and 1st argument is what the caller registered when they
1379 * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1380 * the function is vnic_rx and argument is vnic_t. For regular NIC
1381 * case, it mac_rx_default and mac_handle_t. As explained above, the
1382 * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1383 * from its stored 2nd argument.
1384 */
1385static int
1386dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1387{
1388	if (dsp->ds_polling)
1389		return (EINVAL);
1390
1391	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1392		return (ENOTSUP);
1393
1394	/*
1395	 * Enable client polling if and only if DLS bypass is possible.
1396	 * Special cases like VLANs need DLS processing in the Rx data path.
1397	 * In such a case we can neither allow the client (IP) to directly
1398	 * poll the softring (since DLS processing hasn't been done) nor can
1399	 * we allow DLS bypass.
1400	 */
1401	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1402		return (ENOTSUP);
1403
1404	/*
1405	 * Register soft ring resources. This will come in handy later if
1406	 * the user decides to modify CPU bindings to use more CPUs for the
1407	 * device in which case we will switch to fanout using soft rings.
1408	 */
1409	mac_resource_set_common(dsp->ds_mch,
1410	    (mac_resource_add_t)poll->poll_ring_add_cf,
1411	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1412	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1413	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1414	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1415	    poll->poll_ring_ch);
1416
1417	mac_client_poll_enable(dsp->ds_mch);
1418
1419	dsp->ds_polling = B_TRUE;
1420	return (0);
1421}
1422
1423/* ARGSUSED */
1424static int
1425dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1426{
1427	if (!dsp->ds_polling)
1428		return (EINVAL);
1429
1430	mac_client_poll_disable(dsp->ds_mch);
1431	mac_resource_set(dsp->ds_mch, NULL, NULL);
1432
1433	dsp->ds_polling = B_FALSE;
1434	return (0);
1435}
1436
1437static int
1438dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1439{
1440	dld_capab_poll_t	*poll = data;
1441
1442	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1443
1444	switch (flags) {
1445	case DLD_ENABLE:
1446		return (dld_capab_poll_enable(dsp, poll));
1447	case DLD_DISABLE:
1448		return (dld_capab_poll_disable(dsp, poll));
1449	}
1450	return (ENOTSUP);
1451}
1452
1453static int
1454dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1455{
1456	dld_capab_lso_t		*lso = data;
1457
1458	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1459
1460	switch (flags) {
1461	case DLD_ENABLE: {
1462		mac_capab_lso_t		mac_lso;
1463
1464		/*
1465		 * Check if LSO is supported on this MAC & enable LSO
1466		 * accordingly.
1467		 */
1468		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1469			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1470			lso->lso_flags = 0;
1471			/* translate the flag for mac clients */
1472			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1473				lso->lso_flags |= DLD_LSO_TX_BASIC_TCP_IPV4;
1474			dsp->ds_lso = B_TRUE;
1475			dsp->ds_lso_max = lso->lso_max;
1476		} else {
1477			dsp->ds_lso = B_FALSE;
1478			dsp->ds_lso_max = 0;
1479			return (ENOTSUP);
1480		}
1481		return (0);
1482	}
1483	case DLD_DISABLE: {
1484		dsp->ds_lso = B_FALSE;
1485		dsp->ds_lso_max = 0;
1486		return (0);
1487	}
1488	}
1489	return (ENOTSUP);
1490}
1491
1492static int
1493dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1494{
1495	int	err;
1496
1497	/*
1498	 * Don't enable direct callback capabilities unless the caller is
1499	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1500	 * the stack initiates capability disable, but due to races, the
1501	 * module insertion may complete before the capability disable
1502	 * completes. So we limit the check to DLD_ENABLE case.
1503	 */
1504	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1505	    (dsp->ds_sap != ETHERTYPE_IP || !check_ip_above(dsp->ds_rq))) {
1506		return (ENOTSUP);
1507	}
1508
1509	switch (type) {
1510	case DLD_CAPAB_DIRECT:
1511		err = dld_capab_direct(dsp, data, flags);
1512		break;
1513
1514	case DLD_CAPAB_POLL:
1515		err =  dld_capab_poll(dsp, data, flags);
1516		break;
1517
1518	case DLD_CAPAB_PERIM:
1519		err = dld_capab_perim(dsp, data, flags);
1520		break;
1521
1522	case DLD_CAPAB_LSO:
1523		err = dld_capab_lso(dsp, data, flags);
1524		break;
1525
1526	default:
1527		err = ENOTSUP;
1528		break;
1529	}
1530
1531	return (err);
1532}
1533
1534/*
1535 * DL_CAPABILITY_ACK/DL_ERROR_ACK
1536 */
1537static void
1538proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1539{
1540	dl_capability_ack_t	*dlap;
1541	dl_capability_sub_t	*dlsp;
1542	size_t			subsize;
1543	dl_capab_dld_t		dld;
1544	dl_capab_hcksum_t	hcksum;
1545	dl_capab_zerocopy_t	zcopy;
1546	uint8_t			*ptr;
1547	queue_t			*q = dsp->ds_wq;
1548	mblk_t			*mp1;
1549	boolean_t		is_vlan;
1550	boolean_t		hcksum_capable = B_FALSE;
1551	boolean_t		zcopy_capable = B_FALSE;
1552	boolean_t		dld_capable = B_FALSE;
1553
1554	/*
1555	 * Initially assume no capabilities.
1556	 */
1557	subsize = 0;
1558	is_vlan = (mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE);
1559
1560	/*
1561	 * Check if checksum offload is supported on this MAC.  Don't
1562	 * advertise DL_CAPAB_HCKSUM if the underlying MAC is VLAN incapable,
1563	 * since it might not be able to do the hardware checksum offload
1564	 * with the correct offset.
1565	 */
1566	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1567	if ((!is_vlan || (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_NATIVEVLAN,
1568	    NULL))) && mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1569	    &hcksum.hcksum_txflags)) {
1570		if (hcksum.hcksum_txflags != 0) {
1571			hcksum_capable = B_TRUE;
1572			subsize += sizeof (dl_capability_sub_t) +
1573			    sizeof (dl_capab_hcksum_t);
1574		}
1575	}
1576
1577	/*
1578	 * Check if zerocopy is supported on this interface.
1579	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1580	 * then reserve space for that capability.
1581	 */
1582	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1583	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1584		zcopy_capable = B_TRUE;
1585		subsize += sizeof (dl_capability_sub_t) +
1586		    sizeof (dl_capab_zerocopy_t);
1587	}
1588
1589	/*
1590	 * Direct capability negotiation interface between IP and DLD
1591	 */
1592	if (dsp->ds_sap == ETHERTYPE_IP && check_ip_above(dsp->ds_rq)) {
1593		dld_capable = B_TRUE;
1594		subsize += sizeof (dl_capability_sub_t) +
1595		    sizeof (dl_capab_dld_t);
1596	}
1597
1598	/*
1599	 * If there are no capabilities to advertise or if we
1600	 * can't allocate a response, send a DL_ERROR_ACK.
1601	 */
1602	if ((mp1 = reallocb(mp,
1603	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1604		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1605		return;
1606	}
1607
1608	mp = mp1;
1609	DB_TYPE(mp) = M_PROTO;
1610	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1611	bzero(mp->b_rptr, MBLKL(mp));
1612	dlap = (dl_capability_ack_t *)mp->b_rptr;
1613	dlap->dl_primitive = DL_CAPABILITY_ACK;
1614	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1615	dlap->dl_sub_length = subsize;
1616	ptr = (uint8_t *)&dlap[1];
1617
1618	/*
1619	 * TCP/IP checksum offload.
1620	 */
1621	if (hcksum_capable) {
1622		dlsp = (dl_capability_sub_t *)ptr;
1623
1624		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1625		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1626		ptr += sizeof (dl_capability_sub_t);
1627
1628		hcksum.hcksum_version = HCKSUM_VERSION_1;
1629		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1630		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1631		ptr += sizeof (dl_capab_hcksum_t);
1632	}
1633
1634	/*
1635	 * Zero copy
1636	 */
1637	if (zcopy_capable) {
1638		dlsp = (dl_capability_sub_t *)ptr;
1639
1640		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1641		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1642		ptr += sizeof (dl_capability_sub_t);
1643
1644		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1645		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1646		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1647
1648		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1649		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1650		ptr += sizeof (dl_capab_zerocopy_t);
1651	}
1652
1653	/*
1654	 * Direct capability negotiation interface between IP and DLD.
1655	 * Refer to dld.h for details.
1656	 */
1657	if (dld_capable) {
1658		dlsp = (dl_capability_sub_t *)ptr;
1659		dlsp->dl_cap = DL_CAPAB_DLD;
1660		dlsp->dl_length = sizeof (dl_capab_dld_t);
1661		ptr += sizeof (dl_capability_sub_t);
1662
1663		bzero(&dld, sizeof (dl_capab_dld_t));
1664		dld.dld_version = DLD_CURRENT_VERSION;
1665		dld.dld_capab = (uintptr_t)dld_capab;
1666		dld.dld_capab_handle = (uintptr_t)dsp;
1667
1668		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1669		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1670		ptr += sizeof (dl_capab_dld_t);
1671	}
1672
1673	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1674	qreply(q, mp);
1675}
1676
1677/*
1678 * Disable any enabled capabilities.
1679 */
1680void
1681dld_capabilities_disable(dld_str_t *dsp)
1682{
1683	if (dsp->ds_polling)
1684		(void) dld_capab_poll_disable(dsp, NULL);
1685}
1686