dld_proto.c revision 12325:8a132ae95aa2
1285SN/A/*
2462SN/A * CDDL HEADER START
3285SN/A *
4285SN/A * The contents of this file are subject to the terms of the
5285SN/A * Common Development and Distribution License (the "License").
6285SN/A * You may not use this file except in compliance with the License.
7285SN/A *
8285SN/A * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9285SN/A * or http://www.opensolaris.org/os/licensing.
10285SN/A * See the License for the specific language governing permissions
11285SN/A * and limitations under the License.
12285SN/A *
13285SN/A * When distributing Covered Code, include this CDDL HEADER in each
14285SN/A * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15285SN/A * If applicable, add the following below this CDDL HEADER, with the
16285SN/A * fields enclosed by brackets "[]" replaced with your own identifying
17285SN/A * information: Portions Copyright [yyyy] [name of copyright owner]
18285SN/A *
19285SN/A * CDDL HEADER END
20285SN/A */
21285SN/A/*
22285SN/A * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23285SN/A */
24285SN/A
25285SN/A/*
26285SN/A * Data-Link Driver
27285SN/A */
28285SN/A#include <sys/sysmacros.h>
29285SN/A#include <sys/strsubr.h>
30285SN/A#include <sys/strsun.h>
31285SN/A#include <sys/vlan.h>
32285SN/A#include <sys/dld_impl.h>
33285SN/A#include <sys/mac_client.h>
34285SN/A#include <sys/mac_client_impl.h>
35285SN/A#include <sys/mac_client_priv.h>
36285SN/A
37285SN/Atypedef void proto_reqfunc_t(dld_str_t *, mblk_t *);
38285SN/A
39285SN/Astatic proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
40285SN/A    proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
41285SN/A    proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
42285SN/A    proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
43285SN/A    proto_notify_req, proto_passive_req;
44285SN/A
45285SN/Astatic void proto_capability_advertise(dld_str_t *, mblk_t *);
46285SN/Astatic int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
47285SN/Astatic boolean_t check_mod_above(queue_t *, const char *);
48285SN/A
/*
 * True when `state' is one of the transitional DLPI states in which an
 * attach, detach, bind or unbind is still outstanding.  The argument is
 * evaluated more than once, so it must not have side effects.
 */
#define	DL_ACK_PENDING(state)			\
	((state) == DL_ATTACH_PENDING ||	\
	(state) == DL_DETACH_PENDING ||		\
	(state) == DL_BIND_PENDING ||		\
	(state) == DL_UNBIND_PENDING)
54
55/*
56 * Process a DLPI protocol message.
57 * The primitives DL_BIND_REQ, DL_ENABMULTI_REQ, DL_PROMISCON_REQ,
58 * DL_SET_PHYS_ADDR_REQ put the data link below our dld_str_t into an
59 * 'active' state. The primitive DL_PASSIVE_REQ marks our dld_str_t
60 * as 'passive' and forbids it from being subsequently made 'active'
61 * by the above primitives.
62 */
63void
64dld_proto(dld_str_t *dsp, mblk_t *mp)
65{
66	t_uscalar_t		prim;
67
68	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
69		freemsg(mp);
70		return;
71	}
72	prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
73
74	switch (prim) {
75	case DL_INFO_REQ:
76		proto_info_req(dsp, mp);
77		break;
78	case DL_BIND_REQ:
79		proto_bind_req(dsp, mp);
80		break;
81	case DL_UNBIND_REQ:
82		proto_unbind_req(dsp, mp);
83		break;
84	case DL_UNITDATA_REQ:
85		proto_unitdata_req(dsp, mp);
86		break;
87	case DL_UDQOS_REQ:
88		proto_udqos_req(dsp, mp);
89		break;
90	case DL_ATTACH_REQ:
91		proto_attach_req(dsp, mp);
92		break;
93	case DL_DETACH_REQ:
94		proto_detach_req(dsp, mp);
95		break;
96	case DL_ENABMULTI_REQ:
97		proto_enabmulti_req(dsp, mp);
98		break;
99	case DL_DISABMULTI_REQ:
100		proto_disabmulti_req(dsp, mp);
101		break;
102	case DL_PROMISCON_REQ:
103		proto_promiscon_req(dsp, mp);
104		break;
105	case DL_PROMISCOFF_REQ:
106		proto_promiscoff_req(dsp, mp);
107		break;
108	case DL_PHYS_ADDR_REQ:
109		proto_physaddr_req(dsp, mp);
110		break;
111	case DL_SET_PHYS_ADDR_REQ:
112		proto_setphysaddr_req(dsp, mp);
113		break;
114	case DL_NOTIFY_REQ:
115		proto_notify_req(dsp, mp);
116		break;
117	case DL_CAPABILITY_REQ:
118		proto_capability_req(dsp, mp);
119		break;
120	case DL_PASSIVE_REQ:
121		proto_passive_req(dsp, mp);
122		break;
123	default:
124		proto_req(dsp, mp);
125		break;
126	}
127}
128
/* Arithmetic negation of `x'; the whole expansion is parenthesized. */
#define	NEG(x)	(-(x))
130typedef struct dl_info_ack_wrapper {
131	dl_info_ack_t		dl_info;
132	uint8_t			dl_addr[MAXMACADDRLEN + sizeof (uint16_t)];
133	uint8_t			dl_brdcst_addr[MAXMACADDRLEN];
134	dl_qos_cl_range1_t	dl_qos_range1;
135	dl_qos_cl_sel1_t	dl_qos_sel1;
136} dl_info_ack_wrapper_t;
137
138/*
139 * DL_INFO_REQ
140 */
141static void
142proto_info_req(dld_str_t *dsp, mblk_t *mp)
143{
144	dl_info_ack_wrapper_t	*dlwp;
145	dl_info_ack_t		*dlp;
146	dl_qos_cl_sel1_t	*selp;
147	dl_qos_cl_range1_t	*rangep;
148	uint8_t			*addr;
149	uint8_t			*brdcst_addr;
150	uint_t			addr_length;
151	uint_t			sap_length;
152	mac_info_t		minfo;
153	mac_info_t		*minfop;
154	queue_t			*q = dsp->ds_wq;
155
156	/*
157	 * Swap the request message for one large enough to contain the
158	 * wrapper structure defined above.
159	 */
160	if ((mp = mexchange(q, mp, sizeof (dl_info_ack_wrapper_t),
161	    M_PCPROTO, 0)) == NULL)
162		return;
163
164	bzero(mp->b_rptr, sizeof (dl_info_ack_wrapper_t));
165	dlwp = (dl_info_ack_wrapper_t *)mp->b_rptr;
166
167	dlp = &(dlwp->dl_info);
168	ASSERT(dlp == (dl_info_ack_t *)mp->b_rptr);
169
170	dlp->dl_primitive = DL_INFO_ACK;
171
172	/*
173	 * Set up the sub-structure pointers.
174	 */
175	addr = dlwp->dl_addr;
176	brdcst_addr = dlwp->dl_brdcst_addr;
177	rangep = &(dlwp->dl_qos_range1);
178	selp = &(dlwp->dl_qos_sel1);
179
180	/*
181	 * This driver supports only version 2 connectionless DLPI provider
182	 * nodes.
183	 */
184	dlp->dl_service_mode = DL_CLDLS;
185	dlp->dl_version = DL_VERSION_2;
186
187	/*
188	 * Set the style of the provider
189	 */
190	dlp->dl_provider_style = dsp->ds_style;
191	ASSERT(dlp->dl_provider_style == DL_STYLE1 ||
192	    dlp->dl_provider_style == DL_STYLE2);
193
194	/*
195	 * Set the current DLPI state.
196	 */
197	dlp->dl_current_state = dsp->ds_dlstate;
198
199	/*
200	 * Gratuitously set the media type. This is to deal with modules
201	 * that assume the media type is known prior to DL_ATTACH_REQ
202	 * being completed.
203	 */
204	dlp->dl_mac_type = DL_ETHER;
205
206	/*
207	 * If the stream is not at least attached we try to retrieve the
208	 * mac_info using mac_info_get()
209	 */
210	if (dsp->ds_dlstate == DL_UNATTACHED ||
211	    dsp->ds_dlstate == DL_ATTACH_PENDING ||
212	    dsp->ds_dlstate == DL_DETACH_PENDING) {
213		if (!mac_info_get(ddi_major_to_name(dsp->ds_major), &minfo)) {
214			/*
215			 * Cannot find mac_info. giving up.
216			 */
217			goto done;
218		}
219		minfop = &minfo;
220	} else {
221		minfop = (mac_info_t *)dsp->ds_mip;
222		/* We can only get the sdu if we're attached. */
223		mac_sdu_get(dsp->ds_mh, &dlp->dl_min_sdu, &dlp->dl_max_sdu);
224	}
225
226	/*
227	 * Set the media type (properly this time).
228	 */
229	if (dsp->ds_native)
230		dlp->dl_mac_type = minfop->mi_nativemedia;
231	else
232		dlp->dl_mac_type = minfop->mi_media;
233
234	/*
235	 * Set the DLSAP length. We only support 16 bit values and they
236	 * appear after the MAC address portion of DLSAP addresses.
237	 */
238	sap_length = sizeof (uint16_t);
239	dlp->dl_sap_length = NEG(sap_length);
240
241	addr_length = minfop->mi_addr_length;
242
243	/*
244	 * Copy in the media broadcast address.
245	 */
246	if (minfop->mi_brdcst_addr != NULL) {
247		dlp->dl_brdcst_addr_offset =
248		    (uintptr_t)brdcst_addr - (uintptr_t)dlp;
249		bcopy(minfop->mi_brdcst_addr, brdcst_addr, addr_length);
250		dlp->dl_brdcst_addr_length = addr_length;
251	}
252
253	/* Only VLAN links and links that have a normal tag mode support QOS. */
254	if ((dsp->ds_mch != NULL &&
255	    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) ||
256	    (dsp->ds_dlp != NULL &&
257	    dsp->ds_dlp->dl_tagmode == LINK_TAGMODE_NORMAL)) {
258		dlp->dl_qos_range_offset = (uintptr_t)rangep - (uintptr_t)dlp;
259		dlp->dl_qos_range_length = sizeof (dl_qos_cl_range1_t);
260
261		rangep->dl_qos_type = DL_QOS_CL_RANGE1;
262		rangep->dl_trans_delay.dl_target_value = DL_UNKNOWN;
263		rangep->dl_trans_delay.dl_accept_value = DL_UNKNOWN;
264		rangep->dl_protection.dl_min = DL_UNKNOWN;
265		rangep->dl_protection.dl_max = DL_UNKNOWN;
266		rangep->dl_residual_error = DL_UNKNOWN;
267
268		/*
269		 * Specify the supported range of priorities.
270		 */
271		rangep->dl_priority.dl_min = 0;
272		rangep->dl_priority.dl_max = (1 << VLAN_PRI_SIZE) - 1;
273
274		dlp->dl_qos_offset = (uintptr_t)selp - (uintptr_t)dlp;
275		dlp->dl_qos_length = sizeof (dl_qos_cl_sel1_t);
276
277		selp->dl_qos_type = DL_QOS_CL_SEL1;
278		selp->dl_trans_delay = DL_UNKNOWN;
279		selp->dl_protection = DL_UNKNOWN;
280		selp->dl_residual_error = DL_UNKNOWN;
281
282		/*
283		 * Specify the current priority (which can be changed by
284		 * the DL_UDQOS_REQ primitive).
285		 */
286		selp->dl_priority = dsp->ds_pri;
287	}
288
289	dlp->dl_addr_length = addr_length + sizeof (uint16_t);
290	if (dsp->ds_dlstate == DL_IDLE) {
291		/*
292		 * The stream is bound. Therefore we can formulate a valid
293		 * DLSAP address.
294		 */
295		dlp->dl_addr_offset = (uintptr_t)addr - (uintptr_t)dlp;
296		if (addr_length > 0)
297			mac_unicast_primary_get(dsp->ds_mh, addr);
298
299		*(uint16_t *)(addr + addr_length) = dsp->ds_sap;
300	}
301
302done:
303	IMPLY(dlp->dl_qos_offset != 0, dlp->dl_qos_length != 0);
304	IMPLY(dlp->dl_qos_range_offset != 0,
305	    dlp->dl_qos_range_length != 0);
306	IMPLY(dlp->dl_addr_offset != 0, dlp->dl_addr_length != 0);
307	IMPLY(dlp->dl_brdcst_addr_offset != 0,
308	    dlp->dl_brdcst_addr_length != 0);
309
310	qreply(q, mp);
311}
312
313/*
314 * DL_ATTACH_REQ
315 */
316static void
317proto_attach_req(dld_str_t *dsp, mblk_t *mp)
318{
319	dl_attach_req_t	*dlp = (dl_attach_req_t *)mp->b_rptr;
320	int		err = 0;
321	t_uscalar_t	dl_err;
322	queue_t		*q = dsp->ds_wq;
323
324	if (MBLKL(mp) < sizeof (dl_attach_req_t) ||
325	    dlp->dl_ppa < 0 || dsp->ds_style == DL_STYLE1) {
326		dl_err = DL_BADPRIM;
327		goto failed;
328	}
329
330	if (dsp->ds_dlstate != DL_UNATTACHED) {
331		dl_err = DL_OUTSTATE;
332		goto failed;
333	}
334
335	dsp->ds_dlstate = DL_ATTACH_PENDING;
336
337	err = dld_str_attach(dsp, dlp->dl_ppa);
338	if (err != 0) {
339		switch (err) {
340		case ENOENT:
341			dl_err = DL_BADPPA;
342			err = 0;
343			break;
344		default:
345			dl_err = DL_SYSERR;
346			break;
347		}
348		dsp->ds_dlstate = DL_UNATTACHED;
349		goto failed;
350	}
351	ASSERT(dsp->ds_dlstate == DL_UNBOUND);
352	dlokack(q, mp, DL_ATTACH_REQ);
353	return;
354
355failed:
356	dlerrorack(q, mp, DL_ATTACH_REQ, dl_err, (t_uscalar_t)err);
357}
358
359/*
360 * DL_DETACH_REQ
361 */
362static void
363proto_detach_req(dld_str_t *dsp, mblk_t *mp)
364{
365	queue_t		*q = dsp->ds_wq;
366	t_uscalar_t	dl_err;
367
368	if (MBLKL(mp) < sizeof (dl_detach_req_t)) {
369		dl_err = DL_BADPRIM;
370		goto failed;
371	}
372
373	if (dsp->ds_dlstate != DL_UNBOUND) {
374		dl_err = DL_OUTSTATE;
375		goto failed;
376	}
377
378	if (dsp->ds_style == DL_STYLE1) {
379		dl_err = DL_BADPRIM;
380		goto failed;
381	}
382
383	ASSERT(dsp->ds_datathr_cnt == 0);
384	dsp->ds_dlstate = DL_DETACH_PENDING;
385
386	dld_str_detach(dsp);
387	dlokack(dsp->ds_wq, mp, DL_DETACH_REQ);
388	return;
389
390failed:
391	dlerrorack(q, mp, DL_DETACH_REQ, dl_err, 0);
392}
393
394/*
395 * DL_BIND_REQ
396 */
397static void
398proto_bind_req(dld_str_t *dsp, mblk_t *mp)
399{
400	dl_bind_req_t	*dlp = (dl_bind_req_t *)mp->b_rptr;
401	int		err = 0;
402	uint8_t		dlsap_addr[MAXMACADDRLEN + sizeof (uint16_t)];
403	uint_t		dlsap_addr_length;
404	t_uscalar_t	dl_err;
405	t_scalar_t	sap;
406	queue_t		*q = dsp->ds_wq;
407	mac_perim_handle_t	mph;
408	void		*mdip;
409	int32_t		intr_cpu;
410
411	if (MBLKL(mp) < sizeof (dl_bind_req_t)) {
412		dl_err = DL_BADPRIM;
413		goto failed;
414	}
415
416	if (dlp->dl_xidtest_flg != 0) {
417		dl_err = DL_NOAUTO;
418		goto failed;
419	}
420
421	if (dlp->dl_service_mode != DL_CLDLS) {
422		dl_err = DL_UNSUPPORTED;
423		goto failed;
424	}
425
426	if (dsp->ds_dlstate != DL_UNBOUND) {
427		dl_err = DL_OUTSTATE;
428		goto failed;
429	}
430
431	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
432
433	if ((err = dls_active_set(dsp)) != 0) {
434		dl_err = DL_SYSERR;
435		goto failed2;
436	}
437
438	dsp->ds_dlstate = DL_BIND_PENDING;
439	/*
440	 * Set the receive callback.
441	 */
442	dls_rx_set(dsp, (dsp->ds_mode == DLD_RAW) ?
443	    dld_str_rx_raw : dld_str_rx_unitdata, dsp);
444
445	/*
446	 * Bind the channel such that it can receive packets.
447	 */
448	sap = dlp->dl_sap;
449	dsp->ds_nonip = !check_mod_above(dsp->ds_rq, "ip") &&
450	    !check_mod_above(dsp->ds_rq, "arp");
451
452	err = dls_bind(dsp, sap);
453	if (err != 0) {
454		switch (err) {
455		case EINVAL:
456			dl_err = DL_BADADDR;
457			err = 0;
458			break;
459		default:
460			dl_err = DL_SYSERR;
461			break;
462		}
463
464		dsp->ds_dlstate = DL_UNBOUND;
465		dls_active_clear(dsp, B_FALSE);
466		goto failed2;
467	}
468
469	intr_cpu = mac_client_intr_cpu(dsp->ds_mch);
470	mdip = mac_get_devinfo(dsp->ds_mh);
471	mac_perim_exit(mph);
472
473	/*
474	 * We do this after we get out of the perim to avoid deadlocks
475	 * etc. since part of mac_client_retarget_intr is to walk the
476	 * device tree in order to find and retarget the interrupts.
477	 */
478	if (intr_cpu != -1)
479		mac_client_set_intr_cpu(mdip, dsp->ds_mch, intr_cpu);
480
481	/*
482	 * Copy in MAC address.
483	 */
484	dlsap_addr_length = dsp->ds_mip->mi_addr_length;
485	mac_unicast_primary_get(dsp->ds_mh, dlsap_addr);
486
487	/*
488	 * Copy in the SAP.
489	 */
490	*(uint16_t *)(dlsap_addr + dlsap_addr_length) = sap;
491	dlsap_addr_length += sizeof (uint16_t);
492
493	dsp->ds_dlstate = DL_IDLE;
494	dlbindack(q, mp, sap, dlsap_addr, dlsap_addr_length, 0, 0);
495	return;
496
497failed2:
498	mac_perim_exit(mph);
499failed:
500	dlerrorack(q, mp, DL_BIND_REQ, dl_err, (t_uscalar_t)err);
501}
502
503/*
504 * DL_UNBIND_REQ
505 */
506static void
507proto_unbind_req(dld_str_t *dsp, mblk_t *mp)
508{
509	queue_t		*q = dsp->ds_wq;
510	t_uscalar_t	dl_err;
511	mac_perim_handle_t	mph;
512
513	if (MBLKL(mp) < sizeof (dl_unbind_req_t)) {
514		dl_err = DL_BADPRIM;
515		goto failed;
516	}
517
518	if (dsp->ds_dlstate != DL_IDLE) {
519		dl_err = DL_OUTSTATE;
520		goto failed;
521	}
522
523	mutex_enter(&dsp->ds_lock);
524	while (dsp->ds_datathr_cnt != 0)
525		cv_wait(&dsp->ds_datathr_cv, &dsp->ds_lock);
526
527	dsp->ds_dlstate = DL_UNBIND_PENDING;
528	mutex_exit(&dsp->ds_lock);
529
530	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
531	/*
532	 * Unbind the channel to stop packets being received.
533	 */
534	dls_unbind(dsp);
535
536	/*
537	 * Disable polling mode, if it is enabled.
538	 */
539	(void) dld_capab_poll_disable(dsp, NULL);
540
541	/*
542	 * Clear LSO flags.
543	 */
544	dsp->ds_lso = B_FALSE;
545	dsp->ds_lso_max = 0;
546
547	/*
548	 * Clear the receive callback.
549	 */
550	dls_rx_set(dsp, NULL, NULL);
551	dsp->ds_direct = B_FALSE;
552
553	/*
554	 * Set the mode back to the default (unitdata).
555	 */
556	dsp->ds_mode = DLD_UNITDATA;
557	dsp->ds_dlstate = DL_UNBOUND;
558
559	dls_active_clear(dsp, B_FALSE);
560	mac_perim_exit(mph);
561	dlokack(dsp->ds_wq, mp, DL_UNBIND_REQ);
562	return;
563failed:
564	dlerrorack(q, mp, DL_UNBIND_REQ, dl_err, 0);
565}
566
567/*
568 * DL_PROMISCON_REQ
569 */
570static void
571proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
572{
573	dl_promiscon_req_t *dlp = (dl_promiscon_req_t *)mp->b_rptr;
574	int		err = 0;
575	t_uscalar_t	dl_err;
576	uint32_t	promisc_saved;
577	queue_t		*q = dsp->ds_wq;
578	mac_perim_handle_t	mph;
579
580	if (MBLKL(mp) < sizeof (dl_promiscon_req_t)) {
581		dl_err = DL_BADPRIM;
582		goto failed;
583	}
584
585	if (dsp->ds_dlstate == DL_UNATTACHED ||
586	    DL_ACK_PENDING(dsp->ds_dlstate)) {
587		dl_err = DL_OUTSTATE;
588		goto failed;
589	}
590
591	promisc_saved = dsp->ds_promisc;
592	switch (dlp->dl_level) {
593	case DL_PROMISC_SAP:
594		dsp->ds_promisc |= DLS_PROMISC_SAP;
595		break;
596
597	case DL_PROMISC_MULTI:
598		dsp->ds_promisc |= DLS_PROMISC_MULTI;
599		break;
600
601	case DL_PROMISC_PHYS:
602		dsp->ds_promisc |= DLS_PROMISC_PHYS;
603		break;
604
605	default:
606		dl_err = DL_NOTSUPPORTED;
607		goto failed;
608	}
609
610	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
611
612	if ((promisc_saved == 0) && (err = dls_active_set(dsp)) != 0) {
613		dsp->ds_promisc = promisc_saved;
614		dl_err = DL_SYSERR;
615		goto failed2;
616	}
617
618	/*
619	 * Adjust channel promiscuity.
620	 */
621	err = dls_promisc(dsp, promisc_saved);
622
623	if (err != 0) {
624		dl_err = DL_SYSERR;
625		dsp->ds_promisc = promisc_saved;
626		if (promisc_saved == 0)
627			dls_active_clear(dsp, B_FALSE);
628		goto failed2;
629	}
630
631	mac_perim_exit(mph);
632
633	dlokack(q, mp, DL_PROMISCON_REQ);
634	return;
635
636failed2:
637	mac_perim_exit(mph);
638failed:
639	dlerrorack(q, mp, DL_PROMISCON_REQ, dl_err, (t_uscalar_t)err);
640}
641
642/*
643 * DL_PROMISCOFF_REQ
644 */
645static void
646proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
647{
648	dl_promiscoff_req_t *dlp = (dl_promiscoff_req_t *)mp->b_rptr;
649	int		err = 0;
650	t_uscalar_t	dl_err;
651	uint32_t	promisc_saved;
652	queue_t		*q = dsp->ds_wq;
653	mac_perim_handle_t	mph;
654
655	if (MBLKL(mp) < sizeof (dl_promiscoff_req_t)) {
656		dl_err = DL_BADPRIM;
657		goto failed;
658	}
659
660	if (dsp->ds_dlstate == DL_UNATTACHED ||
661	    DL_ACK_PENDING(dsp->ds_dlstate)) {
662		dl_err = DL_OUTSTATE;
663		goto failed;
664	}
665
666	promisc_saved = dsp->ds_promisc;
667	switch (dlp->dl_level) {
668	case DL_PROMISC_SAP:
669		if (!(dsp->ds_promisc & DLS_PROMISC_SAP)) {
670			dl_err = DL_NOTENAB;
671			goto failed;
672		}
673		dsp->ds_promisc &= ~DLS_PROMISC_SAP;
674		break;
675
676	case DL_PROMISC_MULTI:
677		if (!(dsp->ds_promisc & DLS_PROMISC_MULTI)) {
678			dl_err = DL_NOTENAB;
679			goto failed;
680		}
681		dsp->ds_promisc &= ~DLS_PROMISC_MULTI;
682		break;
683
684	case DL_PROMISC_PHYS:
685		if (!(dsp->ds_promisc & DLS_PROMISC_PHYS)) {
686			dl_err = DL_NOTENAB;
687			goto failed;
688		}
689		dsp->ds_promisc &= ~DLS_PROMISC_PHYS;
690		break;
691
692	default:
693		dl_err = DL_NOTSUPPORTED;
694		goto failed;
695	}
696
697	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
698	/*
699	 * Adjust channel promiscuity.
700	 */
701	err = dls_promisc(dsp, promisc_saved);
702
703	if (err != 0) {
704		mac_perim_exit(mph);
705		dl_err = DL_SYSERR;
706		goto failed;
707	}
708
709	if (dsp->ds_promisc == 0)
710		dls_active_clear(dsp, B_FALSE);
711
712	mac_perim_exit(mph);
713
714	dlokack(q, mp, DL_PROMISCOFF_REQ);
715	return;
716failed:
717	dlerrorack(q, mp, DL_PROMISCOFF_REQ, dl_err, (t_uscalar_t)err);
718}
719
720/*
721 * DL_ENABMULTI_REQ
722 */
723static void
724proto_enabmulti_req(dld_str_t *dsp, mblk_t *mp)
725{
726	dl_enabmulti_req_t *dlp = (dl_enabmulti_req_t *)mp->b_rptr;
727	int		err = 0;
728	t_uscalar_t	dl_err;
729	queue_t		*q = dsp->ds_wq;
730	mac_perim_handle_t	mph;
731
732	if (dsp->ds_dlstate == DL_UNATTACHED ||
733	    DL_ACK_PENDING(dsp->ds_dlstate)) {
734		dl_err = DL_OUTSTATE;
735		goto failed;
736	}
737
738	if (MBLKL(mp) < sizeof (dl_enabmulti_req_t) ||
739	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
740	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
741		dl_err = DL_BADPRIM;
742		goto failed;
743	}
744
745	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
746
747	if ((dsp->ds_dmap == NULL) && (err = dls_active_set(dsp)) != 0) {
748		dl_err = DL_SYSERR;
749		goto failed2;
750	}
751
752	err = dls_multicst_add(dsp, mp->b_rptr + dlp->dl_addr_offset);
753	if (err != 0) {
754		switch (err) {
755		case EINVAL:
756			dl_err = DL_BADADDR;
757			err = 0;
758			break;
759		case ENOSPC:
760			dl_err = DL_TOOMANY;
761			err = 0;
762			break;
763		default:
764			dl_err = DL_SYSERR;
765			break;
766		}
767		if (dsp->ds_dmap == NULL)
768			dls_active_clear(dsp, B_FALSE);
769		goto failed2;
770	}
771
772	mac_perim_exit(mph);
773
774	dlokack(q, mp, DL_ENABMULTI_REQ);
775	return;
776
777failed2:
778	mac_perim_exit(mph);
779failed:
780	dlerrorack(q, mp, DL_ENABMULTI_REQ, dl_err, (t_uscalar_t)err);
781}
782
783/*
784 * DL_DISABMULTI_REQ
785 */
786static void
787proto_disabmulti_req(dld_str_t *dsp, mblk_t *mp)
788{
789	dl_disabmulti_req_t *dlp = (dl_disabmulti_req_t *)mp->b_rptr;
790	int		err = 0;
791	t_uscalar_t	dl_err;
792	queue_t		*q = dsp->ds_wq;
793	mac_perim_handle_t	mph;
794
795	if (dsp->ds_dlstate == DL_UNATTACHED ||
796	    DL_ACK_PENDING(dsp->ds_dlstate)) {
797		dl_err = DL_OUTSTATE;
798		goto failed;
799	}
800
801	if (MBLKL(mp) < sizeof (dl_disabmulti_req_t) ||
802	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
803	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
804		dl_err = DL_BADPRIM;
805		goto failed;
806	}
807
808	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
809	err = dls_multicst_remove(dsp, mp->b_rptr + dlp->dl_addr_offset);
810	if ((err == 0) && (dsp->ds_dmap == NULL))
811		dls_active_clear(dsp, B_FALSE);
812	mac_perim_exit(mph);
813
814	if (err != 0) {
815	switch (err) {
816		case EINVAL:
817			dl_err = DL_BADADDR;
818			err = 0;
819			break;
820
821		case ENOENT:
822			dl_err = DL_NOTENAB;
823			err = 0;
824			break;
825
826		default:
827			dl_err = DL_SYSERR;
828			break;
829		}
830		goto failed;
831	}
832	dlokack(q, mp, DL_DISABMULTI_REQ);
833	return;
834failed:
835	dlerrorack(q, mp, DL_DISABMULTI_REQ, dl_err, (t_uscalar_t)err);
836}
837
838/*
839 * DL_PHYS_ADDR_REQ
840 */
841static void
842proto_physaddr_req(dld_str_t *dsp, mblk_t *mp)
843{
844	dl_phys_addr_req_t *dlp = (dl_phys_addr_req_t *)mp->b_rptr;
845	queue_t		*q = dsp->ds_wq;
846	t_uscalar_t	dl_err = 0;
847	char		*addr = NULL;
848	uint_t		addr_length;
849
850	if (MBLKL(mp) < sizeof (dl_phys_addr_req_t)) {
851		dl_err = DL_BADPRIM;
852		goto done;
853	}
854
855	if (dsp->ds_dlstate == DL_UNATTACHED ||
856	    DL_ACK_PENDING(dsp->ds_dlstate)) {
857		dl_err = DL_OUTSTATE;
858		goto done;
859	}
860
861	addr_length = dsp->ds_mip->mi_addr_length;
862	if (addr_length > 0) {
863		addr = kmem_alloc(addr_length, KM_SLEEP);
864		switch (dlp->dl_addr_type) {
865		case DL_CURR_PHYS_ADDR:
866			mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)addr);
867			break;
868		case DL_FACT_PHYS_ADDR:
869			bcopy(dsp->ds_mip->mi_unicst_addr, addr, addr_length);
870			break;
871		case DL_CURR_DEST_ADDR:
872			if (!mac_dst_get(dsp->ds_mh, (uint8_t *)addr))
873				dl_err = DL_NOTSUPPORTED;
874			break;
875		default:
876			dl_err = DL_UNSUPPORTED;
877		}
878	}
879done:
880	if (dl_err == 0)
881		dlphysaddrack(q, mp, addr, (t_uscalar_t)addr_length);
882	else
883		dlerrorack(q, mp, DL_PHYS_ADDR_REQ, dl_err, 0);
884	if (addr != NULL)
885		kmem_free(addr, addr_length);
886}
887
888/*
889 * DL_SET_PHYS_ADDR_REQ
890 */
891static void
892proto_setphysaddr_req(dld_str_t *dsp, mblk_t *mp)
893{
894	dl_set_phys_addr_req_t *dlp = (dl_set_phys_addr_req_t *)mp->b_rptr;
895	int		err = 0;
896	t_uscalar_t	dl_err;
897	queue_t		*q = dsp->ds_wq;
898	mac_perim_handle_t	mph;
899
900	if (dsp->ds_dlstate == DL_UNATTACHED ||
901	    DL_ACK_PENDING(dsp->ds_dlstate)) {
902		dl_err = DL_OUTSTATE;
903		goto failed;
904	}
905
906	if (MBLKL(mp) < sizeof (dl_set_phys_addr_req_t) ||
907	    !MBLKIN(mp, dlp->dl_addr_offset, dlp->dl_addr_length) ||
908	    dlp->dl_addr_length != dsp->ds_mip->mi_addr_length) {
909		dl_err = DL_BADPRIM;
910		goto failed;
911	}
912
913	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
914
915	if ((err = dls_active_set(dsp)) != 0) {
916		dl_err = DL_SYSERR;
917		goto failed2;
918	}
919
920	/*
921	 * If mac-nospoof is enabled and the link is owned by a
922	 * non-global zone, changing the mac address is not allowed.
923	 */
924	if (dsp->ds_dlp->dl_zid != GLOBAL_ZONEID &&
925	    mac_protect_enabled(dsp->ds_mch, MPT_MACNOSPOOF)) {
926		dls_active_clear(dsp, B_FALSE);
927		err = EACCES;
928		goto failed2;
929	}
930
931	err = mac_unicast_primary_set(dsp->ds_mh,
932	    mp->b_rptr + dlp->dl_addr_offset);
933	if (err != 0) {
934		switch (err) {
935		case EINVAL:
936			dl_err = DL_BADADDR;
937			err = 0;
938			break;
939
940		default:
941			dl_err = DL_SYSERR;
942			break;
943		}
944		dls_active_clear(dsp, B_FALSE);
945		goto failed2;
946
947	}
948
949	mac_perim_exit(mph);
950
951	dlokack(q, mp, DL_SET_PHYS_ADDR_REQ);
952	return;
953
954failed2:
955	mac_perim_exit(mph);
956failed:
957	dlerrorack(q, mp, DL_SET_PHYS_ADDR_REQ, dl_err, (t_uscalar_t)err);
958}
959
960/*
961 * DL_UDQOS_REQ
962 */
963static void
964proto_udqos_req(dld_str_t *dsp, mblk_t *mp)
965{
966	dl_udqos_req_t *dlp = (dl_udqos_req_t *)mp->b_rptr;
967	dl_qos_cl_sel1_t *selp;
968	int		off, len;
969	t_uscalar_t	dl_err;
970	queue_t		*q = dsp->ds_wq;
971
972	off = dlp->dl_qos_offset;
973	len = dlp->dl_qos_length;
974
975	if (MBLKL(mp) < sizeof (dl_udqos_req_t) || !MBLKIN(mp, off, len)) {
976		dl_err = DL_BADPRIM;
977		goto failed;
978	}
979
980	selp = (dl_qos_cl_sel1_t *)(mp->b_rptr + off);
981	if (selp->dl_qos_type != DL_QOS_CL_SEL1) {
982		dl_err = DL_BADQOSTYPE;
983		goto failed;
984	}
985
986	if (selp->dl_priority > (1 << VLAN_PRI_SIZE) - 1 ||
987	    selp->dl_priority < 0) {
988		dl_err = DL_BADQOSPARAM;
989		goto failed;
990	}
991
992	dsp->ds_pri = selp->dl_priority;
993	dlokack(q, mp, DL_UDQOS_REQ);
994	return;
995failed:
996	dlerrorack(q, mp, DL_UDQOS_REQ, dl_err, 0);
997}
998
999static boolean_t
1000check_mod_above(queue_t *q, const char *mod)
1001{
1002	queue_t		*next_q;
1003	boolean_t	ret = B_TRUE;
1004
1005	claimstr(q);
1006	next_q = q->q_next;
1007	if (strcmp(next_q->q_qinfo->qi_minfo->mi_idname, mod) != 0)
1008		ret = B_FALSE;
1009	releasestr(q);
1010	return (ret);
1011}
1012
1013/*
1014 * DL_CAPABILITY_REQ
1015 */
1016static void
1017proto_capability_req(dld_str_t *dsp, mblk_t *mp)
1018{
1019	dl_capability_req_t *dlp = (dl_capability_req_t *)mp->b_rptr;
1020	dl_capability_sub_t *sp;
1021	size_t		size, len;
1022	offset_t	off, end;
1023	t_uscalar_t	dl_err;
1024	queue_t		*q = dsp->ds_wq;
1025
1026	if (MBLKL(mp) < sizeof (dl_capability_req_t)) {
1027		dl_err = DL_BADPRIM;
1028		goto failed;
1029	}
1030
1031	if (dsp->ds_dlstate == DL_UNATTACHED ||
1032	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1033		dl_err = DL_OUTSTATE;
1034		goto failed;
1035	}
1036
1037	/*
1038	 * This request is overloaded. If there are no requested capabilities
1039	 * then we just want to acknowledge with all the capabilities we
1040	 * support. Otherwise we enable the set of capabilities requested.
1041	 */
1042	if (dlp->dl_sub_length == 0) {
1043		proto_capability_advertise(dsp, mp);
1044		return;
1045	}
1046
1047	if (!MBLKIN(mp, dlp->dl_sub_offset, dlp->dl_sub_length)) {
1048		dl_err = DL_BADPRIM;
1049		goto failed;
1050	}
1051
1052	dlp->dl_primitive = DL_CAPABILITY_ACK;
1053
1054	off = dlp->dl_sub_offset;
1055	len = dlp->dl_sub_length;
1056
1057	/*
1058	 * Walk the list of capabilities to be enabled.
1059	 */
1060	for (end = off + len; off < end; ) {
1061		sp = (dl_capability_sub_t *)(mp->b_rptr + off);
1062		size = sizeof (dl_capability_sub_t) + sp->dl_length;
1063
1064		if (off + size > end ||
1065		    !IS_P2ALIGNED(off, sizeof (uint32_t))) {
1066			dl_err = DL_BADPRIM;
1067			goto failed;
1068		}
1069
1070		switch (sp->dl_cap) {
1071		/*
1072		 * TCP/IP checksum offload to hardware.
1073		 */
1074		case DL_CAPAB_HCKSUM: {
1075			dl_capab_hcksum_t *hcksump;
1076			dl_capab_hcksum_t hcksum;
1077
1078			hcksump = (dl_capab_hcksum_t *)&sp[1];
1079			/*
1080			 * Copy for alignment.
1081			 */
1082			bcopy(hcksump, &hcksum, sizeof (dl_capab_hcksum_t));
1083			dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1084			bcopy(&hcksum, hcksump, sizeof (dl_capab_hcksum_t));
1085			break;
1086		}
1087
1088		case DL_CAPAB_DLD: {
1089			dl_capab_dld_t	*dldp;
1090			dl_capab_dld_t	dld;
1091
1092			dldp = (dl_capab_dld_t *)&sp[1];
1093			/*
1094			 * Copy for alignment.
1095			 */
1096			bcopy(dldp, &dld, sizeof (dl_capab_dld_t));
1097			dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1098			bcopy(&dld, dldp, sizeof (dl_capab_dld_t));
1099			break;
1100		}
1101		default:
1102			break;
1103		}
1104		off += size;
1105	}
1106	qreply(q, mp);
1107	return;
1108failed:
1109	dlerrorack(q, mp, DL_CAPABILITY_REQ, dl_err, 0);
1110}
1111
1112/*
1113 * DL_NOTIFY_REQ
1114 */
1115static void
1116proto_notify_req(dld_str_t *dsp, mblk_t *mp)
1117{
1118	dl_notify_req_t	*dlp = (dl_notify_req_t *)mp->b_rptr;
1119	t_uscalar_t	dl_err;
1120	queue_t		*q = dsp->ds_wq;
1121	uint_t		note =
1122	    DL_NOTE_PROMISC_ON_PHYS |
1123	    DL_NOTE_PROMISC_OFF_PHYS |
1124	    DL_NOTE_PHYS_ADDR |
1125	    DL_NOTE_LINK_UP |
1126	    DL_NOTE_LINK_DOWN |
1127	    DL_NOTE_CAPAB_RENEG |
1128	    DL_NOTE_FASTPATH_FLUSH |
1129	    DL_NOTE_SPEED |
1130	    DL_NOTE_SDU_SIZE;
1131
1132	if (MBLKL(mp) < sizeof (dl_notify_req_t)) {
1133		dl_err = DL_BADPRIM;
1134		goto failed;
1135	}
1136
1137	if (dsp->ds_dlstate == DL_UNATTACHED ||
1138	    DL_ACK_PENDING(dsp->ds_dlstate)) {
1139		dl_err = DL_OUTSTATE;
1140		goto failed;
1141	}
1142
1143	note &= ~(mac_no_notification(dsp->ds_mh));
1144
1145	/*
1146	 * Cache the notifications that are being enabled.
1147	 */
1148	dsp->ds_notifications = dlp->dl_notifications & note;
1149	/*
1150	 * The ACK carries all notifications regardless of which set is
1151	 * being enabled.
1152	 */
1153	dlnotifyack(q, mp, note);
1154
1155	/*
1156	 * Generate DL_NOTIFY_IND messages for each enabled notification.
1157	 */
1158	if (dsp->ds_notifications != 0) {
1159		dld_str_notify_ind(dsp);
1160	}
1161	return;
1162failed:
1163	dlerrorack(q, mp, DL_NOTIFY_REQ, dl_err, 0);
1164}
1165
1166/*
1167 * DL_UINTDATA_REQ
1168 */
1169void
1170proto_unitdata_req(dld_str_t *dsp, mblk_t *mp)
1171{
1172	queue_t			*q = dsp->ds_wq;
1173	dl_unitdata_req_t	*dlp = (dl_unitdata_req_t *)mp->b_rptr;
1174	off_t			off;
1175	size_t			len, size;
1176	const uint8_t		*addr;
1177	uint16_t		sap;
1178	uint_t			addr_length;
1179	mblk_t			*bp, *payload;
1180	uint32_t		start, stuff, end, value, flags;
1181	t_uscalar_t		dl_err;
1182	uint_t			max_sdu;
1183
1184	if (MBLKL(mp) < sizeof (dl_unitdata_req_t) || mp->b_cont == NULL) {
1185		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_BADPRIM, 0);
1186		return;
1187	}
1188
1189	mutex_enter(&dsp->ds_lock);
1190	if (dsp->ds_dlstate != DL_IDLE) {
1191		mutex_exit(&dsp->ds_lock);
1192		dlerrorack(q, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0);
1193		return;
1194	}
1195	DLD_DATATHR_INC(dsp);
1196	mutex_exit(&dsp->ds_lock);
1197
1198	addr_length = dsp->ds_mip->mi_addr_length;
1199
1200	off = dlp->dl_dest_addr_offset;
1201	len = dlp->dl_dest_addr_length;
1202
1203	if (!MBLKIN(mp, off, len) || !IS_P2ALIGNED(off, sizeof (uint16_t))) {
1204		dl_err = DL_BADPRIM;
1205		goto failed;
1206	}
1207
1208	if (len != addr_length + sizeof (uint16_t)) {
1209		dl_err = DL_BADADDR;
1210		goto failed;
1211	}
1212
1213	addr = mp->b_rptr + off;
1214	sap = *(uint16_t *)(mp->b_rptr + off + addr_length);
1215
1216	/*
1217	 * Check the length of the packet and the block types.
1218	 */
1219	size = 0;
1220	payload = mp->b_cont;
1221	for (bp = payload; bp != NULL; bp = bp->b_cont) {
1222		if (DB_TYPE(bp) != M_DATA)
1223			goto baddata;
1224
1225		size += MBLKL(bp);
1226	}
1227
1228	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
1229	if (size > max_sdu)
1230		goto baddata;
1231
1232	/*
1233	 * Build a packet header.
1234	 */
1235	if ((bp = dls_header(dsp, addr, sap, dlp->dl_priority.dl_max,
1236	    &payload)) == NULL) {
1237		dl_err = DL_BADADDR;
1238		goto failed;
1239	}
1240
1241	/*
1242	 * We no longer need the M_PROTO header, so free it.
1243	 */
1244	freeb(mp);
1245
1246	/*
1247	 * Transfer the checksum offload information if it is present.
1248	 */
1249	hcksum_retrieve(payload, NULL, NULL, &start, &stuff, &end, &value,
1250	    &flags);
1251	(void) hcksum_assoc(bp, NULL, NULL, start, stuff, end, value, flags, 0);
1252
1253	/*
1254	 * Link the payload onto the new header.
1255	 */
1256	ASSERT(bp->b_cont == NULL);
1257	bp->b_cont = payload;
1258
1259	/*
1260	 * No lock can be held across modules and putnext()'s,
1261	 * which can happen here with the call from DLD_TX().
1262	 */
1263	if (DLD_TX(dsp, bp, 0, 0) != NULL) {
1264		/* flow-controlled */
1265		DLD_SETQFULL(dsp);
1266	}
1267	DLD_DATATHR_DCR(dsp);
1268	return;
1269
1270failed:
1271	dlerrorack(q, mp, DL_UNITDATA_REQ, dl_err, 0);
1272	DLD_DATATHR_DCR(dsp);
1273	return;
1274
1275baddata:
1276	dluderrorind(q, mp, (void *)addr, len, DL_BADDATA, 0);
1277	DLD_DATATHR_DCR(dsp);
1278}
1279
1280/*
1281 * DL_PASSIVE_REQ
1282 */
1283static void
1284proto_passive_req(dld_str_t *dsp, mblk_t *mp)
1285{
1286	t_uscalar_t dl_err;
1287
1288	/*
1289	 * If we've already become active by issuing an active primitive,
1290	 * then it's too late to try to become passive.
1291	 */
1292	if (dsp->ds_passivestate == DLD_ACTIVE) {
1293		dl_err = DL_OUTSTATE;
1294		goto failed;
1295	}
1296
1297	if (MBLKL(mp) < sizeof (dl_passive_req_t)) {
1298		dl_err = DL_BADPRIM;
1299		goto failed;
1300	}
1301
1302	dsp->ds_passivestate = DLD_PASSIVE;
1303	dlokack(dsp->ds_wq, mp, DL_PASSIVE_REQ);
1304	return;
1305failed:
1306	dlerrorack(dsp->ds_wq, mp, DL_PASSIVE_REQ, dl_err, 0);
1307}
1308
1309
1310/*
1311 * Catch-all handler.
1312 */
1313static void
1314proto_req(dld_str_t *dsp, mblk_t *mp)
1315{
1316	union DL_primitives	*dlp = (union DL_primitives *)mp->b_rptr;
1317
1318	dlerrorack(dsp->ds_wq, mp, dlp->dl_primitive, DL_UNSUPPORTED, 0);
1319}
1320
1321static int
1322dld_capab_perim(dld_str_t *dsp, void *data, uint_t flags)
1323{
1324	switch (flags) {
1325	case DLD_ENABLE:
1326		mac_perim_enter_by_mh(dsp->ds_mh, (mac_perim_handle_t *)data);
1327		return (0);
1328
1329	case DLD_DISABLE:
1330		mac_perim_exit((mac_perim_handle_t)data);
1331		return (0);
1332
1333	case DLD_QUERY:
1334		return (mac_perim_held(dsp->ds_mh));
1335	}
1336	return (0);
1337}
1338
1339static int
1340dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
1341{
1342	dld_capab_direct_t	*direct = data;
1343
1344	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1345
1346	switch (flags) {
1347	case DLD_ENABLE:
1348		dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
1349		    direct->di_rx_ch);
1350
1351		direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
1352		direct->di_tx_dh = dsp;
1353		direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
1354		direct->di_tx_cb_dh = dsp->ds_mch;
1355		direct->di_tx_fctl_df = (uintptr_t)mac_tx_is_flow_blocked;
1356		direct->di_tx_fctl_dh = dsp->ds_mch;
1357
1358		dsp->ds_direct = B_TRUE;
1359
1360		return (0);
1361
1362	case DLD_DISABLE:
1363		dls_rx_set(dsp, (dsp->ds_mode == DLD_FASTPATH) ?
1364		    dld_str_rx_fastpath : dld_str_rx_unitdata, (void *)dsp);
1365		dsp->ds_direct = B_FALSE;
1366
1367		return (0);
1368	}
1369	return (ENOTSUP);
1370}
1371
1372/*
1373 * dld_capab_poll_enable()
1374 *
1375 * This function is misnamed. All polling  and fanouts are run out of the
1376 * lower mac (in case of VNIC and the only mac in case of NICs). The
1377 * availability of Rx ring and promiscous mode is all taken care between
1378 * the soft ring set (mac_srs), the Rx ring, and S/W classifier. Any
1379 * fanout necessary is done by the soft rings that are part of the
1380 * mac_srs (by default mac_srs sends the packets up via a TCP and
1381 * non TCP soft ring).
1382 *
1383 * The mac_srs (or its associated soft rings) always store the ill_rx_ring
1384 * (the cookie returned when they registered with IP during plumb) as their
1385 * 2nd argument which is passed up as mac_resource_handle_t. The upcall
1386 * function and 1st argument is what the caller registered when they
1387 * called mac_rx_classify_flow_add() to register the flow. For VNIC,
1388 * the function is vnic_rx and argument is vnic_t. For regular NIC
1389 * case, it mac_rx_default and mac_handle_t. As explained above, the
1390 * mac_srs (or its soft ring) will add the ill_rx_ring (mac_resource_handle_t)
1391 * from its stored 2nd argument.
1392 */
1393static int
1394dld_capab_poll_enable(dld_str_t *dsp, dld_capab_poll_t *poll)
1395{
1396	if (dsp->ds_polling)
1397		return (EINVAL);
1398
1399	if ((dld_opt & DLD_OPT_NO_POLL) != 0 || dsp->ds_mode == DLD_RAW)
1400		return (ENOTSUP);
1401
1402	/*
1403	 * Enable client polling if and only if DLS bypass is possible.
1404	 * Special cases like VLANs need DLS processing in the Rx data path.
1405	 * In such a case we can neither allow the client (IP) to directly
1406	 * poll the softring (since DLS processing hasn't been done) nor can
1407	 * we allow DLS bypass.
1408	 */
1409	if (!mac_rx_bypass_set(dsp->ds_mch, dsp->ds_rx, dsp->ds_rx_arg))
1410		return (ENOTSUP);
1411
1412	/*
1413	 * Register soft ring resources. This will come in handy later if
1414	 * the user decides to modify CPU bindings to use more CPUs for the
1415	 * device in which case we will switch to fanout using soft rings.
1416	 */
1417	mac_resource_set_common(dsp->ds_mch,
1418	    (mac_resource_add_t)poll->poll_ring_add_cf,
1419	    (mac_resource_remove_t)poll->poll_ring_remove_cf,
1420	    (mac_resource_quiesce_t)poll->poll_ring_quiesce_cf,
1421	    (mac_resource_restart_t)poll->poll_ring_restart_cf,
1422	    (mac_resource_bind_t)poll->poll_ring_bind_cf,
1423	    poll->poll_ring_ch);
1424
1425	mac_client_poll_enable(dsp->ds_mch);
1426
1427	dsp->ds_polling = B_TRUE;
1428	return (0);
1429}
1430
1431/* ARGSUSED */
1432static int
1433dld_capab_poll_disable(dld_str_t *dsp, dld_capab_poll_t *poll)
1434{
1435	if (!dsp->ds_polling)
1436		return (EINVAL);
1437
1438	mac_client_poll_disable(dsp->ds_mch);
1439	mac_resource_set(dsp->ds_mch, NULL, NULL);
1440
1441	dsp->ds_polling = B_FALSE;
1442	return (0);
1443}
1444
1445static int
1446dld_capab_poll(dld_str_t *dsp, void *data, uint_t flags)
1447{
1448	dld_capab_poll_t	*poll = data;
1449
1450	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1451
1452	switch (flags) {
1453	case DLD_ENABLE:
1454		return (dld_capab_poll_enable(dsp, poll));
1455	case DLD_DISABLE:
1456		return (dld_capab_poll_disable(dsp, poll));
1457	}
1458	return (ENOTSUP);
1459}
1460
1461static int
1462dld_capab_lso(dld_str_t *dsp, void *data, uint_t flags)
1463{
1464	dld_capab_lso_t		*lso = data;
1465
1466	ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
1467
1468	switch (flags) {
1469	case DLD_ENABLE: {
1470		mac_capab_lso_t		mac_lso;
1471
1472		/*
1473		 * Check if LSO is supported on this MAC & enable LSO
1474		 * accordingly.
1475		 */
1476		if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_LSO, &mac_lso)) {
1477			lso->lso_max = mac_lso.lso_basic_tcp_ipv4.lso_max;
1478			lso->lso_flags = 0;
1479			/* translate the flag for mac clients */
1480			if ((mac_lso.lso_flags & LSO_TX_BASIC_TCP_IPV4) != 0)
1481				lso->lso_flags |= DLD_LSO_BASIC_TCP_IPV4;
1482			dsp->ds_lso = B_TRUE;
1483			dsp->ds_lso_max = lso->lso_max;
1484		} else {
1485			dsp->ds_lso = B_FALSE;
1486			dsp->ds_lso_max = 0;
1487			return (ENOTSUP);
1488		}
1489		return (0);
1490	}
1491	case DLD_DISABLE: {
1492		dsp->ds_lso = B_FALSE;
1493		dsp->ds_lso_max = 0;
1494		return (0);
1495	}
1496	}
1497	return (ENOTSUP);
1498}
1499
1500static int
1501dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
1502{
1503	int	err;
1504
1505	/*
1506	 * Don't enable direct callback capabilities unless the caller is
1507	 * the IP client. When a module is inserted in a stream (_I_INSERT)
1508	 * the stack initiates capability disable, but due to races, the
1509	 * module insertion may complete before the capability disable
1510	 * completes. So we limit the check to DLD_ENABLE case.
1511	 */
1512	if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
1513	    (dsp->ds_sap != ETHERTYPE_IP ||
1514	    !check_mod_above(dsp->ds_rq, "ip"))) {
1515		return (ENOTSUP);
1516	}
1517
1518	switch (type) {
1519	case DLD_CAPAB_DIRECT:
1520		err = dld_capab_direct(dsp, data, flags);
1521		break;
1522
1523	case DLD_CAPAB_POLL:
1524		err =  dld_capab_poll(dsp, data, flags);
1525		break;
1526
1527	case DLD_CAPAB_PERIM:
1528		err = dld_capab_perim(dsp, data, flags);
1529		break;
1530
1531	case DLD_CAPAB_LSO:
1532		err = dld_capab_lso(dsp, data, flags);
1533		break;
1534
1535	default:
1536		err = ENOTSUP;
1537		break;
1538	}
1539
1540	return (err);
1541}
1542
1543/*
1544 * DL_CAPABILITY_ACK/DL_ERROR_ACK
1545 */
1546static void
1547proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
1548{
1549	dl_capability_ack_t	*dlap;
1550	dl_capability_sub_t	*dlsp;
1551	size_t			subsize;
1552	dl_capab_dld_t		dld;
1553	dl_capab_hcksum_t	hcksum;
1554	dl_capab_zerocopy_t	zcopy;
1555	dl_capab_vrrp_t		vrrp;
1556	mac_capab_vrrp_t	vrrp_capab;
1557	uint8_t			*ptr;
1558	queue_t			*q = dsp->ds_wq;
1559	mblk_t			*mp1;
1560	boolean_t		hcksum_capable = B_FALSE;
1561	boolean_t		zcopy_capable = B_FALSE;
1562	boolean_t		dld_capable = B_FALSE;
1563	boolean_t		vrrp_capable = B_FALSE;
1564
1565	/*
1566	 * Initially assume no capabilities.
1567	 */
1568	subsize = 0;
1569
1570	/*
1571	 * Check if checksum offload is supported on this MAC.
1572	 */
1573	bzero(&hcksum, sizeof (dl_capab_hcksum_t));
1574	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_HCKSUM,
1575	    &hcksum.hcksum_txflags)) {
1576		if (hcksum.hcksum_txflags != 0) {
1577			hcksum_capable = B_TRUE;
1578			subsize += sizeof (dl_capability_sub_t) +
1579			    sizeof (dl_capab_hcksum_t);
1580		}
1581	}
1582
1583	/*
1584	 * Check if zerocopy is supported on this interface.
1585	 * If advertising DL_CAPAB_ZEROCOPY has not been explicitly disabled
1586	 * then reserve space for that capability.
1587	 */
1588	if (!mac_capab_get(dsp->ds_mh, MAC_CAPAB_NO_ZCOPY, NULL) &&
1589	    !(dld_opt & DLD_OPT_NO_ZEROCOPY)) {
1590		zcopy_capable = B_TRUE;
1591		subsize += sizeof (dl_capability_sub_t) +
1592		    sizeof (dl_capab_zerocopy_t);
1593	}
1594
1595	/*
1596	 * Direct capability negotiation interface between IP and DLD
1597	 */
1598	if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
1599		dld_capable = B_TRUE;
1600		subsize += sizeof (dl_capability_sub_t) +
1601		    sizeof (dl_capab_dld_t);
1602	}
1603
1604	/*
1605	 * Check if vrrp is supported on this interface. If so, reserve
1606	 * space for that capability.
1607	 */
1608	if (mac_capab_get(dsp->ds_mh, MAC_CAPAB_VRRP, &vrrp_capab)) {
1609		vrrp_capable = B_TRUE;
1610		subsize += sizeof (dl_capability_sub_t) +
1611		    sizeof (dl_capab_vrrp_t);
1612	}
1613
1614	/*
1615	 * If there are no capabilities to advertise or if we
1616	 * can't allocate a response, send a DL_ERROR_ACK.
1617	 */
1618	if ((mp1 = reallocb(mp,
1619	    sizeof (dl_capability_ack_t) + subsize, 0)) == NULL) {
1620		dlerrorack(q, mp, DL_CAPABILITY_REQ, DL_NOTSUPPORTED, 0);
1621		return;
1622	}
1623
1624	mp = mp1;
1625	DB_TYPE(mp) = M_PROTO;
1626	mp->b_wptr = mp->b_rptr + sizeof (dl_capability_ack_t) + subsize;
1627	bzero(mp->b_rptr, MBLKL(mp));
1628	dlap = (dl_capability_ack_t *)mp->b_rptr;
1629	dlap->dl_primitive = DL_CAPABILITY_ACK;
1630	dlap->dl_sub_offset = sizeof (dl_capability_ack_t);
1631	dlap->dl_sub_length = subsize;
1632	ptr = (uint8_t *)&dlap[1];
1633
1634	/*
1635	 * TCP/IP checksum offload.
1636	 */
1637	if (hcksum_capable) {
1638		dlsp = (dl_capability_sub_t *)ptr;
1639
1640		dlsp->dl_cap = DL_CAPAB_HCKSUM;
1641		dlsp->dl_length = sizeof (dl_capab_hcksum_t);
1642		ptr += sizeof (dl_capability_sub_t);
1643
1644		hcksum.hcksum_version = HCKSUM_VERSION_1;
1645		dlcapabsetqid(&(hcksum.hcksum_mid), dsp->ds_rq);
1646		bcopy(&hcksum, ptr, sizeof (dl_capab_hcksum_t));
1647		ptr += sizeof (dl_capab_hcksum_t);
1648	}
1649
1650	/*
1651	 * Zero copy
1652	 */
1653	if (zcopy_capable) {
1654		dlsp = (dl_capability_sub_t *)ptr;
1655
1656		dlsp->dl_cap = DL_CAPAB_ZEROCOPY;
1657		dlsp->dl_length = sizeof (dl_capab_zerocopy_t);
1658		ptr += sizeof (dl_capability_sub_t);
1659
1660		bzero(&zcopy, sizeof (dl_capab_zerocopy_t));
1661		zcopy.zerocopy_version = ZEROCOPY_VERSION_1;
1662		zcopy.zerocopy_flags = DL_CAPAB_VMSAFE_MEM;
1663
1664		dlcapabsetqid(&(zcopy.zerocopy_mid), dsp->ds_rq);
1665		bcopy(&zcopy, ptr, sizeof (dl_capab_zerocopy_t));
1666		ptr += sizeof (dl_capab_zerocopy_t);
1667	}
1668
1669	/*
1670	 * VRRP capability negotiation
1671	 */
1672	if (vrrp_capable) {
1673		dlsp = (dl_capability_sub_t *)ptr;
1674		dlsp->dl_cap = DL_CAPAB_VRRP;
1675		dlsp->dl_length = sizeof (dl_capab_vrrp_t);
1676		ptr += sizeof (dl_capability_sub_t);
1677
1678		bzero(&vrrp, sizeof (dl_capab_vrrp_t));
1679		vrrp.vrrp_af = vrrp_capab.mcv_af;
1680		bcopy(&vrrp, ptr, sizeof (dl_capab_vrrp_t));
1681		ptr += sizeof (dl_capab_vrrp_t);
1682	}
1683
1684	/*
1685	 * Direct capability negotiation interface between IP and DLD.
1686	 * Refer to dld.h for details.
1687	 */
1688	if (dld_capable) {
1689		dlsp = (dl_capability_sub_t *)ptr;
1690		dlsp->dl_cap = DL_CAPAB_DLD;
1691		dlsp->dl_length = sizeof (dl_capab_dld_t);
1692		ptr += sizeof (dl_capability_sub_t);
1693
1694		bzero(&dld, sizeof (dl_capab_dld_t));
1695		dld.dld_version = DLD_CURRENT_VERSION;
1696		dld.dld_capab = (uintptr_t)dld_capab;
1697		dld.dld_capab_handle = (uintptr_t)dsp;
1698
1699		dlcapabsetqid(&(dld.dld_mid), dsp->ds_rq);
1700		bcopy(&dld, ptr, sizeof (dl_capab_dld_t));
1701		ptr += sizeof (dl_capab_dld_t);
1702	}
1703
1704	ASSERT(ptr == mp->b_rptr + sizeof (dl_capability_ack_t) + subsize);
1705	qreply(q, mp);
1706}
1707
1708/*
1709 * Disable any enabled capabilities.
1710 */
1711void
1712dld_capabilities_disable(dld_str_t *dsp)
1713{
1714	if (dsp->ds_polling)
1715		(void) dld_capab_poll_disable(dsp, NULL);
1716}
1717