1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/types.h>
26#include <sys/cred.h>
27#include <sys/sysmacros.h>
28#include <sys/conf.h>
29#include <sys/cmn_err.h>
30#include <sys/list.h>
31#include <sys/ksynch.h>
32#include <sys/kmem.h>
33#include <sys/stream.h>
34#include <sys/modctl.h>
35#include <sys/ddi.h>
36#include <sys/sunddi.h>
37#include <sys/atomic.h>
38#include <sys/stat.h>
39#include <sys/modhash.h>
40#include <sys/strsubr.h>
41#include <sys/strsun.h>
42#include <sys/dlpi.h>
43#include <sys/mac.h>
44#include <sys/mac_provider.h>
45#include <sys/mac_client.h>
46#include <sys/mac_client_priv.h>
47#include <sys/mac_ether.h>
48#include <sys/dls.h>
49#include <sys/pattr.h>
50#include <sys/time.h>
51#include <sys/vlan.h>
52#include <sys/vnic.h>
53#include <sys/vnic_impl.h>
54#include <sys/mac_flow_impl.h>
55#include <inet/ip_impl.h>
56
/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC has a corresponding MAC client of the underlying MAC
 * (lower MAC). This MAC client is opened by the VNIC driver at VNIC
 * creation, and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 */
74
/*
 * Forward declarations of the MAC provider entry points referenced by
 * the vnic_m_callbacks table below, and of the notification callback
 * that vnic_dev_create() registers on the lower MAC.
 */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);
89
/* kmem cache from which vnic_t structures are allocated and freed */
static kmem_cache_t	*vnic_cache;
/*
 * Held by vnic_dev_create(), vnic_dev_modify(), vnic_dev_delete() and
 * vnic_info() around their accesses to vnic_hash; vnic_count is updated
 * while it is held.
 */
static krwlock_t	vnic_lock;
/* number of VNICs currently inserted in vnic_hash */
static uint_t		vnic_count;

/*
 * SDU bounds for anchor VNICs (those created without a lower MAC): used
 * as the registered min/max SDU in vnic_dev_create() and as the accepted
 * MAC_PROP_MTU range in vnic_m_setprop()/vnic_m_propinfo().
 */
#define	ANCHOR_VNIC_MIN_MTU	576
#define	ANCHOR_VNIC_MAX_MTU	9000
96
97/* hash of VNICs (vnic_t's), keyed by VNIC id */
98static mod_hash_t	*vnic_hash;
99#define	VNIC_HASHSZ	64
100#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)
101
/* flags placed in the first member of vnic_m_callbacks below */
#define	VNIC_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * Entry points handed to the MAC layer: vnic_dev_create() stores a
 * pointer to this table in mac_register_t's m_callbacks before calling
 * mac_register(). The initializer is positional; NULL slots are entry
 * points this driver does not supply.
 * NOTE(review): the meaning of each slot follows the mac_callbacks_t
 * layout, which is declared outside this file -- confirm against that
 * declaration before adding or reordering entries.
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	NULL,
	vnic_m_ioctl,
	vnic_m_capab_get,
	NULL,
	NULL,
	vnic_m_setprop,
	NULL,
	vnic_m_propinfo
};
123
124void
125vnic_dev_init(void)
126{
127	vnic_cache = kmem_cache_create("vnic_cache",
128	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
129
130	vnic_hash = mod_hash_create_idhash("vnic_hash",
131	    VNIC_HASHSZ, mod_hash_null_valdtor);
132
133	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
134
135	vnic_count = 0;
136}
137
138void
139vnic_dev_fini(void)
140{
141	ASSERT(vnic_count == 0);
142
143	rw_destroy(&vnic_lock);
144	mod_hash_destroy_idhash(vnic_hash);
145	kmem_cache_destroy(vnic_cache);
146}
147
/*
 * Return the current number of VNICs. Note that vnic_count is read
 * here without taking vnic_lock.
 */
uint_t
vnic_dev_count(void)
{
	return (vnic_count);
}
153
154static vnic_ioc_diag_t
155vnic_mac2vnic_diag(mac_diag_t diag)
156{
157	switch (diag) {
158	case MAC_DIAG_MACADDR_NIC:
159		return (VNIC_IOC_DIAG_MACADDR_NIC);
160	case MAC_DIAG_MACADDR_INUSE:
161		return (VNIC_IOC_DIAG_MACADDR_INUSE);
162	case MAC_DIAG_MACADDR_INVALID:
163		return (VNIC_IOC_DIAG_MACADDR_INVALID);
164	case MAC_DIAG_MACADDRLEN_INVALID:
165		return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
166	case MAC_DIAG_MACFACTORYSLOTINVALID:
167		return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
168	case MAC_DIAG_MACFACTORYSLOTUSED:
169		return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
170	case MAC_DIAG_MACFACTORYSLOTALLUSED:
171		return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
172	case MAC_DIAG_MACFACTORYNOTSUP:
173		return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
174	case MAC_DIAG_MACPREFIX_INVALID:
175		return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
176	case MAC_DIAG_MACPREFIXLEN_INVALID:
177		return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
178	case MAC_DIAG_MACNO_HWRINGS:
179		return (VNIC_IOC_DIAG_NO_HWRINGS);
180	default:
181		return (VNIC_IOC_DIAG_NONE);
182	}
183}
184
185static int
186vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
187    int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
188    uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
189    uint16_t vid, boolean_t req_hwgrp_flag)
190{
191	mac_diag_t mac_diag;
192	uint16_t mac_flags = 0;
193	int err;
194	uint_t addr_len;
195
196	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
197		mac_flags |= MAC_UNICAST_NODUPCHECK;
198
199	switch (vnic_addr_type) {
200	case VNIC_MAC_ADDR_TYPE_FIXED:
201	case VNIC_MAC_ADDR_TYPE_VRID:
202		/*
203		 * The MAC address value to assign to the VNIC
204		 * is already provided in mac_addr_arg. addr_len_ptr_arg
205		 * already contains the MAC address length.
206		 */
207		break;
208
209	case VNIC_MAC_ADDR_TYPE_RANDOM:
210		/*
211		 * Random MAC address. There are two sub-cases:
212		 *
213		 * 1 - If mac_len == 0, a new MAC address is generated.
214		 *	The length of the MAC address to generated depends
215		 *	on the type of MAC used. The prefix to use for the MAC
216		 *	address is stored in the most significant bytes
217		 *	of the mac_addr argument, and its length is specified
218		 *	by the mac_prefix_len argument. This prefix can
219		 *	correspond to a IEEE OUI in the case of Ethernet,
220		 *	for example.
221		 *
222		 * 2 - If mac_len > 0, the address was already picked
223		 *	randomly, and is now passed back during VNIC
224		 *	re-creation. The mac_addr argument contains the MAC
225		 *	address that was generated. We distinguish this
226		 *	case from the fixed MAC address case, since we
227		 *	want the user consumers to know, when they query
228		 *	the list of VNICs, that a VNIC was assigned a
229		 *	random MAC address vs assigned a fixed address
230		 *	specified by the user.
231		 */
232
233		/*
234		 * If it's a pre-generated address, we're done. mac_addr_arg
235		 * and addr_len_ptr_arg already contain the MAC address
236		 * value and length.
237		 */
238		if (*addr_len_ptr_arg > 0)
239			break;
240
241		/* generate a new random MAC address */
242		if ((err = mac_addr_random(vnic->vn_mch,
243		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
244			*diag = vnic_mac2vnic_diag(mac_diag);
245			return (err);
246		}
247		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
248		break;
249
250	case VNIC_MAC_ADDR_TYPE_FACTORY:
251		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
252		if (err != 0) {
253			if (err == EINVAL)
254				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
255			if (err == EBUSY)
256				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
257			if (err == ENOSPC)
258				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
259			return (err);
260		}
261
262		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
263		    mac_addr_arg, &addr_len, NULL, NULL);
264		*addr_len_ptr_arg = addr_len;
265		break;
266
267	case VNIC_MAC_ADDR_TYPE_AUTO:
268		/* first try to allocate a factory MAC address */
269		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
270		if (err == 0) {
271			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
272			    mac_addr_arg, &addr_len, NULL, NULL);
273			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
274			*addr_len_ptr_arg = addr_len;
275			break;
276		}
277
278		/*
279		 * Allocating a factory MAC address failed, generate a
280		 * random MAC address instead.
281		 */
282		if ((err = mac_addr_random(vnic->vn_mch,
283		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
284			*diag = vnic_mac2vnic_diag(mac_diag);
285			return (err);
286		}
287		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
288		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
289		break;
290	case VNIC_MAC_ADDR_TYPE_PRIMARY:
291		/*
292		 * We get the address here since we copy it in the
293		 * vnic's vn_addr.
294		 * We can't ask for hardware resources since we
295		 * don't currently support hardware classification
296		 * for these MAC clients.
297		 */
298		if (req_hwgrp_flag) {
299			*diag = VNIC_IOC_DIAG_NO_HWRINGS;
300			return (ENOTSUP);
301		}
302		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
303		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
304		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
305		break;
306	}
307
308	vnic->vn_addr_type = vnic_addr_type;
309
310	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
311	    &vnic->vn_muh, vid, &mac_diag);
312	if (err != 0) {
313		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
314			/* release factory MAC address */
315			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
316		}
317		*diag = vnic_mac2vnic_diag(mac_diag);
318	}
319
320	return (err);
321}
322
323/*
324 * Create a new VNIC upon request from administrator.
325 * Returns 0 on success, an errno on failure.
326 */
327/* ARGSUSED */
328int
329vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
330    vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
331    int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
332    int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
333    cred_t *credp)
334{
335	vnic_t *vnic;
336	mac_register_t *mac;
337	int err;
338	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
339	char vnic_name[MAXNAMELEN];
340	const mac_info_t *minfop;
341	uint32_t req_hwgrp_flag = B_FALSE;
342
343	*diag = VNIC_IOC_DIAG_NONE;
344
345	rw_enter(&vnic_lock, RW_WRITER);
346
347	/* does a VNIC with the same id already exist? */
348	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
349	    (mod_hash_val_t *)&vnic);
350	if (err == 0) {
351		rw_exit(&vnic_lock);
352		return (EEXIST);
353	}
354
355	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
356	if (vnic == NULL) {
357		rw_exit(&vnic_lock);
358		return (ENOMEM);
359	}
360
361	bzero(vnic, sizeof (*vnic));
362
363	vnic->vn_id = vnic_id;
364	vnic->vn_link_id = linkid;
365	vnic->vn_vrid = vrid;
366	vnic->vn_af = af;
367
368	if (!is_anchor) {
369		if (linkid == DATALINK_INVALID_LINKID) {
370			err = EINVAL;
371			goto bail;
372		}
373
374		/*
375		 * Open the lower MAC and assign its initial bandwidth and
376		 * MAC address. We do this here during VNIC creation and
377		 * do not wait until the upper MAC client open so that we
378		 * can validate the VNIC creation parameters (bandwidth,
379		 * MAC address, etc) and reserve a factory MAC address if
380		 * one was requested.
381		 */
382		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
383		if (err != 0)
384			goto bail;
385
386		/*
387		 * VNIC(vlan) over VNICs(vlans) is not supported.
388		 */
389		if (mac_is_vnic(vnic->vn_lower_mh)) {
390			err = EINVAL;
391			goto bail;
392		}
393
394		/* only ethernet support for now */
395		minfop = mac_info(vnic->vn_lower_mh);
396		if (minfop->mi_nativemedia != DL_ETHER) {
397			err = ENOTSUP;
398			goto bail;
399		}
400
401		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
402		    NULL);
403		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
404		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
405		if (err != 0)
406			goto bail;
407
408		if (mrp != NULL) {
409			if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
410			    (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
411				req_hwgrp_flag = B_TRUE;
412			}
413			err = mac_client_set_resources(vnic->vn_mch, mrp);
414			if (err != 0)
415				goto bail;
416		}
417		/* assign a MAC address to the VNIC */
418
419		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
420		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
421		    req_hwgrp_flag);
422		if (err != 0) {
423			vnic->vn_muh = NULL;
424			if (diag != NULL && req_hwgrp_flag)
425				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
426			goto bail;
427		}
428
429		/* register to receive notification from underlying MAC */
430		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
431		    vnic);
432
433		*vnic_addr_type = vnic->vn_addr_type;
434		vnic->vn_addr_len = *mac_len;
435		vnic->vn_vid = vid;
436
437		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
438
439		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
440			vnic->vn_slot_id = *mac_slot;
441
442		/*
443		 * Set the initial VNIC capabilities. If the VNIC is created
444		 * over MACs which does not support nactive vlan, disable
445		 * VNIC's hardware checksum capability if its VID is not 0,
446		 * since the underlying MAC would get the hardware checksum
447		 * offset wrong in case of VLAN packets.
448		 */
449		if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
450		    MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
451			if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
452			    &vnic->vn_hcksum_txflags))
453				vnic->vn_hcksum_txflags = 0;
454		} else {
455			vnic->vn_hcksum_txflags = 0;
456		}
457	}
458
459	/* register with the MAC module */
460	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
461		goto bail;
462
463	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
464	mac->m_driver = vnic;
465	mac->m_dip = vnic_get_dip();
466	mac->m_instance = (uint_t)-1;
467	mac->m_src_addr = vnic->vn_addr;
468	mac->m_callbacks = &vnic_m_callbacks;
469
470	if (!is_anchor) {
471		/*
472		 * If this is a VNIC based VLAN, then we check for the
473		 * margin unless it has been created with the force
474		 * flag. If we are configuring a VLAN over an etherstub,
475		 * we don't check the margin even if force is not set.
476		 */
477		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
478			if (vid != VLAN_ID_NONE)
479				vnic->vn_force = B_TRUE;
480			/*
481			 * As the current margin size of the underlying mac is
482			 * used to determine the margin size of the VNIC
483			 * itself, request the underlying mac not to change
484			 * to a smaller margin size.
485			 */
486			err = mac_margin_add(vnic->vn_lower_mh,
487			    &vnic->vn_margin, B_TRUE);
488			ASSERT(err == 0);
489		} else {
490			vnic->vn_margin = VLAN_TAGSZ;
491			err = mac_margin_add(vnic->vn_lower_mh,
492			    &vnic->vn_margin, B_FALSE);
493			if (err != 0) {
494				mac_free(mac);
495				if (diag != NULL)
496					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
497				goto bail;
498			}
499		}
500
501		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
502		    &mac->m_max_sdu);
503	} else {
504		vnic->vn_margin = VLAN_TAGSZ;
505		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
506		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
507	}
508
509	mac->m_margin = vnic->vn_margin;
510
511	err = mac_register(mac, &vnic->vn_mh);
512	mac_free(mac);
513	if (err != 0) {
514		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
515		    vnic->vn_margin) == 0);
516		goto bail;
517	}
518
519	/* Set the VNIC's MAC in the client */
520	if (!is_anchor)
521		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
522
523	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
524	if (err != 0) {
525		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
526		    vnic->vn_margin) == 0);
527		(void) mac_unregister(vnic->vn_mh);
528		goto bail;
529	}
530
531	/* add new VNIC to hash table */
532	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
533	    (mod_hash_val_t)vnic);
534	ASSERT(err == 0);
535	vnic_count++;
536
537	vnic->vn_enabled = B_TRUE;
538	rw_exit(&vnic_lock);
539
540	return (0);
541
542bail:
543	rw_exit(&vnic_lock);
544	if (!is_anchor) {
545		if (vnic->vn_mnh != NULL)
546			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
547		if (vnic->vn_muh != NULL)
548			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
549		if (vnic->vn_mch != NULL)
550			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
551		if (vnic->vn_lower_mh != NULL)
552			mac_close(vnic->vn_lower_mh);
553	}
554
555	kmem_cache_free(vnic_cache, vnic);
556	return (err);
557}
558
559/*
560 * Modify the properties of an existing VNIC.
561 */
562/* ARGSUSED */
563int
564vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
565    vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
566    uint_t mac_slot, mac_resource_props_t *mrp)
567{
568	vnic_t *vnic = NULL;
569
570	rw_enter(&vnic_lock, RW_WRITER);
571
572	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
573	    (mod_hash_val_t *)&vnic) != 0) {
574		rw_exit(&vnic_lock);
575		return (ENOENT);
576	}
577
578	rw_exit(&vnic_lock);
579
580	return (0);
581}
582
583/* ARGSUSED */
584int
585vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
586{
587	vnic_t *vnic = NULL;
588	mod_hash_val_t val;
589	datalink_id_t tmpid;
590	int rc;
591
592	rw_enter(&vnic_lock, RW_WRITER);
593
594	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
595	    (mod_hash_val_t *)&vnic) != 0) {
596		rw_exit(&vnic_lock);
597		return (ENOENT);
598	}
599
600	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
601		rw_exit(&vnic_lock);
602		return (rc);
603	}
604
605	ASSERT(vnic_id == tmpid);
606
607	/*
608	 * We cannot unregister the MAC yet. Unregistering would
609	 * free up mac_impl_t which should not happen at this time.
610	 * So disable mac_impl_t by calling mac_disable(). This will prevent
611	 * any new claims on mac_impl_t.
612	 */
613	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
614		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
615		    crgetzoneid(credp));
616		rw_exit(&vnic_lock);
617		return (rc);
618	}
619
620	vnic->vn_enabled = B_FALSE;
621	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
622	ASSERT(vnic == (vnic_t *)val);
623	vnic_count--;
624	rw_exit(&vnic_lock);
625
626	/*
627	 * XXX-nicolas shouldn't have a void cast here, if it's
628	 * expected that the function will never fail, then we should
629	 * have an ASSERT().
630	 */
631	(void) mac_unregister(vnic->vn_mh);
632
633	if (vnic->vn_lower_mh != NULL) {
634		/*
635		 * Check if MAC address for the vnic was obtained from the
636		 * factory MAC addresses. If yes, release it.
637		 */
638		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
639			(void) mac_addr_factory_release(vnic->vn_mch,
640			    vnic->vn_slot_id);
641		}
642		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
643		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
644		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
645		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
646		mac_close(vnic->vn_lower_mh);
647	}
648
649	kmem_cache_free(vnic_cache, vnic);
650	return (0);
651}
652
653/* ARGSUSED */
654mblk_t *
655vnic_m_tx(void *arg, mblk_t *mp_chain)
656{
657	/*
658	 * This function could be invoked for an anchor VNIC when sending
659	 * broadcast and multicast packets, and unicast packets which did
660	 * not match any local known destination.
661	 */
662	freemsgchain(mp_chain);
663	return (NULL);
664}
665
/*
 * Ioctl entry point registered in vnic_m_callbacks. No ioctl is
 * handled here: every message is negatively acknowledged with ENOTSUP.
 */
/*ARGSUSED*/
static void
vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}
672
673/*
674 * This entry point cannot be passed-through, since it is invoked
675 * for the per-VNIC kstats which must be exported independently
676 * of the existence of VNIC MAC clients.
677 */
678static int
679vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
680{
681	vnic_t *vnic = arg;
682	int rval = 0;
683
684	if (vnic->vn_lower_mh == NULL) {
685		/*
686		 * It's an anchor VNIC, which does not have any
687		 * statistics in itself.
688		 */
689		return (ENOTSUP);
690	}
691
692	/*
693	 * ENOTSUP must be reported for unsupported stats, the VNIC
694	 * driver reports a subset of the stats that would
695	 * be returned by a real piece of hardware.
696	 */
697
698	switch (stat) {
699	case MAC_STAT_LINK_STATE:
700	case MAC_STAT_LINK_UP:
701	case MAC_STAT_PROMISC:
702	case MAC_STAT_IFSPEED:
703	case MAC_STAT_MULTIRCV:
704	case MAC_STAT_MULTIXMT:
705	case MAC_STAT_BRDCSTRCV:
706	case MAC_STAT_BRDCSTXMT:
707	case MAC_STAT_OPACKETS:
708	case MAC_STAT_OBYTES:
709	case MAC_STAT_IERRORS:
710	case MAC_STAT_OERRORS:
711	case MAC_STAT_RBYTES:
712	case MAC_STAT_IPACKETS:
713		*val = mac_client_stat_get(vnic->vn_mch, stat);
714		break;
715	default:
716		rval = ENOTSUP;
717	}
718
719	return (rval);
720}
721
722/*
723 * Invoked by the upper MAC to retrieve the lower MAC client handle
724 * corresponding to a VNIC. A pointer to this function is obtained
725 * by the upper MAC via capability query.
726 *
727 * XXX-nicolas Note: this currently causes all VNIC MAC clients to
728 * receive the same MAC client handle for the same VNIC. This is ok
729 * as long as we have only one VNIC MAC client which sends and
730 * receives data, but we don't currently enforce this at the MAC layer.
731 */
732static void *
733vnic_mac_client_handle(void *vnic_arg)
734{
735	vnic_t *vnic = vnic_arg;
736
737	return (vnic->vn_mch);
738}
739
740
741/*
742 * Return information about the specified capability.
743 */
744/* ARGSUSED */
745static boolean_t
746vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
747{
748	vnic_t *vnic = arg;
749
750	switch (cap) {
751	case MAC_CAPAB_HCKSUM: {
752		uint32_t *hcksum_txflags = cap_data;
753
754		*hcksum_txflags = vnic->vn_hcksum_txflags &
755		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
756		    HCKSUM_INET_PARTIAL);
757		break;
758	}
759	case MAC_CAPAB_VNIC: {
760		mac_capab_vnic_t *vnic_capab = cap_data;
761
762		if (vnic->vn_lower_mh == NULL) {
763			/*
764			 * It's an anchor VNIC, we don't have an underlying
765			 * NIC and MAC client handle.
766			 */
767			return (B_FALSE);
768		}
769
770		if (vnic_capab != NULL) {
771			vnic_capab->mcv_arg = vnic;
772			vnic_capab->mcv_mac_client_handle =
773			    vnic_mac_client_handle;
774		}
775		break;
776	}
777	case MAC_CAPAB_ANCHOR_VNIC: {
778		/* since it's an anchor VNIC we don't have lower mac handle */
779		if (vnic->vn_lower_mh == NULL) {
780			ASSERT(vnic->vn_link_id == 0);
781			return (B_TRUE);
782		}
783		return (B_FALSE);
784	}
785	case MAC_CAPAB_NO_NATIVEVLAN:
786		return (B_FALSE);
787	case MAC_CAPAB_NO_ZCOPY:
788		return (B_TRUE);
789	case MAC_CAPAB_VRRP: {
790		mac_capab_vrrp_t *vrrp_capab = cap_data;
791
792		if (vnic->vn_vrid != 0) {
793			if (vrrp_capab != NULL)
794				vrrp_capab->mcv_af = vnic->vn_af;
795			return (B_TRUE);
796		}
797		return (B_FALSE);
798	}
799	default:
800		return (B_FALSE);
801	}
802	return (B_TRUE);
803}
804
/*
 * Start entry point registered in vnic_m_callbacks. There is nothing
 * to do here; it always reports success.
 */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}
811
/*
 * Stop entry point registered in vnic_m_callbacks. Intentionally empty.
 */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}
817
/*
 * Promiscuous-mode entry point registered in vnic_m_callbacks. The
 * request is ignored and success is always reported.
 */
/* ARGSUSED */
static int
vnic_m_promisc(void *arg, boolean_t on)
{
	return (0);
}
824
/*
 * Multicast add/remove entry point registered in vnic_m_callbacks. The
 * request is ignored and success is always reported.
 */
/* ARGSUSED */
static int
vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	return (0);
}
831
832static int
833vnic_m_unicst(void *arg, const uint8_t *macaddr)
834{
835	vnic_t *vnic = arg;
836
837	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
838}
839
840/*
841 * Callback functions for set/get of properties
842 */
843/*ARGSUSED*/
844static int
845vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
846    uint_t pr_valsize, const void *pr_val)
847{
848	int 		err = ENOTSUP;
849	vnic_t		*vn = m_driver;
850
851	/* allow setting MTU only on an etherstub */
852	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
853		return (err);
854
855	switch (pr_num) {
856	case MAC_PROP_MTU: {
857		uint32_t	mtu;
858
859		if (pr_valsize < sizeof (mtu)) {
860			err = EINVAL;
861			break;
862		}
863		bcopy(pr_val, &mtu, sizeof (mtu));
864		if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
865			err = EINVAL;
866			break;
867		}
868		err = mac_maxsdu_update(vn->vn_mh, mtu);
869		break;
870	}
871	default:
872		break;
873	}
874	return (err);
875}
876
877/* ARGSUSED */
878static void vnic_m_propinfo(void *m_driver, const char *pr_name,
879    mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
880{
881	vnic_t		*vn = m_driver;
882
883	/* MTU setting allowed only on an etherstub */
884	if (vn->vn_link_id != DATALINK_INVALID_LINKID)
885		return;
886
887	switch (pr_num) {
888	case MAC_PROP_MTU:
889		mac_prop_info_set_range_uint32(prh,
890		    ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
891		break;
892	}
893}
894
895
896int
897vnic_info(vnic_info_t *info, cred_t *credp)
898{
899	vnic_t		*vnic;
900	int		err;
901
902	/* Make sure that the VNIC link is visible from the caller's zone. */
903	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
904		return (ENOENT);
905
906	rw_enter(&vnic_lock, RW_WRITER);
907
908	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
909	    (mod_hash_val_t *)&vnic);
910	if (err != 0) {
911		rw_exit(&vnic_lock);
912		return (ENOENT);
913	}
914
915	info->vn_link_id = vnic->vn_link_id;
916	info->vn_mac_addr_type = vnic->vn_addr_type;
917	info->vn_mac_len = vnic->vn_addr_len;
918	bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
919	info->vn_mac_slot = vnic->vn_slot_id;
920	info->vn_mac_prefix_len = 0;
921	info->vn_vid = vnic->vn_vid;
922	info->vn_force = vnic->vn_force;
923	info->vn_vrid = vnic->vn_vrid;
924	info->vn_af = vnic->vn_af;
925
926	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
927	if (vnic->vn_mch != NULL)
928		mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
929
930	rw_exit(&vnic_lock);
931	return (0);
932}
933
934static void
935vnic_notify_cb(void *arg, mac_notify_type_t type)
936{
937	vnic_t *vnic = arg;
938
939	/*
940	 * Do not deliver notifications if the vnic is not fully initialized
941	 * or is in process of being torn down.
942	 */
943	if (!vnic->vn_enabled)
944		return;
945
946	switch (type) {
947	case MAC_NOTE_UNICST:
948		/*
949		 * Only the VLAN VNIC needs to be notified with primary MAC
950		 * address change.
951		 */
952		if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
953			return;
954
955		/*  the unicast MAC address value */
956		mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
957
958		/* notify its upper layer MAC about MAC address change */
959		mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
960		break;
961
962	case MAC_NOTE_LINK:
963		mac_link_update(vnic->vn_mh,
964		    mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
965		break;
966
967	default:
968		break;
969	}
970}
971