mac_provider.c revision 11588:618490a7401f
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/conf.h>
29#include <sys/id_space.h>
30#include <sys/esunddi.h>
31#include <sys/stat.h>
32#include <sys/mkdev.h>
33#include <sys/stream.h>
34#include <sys/strsubr.h>
35#include <sys/dlpi.h>
36#include <sys/modhash.h>
37#include <sys/mac.h>
38#include <sys/mac_provider.h>
39#include <sys/mac_impl.h>
40#include <sys/mac_client_impl.h>
41#include <sys/mac_client_priv.h>
42#include <sys/mac_soft_ring.h>
43#include <sys/dld.h>
44#include <sys/modctl.h>
45#include <sys/fs/dv_node.h>
46#include <sys/thread.h>
47#include <sys/proc.h>
48#include <sys/callb.h>
49#include <sys/cpuvar.h>
50#include <sys/atomic.h>
51#include <sys/sdt.h>
52#include <sys/mac_flow.h>
53#include <sys/ddi_intr_impl.h>
54#include <sys/disp.h>
55#include <sys/sdt.h>
56
57/*
58 * MAC Provider Interface.
59 *
60 * Interface for GLDv3 compatible NIC drivers.
61 */
62
63static void i_mac_notify_thread(void *);
64
/*
 * Signature of a framework-internal default handler.  The notification
 * thread invokes the handler for a notification type, if one is present,
 * before it walks the registered notification callbacks for that type.
 */
typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *);

/*
 * Default handlers, indexed by notification type.  A NULL slot means the
 * type has no default handler; only the first slot is populated.
 * NOTE(review): the per-slot comments assume the slot order matches the
 * notification type enumeration -- confirm against its definition.
 */
static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = {
	mac_fanout_recompute,	/* MAC_NOTE_LINK */
	NULL,		/* MAC_NOTE_UNICST */
	NULL,		/* MAC_NOTE_TX */
	NULL,		/* MAC_NOTE_DEVPROMISC */
	NULL,		/* MAC_NOTE_FASTPATH_FLUSH */
	NULL,		/* MAC_NOTE_SDU_SIZE */
	NULL,		/* MAC_NOTE_MARGIN */
	NULL,		/* MAC_NOTE_CAPAB_CHG */
	NULL		/* MAC_NOTE_LOWLINK */
};
78
79/*
80 * Driver support functions.
81 */
82
83/* REGISTRATION */
84
85mac_register_t *
86mac_alloc(uint_t mac_version)
87{
88	mac_register_t *mregp;
89
90	/*
91	 * Make sure there isn't a version mismatch between the driver and
92	 * the framework.  In the future, if multiple versions are
93	 * supported, this check could become more sophisticated.
94	 */
95	if (mac_version != MAC_VERSION)
96		return (NULL);
97
98	mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP);
99	mregp->m_version = mac_version;
100	return (mregp);
101}
102
103void
104mac_free(mac_register_t *mregp)
105{
106	kmem_free(mregp, sizeof (mac_register_t));
107}
108
109/*
110 * mac_register() is how drivers register new MACs with the GLDv3
111 * framework.  The mregp argument is allocated by drivers using the
112 * mac_alloc() function, and can be freed using mac_free() immediately upon
113 * return from mac_register().  Upon success (0 return value), the mhp
114 * opaque pointer becomes the driver's handle to its MAC interface, and is
115 * the argument to all other mac module entry points.
116 */
117/* ARGSUSED */
118int
119mac_register(mac_register_t *mregp, mac_handle_t *mhp)
120{
121	mac_impl_t		*mip;
122	mactype_t		*mtype;
123	int			err = EINVAL;
124	struct devnames		*dnp = NULL;
125	uint_t			instance;
126	boolean_t		style1_created = B_FALSE;
127	boolean_t		style2_created = B_FALSE;
128	char			*driver;
129	minor_t			minor = 0;
130
131	/* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */
132	if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip)))
133		return (EINVAL);
134
135	/* Find the required MAC-Type plugin. */
136	if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL)
137		return (EINVAL);
138
139	/* Create a mac_impl_t to represent this MAC. */
140	mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP);
141
142	/*
143	 * The mac is not ready for open yet.
144	 */
145	mip->mi_state_flags |= MIS_DISABLED;
146
147	/*
148	 * When a mac is registered, the m_instance field can be set to:
149	 *
150	 *  0:	Get the mac's instance number from m_dip.
151	 *	This is usually used for physical device dips.
152	 *
153	 *  [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number.
154	 *	For example, when an aggregation is created with the key option,
155	 *	"key" will be used as the instance number.
156	 *
157	 *  -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1].
158	 *	This is often used when a MAC of a virtual link is registered
159	 *	(e.g., aggregation when "key" is not specified, or vnic).
160	 *
161	 * Note that the instance number is used to derive the mi_minor field
162	 * of mac_impl_t, which will then be used to derive the name of kstats
163	 * and the devfs nodes.  The first 2 cases are needed to preserve
164	 * backward compatibility.
165	 */
166	switch (mregp->m_instance) {
167	case 0:
168		instance = ddi_get_instance(mregp->m_dip);
169		break;
170	case ((uint_t)-1):
171		minor = mac_minor_hold(B_TRUE);
172		if (minor == 0) {
173			err = ENOSPC;
174			goto fail;
175		}
176		instance = minor - 1;
177		break;
178	default:
179		instance = mregp->m_instance;
180		if (instance >= MAC_MAX_MINOR) {
181			err = EINVAL;
182			goto fail;
183		}
184		break;
185	}
186
187	mip->mi_minor = (minor_t)(instance + 1);
188	mip->mi_dip = mregp->m_dip;
189	mip->mi_clients_list = NULL;
190	mip->mi_nclients = 0;
191
192	/* Set the default IEEE Port VLAN Identifier */
193	mip->mi_pvid = 1;
194
195	/* Default bridge link learning protection values */
196	mip->mi_llimit = 1000;
197	mip->mi_ldecay = 200;
198
199	driver = (char *)ddi_driver_name(mip->mi_dip);
200
201	/* Construct the MAC name as <drvname><instance> */
202	(void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d",
203	    driver, instance);
204
205	mip->mi_driver = mregp->m_driver;
206
207	mip->mi_type = mtype;
208	mip->mi_margin = mregp->m_margin;
209	mip->mi_info.mi_media = mtype->mt_type;
210	mip->mi_info.mi_nativemedia = mtype->mt_nativetype;
211	if (mregp->m_max_sdu <= mregp->m_min_sdu)
212		goto fail;
213	mip->mi_sdu_min = mregp->m_min_sdu;
214	mip->mi_sdu_max = mregp->m_max_sdu;
215	mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length;
216	/*
217	 * If the media supports a broadcast address, cache a pointer to it
218	 * in the mac_info_t so that upper layers can use it.
219	 */
220	mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr;
221
222	mip->mi_v12n_level = mregp->m_v12n;
223
224	/*
225	 * Copy the unicast source address into the mac_info_t, but only if
226	 * the MAC-Type defines a non-zero address length.  We need to
227	 * handle MAC-Types that have an address length of 0
228	 * (point-to-point protocol MACs for example).
229	 */
230	if (mip->mi_type->mt_addr_length > 0) {
231		if (mregp->m_src_addr == NULL)
232			goto fail;
233		mip->mi_info.mi_unicst_addr =
234		    kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP);
235		bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr,
236		    mip->mi_type->mt_addr_length);
237
238		/*
239		 * Copy the fixed 'factory' MAC address from the immutable
240		 * info.  This is taken to be the MAC address currently in
241		 * use.
242		 */
243		bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr,
244		    mip->mi_type->mt_addr_length);
245
246		/*
247		 * At this point, we should set up the classification
248		 * rules etc but we delay it till mac_open() so that
249		 * the resource discovery has taken place and we
250		 * know someone wants to use the device. Otherwise
251		 * memory gets allocated for Rx ring structures even
252		 * during probe.
253		 */
254
255		/* Copy the destination address if one is provided. */
256		if (mregp->m_dst_addr != NULL) {
257			bcopy(mregp->m_dst_addr, mip->mi_dstaddr,
258			    mip->mi_type->mt_addr_length);
259			mip->mi_dstaddr_set = B_TRUE;
260		}
261	} else if (mregp->m_src_addr != NULL) {
262		goto fail;
263	}
264
265	/*
266	 * The format of the m_pdata is specific to the plugin.  It is
267	 * passed in as an argument to all of the plugin callbacks.  The
268	 * driver can update this information by calling
269	 * mac_pdata_update().
270	 */
271	if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) {
272		/*
273		 * Verify if the supplied plugin data is valid.  Note that
274		 * even if the caller passed in a NULL pointer as plugin data,
275		 * we still need to verify if that's valid as the plugin may
276		 * require plugin data to function.
277		 */
278		if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata,
279		    mregp->m_pdata_size)) {
280			goto fail;
281		}
282		if (mregp->m_pdata != NULL) {
283			mip->mi_pdata =
284			    kmem_alloc(mregp->m_pdata_size, KM_SLEEP);
285			bcopy(mregp->m_pdata, mip->mi_pdata,
286			    mregp->m_pdata_size);
287			mip->mi_pdata_size = mregp->m_pdata_size;
288		}
289	} else if (mregp->m_pdata != NULL) {
290		/*
291		 * The caller supplied non-NULL plugin data, but the plugin
292		 * does not recognize plugin data.
293		 */
294		err = EINVAL;
295		goto fail;
296	}
297
298	/*
299	 * Register the private properties.
300	 */
301	mac_register_priv_prop(mip, mregp->m_priv_props,
302	    mregp->m_priv_prop_count);
303
304	/*
305	 * Stash the driver callbacks into the mac_impl_t, but first sanity
306	 * check to make sure all mandatory callbacks are set.
307	 */
308	if (mregp->m_callbacks->mc_getstat == NULL ||
309	    mregp->m_callbacks->mc_start == NULL ||
310	    mregp->m_callbacks->mc_stop == NULL ||
311	    mregp->m_callbacks->mc_setpromisc == NULL ||
312	    mregp->m_callbacks->mc_multicst == NULL) {
313		goto fail;
314	}
315	mip->mi_callbacks = mregp->m_callbacks;
316
317	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY,
318	    &mip->mi_capab_legacy)) {
319		mip->mi_state_flags |= MIS_LEGACY;
320		mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev;
321	} else {
322		mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip),
323		    mip->mi_minor);
324	}
325
326	/*
327	 * Allocate a notification thread. thread_create blocks for memory
328	 * if needed, it never fails.
329	 */
330	mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread,
331	    mip, 0, &p0, TS_RUN, minclsyspri);
332
333	/*
334	 * Initialize the capabilities
335	 */
336
337	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL))
338		mip->mi_state_flags |= MIS_IS_VNIC;
339
340	if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
341		mip->mi_state_flags |= MIS_IS_AGGR;
342
343	mac_addr_factory_init(mip);
344
345	/*
346	 * Enforce the virtrualization level registered.
347	 */
348	if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) {
349		if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 ||
350		    mac_init_rings(mip, MAC_RING_TYPE_TX) != 0)
351			goto fail;
352
353		/*
354		 * The driver needs to register at least rx rings for this
355		 * virtualization level.
356		 */
357		if (mip->mi_rx_groups == NULL)
358			goto fail;
359	}
360
361	/*
362	 * The driver must set mc_unicst entry point to NULL when it advertises
363	 * CAP_RINGS for rx groups.
364	 */
365	if (mip->mi_rx_groups != NULL) {
366		if (mregp->m_callbacks->mc_unicst != NULL)
367			goto fail;
368	} else {
369		if (mregp->m_callbacks->mc_unicst == NULL)
370			goto fail;
371	}
372
373	/*
374	 * The driver must set mc_tx entry point to NULL when it advertises
375	 * CAP_RINGS for tx rings.
376	 */
377	if (mip->mi_tx_groups != NULL) {
378		if (mregp->m_callbacks->mc_tx != NULL)
379			goto fail;
380	} else {
381		if (mregp->m_callbacks->mc_tx == NULL)
382			goto fail;
383	}
384
385	/*
386	 * Initialize MAC addresses. Must be called after mac_init_rings().
387	 */
388	mac_init_macaddr(mip);
389
390	mip->mi_share_capab.ms_snum = 0;
391	if (mip->mi_v12n_level & MAC_VIRT_HIO) {
392		(void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES,
393		    &mip->mi_share_capab);
394	}
395
396	/*
397	 * Initialize the kstats for this device.
398	 */
399	mac_stat_create(mip);
400
401	/* Zero out any properties. */
402	bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t));
403
404	if (mip->mi_minor <= MAC_MAX_MINOR) {
405		/* Create a style-2 DLPI device */
406		if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0,
407		    DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS)
408			goto fail;
409		style2_created = B_TRUE;
410
411		/* Create a style-1 DLPI device */
412		if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR,
413		    mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS)
414			goto fail;
415		style1_created = B_TRUE;
416	}
417
418	mac_flow_l2tab_create(mip, &mip->mi_flow_tab);
419
420	rw_enter(&i_mac_impl_lock, RW_WRITER);
421	if (mod_hash_insert(i_mac_impl_hash,
422	    (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) {
423		rw_exit(&i_mac_impl_lock);
424		err = EEXIST;
425		goto fail;
426	}
427
428	DTRACE_PROBE2(mac__register, struct devnames *, dnp,
429	    (mac_impl_t *), mip);
430
431	/*
432	 * Mark the MAC to be ready for open.
433	 */
434	mip->mi_state_flags &= ~MIS_DISABLED;
435	rw_exit(&i_mac_impl_lock);
436
437	atomic_inc_32(&i_mac_impl_count);
438
439	cmn_err(CE_NOTE, "!%s registered", mip->mi_name);
440	*mhp = (mac_handle_t)mip;
441	return (0);
442
443fail:
444	if (style1_created)
445		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
446
447	if (style2_created)
448		ddi_remove_minor_node(mip->mi_dip, driver);
449
450	mac_addr_factory_fini(mip);
451
452	/* Clean up registered MAC addresses */
453	mac_fini_macaddr(mip);
454
455	/* Clean up registered rings */
456	mac_free_rings(mip, MAC_RING_TYPE_RX);
457	mac_free_rings(mip, MAC_RING_TYPE_TX);
458
459	/* Clean up notification thread */
460	if (mip->mi_notify_thread != NULL)
461		i_mac_notify_exit(mip);
462
463	if (mip->mi_info.mi_unicst_addr != NULL) {
464		kmem_free(mip->mi_info.mi_unicst_addr,
465		    mip->mi_type->mt_addr_length);
466		mip->mi_info.mi_unicst_addr = NULL;
467	}
468
469	mac_stat_destroy(mip);
470
471	if (mip->mi_type != NULL) {
472		atomic_dec_32(&mip->mi_type->mt_ref);
473		mip->mi_type = NULL;
474	}
475
476	if (mip->mi_pdata != NULL) {
477		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
478		mip->mi_pdata = NULL;
479		mip->mi_pdata_size = 0;
480	}
481
482	if (minor != 0) {
483		ASSERT(minor > MAC_MAX_MINOR);
484		mac_minor_rele(minor);
485	}
486
487	mac_unregister_priv_prop(mip);
488
489	/*
490	 * Clear the state before destroying the mac_impl_t
491	 */
492	mip->mi_state_flags = 0;
493
494	kmem_cache_free(i_mac_impl_cachep, mip);
495	return (err);
496}
497
498/*
499 * Unregister from the GLDv3 framework
500 */
501int
502mac_unregister(mac_handle_t mh)
503{
504	int			err;
505	mac_impl_t		*mip = (mac_impl_t *)mh;
506	mod_hash_val_t		val;
507	mac_margin_req_t	*mmr, *nextmmr;
508
509	/* Fail the unregister if there are any open references to this mac. */
510	if ((err = mac_disable_nowait(mh)) != 0)
511		return (err);
512
513	/*
514	 * Clean up notification thread and wait for it to exit.
515	 */
516	i_mac_notify_exit(mip);
517
518	i_mac_perim_enter(mip);
519
520	/*
521	 * There is still resource properties configured over this mac.
522	 */
523	if (mip->mi_resource_props.mrp_mask != 0)
524		mac_fastpath_enable((mac_handle_t)mip);
525
526	if (mip->mi_minor < MAC_MAX_MINOR + 1) {
527		ddi_remove_minor_node(mip->mi_dip, mip->mi_name);
528		ddi_remove_minor_node(mip->mi_dip,
529		    (char *)ddi_driver_name(mip->mi_dip));
530	}
531
532	ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags &
533	    MIS_EXCLUSIVE));
534
535	mac_stat_destroy(mip);
536
537	(void) mod_hash_remove(i_mac_impl_hash,
538	    (mod_hash_key_t)mip->mi_name, &val);
539	ASSERT(mip == (mac_impl_t *)val);
540
541	ASSERT(i_mac_impl_count > 0);
542	atomic_dec_32(&i_mac_impl_count);
543
544	if (mip->mi_pdata != NULL)
545		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
546	mip->mi_pdata = NULL;
547	mip->mi_pdata_size = 0;
548
549	/*
550	 * Free the list of margin request.
551	 */
552	for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) {
553		nextmmr = mmr->mmr_nextp;
554		kmem_free(mmr, sizeof (mac_margin_req_t));
555	}
556	mip->mi_mmrp = NULL;
557
558	mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN;
559	kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length);
560	mip->mi_info.mi_unicst_addr = NULL;
561
562	atomic_dec_32(&mip->mi_type->mt_ref);
563	mip->mi_type = NULL;
564
565	/*
566	 * Free the primary MAC address.
567	 */
568	mac_fini_macaddr(mip);
569
570	/*
571	 * free all rings
572	 */
573	mac_free_rings(mip, MAC_RING_TYPE_RX);
574	mac_free_rings(mip, MAC_RING_TYPE_TX);
575
576	mac_addr_factory_fini(mip);
577
578	bzero(mip->mi_addr, MAXMACADDRLEN);
579	bzero(mip->mi_dstaddr, MAXMACADDRLEN);
580
581	/* and the flows */
582	mac_flow_tab_destroy(mip->mi_flow_tab);
583	mip->mi_flow_tab = NULL;
584
585	if (mip->mi_minor > MAC_MAX_MINOR)
586		mac_minor_rele(mip->mi_minor);
587
588	cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name);
589
590	/*
591	 * Reset the perim related fields to default values before
592	 * kmem_cache_free
593	 */
594	i_mac_perim_exit(mip);
595	mip->mi_state_flags = 0;
596
597	mac_unregister_priv_prop(mip);
598
599	ASSERT(mip->mi_bridge_link == NULL);
600	kmem_cache_free(i_mac_impl_cachep, mip);
601
602	return (0);
603}
604
605/* DATA RECEPTION */
606
607/*
608 * This function is invoked for packets received by the MAC driver in
609 * interrupt context. The ring generation number provided by the driver
610 * is matched with the ring generation number held in MAC. If they do not
611 * match, received packets are considered stale packets coming from an older
612 * assignment of the ring. Drop them.
613 */
614void
615mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain,
616    uint64_t mr_gen_num)
617{
618	mac_ring_t		*mr = (mac_ring_t *)mrh;
619
620	if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) {
621		DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t,
622		    mr->mr_gen_num, uint64_t, mr_gen_num);
623		freemsgchain(mp_chain);
624		return;
625	}
626	mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain);
627}
628
629/*
630 * This function is invoked for each packet received by the underlying driver.
631 */
632void
633mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
634{
635	mac_impl_t *mip = (mac_impl_t *)mh;
636
637	/*
638	 * Check if the link is part of a bridge.  If not, then we don't need
639	 * to take the lock to remain consistent.  Make this common case
640	 * lock-free and tail-call optimized.
641	 */
642	if (mip->mi_bridge_link == NULL) {
643		mac_rx_common(mh, mrh, mp_chain);
644	} else {
645		/*
646		 * Once we take a reference on the bridge link, the bridge
647		 * module itself can't unload, so the callback pointers are
648		 * stable.
649		 */
650		mutex_enter(&mip->mi_bridge_lock);
651		if ((mh = mip->mi_bridge_link) != NULL)
652			mac_bridge_ref_cb(mh, B_TRUE);
653		mutex_exit(&mip->mi_bridge_lock);
654		if (mh == NULL) {
655			mac_rx_common((mac_handle_t)mip, mrh, mp_chain);
656		} else {
657			mac_bridge_rx_cb(mh, mrh, mp_chain);
658			mac_bridge_ref_cb(mh, B_FALSE);
659		}
660	}
661}
662
663/*
664 * Special case function: this allows snooping of packets transmitted and
665 * received by TRILL. By design, they go directly into the TRILL module.
666 */
667void
668mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
669{
670	mac_impl_t *mip = (mac_impl_t *)mh;
671
672	if (mip->mi_promisc_list != NULL)
673		mac_promisc_dispatch(mip, mp, NULL);
674}
675
/*
 * This is the upward reentry point for packets arriving from the bridging
 * module and from mac_rx for links not part of a bridge.
 *
 * mh is the MAC the chain was received on, mrh is the receive ring handle
 * (NULL when the driver did not identify a ring) and mp_chain is the packet
 * chain.  The chain is always disposed of by the time this returns: it is
 * either passed on to a receive routine or freed here.
 */
void
mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_ring_t		*mr = (mac_ring_t *)mrh;
	mac_soft_ring_set_t 	*mac_srs;
	mblk_t			*bp = mp_chain;
	boolean_t		hw_classified = B_FALSE;

	/*
	 * If there are any promiscuous mode callbacks defined for
	 * this MAC, pass them a copy if appropriate.
	 */
	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp_chain, NULL);

	if (mr != NULL) {
		/*
		 * If the SRS teardown has started, just return. The 'mr'
		 * continues to be valid until the driver unregisters the mac.
		 * Hardware classified packets will not make their way up
		 * beyond this point once the teardown has started. The driver
		 * is never passed a pointer to a flow entry or SRS or any
		 * structure that can be freed much before mac_unregister.
		 */
		mutex_enter(&mr->mr_lock);
		if ((mr->mr_state != MR_INUSE) || (mr->mr_flag &
		    (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) {
			/* Ring is not usable right now: drop the chain. */
			mutex_exit(&mr->mr_lock);
			freemsgchain(mp_chain);
			return;
		}
		if (mr->mr_classify_type == MAC_HW_CLASSIFIER) {
			/*
			 * Take the ring reference while mr_lock is still
			 * held; it is released after the SRS call below.
			 */
			hw_classified = B_TRUE;
			MR_REFHOLD_LOCKED(mr);
		}
		mutex_exit(&mr->mr_lock);

		/*
		 * We check if an SRS is controlling this ring.
		 * If so, we can directly call the srs_lower_proc
		 * routine otherwise we need to go through mac_rx_classify
		 * to reach the right place.
		 */
		if (hw_classified) {
			mac_srs = mr->mr_srs;
			/*
			 * This is supposed to be the fast path.
			 * All packets received though here were steered by
			 * the hardware classifier, and share the same
			 * MAC header info.
			 */
			mac_srs->srs_rx.sr_lower_proc(mh,
			    (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE);
			MR_REFRELE(mr);
			return;
		}
		/* We'll fall through to software classification */
	} else {
		flow_entry_t *flent;
		int err;

		/*
		 * No ring handle.  If the MAC has a single active client,
		 * try to deliver the chain directly through that client's
		 * flow entry callback.
		 */
		rw_enter(&mip->mi_rw_lock, RW_READER);
		if (mip->mi_single_active_client != NULL) {
			flent = mip->mi_single_active_client->mci_flent_list;
			FLOW_TRY_REFHOLD(flent, err);
			rw_exit(&mip->mi_rw_lock);
			if (err == 0) {
				(flent->fe_cb_fn)(flent->fe_cb_arg1,
				    flent->fe_cb_arg2, mp_chain, B_FALSE);
				FLOW_REFRELE(flent);
				return;
			}
			/*
			 * The flow entry could not be held; fall through
			 * to the flow table lookup below.
			 */
		} else {
			rw_exit(&mip->mi_rw_lock);
		}
	}

	/*
	 * Software classification.  mac_rx_flow() returns whatever part of
	 * the chain it did not dispose of; NULL means nothing is left.
	 */
	if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) {
		if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL)
			return;
	}

	/* Nothing claimed these packets (or the flow table is empty). */
	freemsgchain(bp);
}
765
766/* DATA TRANSMISSION */
767
768/*
769 * A driver's notification to resume transmission, in case of a provider
770 * without TX rings.
771 */
772void
773mac_tx_update(mac_handle_t mh)
774{
775	/*
776	 * Walk the list of MAC clients (mac_client_handle)
777	 * and update
778	 */
779	i_mac_tx_srs_notify((mac_impl_t *)mh, NULL);
780}
781
782/*
783 * A driver's notification to resume transmission on the specified TX ring.
784 */
785void
786mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh)
787{
788	i_mac_tx_srs_notify((mac_impl_t *)mh, rh);
789}
790
791/* LINK STATE */
792/*
793 * Notify the MAC layer about a link state change
794 */
795void
796mac_link_update(mac_handle_t mh, link_state_t link)
797{
798	mac_impl_t	*mip = (mac_impl_t *)mh;
799
800	/*
801	 * Save the link state.
802	 */
803	mip->mi_lowlinkstate = link;
804
805	/*
806	 * Send a MAC_NOTE_LOWLINK notification.  This tells the notification
807	 * thread to deliver both lower and upper notifications.
808	 */
809	i_mac_notify(mip, MAC_NOTE_LOWLINK);
810}
811
812/*
813 * Notify the MAC layer about a link state change due to bridging.
814 */
815void
816mac_link_redo(mac_handle_t mh, link_state_t link)
817{
818	mac_impl_t	*mip = (mac_impl_t *)mh;
819
820	/*
821	 * Save the link state.
822	 */
823	mip->mi_linkstate = link;
824
825	/*
826	 * Send a MAC_NOTE_LINK notification.  Only upper notifications are
827	 * made.
828	 */
829	i_mac_notify(mip, MAC_NOTE_LINK);
830}
831
832/* MINOR NODE HANDLING */
833
834/*
835 * Given a dev_t, return the instance number (PPA) associated with it.
836 * Drivers can use this in their getinfo(9e) implementation to lookup
837 * the instance number (i.e. PPA) of the device, to use as an index to
838 * their own array of soft state structures.
839 *
840 * Returns -1 on error.
841 */
842int
843mac_devt_to_instance(dev_t devt)
844{
845	return (dld_devt_to_instance(devt));
846}
847
848/*
849 * This function returns the first minor number that is available for
850 * driver private use.  All minor numbers smaller than this are
851 * reserved for GLDv3 use.
852 */
853minor_t
854mac_private_minor(void)
855{
856	return (MAC_PRIVATE_MINOR);
857}
858
859/* OTHER CONTROL INFORMATION */
860
861/*
862 * A driver notified us that its primary MAC address has changed.
863 */
864void
865mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
866{
867	mac_impl_t	*mip = (mac_impl_t *)mh;
868
869	if (mip->mi_type->mt_addr_length == 0)
870		return;
871
872	i_mac_perim_enter(mip);
873
874	/*
875	 * If address changes, freshen the MAC address value and update
876	 * all MAC clients that share this MAC address.
877	 */
878	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
879		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
880		    (uint8_t *)addr);
881	}
882
883	i_mac_perim_exit(mip);
884
885	/*
886	 * Send a MAC_NOTE_UNICST notification.
887	 */
888	i_mac_notify(mip, MAC_NOTE_UNICST);
889}
890
891void
892mac_dst_update(mac_handle_t mh, const uint8_t *addr)
893{
894	mac_impl_t	*mip = (mac_impl_t *)mh;
895
896	if (mip->mi_type->mt_addr_length == 0)
897		return;
898
899	i_mac_perim_enter(mip);
900	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
901	i_mac_perim_exit(mip);
902	i_mac_notify(mip, MAC_NOTE_DEST);
903}
904
905/*
906 * MAC plugin information changed.
907 */
908int
909mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
910{
911	mac_impl_t	*mip = (mac_impl_t *)mh;
912
913	/*
914	 * Verify that the plugin supports MAC plugin data and that the
915	 * supplied data is valid.
916	 */
917	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
918		return (EINVAL);
919	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
920		return (EINVAL);
921
922	if (mip->mi_pdata != NULL)
923		kmem_free(mip->mi_pdata, mip->mi_pdata_size);
924
925	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
926	bcopy(mac_pdata, mip->mi_pdata, dsize);
927	mip->mi_pdata_size = dsize;
928
929	/*
930	 * Since the MAC plugin data is used to construct MAC headers that
931	 * were cached in fast-path headers, we need to flush fast-path
932	 * information for links associated with this mac.
933	 */
934	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
935	return (0);
936}
937
938/*
939 * Invoked by driver as well as the framework to notify its capability change.
940 */
941void
942mac_capab_update(mac_handle_t mh)
943{
944	/* Send MAC_NOTE_CAPAB_CHG notification */
945	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
946}
947
948int
949mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
950{
951	mac_impl_t	*mip = (mac_impl_t *)mh;
952
953	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
954		return (EINVAL);
955	mip->mi_sdu_max = sdu_max;
956
957	/* Send a MAC_NOTE_SDU_SIZE notification. */
958	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
959	return (0);
960}
961
962/* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */
963
964/*
965 * Updates the mac_impl structure with the current state of the link
966 */
967static void
968i_mac_log_link_state(mac_impl_t *mip)
969{
970	/*
971	 * If no change, then it is not interesting.
972	 */
973	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
974		return;
975
976	switch (mip->mi_lowlinkstate) {
977	case LINK_STATE_UP:
978		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
979			char det[200];
980
981			mip->mi_type->mt_ops.mtops_link_details(det,
982			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);
983
984			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
985		} else {
986			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
987		}
988		break;
989
990	case LINK_STATE_DOWN:
991		/*
992		 * Only transitions from UP to DOWN are interesting
993		 */
994		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
995			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
996		break;
997
998	case LINK_STATE_UNKNOWN:
999		/*
1000		 * This case is normally not interesting.
1001		 */
1002		break;
1003	}
1004	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
1005}
1006
/*
 * Main routine for the callbacks notifications thread
 *
 * One such thread is created per MAC by mac_register(); arg is the
 * mac_impl_t.  The thread sleeps until notification bits are posted in
 * mi_notify_bits, then runs the default handler and the registered
 * callbacks for every posted type.  It exits when the quit bit
 * (1 << MAC_NNOTE) is posted, after setting MIS_NOTIFY_DONE and waking
 * any waiter on the callback-info condition variable.
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/* Snapshot the pending bits; sleep when there are none. */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		/* Consume the snapshot so that new posts start from zero. */
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* The callbacks below run without the lock held. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			if (mip->mi_bridge_link != NULL) {
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			/*
			 * When the upper link state changes as a result,
			 * add the upper-layer notification to this pass.
			 */
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Framework default handler first, if there is one. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		/* Retake the lock before looking at the bits again. */
		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Tell i_mac_notify_exit() that this thread is finished. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}
1103
1104/*
1105 * Signal the i_mac_notify_thread asking it to quit.
1106 * Then wait till it is done.
1107 */
1108void
1109i_mac_notify_exit(mac_impl_t *mip)
1110{
1111	mac_cb_info_t	*mcbi;
1112
1113	mcbi = &mip->mi_notify_cb_info;
1114
1115	mutex_enter(mcbi->mcbi_lockp);
1116	mip->mi_notify_bits = (1 << MAC_NNOTE);
1117	cv_broadcast(&mcbi->mcbi_cv);
1118
1119
1120	while ((mip->mi_notify_thread != NULL) &&
1121	    !(mip->mi_state_flags & MIS_NOTIFY_DONE)) {
1122		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
1123	}
1124
1125	/* Necessary clean up before doing kmem_cache_free */
1126	mip->mi_state_flags &= ~MIS_NOTIFY_DONE;
1127	mip->mi_notify_bits = 0;
1128	mip->mi_notify_thread = NULL;
1129	mutex_exit(mcbi->mcbi_lockp);
1130}
1131
1132/*
1133 * Entry point invoked by drivers to dynamically add a ring to an
1134 * existing group.
1135 */
1136int
1137mac_group_add_ring(mac_group_handle_t gh, int index)
1138{
1139	mac_group_t *group = (mac_group_t *)gh;
1140	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1141	int ret;
1142
1143	i_mac_perim_enter(mip);
1144
1145	/*
1146	 * Only RX rings can be added or removed by drivers currently.
1147	 */
1148	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1149
1150	ret = i_mac_group_add_ring(group, NULL, index);
1151
1152	i_mac_perim_exit(mip);
1153
1154	return (ret);
1155}
1156
1157/*
1158 * Entry point invoked by drivers to dynamically remove a ring
1159 * from an existing group. The specified ring handle must no longer
1160 * be used by the driver after a call to this function.
1161 */
1162void
1163mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh)
1164{
1165	mac_group_t *group = (mac_group_t *)gh;
1166	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
1167
1168	i_mac_perim_enter(mip);
1169
1170	/*
1171	 * Only RX rings can be added or removed by drivers currently.
1172	 */
1173	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
1174
1175	i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE);
1176
1177	i_mac_perim_exit(mip);
1178}
1179