vsw_switching.c revision 6495:1a95fa8c7c94
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include <sys/types.h>
30#include <sys/errno.h>
31#include <sys/debug.h>
32#include <sys/time.h>
33#include <sys/sysmacros.h>
34#include <sys/systm.h>
35#include <sys/user.h>
36#include <sys/stropts.h>
37#include <sys/stream.h>
38#include <sys/strlog.h>
39#include <sys/strsubr.h>
40#include <sys/cmn_err.h>
41#include <sys/cpu.h>
42#include <sys/kmem.h>
43#include <sys/conf.h>
44#include <sys/ddi.h>
45#include <sys/sunddi.h>
46#include <sys/ksynch.h>
47#include <sys/stat.h>
48#include <sys/kstat.h>
49#include <sys/vtrace.h>
50#include <sys/strsun.h>
51#include <sys/dlpi.h>
52#include <sys/ethernet.h>
53#include <net/if.h>
54#include <sys/varargs.h>
55#include <sys/machsystm.h>
56#include <sys/modctl.h>
57#include <sys/modhash.h>
58#include <sys/mac.h>
59#include <sys/mac_ether.h>
60#include <sys/taskq.h>
61#include <sys/note.h>
62#include <sys/mach_descrip.h>
63#include <sys/mac.h>
64#include <sys/mdeg.h>
65#include <sys/ldc.h>
66#include <sys/vsw_fdb.h>
67#include <sys/vsw.h>
68#include <sys/vio_mailbox.h>
69#include <sys/vnet_mailbox.h>
70#include <sys/vnet_common.h>
71#include <sys/vio_util.h>
72#include <sys/sdt.h>
73#include <sys/atomic.h>
74#include <sys/vlan.h>
75
76/* Switching setup routines */
77void vsw_setup_switching_timeout(void *arg);
78void vsw_stop_switching_timeout(vsw_t *vswp);
79int vsw_setup_switching(vsw_t *);
80static	int vsw_setup_layer2(vsw_t *);
81static	int vsw_setup_layer3(vsw_t *);
82
83/* Switching/data transmit routines */
84static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
85	vsw_port_t *port, mac_resource_handle_t);
86static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
87	vsw_port_t *port, mac_resource_handle_t);
88static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
89	int caller, vsw_port_t *port);
90static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
91    int caller, vsw_port_t *port);
92
93/* VLAN routines */
94void vsw_create_vlans(void *arg, int type);
95void vsw_destroy_vlans(void *arg, int type);
96void vsw_vlan_add_ids(void *arg, int type);
97void vsw_vlan_remove_ids(void *arg, int type);
98static	void vsw_vlan_create_hash(void *arg, int type);
99static	void vsw_vlan_destroy_hash(void *arg, int type);
100boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
101	uint16_t *vidp);
102mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
103uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
104boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
105
106/* Forwarding database (FDB) routines */
107void vsw_fdbe_add(vsw_t *vswp, void *port);
108void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
109static	vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
110static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
111
112int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
113int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
114int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
115void vsw_del_mcst_vsw(vsw_t *);
116
117/* Support functions */
118static mblk_t *vsw_dupmsgchain(mblk_t *mp);
119static uint32_t vsw_get_same_dest_list(struct ether_header *ehp,
120    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
121
122
123/*
124 * Functions imported from other files.
125 */
126extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
127extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
128extern int vsw_mac_open(vsw_t *vswp);
129extern void vsw_mac_close(vsw_t *vswp);
130extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
131    mblk_t *mp, vsw_macrx_flags_t flags);
132extern void vsw_set_addrs(vsw_t *vswp);
133extern int vsw_get_hw_maddr(vsw_t *);
134extern int vsw_mac_attach(vsw_t *vswp);
135extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt,
136	uint32_t count);
137extern void vsw_hio_init(vsw_t *vswp);
138extern void vsw_hio_start_ports(vsw_t *vswp);
139
140/*
141 * Tunables used in this file.
142 */
143extern	int vsw_setup_switching_delay;
144extern	uint32_t vsw_vlan_nchains;
145extern	uint32_t vsw_fdbe_refcnt_delay;
146
/*
 * Reference counting on fdb entries.
 *
 * VSW_FDBE_REFHOLD atomically increments the refcnt of the entry and then
 * asserts that the resulting count is non-zero. VSW_FDBE_REFRELE asserts
 * that the count is non-zero and then atomically decrements it.
 *
 * Both macros are wrapped in do { } while (0) so that each use expands to
 * exactly one statement; a bare { } block followed by the caller's ';'
 * would break an unbraced if/else.
 */
#define	VSW_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
	_NOTE(CONSTCOND)						\
} while (0)

#define	VSW_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
	_NOTE(CONSTCOND)						\
} while (0)
158
159/*
160 * Timeout routine to setup switching mode:
161 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
162 * initially. If it fails and the error is EAGAIN, then this timeout handler
163 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
164 * until we successfully finish it; or the returned error is not EAGAIN.
165 */
166void
167vsw_setup_switching_timeout(void *arg)
168{
169	vsw_t		*vswp = (vsw_t *)arg;
170	int		rv;
171
172	if (vswp->swtmout_enabled == B_FALSE)
173		return;
174
175	rv = vsw_setup_switching(vswp);
176
177	if (rv == 0) {
178		/*
179		 * Successfully setup switching mode.
180		 * Program unicst, mcst addrs of vsw
181		 * interface and ports in the physdev.
182		 */
183		vsw_set_addrs(vswp);
184
185		/* Start HIO for ports that have already connected */
186		vsw_hio_start_ports(vswp);
187	}
188
189	mutex_enter(&vswp->swtmout_lock);
190
191	if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) {
192		/*
193		 * Reschedule timeout() if the error is EAGAIN and the
194		 * timeout is still enabled. For errors other than EAGAIN,
195		 * we simply return without rescheduling timeout().
196		 */
197		vswp->swtmout_id =
198		    timeout(vsw_setup_switching_timeout, vswp,
199		    (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
200		goto exit;
201	}
202
203	/* timeout handler completed */
204	vswp->swtmout_enabled = B_FALSE;
205	vswp->swtmout_id = 0;
206
207exit:
208	mutex_exit(&vswp->swtmout_lock);
209}
210
211/*
212 * Cancel the timeout handler to setup switching mode.
213 */
214void
215vsw_stop_switching_timeout(vsw_t *vswp)
216{
217	timeout_id_t tid;
218
219	mutex_enter(&vswp->swtmout_lock);
220
221	tid = vswp->swtmout_id;
222
223	if (tid != 0) {
224		/* signal timeout handler to stop */
225		vswp->swtmout_enabled = B_FALSE;
226		vswp->swtmout_id = 0;
227		mutex_exit(&vswp->swtmout_lock);
228
229		(void) untimeout(tid);
230	} else {
231		mutex_exit(&vswp->swtmout_lock);
232	}
233
234	(void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
235
236	mutex_enter(&vswp->mac_lock);
237	vswp->mac_open_retries = 0;
238	mutex_exit(&vswp->mac_lock);
239}
240
241/*
242 * Setup the required switching mode.
243 * This routine is invoked from vsw_attach() or vsw_update_md_prop()
244 * initially. If it fails and the error is EAGAIN, then a timeout handler
245 * is started to retry vsw_setup_switching(), until it successfully finishes;
246 * or the returned error is not EAGAIN.
247 *
248 * Returns:
249 *  0 on success.
250 *  EAGAIN if retry is needed.
251 *  1 on all other failures.
252 */
253int
254vsw_setup_switching(vsw_t *vswp)
255{
256	int	i, rv = 1;
257
258	D1(vswp, "%s: enter", __func__);
259
260	/*
261	 * Select best switching mode.
262	 * Note that we start from the saved smode_idx. This is done as
263	 * this routine can be called from the timeout handler to retry
264	 * setting up a specific mode. Currently only the function which
265	 * sets up layer2/promisc mode returns EAGAIN if the underlying
266	 * physical device is not available yet, causing retries.
267	 */
268	for (i = vswp->smode_idx; i < vswp->smode_num; i++) {
269		vswp->smode_idx = i;
270		switch (vswp->smode[i]) {
271		case VSW_LAYER2:
272		case VSW_LAYER2_PROMISC:
273			rv = vsw_setup_layer2(vswp);
274			break;
275
276		case VSW_LAYER3:
277			rv = vsw_setup_layer3(vswp);
278			break;
279
280		default:
281			DERR(vswp, "unknown switch mode");
282			break;
283		}
284
285		if ((rv == 0) || (rv == EAGAIN))
286			break;
287
288		/* all other errors(rv != 0): continue & select the next mode */
289		rv = 1;
290	}
291
292	if (rv && (rv != EAGAIN)) {
293		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
294		    "switching mode", vswp->instance);
295	} else if (rv == 0) {
296		(void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
297	}
298
299	D2(vswp, "%s: Operating in mode %d", __func__,
300	    vswp->smode[vswp->smode_idx]);
301
302	D1(vswp, "%s: exit", __func__);
303
304	return (rv);
305}
306
307/*
308 * Setup for layer 2 switching.
309 *
310 * Returns:
311 *  0 on success.
312 *  EAGAIN if retry is needed.
313 *  EIO on all other failures.
314 */
315static int
316vsw_setup_layer2(vsw_t *vswp)
317{
318	int	rv;
319
320	D1(vswp, "%s: enter", __func__);
321
322	vswp->vsw_switch_frame = vsw_switch_l2_frame;
323
324	rv = strlen(vswp->physname);
325	if (rv == 0) {
326		/*
327		 * Physical device name is NULL, which is
328		 * required for layer 2.
329		 */
330		cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
331		    vswp->instance);
332		return (EIO);
333	}
334
335	mutex_enter(&vswp->mac_lock);
336
337	rv = vsw_mac_open(vswp);
338	if (rv != 0) {
339		if (rv != EAGAIN) {
340			cmn_err(CE_WARN, "!vsw%d: Unable to open physical "
341			    "device: %s\n", vswp->instance, vswp->physname);
342		}
343		mutex_exit(&vswp->mac_lock);
344		return (rv);
345	}
346
347	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
348		/*
349		 * Verify that underlying device can support multiple
350		 * unicast mac addresses.
351		 */
352		rv = vsw_get_hw_maddr(vswp);
353		if (rv != 0) {
354			goto exit_error;
355		}
356	}
357
358	/*
359	 * Attempt to link into the MAC layer so we can get
360	 * and send packets out over the physical adapter.
361	 */
362	rv = vsw_mac_attach(vswp);
363	if (rv != 0) {
364		/*
365		 * Registration with the MAC layer has failed,
366		 * so return error so that can fall back to next
367		 * prefered switching method.
368		 */
369		cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: "
370		    "%s\n", vswp->instance, vswp->physname);
371		goto exit_error;
372	}
373
374	D1(vswp, "%s: exit", __func__);
375
376	mutex_exit(&vswp->mac_lock);
377
378	/* Initialize HybridIO related stuff */
379	vsw_hio_init(vswp);
380	return (0);
381
382exit_error:
383	vsw_mac_close(vswp);
384	mutex_exit(&vswp->mac_lock);
385	return (EIO);
386}
387
388static int
389vsw_setup_layer3(vsw_t *vswp)
390{
391	D1(vswp, "%s: enter", __func__);
392
393	D2(vswp, "%s: operating in layer 3 mode", __func__);
394	vswp->vsw_switch_frame = vsw_switch_l3_frame;
395
396	D1(vswp, "%s: exit", __func__);
397
398	return (0);
399}
400
401/*
402 * Switch the given ethernet frame when operating in layer 2 mode.
403 *
404 * vswp: pointer to the vsw instance
405 * mp: pointer to chain of ethernet frame(s) to be switched
406 * caller: identifies the source of this frame as:
407 * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
408 *		2. VSW_PHYSDEV - the physical ethernet device
409 *		3. VSW_LOCALDEV - vsw configured as a virtual interface
410 * arg: argument provided by the caller.
411 *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
412 *		2. for PHYSDEV - NULL
413 *		3. for LOCALDEV - pointer to to this vsw_t(self)
414 */
415void
416vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
417			vsw_port_t *arg, mac_resource_handle_t mrh)
418{
419	struct ether_header	*ehp;
420	mblk_t			*bp, *ret_m;
421	mblk_t			*mpt = NULL;
422	uint32_t		count;
423	vsw_fdbe_t		*fp;
424
425	D1(vswp, "%s: enter (caller %d)", __func__, caller);
426
427	/*
428	 * PERF: rather than breaking up the chain here, scan it
429	 * to find all mblks heading to same destination and then
430	 * pass that sub-chain to the lower transmit functions.
431	 */
432
433	/* process the chain of packets */
434	bp = mp;
435	while (bp) {
436		ehp = (struct ether_header *)bp->b_rptr;
437		count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
438		ASSERT(count != 0);
439
440		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
441		    __func__, MBLKSIZE(mp), MBLKL(mp));
442
443		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
444			/*
445			 * If destination is VSW_LOCALDEV (vsw as an eth
446			 * interface) and if the device is up & running,
447			 * send the packet up the stack on this host.
448			 * If the virtual interface is down, drop the packet.
449			 */
450			if (caller != VSW_LOCALDEV) {
451				vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG);
452			} else {
453				freemsgchain(mp);
454			}
455			continue;
456		}
457
458		/*
459		 * Find fdb entry for the destination
460		 * and hold a reference to it.
461		 */
462		fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
463		if (fp != NULL) {
464
465			/*
466			 * If plumbed and in promisc mode then copy msg
467			 * and send up the stack.
468			 */
469			vsw_mac_rx(vswp, mrh, mp,
470			    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
471
472			/*
473			 * If the destination is in FDB, the packet
474			 * should be forwarded to the correponding
475			 * vsw_port (connected to a vnet device -
476			 * VSW_VNETPORT)
477			 */
478			(void) vsw_portsend(fp->portp, mp, mpt, count);
479
480			/* Release the reference on the fdb entry */
481			VSW_FDBE_REFRELE(fp);
482		} else {
483			/*
484			 * Destination not in FDB.
485			 *
486			 * If the destination is broadcast or
487			 * multicast forward the packet to all
488			 * (VNETPORTs, PHYSDEV, LOCALDEV),
489			 * except the caller.
490			 */
491			if (IS_BROADCAST(ehp)) {
492				D2(vswp, "%s: BROADCAST pkt", __func__);
493				(void) vsw_forward_all(vswp, mp, caller, arg);
494			} else if (IS_MULTICAST(ehp)) {
495				D2(vswp, "%s: MULTICAST pkt", __func__);
496				(void) vsw_forward_grp(vswp, mp, caller, arg);
497			} else {
498				/*
499				 * If the destination is unicast, and came
500				 * from either a logical network device or
501				 * the switch itself when it is plumbed, then
502				 * send it out on the physical device and also
503				 * up the stack if the logical interface is
504				 * in promiscious mode.
505				 *
506				 * NOTE:  The assumption here is that if we
507				 * cannot find the destination in our fdb, its
508				 * a unicast address, and came from either a
509				 * vnet or down the stack (when plumbed) it
510				 * must be destinded for an ethernet device
511				 * outside our ldoms.
512				 */
513				if (caller == VSW_VNETPORT) {
514					/* promisc check copy etc */
515					vsw_mac_rx(vswp, mrh, mp,
516					    VSW_MACRX_PROMISC |
517					    VSW_MACRX_COPYMSG);
518
519					if ((ret_m = vsw_tx_msg(vswp, mp))
520					    != NULL) {
521						DERR(vswp, "%s: drop mblks to "
522						    "phys dev", __func__);
523						freemsgchain(ret_m);
524					}
525
526				} else if (caller == VSW_PHYSDEV) {
527					/*
528					 * Pkt seen because card in promisc
529					 * mode. Send up stack if plumbed in
530					 * promisc mode, else drop it.
531					 */
532					vsw_mac_rx(vswp, mrh, mp,
533					    VSW_MACRX_PROMISC |
534					    VSW_MACRX_FREEMSG);
535
536				} else if (caller == VSW_LOCALDEV) {
537					/*
538					 * Pkt came down the stack, send out
539					 * over physical device.
540					 */
541					if ((ret_m = vsw_tx_msg(vswp, mp))
542					    != NULL) {
543						DERR(vswp, "%s: drop mblks to "
544						    "phys dev", __func__);
545						freemsgchain(ret_m);
546					}
547				}
548			}
549		}
550	}
551	D1(vswp, "%s: exit\n", __func__);
552}
553
554/*
555 * Switch ethernet frame when in layer 3 mode (i.e. using IP
556 * layer to do the routing).
557 *
558 * There is a large amount of overlap between this function and
559 * vsw_switch_l2_frame. At some stage we need to revisit and refactor
560 * both these functions.
561 */
562void
563vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
564			vsw_port_t *arg, mac_resource_handle_t mrh)
565{
566	struct ether_header	*ehp;
567	mblk_t			*bp = NULL;
568	mblk_t			*mpt;
569	uint32_t		count;
570	vsw_fdbe_t		*fp;
571
572	D1(vswp, "%s: enter (caller %d)", __func__, caller);
573
574	/*
575	 * In layer 3 mode should only ever be switching packets
576	 * between IP layer and vnet devices. So make sure thats
577	 * who is invoking us.
578	 */
579	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
580		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
581		freemsgchain(mp);
582		return;
583	}
584
585	/* process the chain of packets */
586	bp = mp;
587	while (bp) {
588		ehp = (struct ether_header *)bp->b_rptr;
589		count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp);
590		ASSERT(count != 0);
591
592		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
593		    __func__, MBLKSIZE(mp), MBLKL(mp));
594
595		/*
596		 * Find fdb entry for the destination
597		 * and hold a reference to it.
598		 */
599		fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
600		if (fp != NULL) {
601
602			D2(vswp, "%s: sending to target port", __func__);
603			(void) vsw_portsend(fp->portp, mp, mpt, count);
604
605			/* Release the reference on the fdb entry */
606			VSW_FDBE_REFRELE(fp);
607		} else {
608			/*
609			 * Destination not in FDB
610			 *
611			 * If the destination is broadcast or
612			 * multicast forward the packet to all
613			 * (VNETPORTs, PHYSDEV, LOCALDEV),
614			 * except the caller.
615			 */
616			if (IS_BROADCAST(ehp)) {
617				D2(vswp, "%s: BROADCAST pkt", __func__);
618				(void) vsw_forward_all(vswp, mp, caller, arg);
619			} else if (IS_MULTICAST(ehp)) {
620				D2(vswp, "%s: MULTICAST pkt", __func__);
621				(void) vsw_forward_grp(vswp, mp, caller, arg);
622			} else {
623				/*
624				 * Unicast pkt from vnet that we don't have
625				 * an FDB entry for, so must be destinded for
626				 * the outside world. Attempt to send up to the
627				 * IP layer to allow it to deal with it.
628				 */
629				if (caller == VSW_VNETPORT) {
630					vsw_mac_rx(vswp, mrh,
631					    mp, VSW_MACRX_FREEMSG);
632				}
633			}
634		}
635	}
636
637	D1(vswp, "%s: exit", __func__);
638}
639
640/*
641 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
642 * except the caller (port on which frame arrived).
643 */
644static int
645vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
646{
647	vsw_port_list_t	*plist = &vswp->plist;
648	vsw_port_t	*portp;
649	mblk_t		*nmp = NULL;
650	mblk_t		*ret_m = NULL;
651	int		skip_port = 0;
652
653	D1(vswp, "vsw_forward_all: enter\n");
654
655	/*
656	 * Broadcast message from inside ldoms so send to outside
657	 * world if in either of layer 2 modes.
658	 */
659	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
660	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
661	    ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
662
663		nmp = vsw_dupmsgchain(mp);
664		if (nmp) {
665			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
666				DERR(vswp, "%s: dropping pkt(s) "
667				    "consisting of %ld bytes of data for"
668				    " physical device", __func__, MBLKL(ret_m));
669				freemsgchain(ret_m);
670			}
671		}
672	}
673
674	if (caller == VSW_VNETPORT)
675		skip_port = 1;
676
677	/*
678	 * Broadcast message from other vnet (layer 2 or 3) or outside
679	 * world (layer 2 only), send up stack if plumbed.
680	 */
681	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
682		vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG);
683	}
684
685	/* send it to all VNETPORTs */
686	READ_ENTER(&plist->lockrw);
687	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
688		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
689		/*
690		 * Caution ! - don't reorder these two checks as arg
691		 * will be NULL if the caller is PHYSDEV. skip_port is
692		 * only set if caller is VNETPORT.
693		 */
694		if ((skip_port) && (portp == arg)) {
695			continue;
696		} else {
697			nmp = vsw_dupmsgchain(mp);
698			if (nmp) {
699				mblk_t	*mpt = nmp;
700				uint32_t count = 1;
701
702				/* Find tail */
703				while (mpt->b_next != NULL) {
704					mpt = mpt->b_next;
705					count++;
706				}
707				/*
708				 * The plist->lockrw is protecting the
709				 * portp from getting destroyed here.
710				 * So, no ref_cnt is incremented here.
711				 */
712				(void) vsw_portsend(portp, nmp, mpt, count);
713			} else {
714				DERR(vswp, "vsw_forward_all: nmp NULL");
715			}
716		}
717	}
718	RW_EXIT(&plist->lockrw);
719
720	freemsgchain(mp);
721
722	D1(vswp, "vsw_forward_all: exit\n");
723	return (0);
724}
725
726/*
727 * Forward pkts to any devices or interfaces which have registered
728 * an interest in them (i.e. multicast groups).
729 */
730static int
731vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
732{
733	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
734	mfdb_ent_t		*entp = NULL;
735	mfdb_ent_t		*tpp = NULL;
736	vsw_port_t 		*port;
737	uint64_t		key = 0;
738	mblk_t			*nmp = NULL;
739	mblk_t			*ret_m = NULL;
740	boolean_t		check_if = B_TRUE;
741
742	/*
743	 * Convert address to hash table key
744	 */
745	KEY_HASH(key, &ehp->ether_dhost);
746
747	D1(vswp, "%s: key 0x%llx", __func__, key);
748
749	/*
750	 * If pkt came from either a vnet or down the stack (if we are
751	 * plumbed) and we are in layer 2 mode, then we send the pkt out
752	 * over the physical adapter, and then check to see if any other
753	 * vnets are interested in it.
754	 */
755	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
756	    (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
757	    ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
758		nmp = vsw_dupmsgchain(mp);
759		if (nmp) {
760			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
761				DERR(vswp, "%s: dropping pkt(s) consisting of "
762				    "%ld bytes of data for physical device",
763				    __func__, MBLKL(ret_m));
764				freemsgchain(ret_m);
765			}
766		}
767	}
768
769	READ_ENTER(&vswp->mfdbrw);
770	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
771	    (mod_hash_val_t *)&entp) != 0) {
772		D3(vswp, "%s: no table entry found for addr 0x%llx",
773		    __func__, key);
774	} else {
775		/*
776		 * Send to list of devices associated with this address...
777		 */
778		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
779
780			/* dont send to ourselves */
781			if ((caller == VSW_VNETPORT) &&
782			    (tpp->d_addr == (void *)arg)) {
783				port = (vsw_port_t *)tpp->d_addr;
784				D3(vswp, "%s: not sending to ourselves"
785				    " : port %d", __func__, port->p_instance);
786				continue;
787
788			} else if ((caller == VSW_LOCALDEV) &&
789			    (tpp->d_type == VSW_LOCALDEV)) {
790				D2(vswp, "%s: not sending back up stack",
791				    __func__);
792				continue;
793			}
794
795			if (tpp->d_type == VSW_VNETPORT) {
796				port = (vsw_port_t *)tpp->d_addr;
797				D3(vswp, "%s: sending to port %ld for addr "
798				    "0x%llx", __func__, port->p_instance, key);
799
800				nmp = vsw_dupmsgchain(mp);
801				if (nmp) {
802					mblk_t	*mpt = nmp;
803					uint32_t count = 1;
804
805					/* Find tail */
806					while (mpt->b_next != NULL) {
807						mpt = mpt->b_next;
808						count++;
809					}
810					/*
811					 * The vswp->mfdbrw is protecting the
812					 * portp from getting destroyed here.
813					 * So, no ref_cnt is incremented here.
814					 */
815					(void) vsw_portsend(port, nmp, mpt,
816					    count);
817				}
818			} else {
819				vsw_mac_rx(vswp, NULL,
820				    mp, VSW_MACRX_COPYMSG);
821				D2(vswp, "%s: sending up stack"
822				    " for addr 0x%llx", __func__, key);
823				check_if = B_FALSE;
824			}
825		}
826	}
827
828	RW_EXIT(&vswp->mfdbrw);
829
830	/*
831	 * If the pkt came from either a vnet or from physical device,
832	 * and if we havent already sent the pkt up the stack then we
833	 * check now if we can/should (i.e. the interface is plumbed
834	 * and in promisc mode).
835	 */
836	if ((check_if) &&
837	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
838		vsw_mac_rx(vswp, NULL, mp,
839		    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
840	}
841
842	freemsgchain(mp);
843
844	D1(vswp, "%s: exit", __func__);
845
846	return (0);
847}
848
/*
 * Build the vlan id hash table of a vsw device or port: the table is
 * created first and then populated with every vlan id that has been
 * assigned to the device or port.
 *
 * Arguments:
 *   arg:  vsw device or port.
 *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 */
void
vsw_create_vlans(void *arg, int type)
{
	/* step 1: create the (empty) vlan hash table */
	vsw_vlan_create_hash(arg, type);

	/* step 2: insert the assigned vlan ids into it */
	vsw_vlan_add_ids(arg, type);
}
866
/*
 * Tear down the vlan id hash table of a vsw device or port: the vlan ids
 * are removed from the table first and then the table itself is destroyed.
 *
 * Arguments:
 *   arg:  vsw device or port.
 *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
 */
void
vsw_destroy_vlans(void *arg, int type)
{
	/* step 1: empty the vlan hash table */
	vsw_vlan_remove_ids(arg, type);

	/* step 2: destroy the table */
	vsw_vlan_destroy_hash(arg, type);
}
883
884/*
885 * Create a vlan-id hash table for the given vsw device or port.
886 */
887static void
888vsw_vlan_create_hash(void *arg, int type)
889{
890	char		hashname[MAXNAMELEN];
891
892	if (type == VSW_LOCALDEV) {
893		vsw_t		*vswp = (vsw_t *)arg;
894
895		(void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash",
896		    vswp->instance);
897
898		vswp->vlan_nchains = vsw_vlan_nchains;
899		vswp->vlan_hashp = mod_hash_create_idhash(hashname,
900		    vswp->vlan_nchains, mod_hash_null_valdtor);
901
902	} else if (type == VSW_VNETPORT) {
903		vsw_port_t	*portp = (vsw_port_t *)arg;
904
905		(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
906		    portp->p_instance);
907
908		portp->vlan_nchains = vsw_vlan_nchains;
909		portp->vlan_hashp = mod_hash_create_idhash(hashname,
910		    portp->vlan_nchains, mod_hash_null_valdtor);
911
912	} else {
913		return;
914	}
915}
916
917/*
918 * Destroy the vlan-id hash table for the given vsw device or port.
919 */
920static void
921vsw_vlan_destroy_hash(void *arg, int type)
922{
923	if (type == VSW_LOCALDEV) {
924		vsw_t		*vswp = (vsw_t *)arg;
925
926		mod_hash_destroy_hash(vswp->vlan_hashp);
927		vswp->vlan_nchains = 0;
928	} else if (type == VSW_VNETPORT) {
929		vsw_port_t	*portp = (vsw_port_t *)arg;
930
931		mod_hash_destroy_hash(portp->vlan_hashp);
932		portp->vlan_nchains = 0;
933	} else {
934		return;
935	}
936}
937
938/*
939 * Add vlan ids of the given vsw device or port into its hash table.
940 */
941void
942vsw_vlan_add_ids(void *arg, int type)
943{
944	int	rv;
945	int	i;
946
947	if (type == VSW_LOCALDEV) {
948		vsw_t		*vswp = (vsw_t *)arg;
949
950		rv = mod_hash_insert(vswp->vlan_hashp,
951		    (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
952		    (mod_hash_val_t)B_TRUE);
953		ASSERT(rv == 0);
954
955		for (i = 0; i < vswp->nvids; i++) {
956			rv = mod_hash_insert(vswp->vlan_hashp,
957			    (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]),
958			    (mod_hash_val_t)B_TRUE);
959			ASSERT(rv == 0);
960		}
961
962	} else if (type == VSW_VNETPORT) {
963		vsw_port_t	*portp = (vsw_port_t *)arg;
964
965		rv = mod_hash_insert(portp->vlan_hashp,
966		    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
967		    (mod_hash_val_t)B_TRUE);
968		ASSERT(rv == 0);
969
970		for (i = 0; i < portp->nvids; i++) {
971			rv = mod_hash_insert(portp->vlan_hashp,
972			    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
973			    (mod_hash_val_t)B_TRUE);
974			ASSERT(rv == 0);
975		}
976
977	} else {
978		return;
979	}
980}
981
982/*
983 * Remove vlan ids of the given vsw device or port from its hash table.
984 */
985void
986vsw_vlan_remove_ids(void *arg, int type)
987{
988	mod_hash_val_t	vp;
989	int		rv;
990	int		i;
991
992	if (type == VSW_LOCALDEV) {
993		vsw_t		*vswp = (vsw_t *)arg;
994
995		rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid);
996		if (rv == B_TRUE) {
997			rv = mod_hash_remove(vswp->vlan_hashp,
998			    (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
999			    (mod_hash_val_t *)&vp);
1000			ASSERT(rv == 0);
1001		}
1002
1003		for (i = 0; i < vswp->nvids; i++) {
1004			rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->vids[i]);
1005			if (rv == B_TRUE) {
1006				rv = mod_hash_remove(vswp->vlan_hashp,
1007				    (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]),
1008				    (mod_hash_val_t *)&vp);
1009				ASSERT(rv == 0);
1010			}
1011		}
1012
1013	} else if (type == VSW_VNETPORT) {
1014		vsw_port_t	*portp = (vsw_port_t *)arg;
1015
1016		portp = (vsw_port_t *)arg;
1017		rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid);
1018		if (rv == B_TRUE) {
1019			rv = mod_hash_remove(portp->vlan_hashp,
1020			    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
1021			    (mod_hash_val_t *)&vp);
1022			ASSERT(rv == 0);
1023		}
1024
1025		for (i = 0; i < portp->nvids; i++) {
1026			rv = vsw_vlan_lookup(portp->vlan_hashp, portp->vids[i]);
1027			if (rv == B_TRUE) {
1028				rv = mod_hash_remove(portp->vlan_hashp,
1029				    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
1030				    (mod_hash_val_t *)&vp);
1031				ASSERT(rv == 0);
1032			}
1033		}
1034
1035	} else {
1036		return;
1037	}
1038}
1039
1040/*
1041 * Find the given vlan id in the hash table.
1042 * Return: B_TRUE if the id is found; B_FALSE if not found.
1043 */
1044boolean_t
1045vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
1046{
1047	int		rv;
1048	mod_hash_val_t	vp;
1049
1050	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
1051
1052	if (rv != 0)
1053		return (B_FALSE);
1054
1055	return (B_TRUE);
1056}
1057
1058/*
1059 * Add an entry into FDB for the given vsw.
1060 */
1061void
1062vsw_fdbe_add(vsw_t *vswp, void *port)
1063{
1064	uint64_t	addr = 0;
1065	vsw_port_t	*portp;
1066	vsw_fdbe_t	*fp;
1067	int		rv;
1068
1069	portp = (vsw_port_t *)port;
1070	KEY_HASH(addr, &portp->p_macaddr);
1071
1072	fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP);
1073	fp->portp = port;
1074
1075	/*
1076	 * Note: duplicate keys will be rejected by mod_hash.
1077	 */
1078	rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr,
1079	    (mod_hash_val_t)fp);
1080	ASSERT(rv == 0);
1081}
1082
1083/*
1084 * Remove an entry from FDB.
1085 */
1086void
1087vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr)
1088{
1089	uint64_t	addr = 0;
1090	vsw_fdbe_t	*fp;
1091	int		rv;
1092
1093	KEY_HASH(addr, eaddr);
1094
1095	/*
1096	 * Remove the entry from fdb hash table.
1097	 * This prevents further references to this fdb entry.
1098	 */
1099	rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr,
1100	    (mod_hash_val_t *)&fp);
1101	if (rv != 0) {
1102		/* invalid key? */
1103		return;
1104	}
1105
1106	/*
1107	 * If there are threads already ref holding before the entry was
1108	 * removed from hash table, then wait for ref count to drop to zero.
1109	 */
1110	while (fp->refcnt != 0) {
1111		delay(drv_usectohz(vsw_fdbe_refcnt_delay));
1112	}
1113
1114	kmem_free(fp, sizeof (*fp));
1115}
1116
1117/*
1118 * Search fdb for a given mac address. If an entry is found, hold
1119 * a reference to it and return the entry, else returns NULL.
1120 */
1121static vsw_fdbe_t *
1122vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp)
1123{
1124	uint64_t	key = 0;
1125	vsw_fdbe_t	*fp;
1126	int		rv;
1127
1128	KEY_HASH(key, addrp);
1129
1130	rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key,
1131	    (mod_hash_val_t *)&fp, vsw_fdbe_find_cb);
1132
1133	if (rv != 0)
1134		return (NULL);
1135
1136	return (fp);
1137}
1138
1139/*
1140 * Callback function provided to mod_hash_find_cb(). After finding the fdb
1141 * entry corresponding to the key (macaddr), this callback will be invoked by
1142 * mod_hash_find_cb() to atomically increment the reference count on the fdb
1143 * entry before returning the found entry.
1144 */
1145static void
1146vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
1147{
1148	_NOTE(ARGUNUSED(key))
1149	VSW_FDBE_REFHOLD((vsw_fdbe_t *)val);
1150}
1151
1152/*
1153 * A given frame must be always tagged with the appropriate vlan id (unless it
1154 * is in the default-vlan) before the mac address switching function is called.
1155 * Otherwise, after switching function determines the destination, we cannot
1156 * figure out if the destination belongs to the the same vlan that the frame
1157 * originated from and if it needs tag/untag. Frames which are inbound from
1158 * the external(physical) network over a vlan trunk link are always tagged.
1159 * However frames which are received from a vnet-port over ldc or frames which
1160 * are coming down the stack on the service domain over vsw interface may be
1161 * untagged. These frames must be tagged with the appropriate pvid of the
1162 * sender (vnet-port or vsw device), before invoking the switching function.
1163 *
1164 * Arguments:
1165 *   arg:    caller of the function.
1166 *   type:   type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1167 *   mp:     frame(s) to be tagged.
1168 */
1169mblk_t *
1170vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp)
1171{
1172	vsw_t			*vswp;
1173	vsw_port_t		*portp;
1174	struct ether_header	*ehp;
1175	mblk_t			*bp;
1176	mblk_t			*bpt;
1177	mblk_t			*bph;
1178	mblk_t			*bpn;
1179	uint16_t		pvid;
1180
1181	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1182
1183	if (type == VSW_LOCALDEV) {
1184		vswp = (vsw_t *)arg;
1185		pvid = vswp->pvid;
1186		portp = NULL;
1187	} else {
1188		/* VSW_VNETPORT */
1189		portp = (vsw_port_t *)arg;
1190		pvid = portp->pvid;
1191		vswp = portp->p_vswp;
1192	}
1193
1194	bpn = bph = bpt = NULL;
1195
1196	for (bp = mp; bp != NULL; bp = bpn) {
1197
1198		bpn = bp->b_next;
1199		bp->b_next = bp->b_prev = NULL;
1200
1201		/* Determine if it is an untagged frame */
1202		ehp = (struct ether_header *)bp->b_rptr;
1203
1204		if (ehp->ether_type != ETHERTYPE_VLAN) {	/* untagged */
1205
1206			/* no need to tag if the frame is in default vlan */
1207			if (pvid != vswp->default_vlan_id) {
1208				bp = vnet_vlan_insert_tag(bp, pvid);
1209				if (bp == NULL) {
1210					continue;
1211				}
1212			}
1213		}
1214
1215		/* build a chain of processed packets */
1216		if (bph == NULL) {
1217			bph = bpt = bp;
1218		} else {
1219			bpt->b_next = bp;
1220			bpt = bp;
1221		}
1222
1223	}
1224
1225	return (bph);
1226}
1227
1228/*
1229 * Frames destined to a vnet-port or to the local vsw interface, must be
1230 * untagged if necessary before sending. This function first checks that the
1231 * frame can be sent to the destination in the vlan identified by the frame
1232 * tag. Note that when this function is invoked the frame must have been
1233 * already tagged (unless it is in the default-vlan). Because, this function is
1234 * called when the switching function determines the destination and invokes
1235 * its send function (vnet-port or vsw interface) and all frames would have
1236 * been tagged by this time (see comments in vsw_vlan_frame_pretag()).
1237 *
1238 * Arguments:
1239 *   arg:    destination device.
1240 *   type:   type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
1241 *   np:     head of pkt chain to be validated and untagged.
1242 *   npt:    tail of pkt chain to be validated and untagged.
1243 *
1244 * Returns:
1245 *   np:     head of updated chain of packets
1246 *   npt:    tail of updated chain of packets
1247 *   rv:     count of any packets dropped
1248 */
1249uint32_t
1250vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt)
1251{
1252	mblk_t			*bp;
1253	mblk_t			*bpt;
1254	mblk_t			*bph;
1255	mblk_t			*bpn;
1256	vsw_port_t		*portp;
1257	vsw_t			*vswp;
1258	uint32_t		count;
1259	struct ether_header	*ehp;
1260	boolean_t		is_tagged;
1261	boolean_t		rv;
1262	uint16_t		vlan_id;
1263	uint16_t		pvid;
1264	mod_hash_t		*vlan_hashp;
1265
1266	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
1267
1268	if (type == VSW_LOCALDEV) {
1269		vswp = (vsw_t *)arg;
1270		pvid = vswp->pvid;
1271		vlan_hashp = vswp->vlan_hashp;
1272		portp = NULL;
1273	} else {
1274		/* type == VSW_VNETPORT */
1275		portp = (vsw_port_t *)arg;
1276		vswp = portp->p_vswp;
1277		vlan_hashp = portp->vlan_hashp;
1278		pvid = portp->pvid;
1279	}
1280
1281	bpn = bph = bpt = NULL;
1282	count = 0;
1283
1284	for (bp = *np; bp != NULL; bp = bpn) {
1285
1286		bpn = bp->b_next;
1287		bp->b_next = bp->b_prev = NULL;
1288
1289		/*
1290		 * Determine the vlan id that the frame belongs to.
1291		 */
1292		ehp = (struct ether_header *)bp->b_rptr;
1293		is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id);
1294
1295		/*
1296		 * Check if the destination is in the same vlan.
1297		 */
1298		rv = vsw_vlan_lookup(vlan_hashp, vlan_id);
1299		if (rv == B_FALSE) {
1300			/* drop the packet */
1301			freemsg(bp);
1302			count++;
1303			continue;
1304		}
1305
1306		/*
1307		 * Check the frame header if tag/untag is  needed.
1308		 */
1309		if (is_tagged == B_FALSE) {
1310			/*
1311			 * Untagged frame. We shouldn't have an untagged
1312			 * packet at this point, unless the destination's
1313			 * vlan id is default-vlan-id; if it is not the
1314			 * default-vlan-id, we drop the packet.
1315			 */
1316			if (vlan_id != vswp->default_vlan_id) {
1317				/* drop the packet */
1318				freemsg(bp);
1319				count++;
1320				continue;
1321			}
1322		} else {
1323			/*
1324			 * Tagged frame, untag if it's the destination's pvid.
1325			 */
1326			if (vlan_id == pvid) {
1327
1328				bp = vnet_vlan_remove_tag(bp);
1329				if (bp == NULL) {
1330					/* packet dropped */
1331					count++;
1332					continue;
1333				}
1334			}
1335		}
1336
1337		/* build a chain of processed packets */
1338		if (bph == NULL) {
1339			bph = bpt = bp;
1340		} else {
1341			bpt->b_next = bp;
1342			bpt = bp;
1343		}
1344
1345	}
1346
1347	*np = bph;
1348	*npt = bpt;
1349
1350	return (count);
1351}
1352
1353/*
1354 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame,
1355 * then the vlan-id is available in the tag; otherwise, its vlan id is
1356 * implicitly obtained based on the caller (destination of the frame:
1357 * VSW_VNETPORT or VSW_LOCALDEV).
1358 * The vlan id determined is returned in vidp.
1359 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
1360 */
1361boolean_t
1362vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
1363	uint16_t *vidp)
1364{
1365	struct ether_vlan_header	*evhp;
1366	vsw_t				*vswp;
1367	vsw_port_t			*portp;
1368
1369	/* If it's a tagged frame, get the vid from vlan header */
1370	if (ehp->ether_type == ETHERTYPE_VLAN) {
1371
1372		evhp = (struct ether_vlan_header *)ehp;
1373		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
1374		return (B_TRUE);
1375	}
1376
1377	/* Untagged frame; determine vlan id based on caller */
1378	switch (caller) {
1379
1380	case VSW_VNETPORT:
1381		/*
1382		 * packet destined to a vnet; vlan-id is pvid of vnet-port.
1383		 */
1384		portp = (vsw_port_t *)arg;
1385		*vidp = portp->pvid;
1386		break;
1387
1388	case VSW_LOCALDEV:
1389
1390		/*
1391		 * packet destined to vsw interface;
1392		 * vlan-id is port-vlan-id of vsw device.
1393		 */
1394		vswp = (vsw_t *)arg;
1395		*vidp = vswp->pvid;
1396		break;
1397	}
1398
1399	return (B_FALSE);
1400}
1401
1402/*
1403 * Add or remove multicast address(es).
1404 *
1405 * Returns 0 on success, 1 on failure.
1406 */
1407int
1408vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
1409{
1410	mcst_addr_t		*mcst_p = NULL;
1411	vsw_t			*vswp = port->p_vswp;
1412	uint64_t		addr = 0x0;
1413	int			i;
1414
1415	D1(vswp, "%s: enter", __func__);
1416
1417	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
1418
1419	for (i = 0; i < mcst_pkt->count; i++) {
1420		/*
1421		 * Convert address into form that can be used
1422		 * as hash table key.
1423		 */
1424		KEY_HASH(addr, &(mcst_pkt->mca[i]));
1425
1426		/*
1427		 * Add or delete the specified address/port combination.
1428		 */
1429		if (mcst_pkt->set == 0x1) {
1430			D3(vswp, "%s: adding multicast address 0x%llx for "
1431			    "port %ld", __func__, addr, port->p_instance);
1432			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1433				/*
1434				 * Update the list of multicast
1435				 * addresses contained within the
1436				 * port structure to include this new
1437				 * one.
1438				 */
1439				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
1440				    KM_NOSLEEP);
1441				if (mcst_p == NULL) {
1442					DERR(vswp, "%s: unable to alloc mem",
1443					    __func__);
1444					(void) vsw_del_mcst(vswp,
1445					    VSW_VNETPORT, addr, port);
1446					return (1);
1447				}
1448
1449				mcst_p->nextp = NULL;
1450				mcst_p->addr = addr;
1451				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
1452
1453				/*
1454				 * Program the address into HW. If the addr
1455				 * has already been programmed then the MAC
1456				 * just increments a ref counter (which is
1457				 * used when the address is being deleted)
1458				 */
1459				mutex_enter(&vswp->mac_lock);
1460				if (vswp->mh != NULL) {
1461					if (mac_multicst_add(vswp->mh,
1462					    (uchar_t *)&mcst_pkt->mca[i])) {
1463						mutex_exit(&vswp->mac_lock);
1464						cmn_err(CE_WARN, "!vsw%d: "
1465						    "unable to add multicast "
1466						    "address: %s\n",
1467						    vswp->instance,
1468						    ether_sprintf((void *)
1469						    &mcst_p->mca));
1470						(void) vsw_del_mcst(vswp,
1471						    VSW_VNETPORT, addr, port);
1472						kmem_free(mcst_p,
1473						    sizeof (*mcst_p));
1474						return (1);
1475					}
1476					mcst_p->mac_added = B_TRUE;
1477				}
1478				mutex_exit(&vswp->mac_lock);
1479
1480				mutex_enter(&port->mca_lock);
1481				mcst_p->nextp = port->mcap;
1482				port->mcap = mcst_p;
1483				mutex_exit(&port->mca_lock);
1484
1485			} else {
1486				DERR(vswp, "%s: error adding multicast "
1487				    "address 0x%llx for port %ld",
1488				    __func__, addr, port->p_instance);
1489				return (1);
1490			}
1491		} else {
1492			/*
1493			 * Delete an entry from the multicast hash
1494			 * table and update the address list
1495			 * appropriately.
1496			 */
1497			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
1498				D3(vswp, "%s: deleting multicast address "
1499				    "0x%llx for port %ld", __func__, addr,
1500				    port->p_instance);
1501
1502				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
1503				ASSERT(mcst_p != NULL);
1504
1505				/*
1506				 * Remove the address from HW. The address
1507				 * will actually only be removed once the ref
1508				 * count within the MAC layer has dropped to
1509				 * zero. I.e. we can safely call this fn even
1510				 * if other ports are interested in this
1511				 * address.
1512				 */
1513				mutex_enter(&vswp->mac_lock);
1514				if (vswp->mh != NULL && mcst_p->mac_added) {
1515					if (mac_multicst_remove(vswp->mh,
1516					    (uchar_t *)&mcst_pkt->mca[i])) {
1517						mutex_exit(&vswp->mac_lock);
1518						cmn_err(CE_WARN, "!vsw%d: "
1519						    "unable to remove mcast "
1520						    "address: %s\n",
1521						    vswp->instance,
1522						    ether_sprintf((void *)
1523						    &mcst_p->mca));
1524						kmem_free(mcst_p,
1525						    sizeof (*mcst_p));
1526						return (1);
1527					}
1528					mcst_p->mac_added = B_FALSE;
1529				}
1530				mutex_exit(&vswp->mac_lock);
1531				kmem_free(mcst_p, sizeof (*mcst_p));
1532
1533			} else {
1534				DERR(vswp, "%s: error deleting multicast "
1535				    "addr 0x%llx for port %ld",
1536				    __func__, addr, port->p_instance);
1537				return (1);
1538			}
1539		}
1540	}
1541	D1(vswp, "%s: exit", __func__);
1542	return (0);
1543}
1544
1545/*
1546 * Add a new multicast entry.
1547 *
1548 * Search hash table based on address. If match found then
1549 * update associated val (which is chain of ports), otherwise
1550 * create new key/val (addr/port) pair and insert into table.
1551 */
1552int
1553vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1554{
1555	int		dup = 0;
1556	int		rv = 0;
1557	mfdb_ent_t	*ment = NULL;
1558	mfdb_ent_t	*tmp_ent = NULL;
1559	mfdb_ent_t	*new_ent = NULL;
1560	void		*tgt = NULL;
1561
1562	if (devtype == VSW_VNETPORT) {
1563		/*
1564		 * Being invoked from a vnet.
1565		 */
1566		ASSERT(arg != NULL);
1567		tgt = arg;
1568		D2(NULL, "%s: port %d : address 0x%llx", __func__,
1569		    ((vsw_port_t *)arg)->p_instance, addr);
1570	} else {
1571		/*
1572		 * We are being invoked via the m_multicst mac entry
1573		 * point.
1574		 */
1575		D2(NULL, "%s: address 0x%llx", __func__, addr);
1576		tgt = (void *)vswp;
1577	}
1578
1579	WRITE_ENTER(&vswp->mfdbrw);
1580	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1581	    (mod_hash_val_t *)&ment) != 0) {
1582
1583		/* address not currently in table */
1584		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1585		ment->d_addr = (void *)tgt;
1586		ment->d_type = devtype;
1587		ment->nextp = NULL;
1588
1589		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
1590		    (mod_hash_val_t)ment) != 0) {
1591			DERR(vswp, "%s: hash table insertion failed", __func__);
1592			kmem_free(ment, sizeof (mfdb_ent_t));
1593			rv = 1;
1594		} else {
1595			D2(vswp, "%s: added initial entry for 0x%llx to "
1596			    "table", __func__, addr);
1597		}
1598	} else {
1599		/*
1600		 * Address in table. Check to see if specified port
1601		 * is already associated with the address. If not add
1602		 * it now.
1603		 */
1604		tmp_ent = ment;
1605		while (tmp_ent != NULL) {
1606			if (tmp_ent->d_addr == (void *)tgt) {
1607				if (devtype == VSW_VNETPORT) {
1608					DERR(vswp, "%s: duplicate port entry "
1609					    "found for portid %ld and key "
1610					    "0x%llx", __func__,
1611					    ((vsw_port_t *)arg)->p_instance,
1612					    addr);
1613				} else {
1614					DERR(vswp, "%s: duplicate entry found"
1615					    "for key 0x%llx", __func__, addr);
1616				}
1617				rv = 1;
1618				dup = 1;
1619				break;
1620			}
1621			tmp_ent = tmp_ent->nextp;
1622		}
1623
1624		/*
1625		 * Port not on list so add it to end now.
1626		 */
1627		if (0 == dup) {
1628			D2(vswp, "%s: added entry for 0x%llx to table",
1629			    __func__, addr);
1630			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
1631			new_ent->d_addr = (void *)tgt;
1632			new_ent->d_type = devtype;
1633			new_ent->nextp = NULL;
1634
1635			tmp_ent = ment;
1636			while (tmp_ent->nextp != NULL)
1637				tmp_ent = tmp_ent->nextp;
1638
1639			tmp_ent->nextp = new_ent;
1640		}
1641	}
1642
1643	RW_EXIT(&vswp->mfdbrw);
1644	return (rv);
1645}
1646
1647/*
1648 * Remove a multicast entry from the hashtable.
1649 *
1650 * Search hash table based on address. If match found, scan
1651 * list of ports associated with address. If specified port
1652 * found remove it from list.
1653 */
1654int
1655vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
1656{
1657	mfdb_ent_t	*ment = NULL;
1658	mfdb_ent_t	*curr_p, *prev_p;
1659	void		*tgt = NULL;
1660
1661	D1(vswp, "%s: enter", __func__);
1662
1663	if (devtype == VSW_VNETPORT) {
1664		tgt = (vsw_port_t *)arg;
1665		D2(vswp, "%s: removing port %d from mFDB for address"
1666		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
1667	} else {
1668		D2(vswp, "%s: removing entry", __func__);
1669		tgt = (void *)vswp;
1670	}
1671
1672	WRITE_ENTER(&vswp->mfdbrw);
1673	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
1674	    (mod_hash_val_t *)&ment) != 0) {
1675		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
1676		RW_EXIT(&vswp->mfdbrw);
1677		return (1);
1678	}
1679
1680	prev_p = curr_p = ment;
1681
1682	while (curr_p != NULL) {
1683		if (curr_p->d_addr == (void *)tgt) {
1684			if (devtype == VSW_VNETPORT) {
1685				D2(vswp, "%s: port %d found", __func__,
1686				    ((vsw_port_t *)tgt)->p_instance);
1687			} else {
1688				D2(vswp, "%s: instance found", __func__);
1689			}
1690
1691			if (prev_p == curr_p) {
1692				/*
1693				 * head of list, if no other element is in
1694				 * list then destroy this entry, otherwise
1695				 * just replace it with updated value.
1696				 */
1697				ment = curr_p->nextp;
1698				if (ment == NULL) {
1699					(void) mod_hash_destroy(vswp->mfdb,
1700					    (mod_hash_val_t)addr);
1701				} else {
1702					(void) mod_hash_replace(vswp->mfdb,
1703					    (mod_hash_key_t)addr,
1704					    (mod_hash_val_t)ment);
1705				}
1706			} else {
1707				/*
1708				 * Not head of list, no need to do
1709				 * replacement, just adjust list pointers.
1710				 */
1711				prev_p->nextp = curr_p->nextp;
1712			}
1713			break;
1714		}
1715
1716		prev_p = curr_p;
1717		curr_p = curr_p->nextp;
1718	}
1719
1720	RW_EXIT(&vswp->mfdbrw);
1721
1722	D1(vswp, "%s: exit", __func__);
1723
1724	if (curr_p == NULL)
1725		return (1);
1726	kmem_free(curr_p, sizeof (mfdb_ent_t));
1727	return (0);
1728}
1729
1730/*
1731 * Port is being deleted, but has registered an interest in one
1732 * or more multicast groups. Using the list of addresses maintained
1733 * within the port structure find the appropriate entry in the hash
1734 * table and remove this port from the list of interested ports.
1735 */
1736void
1737vsw_del_mcst_port(vsw_port_t *port)
1738{
1739	mcst_addr_t	*mcap = NULL;
1740	vsw_t		*vswp = port->p_vswp;
1741
1742	D1(vswp, "%s: enter", __func__);
1743
1744	mutex_enter(&port->mca_lock);
1745
1746	while ((mcap = port->mcap) != NULL) {
1747
1748		port->mcap = mcap->nextp;
1749
1750		mutex_exit(&port->mca_lock);
1751
1752		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
1753		    mcap->addr, port);
1754
1755		/*
1756		 * Remove the address from HW. The address
1757		 * will actually only be removed once the ref
1758		 * count within the MAC layer has dropped to
1759		 * zero. I.e. we can safely call this fn even
1760		 * if other ports are interested in this
1761		 * address.
1762		 */
1763		mutex_enter(&vswp->mac_lock);
1764		if (vswp->mh != NULL && mcap->mac_added) {
1765			(void) mac_multicst_remove(vswp->mh,
1766			    (uchar_t *)&mcap->mca);
1767		}
1768		mutex_exit(&vswp->mac_lock);
1769
1770		kmem_free(mcap, sizeof (*mcap));
1771
1772		mutex_enter(&port->mca_lock);
1773
1774	}
1775
1776	mutex_exit(&port->mca_lock);
1777
1778	D1(vswp, "%s: exit", __func__);
1779}
1780
1781/*
1782 * This vsw instance is detaching, but has registered an interest in one
1783 * or more multicast groups. Using the list of addresses maintained
1784 * within the vsw structure find the appropriate entry in the hash
1785 * table and remove this instance from the list of interested ports.
1786 */
1787void
1788vsw_del_mcst_vsw(vsw_t *vswp)
1789{
1790	mcst_addr_t	*next_p = NULL;
1791
1792	D1(vswp, "%s: enter", __func__);
1793
1794	mutex_enter(&vswp->mca_lock);
1795
1796	while (vswp->mcap != NULL) {
1797		DERR(vswp, "%s: deleting addr 0x%llx",
1798		    __func__, vswp->mcap->addr);
1799		(void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
1800
1801		next_p = vswp->mcap->nextp;
1802		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
1803		vswp->mcap = next_p;
1804	}
1805
1806	vswp->mcap = NULL;
1807	mutex_exit(&vswp->mca_lock);
1808
1809	D1(vswp, "%s: exit", __func__);
1810}
1811
1812static uint32_t
1813vsw_get_same_dest_list(struct ether_header *ehp,
1814    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp)
1815{
1816	uint32_t		count = 0;
1817	mblk_t			*bp;
1818	mblk_t			*nbp;
1819	mblk_t			*head = NULL;
1820	mblk_t			*tail = NULL;
1821	mblk_t			*prev = NULL;
1822	struct ether_header	*behp;
1823
1824	/* process the chain of packets */
1825	bp = *mpp;
1826	while (bp) {
1827		nbp = bp->b_next;
1828		behp = (struct ether_header *)bp->b_rptr;
1829		bp->b_prev = NULL;
1830		if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
1831			if (prev == NULL) {
1832				*mpp = nbp;
1833			} else {
1834				prev->b_next = nbp;
1835			}
1836			bp->b_next =  NULL;
1837			if (head == NULL) {
1838				head = tail = bp;
1839			} else {
1840				tail->b_next = bp;
1841				tail = bp;
1842			}
1843			count++;
1844		} else {
1845			prev = bp;
1846		}
1847		bp = nbp;
1848	}
1849	*rhead = head;
1850	*rtail = tail;
1851	DTRACE_PROBE1(vsw_same_dest, int, count);
1852	return (count);
1853}
1854
1855static mblk_t *
1856vsw_dupmsgchain(mblk_t *mp)
1857{
1858	mblk_t	*nmp = NULL;
1859	mblk_t	**nmpp = &nmp;
1860
1861	for (; mp != NULL; mp = mp->b_next) {
1862		if ((*nmpp = dupmsg(mp)) == NULL) {
1863			freemsgchain(nmp);
1864			return (NULL);
1865		}
1866
1867		nmpp = &((*nmpp)->b_next);
1868	}
1869
1870	return (nmp);
1871}
1872