1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26#include <sys/types.h>
27#include <sys/kmem.h>
28#include <sys/conf.h>
29#include <sys/ddi.h>
30#include <sys/sunddi.h>
31#include <sys/ksynch.h>
32
33#include <sys/ib/clients/eoib/eib_impl.h>
34
35/*
36 * Declarations private to this file
37 */
38static int eib_vnic_get_instance(eib_t *, int *);
39static void eib_vnic_ret_instance(eib_t *, int);
40static void eib_vnic_modify_enter(eib_t *, uint_t);
41static void eib_vnic_modify_exit(eib_t *, uint_t);
42static int eib_vnic_create_common(eib_t *, eib_vnic_t *, int *);
43static int eib_vnic_set_partition(eib_t *, eib_vnic_t *, int *);
44static void eib_vnic_make_vhub_mgid(uint8_t *, uint8_t, uint8_t *, uint8_t,
45    uint8_t, uint32_t, ib_gid_t *);
46static int eib_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *, int *);
47static int eib_vnic_attach_vhub_table(eib_t *, eib_vnic_t *);
48static int eib_vnic_attach_vhub_update(eib_t *, eib_vnic_t *);
49static void eib_vnic_start_keepalives(eib_t *, eib_vnic_t *);
50static int eib_vnic_lookup_dest(eib_vnic_t *, uint8_t *, uint16_t,
51    eib_vhub_map_t *, ibt_mcg_info_t *, int *);
52static void eib_vnic_leave_all_data_mcgs(eib_t *, eib_vnic_t *);
53static void eib_vnic_rejoin_data_mcgs(eib_t *, eib_vnic_t *);
54static void eib_vnic_reattach_ctl_mcgs(eib_t *, eib_vnic_t *);
55static void eib_rb_vnic_create_common(eib_t *, eib_vnic_t *, uint_t);
56static void eib_rb_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *);
57static void eib_rb_vnic_attach_vhub_table(eib_t *, eib_vnic_t *);
58static void eib_rb_vnic_attach_vhub_update(eib_t *, eib_vnic_t *);
59static void eib_rb_vnic_start_keepalives(eib_t *, eib_vnic_t *);
60static void eib_rb_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *);
61
62/*
63 * Definitions private to this file
64 */
65#define	EIB_VNIC_STRUCT_ALLOCD		0x0001
66#define	EIB_VNIC_GOT_INSTANCE		0x0002
67#define	EIB_VNIC_CREATE_COMMON_DONE	0x0004
68#define	EIB_VNIC_CTLQP_CREATED		0x0008
69#define	EIB_VNIC_DATAQP_CREATED		0x0010
70#define	EIB_VNIC_LOGIN_DONE		0x0020
71#define	EIB_VNIC_PARTITION_SET		0x0040
72#define	EIB_VNIC_RX_POSTED_TO_CTLQP	0x0080
73#define	EIB_VNIC_RX_POSTED_TO_DATAQP	0x0100
74#define	EIB_VNIC_ATTACHED_TO_CTL_MCGS	0x0200
75#define	EIB_VNIC_GOT_VHUB_TABLE		0x0400
76#define	EIB_VNIC_KEEPALIVES_STARTED	0x0800
77#define	EIB_VNIC_BROADCAST_JOINED	0x1000
78
79/*
80 * Destination type
81 */
82#define	EIB_TX_UNICAST			1
83#define	EIB_TX_MULTICAST		2
84#define	EIB_TX_BROADCAST		3
85
/*
 * Create a new vnic instance for the given {macaddr, vlan} tuple and
 * return it via 'vnicp'.  On failure, '*err' is set to an errno value
 * and EIB_E_FAILURE is returned.  Creation is serialized against other
 * vnic modifications via eib_vnic_modify_enter/exit().  The 'progress'
 * bitmask records which setup steps completed, so the failure path can
 * roll back exactly what was done.
 */
int
eib_vnic_create(eib_t *ss, uint8_t *macaddr, uint16_t vlan, eib_vnic_t **vnicp,
    int *err)
{
	eib_vnic_t *vnic = NULL;
	boolean_t failed_vnic = B_FALSE;
	uint_t progress = 0;

	eib_vnic_modify_enter(ss, EIB_VN_BEING_CREATED);

	/*
	 * When a previously created vnic is being resurrected due to a
	 * gateway reboot, there's a race possible where a creation request
	 * for the existing vnic could get filed with the vnic creator
	 * thread. So, before we go ahead with the creation of this vnic,
	 * make sure we already don't have the vnic.
	 */
	if (macaddr) {
		if (eib_data_lookup_vnic(ss, macaddr, vlan, vnicp,
		    &failed_vnic) == EIB_E_SUCCESS) {
			/* Vnic already exists; treat as success, no dup */
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: "
			    "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x "
			    "already there, no duplicate creation", macaddr[0],
			    macaddr[1], macaddr[2], macaddr[3], macaddr[4],
			    macaddr[5], vlan);

			eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
			return (EIB_E_SUCCESS);
		} else if (failed_vnic) {
			/*
			 * A prior creation attempt for this tuple failed;
			 * don't retry it from here.
			 */
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: "
			    "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x "
			    "failed earlier, shouldn't be here at all",
			    macaddr[0], macaddr[1], macaddr[2], macaddr[3],
			    macaddr[4], macaddr[5], vlan);

			*err = EEXIST;

			eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
			return (EIB_E_FAILURE);
		}
	}

	/*
	 * Allocate a vnic structure for this instance
	 */
	vnic = kmem_zalloc(sizeof (eib_vnic_t), KM_SLEEP);
	vnic->vn_ss = ss;
	vnic->vn_instance = -1;
	mutex_init(&vnic->vn_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vnic->vn_cv, NULL, CV_DEFAULT, NULL);

	progress |= EIB_VNIC_STRUCT_ALLOCD;

	/*
	 * Get a vnic instance
	 */
	if (eib_vnic_get_instance(ss, &vnic->vn_instance) != EIB_E_SUCCESS) {
		*err = EMFILE;
		goto vnic_create_fail;
	}
	progress |= EIB_VNIC_GOT_INSTANCE;

	/*
	 * Initialize vnic's basic parameters.  Note that we set the 15-bit
	 * vnic id to send to gw during a login to be a 2-tuple of
	 * {devi_instance#, eoib_vnic_instance#}.
	 */
	vnic->vn_vlan = vlan;
	if (macaddr) {
		bcopy(macaddr, vnic->vn_macaddr, sizeof (vnic->vn_macaddr));
	}
	vnic->vn_id = (uint16_t)EIB_VNIC_ID(ss->ei_instance, vnic->vn_instance);

	/*
	 * Start up this vnic instance
	 */
	if (eib_vnic_create_common(ss, vnic, err) != EIB_E_SUCCESS)
		goto vnic_create_fail;

	progress |= EIB_VNIC_CREATE_COMMON_DONE;

	/*
	 * Return the created vnic
	 */
	if (vnicp) {
		*vnicp = vnic;
	}

	eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
	return (EIB_E_SUCCESS);

vnic_create_fail:
	/* Undo only the steps recorded in 'progress' */
	eib_rb_vnic_create(ss, vnic, progress);
	eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED);
	return (EIB_E_FAILURE);
}
182
/*
 * Tear down the given vnic completely, rolling back everything that
 * eib_vnic_create() set up (the ~0 progress mask undoes all steps).
 */
void
eib_vnic_delete(eib_t *ss, eib_vnic_t *vnic)
{
	eib_vnic_modify_enter(ss, EIB_VN_BEING_DELETED);
	eib_rb_vnic_create(ss, vnic, ~0);
	eib_vnic_modify_exit(ss, EIB_VN_BEING_DELETED);
}
190
191/*ARGSUSED*/
192int
193eib_vnic_wait_for_login_ack(eib_t *ss, eib_vnic_t *vnic, int *err)
194{
195	clock_t deadline;
196	int ret = EIB_E_SUCCESS;
197
198	deadline = ddi_get_lbolt() + drv_usectohz(EIB_LOGIN_TIMEOUT_USEC);
199
200	/*
201	 * Wait for login ack/nack or wait time to get over. If we wake up
202	 * with a login failure, record the reason.
203	 */
204	mutex_enter(&vnic->vn_lock);
205	while (vnic->vn_state == EIB_LOGIN_ACK_WAIT) {
206		if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock,
207		    deadline) == -1) {
208			if (vnic->vn_state == EIB_LOGIN_ACK_WAIT)
209				vnic->vn_state = EIB_LOGIN_TIMED_OUT;
210		}
211	}
212
213	if (vnic->vn_state != EIB_LOGIN_ACK_RCVD) {
214		ret = EIB_E_FAILURE;
215		*err =  (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ?
216		    ETIME : ECANCELED;
217	}
218	mutex_exit(&vnic->vn_lock);
219
220	return (ret);
221}
222
/*
 * Process a login ack/nack message received from the gateway for the
 * vnic currently pending creation (ei_vnic_pending).  The assigned
 * login parameters are validated against what we requested, and the
 * thread blocked in eib_vnic_wait_for_login_ack() is woken up with
 * its state set to EIB_LOGIN_ACK_RCVD or EIB_LOGIN_NACK_RCVD.
 */
void
eib_vnic_login_ack(eib_t *ss, eib_login_data_t *ld)
{
	eib_vnic_t *vnic;
	uint_t vnic_instance;
	uint_t hdrs_sz;
	uint16_t vnic_id;
	int nack = 1;

	/*
	 * The msb in the vnic id in login ack message is not
	 * part of our vNIC id.
	 */
	vnic_id = ld->ld_vnic_id & (~FIP_VL_VNIC_ID_MSBIT);

	/*
	 * Now, we deconstruct the vnic id and determine the vnic
	 * instance number. If this vnic_instance number isn't
	 * valid or the vnic_id of the vnic for this instance
	 * number doesn't match in our records, we quit.
	 */
	vnic_instance = EIB_VNIC_INSTANCE(vnic_id);
	if (vnic_instance >= EIB_MAX_VNICS)
		return;

	/*
	 * At this point, we haven't fully created the vnic, so
	 * this vnic should be present as ei_vnic_pending.
	 */
	mutex_enter(&ss->ei_vnic_lock);
	if ((vnic = ss->ei_vnic_pending) == NULL) {
		mutex_exit(&ss->ei_vnic_lock);
		return;
	} else if (vnic->vn_id != vnic_id) {
		mutex_exit(&ss->ei_vnic_lock);
		return;
	}
	mutex_exit(&ss->ei_vnic_lock);

	/*
	 * First check if the vnic is still sleeping, waiting
	 * for login ack.  If not, we might as well quit now.
	 */
	mutex_enter(&vnic->vn_lock);
	if (vnic->vn_state != EIB_LOGIN_ACK_WAIT) {
		mutex_exit(&vnic->vn_lock);
		return;
	}

	/*
	 * We NACK the waiter under these conditions:
	 *
	 * . syndrome was set
	 * . vhub mtu is bigger than our max mtu (minus eoib/eth hdrs sz)
	 * . assigned vlan is different from requested vlan (except
	 *   when we didn't request a specific vlan)
	 * . when the assigned mac is different from the requested mac
	 *   (except when we didn't request a specific mac)
	 * . when the VP bit indicates that vlan tag should be used
	 *   but we had not specified a vlan tag in our request
	 * . when the VP bit indicates that vlan tag should not be
	 *   present and we'd specified a vlan tag in our request
	 *
	 * The last case is interesting: if we had not specified any vlan id
	 * in our request, but the gateway has assigned a vlan and asks us
	 * to use/expect that tag on every packet dealt by this vnic, it
	 * means effectively the EoIB driver has to insert/remove vlan
	 * tagging on this vnic traffic, since the nw layer on Solaris
	 * won't be using/expecting any tag on traffic for this vnic. This
	 * feature is not supported currently.
	 */

	/*
	 * Max payload the vhub may carry is the port mtu less the eoib
	 * encapsulation, ethernet header and vlan tag overheads.
	 */
	hdrs_sz = EIB_ENCAP_HDR_SZ + sizeof (struct ether_header) + VLAN_TAGSZ;
	if (ld->ld_syndrome) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
		    "non-zero syndrome 0x%lx, NACK", ld->ld_syndrome);

	} else if (ld->ld_vhub_mtu > (ss->ei_props->ep_mtu - hdrs_sz)) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
		    "vhub mtu (0x%x) bigger than port mtu (0x%x), NACK",
		    ld->ld_vhub_mtu, ss->ei_props->ep_mtu);

	} else if ((vnic->vn_vlan) && (vnic->vn_vlan != ld->ld_assigned_vlan)) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
		    "assigned vlan (0x%x) different from asked (0x%x), "
		    "for vnic id 0x%x, NACK", ld->ld_assigned_vlan,
		    vnic->vn_vlan, vnic->vn_id);

	} else if (bcmp(vnic->vn_macaddr, eib_zero_mac, ETHERADDRL) &&
	    bcmp(vnic->vn_macaddr, ld->ld_assigned_mac, ETHERADDRL)) {
		uint8_t *asked, *got;

		asked = vnic->vn_macaddr;
		got = ld->ld_assigned_mac;

		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
		    "assigned mac (%x:%x:%x:%x:%x:%x) different from "
		    "asked (%x:%x:%x:%x:%x:%x) for vnic id 0x%x, NACK",
		    got[0], got[1], got[2], got[3], got[4], got[5], asked[0],
		    asked[1], asked[2], asked[3], asked[4], asked[5]);

	} else if ((vnic->vn_vlan == 0) && (ld->ld_vlan_in_packets)) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
		    "asked for tagless vlan, but VP flag is set "
		    "for vnic id 0x%x, NACK", vnic->vn_id);

	} else if ((vnic->vn_vlan) && (!ld->ld_vlan_in_packets)) {
		/*
		 * Workaround for gateways that assign the vlan correctly
		 * but fail to set the VP flag: treat it as if VP were set.
		 */
		if (eib_wa_no_good_vp_flag) {
			ld->ld_vlan_in_packets = 1;
			ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id,
			    ld->ld_assigned_vlan);
			nack = 0;
		} else {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: "
			    "vlan was assigned correctly, but VP flag is not "
			    "set for vnic id 0x%x, NACK", vnic->vn_id);
		}
	} else {
		ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id,
		    ld->ld_assigned_vlan);
		nack = 0;
	}

	/*
	 * ACK/NACK the waiter
	 */
	if (nack) {
		vnic->vn_state = EIB_LOGIN_NACK_RCVD;
	} else {
		/* Save the accepted login parameters with the vnic */
		bcopy(ld, &vnic->vn_login_data, sizeof (eib_login_data_t));
		vnic->vn_state = EIB_LOGIN_ACK_RCVD;
	}

	cv_signal(&vnic->vn_cv);
	mutex_exit(&vnic->vn_lock);
}
358
359int
360eib_vnic_wait_for_table(eib_t *ss, eib_vnic_t *vnic, int *err)
361{
362	clock_t deadline;
363	int ret = EIB_E_SUCCESS;
364
365	/*
366	 * The EoIB spec does not detail exactly within what time a vhub table
367	 * request is expected to be answered.  However, it does mention that
368	 * in the worst case, the vhub update messages from the gateway must
369	 * be seen atleast once in 2.5 * GW_KA_PERIOD (already saved in
370	 * pp_gw_ka_ticks), so we'll settle for that limit.
371	 */
372	deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_gw_ka_ticks;
373
374	/*
375	 * Wait for vhub table to be constructed. If we wake up with a
376	 * vhub table construction failure, record the reason.
377	 */
378	mutex_enter(&vnic->vn_lock);
379	while (vnic->vn_state == EIB_LOGIN_TBL_WAIT) {
380		if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock,
381		    deadline) == -1) {
382			if (vnic->vn_state == EIB_LOGIN_TBL_WAIT)
383				vnic->vn_state = EIB_LOGIN_TIMED_OUT;
384		}
385	}
386
387	if (vnic->vn_state != EIB_LOGIN_TBL_DONE) {
388		ret = EIB_E_FAILURE;
389		*err =  (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ?
390		    ETIME : ECANCELED;
391	}
392	mutex_exit(&vnic->vn_lock);
393
394	return (ret);
395}
396
397void
398eib_vnic_vhub_table_done(eib_vnic_t *vnic, uint_t result_state)
399{
400	ASSERT(result_state == EIB_LOGIN_TBL_DONE ||
401	    result_state == EIB_LOGIN_TBL_FAILED);
402
403	/*
404	 * Construction of vhub table for the vnic is done one way or
405	 * the other.  Set the login wait state appropriately and signal
406	 * the waiter. If it's a vhub table failure, we shouldn't parse
407	 * any more vhub table or vhub update packets until the vnic state
408	 * is changed.
409	 */
410	mutex_enter(&vnic->vn_lock);
411	vnic->vn_state = result_state;
412	cv_signal(&vnic->vn_cv);
413	mutex_exit(&vnic->vn_lock);
414}
415
416int
417eib_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac,
418    boolean_t rejoin, int *err)
419{
420	eib_chan_t *chan = vnic->vn_data_chan;
421	eib_login_data_t *ld = &vnic->vn_login_data;
422	eib_mcg_t *mcg;
423	eib_mcg_t *elem;
424	eib_mcg_t *tail;
425	ibt_mcg_info_t *mcg_info;
426	ibt_mcg_attr_t mcg_attr;
427	ibt_status_t ret;
428
429	/*
430	 * Compose the multicast MGID to join
431	 */
432	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
433
434	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
435	    (uint8_t)EIB_MGID_VHUB_DATA, mcast_mac, ld->ld_n_mac_mcgid, 0,
436	    ld->ld_vhub_id, &(mcg_attr.mc_mgid));
437	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
438	mcg_attr.mc_qkey = (ib_qkey_t)EIB_DATA_QKEY;
439
440	/*
441	 * Allocate for and prepare the mcg to add to our list
442	 */
443	mcg_info = kmem_zalloc(sizeof (ibt_mcg_info_t), KM_NOSLEEP);
444	if (mcg_info == NULL) {
445		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
446		    "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)",
447		    mcast_mac[0], mcast_mac[1], mcast_mac[2],
448		    mcast_mac[3], mcast_mac[4], mcast_mac[5]);
449
450		*err = ENOMEM;
451		goto vnic_join_data_mcg_fail;
452	}
453	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
454	if (mcg == NULL) {
455		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
456		    "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)",
457		    mcast_mac[0], mcast_mac[1], mcast_mac[2],
458		    mcast_mac[3], mcast_mac[4], mcast_mac[5]);
459
460		*err = ENOMEM;
461		goto vnic_join_data_mcg_fail;
462	}
463	mcg->mg_next = NULL;
464	mcg->mg_rgid = ss->ei_props->ep_sgid;
465	mcg->mg_mgid = mcg_attr.mc_mgid;
466	mcg->mg_join_state = IB_MC_JSTATE_FULL;
467	mcg->mg_mcginfo = mcg_info;
468	bcopy(mcast_mac, mcg->mg_mac, ETHERADDRL);
469
470	/*
471	 * Join the multicast group
472	 *
473	 * Should we query for the mcg and join instead of attempting to
474	 * join directly ?
475	 */
476	mcg_attr.mc_join_state = mcg->mg_join_state;
477	mcg_attr.mc_flow = 0;
478	mcg_attr.mc_tclass = 0;
479	mcg_attr.mc_sl = 0;
480	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */
481
482	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, mcg_info, NULL, NULL);
483	if (ret != IBT_SUCCESS) {
484		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: "
485		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, qkey=0x%lx, "
486		    "jstate=0x%x) failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
487		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
488		    mcg_attr.mc_qkey, mcg_attr.mc_join_state, ret);
489
490		*err = EINVAL;
491		goto vnic_join_data_mcg_fail;
492	}
493
494	/*
495	 * Attach to the group to receive multicast messages
496	 */
497	ret = ibt_attach_mcg(chan->ch_chan, mcg_info);
498	if (ret != IBT_SUCCESS) {
499		*err = EINVAL;
500
501		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
502		    eib_reserved_gid, mcg->mg_join_state);
503		if (ret != EIB_E_SUCCESS) {
504			EIB_DPRINTF_WARN(ss->ei_instance,
505			    "eib_vnic_join_data_mcg: "
506			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
507			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
508			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
509		}
510
511		goto vnic_join_data_mcg_fail;
512	}
513
514	mutex_enter(&chan->ch_vhub_lock);
515
516	tail = NULL;
517	for (elem = chan->ch_vhub_data; elem != NULL; elem = elem->mg_next) {
518		if ((elem->mg_mgid.gid_prefix == mcg_attr.mc_mgid.gid_prefix) &&
519		    (elem->mg_mgid.gid_guid == mcg_attr.mc_mgid.gid_guid)) {
520			break;
521		}
522		tail = elem;
523	}
524
525	/*
526	 * If we had't already joined to this mcg, add the newly joined mcg
527	 * to the tail and return success
528	 */
529	if (elem == NULL) {
530		if (tail)
531			tail->mg_next = mcg;
532		else
533			chan->ch_vhub_data = mcg;
534		mutex_exit(&chan->ch_vhub_lock);
535		return (EIB_E_SUCCESS);
536	}
537
538	/*
539	 * Duplicate.  We need to leave one of the two joins.  If "rejoin"
540	 * was requested, leave the old join, otherwise leave the new join.
541	 *
542	 * Note that we must not detach the qp from the mcg, since if this
543	 * was a dup, a second ibt_attach_mcg() above would've simply been
544	 * a nop.
545	 *
546	 * Note also that the leave may not be successful here if our presence
547	 * has been removed by the SM, but we need to do this to prevent leaks
548	 * in ibtf.
549	 */
550	if (rejoin) {
551		ASSERT(elem->mg_mcginfo != NULL);
552		kmem_free(elem->mg_mcginfo, sizeof (ibt_mcg_info_t));
553		(void) ibt_leave_mcg(elem->mg_rgid, elem->mg_mgid,
554		    eib_reserved_gid, elem->mg_join_state);
555		/*
556		 * Copy the new mcg over the old one (including the new
557		 * mg_mcginfo), but preserve the link to the next element
558		 * on the list
559		 */
560		mcg->mg_next = elem->mg_next;
561		bcopy(mcg, elem, sizeof (eib_mcg_t));
562	} else {
563		ASSERT(mcg->mg_mcginfo != NULL);
564		kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
565		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
566		    eib_reserved_gid, mcg->mg_join_state);
567	}
568	mutex_exit(&chan->ch_vhub_lock);
569
570	kmem_free(mcg, sizeof (eib_mcg_t));
571	return (EIB_E_SUCCESS);
572
573vnic_join_data_mcg_fail:
574	if (mcg) {
575		kmem_free(mcg, sizeof (eib_mcg_t));
576	}
577	if (mcg_info) {
578		kmem_free(mcg_info, sizeof (ibt_mcg_info_t));
579	}
580	return (EIB_E_FAILURE);
581}
582
583int
584eib_vnic_setup_dest(eib_vnic_t *vnic, eib_wqe_t *swqe, uint8_t *dmac,
585    uint16_t vlan)
586{
587	eib_t *ss = vnic->vn_ss;
588	eib_stats_t *stats = ss->ei_stats;
589	eib_avect_t *av;
590	eib_vhub_map_t ucast;
591	ibt_mcg_info_t mcast;
592	ibt_status_t ret;
593	int dtype;
594	int rv;
595
596	/*
597	 * Lookup the destination in the vhub table or in our mcg list
598	 */
599	rv = eib_vnic_lookup_dest(vnic, dmac, vlan, &ucast, &mcast, &dtype);
600	if (rv != EIB_E_SUCCESS) {
601		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_setup_dest: "
602		    "eib_vnic_lookup_dest(dmac=%x:%x:%x:%x:%x:%x, vlan=0x%x) "
603		    "failed", dmac[0], dmac[1], dmac[2], dmac[3], dmac[4],
604		    dmac[5], vlan);
605
606		return (EIB_E_FAILURE);
607	}
608
609	/*
610	 * If we found a unicast address, get an address vector for the lid
611	 * and sl, modify the ud dest based on the address vector and return.
612	 * If we found a multicast address, use the address vector in the
613	 * mcg info to modify the ud dest and return.
614	 */
615	if (dtype == EIB_TX_UNICAST) {
616		if ((av = eib_ibt_hold_avect(ss, ucast.mp_lid,
617		    ucast.mp_sl)) == NULL) {
618			EIB_DPRINTF_WARN(ss->ei_instance,
619			    "eib_vnic_setup_dest: "
620			    "eib_ibt_hold_avect(lid=0x%x, sl=0x%x) failed",
621			    ucast.mp_lid, ucast.mp_sl);
622
623			return (EIB_E_FAILURE);
624		}
625		ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY,
626		    ucast.mp_qpn, &av->av_vect);
627
628		eib_ibt_release_avect(ss, av);
629
630		if (ret != IBT_SUCCESS) {
631			EIB_DPRINTF_WARN(ss->ei_instance,
632			    "eib_vnic_setup_dest: "
633			    "ibt_modify_ud_dest(qpn=0x%lx, qkey=0x%lx) "
634			    "failed, ret=%d", ucast.mp_qpn, EIB_DATA_QKEY, ret);
635			return (EIB_E_FAILURE);
636		}
637	} else {
638		ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY,
639		    IB_MC_QPN, &(mcast.mc_adds_vect));
640
641		if (dtype == EIB_TX_BROADCAST)
642			EIB_INCR_COUNTER(&stats->st_brdcstxmit);
643		else
644			EIB_INCR_COUNTER(&stats->st_multixmit);
645
646		if (ret != IBT_SUCCESS) {
647			EIB_DPRINTF_WARN(ss->ei_instance,
648			    "eib_vnic_setup_dest: "
649			    "ibt_modify_ud_dest(mc_qpn=0x%lx, qkey=0x%lx) "
650			    "failed, ret=%d", IB_MC_QPN, EIB_DATA_QKEY, ret);
651			return (EIB_E_FAILURE);
652		}
653	}
654
655	return (EIB_E_SUCCESS);
656}
657
/*
 * Leave the data mcg for the given multicast mac; this is simply the
 * rollback of eib_vnic_join_data_mcg().
 */
void
eib_vnic_leave_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac)
{
	eib_rb_vnic_join_data_mcg(ss, vnic, mcast_mac);
}
663
664/*ARGSUSED*/
665void
666eib_vnic_init_tables(eib_t *ss, eib_vnic_t *vnic)
667{
668	eib_vhub_table_t *tbl;
669	eib_vhub_update_t *upd;
670
671	tbl = kmem_zalloc(sizeof (eib_vhub_table_t), KM_SLEEP);
672	mutex_init(&tbl->tb_lock, NULL, MUTEX_DRIVER, NULL);
673	tbl->tb_eport_state = FIP_EPORT_UP;
674
675	upd = kmem_zalloc(sizeof (eib_vhub_update_t), KM_SLEEP);
676	mutex_init(&upd->up_lock, NULL, MUTEX_DRIVER, NULL);
677
678	mutex_enter(&vnic->vn_lock);
679	vnic->vn_vhub_table = tbl;
680	vnic->vn_vhub_update = upd;
681	mutex_exit(&vnic->vn_lock);
682}
683
/*
 * Release the contents of this vnic's vhub table and vhub update list.
 * If 'clobber' is B_TRUE, the eib_vhub_table_t/eib_vhub_update_t
 * containers (and their locks) are destroyed as well and detached from
 * the vnic; otherwise the containers are merely emptied and reset for
 * reuse.
 */
/*ARGSUSED*/
void
eib_vnic_fini_tables(eib_t *ss, eib_vnic_t *vnic, boolean_t clobber)
{
	eib_vhub_update_t *upd;
	eib_vhub_table_t *tbl;
	eib_vhub_map_t *elem;
	eib_vhub_map_t *nxt;
	int i;

	/*
	 * We come here only when we've either completely detached from
	 * the vhub multicast groups and so cannot receive anymore table
	 * or update control messages, or we've had a recent vhub table
	 * construction failure and the vnic state is currently
	 * EIB_LOGIN_TBL_FAILED and so won't parse any table or update
	 * control messages.  Also, since we haven't completed the vnic
	 * creation, no one from the tx path will be accessing the
	 * vn_vhub_table entries either.  All said, we're free to play
	 * around with the vnic's vn_vhub_table and vn_vhub_update here.
	 */

	mutex_enter(&vnic->vn_lock);
	upd = vnic->vn_vhub_update;
	tbl = vnic->vn_vhub_table;
	if (clobber) {
		vnic->vn_vhub_update = NULL;
		vnic->vn_vhub_table = NULL;
	}
	mutex_exit(&vnic->vn_lock);

	/*
	 * Destroy the vhub update entries if any
	 */
	if (upd) {
		/*
		 * Wipe clean the list of vnic entries accumulated via
		 * vhub updates so far.  Release eib_vhub_update_t only
		 * if explicitly asked to do so
		 */
		mutex_enter(&upd->up_lock);
		for (elem = upd->up_vnic_entry; elem != NULL; elem = nxt) {
			nxt = elem->mp_next;
			kmem_free(elem, sizeof (eib_vhub_map_t));
		}
		upd->up_vnic_entry = NULL;
		upd->up_tusn = 0;
		upd->up_eport_state = 0;
		mutex_exit(&upd->up_lock);

		if (clobber) {
			mutex_destroy(&upd->up_lock);
			kmem_free(upd, sizeof (eib_vhub_update_t));
		}
	}

	/*
	 * Destroy the vhub table entries
	 */
	if (tbl == NULL)
		return;

	/*
	 * Wipe clean the list of entries in the vhub table collected so
	 * far. Release eib_vhub_table_t only if explicitly asked to do so.
	 */
	mutex_enter(&tbl->tb_lock);

	if (tbl->tb_gateway) {
		kmem_free(tbl->tb_gateway, sizeof (eib_vhub_map_t));
		tbl->tb_gateway = NULL;
	}

	if (tbl->tb_unicast_miss) {
		kmem_free(tbl->tb_unicast_miss, sizeof (eib_vhub_map_t));
		tbl->tb_unicast_miss = NULL;
	}

	if (tbl->tb_vhub_multicast) {
		kmem_free(tbl->tb_vhub_multicast, sizeof (eib_vhub_map_t));
		tbl->tb_vhub_multicast = NULL;
	}

	/*
	 * The mcast entry buckets are populated only when the
	 * no-mcast-entries workaround is not in effect.
	 */
	if (!eib_wa_no_mcast_entries) {
		for (i = 0; i < EIB_TB_NBUCKETS; i++) {
			for (elem = tbl->tb_mcast_entry[i]; elem != NULL;
			    elem = nxt) {
				nxt = elem->mp_next;
				kmem_free(elem, sizeof (eib_vhub_map_t));
			}
			tbl->tb_mcast_entry[i] = NULL;
		}
	}

	for (i = 0; i < EIB_TB_NBUCKETS; i++) {
		for (elem = tbl->tb_vnic_entry[i]; elem != NULL; elem = nxt) {
			nxt = elem->mp_next;
			kmem_free(elem, sizeof (eib_vhub_map_t));
		}
		tbl->tb_vnic_entry[i] = NULL;
	}

	/* Reset the table bookkeeping for potential reuse */
	tbl->tb_tusn = 0;
	tbl->tb_eport_state = 0;
	tbl->tb_entries_seen = 0;
	tbl->tb_entries_in_table = 0;
	tbl->tb_checksum = 0;

	mutex_exit(&tbl->tb_lock);

	/*
	 * Don't throw away space created for holding vhub table if we haven't
	 * been explicitly asked to do so
	 */
	if (clobber) {
		mutex_destroy(&tbl->tb_lock);
		kmem_free(tbl, sizeof (eib_vhub_table_t));
	}
}
803
804eib_chan_t *
805eib_vnic_get_data_chan(eib_t *ss, int vinst)
806{
807	eib_vnic_t *vnic;
808	eib_chan_t *chan = NULL;
809
810	if (vinst >= 0 && vinst < EIB_MAX_VNICS) {
811		mutex_enter(&ss->ei_vnic_lock);
812		if ((vnic = ss->ei_vnic[vinst]) != NULL)
813			chan = vnic->vn_data_chan;
814		mutex_exit(&ss->ei_vnic_lock);
815	}
816
817	return (chan);
818}
819
820void
821eib_vnic_need_new(eib_t *ss, uint8_t *mac, uint16_t vlan)
822{
823	eib_vnic_req_t *vrq;
824
825	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);
826
827	/*
828	 * Create a new vnic request for this {mac,vlan} tuple
829	 */
830	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_NOSLEEP);
831	if (vrq == NULL) {
832		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_need_new: "
833		    "no memory, failed to queue new vnic creation request");
834		return;
835	}
836	vrq->vr_next = NULL;
837	vrq->vr_req = EIB_CR_REQ_NEW_VNIC;
838	bcopy(mac, vrq->vr_mac, ETHERADDRL);
839	vrq->vr_vlan = vlan;
840
841	eib_vnic_enqueue_req(ss, vrq);
842}
843
/*
 * Queue the given request with the vnic creator thread and signal it.
 * Priority rules: once a DIE request is at the head of the queue, all
 * further requests are dropped; DIE/FLUSH requests are pushed to the
 * head; a NEW_VNIC request that duplicates either the request currently
 * being processed or one already waiting in the queue is dropped and
 * freed here.  Takes ownership of 'vrq' (frees it when dropping).
 */
void
eib_vnic_enqueue_req(eib_t *ss, eib_vnic_req_t *vrq)
{
	eib_vnic_req_t *elem = NULL;
	uint8_t *m;

	/*
	 * Enqueue this new vnic request with the vnic creator and
	 * signal it.
	 */
	m = vrq->vr_mac;
	EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_enqueue_req: "
	    "BEGIN file request for creation of %x:%x:%x:%x:%x:%x, 0x%x",
	    m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan);


	mutex_enter(&ss->ei_vnic_req_lock);

	/*
	 * Death request has the highest priority.  If we've already been asked
	 * to die, we don't entertain any more requests.
	 */
	if (ss->ei_vnic_req) {
		if (ss->ei_vnic_req->vr_req == EIB_CR_REQ_DIE) {
			mutex_exit(&ss->ei_vnic_req_lock);
			kmem_free(vrq, sizeof (eib_vnic_req_t));
			return;
		}
	}

	if (vrq->vr_req == EIB_CR_REQ_DIE || vrq->vr_req == EIB_CR_REQ_FLUSH) {
		/* DIE/FLUSH jump to the head of the queue */
		vrq->vr_next = ss->ei_vnic_req;
		ss->ei_vnic_req = vrq;
	} else {
		/*
		 * If there's already a creation request for this vnic that's
		 * being processed, return immediately without adding a new
		 * request.
		 */
		if ((elem = ss->ei_pending_vnic_req) != NULL) {
			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_vnic_enqueue_req: "
			    "ei_pending_vnic_req not NULL");

			if ((elem->vr_vlan == vrq->vr_vlan) &&
			    (bcmp(elem->vr_mac, vrq->vr_mac,
			    ETHERADDRL) == 0)) {
				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_vnic_enqueue_req: "
				    "pending request already present for "
				    "%x:%x:%x:%x:%x:%x, 0x%x", m[0], m[1], m[2],
				    m[3], m[4], m[5], vrq->vr_vlan);

				mutex_exit(&ss->ei_vnic_req_lock);
				kmem_free(vrq, sizeof (eib_vnic_req_t));

				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_vnic_enqueue_req: "
				    "END file request");
				return;
			}

			EIB_DPRINTF_DEBUG(ss->ei_instance,
			    "eib_vnic_enqueue_req: "
			    "NO pending request for %x:%x:%x:%x:%x:%x, 0x%x",
			    m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan);
		}

		/*
		 * Or if there's one waiting in the queue for processing, do
		 * the same thing
		 */
		for (elem = ss->ei_vnic_req; elem; elem = elem->vr_next) {
			/*
			 * If there's already a create request for this vnic
			 * waiting in the queue, return immediately
			 */
			if (elem->vr_req == EIB_CR_REQ_NEW_VNIC) {
				if ((elem->vr_vlan == vrq->vr_vlan) &&
				    (bcmp(elem->vr_mac, vrq->vr_mac,
				    ETHERADDRL) == 0)) {

					EIB_DPRINTF_DEBUG(ss->ei_instance,
					    "eib_vnic_enqueue_req: "
					    "request already present for "
					    "%x:%x:%x:%x:%x:%x, 0x%x", m[0],
					    m[1], m[2], m[3], m[4], m[5],
					    vrq->vr_vlan);

					mutex_exit(&ss->ei_vnic_req_lock);
					kmem_free(vrq, sizeof (eib_vnic_req_t));

					EIB_DPRINTF_DEBUG(ss->ei_instance,
					    "eib_vnic_enqueue_req: "
					    "END file request");
					return;
				}
			}

			/* Stop at the tail so we can append below */
			if (elem->vr_next == NULL) {
				EIB_DPRINTF_DEBUG(ss->ei_instance,
				    "eib_vnic_enqueue_req: "
				    "request not found, filing afresh");
				break;
			}
		}

		/*
		 * Otherwise queue up this new creation request and signal the
		 * service thread.
		 */
		if (elem) {
			elem->vr_next = vrq;
		} else {
			ss->ei_vnic_req = vrq;
		}
	}

	cv_signal(&ss->ei_vnic_req_cv);
	mutex_exit(&ss->ei_vnic_req_lock);

	EIB_DPRINTF_DEBUG(ss->ei_instance,
	    "eib_vnic_enqueue_req: END file request");
}
968
969void
970eib_vnic_update_failed_macs(eib_t *ss, uint8_t *old_mac, uint16_t old_vlan,
971    uint8_t *new_mac, uint16_t new_vlan)
972{
973	eib_vnic_req_t *vrq;
974	eib_vnic_req_t *elem;
975	eib_vnic_req_t *prev;
976
977	vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_NOSLEEP);
978	if (vrq == NULL) {
979		EIB_DPRINTF_WARN(ss->ei_instance,
980		    "eib_vnic_update_failed_macs: "
981		    "no memory, failed to drop old mac");
982	} else {
983		vrq->vr_next = NULL;
984		vrq->vr_req = 0;	/* unused */
985		bcopy(old_mac, vrq->vr_mac, ETHERADDRL);
986		vrq->vr_vlan = old_vlan;
987	}
988
989	mutex_enter(&ss->ei_vnic_req_lock);
990
991	/*
992	 * We'll search the failed vnics list to see if the new {mac,vlan}
993	 * tuple is in there and remove it if present (since the new address
994	 * is no longer "failed").
995	 */
996	prev = NULL;
997	for (elem = ss->ei_failed_vnic_req; elem; elem = elem->vr_next) {
998		if ((bcmp(elem->vr_mac, new_mac, ETHERADDRL) == 0) &&
999		    (elem->vr_vlan == new_vlan)) {
1000			if (prev) {
1001				prev->vr_next = elem->vr_next;
1002			} else {
1003				ss->ei_failed_vnic_req = elem->vr_next;
1004			}
1005			elem->vr_next = NULL;
1006			break;
1007		}
1008	}
1009	if (elem) {
1010		kmem_free(elem, sizeof (eib_vnic_req_t));
1011	}
1012
1013	/*
1014	 * We'll also insert the old {mac,vlan} tuple to the "failed vnic req"
1015	 * list (it shouldn't be there already), to avoid trying to recreate
1016	 * the vnic we just explicitly discarded.
1017	 */
1018	if (vrq) {
1019		vrq->vr_next = ss->ei_failed_vnic_req;
1020		ss->ei_failed_vnic_req = vrq;
1021	}
1022
1023	mutex_exit(&ss->ei_vnic_req_lock);
1024}
1025
1026void
1027eib_vnic_resurrect_zombies(eib_t *ss, uint8_t *vn0_mac)
1028{
1029	int inst;
1030
1031	/*
1032	 * We want to restart/relogin each vnic instance with the gateway,
1033	 * but with the same vnic id and instance as before.
1034	 */
1035	while ((inst = EIB_FIND_LSB_SET(ss->ei_zombie_vnics)) != -1) {
1036		EIB_DPRINTF_DEBUG(ss->ei_instance,
1037		    "eib_vnic_resurrect_zombies: "
1038		    "calling eib_vnic_restart(vn_inst=%d)", inst);
1039
1040		eib_vnic_restart(ss, inst, vn0_mac);
1041
1042		EIB_DPRINTF_DEBUG(ss->ei_instance,
1043		    "eib_vnic_resurrect_zombies: "
1044		    "eib_vnic_restart(vn_inst=%d) done", inst);
1045	}
1046}
1047
/*
 * Tear down and recreate (relogin) vnic instance 'inst', keeping the
 * same vnic id and instance number.  If this is instance 0 and the
 * gateway assigns a different mac on relogin, the new mac is copied
 * back into 'vn0_mac' (when non-NULL) so the caller can inform the mac
 * layer, and the old {mac,vlan} is queued on the "failed vnic req"
 * list.  The whole operation is serialized against other vnic
 * modifications via eib_vnic_modify_enter/exit().
 */
void
eib_vnic_restart(eib_t *ss, int inst, uint8_t *vn0_mac)
{
	eib_vnic_t *vnic;
	eib_login_data_t *ld;
	uint8_t old_mac[ETHERADDRL];
	int ret;
	int err;

	if (inst < 0 || inst >= EIB_MAX_VNICS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_restart: "
		    "vnic instance (%d) invalid", inst);
		return;
	}

	eib_vnic_modify_enter(ss, EIB_VN_BEING_MODIFIED);
	if ((vnic = ss->ei_vnic[inst]) != NULL) {
		/*
		 * Remember what mac was allocated for this vnic last time
		 */
		bcopy(vnic->vn_login_data.ld_assigned_mac, old_mac, ETHERADDRL);

		/*
		 * Tear down and restart this vnic instance
		 */
		eib_rb_vnic_create_common(ss, vnic, ~0);
		ret = eib_vnic_create_common(ss, vnic, &err);
		if (ret != EIB_E_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_restart: "
			    "eib_vnic_create_common(vnic_inst=%d) failed, "
			    "ret=%d", inst, err);
		}

		/*
		 * If this is vnic instance 0 and if our current assigned mac is
		 * different from what was assigned last time, we need to pass
		 * this information back to the caller, so the mac layer can be
		 * appropriately informed. We will also queue up the old mac
		 * and vlan in the "failed vnic req" list, so any future packets
		 * to this address on this interface will be dropped.
		 */
		ld = &vnic->vn_login_data;
		if ((inst == 0) &&
		    (bcmp(ld->ld_assigned_mac, old_mac, ETHERADDRL) != 0)) {
			uint8_t *m = ld->ld_assigned_mac;

			if (vn0_mac != NULL) {
				bcopy(ld->ld_assigned_mac, vn0_mac,
				    ETHERADDRL);
			}

			EIB_DPRINTF_VERBOSE(ss->ei_instance,
			    "eib_vnic_restart: updating failed macs list "
			    "old=%x:%x:%x:%x:%x:%x, new=%x:%x:%x:%x:%x:%x, "
			    "vlan=0x%x", old_mac[0], old_mac[1], old_mac[2],
			    old_mac[3], old_mac[4], old_mac[5], m[0], m[1],
			    m[2], m[3], m[4], m[5], vnic->vn_vlan);

			eib_vnic_update_failed_macs(ss, old_mac, vnic->vn_vlan,
			    ld->ld_assigned_mac, vnic->vn_vlan);
		}

		/*
		 * No longer a zombie or need to rejoin mcgs.  Note that these
		 * bits are cleared even if the recreate above failed.
		 */
		mutex_enter(&ss->ei_vnic_lock);
		ss->ei_zombie_vnics &= (~((uint64_t)1 << inst));
		ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst));
		mutex_exit(&ss->ei_vnic_lock);
	}
	eib_vnic_modify_exit(ss, EIB_VN_BEING_MODIFIED);
}
1122
1123void
1124eib_vnic_rejoin_mcgs(eib_t *ss)
1125{
1126	eib_vnic_t *vnic;
1127	int inst;
1128
1129	/*
1130	 * For each vnic that still requires re-join, go through the
1131	 * control channels and data channel and reattach/rejoin mcgs.
1132	 */
1133	mutex_enter(&ss->ei_vnic_lock);
1134	while ((inst = EIB_FIND_LSB_SET(ss->ei_rejoin_vnics)) != -1) {
1135		if ((vnic = ss->ei_vnic[inst]) != NULL) {
1136			eib_vnic_reattach_ctl_mcgs(ss, vnic);
1137			eib_vnic_rejoin_data_mcgs(ss, vnic);
1138		}
1139		ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst));
1140	}
1141	mutex_exit(&ss->ei_vnic_lock);
1142}
1143
1144void
1145eib_rb_vnic_create(eib_t *ss, eib_vnic_t *vnic, uint_t progress)
1146{
1147	if (progress & EIB_VNIC_CREATE_COMMON_DONE) {
1148		eib_rb_vnic_create_common(ss, vnic, ~0);
1149	}
1150
1151	if (progress & EIB_VNIC_GOT_INSTANCE) {
1152		eib_vnic_ret_instance(ss, vnic->vn_instance);
1153		vnic->vn_instance = -1;
1154	}
1155
1156	if (progress & EIB_VNIC_STRUCT_ALLOCD) {
1157		cv_destroy(&vnic->vn_cv);
1158		mutex_destroy(&vnic->vn_lock);
1159		kmem_free(vnic, sizeof (eib_vnic_t));
1160	}
1161}
1162
1163/*
1164 * Currently, we only allow 64 vnics per eoib device instance, for
1165 * reasons described in eib.h (see EIB_VNIC_ID() definition), so we
1166 * could use a simple bitmap to assign the vnic instance numbers.
1167 * Once we start allowing more vnics per device instance, this
1168 * allocation scheme will need to be changed.
1169 */
1170static int
1171eib_vnic_get_instance(eib_t *ss, int *vinst)
1172{
1173	int bitpos;
1174	uint64_t nval;
1175
1176	mutex_enter(&ss->ei_vnic_lock);
1177
1178	/*
1179	 * What we have is the active vnics list --  the in-use vnics are
1180	 * indicated by a 1 in the bit position, and the free ones are
1181	 * indicated by 0.  We need to find the least significant '0' bit
1182	 * to get the first free vnic instance.  Or we could bit-reverse
1183	 * the active list and locate the least significant '1'.
1184	 */
1185	nval = ~(ss->ei_active_vnics);
1186	if (nval == 0)
1187		return (EIB_E_FAILURE);
1188
1189	/*
1190	 * The single bit-position values in a 64-bit integer are relatively
1191	 * prime with 67, so performing a modulus division with 67 guarantees
1192	 * a unique number between 0 and 63 for each value (setbit_mod67[]).
1193	 */
1194	bitpos = EIB_FIND_LSB_SET(nval);
1195	if (bitpos == -1)
1196		return (EIB_E_FAILURE);
1197
1198	ss->ei_active_vnics |= ((uint64_t)1 << bitpos);
1199	*vinst = bitpos;
1200
1201	mutex_exit(&ss->ei_vnic_lock);
1202
1203	return (EIB_E_SUCCESS);
1204}
1205
1206static void
1207eib_vnic_ret_instance(eib_t *ss, int vinst)
1208{
1209	mutex_enter(&ss->ei_vnic_lock);
1210
1211	if (vinst >= EIB_MAX_VNICS) {
1212		EIB_DPRINTF_WARN(ss->ei_instance,
1213		    "eib_vnic_ret_instance: "
1214		    "vnic instance (%d) invalid", vinst);
1215	} else if ((ss->ei_active_vnics & ((uint64_t)1 << vinst)) == 0) {
1216		EIB_DPRINTF_WARN(ss->ei_instance,
1217		    "eib_vnic_ret_instance: "
1218		    "vnic instance (%d) not active!", vinst);
1219	} else {
1220		ss->ei_active_vnics &= (~((uint64_t)1 << vinst));
1221	}
1222
1223	mutex_exit(&ss->ei_vnic_lock);
1224}
1225
/*
 * Serialize vnic modifications: block until no other thread holds
 * EIB_VN_BEING_MODIFIED in ei_vnic_state, then record 'op' there.
 * Paired with eib_vnic_modify_exit().
 */
static void
eib_vnic_modify_enter(eib_t *ss, uint_t op)
{
	mutex_enter(&ss->ei_vnic_lock);
	while (ss->ei_vnic_state & EIB_VN_BEING_MODIFIED)
		cv_wait(&ss->ei_vnic_cv, &ss->ei_vnic_lock);

	ss->ei_vnic_state |= op;
	mutex_exit(&ss->ei_vnic_lock);
}
1236
/*
 * Clear 'op' from ei_vnic_state and wake every thread blocked in
 * eib_vnic_modify_enter().
 */
static void
eib_vnic_modify_exit(eib_t *ss, uint_t op)
{
	mutex_enter(&ss->ei_vnic_lock);
	ss->ei_vnic_state &= (~op);
	cv_broadcast(&ss->ei_vnic_cv);
	mutex_exit(&ss->ei_vnic_lock);
}
1245
1246static int
1247eib_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, int *err)
1248{
1249	uint_t progress = 0;
1250
1251	/*
1252	 * When we receive login acks within this vnic creation
1253	 * routine we need a way to retrieve the vnic structure
1254	 * from the vnic instance, so store this somewhere. Note
1255	 * that there can be only one outstanding vnic creation
1256	 * at any point of time, so we only need one vnic struct.
1257	 */
1258	mutex_enter(&ss->ei_vnic_lock);
1259	ASSERT(ss->ei_vnic_pending == NULL);
1260	ss->ei_vnic_pending = vnic;
1261	mutex_exit(&ss->ei_vnic_lock);
1262
1263	/*
1264	 * Create a control qp for this vnic
1265	 */
1266	if (eib_ctl_create_qp(ss, vnic, err) != EIB_E_SUCCESS) {
1267		EIB_DPRINTF_WARN(ss->ei_instance,
1268		    "eib_vnic_create_common: "
1269		    "eib_ctl_create_qp(vn_id=0x%x) failed, ret=%d",
1270		    vnic->vn_id, *err);
1271		goto vnic_create_common_fail;
1272	}
1273	progress |= EIB_VNIC_CTLQP_CREATED;
1274
1275	/*
1276	 * Create a data qp for this vnic
1277	 */
1278	if (eib_data_create_qp(ss, vnic, err) != EIB_E_SUCCESS) {
1279		EIB_DPRINTF_WARN(ss->ei_instance,
1280		    "eib_vnic_create_common: "
1281		    "eib_data_create_qp(vn_id=0x%x) failed, ret=%d",
1282		    vnic->vn_id, *err);
1283		goto vnic_create_common_fail;
1284	}
1285	progress |= EIB_VNIC_DATAQP_CREATED;
1286
1287	/*
1288	 * Login to the gateway with this vnic's parameters
1289	 */
1290	if (eib_fip_login(ss, vnic, err) != EIB_E_SUCCESS) {
1291		EIB_DPRINTF_WARN(ss->ei_instance,
1292		    "eib_vnic_create_common: "
1293		    "eib_fip_login(vn_id=0x%x) failed, ret=%d",
1294		    vnic->vn_id, *err);
1295		goto vnic_create_common_fail;
1296	}
1297	progress |= EIB_VNIC_LOGIN_DONE;
1298
1299	/*
1300	 * Associate the control and data qps for the vnic with the
1301	 * vHUB partition
1302	 */
1303	if (eib_vnic_set_partition(ss, vnic, err) != EIB_E_SUCCESS) {
1304		EIB_DPRINTF_WARN(ss->ei_instance,
1305		    "eib_vnic_create_common: "
1306		    "eib_vnic_set_partition(vn_id=0x%x) failed, ret=%d",
1307		    vnic->vn_id, *err);
1308		goto vnic_create_common_fail;
1309	}
1310	progress |= EIB_VNIC_PARTITION_SET;
1311
1312	/*
1313	 * Post initial set of rx buffers on the control qp to the HCA
1314	 */
1315	if (eib_chan_post_rx(ss, vnic->vn_ctl_chan, NULL) != EIB_E_SUCCESS) {
1316		EIB_DPRINTF_WARN(ss->ei_instance,
1317		    "eib_vnic_create_common: "
1318		    "eib_chan_post_rx(vn_id=0x%x, CTL_QP) failed, ret=%d",
1319		    vnic->vn_id, *err);
1320
1321		*err = ENOMEM;
1322		goto vnic_create_common_fail;
1323	}
1324	progress |= EIB_VNIC_RX_POSTED_TO_CTLQP;
1325
1326	/*
1327	 * Post initial set of rx buffers on the data qp to the HCA
1328	 */
1329	if (eib_chan_post_rx(ss, vnic->vn_data_chan, NULL) != EIB_E_SUCCESS) {
1330		EIB_DPRINTF_WARN(ss->ei_instance,
1331		    "eib_vnic_create_common: "
1332		    "eib_chan_post_rx(vn_id=0x%x, DATA_QP) failed, ret=%d",
1333		    vnic->vn_id, *err);
1334
1335		*err = ENOMEM;
1336		goto vnic_create_common_fail;
1337	}
1338	progress |= EIB_VNIC_RX_POSTED_TO_DATAQP;
1339
1340	/*
1341	 * Attach to the vHUB table and vHUB update multicast groups
1342	 */
1343	if (eib_vnic_attach_ctl_mcgs(ss, vnic, err) != EIB_E_SUCCESS) {
1344		EIB_DPRINTF_WARN(ss->ei_instance,
1345		    "eib_vnic_create_common: "
1346		    "eib_vnic_attach_ctl_mcgs(vn_id=0x%x) failed, ret=%d",
1347		    vnic->vn_id, *err);
1348		goto vnic_create_common_fail;
1349	}
1350	progress |= EIB_VNIC_ATTACHED_TO_CTL_MCGS;
1351
1352	/*
1353	 * Send the vHUB table request and construct the vhub table
1354	 */
1355	if (eib_fip_vhub_table(ss, vnic, err) != EIB_E_SUCCESS) {
1356		EIB_DPRINTF_WARN(ss->ei_instance,
1357		    "eib_vnic_create_common: "
1358		    "eib_fip_vhub_table(vn_id=0x%x) failed, ret=%d",
1359		    vnic->vn_id, *err);
1360		goto vnic_create_common_fail;
1361	}
1362	progress |= EIB_VNIC_GOT_VHUB_TABLE;
1363
1364	/*
1365	 * Detach from the vHUB table mcg (we no longer need the vHUB
1366	 * table messages) and start the keepalives for this vnic.
1367	 */
1368	eib_vnic_start_keepalives(ss, vnic);
1369	eib_rb_vnic_attach_vhub_table(ss, vnic);
1370
1371	progress |= EIB_VNIC_KEEPALIVES_STARTED;
1372
1373	/*
1374	 * All ethernet vnics are automatically members of the broadcast
1375	 * group for the vlan they are participating in, so join the
1376	 * ethernet broadcast group.  Note that when we restart vnics,
1377	 * we rejoin the mcgs, so we pass B_TRUE to eib_vnic_join_data_mcg().
1378	 */
1379	if (eib_vnic_join_data_mcg(ss, vnic, eib_broadcast_mac, B_TRUE,
1380	    err) != EIB_E_SUCCESS) {
1381		EIB_DPRINTF_WARN(ss->ei_instance,
1382		    "eib_vnic_create_common: "
1383		    "eib_vnic_join_data_mcg(vn_id=0x%x, BCAST_GROUP) failed, "
1384		    "ret=%d", vnic->vn_id, *err);
1385		goto vnic_create_common_fail;
1386	}
1387	progress |= EIB_VNIC_BROADCAST_JOINED;
1388
1389	mutex_enter(&ss->ei_vnic_lock);
1390	if (ss->ei_vnic[vnic->vn_instance] == NULL) {
1391		ss->ei_vnic[vnic->vn_instance] = vnic;
1392	}
1393	ss->ei_vnic_pending = NULL;
1394	mutex_exit(&ss->ei_vnic_lock);
1395
1396	return (EIB_E_SUCCESS);
1397
1398vnic_create_common_fail:
1399	eib_rb_vnic_create_common(ss, vnic, progress);
1400	return (EIB_E_FAILURE);
1401}
1402
/*
 * Move both the control and data channels of this vnic into the vHUB
 * partition (ld_vhub_pkey) reported by the gateway at login.  On
 * failure, *err is set to EINVAL and EIB_E_FAILURE is returned; any
 * channel state already modified is deliberately left as-is (see note
 * below).
 */
static int
eib_vnic_set_partition(eib_t *ss, eib_vnic_t *vnic, int *err)
{
	int ret;

	/*
	 * Associate the control channel with the vhub partition
	 */
	ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_ctl_chan,
	    vnic->vn_login_data.ld_vhub_pkey);
	if (ret != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_set_partition: "
		    "eib_ibt_modify_chan_pkey(vn_id=0x%x, CTL_CHAN, "
		    "vhub_pkey=0x%x) failed", vnic->vn_id,
		    vnic->vn_login_data.ld_vhub_pkey);
		*err = EINVAL;
		return (EIB_E_FAILURE);
	}

	/*
	 * Now, do the same thing for the data channel. Note that if a
	 * failure happens, the channel state(s) are left as-is, since
	 * it is pointless to try to change them back using the same
	 * interfaces that have just failed.
	 */
	ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_data_chan,
	    vnic->vn_login_data.ld_vhub_pkey);
	if (ret != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_set_partition: "
		    "eib_ibt_modify_chan_pkey(vn_id=0x%x, DATA_CHAN, "
		    "vhub_pkey=0x%x) failed", vnic->vn_id,
		    vnic->vn_login_data.ld_vhub_pkey);
		*err = EINVAL;
		return (EIB_E_FAILURE);
	}

	return (EIB_E_SUCCESS);
}
1443
/*
 * Assemble an EoIB vHUB MGID out of the gateway's mgid prefix, the mgid
 * type (table/update/data), the multicast mac (with only its low n_mac
 * bits retained), the rss hash and the vhub id, then convert the result
 * into the host-order ib_gid_t form that Solaris IBTF expects.
 */
static void
eib_vnic_make_vhub_mgid(uint8_t *mg_prefix, uint8_t mg_type,
    uint8_t *mcast_mac, uint8_t n_mac, uint8_t rss_hash, uint32_t vhub_id,
    ib_gid_t *mgid)
{
	eib_mgid_t em;
	uint64_t dmac_mask;
	uint64_t dmac = 0;
	uint8_t *dmac_str = (uint8_t *)&dmac;
	uint_t	vhub_id_nw;
	uint8_t *vhub_id_str = (uint8_t *)&vhub_id_nw;

	/*
	 * Copy mgid prefix and type
	 */
	bcopy(mg_prefix, em.gd_spec.sp_mgid_prefix, FIP_MGID_PREFIX_LEN);
	em.gd_spec.sp_type = mg_type;

	/*
	 * Take n_mac bits from mcast_mac and copy dmac.  The 6-byte mac
	 * occupies bytes 2..7 of the 8-byte dmac; the mask keeps the low
	 * n_mac bits of the mac in network byte order.  NOTE(review):
	 * assumes n_mac < 64 — a shift by 64 here would be undefined;
	 * confirm against the login data the gateway can supply.
	 */
	bcopy(mcast_mac, dmac_str + 2, ETHERADDRL);
	dmac_mask = ((uint64_t)1 << n_mac) - 1;
	dmac_mask = htonll(dmac_mask);
	dmac &= dmac_mask;
	bcopy(dmac_str + 2, em.gd_spec.sp_dmac, ETHERADDRL);

	/*
	 * Copy rss hash and prepare vhub id from gw port id and vlan
	 * (only the low 3 bytes of the network-order vhub id are used).
	 */
	em.gd_spec.sp_rss_hash = rss_hash;

	vhub_id_nw = htonl(vhub_id);
	bcopy(vhub_id_str + 1, em.gd_spec.sp_vhub_id, FIP_VHUBID_LEN);

	/*
	 * Ok, now we've assembled the mgid as per EoIB spec. We now have to
	 * represent it in the way Solaris IBTF wants it and return (sigh).
	 */
	mgid->gid_prefix = ntohll(em.gd_sol.gid_prefix);
	mgid->gid_guid = ntohll(em.gd_sol.gid_guid);
}
1486
1487static int
1488eib_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic, int *err)
1489{
1490	/*
1491	 * Get tb_vhub_table and tb_vhub_update allocated and ready before
1492	 * attaching to the vhub table and vhub update mcgs
1493	 */
1494	eib_vnic_init_tables(ss, vnic);
1495
1496	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
1497		EIB_DPRINTF_WARN(ss->ei_instance,
1498		    "eib_vnic_attach_ctl_mcgs: "
1499		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
1500		    vnic->vn_id);
1501
1502		*err = EINVAL;
1503		eib_vnic_fini_tables(ss, vnic, B_TRUE);
1504		return (EIB_E_FAILURE);
1505	}
1506
1507	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
1508		EIB_DPRINTF_WARN(ss->ei_instance,
1509		    "eib_vnic_attach_ctl_mcgs: "
1510		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
1511		    vnic->vn_id);
1512
1513		*err = EINVAL;
1514		eib_rb_vnic_attach_vhub_update(ss, vnic);
1515		eib_vnic_fini_tables(ss, vnic, B_TRUE);
1516		return (EIB_E_FAILURE);
1517	}
1518
1519	return (EIB_E_SUCCESS);
1520}
1521
/*
 * Locate, join and attach the vnic's control qp to the vHUB table
 * multicast group, and record the resulting eib_mcg_t in
 * chan->ch_vhub_table.  Every step is undone on failure.
 */
static int
eib_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_mcg_t *mcg;
	ibt_mcg_info_t *tbl_mcginfo;
	ibt_mcg_attr_t mcg_attr;
	ibt_status_t ret;
	uint_t entries;

	/*
	 * Compose the MGID for receiving VHUB table
	 */
	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));

	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_TABLE, eib_broadcast_mac, ld->ld_n_mac_mcgid,
	    0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;

	/*
	 * Locate the multicast group for receiving vhub table
	 */
	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
	    &tbl_mcginfo, &entries);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
		return (EIB_E_FAILURE);
	}

	/*
	 * Allocate for and prepare the mcg to add to our list
	 */
	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
	if (mcg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "no memory, failed to attach to vhub table "
		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/* mcg takes ownership of tbl_mcginfo from here on */
	mcg->mg_next = NULL;
	mcg->mg_rgid = ss->ei_props->ep_sgid;
	mcg->mg_mgid = mcg_attr.mc_mgid;
	mcg->mg_join_state = IB_MC_JSTATE_FULL;
	mcg->mg_mcginfo = tbl_mcginfo;
	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);

	/*
	 * Join the multicast group, reusing the address vector details
	 * returned by the query above.
	 */
	mcg_attr.mc_join_state = mcg->mg_join_state;
	mcg_attr.mc_flow = tbl_mcginfo->mc_adds_vect.av_flow;
	mcg_attr.mc_tclass = tbl_mcginfo->mc_adds_vect.av_tclass;
	mcg_attr.mc_sl = tbl_mcginfo->mc_adds_vect.av_srvl;
	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */

	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, tbl_mcginfo, NULL, NULL);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
		    mcg_attr.mc_join_state, ret);

		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/*
	 * Attach to the multicast group to receive tbl multicasts
	 */
	ret = ibt_attach_mcg(chan->ch_chan, tbl_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		/* leave the group we just joined before freeing */
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mutex_enter(&chan->ch_vhub_lock);
	chan->ch_vhub_table = mcg;
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}
1626
/*
 * Locate, join and attach the vnic's control qp to the vHUB update
 * multicast group, and record the resulting eib_mcg_t in
 * chan->ch_vhub_update.  Mirrors eib_vnic_attach_vhub_table(), with
 * the EIB_MGID_VHUB_UPDATE mgid type.  Every step is undone on failure.
 */
static int
eib_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_mcg_t *mcg;
	ibt_mcg_info_t *upd_mcginfo;
	ibt_mcg_attr_t mcg_attr;
	ibt_status_t ret;
	uint_t entries;

	/*
	 * Compose the MGID for receiving VHUB updates
	 */
	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));

	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_UPDATE, eib_broadcast_mac,
	    ld->ld_n_mac_mcgid, 0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;

	/*
	 * Locate the multicast group for receiving vhub updates
	 */
	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
	    &upd_mcginfo, &entries);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
		return (EIB_E_FAILURE);
	}

	/*
	 * Allocate for and prepare the mcg to add to our list
	 */
	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
	if (mcg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "no memory, failed to attach to vhub update "
		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/* mcg takes ownership of upd_mcginfo from here on */
	mcg->mg_next = NULL;
	mcg->mg_rgid = ss->ei_props->ep_sgid;
	mcg->mg_mgid = mcg_attr.mc_mgid;
	mcg->mg_join_state = IB_MC_JSTATE_FULL;
	mcg->mg_mcginfo = upd_mcginfo;
	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);

	/*
	 * Join the multicast group, reusing the address vector details
	 * returned by the query above.
	 */
	mcg_attr.mc_join_state = mcg->mg_join_state;
	mcg_attr.mc_flow = upd_mcginfo->mc_adds_vect.av_flow;
	mcg_attr.mc_tclass = upd_mcginfo->mc_adds_vect.av_tclass;
	mcg_attr.mc_sl = upd_mcginfo->mc_adds_vect.av_srvl;
	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */

	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, upd_mcginfo, NULL, NULL);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
		    mcg_attr.mc_join_state, ret);

		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/*
	 * Attach to the multicast group to receive upd multicasts
	 */
	ret = ibt_attach_mcg(chan->ch_chan, upd_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		/* leave the group we just joined before freeing */
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mutex_enter(&chan->ch_vhub_lock);
	chan->ch_vhub_update = mcg;
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}
1732
1733static void
1734eib_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
1735{
1736	eib_ka_vnics_t *kav;
1737	eib_ka_vnics_t *elem;
1738	int err;
1739
1740	kav = kmem_zalloc(sizeof (eib_ka_vnics_t), KM_SLEEP);
1741	kav->ka_vnic = vnic;
1742	kav->ka_next = NULL;
1743
1744	/*
1745	 * Send the first keepalive and then queue this vnic up with
1746	 * the keepalives manager
1747	 */
1748	(void) eib_fip_heartbeat(ss, vnic, &err);
1749
1750	mutex_enter(&ss->ei_ka_vnics_lock);
1751	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
1752		if (elem->ka_next == NULL)
1753			break;
1754	}
1755	if (elem) {
1756		elem->ka_next = kav;
1757	} else {
1758		ss->ei_ka_vnics = kav;
1759	}
1760	mutex_exit(&ss->ei_ka_vnics_lock);
1761}
1762
/*ARGSUSED*/
/*
 * Resolve the destination for an outgoing frame on this vnic.  For a
 * unicast dmac, fill 'ucast' from the matching vhub table entry (or
 * the gateway entry if the dmac isn't in the table) and set *dtype to
 * EIB_TX_UNICAST.  For a broadcast/multicast dmac, fill 'mcast' from
 * the mcg we've joined on the broadcast channel and set *dtype to
 * EIB_TX_BROADCAST or EIB_TX_MULTICAST.  Returns EIB_E_FAILURE when no
 * destination can be found.  The 'vlan' parameter is currently unused
 * (see the note below about the shared broadcast channel).
 */
static int
eib_vnic_lookup_dest(eib_vnic_t *vnic, uint8_t *dmac, uint16_t vlan,
    eib_vhub_map_t *ucast, ibt_mcg_info_t *mcast, int *dtype)
{
	eib_t *ss = vnic->vn_ss;
	eib_vhub_map_t *elem;
	eib_mcg_t *mcg;
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_vhub_map_t *gw;
	eib_vhub_table_t *tbl;
	uint8_t bkt = (dmac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
	ib_gid_t mgid;

	/*
	 * If this was a unicast dmac, locate the vhub entry matching the
	 * unicast dmac in our vhub table.  If it's not found, return the
	 * gateway entry
	 */
	if (EIB_UNICAST_MAC(dmac)) {

		mutex_enter(&vnic->vn_lock);
		if ((tbl = vnic->vn_vhub_table) == NULL) {
			mutex_exit(&vnic->vn_lock);
			return (EIB_E_FAILURE);
		}

		/* hash on the last byte of the mac to pick the bucket */
		mutex_enter(&tbl->tb_lock);
		gw = tbl->tb_gateway;
		for (elem = tbl->tb_vnic_entry[bkt]; elem != NULL;
		    elem = elem->mp_next) {
			if (bcmp(elem->mp_mac, dmac, ETHERADDRL) == 0)
				break;
		}
		mutex_exit(&tbl->tb_lock);

		if ((elem == NULL) && (gw == NULL)) {
			mutex_exit(&vnic->vn_lock);
			return (EIB_E_FAILURE);
		}

		/* copy out while still holding vn_lock to keep tbl alive */
		*dtype = EIB_TX_UNICAST;
		if (elem) {
			bcopy(elem, ucast, sizeof (eib_vhub_map_t));
		} else {
			bcopy(gw, ucast, sizeof (eib_vhub_map_t));
		}
		mutex_exit(&vnic->vn_lock);

		return (EIB_E_SUCCESS);
	}

	/*
	 * Is it a broadcast ?
	 */
	*dtype = (bcmp(dmac, eib_broadcast_mac, ETHERADDRL) == 0) ?
	    EIB_TX_BROADCAST : EIB_TX_MULTICAST;

	/*
	 * If this was a multicast dmac, prepare the mgid and look for it
	 * in the list of mcgs we've joined and use the address vector from
	 * the mcginfo stored there.
	 *
	 * Note that since we don't have a way to associate each vlan with
	 * the mcg (see eib_m_multicast()), we'll prepare the mgid to use
	 * the broadcast channel all the time.
	 */
	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_DATA, eib_broadcast_mac, ld->ld_n_mac_mcgid,
	    0, ld->ld_vhub_id, &mgid);

	mutex_enter(&chan->ch_vhub_lock);
	for (mcg = chan->ch_vhub_data; mcg; mcg = mcg->mg_next) {
		if ((mcg->mg_mgid.gid_prefix == mgid.gid_prefix) &&
		    (mcg->mg_mgid.gid_guid == mgid.gid_guid)) {
			break;
		}
	}
	if (mcg == NULL) {
		mutex_exit(&chan->ch_vhub_lock);

		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_lookup_dest: "
		    "could not find mgid %llx.%llx",
		    mgid.gid_prefix, mgid.gid_guid);

		return (EIB_E_FAILURE);
	}

	bcopy(mcg->mg_mcginfo, mcast, sizeof (ibt_mcg_info_t));
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}
1857
/*ARGSUSED*/
/*
 * Detach the data qp from, and leave, every data mcg this vnic has
 * joined, freeing the associated eib_mcg_t structures.  IBT failures
 * are logged but otherwise ignored since we're tearing down anyway.
 */
static void
eib_vnic_leave_all_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *mcglist;
	eib_mcg_t *mcg;
	eib_mcg_t *nxt = NULL;
	ibt_status_t ret;

	/*
	 * First, take the ch_vhub_data mcg chain out of chan
	 */
	mutex_enter(&chan->ch_vhub_lock);
	mcglist = chan->ch_vhub_data;
	chan->ch_vhub_data = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * Go through the chain of mcgs we've joined, detach the qp from the
	 * mcg, leave the group and free all associated stuff
	 */
	for (mcg = mcglist; mcg != NULL; mcg = nxt) {
		nxt = mcg->mg_next;

		ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_leave_all_data_mcgs: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_leave_all_data_mcgs: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		if (mcg->mg_mcginfo)
			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));

		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}
1909
/*
 * Leave and rejoin every data mcg on this vnic's data channel.  The old
 * membership is always left first (to avoid leaks in ibtf — see note
 * below) and then eib_vnic_join_data_mcg() is called afresh for each
 * mac; failures to rejoin an individual group are logged and skipped.
 */
static void
eib_vnic_rejoin_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *mcglist;
	eib_mcg_t *mcg;
	eib_mcg_t *next;
	int err;

	/*
	 * Grab the current list of mcgs
	 */
	mutex_enter(&chan->ch_vhub_lock);
	mcglist = chan->ch_vhub_data;
	chan->ch_vhub_data = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * When rejoin data mcgs is called, we may not even be marked as
	 * joined in SM's records.  But we still have to leave the old
	 * one first to prevent leaks in ibtf.
	 */
	for (mcg = mcglist; mcg != NULL; mcg = next) {
		next = mcg->mg_next;
		mcg->mg_next = NULL;

		(void) ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);

		if (eib_vnic_join_data_mcg(ss, vnic, mcg->mg_mac, B_TRUE,
		    &err) != EIB_E_SUCCESS) {
			uint8_t *m;

			m = mcg->mg_mac;
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_rejoin_data_mcgs: "
			    "eib_vnic_join_data_mcg(mcmac=%x:%x:%x:%x:%x:%x) "
			    "failed, ret=%d", m[0], m[1], m[2], m[3],
			    m[4], m[5], err);
		}
		/* the old entry (and its mcginfo) is no longer needed */
		if (mcg->mg_mcginfo) {
			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}
1957
/*
 * Detach from and reattach to the vhub table and vhub update mcgs,
 * keeping the vhub table/update state we've already constructed.
 * Failures are logged; on a vhub table reattach failure the update
 * attach is rolled back as well.
 */
static void
eib_vnic_reattach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	/*
	 * For reattaching to control mcgs, we will not reinitialize the
	 * vhub table/vhub update we've constructed.  We'll simply detach
	 * from the table and update mcgs and reattach to them.  Hopefully,
	 * we wouldn't have missed any updates and won't have to restart
	 * the vnic.
	 */
	eib_rb_vnic_attach_vhub_table(ss, vnic);
	eib_rb_vnic_attach_vhub_update(ss, vnic);

	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_reattach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
		    vnic->vn_id);
	}

	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_reattach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
		    vnic->vn_id);

		eib_rb_vnic_attach_vhub_update(ss, vnic);
	}
}
1987
/*
 * Rollback for eib_vnic_create_common(): unpublish the vnic, then undo
 * the creation stages selected by the 'progress' bitmask, in reverse
 * order of their setup.  The logout result is intentionally ignored.
 */
static void
eib_rb_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, uint_t progress)
{
	int err;

	/* take the vnic out of the published slot and pending pointer */
	mutex_enter(&ss->ei_vnic_lock);
	ss->ei_vnic[vnic->vn_instance] = NULL;
	ss->ei_vnic_pending = NULL;
	mutex_exit(&ss->ei_vnic_lock);

	if (progress & EIB_VNIC_BROADCAST_JOINED) {
		eib_vnic_leave_all_data_mcgs(ss, vnic);
	}

	if (progress & EIB_VNIC_KEEPALIVES_STARTED) {
		eib_rb_vnic_start_keepalives(ss, vnic);
	}

	if (progress & EIB_VNIC_ATTACHED_TO_CTL_MCGS) {
		eib_rb_vnic_attach_ctl_mcgs(ss, vnic);
	}

	if (progress & EIB_VNIC_LOGIN_DONE) {
		(void) eib_fip_logout(ss, vnic, &err);
	}

	if (progress & EIB_VNIC_DATAQP_CREATED) {
		eib_rb_data_create_qp(ss, vnic);
	}

	if (progress & EIB_VNIC_CTLQP_CREATED) {
		eib_rb_ctl_create_qp(ss, vnic);
	}
}
2022
/*
 * Rollback for eib_vnic_attach_ctl_mcgs(): detach from both control
 * mcgs and release the vhub state built from them.
 */
static void
eib_rb_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	/*
	 * Detach from the vhub table and vhub update mcgs before blowing
	 * up vn_vhub_table and vn_vhub_update, since these are assumed to
	 * be available by the control cq handler.
	 */
	eib_rb_vnic_attach_vhub_table(ss, vnic);
	eib_rb_vnic_attach_vhub_update(ss, vnic);
	eib_vnic_fini_tables(ss, vnic, B_TRUE);
}
2035
/*
 * Rollback for eib_vnic_attach_vhub_table(): detach the control channel
 * qp from the vhub table mcg, leave the group, and free the mcg record.
 */
/*ARGSUSED*/
static void
eib_rb_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_mcg_t *mcg;
	ibt_channel_hdl_t chan_hdl;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	/*
	 * Unlink the vhub table mcg from the control channel under
	 * ch_vhub_lock so the control cq handler can no longer see it;
	 * the actual detach/leave happens outside the lock.
	 */
	mutex_enter(&chan->ch_vhub_lock);
	chan_hdl = chan->ch_chan;
	mcg = chan->ch_vhub_table;
	chan->ch_vhub_table = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	if (chan_hdl && mcg) {
		/*
		 * Detach the channel from the mcg.  Failures here (and in
		 * the leave below) are only logged; teardown continues
		 * regardless.
		 */
		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_table: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		/* Drop our membership in the multicast group itself */
		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_table: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		/*
		 * The vhub table mg_mcginfo is released via
		 * ibt_free_mcg_info(), unlike the data mcg path which
		 * kmem_free()s its mcginfo.  NOTE(review): presumably this
		 * mcginfo was allocated by IBTF (ibt_query_mcg/ibt_join_mcg);
		 * the allocation site is outside this chunk -- confirm.
		 */
		if (mcg->mg_mcginfo) {
			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}
2081
/*
 * Rollback for eib_vnic_attach_vhub_update(): detach the control channel
 * qp from the vhub update mcg, leave the group, and free the mcg record.
 * Mirrors eib_rb_vnic_attach_vhub_table(), but operates on ch_vhub_update.
 */
/*ARGSUSED*/
static void
eib_rb_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_mcg_t *mcg;
	ibt_channel_hdl_t chan_hdl;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	/*
	 * Unlink the vhub update mcg from the control channel under
	 * ch_vhub_lock so the control cq handler can no longer see it;
	 * the actual detach/leave happens outside the lock.
	 */
	mutex_enter(&chan->ch_vhub_lock);
	chan_hdl = chan->ch_chan;
	mcg = chan->ch_vhub_update;
	chan->ch_vhub_update = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	if (chan_hdl && mcg) {
		/*
		 * Detach the channel from the mcg.  Failures here (and in
		 * the leave below) are only logged; teardown continues
		 * regardless.
		 */
		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_update: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		/* Drop our membership in the multicast group itself */
		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_update: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		/*
		 * As with the vhub table mcg, mg_mcginfo here is released
		 * via ibt_free_mcg_info() -- NOTE(review): presumably it
		 * was allocated by IBTF; confirm at the allocation site.
		 */
		if (mcg->mg_mcginfo) {
			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}
2127
2128/*ARGSUSED*/
2129static void
2130eib_rb_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
2131{
2132	eib_ka_vnics_t *prev;
2133	eib_ka_vnics_t *elem;
2134
2135	/*
2136	 * We only need to locate and remove the vnic entry from the
2137	 * keepalives manager list
2138	 */
2139
2140	mutex_enter(&ss->ei_ka_vnics_lock);
2141
2142	prev = NULL;
2143	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
2144		if (elem->ka_vnic == vnic)
2145			break;
2146
2147		prev = elem;
2148	}
2149	if (elem == NULL) {
2150		EIB_DPRINTF_DEBUG(ss->ei_instance,
2151		    "eib_rb_vnic_start_keepalives: no keepalive element found "
2152		    "for vnic 0x%llx (vn_inst=%d) with keepalive manager",
2153		    vnic, vnic->vn_instance);
2154	} else {
2155		if (prev) {
2156			prev->ka_next = elem->ka_next;
2157		} else {
2158			ss->ei_ka_vnics = elem->ka_next;
2159		}
2160		kmem_free(elem, sizeof (eib_ka_vnics_t));
2161	}
2162	mutex_exit(&ss->ei_ka_vnics_lock);
2163}
2164
/*
 * Rollback for eib_vnic_join_data_mcg(): remove the mcg for the given
 * multicast mac from the data channel's list (if present), detach the
 * data qp from it, leave the group and free the record.
 */
/*ARGSUSED*/
static void
eib_rb_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *prev;
	eib_mcg_t *mcg;
	ibt_status_t ret;

	/*
	 * Search our list and remove the item if found
	 */
	mutex_enter(&chan->ch_vhub_lock);

	prev = NULL;
	for (mcg = chan->ch_vhub_data; mcg != NULL; mcg = mcg->mg_next) {
		if (bcmp(mcg->mg_mac, mcast_mac, ETHERADDRL) == 0)
			break;
		prev = mcg;
	}

	/* Not joined to this mcast mac; nothing to undo */
	if (mcg == NULL) {
		mutex_exit(&chan->ch_vhub_lock);
		return;
	}

	/* Unlink the entry while still holding ch_vhub_lock */
	if (prev != NULL)
		prev->mg_next = mcg->mg_next;
	else
		chan->ch_vhub_data = mcg->mg_next;

	mcg->mg_next = NULL;

	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * Detach data channel qp from the mcg, leave the group and free
	 * all associated stuff.  Detach/leave failures are only logged;
	 * the record is freed regardless.
	 */
	ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rb_vnic_join_data_mcg: "
		    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
		    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
		    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
		    mcg->mg_mgid.gid_guid, ret);
	}

	ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid, eib_reserved_gid,
	    mcg->mg_join_state);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rb_vnic_join_data_mcg: "
		    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
		    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
		    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
	}

	/*
	 * Data mcg mcginfo is kmem_free'd (not ibt_free_mcg_info'd as in
	 * the vhub table/update paths).  NOTE(review): presumably it was
	 * kmem_alloc'd at join time; allocation site is outside this
	 * chunk -- confirm.
	 */
	if (mcg->mg_mcginfo)
		kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));

	kmem_free(mcg, sizeof (eib_mcg_t));
}
2229