1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 * This file implements the client interfaces of the IBMF.
28 */
29
30#include <sys/ib/mgt/ibmf/ibmf_impl.h>
31#include <sys/ib/mgt/ib_mad.h>
32
33extern ibmf_state_t *ibmf_statep;
34
35/* global settable */
36int	ibmf_send_wqes_per_port = IBMF_MAX_SQ_WRE;
37int	ibmf_recv_wqes_per_port = IBMF_MAX_RQ_WRE;
38int	ibmf_send_wqes_posted_per_qp = IBMF_MAX_POSTED_SQ_PER_QP;
39int	ibmf_recv_wqes_posted_per_qp = IBMF_MAX_POSTED_RQ_PER_QP;
40
41int	ibmf_taskq_max_tasks = 1024;
42
43int	ibmf_trace_level = DPRINT_L0;
44
/*
 * Offsets and sizes (in bytes) of the class-header portion of the MAD
 * formats handled by IBMF.
 */
#define	IBMF_MAD_CL_HDR_OFF_1	0
#define	IBMF_MAD_CL_HDR_OFF_2	12
#define	IBMF_MAD_CL_HDR_SZ_1	40
#define	IBMF_MAD_CL_HDR_SZ_2	20
#define	IBMF_MAD_CL_HDR_SZ_3	0
#define	IBMF_MAD_CL_HDR_SZ_4	4

/*
 * IBMF_VALID_CLIENT_TYPE():
 *	Evaluates to non-zero if client_type is one of the management
 *	class codes recognized by IBMF; used to validate a client's
 *	ir_client_class at registration time (see
 *	ibmf_i_validate_class_mask()).
 */
#define	IBMF_VALID_CLIENT_TYPE(client_type)		\
	((client_type) == SUBN_AGENT ||			\
	(client_type) == SUBN_MANAGER ||		\
	(client_type) == SUBN_ADM_AGENT ||		\
	(client_type) == SUBN_ADM_MANAGER ||		\
	(client_type) == PERF_AGENT ||			\
	(client_type) == PERF_MANAGER ||		\
	(client_type) == BM_AGENT ||			\
	(client_type) == BM_MANAGER ||			\
	(client_type) == DEV_MGT_AGENT ||		\
	(client_type) == DEV_MGT_MANAGER ||		\
	(client_type) == COMM_MGT_MANAGER_AGENT ||	\
	(client_type) == SNMP_MANAGER_AGENT ||		\
	(client_type) == VENDOR_09_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0F_MANAGER_AGENT ||	\
	(client_type) == VENDOR_30_MANAGER_AGENT ||	\
	(client_type) == VENDOR_31_MANAGER_AGENT ||	\
	(client_type) == VENDOR_32_MANAGER_AGENT ||	\
	(client_type) == VENDOR_33_MANAGER_AGENT ||	\
	(client_type) == VENDOR_34_MANAGER_AGENT ||	\
	(client_type) == VENDOR_35_MANAGER_AGENT ||	\
	(client_type) == VENDOR_36_MANAGER_AGENT ||	\
	(client_type) == VENDOR_37_MANAGER_AGENT ||	\
	(client_type) == VENDOR_38_MANAGER_AGENT ||	\
	(client_type) == VENDOR_39_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3F_MANAGER_AGENT ||	\
	(client_type) == VENDOR_40_MANAGER_AGENT ||	\
	(client_type) == VENDOR_41_MANAGER_AGENT ||	\
	(client_type) == VENDOR_42_MANAGER_AGENT ||	\
	(client_type) == VENDOR_43_MANAGER_AGENT ||	\
	(client_type) == VENDOR_44_MANAGER_AGENT ||	\
	(client_type) == VENDOR_45_MANAGER_AGENT ||	\
	(client_type) == VENDOR_46_MANAGER_AGENT ||	\
	(client_type) == VENDOR_47_MANAGER_AGENT ||	\
	(client_type) == VENDOR_48_MANAGER_AGENT ||	\
	(client_type) == VENDOR_49_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4F_MANAGER_AGENT ||	\
	(client_type) == APPLICATION_10_MANAGER_AGENT || \
	(client_type) == APPLICATION_11_MANAGER_AGENT || \
	(client_type) == APPLICATION_12_MANAGER_AGENT || \
	(client_type) == APPLICATION_13_MANAGER_AGENT || \
	(client_type) == APPLICATION_14_MANAGER_AGENT || \
	(client_type) == APPLICATION_15_MANAGER_AGENT || \
	(client_type) == APPLICATION_16_MANAGER_AGENT || \
	(client_type) == APPLICATION_17_MANAGER_AGENT || \
	(client_type) == APPLICATION_18_MANAGER_AGENT || \
	(client_type) == APPLICATION_19_MANAGER_AGENT || \
	(client_type) == APPLICATION_1A_MANAGER_AGENT || \
	(client_type) == APPLICATION_1B_MANAGER_AGENT || \
	(client_type) == APPLICATION_1C_MANAGER_AGENT || \
	(client_type) == APPLICATION_1D_MANAGER_AGENT || \
	(client_type) == APPLICATION_1E_MANAGER_AGENT || \
	(client_type) == APPLICATION_1F_MANAGER_AGENT || \
	(client_type) == APPLICATION_20_MANAGER_AGENT || \
	(client_type) == APPLICATION_21_MANAGER_AGENT || \
	(client_type) == APPLICATION_22_MANAGER_AGENT || \
	(client_type) == APPLICATION_23_MANAGER_AGENT || \
	(client_type) == APPLICATION_24_MANAGER_AGENT || \
	(client_type) == APPLICATION_25_MANAGER_AGENT || \
	(client_type) == APPLICATION_26_MANAGER_AGENT || \
	(client_type) == APPLICATION_27_MANAGER_AGENT || \
	(client_type) == APPLICATION_28_MANAGER_AGENT || \
	(client_type) == APPLICATION_29_MANAGER_AGENT || \
	(client_type) == APPLICATION_2A_MANAGER_AGENT || \
	(client_type) == APPLICATION_2B_MANAGER_AGENT || \
	(client_type) == APPLICATION_2C_MANAGER_AGENT || \
	(client_type) == APPLICATION_2D_MANAGER_AGENT || \
	(client_type) == APPLICATION_2E_MANAGER_AGENT || \
	(client_type) == APPLICATION_2F_MANAGER_AGENT || \
	(client_type) == UNIVERSAL_CLASS)
137
138static ibmf_ci_t *ibmf_i_lookup_ci(ib_guid_t ci_guid);
139static int ibmf_i_init_ci(ibmf_register_info_t *client_infop,
140    ibmf_ci_t *cip);
141static void ibmf_i_uninit_ci(ibmf_ci_t *cip);
142static void ibmf_i_init_ci_done(ibmf_ci_t *cip);
143static void ibmf_i_uninit_ci_done(ibmf_ci_t *cip);
144static int ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
145static void ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
146static int ibmf_i_init_cqs(ibmf_ci_t *cip);
147static void ibmf_i_fini_cqs(ibmf_ci_t *cip);
148static void ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip);
149static void ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip);
150static int ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
151    ibmf_register_info_t *ir_client, ibmf_client_t **clientpp);
152
153/*
154 * ibmf_init():
155 *	Initializes module state and registers with the IBT framework.
156 * 	Returns 0 if initialization was successful, else returns non-zero.
157 */
158int
159ibmf_init(void)
160{
161	ibt_status_t 	status;
162	ibt_clnt_hdl_t 	ibmf_ibt_handle;
163
164	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_start,
165	    IBMF_TNF_TRACE, "", "ibmf_init() enter\n");
166
167	/* setup the IBT module information */
168	ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V_CURR;
169	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_class = IBT_IBMA;
170	ibmf_statep->ibmf_ibt_modinfo.mi_async_handler
171	    = ibmf_ibt_async_handler;
172	ibmf_statep->ibmf_ibt_modinfo.mi_reserved = NULL;
173	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_name = "ibmf";
174
175	/* setup a connection to IB transport layer (IBTF) */
176	status = ibt_attach(&ibmf_statep->ibmf_ibt_modinfo, (void *)NULL,
177	    (void *)NULL, (void *)&ibmf_ibt_handle);
178	if (status != IBT_SUCCESS) {
179		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_init_err,
180		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
181		    "ibt attach failed", tnf_uint, status, status);
182		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
183		    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
184		return (1);
185	}
186
187	/* initialize the IBMF state context */
188	ibmf_statep->ibmf_ibt_handle = ibmf_ibt_handle;
189	ibmf_statep->ibmf_ci_list = (ibmf_ci_t *)NULL;
190	ibmf_statep->ibmf_ci_list_tail = (ibmf_ci_t *)NULL;
191	mutex_init(&ibmf_statep->ibmf_mutex, NULL, MUTEX_DRIVER, NULL);
192	ibmf_statep->ibmf_cq_handler = ibmf_i_mad_completions;
193
194	ibmf_statep->ibmf_taskq = taskq_create("ibmf_taskq", IBMF_TASKQ_1THREAD,
195	    MINCLSYSPRI, 1, ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
196
197	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
198	    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
199
200	return (0);
201}
202
203/*
204 * ibmf_fini():
205 *	Cleans up module state resources and unregisters from IBT framework.
206 */
207int
208ibmf_fini(void)
209{
210	ibmf_ci_t	*cip;
211	ibmf_ci_t	*tcip;
212	ibt_status_t	status;
213
214	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_start,
215	    IBMF_TNF_TRACE, "", "ibmf_fini() enter\n");
216
217	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
218
219	mutex_enter(&ibmf_statep->ibmf_mutex);
220
221	/* free all the Channel Interface (CI) context structures */
222	cip = ibmf_statep->ibmf_ci_list;
223	tcip = NULL;
224	while (cip != (ibmf_ci_t *)NULL) {
225
226		mutex_enter(&cip->ci_mutex);
227		ASSERT((cip->ci_state == IBMF_CI_STATE_PRESENT && cip->ci_ref ==
228		    0) || (cip->ci_state == IBMF_CI_STATE_GONE));
229		ASSERT(cip->ci_init_state == IBMF_CI_INIT_HCA_LINKED);
230		ASSERT(cip->ci_qp_list == NULL && cip->ci_qp_list_tail == NULL);
231		if (tcip != (ibmf_ci_t *)NULL)
232			tcip->ci_next = cip->ci_next;
233		if (ibmf_statep->ibmf_ci_list_tail == cip)
234			ibmf_statep->ibmf_ci_list_tail = NULL;
235		if (ibmf_statep->ibmf_ci_list == cip)
236			ibmf_statep->ibmf_ci_list = cip->ci_next;
237		tcip = cip->ci_next;
238		mutex_exit(&cip->ci_mutex);
239		/* free up the ci structure */
240		if (cip->ci_port_kstatp != NULL) {
241			kstat_delete(cip->ci_port_kstatp);
242		}
243		mutex_destroy(&cip->ci_mutex);
244		mutex_destroy(&cip->ci_clients_mutex);
245		mutex_destroy(&cip->ci_wqe_mutex);
246		cv_destroy(&cip->ci_state_cv);
247		cv_destroy(&cip->ci_wqes_cv);
248		kmem_free((void *) cip, sizeof (ibmf_ci_t));
249		cip = tcip;
250	}
251
252	ASSERT(ibmf_statep->ibmf_ci_list == NULL);
253	ASSERT(ibmf_statep->ibmf_ci_list_tail == NULL);
254
255	taskq_destroy(ibmf_statep->ibmf_taskq);
256
257	mutex_exit(&ibmf_statep->ibmf_mutex);
258
259	/* detach from IBTF */
260	status = ibt_detach(ibmf_statep->ibmf_ibt_handle);
261	if (status != IBT_SUCCESS) {
262		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_fini_err,
263		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
264		    "ibt detach error", tnf_uint, status, status);
265		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
266		    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");
267		return (1);
268	}
269
270	mutex_destroy(&ibmf_statep->ibmf_mutex);
271
272	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
273	    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");
274
275	return (0);
276}
277
278/*
279 * ibmf_i_validate_class_mask():
280 *	Checks client type value in client information structure.
281 */
282int
283ibmf_i_validate_class_mask(ibmf_register_info_t	*client_infop)
284{
285	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
286	    ibmf_i_validate_class_mask_start, IBMF_TNF_TRACE, "",
287	    "ibmf_i_validate_class_mask() enter, client_infop = %p\n",
288	    tnf_opaque, client_infop, client_infop);
289
290	if (IBMF_VALID_CLIENT_TYPE(client_infop->ir_client_class) == B_FALSE) {
291		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
292		    ibmf_i_validate_class_mask_err, IBMF_TNF_ERROR, "",
293		    "%s, class = %x\n", tnf_string, msg,
294		    "invalid class", tnf_uint, class,
295		    client_infop->ir_client_class);
296		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
297		    ibmf_i_validate_class_mask_end, IBMF_TNF_TRACE, "",
298		    "ibmf_i_validate_class_mask() exit\n");
299		return (IBMF_BAD_CLASS);
300	}
301
302	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_validate_class_mask_end,
303	    IBMF_TNF_TRACE, "", "ibmf_i_validate_class_mask() exit\n");
304	return (IBMF_SUCCESS);
305}
306
307/*
308 * ibmf_i_validate_ci_guid_and_port():
309 *	Checks validity of port number and HCA GUID at client
310 *	registration time.
311 */
312int
313ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num)
314{
315	ibt_status_t	status;
316	ibt_hca_attr_t	hca_attrs;
317
318	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
319	    ibmf_i_validate_ci_guid_and_port_start, IBMF_TNF_TRACE, "",
320	    "ibmf_i_validate_ci_guid_and_port() enter, hca_guid = %x, "
321	    "port_num = %d\n", tnf_opaque, hca_guid, hca_guid,
322	    tnf_uint, port_num, port_num);
323
324	/* check for incorrect port number specification */
325	if (port_num == 0) {
326		IBMF_TRACE_1(IBMF_TNF_NODEBUG, 1,
327		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
328		    "%s\n", tnf_string, msg, "port num is 0");
329		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
330		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
331		    "ibmf_i_validate_ci_guid_and_port() exit\n");
332		return (IBMF_BAD_PORT);
333	}
334
335	/* call IB transport layer for HCA attributes */
336	status = ibt_query_hca_byguid(hca_guid, &hca_attrs);
337	if (status != IBT_SUCCESS) {
338		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
339		    ibmf_i_validate_ci_guid_and_port_err,
340		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
341		    "query_hca_guid failed", tnf_uint, status, status);
342		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
343		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
344		    "ibmf_i_validate_ci_guid_and_port() exit\n");
345		return (IBMF_BAD_NODE);
346	}
347
348	/* check if the specified port number is within the HCAs range */
349	if (port_num > hca_attrs.hca_nports) {
350		IBMF_TRACE_3(IBMF_TNF_NODEBUG, 1,
351		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
352		    "%s, num = %d, hca_ports = %d\n",
353		    tnf_string, msg, "port num > valid ports",
354		    tnf_uint, num, port_num, tnf_uint, hca_nports,
355		    hca_attrs.hca_nports);
356		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
357		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
358		    "ibmf_i_validate_ci_guid_and_port() exit\n");
359		return (IBMF_BAD_PORT);
360	}
361
362	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
363	    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
364	    "ibmf_i_validate_ci_guid_and_port() exit\n");
365	return (IBMF_SUCCESS);
366}
367
368/*
369 * ibmf_i_lookup_ci():
370 * 	Lookup the ci and return if found. If the CI is not found, returns
371 * 	NULL.
372 */
373static ibmf_ci_t *
374ibmf_i_lookup_ci(ib_guid_t ci_guid)
375{
376	ibmf_ci_t	*cip = NULL;
377
378	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
379
380	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_start,
381	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci(): enter, guid = 0x%x\n",
382	    tnf_uint64, guid, ci_guid);
383
384	/* walk the CI list looking for one that matches the provided GUID */
385	mutex_enter(&ibmf_statep->ibmf_mutex);
386	cip = ibmf_statep->ibmf_ci_list;
387	while (cip != (ibmf_ci_t *)NULL) {
388		if (ci_guid == cip->ci_node_guid) {
389			/* found it in our list */
390			break;
391		}
392		cip = cip->ci_next;
393	}
394	mutex_exit(&ibmf_statep->ibmf_mutex);
395
396	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_end,
397	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci() exit\n");
398
399	return (cip);
400}
401
402/*
403 * ibmf_i_get_ci():
404 *	Get the CI structure based on the HCA GUID from a list if it exists.
405 *	If the CI structure does not exist, and the HCA GUID is valid,
406 *	create a new CI structure and add it to the list.
407 */
408int
409ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp)
410{
411	ibmf_ci_t 		*cip;
412	ibt_status_t		status;
413	boolean_t		invalid = B_FALSE;
414	ibt_hca_attr_t		hca_attrs;
415	ibmf_port_kstat_t	*ksp;
416
417	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_start,
418	    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() enter, clinfop = %p\n",
419	    tnf_opaque, client_infop, client_infop);
420
421	/* look for a CI context with a matching GUID */
422	cip = ibmf_i_lookup_ci(client_infop->ir_ci_guid);
423
424	if (cip == NULL) {
425
426		/*
427		 * attempt to create the ci. First, verify the ci exists.
428		 * If it exists, allocate ci memory and insert in the ci list.
429		 * It is possible that some other thread raced with us
430		 * and inserted created ci while we are blocked in
431		 * allocating memory. Check for that case and if that is indeed
432		 * the case, free up what we allocated and try to get a
433		 * reference count on the ci that the other thread added.
434		 */
435		status = ibt_query_hca_byguid(client_infop->ir_ci_guid,
436		    &hca_attrs);
437		if (status == IBT_SUCCESS) {
438
439			ibmf_ci_t *tcip;
440			char buf[128];
441
442			/* allocate memory for the CI structure */
443			cip = (ibmf_ci_t *)kmem_zalloc(sizeof (ibmf_ci_t),
444			    KM_SLEEP);
445
446			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
447
448			mutex_init(&cip->ci_mutex, NULL, MUTEX_DRIVER, NULL);
449			mutex_init(&cip->ci_clients_mutex, NULL, MUTEX_DRIVER,
450			    NULL);
451			mutex_init(&cip->ci_wqe_mutex, NULL, MUTEX_DRIVER,
452			    NULL);
453			cv_init(&cip->ci_state_cv, NULL, CV_DRIVER, NULL);
454			cv_init(&cip->ci_wqes_cv, NULL, CV_DRIVER, NULL);
455
456			(void) sprintf(buf, "r%08X",
457			    (uint32_t)client_infop->ir_ci_guid);
458			mutex_enter(&cip->ci_mutex);
459
460			cip->ci_state = IBMF_CI_STATE_PRESENT;
461			cip->ci_node_guid = client_infop->ir_ci_guid;
462
463			/* set up per CI kstats */
464			(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_stat",
465			    client_infop->ir_ci_guid,
466			    client_infop->ir_port_num);
467			if ((cip->ci_port_kstatp = kstat_create("ibmf", 0, buf,
468			    "misc", KSTAT_TYPE_NAMED,
469			    sizeof (ibmf_port_kstat_t) / sizeof (kstat_named_t),
470			    KSTAT_FLAG_WRITABLE)) == NULL) {
471				mutex_exit(&cip->ci_mutex);
472				mutex_destroy(&cip->ci_mutex);
473				mutex_destroy(&cip->ci_clients_mutex);
474				mutex_destroy(&cip->ci_wqe_mutex);
475				cv_destroy(&cip->ci_state_cv);
476				cv_destroy(&cip->ci_wqes_cv);
477				kmem_free((void *)cip, sizeof (ibmf_ci_t));
478				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
479				    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
480				    "%s\n", tnf_string, msg,
481				    "kstat create failed");
482				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
483				    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
484				    "ibmf_i_get_ci() exit\n");
485				return (IBMF_NO_RESOURCES);
486			}
487			ksp = (ibmf_port_kstat_t *)cip->ci_port_kstatp->ks_data;
488			kstat_named_init(&ksp->clients_registered,
489			    "clients_registered", KSTAT_DATA_UINT32);
490			kstat_named_init(&ksp->client_regs_failed,
491			    "client_registrations_failed", KSTAT_DATA_UINT32);
492			kstat_named_init(&ksp->send_wqes_alloced,
493			    "send_wqes_allocated", KSTAT_DATA_UINT32);
494			kstat_named_init(&ksp->recv_wqes_alloced,
495			    "receive_wqes_allocated", KSTAT_DATA_UINT32);
496			kstat_named_init(&ksp->swqe_allocs_failed,
497			    "send_wqe_allocs_failed", KSTAT_DATA_UINT32);
498			kstat_named_init(&ksp->rwqe_allocs_failed,
499			    "recv_wqe_allocs_failed", KSTAT_DATA_UINT32);
500			kstat_install(cip->ci_port_kstatp);
501
502			mutex_exit(&cip->ci_mutex);
503
504			mutex_enter(&ibmf_statep->ibmf_mutex);
505
506			tcip = ibmf_statep->ibmf_ci_list;
507			while (tcip != (ibmf_ci_t *)NULL) {
508				if (client_infop->ir_ci_guid ==
509				    tcip->ci_node_guid) {
510					/* found it in our list */
511					break;
512				}
513				tcip = tcip->ci_next;
514			}
515
516			/* if the ci isn't on the list, add it */
517			if (tcip == NULL) {
518				cip->ci_next = NULL;
519
520				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
521
522				if (ibmf_statep->ibmf_ci_list_tail != NULL)
523					ibmf_statep->ibmf_ci_list_tail->
524					    ci_next = cip;
525				if (ibmf_statep->ibmf_ci_list == NULL)
526					ibmf_statep->ibmf_ci_list = cip;
527				ibmf_statep->ibmf_ci_list_tail = cip;
528
529				mutex_enter(&cip->ci_mutex);
530				cip->ci_init_state |= IBMF_CI_INIT_HCA_LINKED;
531				mutex_exit(&cip->ci_mutex);
532
533			} else {
534				/* free cip and set it to the one on the list */
535				kstat_delete(cip->ci_port_kstatp);
536				mutex_destroy(&cip->ci_mutex);
537				mutex_destroy(&cip->ci_clients_mutex);
538				mutex_destroy(&cip->ci_wqe_mutex);
539				cv_destroy(&cip->ci_state_cv);
540				cv_destroy(&cip->ci_wqes_cv);
541				kmem_free((void *)cip, sizeof (ibmf_ci_t));
542
543				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
544
545				cip = tcip;
546			}
547			mutex_exit(&ibmf_statep->ibmf_mutex);
548		} else {
549			/* we didn't find it and the CI doesn't exist */
550			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
551			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "", "%s\n",
552			    tnf_string, msg, "GUID doesn't exist");
553			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
554			    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
555			    "ibmf_i_get_ci() exit\n");
556			return (IBMF_TRANSPORT_FAILURE);
557		}
558	}
559
560	ASSERT(cip != NULL);
561
562	/*
563	 * We now have a CI context structure, either found it on the list,
564	 * or created it.
565	 * We now proceed to intialize the CI context.
566	 */
567	for (;;) {
568		mutex_enter(&cip->ci_mutex);
569
570		/* CI is INITED & no state change in progress; we are all set */
571		if (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
572		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
573		    IBMF_CI_STATE_UNINITING)) == 0) {
574
575			cip->ci_ref++;
576			mutex_exit(&cip->ci_mutex);
577
578			break;
579		}
580
581		/* CI is PRESENT; transition it to INITED */
582		if (cip->ci_state == IBMF_CI_STATE_PRESENT && (cip->
583		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
584		    IBMF_CI_STATE_INITING)) == 0) {
585
586			/* mark state as initing and init the ci */
587			cip->ci_state_flags |= IBMF_CI_STATE_INITING;
588			mutex_exit(&cip->ci_mutex);
589
590			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
591
592			if (ibmf_i_init_ci(client_infop, cip) != IBMF_SUCCESS) {
593				invalid = B_TRUE;
594				break;
595			}
596
597			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
598
599			continue;
600		}
601
602		/*
603		 * If CI is GONE and no validation is in progress, we should
604		 * return failure. Also, if CI is INITED but in the process of
605		 * being made GONE (ie., a hot remove in progress), return
606		 * failure.
607		 */
608		if ((cip->ci_state == IBMF_CI_STATE_GONE && (cip->
609		    ci_state_flags & IBMF_CI_STATE_VALIDATING) == 0) ||
610		    (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
611		    ci_state_flags & IBMF_CI_STATE_INVALIDATING) != 0)) {
612
613			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
614			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
615			    "ci_state = %x, ci_state_flags = %x\n",
616			    tnf_opaque, cip->ci_state, cip->ci_state,
617			    tnf_opaque, cip->ci_state_flags,
618			    cip->ci_state_flags);
619
620			invalid = B_TRUE;
621			mutex_exit(&cip->ci_mutex);
622
623			break;
624		}
625
626		/* a state change in progress; block waiting for state change */
627		if (cip->ci_state_flags & IBMF_CI_STATE_VALIDATING)
628			cip->ci_state_flags |= IBMF_CI_STATE_VALIDATE_WAIT;
629		else if (cip->ci_state_flags & IBMF_CI_STATE_INITING)
630			cip->ci_state_flags |= IBMF_CI_STATE_INIT_WAIT;
631		else if (cip->ci_state_flags & IBMF_CI_STATE_UNINITING)
632			cip->ci_state_flags |= IBMF_CI_STATE_UNINIT_WAIT;
633
634		cv_wait(&cip->ci_state_cv, &cip->ci_mutex);
635
636		mutex_exit(&cip->ci_mutex);
637	}
638
639	if (invalid == B_TRUE) {
640		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L2, ibmf_i_get_ci_err,
641		    IBMF_TNF_ERROR, "", "ibmf_i_get_ci() error\n");
642		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
643		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
644		return (IBMF_FAILURE);
645	}
646
647	if (cip != NULL) {
648		*cipp = cip;
649		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
650		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
651		return (IBMF_SUCCESS);
652	} else {
653		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
654		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
655		return (IBMF_FAILURE);
656	}
657}
658
659/*
660 * ibmf_i_release_ci():
661 *	Drop the reference count for the CI.
662 */
663void
664ibmf_i_release_ci(ibmf_ci_t *cip)
665{
666	uint_t ref;
667
668	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_start,
669	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() enter, cip = %p\n",
670	    tnf_opaque, cip, cip);
671
672	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
673
674	mutex_enter(&cip->ci_mutex);
675	ref = cip->ci_ref--;
676	if (ref == 1) {
677		ASSERT(cip->ci_state == IBMF_CI_STATE_INITED);
678		cip->ci_state_flags |= IBMF_CI_STATE_UNINITING;
679	}
680	mutex_exit(&cip->ci_mutex);
681
682	if (ref == 1) {
683		ibmf_i_uninit_ci(cip);
684	}
685
686	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_end,
687	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() exit\n");
688}
689
690/*
691 * ibmf_i_init_ci():
692 *	Initialize the CI structure by setting up the HCA, allocating
693 *	protection domains, completion queues, a pool of WQEs.
694 */
695/* ARGSUSED */
696static int
697ibmf_i_init_ci(ibmf_register_info_t *client_infop, ibmf_ci_t *cip)
698{
699	ibt_pd_hdl_t		pd;
700	ibt_status_t		status;
701	ib_guid_t		ci_guid;
702	ibt_hca_attr_t		hca_attrs;
703	ibt_hca_hdl_t		hca_handle;
704	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
705	boolean_t		error = B_FALSE;
706	int			ibmfstatus = IBMF_SUCCESS;
707	char			errmsg[128];
708
709	_NOTE(ASSUMING_PROTECTED(*cip))
710
711	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
712	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
713
714	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_start,
715	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() enter, cip = %p\n",
716	    tnf_opaque, ibmf_ci, cip);
717
718	mutex_enter(&cip->ci_mutex);
719	ci_guid = cip->ci_node_guid;
720	ASSERT(cip->ci_state == IBMF_CI_STATE_PRESENT);
721	ASSERT((cip->ci_state_flags & IBMF_CI_STATE_INITING) != 0);
722	mutex_exit(&cip->ci_mutex);
723
724	/* set up a connection to the HCA specified by the GUID */
725	status = ibt_open_hca(ibmf_statep->ibmf_ibt_handle, ci_guid,
726	    &hca_handle);
727	ASSERT(status != IBT_HCA_IN_USE);
728	if (status != IBT_SUCCESS) {
729		ibmf_i_init_ci_done(cip);
730		(void) sprintf(errmsg, "ibt open hca failed, status = 0x%x",
731		    status);
732		error = B_TRUE;
733		ibmfstatus = IBMF_TRANSPORT_FAILURE;
734		goto bail;
735	}
736
737	/* get the HCA attributes */
738	status = ibt_query_hca(hca_handle, &hca_attrs);
739	if (status != IBT_SUCCESS) {
740		(void) ibt_close_hca(hca_handle);
741		ibmf_i_init_ci_done(cip);
742		(void) sprintf(errmsg, "ibt query hca failed, status = 0x%x",
743		    status);
744		error = B_TRUE;
745		ibmfstatus = IBMF_TRANSPORT_FAILURE;
746		goto bail;
747	}
748
749	/* allocate a Protection Domain */
750	status = ibt_alloc_pd(hca_handle, pd_flags, &pd);
751	if (status != IBT_SUCCESS) {
752		(void) ibt_close_hca(hca_handle);
753		ibmf_i_init_ci_done(cip);
754		(void) sprintf(errmsg, "alloc PD failed, status = 0x%x",
755		    status);
756		error = B_TRUE;
757		ibmfstatus = IBMF_TRANSPORT_FAILURE;
758		goto bail;
759	}
760
761	/* init the ci */
762	mutex_enter(&cip->ci_mutex);
763	cip->ci_nports = hca_attrs.hca_nports;
764	cip->ci_vendor_id = hca_attrs.hca_vendor_id;
765	cip->ci_device_id = hca_attrs.hca_device_id;
766	cip->ci_ci_handle = hca_handle;
767	cip->ci_pd = pd;
768	cip->ci_init_state |= IBMF_CI_INIT_HCA_INITED;
769	mutex_exit(&cip->ci_mutex);
770
771	/* initialize cqs */
772	if (ibmf_i_init_cqs(cip) != IBMF_SUCCESS) {
773		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
774		mutex_enter(&cip->ci_mutex);
775		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
776		mutex_exit(&cip->ci_mutex);
777		(void) ibt_close_hca(cip->ci_ci_handle);
778		ibmf_i_init_ci_done(cip);
779		(void) sprintf(errmsg, "init CQs failed");
780		error = B_TRUE;
781		ibmfstatus = IBMF_FAILURE;
782		goto bail;
783	}
784
785	/* initialize wqes */
786	if (ibmf_i_init_wqes(cip) != IBMF_SUCCESS) {
787		ibmf_i_fini_cqs(cip);
788		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
789		mutex_enter(&cip->ci_mutex);
790		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
791		mutex_exit(&cip->ci_mutex);
792		(void) ibt_close_hca(cip->ci_ci_handle);
793		ibmf_i_init_ci_done(cip);
794		(void) sprintf(errmsg, "init WQEs failed");
795		error = B_TRUE;
796		ibmfstatus = IBMF_FAILURE;
797		goto bail;
798	}
799
800	/* initialize the UD destination structure pool */
801	ibmf_i_init_ud_dest(cip);
802
803	/* initialize the QP list */
804	ibmf_i_init_qplist(cip);
805
806	/* initialize condition variable, state, and enable CQ notification */
807	cip->ci_init_state |= IBMF_CI_INIT_MUTEX_CV_INITED;
808	(void) ibt_enable_cq_notify(cip->ci_cq_handle, IBT_NEXT_COMPLETION);
809	(void) ibt_enable_cq_notify(cip->ci_alt_cq_handle, IBT_NEXT_COMPLETION);
810
811	/* set state to INITED */
812	mutex_enter(&cip->ci_mutex);
813	cip->ci_state = IBMF_CI_STATE_INITED;
814	mutex_exit(&cip->ci_mutex);
815
816	/* wake up waiters blocked on an initialization done event */
817	ibmf_i_init_ci_done(cip);
818
819bail:
820	if (error) {
821		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_ci_err,
822		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
823		    errmsg, tnf_uint, ibmfstatus, ibmfstatus);
824	}
825
826	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_end,
827	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() exit, cip = %p\n",
828	    tnf_opaque, ibmf_ci, cip);
829
830	return (ibmfstatus);
831}
832
833/*
834 * ibmf_i_uninit_ci():
835 *	Free up the resources allocated when initializing the CI structure.
836 */
837static void
838ibmf_i_uninit_ci(ibmf_ci_t *cip)
839{
840	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_start,
841	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() enter, cip = %p\n",
842	    tnf_opaque, cip, cip);
843
844	ASSERT(MUTEX_HELD(&cip->ci_mutex) == 0);
845
846	/* clean up the QP list */
847	ibmf_i_fini_qplist(cip);
848
849	/* empty completions directly */
850	ibmf_i_mad_completions(cip->ci_cq_handle, (void*)cip);
851	ibmf_i_mad_completions(cip->ci_alt_cq_handle, (void*)cip);
852
853	mutex_enter(&cip->ci_mutex);
854	if (cip->ci_init_state & IBMF_CI_INIT_MUTEX_CV_INITED) {
855		cip->ci_init_state &= ~IBMF_CI_INIT_MUTEX_CV_INITED;
856	}
857	mutex_exit(&cip->ci_mutex);
858
859	/* clean up the UD destination structure pool */
860	ibmf_i_fini_ud_dest(cip);
861
862	/* clean up any WQE caches */
863	ibmf_i_fini_wqes(cip);
864
865	/* free up the completion queues */
866	ibmf_i_fini_cqs(cip);
867
868	/* free up the protection domain */
869	(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
870
871	/* close the HCA connection */
872	(void) ibt_close_hca(cip->ci_ci_handle);
873
874	/* set state down to PRESENT */
875	mutex_enter(&cip->ci_mutex);
876	cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
877	cip->ci_state = IBMF_CI_STATE_PRESENT;
878	mutex_exit(&cip->ci_mutex);
879
880	/* wake up waiters blocked on an un-initialization done event */
881	ibmf_i_uninit_ci_done(cip);
882
883	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_end,
884	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() exit\n");
885}
886
887/*
888 * ibmf_i_init_ci_done():
889 *	Mark CI initialization as "done", and wake up any waiters.
890 */
891static void
892ibmf_i_init_ci_done(ibmf_ci_t *cip)
893{
894	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_start,
895	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() enter, cip = %p\n",
896	    tnf_opaque, cip, cip);
897
898	mutex_enter(&cip->ci_mutex);
899	cip->ci_state_flags &= ~IBMF_CI_STATE_INITING;
900	if (cip->ci_state_flags & IBMF_CI_STATE_INIT_WAIT) {
901		cip->ci_state_flags &= ~IBMF_CI_STATE_INIT_WAIT;
902		cv_broadcast(&cip->ci_state_cv);
903	}
904	mutex_exit(&cip->ci_mutex);
905
906	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_end,
907	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() exit\n");
908}
909
910/*
911 * ibmf_i_uninit_ci_done():
912 *	Mark CI uninitialization as "done", and wake up any waiters.
913 */
914static void
915ibmf_i_uninit_ci_done(ibmf_ci_t *cip)
916{
917	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_start,
918	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() enter, cip = %p\n",
919	    tnf_opaque, cip, cip);
920
921	mutex_enter(&cip->ci_mutex);
922	cip->ci_state_flags &= ~IBMF_CI_STATE_UNINITING;
923	if (cip->ci_state_flags & IBMF_CI_STATE_UNINIT_WAIT) {
924		cip->ci_state_flags &= ~IBMF_CI_STATE_UNINIT_WAIT;
925		cv_broadcast(&cip->ci_state_cv);
926	}
927	mutex_exit(&cip->ci_mutex);
928
929	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_end,
930	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() exit\n");
931}
932
933/*
934 * ibmf_i_init_cqs():
935 *	Allocate a completion queue and set the CQ handler.
936 */
937static int
938ibmf_i_init_cqs(ibmf_ci_t *cip)
939{
940	ibt_status_t		status;
941	ibt_cq_attr_t		cq_attrs;
942	ibt_cq_hdl_t		cq_handle;
943	uint32_t		num_entries;
944
945	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
946
947	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_start,
948	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() enter, cip = %p\n",
949	    tnf_opaque, cip, cip);
950
951	/*
952	 * Allocate completion queue handle.
953	 * The CQ size should be a 2^n - 1 value to avoid excess CQ allocation
954	 * as done by some HCAs when the CQ size is specified as a 2^n
955	 * quantity.
956	 */
957	cq_attrs.cq_size = (cip->ci_nports * (ibmf_send_wqes_posted_per_qp +
958	    ibmf_recv_wqes_posted_per_qp)) - 1;
959
960	cq_attrs.cq_sched = NULL;
961	cq_attrs.cq_flags = 0;
962
963	/* Get the CQ handle for the special QPs */
964	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
965	    &cq_handle, &num_entries);
966	if (status != IBT_SUCCESS) {
967		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
968		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
969		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
970		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
971		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
972		return (IBMF_TRANSPORT_FAILURE);
973	}
974	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
975	cip->ci_cq_handle = cq_handle;
976
977	/* Get the CQ handle for the alternate QPs */
978	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
979	    &cq_handle, &num_entries);
980	if (status != IBT_SUCCESS) {
981		(void) ibt_free_cq(cip->ci_cq_handle);
982		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
983		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
984		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
985		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
986		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
987		return (IBMF_TRANSPORT_FAILURE);
988	}
989	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
990	cip->ci_alt_cq_handle = cq_handle;
991
992	/* set state to CQ INITED */
993	mutex_enter(&cip->ci_mutex);
994	cip->ci_init_state |= IBMF_CI_INIT_CQ_INITED;
995	mutex_exit(&cip->ci_mutex);
996
997	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
998	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
999
1000	return (IBMF_SUCCESS);
1001}
1002
1003/*
1004 * ibmf_i_fini_cqs():
1005 *	Free up the completion queue
1006 */
1007static void
1008ibmf_i_fini_cqs(ibmf_ci_t *cip)
1009{
1010	ibt_status_t	status;
1011	uint_t		ci_init_state;
1012
1013	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_start,
1014	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() enter, cip = %p\n",
1015	    tnf_opaque, cip, cip);
1016
1017	mutex_enter(&cip->ci_mutex);
1018	ci_init_state = cip->ci_init_state;
1019	cip->ci_init_state &= ~IBMF_CI_INIT_CQ_INITED;
1020	mutex_exit(&cip->ci_mutex);
1021
1022	if (ci_init_state & IBMF_CI_INIT_CQ_INITED) {
1023		status = ibt_free_cq(cip->ci_alt_cq_handle);
1024		if (status != IBT_SUCCESS) {
1025			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1026			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1027			    "%s, status = %d\n", tnf_string, msg,
1028			    "ibt free cqs failed", tnf_uint, status, status);
1029		}
1030
1031		status = ibt_free_cq(cip->ci_cq_handle);
1032		if (status != IBT_SUCCESS) {
1033			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1034			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1035			    "%s, status = %d\n", tnf_string, msg,
1036			    "ibt free cqs failed", tnf_uint, status, status);
1037		}
1038	}
1039
1040	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_end,
1041	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() exit");
1042}
1043
1044/*
1045 * ibmf_i_init_qplist():
1046 *	Set the QP list inited state flag
1047 */
1048static void
1049ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip)
1050{
1051	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_start,
1052	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() enter, cip = %p\n",
1053	    tnf_opaque, cip, ibmf_cip);
1054
1055	mutex_enter(&ibmf_cip->ci_mutex);
1056	ASSERT((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) == 0);
1057	ASSERT(ibmf_cip->ci_qp_list == NULL && ibmf_cip->ci_qp_list_tail ==
1058	    NULL);
1059	cv_init(&ibmf_cip->ci_qp_cv, NULL, CV_DRIVER, NULL);
1060	ibmf_cip->ci_init_state |= IBMF_CI_INIT_QP_LIST_INITED;
1061	mutex_exit(&ibmf_cip->ci_mutex);
1062
1063	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_end,
1064	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() exit\n");
1065}
1066
1067/*
1068 * ibmf_i_fini_qplist():
1069 *	Clean up the QP list
1070 */
1071static void
1072ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip)
1073{
1074	ibmf_qp_t *qpp;
1075	ibmf_alt_qp_t *altqpp;
1076	ibt_status_t status;
1077
1078	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_start,
1079	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() enter, cip = %p\n",
1080	    tnf_opaque, cip, ibmf_cip);
1081
1082	mutex_enter(&ibmf_cip->ci_mutex);
1083
1084	if ((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) != 0) {
1085
1086		/* walk through the qp list and free the memory */
1087		qpp = ibmf_cip->ci_qp_list;
1088		while (qpp != NULL) {
1089			/* Remove qpp from the list */
1090			ibmf_cip->ci_qp_list = qpp->iq_next;
1091
1092			ASSERT(qpp->iq_qp_ref == 0);
1093			ASSERT(qpp->iq_flags == IBMF_QP_FLAGS_INVALID);
1094			mutex_exit(&ibmf_cip->ci_mutex);
1095			if (qpp->iq_qp_handle != NULL) {
1096				/* Flush the special QP */
1097				status = ibt_flush_qp(qpp->iq_qp_handle);
1098				if (status != IBT_SUCCESS) {
1099					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1100					    DPRINT_L1, ibmf_i_fini_qplist_err,
1101					    IBMF_TNF_ERROR, "",
1102					    "%s, status = %d\n", tnf_string,
1103					    msg, "ibt_flush_qp returned error",
1104					    tnf_int, status, status);
1105				}
1106
1107				/* Grab the ci_mutex mutex before waiting */
1108				mutex_enter(&ibmf_cip->ci_mutex);
1109
1110				/* Wait if WQEs for special QPs are alloced */
1111				while (ibmf_cip->ci_wqes_alloced != 0) {
1112					cv_wait(&ibmf_cip->ci_wqes_cv,
1113					    &ibmf_cip->ci_mutex);
1114				}
1115
1116				mutex_exit(&ibmf_cip->ci_mutex);
1117
1118				/* Free the special QP */
1119				status = ibt_free_qp(qpp->iq_qp_handle);
1120				if (status != IBT_SUCCESS) {
1121					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1122					    DPRINT_L1, ibmf_i_fini_qplist_err,
1123					    IBMF_TNF_ERROR, "",
1124					    "%s, status = %d\n", tnf_string,
1125					    msg, "ibt_free_qp returned error",
1126					    tnf_int, status, status);
1127				}
1128			}
1129			mutex_destroy(&qpp->iq_mutex);
1130			kmem_free((void *)qpp, sizeof (ibmf_qp_t));
1131
1132			/* Grab the mutex again before accessing the QP list */
1133			mutex_enter(&ibmf_cip->ci_mutex);
1134			qpp = ibmf_cip->ci_qp_list;
1135		}
1136
1137		cv_destroy(&ibmf_cip->ci_qp_cv);
1138
1139		ibmf_cip->ci_qp_list = ibmf_cip->ci_qp_list_tail = NULL;
1140		ibmf_cip->ci_init_state &=  ~IBMF_CI_INIT_QP_LIST_INITED;
1141
1142		altqpp = ibmf_cip->ci_alt_qp_list;
1143		while (altqpp != NULL) {
1144			/* Remove altqpp from the list */
1145			ibmf_cip->ci_alt_qp_list = altqpp->isq_next;
1146			mutex_exit(&ibmf_cip->ci_mutex);
1147
1148			if (altqpp->isq_qp_handle != NULL) {
1149				/* Flush the special QP */
1150				status = ibt_flush_qp(altqpp->isq_qp_handle);
1151				if (status != IBT_SUCCESS) {
1152					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1153					    DPRINT_L1, ibmf_i_fini_qplist_err,
1154					    IBMF_TNF_ERROR, "",
1155					    "%s, status = %d\n", tnf_string,
1156					    msg, "ibt_flush_qp returned error",
1157					    tnf_int, status, status);
1158				}
1159
1160				/* Free the special QP */
1161				status = ibt_free_qp(altqpp->isq_qp_handle);
1162				if (status != IBT_SUCCESS) {
1163					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1164					    DPRINT_L1, ibmf_i_fini_qplist_err,
1165					    IBMF_TNF_ERROR, "",
1166					    "%s, status = %d\n", tnf_string,
1167					    msg, "ibt_free_qp returned error",
1168					    tnf_int, status, status);
1169				}
1170			}
1171			mutex_destroy(&altqpp->isq_mutex);
1172			kmem_free((void *)altqpp, sizeof (ibmf_alt_qp_t));
1173
1174			/* Grab the mutex again before accessing the QP list */
1175			mutex_enter(&ibmf_cip->ci_mutex);
1176			altqpp = ibmf_cip->ci_alt_qp_list;
1177		}
1178	}
1179
1180	mutex_exit(&ibmf_cip->ci_mutex);
1181
1182	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_end,
1183	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() exit\n");
1184}
1185
1186/*
1187 * ibmf_i_alloc_client():
1188 *	Allocate and initialize the client structure.
1189 */
1190int
1191ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags,
1192    ibmf_client_t **clientpp)
1193{
1194	ibmf_client_t		*ibmf_clientp;
1195	char			buf[128];
1196	ibmf_kstat_t		*ksp;
1197
1198	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_start,
1199	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() enter, "
1200	    "client_infop = %p\n", tnf_opaque, client_infop, client_infop);
1201
1202	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1203
1204	/* allocate memory for ibmf_client and initialize it */
1205	ibmf_clientp = kmem_zalloc(sizeof (ibmf_client_t), KM_SLEEP);
1206	mutex_init(&ibmf_clientp->ic_mutex, NULL, MUTEX_DRIVER, NULL);
1207	mutex_init(&ibmf_clientp->ic_msg_mutex, NULL, MUTEX_DRIVER, NULL);
1208	mutex_init(&ibmf_clientp->ic_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
1209	cv_init(&ibmf_clientp->ic_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
1210
1211	(void) sprintf(buf, "s%08X_0x%08X",
1212	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1213
1214	/* create a taskq to handle send completions based on reg flags */
1215	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1216		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1217			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1218			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1219			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1220		else
1221			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1222			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1223			    ibmf_taskq_max_tasks,
1224			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1225		if (ibmf_clientp->ic_send_taskq == NULL) {
1226			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1227			mutex_destroy(&ibmf_clientp->ic_mutex);
1228			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1229			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1230			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1231			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1232			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1233			    tnf_string, msg, buf);
1234			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1235			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1236			    "ibmf_i_alloc_client() exit\n");
1237			return (IBMF_NO_RESOURCES);
1238		}
1239	}
1240	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_SEND_TASKQ_DONE;
1241
1242	(void) sprintf(buf, "r%08X_0x%08X",
1243	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1244
1245	/* create a taskq to handle receive completions on reg flags */
1246	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1247		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1248			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1249			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1250			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1251		else
1252			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1253			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1254			    ibmf_taskq_max_tasks,
1255			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1256		if (ibmf_clientp->ic_recv_taskq == NULL) {
1257			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1258			mutex_destroy(&ibmf_clientp->ic_mutex);
1259			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1260			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1261			taskq_destroy(ibmf_clientp->ic_send_taskq);
1262			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1263			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1264			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1265			    tnf_string, msg, buf);
1266			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1267			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1268			    "ibmf_i_alloc_client() exit\n");
1269			return (IBMF_NO_RESOURCES);
1270		}
1271	}
1272	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_RECV_TASKQ_DONE;
1273	ibmf_clientp->ic_client_info.ci_guid = client_infop->ir_ci_guid;
1274	ibmf_clientp->ic_client_info.port_num = client_infop->ir_port_num;
1275
1276	/* Get the base LID */
1277	(void) ibt_get_port_state_byguid(ibmf_clientp->ic_client_info.ci_guid,
1278	    ibmf_clientp->ic_client_info.port_num, NULL,
1279	    &ibmf_clientp->ic_base_lid);
1280
1281	ibmf_clientp->ic_client_info.client_class =
1282	    client_infop->ir_client_class;
1283
1284	/* set up the per client ibmf kstats */
1285	(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_%X_stat",
1286	    client_infop->ir_ci_guid, client_infop->ir_port_num,
1287	    client_infop->ir_client_class);
1288	if ((ibmf_clientp->ic_kstatp = kstat_create("ibmf", 0, buf, "misc",
1289	    KSTAT_TYPE_NAMED, sizeof (ibmf_kstat_t) / sizeof (kstat_named_t),
1290	    KSTAT_FLAG_WRITABLE)) == NULL) {
1291		cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1292		mutex_destroy(&ibmf_clientp->ic_mutex);
1293		mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1294		mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1295		if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1296			taskq_destroy(ibmf_clientp->ic_send_taskq);
1297			taskq_destroy(ibmf_clientp->ic_recv_taskq);
1298		}
1299		kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1300		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1301		    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1302		    tnf_string, msg, "kstat creation failed");
1303		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1304		    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1305		    "ibmf_i_alloc_client() exit\n");
1306		return (IBMF_NO_RESOURCES);
1307	}
1308	ksp = (ibmf_kstat_t *)ibmf_clientp->ic_kstatp->ks_data;
1309	kstat_named_init(&ksp->msgs_alloced, "messages_allocated",
1310	    KSTAT_DATA_UINT32);
1311	kstat_named_init(&ksp->msgs_active, "messages_active",
1312	    KSTAT_DATA_UINT32);
1313	kstat_named_init(&ksp->msgs_sent, "messages_sent", KSTAT_DATA_UINT32);
1314	kstat_named_init(&ksp->msgs_received, "messages_received",
1315	    KSTAT_DATA_UINT32);
1316	kstat_named_init(&ksp->sends_active, "sends_active", KSTAT_DATA_UINT32);
1317	kstat_named_init(&ksp->recvs_active, "receives_active",
1318	    KSTAT_DATA_UINT32);
1319	kstat_named_init(&ksp->ud_dests_alloced, "ud_dests_allocated",
1320	    KSTAT_DATA_UINT32);
1321	kstat_named_init(&ksp->alt_qps_alloced, "alt_qps_allocated",
1322	    KSTAT_DATA_UINT32);
1323	kstat_named_init(&ksp->send_cb_active, "send_callbacks_active",
1324	    KSTAT_DATA_UINT32);
1325	kstat_named_init(&ksp->recv_cb_active, "receive_callbacks_active",
1326	    KSTAT_DATA_UINT32);
1327	kstat_named_init(&ksp->recv_bufs_alloced, "receive_bufs_allocated",
1328	    KSTAT_DATA_UINT32);
1329	kstat_named_init(&ksp->msg_allocs_failed, "msg_allocs_failed",
1330	    KSTAT_DATA_UINT32);
1331	kstat_named_init(&ksp->uddest_allocs_failed, "uddest_allocs_failed",
1332	    KSTAT_DATA_UINT32);
1333	kstat_named_init(&ksp->alt_qp_allocs_failed, "alt_qp_allocs_failed",
1334	    KSTAT_DATA_UINT32);
1335	kstat_named_init(&ksp->send_pkt_failed, "send_pkt_failed",
1336	    KSTAT_DATA_UINT32);
1337	kstat_named_init(&ksp->rmpp_errors, "rmpp_errors",
1338	    KSTAT_DATA_UINT32);
1339
1340	kstat_install(ibmf_clientp->ic_kstatp);
1341
1342	*clientpp = ibmf_clientp;
1343
1344	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1345
1346	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_end,
1347	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() exit\n");
1348
1349	return (IBMF_SUCCESS);
1350}
1351
1352/*
1353 * ibmf_i_free_client():
1354 *	Free up the client structure and release resources
1355 */
1356void
1357ibmf_i_free_client(ibmf_client_t *clientp)
1358{
1359	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_start,
1360	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() enter, clientp = %p\n",
1361	    tnf_opaque, clientp, clientp);
1362
1363	/* delete the general ibmf kstats */
1364	if (clientp->ic_kstatp != NULL) {
1365		kstat_delete(clientp->ic_kstatp);
1366		clientp->ic_kstatp = NULL;
1367	}
1368
1369	/* release references and destroy the resources */
1370	if (clientp->ic_init_state_class & IBMF_CI_INIT_SEND_TASKQ_DONE) {
1371		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1372			taskq_destroy(clientp->ic_send_taskq);
1373		}
1374		clientp->ic_init_state_class &= ~IBMF_CI_INIT_SEND_TASKQ_DONE;
1375	}
1376
1377	if (clientp->ic_init_state_class & IBMF_CI_INIT_RECV_TASKQ_DONE) {
1378		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1379			taskq_destroy(clientp->ic_recv_taskq);
1380		}
1381		clientp->ic_init_state_class &= ~IBMF_CI_INIT_RECV_TASKQ_DONE;
1382	}
1383
1384	mutex_destroy(&clientp->ic_mutex);
1385	mutex_destroy(&clientp->ic_msg_mutex);
1386	mutex_destroy(&clientp->ic_kstat_mutex);
1387	cv_destroy(&clientp->ic_recv_cb_teardown_cv);
1388	kmem_free((void *)clientp, sizeof (ibmf_client_t));
1389
1390	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_end,
1391	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() exit\n");
1392}
1393
1394/*
1395 * ibmf_i_validate_classes_and_port():
1396 *	Validate the class type and get the client structure
1397 */
1398int
1399ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip,
1400    ibmf_register_info_t *client_infop)
1401{
1402	ibmf_client_t		*ibmf_clientp;
1403	int			status;
1404
1405	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1406	    ibmf_i_validate_classes_and_port_start, IBMF_TNF_TRACE, "",
1407	    "ibmf_i_validate_classes_and_port() enter, cip = %p, "
1408	    "clientp = %p\n", tnf_opaque, cip, ibmf_cip,
1409	    tnf_opaque, client_infop, client_infop);
1410
1411	/*
1412	 * the Solaris implementation of IBMF does not support
1413	 * the UNIVERSAL_CLASS
1414	 */
1415	if (client_infop->ir_client_class == UNIVERSAL_CLASS) {
1416		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1417		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1418		    "%s\n", tnf_string, msg,
1419		    "UNIVERSAL class is not supported");
1420		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1421		    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1422		    "ibmf_i_validate_classes_and_port() exit\n");
1423		return (IBMF_NOT_SUPPORTED);
1424	}
1425
1426	/*
1427	 * Check if the client context already exists on the list
1428	 * maintained in the CI context. If it is, then the client class
1429	 * has already been registered for.
1430	 */
1431	status = ibmf_i_lookup_client_by_info(ibmf_cip, client_infop,
1432	    &ibmf_clientp);
1433	if (status != IBMF_SUCCESS) {
1434		/* client class has not been previously registered for */
1435		status = IBMF_SUCCESS;
1436	} else {
1437		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1438		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1439		    "client already registered, class = 0x%X\n",
1440		    tnf_uint, class, client_infop->ir_client_class);
1441		status = IBMF_PORT_IN_USE;
1442	}
1443
1444	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1445	    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1446	    "ibmf_i_validate_classes_and_port() exit\n");
1447	return (status);
1448}
1449
1450/*
1451 * ibmf_i_lookup_client_by_info():
1452 *	Get the client structure from the list
1453 */
1454static int
1455ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
1456    ibmf_register_info_t *ir_client, ibmf_client_t **clientpp)
1457{
1458	ibmf_client_t *clientp;
1459
1460	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1461	    ibmf_i_lookup_client_by_info_start, IBMF_TNF_TRACE, "",
1462	    "ibmf_i_lookup_client_by_info() enter, cip = %p, clientinfo = %p\n",
1463	    tnf_opaque, cip, ibmf_cip, tnf_opaque, clientinfo, ir_client);
1464
1465	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1466
1467	/*
1468	 * walk the CI's client list searching for one with the specified class
1469	 */
1470	mutex_enter(&ibmf_cip->ci_clients_mutex);
1471	clientp = ibmf_cip->ci_clients;
1472	while (clientp != NULL) {
1473		ibmf_client_info_t *tmp = &clientp->ic_client_info;
1474		if (tmp->client_class == ir_client->ir_client_class &&
1475		    ir_client->ir_client_class != UNIVERSAL_CLASS &&
1476		    tmp->ci_guid == ir_client->ir_ci_guid &&
1477		    tmp->port_num == ir_client->ir_port_num) {
1478			/* found our match */
1479			break;
1480		}
1481		clientp = clientp->ic_next;
1482	}
1483	mutex_exit(&ibmf_cip->ci_clients_mutex);
1484
1485	if (clientp != NULL) {
1486		*clientpp = clientp;
1487		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1488		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1489		    "ibmf_i_lookup_client_by_info(): clientp = %p\n",
1490		    tnf_opaque, clientp, clientp);
1491		return (IBMF_SUCCESS);
1492	} else {
1493		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1494		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1495		    "ibmf_i_lookup_client_by_info() exit\n");
1496		return (IBMF_FAILURE);
1497	}
1498}
1499
1500/*
1501 * ibmf_i_add_client():
1502 *	Add a new client to the client list
1503 */
1504void
1505ibmf_i_add_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1506{
1507	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_start,
1508	    IBMF_TNF_TRACE, "",
1509	    "ibmf_i_add_client() enter, cip = %p, clientp = %p\n",
1510	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1511
1512	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1513
1514	mutex_enter(&ibmf_cip->ci_clients_mutex);
1515	ibmf_clientp->ic_next = NULL;
1516	ibmf_clientp->ic_prev = ibmf_cip->ci_clients_last;
1517	if (ibmf_cip->ci_clients == NULL) {
1518		ibmf_cip->ci_clients = ibmf_clientp;
1519	}
1520	if (ibmf_cip->ci_clients_last) {
1521		ibmf_cip->ci_clients_last->ic_next = ibmf_clientp;
1522	}
1523	ibmf_cip->ci_clients_last = ibmf_clientp;
1524	mutex_exit(&ibmf_cip->ci_clients_mutex);
1525
1526	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_end,
1527	    IBMF_TNF_TRACE, "", "ibmf_i_add_client() exit\n");
1528}
1529
1530/*
1531 * ibmf_i_delete_client():
1532 *	Delete a client from the client list
1533 */
1534void
1535ibmf_i_delete_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1536{
1537	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_start,
1538	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() enter, "
1539	    "ibmf_i_delete_client() enter, cip = %p, clientp = %p\n",
1540	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1541
1542	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1543
1544	mutex_enter(&ibmf_cip->ci_clients_mutex);
1545	if (ibmf_clientp->ic_next)
1546		ibmf_clientp->ic_next->ic_prev = ibmf_clientp->ic_prev;
1547
1548	if (ibmf_clientp->ic_prev)
1549		ibmf_clientp->ic_prev->ic_next = ibmf_clientp->ic_next;
1550
1551	if (ibmf_cip->ci_clients == ibmf_clientp) {
1552		ibmf_cip->ci_clients = ibmf_clientp->ic_next;
1553	}
1554	if (ibmf_cip->ci_clients_last == ibmf_clientp) {
1555		ibmf_cip->ci_clients_last = ibmf_clientp->ic_prev;
1556	}
1557	mutex_exit(&ibmf_cip->ci_clients_mutex);
1558
1559	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_end,
1560	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() exit\n");
1561}
1562
1563/*
1564 * ibmf_i_get_qp():
1565 *	Get the QP structure based on the client class
1566 */
1567int
1568ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num, ibmf_client_type_t class,
1569    ibmf_qp_t **qppp)
1570{
1571	ibmf_qp_t		*qpp;
1572	int			qp_num, status = IBMF_SUCCESS;
1573
1574	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_start,
1575	    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() enter, cip = %p, "
1576	    "port = %d, class = %x\n", tnf_opaque, ibmf_ci, ibmf_cip,
1577	    tnf_int, port, port_num, tnf_opaque, class, class);
1578
1579	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1580
1581	mutex_enter(&ibmf_cip->ci_mutex);
1582
1583	/*
1584	 * walk through the list of qps on this ci, looking for one that
1585	 * corresponds to the type and class the caller is interested in.
1586	 * If it is not there, we need allocate it from the transport. Since
1587	 * qp0 & qp1 can only be allocated once, we maintain a reference count
1588	 * and call the transport for allocation iff the ref count is 0.
1589	 */
1590	qp_num = (class == SUBN_AGENT || class == SUBN_MANAGER) ? 0 : 1;
1591
1592	qpp = ibmf_cip->ci_qp_list;
1593	while (qpp != NULL) {
1594		if (port_num == qpp->iq_port_num && qp_num == qpp->iq_qp_num)
1595			break;
1596		qpp = qpp->iq_next;
1597	}
1598
1599	if (qpp == NULL) {
1600		/*
1601		 * allocate qp and add it the qp list; recheck to
1602		 * catch races
1603		 */
1604		ibmf_qp_t *tqpp;
1605
1606		mutex_exit(&ibmf_cip->ci_mutex);
1607
1608		tqpp = (ibmf_qp_t *)kmem_zalloc(sizeof (ibmf_qp_t), KM_SLEEP);
1609
1610		/* check the list under lock */
1611		mutex_enter(&ibmf_cip->ci_mutex);
1612
1613		qpp = ibmf_cip->ci_qp_list;
1614		while (qpp != NULL) {
1615			if (port_num == qpp->iq_port_num && qp_num ==
1616			    qpp->iq_qp_num)
1617				break;
1618			qpp = qpp->iq_next;
1619		}
1620
1621		if (qpp != NULL) {
1622			/* some one raced past us and added to the list */
1623			kmem_free((void *)tqpp, sizeof (ibmf_qp_t));
1624		} else {
1625			/* add this to the qp list */
1626			qpp = tqpp;
1627			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qpp))
1628			qpp->iq_next = NULL;
1629			if (ibmf_cip->ci_qp_list == NULL)
1630				ibmf_cip->ci_qp_list = qpp;
1631			if (ibmf_cip->ci_qp_list_tail != NULL)
1632				ibmf_cip->ci_qp_list_tail->iq_next = qpp;
1633			ibmf_cip->ci_qp_list_tail = qpp;
1634			qpp->iq_port_num = port_num;
1635			qpp->iq_qp_num = qp_num;
1636			qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1637			mutex_init(&qpp->iq_mutex, NULL, MUTEX_DRIVER, NULL);
1638		}
1639	}
1640
1641	/* we now have a QP context */
1642	for (;;) {
1643		if (qpp->iq_flags == IBMF_QP_FLAGS_INITING) {
1644
1645			/* block till qp is in VALID state */
1646			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1647			continue;
1648
1649		}
1650
1651		if (qpp->iq_flags == IBMF_QP_FLAGS_UNINITING) {
1652
1653			/* block till qp is in INVALID state */
1654			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1655			continue;
1656		}
1657
1658		if (qpp->iq_flags == IBMF_QP_FLAGS_INVALID) {
1659			if ((status = ibmf_i_init_qp(ibmf_cip, qpp)) !=
1660			    IBMF_SUCCESS) {
1661				ibmf_qp_t *tqpp;
1662
1663				/*
1664				 * Remove the QP context from the CI's list.
1665				 * Only initialized QPs should be on the list.
1666				 * We know that this QP is on the list, so
1667				 * the list is not empty.
1668				 */
1669				tqpp = ibmf_cip->ci_qp_list;
1670				if (tqpp == qpp) {
1671					/* Only QP context on the list */
1672					ibmf_cip->ci_qp_list = NULL;
1673					ibmf_cip->ci_qp_list_tail = NULL;
1674				}
1675
1676				/* Find the QP context before the last one */
1677				if (tqpp != qpp) {
1678					while (tqpp->iq_next != qpp) {
1679						tqpp = tqpp->iq_next;
1680					}
1681
1682					/*
1683					 * We are at the second last element of
1684					 * the list. Readjust the tail pointer.
1685					 * Remove the last element from the
1686					 * list.
1687					 */
1688					tqpp->iq_next = NULL;
1689					ibmf_cip->ci_qp_list_tail = tqpp;
1690				}
1691
1692				/* Free up the QP context */
1693				kmem_free((void *)qpp, sizeof (ibmf_qp_t));
1694
1695				break;
1696			}
1697			continue;
1698		}
1699
1700		if (qpp->iq_flags == IBMF_QP_FLAGS_INITED) {
1701			qpp->iq_qp_ref++;
1702			break;
1703		}
1704	}
1705
1706	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qpp))
1707
1708	mutex_exit(&ibmf_cip->ci_mutex);
1709
1710	if (status == IBMF_SUCCESS) {
1711		*qppp = qpp;
1712		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1713		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit "
1714		    "qp_handle = %p\n", tnf_opaque, qp_handle, qpp);
1715		return (IBMF_SUCCESS);
1716	} else {
1717		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_get_qp_err,
1718		    IBMF_TNF_ERROR, "", "%s\n", tnf_string, msg,
1719		    "ibmf_i_get_qp(): qp_not found");
1720		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1721		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit\n");
1722		return (status);
1723	}
1724}
1725
1726/*
1727 * ibmf_i_release_qp():
1728 *	Drop the reference count on the QP structure
1729 */
1730void
1731ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qppp)
1732{
1733	ibmf_qp_t	*qpp;
1734
1735	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_start,
1736	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() enter, cip = %p, "
1737	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, *qppp);
1738
1739	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1740
1741	mutex_enter(&ibmf_cip->ci_mutex);
1742	qpp = *qppp;
1743	qpp->iq_qp_ref--;
1744	if (qpp->iq_qp_ref == 0)
1745		ibmf_i_uninit_qp(ibmf_cip, qpp);
1746	mutex_exit(&ibmf_cip->ci_mutex);
1747
1748	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_end,
1749	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() exit\n");
1750}
1751
1752/*
1753 * ibmf_i_init_qp():
1754 *	Set up the QP context, request a QP from the IBT framework
1755 *	and initialize it
1756 */
1757static int
1758ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1759{
1760	ibt_sqp_type_t		qp_type;
1761	ibt_qp_alloc_attr_t	qp_attrs;
1762	ibt_qp_hdl_t		qp_handle;
1763	ibt_qp_info_t		qp_modify_attr;
1764	ibt_status_t		ibt_status;
1765	int			i, status;
1766
1767	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_start,
1768	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() enter, cip = %p, "
1769	    "port = %d, qp = %d\n", tnf_opaque, ibmf_ci, ibmf_cip, tnf_int,
1770	    port, qpp->iq_port_num, tnf_int, num, qpp->iq_qp_num);
1771
1772	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qpp->iq_qp_handle))
1773
1774	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1775
1776	qpp->iq_flags = IBMF_QP_FLAGS_INITING;
1777	mutex_exit(&ibmf_cip->ci_mutex);
1778	if (qpp->iq_qp_handle) {	/* closed but not yet freed */
1779		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1780		if (ibt_status != IBT_SUCCESS) {
1781			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1782			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1783			    "%s, status = %d\n", tnf_string, msg,
1784			    "ibt_free_qp returned error",
1785			    tnf_uint, ibt_status, ibt_status);
1786		}
1787		qpp->iq_qp_handle = NULL;
1788	}
1789	ASSERT(qpp->iq_qp_num == 0 || qpp->iq_qp_num == 1);
1790	if (qpp->iq_qp_num == 0)
1791		qp_type = IBT_SMI_SQP;
1792	else
1793		qp_type = IBT_GSI_SQP;
1794	qp_attrs.qp_scq_hdl = ibmf_cip->ci_cq_handle;
1795	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_cq_handle;
1796	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
1797	qp_attrs.qp_sizes.cs_sq_sgl = 1;
1798	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1799	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1800	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1801	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1802	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1803
1804	/* call the IB transport to allocate a special QP */
1805	ibt_status = ibt_alloc_special_qp(ibmf_cip->ci_ci_handle,
1806	    qpp->iq_port_num, qp_type, &qp_attrs, NULL, &qp_handle);
1807	if (ibt_status != IBT_SUCCESS) {
1808		mutex_enter(&ibmf_cip->ci_mutex);
1809		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1810		cv_broadcast(&ibmf_cip->ci_qp_cv);
1811		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1812		    IBMF_TNF_ERROR, "", "ibmf_i_init_qp() error status = %d\n",
1813		    tnf_uint, ibt_status, ibt_status);
1814		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1815		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1816		return (IBMF_TRANSPORT_FAILURE);
1817	}
1818
1819	/* initialize qpp */
1820	qpp->iq_qp_handle = qp_handle;
1821	qp_modify_attr.qp_trans = IBT_UD_SRV;
1822	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1823
1824	/* get the pkey index for the specified pkey */
1825	if (ibmf_i_get_pkeyix(ibmf_cip->ci_ci_handle, IBMF_P_KEY_DEF_LIMITED,
1826	    qpp->iq_port_num, &qp_modify_attr.qp_transport.ud.ud_pkey_ix) !=
1827	    IBMF_SUCCESS) {
1828		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1829		if (ibt_status != IBT_SUCCESS) {
1830			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1831			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1832			    "%s, status = %d\n", tnf_string, msg,
1833			    "ibt_free_qp returned error",
1834			    tnf_uint, ibt_status, ibt_status);
1835		}
1836		mutex_enter(&ibmf_cip->ci_mutex);
1837		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1838		cv_broadcast(&ibmf_cip->ci_qp_cv);
1839		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1840		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): failed to get "
1841		    "pkey index\n");
1842		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1843		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1844		return (IBMF_FAILURE);
1845	}
1846	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1847	qp_modify_attr.qp_transport.ud.ud_port = qpp->iq_port_num;
1848	qp_modify_attr.qp_transport.ud.ud_qkey = IBMF_MGMT_Q_KEY;
1849
1850	/* call the IB transport to initialize the QP */
1851	ibt_status = ibt_initialize_qp(qp_handle, &qp_modify_attr);
1852	if (ibt_status != IBT_SUCCESS) {
1853		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1854		if (ibt_status != IBT_SUCCESS) {
1855			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1856			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1857			    "%s, status = %d\n", tnf_string, msg,
1858			    "ibt_free_qp returned error",
1859			    tnf_uint, ibt_status, ibt_status);
1860		}
1861		mutex_enter(&ibmf_cip->ci_mutex);
1862		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1863		cv_broadcast(&ibmf_cip->ci_qp_cv);
1864		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1865		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): error status = %d\n",
1866		    tnf_uint, ibt_status, ibt_status);
1867		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1868		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1869		return (IBMF_TRANSPORT_FAILURE);
1870	}
1871
1872	/* post receive wqes to the RQ to handle unsolicited inbound packets  */
1873	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
1874		status =  ibmf_i_post_recv_buffer(ibmf_cip, qpp,
1875		    B_TRUE, IBMF_QP_HANDLE_DEFAULT);
1876		if (status != IBMF_SUCCESS) {
1877			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
1878			    ibmf_i_init_qp, IBMF_TNF_TRACE, "",
1879			    "%s\n", tnf_string, msg, "ibmf_i_init_qp(): "
1880			    "ibmf_i_post_recv_buffer() failed");
1881		}
1882	}
1883	mutex_enter(&ibmf_cip->ci_mutex);
1884
1885	/* set the state and signal blockers */
1886	qpp->iq_flags = IBMF_QP_FLAGS_INITED;
1887	cv_broadcast(&ibmf_cip->ci_qp_cv);
1888
1889	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1890	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1891	return (IBMF_SUCCESS);
1892}
1893
1894/*
1895 * ibmf_i_uninit_qp():
1896 *	Invalidate the QP context
1897 */
1898static void
1899ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1900{
1901	ibt_status_t		status;
1902
1903	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_start,
1904	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() enter, cip = %p "
1905	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, qpp);
1906
1907	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1908
1909	/* mark the state as uniniting */
1910	ASSERT(qpp->iq_qp_ref == 0);
1911	qpp->iq_flags = IBMF_QP_FLAGS_UNINITING;
1912	mutex_exit(&ibmf_cip->ci_mutex);
1913
1914	/* note: we ignore error values from ibt_flush_qp */
1915	status = ibt_flush_qp(qpp->iq_qp_handle);
1916	if (status != IBT_SUCCESS) {
1917		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L2,
1918		    ibmf_i_uninit_qp_err, IBMF_TNF_ERROR, "",
1919		    "ibmf_i_uninit_qp(): %s, status = %d\n", tnf_string, msg,
1920		    "ibt_flush_qp returned error", tnf_int, status, status);
1921	}
1922
1923	/* mark state as INVALID and signal any blockers */
1924	mutex_enter(&ibmf_cip->ci_mutex);
1925	qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1926	cv_broadcast(&ibmf_cip->ci_qp_cv);
1927
1928	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_end,
1929	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() exit\n");
1930}
1931
1932/*
1933 * ibmf_i_alloc_msg():
1934 *	Allocate and set up a message context
1935 */
1936int
1937ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp, int km_flags)
1938{
1939	ibmf_msg_impl_t *msgimplp;
1940
1941	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1942	    ibmf_i_alloc_msg_start, IBMF_TNF_TRACE, "",
1943	    "ibmf_i_alloc_msg() enter, clientp = %p, msg = %p, "
1944	    " kmflags = %d\n", tnf_opaque, clientp, clientp, tnf_opaque, msg,
1945	    *msgp, tnf_int, km_flags, km_flags);
1946
1947	/* allocate the message context */
1948	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1949	    km_flags);
1950	if (msgimplp != NULL) {
1951		if (km_flags == KM_SLEEP) {
1952			ibmf_i_pop_ud_dest_thread(clientp->ic_myci);
1953		}
1954	} else {
1955		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1956		    ibmf_i_alloc_msg_err, IBMF_TNF_ERROR, "",
1957		    "ibmf_i_alloc_msg(): %s\n",
1958		    tnf_string, msg, "kmem_xalloc failed");
1959		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1960		    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1961		return (IBMF_NO_RESOURCES);
1962	}
1963
1964	*msgp = msgimplp;
1965	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1966	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1967	return (IBMF_SUCCESS);
1968}
1969
1970/*
1971 * ibmf_i_free_msg():
1972 *	frees up all buffers allocated by IBMF for
1973 * 	this message context, and then frees up the context
1974 */
void
ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp)
{
	ibmf_msg_bufs_t *msgbufp = &msgimplp->im_msgbufs_recv;
	ibmf_client_t *clientp = (ibmf_client_t *)msgimplp->im_client;
	uint32_t	cl_hdr_sz, cl_hdr_off;

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
	    ibmf_i_free_msg_start, IBMF_TNF_TRACE, "",
	    "ibmf_i_free_msg() enter, msg = %p\n", tnf_opaque, msg, msgimplp);

	/* free up the UD destination resource */
	if (msgimplp->im_ibmf_ud_dest != NULL) {
		ibmf_i_free_ud_dest(clientp, msgimplp);
		/*
		 * NOTE(review): B_FALSE presumably selects a non-blocking
		 * trim of the CI-wide UD destination list -- confirm
		 * against ibmf_i_clean_ud_dest_list()
		 */
		ibmf_i_clean_ud_dest_list(clientp->ic_myci, B_FALSE);
	}

	/* free up the receive buffer if allocated previously */
	if (msgbufp->im_bufs_mad_hdr != NULL) {
		/*
		 * The receive buffer was allocated as one contiguous
		 * chunk (MAD header + class header offset + class header
		 * + class data); recompute the class-specific header
		 * size/offset from the MgmtClass to free the same size.
		 */
		ibmf_i_mgt_class_to_hdr_sz_off(
		    msgbufp->im_bufs_mad_hdr->MgmtClass,
		    &cl_hdr_sz, &cl_hdr_off);
		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
		    msgbufp->im_bufs_cl_data_len);
		/* account for the released receive buffer in the kstats */
		mutex_enter(&clientp->ic_kstat_mutex);
		IBMF_SUB32_KSTATS(clientp, recv_bufs_alloced, 1);
		mutex_exit(&clientp->ic_kstat_mutex);
	}

	/* destroy the message mutex */
	mutex_destroy(&msgimplp->im_mutex);

	/* free the message context */
	kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_msg_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_free_msg() exit\n");
}
2014
2015/*
2016 * ibmf_i_msg_transport():
2017 *	Send a message posted by the IBMF client using the RMPP protocol
2018 *	if specified
2019 */
int
ibmf_i_msg_transport(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
    ibmf_msg_impl_t *msgimplp, int blocking)
{
	ib_mad_hdr_t	*madhdrp;
	ibmf_msg_bufs_t *msgbufp, *smsgbufp;
	uint32_t	cl_hdr_sz, cl_hdr_off;
	boolean_t	isDS = 0; /* double sided (sequenced) transaction */
	boolean_t	error = B_FALSE;
	int		status = IBMF_SUCCESS;
	uint_t		refcnt;
	char		errmsg[128];
	timeout_id_t	msg_rp_unset_id, msg_tr_unset_id;

	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_msg_transport_start,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): clientp = 0x%p, "
	    "qphdl = 0x%p, msgp = 0x%p, block = %d\n",
	    tnf_opaque, clientp, clientp, tnf_opaque, qphdl, ibmf_qp_handle,
	    tnf_opaque, msg, msgimplp, tnf_uint, block, blocking);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))

	/* serialize access to the message context state */
	mutex_enter(&msgimplp->im_mutex);

	madhdrp = msgimplp->im_msgbufs_send.im_bufs_mad_hdr;
	msgbufp = &msgimplp->im_msgbufs_recv;
	smsgbufp = &msgimplp->im_msgbufs_send;

	/*
	 * check if transp_op_flags specify that the transaction is
	 * a single packet, then the size of the message header + data
	 * does not exceed 256 bytes
	 */
	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {
		ibmf_i_mgt_class_to_hdr_sz_off(
		    smsgbufp->im_bufs_mad_hdr->MgmtClass,
		    &cl_hdr_sz, &cl_hdr_off);

		if ((sizeof (ib_mad_hdr_t) + cl_hdr_off +
		    smsgbufp->im_bufs_cl_hdr_len +
		    smsgbufp->im_bufs_cl_data_len) > IBMF_MAD_SIZE) {
			mutex_exit(&msgimplp->im_mutex);
			(void) sprintf(errmsg,
			    "Non-RMPP message size is too large");
			error = B_TRUE;
			status = IBMF_BAD_SIZE;
			goto bail;
		}
	}

	/* more message context initialization */
	msgimplp->im_qp_hdl 	= ibmf_qp_handle;
	msgimplp->im_tid	= b2h64(madhdrp->TransactionID);
	msgimplp->im_mgt_class 	= madhdrp->MgmtClass;
	msgimplp->im_unsolicited = B_FALSE;
	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
	bzero(&msgimplp->im_rmpp_ctx, sizeof (ibmf_rmpp_ctx_t));
	msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
	msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
	msgimplp->im_rmpp_ctx.rmpp_retry_cnt = 0;
	msgimplp->im_ref_count = 0;
	msgimplp->im_pending_send_compls = 0;
	/*
	 * Hold a reference across the send; it is dropped below so the
	 * completion path can remove the message when it is ready.
	 */
	IBMF_MSG_INCR_REFCNT(msgimplp);
	/* apply default retransmission parameters for any left unset */
	if (msgimplp->im_retrans.retrans_retries == 0)
		msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
	if (msgimplp->im_retrans.retrans_rtv == 0)
		msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
	if (msgimplp->im_retrans.retrans_rttv == 0)
		msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;

	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, msgp = 0x%p, "
	    "class = 0x%x, method = 0x%x, attributeID = 0x%x\n",
	    tnf_string, msg, "Added message", tnf_opaque, msgimplp,
	    msgimplp, tnf_opaque, class, msgimplp->im_mgt_class, tnf_opaque,
	    method, madhdrp->R_Method, tnf_opaque, attrib_id,
	    b2h16(madhdrp->AttributeID));

	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
	    "TID = 0x%p, transp_op_flags = 0x%x\n",
	    tnf_opaque, msgimplp, msgimplp, tnf_opaque, tid, msgimplp->im_tid,
	    tnf_uint, transp_op_flags, msgimplp->im_transp_op_flags);

	/*
	 * Do not allow reuse of a message where the receive buffers are
	 * being used as send buffers if this is a sequenced transaction
	 */
	if ((madhdrp == msgbufp->im_bufs_mad_hdr) &&
	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {
		IBMF_MSG_DECR_REFCNT(msgimplp);
		mutex_exit(&msgimplp->im_mutex);
		(void) sprintf(errmsg,
		    "Send and Recv buffers are the same for sequenced"
		    " transaction");
		error = B_TRUE;
		status = IBMF_REQ_INVALID;
		goto bail;
	}

	/* set transaction flags */
	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEQUENCED;

	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP)
		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEND_RMPP;
	else
		msgimplp->im_flags |= IBMF_MSG_FLAGS_NOT_RMPP;

	/* free recv buffers if this is a reused message */
	if ((msgbufp->im_bufs_mad_hdr != NULL) &&
	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {

		IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, "
		    "msgp = 0x%p, mad_hdrp = 0x%p\n", tnf_string, msg,
		    "Freeing recv buffer for reused message",
		    tnf_opaque, msgimplp, msgimplp,
		    tnf_opaque, mad_hdr, msgbufp->im_bufs_mad_hdr);

		/*
		 * Recompute the class header size/offset so the free
		 * size matches the original contiguous allocation.
		 */
		ibmf_i_mgt_class_to_hdr_sz_off(
		    msgbufp->im_bufs_mad_hdr->MgmtClass,
		    &cl_hdr_sz, &cl_hdr_off);

		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
		    msgbufp->im_bufs_cl_data_len);

		msgbufp->im_bufs_mad_hdr = NULL;
		msgbufp->im_bufs_cl_hdr = NULL;
		msgbufp->im_bufs_cl_hdr_len = 0;
		msgbufp->im_bufs_cl_data = NULL;
		msgbufp->im_bufs_cl_data_len = 0;
	}

	mutex_exit(&msgimplp->im_mutex);

	/* initialize (and possibly allocate) the address handle */
	status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
	    &msgimplp->im_ud_dest, blocking);
	if (status != IBMF_SUCCESS) {
		(void) sprintf(errmsg, "ibmf_i_alloc_ud_dest() failed");
		error = B_TRUE;
		goto bail;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))

	/* add the message to the client context's message list */
	ibmf_i_client_add_msg(clientp, msgimplp);

	mutex_enter(&msgimplp->im_mutex);

	/* no one should have touched our state */
	ASSERT(msgimplp->im_trans_state_flags == IBMF_TRANS_STATE_FLAG_UNINIT);

	/* transition out of uninit state */
	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;

	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
	    "local_lid = 0x%x, remote_lid = 0x%x, remote_qpn = 0x%x, "
	    "block = %d\n", tnf_opaque, msgp, msgimplp,
	    tnf_uint, local_lid, msgimplp->im_local_addr.ia_local_lid,
	    tnf_uint, remote_lid, msgimplp->im_local_addr.ia_remote_lid,
	    tnf_uint, remote_qpn, msgimplp->im_local_addr.ia_remote_qno,
	    tnf_uint, blocking, blocking);

	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): "
	    "unsetting timer %p %d\n", tnf_opaque, msgimplp, msgimplp,
	    tnf_opaque, timeout_id, msgimplp->im_rp_timeout_id);

	ASSERT(msgimplp->im_rp_timeout_id == 0);
	ASSERT(msgimplp->im_tr_timeout_id == 0);

	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {

		/* Non-RMPP transaction */

		status = ibmf_i_send_single_pkt(clientp, ibmf_qp_handle,
		    msgimplp, blocking);
		if (status != IBMF_SUCCESS) {
			/* drop the send reference and unlink the message */
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "Single packet send failed");
			error = B_TRUE;
			goto bail;
		}

	} else if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) {

		/*
		 * RMPP transaction; this condition is the complement of
		 * the "if" above, so it always holds when reached
		 */

		/* check if client supports RMPP traffic */
		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_RMPP) == 0) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "Class does not support RMPP");
			error = B_TRUE;
			status = IBMF_BAD_RMPP_OPT;
			goto bail;
		}

		/* for non-special QPs, check if QP supports RMPP traffic */
		if (ibmf_qp_handle != IBMF_QP_HANDLE_DEFAULT &&
		    (((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_supports_rmpp ==
		    B_FALSE)) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "QP does not support RMPP");
			error = B_TRUE;
			status = IBMF_BAD_RMPP_OPT;
			goto bail;
		}

		/* check if transaction is "double sided" (send and receive) */
		if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
			isDS = 1;

		status = ibmf_i_send_rmpp_pkts(clientp, ibmf_qp_handle,
		    msgimplp, isDS, blocking);
		if (status != IBMF_SUCCESS) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "RMPP packets send failed");
			error = B_TRUE;
			goto bail;
		}
	}

	/*
	 * decrement the reference count so notify_client() can remove the
	 * message when it's ready
	 */
	IBMF_MSG_DECR_REFCNT(msgimplp);

	/* check if the transaction is a blocking transaction */
	if (blocking && ((msgimplp->im_trans_state_flags &
	    IBMF_TRANS_STATE_FLAG_SIGNALED) == 0)) {

		/* indicate that the tranaction is waiting */
		msgimplp->im_trans_state_flags |= IBMF_TRANS_STATE_FLAG_WAIT;

		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, "blocking for completion",
		    tnf_opaque, msgimplp, msgimplp);

		/* wait for transaction completion */
		cv_wait(&msgimplp->im_trans_cv, &msgimplp->im_mutex);

		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, "unblocking for completion",
		    tnf_opaque, msgimplp, msgimplp);

		/* clean up flags */
		msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_WAIT;
		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;

		if (msgimplp->im_msg_status != IBMF_SUCCESS) {

			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
			    "ibmf_i_msg_transport(): msg_status = %d\n",
			    tnf_uint, msgstatus, msgimplp->im_msg_status);

			status = msgimplp->im_msg_status;
		}
	} else if (blocking && (msgimplp->im_trans_state_flags &
	    IBMF_TRANS_STATE_FLAG_SIGNALED)) {
		/* transaction already completed; no need to wait */
		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;

		if (msgimplp->im_msg_status != IBMF_SUCCESS) {
			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
			    "ibmf_i_msg_transport(): msg_status = %d\n",
			    tnf_uint, msgstatus, msgimplp->im_msg_status);
			status = msgimplp->im_msg_status;
		}
	}

	/*
	 * Snapshot and clear the unset-timeout ids while holding the
	 * message mutex; untimeout() is called only after dropping it,
	 * presumably to avoid deadlocking with a running timeout
	 * handler that takes im_mutex -- confirm against the handlers.
	 */
	msg_rp_unset_id = msg_tr_unset_id = 0;
	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
	msgimplp->im_rp_unset_timeout_id = 0;
	msgimplp->im_tr_unset_timeout_id = 0;

	mutex_exit(&msgimplp->im_mutex);

	/* Unset the timers */
	if (msg_rp_unset_id != 0) {
		(void) untimeout(msg_rp_unset_id);
	}

	if (msg_tr_unset_id != 0) {
		(void) untimeout(msg_tr_unset_id);
	}

	/* increment kstats of the number of sent messages */
	mutex_enter(&clientp->ic_kstat_mutex);
	IBMF_ADD32_KSTATS(clientp, msgs_sent, 1);
	mutex_exit(&clientp->ic_kstat_mutex);

bail:
	if (error) {
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, errmsg, tnf_opaque, msgimplp, msgimplp);
	}

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_msg_transport_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport() exit, status = %d\n",
	    tnf_uint, status, status);

	return (status);
}
2345
2346/*
2347 * ibmf_i_init_msg():
2348 *	Initialize the message fields
2349 */
2350void
2351ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb,
2352    void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block)
2353{
2354	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_start,
2355	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() enter\n");
2356
2357	_NOTE(ASSUMING_PROTECTED(msgimplp->im_trans_cb,
2358	    msgimplp->im_trans_cb_arg))
2359
2360	if (block == B_TRUE)
2361		msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_BLOCKING;
2362	msgimplp->im_trans_cb = trans_cb;
2363	msgimplp->im_trans_cb_arg = trans_cb_arg;
2364
2365	bzero(&msgimplp->im_retrans, sizeof (ibmf_retrans_t));
2366	if (retrans != NULL) {
2367		bcopy((void *)retrans, (void *)&msgimplp->im_retrans,
2368		    sizeof (ibmf_retrans_t));
2369	}
2370
2371	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_end,
2372	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() exit\n");
2373}
2374
2375/*
2376 * ibmf_i_alloc_qp():
2377 *	Allocate a QP context for the alternate QPs
2378 */
2379int
2380ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key, ib_qkey_t q_key,
2381    uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep)
2382{
2383	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
2384	ibt_qp_alloc_attr_t	qp_attrs;
2385	ibt_qp_info_t		qp_modify_attr;
2386	ibmf_alt_qp_t		*qp_ctx;
2387	uint16_t		pkey_ix;
2388	ibt_status_t		ibt_status;
2389	int			i, blocking;
2390	boolean_t		error = B_FALSE;
2391	int			status = IBMF_SUCCESS;
2392	char			errmsg[128];
2393
2394
2395	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
2396	    ibmf_i_alloc_qp_start, IBMF_TNF_TRACE, "",
2397	    "ibmf_i_alloc_qp() enter, clientp = %p, pkey = %x, qkey = %x \n",
2398	    tnf_opaque, clientp, clientp, tnf_uint, p_key, p_key,
2399	    tnf_uint, q_key, q_key);
2400
2401	/*
2402	 * get the pkey index associated with this pkey if present in table
2403	 */
2404	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
2405	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
2406		(void) sprintf(errmsg, "pkey not in table, pkey = %x", p_key);
2407		error = B_TRUE;
2408		status = IBMF_FAILURE;
2409		goto bail;
2410	}
2411
2412	/* allocate QP context memory */
2413	qp_ctx = (ibmf_alt_qp_t *)kmem_zalloc(sizeof (ibmf_alt_qp_t),
2414	    (flags & IBMF_ALLOC_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
2415	if (qp_ctx == NULL) {
2416		(void) sprintf(errmsg, "failed to kmem_zalloc qp ctx");
2417		error = B_TRUE;
2418		status = IBMF_NO_RESOURCES;
2419		goto bail;
2420	}
2421
2422	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp_ctx));
2423
2424	/* setup the qp attrs for the alloc call */
2425	qp_attrs.qp_scq_hdl = ibmf_cip->ci_alt_cq_handle;
2426	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_alt_cq_handle;
2427	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
2428	qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
2429	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
2430	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
2431	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
2432	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
2433	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
2434
2435	/* request IBT for a qp with the desired attributes */
2436	ibt_status = ibt_alloc_qp(clientp->ic_ci_handle, IBT_UD_RQP,
2437	    &qp_attrs, &qp_ctx->isq_qp_sizes, &qp_ctx->isq_qpn,
2438	    &qp_ctx->isq_qp_handle);
2439	if (ibt_status != IBT_SUCCESS) {
2440		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2441		(void) sprintf(errmsg, "failed to alloc qp, status = %d",
2442		    ibt_status);
2443		error = B_TRUE;
2444		status = IBMF_NO_RESOURCES;
2445		goto bail;
2446	}
2447
2448	qp_modify_attr.qp_trans = IBT_UD_SRV;
2449	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
2450	qp_modify_attr.qp_transport.ud.ud_qkey = q_key;
2451	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
2452	qp_modify_attr.qp_transport.ud.ud_pkey_ix = pkey_ix;
2453	qp_modify_attr.qp_transport.ud.ud_port =
2454	    clientp->ic_client_info.port_num;
2455
2456	/* Set up the client handle in the QP context */
2457	qp_ctx->isq_client_hdl = clientp;
2458
2459	/* call the IB transport to initialize the QP */
2460	ibt_status = ibt_initialize_qp(qp_ctx->isq_qp_handle, &qp_modify_attr);
2461	if (ibt_status != IBT_SUCCESS) {
2462		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
2463		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2464		(void) sprintf(errmsg, "failed to initialize qp, status = %d",
2465		    ibt_status);
2466		error = B_TRUE;
2467		status = IBMF_NO_RESOURCES;
2468		goto bail;
2469	}
2470
2471	/* Set up the WQE caches */
2472	status = ibmf_i_init_altqp_wqes(qp_ctx);
2473	if (status != IBMF_SUCCESS) {
2474		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
2475		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
2476		(void) sprintf(errmsg, "failed to init wqe caches, status = %d",
2477		    status);
2478		error = B_TRUE;
2479		goto bail;
2480	}
2481
2482	qp_ctx->isq_next = NULL;
2483	qp_ctx->isq_pkey = p_key;
2484	qp_ctx->isq_qkey = q_key;
2485	qp_ctx->isq_port_num = clientp->ic_client_info.port_num;
2486	mutex_init(&qp_ctx->isq_mutex, NULL, MUTEX_DRIVER, NULL);
2487	mutex_init(&qp_ctx->isq_wqe_mutex, NULL, MUTEX_DRIVER, NULL);
2488	cv_init(&qp_ctx->isq_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
2489	cv_init(&qp_ctx->isq_sqd_cv, NULL, CV_DRIVER, NULL);
2490	cv_init(&qp_ctx->isq_wqes_cv, NULL, CV_DRIVER, NULL);
2491
2492	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qp_ctx));
2493
2494	/* add alt qp to the list in CI context */
2495	mutex_enter(&ibmf_cip->ci_mutex);
2496	if (ibmf_cip->ci_alt_qp_list == NULL) {
2497		ibmf_cip->ci_alt_qp_list = qp_ctx;
2498	} else {
2499		ibmf_alt_qp_t *qpp;
2500
2501		qpp = ibmf_cip->ci_alt_qp_list;
2502		while (qpp->isq_next != NULL) {
2503			qpp = qpp->isq_next;
2504		}
2505		qpp->isq_next = qp_ctx;
2506	}
2507	mutex_exit(&ibmf_cip->ci_mutex);
2508
2509	*ibmf_qp_handlep = (ibmf_qp_handle_t)qp_ctx;
2510
2511	if (flags & IBMF_ALLOC_SLEEP)
2512		blocking = 1;
2513	else
2514		blocking = 0;
2515
2516	/* post the max number of buffers to RQ */
2517	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
2518		status = ibmf_i_post_recv_buffer(ibmf_cip, clientp->ic_qp,
2519		    blocking, *ibmf_qp_handlep);
2520		if (status != IBMF_SUCCESS) {
2521			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
2522			    ibmf_i_alloc_qp, IBMF_TNF_TRACE, "",
2523			    "ibmf_i_alloc_qp(): %s, status = %d\n",
2524			    tnf_string, msg, "ibmf_i_post_recv_buffer() failed",
2525			    tnf_int, status, status);
2526		}
2527	}
2528
2529	mutex_enter(&clientp->ic_kstat_mutex);
2530	IBMF_ADD32_KSTATS(clientp, alt_qps_alloced, 1);
2531	mutex_exit(&clientp->ic_kstat_mutex);
2532
2533bail:
2534	if (error) {
2535		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2536		    ibmf_i_alloc_qp_err, IBMF_TNF_TRACE, "",
2537		    "ibmf_i_alloc_qp(): %s\n", tnf_string, msg, errmsg);
2538	}
2539
2540	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_qp_end,
2541	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_qp() exit, qp = %p\n",
2542	    tnf_opaque, qp_handlep, *ibmf_qp_handlep);
2543	return (status);
2544}
2545
2546/*
2547 * ibmf_i_free_qp():
2548 *	Free an alternate QP context
2549 */
/* ARGSUSED */
int
ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags)
{
	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
	ibmf_ci_t		*ibmf_cip = qp_ctx->isq_client_hdl->ic_myci;
	ibmf_alt_qp_t		*qpp, *pqpp;
	ibt_status_t		ibt_status;

	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);

	/* remove qp from the list in CI context */

	mutex_enter(&ibmf_cip->ci_mutex);
	qpp = ibmf_cip->ci_alt_qp_list;
	ASSERT(qpp != NULL);
	if (qpp == qp_ctx) {
		/* QP is at the head of the singly-linked list */
		ibmf_cip->ci_alt_qp_list = qpp->isq_next;
	} else {
		/* walk the list to find the QP and unlink it */
		while (qpp != NULL) {
			if (qpp == qp_ctx)
				break;
			pqpp = qpp;
			qpp = qpp->isq_next;
		}
		ASSERT(qpp != NULL);
		pqpp->isq_next = qpp->isq_next;
	}
	/* from here on qpp == qp_ctx; both names refer to the freed QP */

	mutex_exit(&ibmf_cip->ci_mutex);

	/* flush the WQEs in the QP queues */
	ibt_status = ibt_flush_qp(qp_ctx->isq_qp_handle);
	if (ibt_status != IBT_SUCCESS) {
		/*
		 * NOTE(review): the QP context has already been unlinked
		 * from ci_alt_qp_list and is not freed on this error
		 * return, so it is leaked -- confirm this is intentional
		 */
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
		    "ibmf_i_free_qp(): %s, status = %d\n",
		    tnf_string, msg, "failed to close qp",
		    tnf_uint, ibt_status, ibt_status);
		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
		return (IBMF_TRANSPORT_FAILURE);
	}

	/* Call the MAD completion handler */
	ibmf_i_mad_completions(ibmf_cip->ci_alt_cq_handle, (void*)ibmf_cip);

	/* Wait here for all WQE owned by this QP to get freed */
	mutex_enter(&qpp->isq_mutex);
	while (qpp->isq_wqes_alloced != 0) {
		cv_wait(&qpp->isq_wqes_cv, &qpp->isq_mutex);
	}
	mutex_exit(&qpp->isq_mutex);

	cv_destroy(&qp_ctx->isq_recv_cb_teardown_cv);
	cv_destroy(&qp_ctx->isq_sqd_cv);
	cv_destroy(&qp_ctx->isq_wqes_cv);

	/* call the IB transport to free the QP */
	ibt_status = ibt_free_qp(qp_ctx->isq_qp_handle);
	if (ibt_status != IBT_SUCCESS) {
		/*
		 * NOTE(review): as above, the unlinked QP context is not
		 * freed on this error return and its CVs are already
		 * destroyed -- confirm this is intentional
		 */
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
		    "ibmf_i_free_qp(): %s, status = %d\n",
		    tnf_string, msg, "failed to free qp",
		    tnf_uint, ibt_status, ibt_status);
		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
		return (IBMF_TRANSPORT_FAILURE);
	}

	/* Clean up the WQE caches */
	ibmf_i_fini_altqp_wqes(qp_ctx);
	mutex_destroy(&qp_ctx->isq_wqe_mutex);
	mutex_destroy(&qp_ctx->isq_mutex);

	/* account for the released QP in the client's kstats */
	mutex_enter(&clientp->ic_kstat_mutex);
	IBMF_SUB32_KSTATS(clientp, alt_qps_alloced, 1);
	mutex_exit(&clientp->ic_kstat_mutex);

	kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");

	return (IBMF_SUCCESS);
}
2641
2642/*
2643 * ibmf_i_query_qp():
2644 *	Query an alternate QP context
2645 */
2646/* ARGSUSED */
2647int
2648ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags,
2649    uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump)
2650{
2651	ibt_qp_query_attr_t	qp_query;
2652	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2653	uint16_t		pkey_ix;
2654	ibt_status_t		ibt_status;
2655
2656	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
2657	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
2658	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
2659	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);
2660
2661	ibt_status = ibt_query_qp(qp_ctx->isq_qp_handle, &qp_query);
2662	if (ibt_status != IBT_SUCCESS) {
2663		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2664		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2665		    "ibmf_i_query_qp(): %s, status = %d\n",
2666		    tnf_string, msg, "failed to query qp",
2667		    tnf_uint, ibt_status, ibt_status);
2668		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2669		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2670		return (IBMF_TRANSPORT_FAILURE);
2671	}
2672
2673	/* move the desired attributes into the locations provided */
2674	*qp_nump = qp_query.qp_qpn;
2675	*q_keyp = qp_query.qp_info.qp_transport.ud.ud_qkey;
2676	*portnump = qp_query.qp_info.qp_transport.ud.ud_port;
2677
2678	pkey_ix = qp_query.qp_info.qp_transport.ud.ud_pkey_ix;
2679
2680	/* get the pkey based on the pkey_ix */
2681	ibt_status = ibt_index2pkey(qp_ctx->isq_client_hdl->ic_ci_handle,
2682	    *portnump, pkey_ix, p_keyp);
2683	if (ibt_status != IBT_SUCCESS) {
2684		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2685		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2686		    "ibmf_i_query_qp(): %s, pkey_ix = %d, status = %d\n",
2687		    tnf_string, msg, "failed to get pkey from index",
2688		    tnf_uint, pkey_ix, pkey_ix,
2689		    tnf_uint, ibt_status, ibt_status);
2690		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2691		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2692		return (IBMF_TRANSPORT_FAILURE);
2693	}
2694
2695	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2696	    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit, qp_num = 0x%x, "
2697	    "pkey = 0x%x, qkey = 0x%x, portnum = %d\n",
2698	    tnf_uint, qp_num, *qp_nump, tnf_uint, pkey, *p_keyp,
2699	    tnf_uint, qkey, *q_keyp, tnf_uint, portnum, *portnump);
2700
2701	return (IBMF_SUCCESS);
2702}
2703
2704/*
2705 * ibmf_i_modify_qp():
2706 *	Modify an alternate QP context
2707 */
2708/* ARGSUSED */
2709int
2710ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key,
2711    ib_qkey_t q_key, uint_t flags)
2712{
2713	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2714	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
2715	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
2716	ibmf_alt_qp_t		*qpp;
2717	ibt_qp_info_t		qp_mod;
2718	ibt_cep_modify_flags_t	qp_mod_flags;
2719	ibt_queue_sizes_t	actual_sz;
2720	uint16_t		pkey_ix;
2721	ibt_status_t		ibt_status;
2722
2723	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2724	    ibmf_i_modify_qp_start, IBMF_TNF_TRACE, "",
2725	    "ibmf_i_modify_qp() enter, qp_hdl = %p, flags = %x, pkey = 0x%x, "
2726	    "qkey = 0x%x\n", tnf_opaque, qp_hdl, ibmf_qp_handle,
2727	    tnf_uint, flags, flags, tnf_uint, p_key, p_key,
2728	    tnf_uint, q_key, q_key);
2729
2730	/*
2731	 * get the pkey index associated with this pkey if present in table
2732	 */
2733	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
2734	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
2735		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2736		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2737		    "ibmf_i_modify_qp(): %s, pkey = %x\n",
2738		    tnf_string, msg, "pkey not in table",
2739		    tnf_uint, pkey, p_key);
2740		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2741		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2742		return (IBMF_FAILURE);
2743	}
2744
2745	/* Find the QP context in the CI QP context list */
2746	mutex_enter(&ibmf_cip->ci_mutex);
2747	qpp = ibmf_cip->ci_alt_qp_list;
2748	while (qpp != NULL) {
2749		if (qpp == qp_ctx) {
2750			break;
2751		}
2752		qpp = qpp->isq_next;
2753	}
2754
2755	if (qpp == NULL) {
2756		mutex_exit(&ibmf_cip->ci_mutex);
2757
2758		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2759		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2760		    "ibmf_i_modify_qp(): %s\n",
2761		    tnf_string, msg, "QP not in altqp list");
2762		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2763		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2764		return (IBMF_BAD_QP_HANDLE);
2765
2766	} else {
2767
2768		mutex_enter(&qp_ctx->isq_mutex);
2769	}
2770
2771	mutex_exit(&ibmf_cip->ci_mutex);
2772
2773	/*
2774	 * Transition the QP to SQD state
2775	 */
2776	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2777	qp_mod.qp_trans = IBT_UD_SRV;
2778	qp_mod.qp_state = IBT_STATE_SQD;
2779	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_SQD_EVENT;
2780	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2781	    &qp_mod, &actual_sz);
2782	if (ibt_status != IBT_SUCCESS) {
2783		mutex_exit(&qp_ctx->isq_mutex);
2784		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2785		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2786		    "ibmf_i_modify_qp(): %s, qp_hdl = %p\n",
2787		    tnf_string, msg, "QP transition RTS to SQD failed",
2788		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle);
2789		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2790		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2791		return (IBMF_TRANSPORT_FAILURE);
2792	}
2793
2794	/*
2795	 * Wait for an event indicating that the QP is in SQD state
2796	 */
2797	cv_wait(&qp_ctx->isq_sqd_cv, &qp_ctx->isq_mutex);
2798
2799	/* Setup QP modification information for transition to RTS state */
2800	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2801	qp_mod.qp_trans = IBT_UD_SRV;
2802	qp_mod.qp_state = IBT_STATE_RTS;
2803	qp_mod.qp_current_state = IBT_STATE_SQD;
2804	qp_mod.qp_transport.ud.ud_pkey_ix = pkey_ix;
2805	qp_mod.qp_transport.ud.ud_qkey = q_key;
2806	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PKEY_IX |
2807	    IBT_CEP_SET_QKEY;
2808
2809	/*
2810	 * transition the QP back to RTS state to allow
2811	 * modification of the pkey and qkey
2812	 */
2813
2814	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2815	    &qp_mod, &actual_sz);
2816	if (ibt_status != IBT_SUCCESS) {
2817		mutex_exit(&qp_ctx->isq_mutex);
2818		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2819		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2820		    "ibmf_i_modify_qp(): %s, qp_hdl = %p, status = %d\n",
2821		    tnf_string, msg, "QP transition SQD to RTS failed",
2822		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle,
2823		    tnf_uint, ibt_status, ibt_status);
2824		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2825		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2826		return (IBMF_TRANSPORT_FAILURE);
2827	}
2828
2829	qp_ctx->isq_pkey = p_key;
2830	qp_ctx->isq_qkey = q_key;
2831	mutex_exit(&qp_ctx->isq_mutex);
2832
2833	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2834	    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2835	return (IBMF_SUCCESS);
2836}
2837
2838/*
2839 * ibmf_i_post_recv_buffer():
2840 *	Post a WQE to the RQ of the specified QP
2841 */
2842int
2843ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block,
2844    ibmf_qp_handle_t ibmf_qp_handle)
2845{
2846	int			ret;
2847	ibt_wr_ds_t		*sgl;
2848	ibt_status_t		status;
2849	ibmf_recv_wqe_t		*recv_wqep;
2850	ibt_qp_hdl_t		ibt_qp_handle;
2851	struct kmem_cache	*kmem_cachep;
2852	ibmf_alt_qp_t		*altqp;
2853
2854	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2855	    ibmf_i_post_recv_buffer_start, IBMF_TNF_TRACE, "",
2856	    "ibmf_i_post_recv_buffer() enter, cip = %p, qpp = %p, "
2857	    "qp_hdl = %p, block = %d\n", tnf_opaque, cip, cip,
2858	    tnf_opaque, qpp, qpp, tnf_opaque, qp_hdl, ibmf_qp_handle,
2859	    tnf_uint, block, block);
2860
2861	/*
2862	 * if we haven't hit the max wqes per qp, attempt to allocate a recv
2863	 * wqe and post it to the recv queue.
2864	 * It is possible for more than one thread to get through this
2865	 * check below and post wqes that could push us above the
2866	 * ibmf_recv_wqes_posted_per_qp. We catch that case when the recv
2867	 * completion is signaled.
2868	 */
2869	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
2870
2871	/* Get the WQE kmem cache pointer based on the QP type */
2872	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
2873		kmem_cachep = cip->ci_recv_wqes_cache;
2874	else {
2875		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
2876		kmem_cachep = altqp->isq_recv_wqes_cache;
2877	}
2878
2879	/* allocate a receive WQE from the receive WQE kmem cache */
2880	recv_wqep = kmem_cache_alloc(kmem_cachep,
2881	    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2882	if (recv_wqep == NULL) {
2883		/*
2884		 * Attempt to extend the cache and then retry the
2885		 * kmem_cache_alloc()
2886		 */
2887		if (ibmf_i_extend_wqe_cache(cip, ibmf_qp_handle, block) ==
2888		    IBMF_NO_RESOURCES) {
2889			mutex_enter(&cip->ci_mutex);
2890			IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed, 1);
2891			mutex_exit(&cip->ci_mutex);
2892			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2893			    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2894			    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2895			    tnf_string, msg, "alloc recv_wqe failed",
2896			    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2897			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2898			    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2899			    "ibmf_i_post_recv_buffer() exit\n");
2900			return (IBMF_NO_RESOURCES);
2901		} else {
2902			recv_wqep = kmem_cache_alloc(kmem_cachep,
2903			    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2904			if (recv_wqep == NULL) {
2905				/* Allocation failed again. Give up here. */
2906				mutex_enter(&cip->ci_mutex);
2907				IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed,
2908				    1);
2909				mutex_exit(&cip->ci_mutex);
2910				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2911				    ibmf_i_post_recv_buffer_err,
2912				    IBMF_TNF_ERROR, "",
2913				    "ibmf_i_post_recv_buffer(): %s, "
2914				    "status = %d\n",
2915				    tnf_string, msg, "alloc recv_wqe failed",
2916				    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2917				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2918				    ibmf_i_post_recv_buffer_end,
2919				    IBMF_TNF_TRACE, "",
2920				    "ibmf_i_post_recv_buffer() exit\n");
2921				return (IBMF_NO_RESOURCES);
2922			}
2923		}
2924	}
2925
2926	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
2927
2928	/*
2929	 * if the qp handle provided in ibmf_send_pkt() or
2930	 * ibmf_setup_recv_cb() is not the default qp handle
2931	 * for this client, then the wqe must be queued on this qp,
2932	 * else use the default qp handle set up during ibmf_register()
2933	 */
2934	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2935		ibt_qp_handle = qpp->iq_qp_handle;
2936	} else {
2937		ibt_qp_handle =
2938		    ((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_qp_handle;
2939	}
2940
2941	/* allocate memory for the scatter-gather list */
2942	sgl = kmem_zalloc(IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t),
2943	    (block == B_TRUE) ? KM_SLEEP : KM_NOSLEEP);
2944	if (sgl == NULL) {
2945		kmem_cache_free(kmem_cachep, recv_wqep);
2946		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2947		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2948		    "ibmf_i_post_recv_buffer(): %s\n",
2949		    tnf_string, msg, "failed to kmem_zalloc qp ctx");
2950		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2951		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2952		    "ibmf_i_post_recv_buffer() exit\n");
2953		return (IBMF_NO_RESOURCES);
2954	}
2955
2956	/* initialize it */
2957	ibmf_i_init_recv_wqe(qpp, sgl, recv_wqep, ibt_qp_handle,
2958	    ibmf_qp_handle);
2959
2960	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*recv_wqep))
2961
2962	/* and post it */
2963	status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
2964	    1, NULL);
2965
2966	ret = ibmf_i_ibt_to_ibmf_status(status);
2967	if (ret != IBMF_SUCCESS) {
2968		kmem_free(sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
2969		    sizeof (ibt_wr_ds_t));
2970		kmem_cache_free(kmem_cachep, recv_wqep);
2971		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2972		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2973		    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2974		    tnf_string, msg, "ibt_post_recv failed",
2975		    tnf_uint, ibt_status, status);
2976		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2977		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2978		    "ibmf_i_post_recv_buffer() exit\n");
2979		return (ret);
2980	}
2981
2982	mutex_enter(&cip->ci_mutex);
2983	IBMF_ADD32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
2984	mutex_exit(&cip->ci_mutex);
2985	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2986		mutex_enter(&qpp->iq_mutex);
2987		qpp->iq_rwqes_posted++;
2988		mutex_exit(&qpp->iq_mutex);
2989		mutex_enter(&cip->ci_mutex);
2990		cip->ci_wqes_alloced++;
2991		mutex_exit(&cip->ci_mutex);
2992	} else {
2993		mutex_enter(&altqp->isq_mutex);
2994		altqp->isq_wqes_alloced++;
2995		altqp->isq_rwqes_posted++;
2996		mutex_exit(&altqp->isq_mutex);
2997	}
2998
2999	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_post_recv_buffer_end,
3000	    IBMF_TNF_TRACE, "", "ibmf_i_post_recv_buffer() exit\n");
3001
3002	return (ret);
3003}
3004
3005/*
3006 * ibmf_i_mgt_class_to_hdr_sz_off():
3007 *	Determine class header offser and size for management classes
3008 */
3009void
3010ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp,
3011    uint32_t *offp)
3012{
3013	uint32_t	hdr_sz = 0, hdr_off = 0;
3014
3015	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3016	    ibmf_i_mgt_class_to_hdr_sz_off_start, IBMF_TNF_TRACE, "",
3017	    "ibmf_i_mgt_class_to_hdr_sz_off(): mgt_class = 0x%x\n",
3018	    tnf_uint, mgt_class, mgt_class);
3019
3020	switch (mgt_class) {
3021	case MAD_MGMT_CLASS_SUBN_LID_ROUTED :
3022	case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE :
3023	case MAD_MGMT_CLASS_PERF :
3024	case MAD_MGMT_CLASS_BM :
3025	case MAD_MGMT_CLASS_DEV_MGT :
3026	case MAD_MGMT_CLASS_SNMP :
3027	case MAD_MGMT_CLASS_COMM_MGT:
3028		hdr_sz = IBMF_MAD_CL_HDR_SZ_1;
3029		hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3030		break;
3031	case MAD_MGMT_CLASS_SUBN_ADM :
3032		hdr_sz = IBMF_MAD_CL_HDR_SZ_2;
3033		hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3034		break;
3035	default:
3036		if (((mgt_class >= MAD_MGMT_CLASS_VENDOR_START) &&
3037		    (mgt_class <= MAD_MGMT_CLASS_VENDOR_END)) ||
3038		    ((mgt_class >= MAD_MGMT_CLASS_APPLICATION_START) &&
3039		    (mgt_class <= MAD_MGMT_CLASS_APPLICATION_END))) {
3040			hdr_sz = IBMF_MAD_CL_HDR_SZ_3;
3041			hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3042		} else if ((mgt_class >= MAD_MGMT_CLASS_VENDOR2_START) &&
3043		    (mgt_class <= MAD_MGMT_CLASS_VENDOR2_END)) {
3044			hdr_sz = IBMF_MAD_CL_HDR_SZ_4;
3045			hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3046		} else {
3047			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3048			    ibmf_i_mgt_class_to_hdr_sz_off_start,
3049			    IBMF_TNF_TRACE, "",
3050			    "ibmf_i_mgt_class_to_hdr_sz_off():"
3051			    "got illegal management class = 0x%x\n",
3052			    tnf_uint, mgt_class, mgt_class);
3053		}
3054		break;
3055	}
3056
3057
3058
3059	*szp = hdr_sz;
3060	*offp = hdr_off;
3061
3062	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3063	    ibmf_i_mgt_class_to_hdr_sz_off_end, IBMF_TNF_TRACE, "",
3064	    "ibmf_i_mgt_class_to_hdr_sz_off() exit,hdr_sz = %d, hdr_off = %d\n",
3065	    tnf_uint, hdr_sz, hdr_sz, tnf_uint, hdr_off, hdr_off);
3066}
3067
3068/*
3069 * ibmf_i_lookup_client_by_mgmt_class():
3070 *	Lookup the client context based on the management class of
3071 *	the incoming packet
3072 */
3073int
3074ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num,
3075    ibmf_client_type_t class, ibmf_client_t **clientpp)
3076{
3077	ibmf_client_t 		*clientp;
3078	ibmf_client_info_t	*client_infop;
3079
3080	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
3081	    ibmf_i_lookup_client_by_mgmt_class_start, IBMF_TNF_TRACE, "",
3082	    "ibmf_i_lookup_client_by_mgmt_class() enter, cip = %p, "
3083	    "port_num = %d, class = 0x%x\n", tnf_opaque, cip, ibmf_cip,
3084	    tnf_int, port, port_num, tnf_opaque, class, class);
3085
3086	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
3087
3088	mutex_enter(&ibmf_cip->ci_clients_mutex);
3089
3090	clientp = ibmf_cip->ci_clients;
3091
3092	/* walk client context list looking for class/portnum match */
3093	while (clientp != NULL) {
3094		client_infop = &clientp->ic_client_info;
3095		if (class == client_infop->client_class &&
3096		    port_num == client_infop->port_num) {
3097			/* found our match */
3098			break;
3099		}
3100		clientp = clientp->ic_next;
3101	}
3102
3103	mutex_exit(&ibmf_cip->ci_clients_mutex);
3104
3105	if (clientp != NULL) {
3106		*clientpp = clientp;
3107		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3108		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3109		    "ibmf_i_lookup_client_by_mgmt_class() exit, clp = %p\n",
3110		    tnf_opaque, clientp, clientp);
3111		return (IBMF_SUCCESS);
3112	} else {
3113		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3114		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3115		    "ibmf_i_lookup_client_by_mgmt_class() failure exit\n");
3116		return (IBMF_FAILURE);
3117	}
3118}
3119
3120/*
3121 * ibmf_i_get_pkeyix():
3122 *	Get the pkey index of the pkey in the pkey table of the specified
3123 *	port. Take into account the partition membership.
3124 */
3125int
3126ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey, uint8_t port,
3127    ib_pkey_t *pkeyixp)
3128{
3129	ib_pkey_t		tpkey;
3130	ibt_status_t		ibt_status;
3131
3132	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_start,
3133	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() enter, hcahdl = %p, "
3134	    "pkey = 0x%x, port = %d\n", tnf_opaque, hcahdl, hca_handle,
3135	    tnf_int, pkey, pkey, tnf_int, port, port);
3136
3137	/*
3138	 * If the client specifies the FULL membership pkey and the
3139	 * pkey is not in the table, this function should fail.
3140	 */
3141	if (pkey & IBMF_PKEY_MEMBERSHIP_MASK) {
3142		ibt_status = ibt_pkey2index(hca_handle, port,
3143		    pkey, pkeyixp);
3144		if (ibt_status != IBT_SUCCESS) {
3145			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3146			    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3147			    "ibmf_i_get_pkeyix() error status = %d\n",
3148			    tnf_uint, ibt_status, ibt_status);
3149			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3150			    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3151			    "ibmf_i_get_pkeyix() exit\n");
3152			return (IBMF_TRANSPORT_FAILURE);
3153		}
3154		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3155		    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() exit\n");
3156		return (IBMF_SUCCESS);
3157	}
3158
3159	/*
3160	 * Limited member pkey processing
3161	 * Check if this limited member pkey is in the pkey table
3162	 */
3163	ibt_status = ibt_pkey2index(hca_handle, port, pkey, pkeyixp);
3164	if (ibt_status == IBT_SUCCESS) {
3165		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3166		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3167		    "ibmf_i_get_pkeyix() exit\n");
3168		return (IBMF_SUCCESS);
3169	}
3170
3171	/*
3172	 * Could not find the limited member version of the pkey.
3173	 * Now check if the full member version of the pkey is in the
3174	 * pkey table. If not, fail the call.
3175	 */
3176	tpkey = pkey | IBMF_PKEY_MEMBERSHIP_MASK;
3177	ibt_status = ibt_pkey2index(hca_handle, port, tpkey, pkeyixp);
3178	if (ibt_status != IBT_SUCCESS) {
3179		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3180		    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3181		    "ibmf_i_get_pkeyix() error status = %d\n",
3182		    tnf_uint, ibt_status, ibt_status);
3183		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3184		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3185		    "ibmf_i_get_pkeyix() exit\n");
3186		return (IBMF_TRANSPORT_FAILURE);
3187	}
3188
3189	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3190	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix(): pkey_ix = %d\n",
3191	    tnf_int, pkeyix, *pkeyixp);
3192	return (IBMF_SUCCESS);
3193}
3194
3195/*
3196 * ibmf_i_pkey_ix_to_key():
3197 *	Figure out pkey from pkey index
3198 */
3199int
3200ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix,
3201    ib_pkey_t *pkeyp)
3202{
3203	ibt_status_t		ibt_status;
3204
3205	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_start,
3206	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() enter\n");
3207
3208	ibt_status = ibt_index2pkey(cip->ci_ci_handle, port_num, pkey_ix,
3209	    pkeyp);
3210	if (ibt_status != IBT_SUCCESS) {
3211		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3212		    ibmf_i_pkey_ix_to_key, IBMF_TNF_TRACE, "",
3213		    "ibmf_i_pkey_ix_to_key(): ibt_index2pkey failed for "
3214		    " pkey index %d \n", tnf_uint, pkey_ix, pkey_ix);
3215		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3216		    ibmf_i_pkey_ix_to_key_end,
3217		    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3218		return (IBMF_TRANSPORT_FAILURE);
3219	}
3220
3221	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_end,
3222	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3223
3224	return (IBMF_SUCCESS);
3225}
3226
3227/*
3228 * ibmf_i_ibt_to_ibmf_status():
3229 *	Map IBT return code to IBMF return code
3230 */
3231int
3232ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status)
3233{
3234	int ibmf_status;
3235
3236	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_start,
3237	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() enter, "
3238	    "status = %d\n", tnf_uint, ibt_status, ibt_status);
3239
3240	switch (ibt_status) {
3241
3242	case IBT_SUCCESS:
3243		ibmf_status = IBMF_SUCCESS;
3244		break;
3245
3246	case IBT_INSUFF_KERNEL_RESOURCE:
3247	case IBT_INSUFF_RESOURCE:
3248	case IBT_QP_FULL:
3249		ibmf_status = IBMF_NO_RESOURCES;
3250		break;
3251
3252	case IBT_HCA_IN_USE:
3253	case IBT_QP_IN_USE:
3254	case IBT_CQ_BUSY:
3255	case IBT_PD_IN_USE:
3256	case IBT_MR_IN_USE:
3257		ibmf_status = IBMF_BUSY;
3258		break;
3259
3260	default:
3261		ibmf_status = IBMF_FAILURE;
3262		break;
3263	}
3264
3265	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_end,
3266	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() exit, "
3267	    "ibt_status = %d, ibmf_status = %d\n", tnf_uint, ibt_status,
3268	    ibt_status, tnf_int, ibmf_status, ibmf_status);
3269
3270	return (ibmf_status);
3271}
3272
3273/*
3274 * ibmf_i_ibt_wc_to_ibmf_status():
3275 *	Map work completion code to IBMF return code
3276 */
3277int
3278ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status)
3279{
3280	int ibmf_status;
3281
3282	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3283	    ibmf_i_ibt_wc_to_ibmf_status_start, IBMF_TNF_TRACE, "",
3284	    "ibmf_i_ibt_to_ibmf_status() enter, status = %d\n",
3285	    tnf_uint, ibt_wc_status, ibt_wc_status);
3286
3287	switch (ibt_wc_status) {
3288
3289	case IBT_WC_SUCCESS:
3290		ibmf_status = IBMF_SUCCESS;
3291		break;
3292
3293	default:
3294		ibmf_status = IBMF_FAILURE;
3295		break;
3296	}
3297
3298	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3299	    ibmf_i_ibt_wc_to_ibmf_status_end, IBMF_TNF_TRACE, "",
3300	    "ibmf_i_ibt_to_ibmf_status() exit, wc_status = %d, "
3301	    "ibmf_status = %d\n", tnf_uint, ibt_wc_status,
3302	    ibt_wc_status, tnf_int, ibmf_status, ibmf_status);
3303
3304	return (ibmf_status);
3305}
3306
3307/*
3308 * ibmf_i_is_ibmf_handle_valid():
3309 *	Validate the ibmf handle
3310 */
3311int
3312ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle)
3313{
3314	ibmf_ci_t	*cip;
3315	ibmf_client_t	*clp, *clientp = (ibmf_client_t *)ibmf_handle;
3316
3317	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3318	    ibmf_i_is_ibmf_handle_valid_start, IBMF_TNF_TRACE, "",
3319	    "ibmf_i_is_ibmf_handle_valid() enter\n");
3320
3321	mutex_enter(&ibmf_statep->ibmf_mutex);
3322
3323	cip = ibmf_statep->ibmf_ci_list;
3324
3325	/* iterate through all the channel interace contexts */
3326	while (cip != NULL) {
3327
3328		mutex_enter(&cip->ci_clients_mutex);
3329
3330		clp = cip->ci_clients;
3331
3332		/* search all registration contexts for this ci */
3333		while (clp != NULL) {
3334			if (clp == clientp)
3335				break;
3336			clp = clp->ic_next;
3337		}
3338
3339		mutex_exit(&cip->ci_clients_mutex);
3340
3341		if (clp == clientp) {
3342			/* ci found */
3343			break;
3344		} else {
3345			/* ci not found, move onto next ci */
3346			cip = cip->ci_next;
3347		}
3348	}
3349
3350	mutex_exit(&ibmf_statep->ibmf_mutex);
3351
3352	if (cip != NULL) {
3353		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3354		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3355		    "ibmf_i_is_ibmf_handle_valid() exit\n");
3356		return (IBMF_SUCCESS);
3357	} else {
3358		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3359		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3360		    "ibmf_i_is_ibmf_handle_valid() failure exit\n");
3361		return (IBMF_FAILURE);
3362	}
3363}
3364
3365/*
3366 * ibmf_i_is_qp_handle_valid():
3367 *	Validate the QP handle
3368 */
3369int
3370ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,
3371    ibmf_qp_handle_t ibmf_qp_handle)
3372{
3373	ibmf_client_t	*clientp = (ibmf_client_t *)ibmf_handle;
3374	ibmf_alt_qp_t	*alt_qp, *qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
3375	ibmf_ci_t	*cip = clientp->ic_myci;
3376
3377	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3378	    ibmf_i_is_qp_handle_valid_start, IBMF_TNF_TRACE, "",
3379	    "ibmf_i_is_qp_handle_valid() enter\n");
3380
3381	/* the default qp handle is always valid */
3382	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
3383		return (IBMF_SUCCESS);
3384
3385	mutex_enter(&cip->ci_mutex);
3386
3387	alt_qp = cip->ci_alt_qp_list;
3388
3389	while (alt_qp != NULL) {
3390		if (alt_qp == qpp) {
3391			/* qp handle found */
3392			break;
3393		} else {
3394			/* qp handle not found, get next qp on list */
3395			alt_qp = alt_qp->isq_next;
3396		}
3397	}
3398
3399	mutex_exit(&cip->ci_mutex);
3400
3401	if (alt_qp != NULL) {
3402		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3403		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3404		    "ibmf_i_is_qp_handle_valid() exit\n");
3405		return (IBMF_SUCCESS);
3406	} else {
3407		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3408		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3409		    "ibmf_i_is_qp_handle_valid() failure exit\n");
3410		return (IBMF_FAILURE);
3411	}
3412}
3413
3414void
3415ibmf_dprintf(int l, const char *fmt, ...)
3416{
3417	va_list ap;
3418
3419	if ((l) > ibmf_trace_level) {
3420
3421		return;
3422	}
3423
3424	va_start(ap, fmt);
3425	(void) vprintf(fmt, ap);
3426	va_end(ap);
3427}
3428
3429/*
3430 * ibmf_setup_term_ctx():
3431 * Sets up a message context that is the duplicate of the one
3432 * passed in the regmsgimplp argument. The duplicate message context
3433 * is not visible to the client. It is managed internally by ibmf
3434 * to process the RMPP receiver termination flow logic for the
3435 * transaction while the client is notified of the completion of the
3436 * same transaction (i.e. all the solicited data has been received).
3437 */
3438int
3439ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp)
3440{
3441	ibmf_msg_impl_t	*msgimplp;
3442	size_t		offset;
3443	uint32_t	cl_hdr_sz, cl_hdr_off;
3444	int		status;
3445
3446	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3447	    ibmf_setup_term_ctx_start, IBMF_TNF_TRACE, "",
3448	    "ibmf_setup_term_ctx() enter\n");
3449
3450	/*
3451	 * Allocate the termination message context
3452	 */
3453	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
3454	    KM_NOSLEEP);
3455	if (msgimplp == NULL) {
3456		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3457		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3458		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3459		    "message mem allocation failure");
3460		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3461		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3462		    "ibmf_setup_term_ctx() exit\n");
3463		return (IBMF_NO_RESOURCES);
3464	}
3465
3466	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
3467
3468	/* Copy the message context to the termination message structure */
3469	*msgimplp = *regmsgimplp;
3470
3471	/* Initialize the message mutex */
3472	mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
3473
3474	/*
3475	 * Allocate enough memory for the MAD header only.
3476	 */
3477	msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
3478	    (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
3479	if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
3480		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3481		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3482		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3483		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3484		    "recv buf mem allocation failure");
3485		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3486		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3487		    "ibmf_setup_term_ctx() exit\n");
3488		return (IBMF_NO_RESOURCES);
3489	}
3490
3491	/* Copy over just the MAD header contents */
3492	bcopy((const void *)regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3493	    (void *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3494	    sizeof (ib_mad_hdr_t));
3495
3496	offset = sizeof (ib_mad_hdr_t);
3497	ibmf_i_mgt_class_to_hdr_sz_off(
3498	    regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr->MgmtClass,
3499	    &cl_hdr_sz, &cl_hdr_off);
3500	offset += cl_hdr_off;
3501
3502	/*
3503	 * Copy the management class header
3504	 */
3505	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
3506	    (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr + offset;
3507	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len =
3508	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len;
3509	bcopy((void *)regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3510	    (void *)msgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3511	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len);
3512
3513	/*
3514	 * Clear the termination message timers copied from the regular message
3515	 * since ibmf_i_set_timer() expects them to be cleared.
3516	 */
3517	msgimplp->im_rp_timeout_id = 0;
3518	msgimplp->im_tr_timeout_id = 0;
3519
3520	/* Mark this message as being in a receiver RMPP mode */
3521	msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
3522
3523	/* Mark this message as being a "termination flow" message */
3524	msgimplp->im_flags |= IBMF_MSG_FLAGS_TERMINATION;
3525
3526	/*
3527	 * Clear the IBMF_MSG_FLAGS_SET_TERMINATION copied over from the regular
3528	 * message.
3529	 */
3530	msgimplp->im_flags &= ~IBMF_MSG_FLAGS_SET_TERMINATION;
3531
3532	/*
3533	 * Clear the trans_state RECV_DONE and DONE flags so that the
3534	 * protocol continues with the termination message context.
3535	 */
3536	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_RECV_DONE;
3537	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_DONE;
3538
3539	/* Clear out references to the old UD dest handles */
3540	msgimplp->im_ibmf_ud_dest = NULL;
3541	msgimplp->im_ud_dest = NULL;
3542
3543	/*
3544	 * Request new UD dest resources for the termination phase.
3545	 * The old UD dest resources are freed when the IBMF client
3546	 * calls ibmf_free_msg(), so they cannot be relied on to exist
3547	 * when the RMPP termination loop completes.
3548	 */
3549	status = ibmf_i_alloc_ud_dest(clientp, msgimplp, &msgimplp->im_ud_dest,
3550	    B_FALSE);
3551	if (status != IBMF_SUCCESS) {
3552		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3553		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
3554		    ibmf_setup_term_ctx_err, IBMF_TNF_ERROR, "",
3555		    "ibmf_setup_term_ctx(): %s, status = %d\n",
3556		    tnf_string, msg, "UD destination resource allocation"
3557		    " failed", tnf_int, ibmf_status, status);
3558		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3559		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3560		    "ibmf_setup_term_ctx() exit\n");
3561		return (status);
3562	}
3563
3564	/*
3565	 * Add the message to the termination client list by virtue of
3566	 * having the IBMF_MSG_FLAGS_TERMINATION "im_flags" flag set.
3567	 */
3568	ibmf_i_client_add_msg(clientp, msgimplp);
3569
3570	/*
3571	 * Increase the "allocted messages" count so that the client
3572	 * does not unregister before this message has been freed.
3573	 * This is necessary because we want the client context to
3574	 * be around when the receive timeout expires for this termination
3575	 * loop, otherwise the code will access freed memory and crash.
3576	 */
3577	mutex_enter(&clientp->ic_mutex);
3578	clientp->ic_msgs_alloced++;
3579	mutex_exit(&clientp->ic_mutex);
3580
3581	mutex_enter(&msgimplp->im_mutex);
3582	/* Set the response timer for the termination message. */
3583	ibmf_i_set_timer(ibmf_i_recv_timeout, msgimplp, IBMF_RESP_TIMER);
3584	mutex_exit(&msgimplp->im_mutex);
3585
3586	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_setup_term_ctx_end,
3587	    IBMF_TNF_TRACE, "", "ibmf_setup_term_ctx() exit\n");
3588
3589	return (IBMF_SUCCESS);
3590}
3591