1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25/* Solaris Open Fabric kernel verbs */
26
27#include <sys/types.h>
28#include <sys/ddi.h>
29#include <sys/sunddi.h>
30#include <sys/modctl.h>
31#include <sys/ib/clients/of/rdma/ib_verbs.h>
32#include <sys/ib/clients/of/rdma/ib_addr.h>
33#include <sys/ib/clients/of/rdma/rdma_cm.h>
34#include <sys/ib/clients/of/sol_ofs/sol_kverb_impl.h>
35
/*
 * NOTE(review): statep is not referenced in the visible part of this
 * file; presumably module soft state -- confirm against the rest of it.
 */
static void *statep;
/* tag passed as first argument to the SOL_OFS_DPRINTF_L* debug macros */
char *sol_kverbs_dbg_str = "sol_kverbs";

/* registered clients, linked through ofs_client_t.client_list */
static llist_head_t client_list = LLIST_HEAD_INIT(client_list);
kmutex_t clist_lock; /* mutex for client_list */

/* forward declaration: alloc_ibt_client() refers to it before it is defined */
static void ofs_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
44
45/*
46 * set ibt_client_t members. clnt->ib_client must be set before
47 * this func is called.
48 */
49static int
50alloc_ibt_client(ofs_client_t *clnt)
51{
52	int namelen;
53	ASSERT(clnt->ib_client != NULL);
54
55	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
56	    "alloc_ibt_client: client: 0x%p", clnt);
57
58	/*
59	 * double-check the name string. if it's longer than MAXNAMELEN
60	 * including the string terminator, assuming the name is invalid,
61	 * return EINVAL.
62	 */
63	namelen = strlen(clnt->ib_client->name);
64	if (namelen >= MAXNAMELEN) {
65		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
66		    "alloc_ibt_client: client: 0x%p => "
67		    "namelen(%d) is larger than MAXNAMELEN", clnt, namelen);
68		return (-EINVAL);
69	}
70	clnt->ibt_client.mi_clnt_name = kmem_zalloc(namelen + 1, KM_NOSLEEP);
71	if (clnt->ibt_client.mi_clnt_name == NULL) {
72		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
73		    "alloc_ibt_client: client: 0x%p => "
74		    "no sufficient memory", clnt);
75		return (-ENOMEM);
76	}
77	bcopy(clnt->ib_client->name, clnt->ibt_client.mi_clnt_name, namelen);
78	clnt->ibt_client.mi_ibt_version = IBTI_V_CURR;
79	if (clnt->ib_client->dip) {
80		clnt->ibt_client.mi_clnt_class = IBT_GENERIC;
81	} else {
82		clnt->ibt_client.mi_clnt_class = IBT_GENERIC_MISC;
83	}
84	clnt->ibt_client.mi_async_handler = ofs_async_handler;
85
86	return (0);
87}
88
89static void
90free_ibt_client(ofs_client_t *clnt)
91{
92	int namelen = strlen(clnt->ib_client->name);
93	ASSERT(namelen < MAXNAMELEN);
94
95	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
96	    "free_ibt_client: client: 0x%p", clnt);
97
98	kmem_free(clnt->ibt_client.mi_clnt_name, namelen + 1);
99	clnt->ibt_client.mi_clnt_name = NULL;
100}
101
102/*
103 * get_device() returns a pointer to struct ib_devcie with
104 * the same guid as one passed to the function.
105 */
106static ib_device_t *
107get_device(ofs_client_t *ofs_client, ib_guid_t guid)
108{
109	ib_device_t *device;
110	llist_head_t *entry;
111
112	ASSERT(RW_LOCK_HELD(&ofs_client->lock));
113
114	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
115	    "get_device: client: 0x%p, guid:0x%p",
116	    ofs_client, (void *)(uintptr_t)htonll(guid));
117
118	list_for_each(entry, &ofs_client->device_list) {
119		device = entry->ptr;
120		if (device->node_guid == htonll(guid)) {
121			ASSERT(device->reg_state == IB_DEV_CLOSE);
122			ASSERT(device->node_type == RDMA_NODE_IB_CA);
123			ASSERT(device->clnt_hdl == (ofs_client_p_t)ofs_client);
124			return (device);
125		}
126	}
127
128	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
129	    "get_device: client: 0x%p, guid:0x%p => no match guid",
130	    ofs_client, (void *)(uintptr_t)htonll(guid));
131
132	return (NULL);
133}
134
135/*
136 * ofs_async_handler() is a delegated function to handle asynchrnonous events,
137 * which dispatches each event to corresponding qp/cq handlers registered
138 * with ib_create_qp() and/or ib_create_cq().
139 */
140static void
141ofs_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
142    ibt_async_event_t *event)
143{
144	ofs_client_t 	*ofs_client = (ofs_client_t *)clntp;
145	struct ib_event ib_event;
146	struct ib_qp 	*qpp;
147	struct ib_cq	*cqp;
148
149
150	ASSERT(ofs_client != NULL);
151
152	cqp = event->ev_cq_hdl ? ibt_get_cq_private(event->ev_cq_hdl) : NULL;
153	qpp = event->ev_chan_hdl ?
154	    ibt_get_qp_private(event->ev_chan_hdl) : NULL;
155
156	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
157	    "ofs_async_handler: client: 0x%p, hca_hdl: 0x%p, code:0x%x, "
158	    "event->qp: 0x%p, event->cq: 0x%p, event->srq: 0x%p "
159	    "event->guid: 0x%p, event->port: 0x%x",
160	    clntp, hdl, code, qpp, cqp, event->ev_srq_hdl,
161	    (void *)(uintptr_t)event->ev_hca_guid, event->ev_port);
162
163	bzero(&ib_event, sizeof (struct ib_event));
164	switch (code) {
165	case IBT_EVENT_PATH_MIGRATED:
166		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
167		    IB_EVENT_PATH_MIG);
168		return;
169	case IBT_EVENT_SQD:
170		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
171		    IB_EVENT_SQ_DRAINED);
172		return;
173	case IBT_EVENT_COM_EST:
174		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
175		    IB_EVENT_COMM_EST);
176		return;
177	case IBT_ERROR_CATASTROPHIC_CHAN:
178		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
179		    IB_EVENT_QP_FATAL);
180		return;
181	case IBT_ERROR_INVALID_REQUEST_CHAN:
182		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
183		    IB_EVENT_QP_REQ_ERR);
184		return;
185	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
186		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
187		    IB_EVENT_QP_ACCESS_ERR);
188		return;
189	case IBT_ERROR_PATH_MIGRATE_REQ:
190		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
191		    IB_EVENT_PATH_MIG);
192		return;
193	case IBT_EVENT_EMPTY_CHAN:
194		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
195		    IB_EVENT_QP_LAST_WQE_REACHED);
196		return;
197	case IBT_ERROR_CQ:
198		FIRE_CQ_EVENT(ofs_client, hdl, ib_event, cqp,
199		    IB_EVENT_CQ_ERR);
200		return;
201	case IBT_HCA_ATTACH_EVENT:
202	{
203		ib_device_t	*device;
204		int		rtn;
205
206		/* re-use the device once it was created */
207		rw_enter(&ofs_client->lock, RW_WRITER);
208		device = get_device(ofs_client, event->ev_hca_guid);
209		if (device == NULL) {
210			device = kmem_alloc(sizeof (ib_device_t), KM_SLEEP);
211			device->node_type = RDMA_NODE_IB_CA;
212			device->reg_state = IB_DEV_CLOSE;
213			device->clnt_hdl = (ofs_client_p_t)ofs_client;
214			device->node_guid = htonll(event->ev_hca_guid);
215			device->data = NULL;
216			/* add this HCA */
217			ofs_client->hca_num++;
218			llist_head_init(&device->list, device);
219			llist_add_tail(&device->list, &ofs_client->device_list);
220		}
221		device->hca_hdl = NULL;
222		device->local_dma_lkey = 0;
223		device->phys_port_cnt = 0;
224
225		/* open this HCA */
226		rtn = ibt_open_hca(ofs_client->ibt_hdl, event->ev_hca_guid,
227		    &device->hca_hdl);
228		if (rtn == IBT_SUCCESS) {
229			ibt_hca_attr_t hattr;
230
231			ofs_client->hca_open_num++;
232			device->reg_state = IB_DEV_OPEN;
233			ibt_set_hca_private(device->hca_hdl, device);
234
235			rtn = ibt_query_hca(device->hca_hdl, &hattr);
236			if (rtn != IBT_SUCCESS) {
237				device->reg_state = IB_DEV_CLOSE;
238				rtn = ibt_close_hca(device->hca_hdl);
239				ASSERT(rtn == IBT_SUCCESS);
240				ofs_client->hca_open_num--;
241				return;
242			}
243
244			(void) sprintf(device->name, "%x:%x:%x",
245			    hattr.hca_vendor_id, hattr.hca_device_id,
246			    hattr.hca_version_id);
247			device->local_dma_lkey = hattr.hca_reserved_lkey;
248			device->phys_port_cnt = hattr.hca_nports;
249			ibt_set_hca_private(device->hca_hdl, device);
250
251			/* invoke client's callback */
252			if (ofs_client->ib_client->add) {
253				ofs_client->ib_client->add(device);
254			}
255		}
256		rw_exit(&ofs_client->lock);
257
258		return;
259	}
260	case IBT_HCA_DETACH_EVENT:
261	{
262		struct ib_device *device;
263
264		rw_enter(&ofs_client->lock, RW_WRITER);
265		device = ibt_get_hca_private(hdl);
266		if (device->reg_state == IB_DEV_OPEN) {
267			ibt_status_t rtn;
268			/* invoke client's callback */
269			if (ofs_client->ib_client->remove) {
270				ofs_client->ib_client->remove(device);
271			}
272			/* change the state only */
273			device->reg_state = IB_DEV_CLOSE;
274			/* close this HCA */
275			rtn = ibt_close_hca(device->hca_hdl);
276			ASSERT(rtn == IBT_SUCCESS);
277			ofs_client->hca_open_num--;
278		}
279		rw_exit(&ofs_client->lock);
280
281		return;
282	}
283	case IBT_EVENT_LIMIT_REACHED_SRQ:
284	case IBT_ERROR_CATASTROPHIC_SRQ:
285	default:
286		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
287		    "sol_ofs does not support this event(0x%x).\n"
288		    "\t clntp=0x%p, hca_hdl=0x%p, code=%d, eventp=0x%p\n",
289		    code, clntp, hdl, code, event);
290		return;
291	}
292}
293
294/*
295 * ib_register_client - Register an IB client
296 * @client:Client to register
297 *
298 * Upper level users of the IB drivers can use ib_register_client() to
299 * register callbacks for IB device addition and removal.  When an IB
300 * device is added, each registered client's add method will be called
301 * (in the order the clients were registered), and when a device is
302 * removed, each client's remove method will be called (in the reverse
303 * order that clients were registered).  In addition, when
304 * ib_register_client() is called, the client will receive an add
305 * callback for all devices already registered.
306 *
307 * Note that struct ib_client should have a dip pointer to the client,
308 * which is different from the Linux implementation.
309 */
310int
311ib_register_client(struct ib_client *client)
312{
313	uint_t		i, nhcas; /* number of HCAs */
314	ib_guid_t	*guidp;
315	ofs_client_t	*ofs_client;
316	llist_head_t	*entry, *tmp;
317	ib_device_t	*device;
318	int		rtn;
319
320	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
321	    "ib_register_client: client: 0x%p", client);
322
323	/* get the number of HCAs on this system */
324	if ((nhcas = ibt_get_hca_list(&guidp)) == 0) {
325		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
326		    "ib_register_client: client: 0x%p => no HCA", client);
327		return (-ENXIO);
328	}
329
330	/* allocate a new sol_ofs_client structure */
331	ofs_client = kmem_zalloc(sizeof (ofs_client_t), KM_NOSLEEP);
332	if (ofs_client == NULL) {
333		(void) ibt_free_hca_list(guidp, nhcas);
334		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
335		    "ib_register_client: client: 0x%p => "
336		    "no sufficient memory for ofs_client", client);
337		return (-ENOMEM);
338	}
339
340	/* set members */
341	ofs_client->ib_client = client;
342	if ((rtn = alloc_ibt_client(ofs_client)) != 0) {
343		kmem_free(ofs_client, sizeof (ofs_client_t));
344		(void) ibt_free_hca_list(guidp, nhcas);
345		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
346		    "ib_register_client: client: 0x%p => "
347		    "alloc_ibt_client failed w/ 0x%x", client, rtn);
348		return (rtn);
349	}
350	ofs_client->state = IB_OFS_CLNT_INITIALIZED;
351	llist_head_init(&ofs_client->device_list, NULL);
352	llist_head_init(&ofs_client->client_list, ofs_client);
353	rw_init(&ofs_client->lock, NULL, RW_DEFAULT, NULL);
354
355	/* initialize IB client */
356	rw_enter(&ofs_client->lock, RW_WRITER);
357	if (client->state != IB_CLNT_UNINITIALIZED) {
358		rw_exit(&ofs_client->lock);
359		kmem_free(ofs_client, sizeof (ofs_client_t));
360		(void) ibt_free_hca_list(guidp, nhcas);
361		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
362		    "ib_register_client: client: 0x%p => "
363		    "invalid client state(%d)", client, client->state);
364		return (-EPERM);
365	}
366
367	/* attach this client to IBTF */
368	rtn = ibt_attach(&ofs_client->ibt_client, client->dip, ofs_client,
369	    &ofs_client->ibt_hdl);
370	if (rtn != IBT_SUCCESS) {
371		rw_exit(&ofs_client->lock);
372		free_ibt_client(ofs_client);
373		kmem_free(ofs_client, sizeof (ofs_client_t));
374		(void) ibt_free_hca_list(guidp, nhcas);
375		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
376		    "ib_register_client: client: 0x%p => "
377		    "ibt_attach failed w/ 0x%x", client, rtn);
378		return (-EINVAL);
379	}
380	client->clnt_hdl = (ofs_client_p_t)ofs_client;
381	client->state = IB_CLNT_INITIALIZED;
382
383	/* link this client */
384	mutex_enter(&clist_lock);
385	llist_add_tail(&ofs_client->client_list, &client_list);
386	mutex_exit(&clist_lock);
387
388	/* Open HCAs */
389	ofs_client->hca_num = nhcas;
390	for (i = 0; i < ofs_client->hca_num; i++) {
391		/* allocate the ib_device structure */
392		device = kmem_zalloc(sizeof (ib_device_t), KM_NOSLEEP);
393		if (device == NULL) {
394			rtn = -ENOMEM;
395			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
396			    "ib_register_client: client: 0x%p => "
397			    "no sufficient memory for ib_device", client);
398			goto err;
399		}
400		device->node_guid = htonll(guidp[i]);
401		device->node_type = RDMA_NODE_IB_CA;
402		device->reg_state = IB_DEV_CLOSE;
403		device->clnt_hdl = (ofs_client_p_t)ofs_client;
404		llist_head_init(&device->list, device);
405		llist_add_tail(&device->list, &ofs_client->device_list);
406
407		rtn = ibt_open_hca(ofs_client->ibt_hdl, guidp[i],
408		    &device->hca_hdl);
409		if (rtn == IBT_SUCCESS) {
410			ibt_hca_attr_t hattr;
411
412			ofs_client->hca_open_num++;
413			device->reg_state = IB_DEV_OPEN;
414
415			rtn = ibt_query_hca(device->hca_hdl, &hattr);
416			if (rtn != IBT_SUCCESS) {
417				rtn = -EIO;
418				SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
419				    "ib_register_client: client: 0x%p,"
420				    "hca_hdl: 0x%p ==> "
421				    "ibt_query_hca() failed w/ %d",
422				    client, device->hca_hdl, rtn);
423				goto err;
424			}
425
426			(void) sprintf(device->name, "%x:%x:%x",
427			    hattr.hca_vendor_id, hattr.hca_device_id,
428			    hattr.hca_version_id);
429			device->local_dma_lkey = hattr.hca_reserved_lkey;
430			device->phys_port_cnt = hattr.hca_nports;
431			ibt_set_hca_private(device->hca_hdl, device);
432
433			/* invoke client's callback */
434			if (client->add) {
435				client->add(device);
436			}
437		}
438	}
439	if (ofs_client->hca_open_num == 0) {
440		rtn = -ENXIO;
441		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
442		    "ib_register_client: client: 0x%p => "
443		    "no available HCA", client);
444		goto err;
445	}
446	rw_exit(&ofs_client->lock);
447
448	(void) ibt_free_hca_list(guidp, nhcas);
449	return (0);
450
451err:
452	/* first close all open HCAs */
453	list_for_each(entry, &ofs_client->device_list) {
454		device = entry->ptr;
455		/*
456		 * If it's open already, close it after the remove
457		 * callback.
458		 */
459		if (device->reg_state == IB_DEV_OPEN) {
460			ibt_status_t rtn;
461			/* invoke client's callback */
462			if (client->remove) {
463				client->remove(device);
464			}
465			device->reg_state = IB_DEV_CLOSE;
466			rtn = ibt_close_hca(device->hca_hdl);
467			ASSERT(rtn == IBT_SUCCESS);
468			ofs_client->hca_open_num--;
469		}
470	}
471	ASSERT(ofs_client->hca_open_num == 0);
472
473	/* then free the devices */
474	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
475		device = entry->ptr;
476		/* de-link and free the device */
477		llist_del(entry);
478		kmem_free(device, sizeof (ib_device_t));
479		ofs_client->hca_num--;
480	}
481	ASSERT(ofs_client->hca_num == 0);
482
483	/* delink this client */
484	mutex_enter(&clist_lock);
485	llist_del(&ofs_client->client_list);
486	mutex_exit(&clist_lock);
487
488	/* detach the client */
489	client->clnt_hdl = NULL;
490	client->state = IB_CLNT_UNINITIALIZED;
491	(void) ibt_detach(ofs_client->ibt_hdl);
492	rw_exit(&ofs_client->lock);
493
494	/* free sol_ofs_client */
495	free_ibt_client(ofs_client);
496	kmem_free(ofs_client, sizeof (ofs_client_t));
497
498	(void) ibt_free_hca_list(guidp, nhcas);
499	return (rtn);
500}
501
502/*
503 * ib_unregister_client - Unregister an IB client
504 * @client:Client to unregister
505 *
506 * Upper level users use ib_unregister_client() to remove their client
507 * registration.  When ib_unregister_client() is called, the client
508 * will receive a remove callback for each IB device still registered.
509 */
510void
511ib_unregister_client(struct ib_client *client)
512{
513	ofs_client_t	*ofs_client;
514	ib_device_t	*device;
515	llist_head_t	*entry, *tmp;
516
517	ASSERT(client->state == IB_CLNT_INITIALIZED &&
518	    client->clnt_hdl != NULL);
519
520	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
521	    "ib_unregister_client: client: 0x%p", client);
522
523	ofs_client = (ofs_client_t *)client->clnt_hdl;
524	rw_enter(&ofs_client->lock, RW_WRITER);
525
526	/* first close all open HCAs */
527	list_for_each(entry, &ofs_client->device_list) {
528		device = entry->ptr;
529		/*
530		 * If it's open already, close it after the remove
531		 * callback.
532		 */
533		if (device->reg_state == IB_DEV_OPEN) {
534			ibt_status_t rtn;
535			/* invoke client's callback */
536			if (client->remove) {
537				client->remove(device);
538			}
539			device->reg_state = IB_DEV_CLOSE;
540			rtn = ibt_close_hca(device->hca_hdl);
541			if (rtn != IBT_SUCCESS)
542				SOL_OFS_DPRINTF_L3(
543				    sol_kverbs_dbg_str,
544				    "ib_unregister_client(%p) - "
545				    "ibt_close_hca failed %d",
546				    client, rtn);
547
548			ofs_client->hca_open_num--;
549		}
550	}
551	ASSERT(ofs_client->hca_open_num == 0);
552
553	/* then free the devices */
554	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
555		device = entry->ptr;
556		/* de-link and free the device */
557		llist_del(entry);
558		kmem_free(device, sizeof (ib_device_t));
559		ofs_client->hca_num--;
560	}
561	ASSERT(ofs_client->hca_num == 0);
562
563	/* delink this client */
564	mutex_enter(&clist_lock);
565	llist_del(&ofs_client->client_list);
566	mutex_exit(&clist_lock);
567
568	/* detach the client */
569	client->clnt_hdl = NULL;
570	client->state = IB_CLNT_UNINITIALIZED;
571	(void) ibt_detach(ofs_client->ibt_hdl);
572	rw_exit(&ofs_client->lock);
573
574	/* free sol_ofs_client */
575	free_ibt_client(ofs_client);
576	kmem_free(ofs_client, sizeof (ofs_client_t));
577}
578
579/*
580 * ofs_lock_enter() and ofs_lock_exit() are used to avoid the recursive
581 * rwlock while the client callbacks are invoked.
582 *
583 * Note that the writer lock is used only in the client callback case,
584 * so that the kverb functions wanting to acquire the reader lock can
585 * safely ignore the reader lock if the writer lock is already held.
586 * The writer lock shouldn't be used in no other plances.
587 */
588static inline void
589ofs_lock_enter(krwlock_t *lock)
590{
591	if (!RW_WRITE_HELD(lock)) {
592		rw_enter(lock, RW_READER);
593	}
594}
595
596static inline void
597ofs_lock_exit(krwlock_t *lock)
598{
599	if (!RW_WRITE_HELD(lock)) {
600		rw_exit(lock);
601	}
602}
603
604/*
605 * ib_get_client_data - Get IB client context
606 * @device:Device to get context for
607 * @client:Client to get context for
608 *
609 * ib_get_client_data() returns client context set with
610 * ib_set_client_data() and returns NULL if it's not found.
611 */
612void *ib_get_client_data(struct ib_device *device,
613    struct ib_client *client)
614{
615	ofs_client_t		*ofs_client;
616	struct ib_device	*ib_device;
617	boolean_t		found = B_FALSE;
618	llist_head_t		*entry;
619	void			*data;
620
621	ASSERT(device != 0 && client != 0);
622
623	ofs_client = (ofs_client_t *)client->clnt_hdl;
624	if (ofs_client == 0) {
625		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
626		    "ib_get_client_data: device: 0x%p, client: 0x%p => "
627		    "no ofs_client", device, client);
628		return (NULL);
629	}
630
631	ofs_lock_enter(&ofs_client->lock);
632	list_for_each(entry, &ofs_client->device_list) {
633		ib_device = entry->ptr;
634		if (ib_device->node_guid == device->node_guid) {
635			found = B_TRUE;
636			break;
637		}
638	}
639	if (!found) {
640		ofs_lock_exit(&ofs_client->lock);
641		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
642		    "ib_get_client_data: device: 0x%p, client: 0x%p => "
643		    "no ib_device found", device, client);
644		return (NULL);
645	}
646	data = ib_device->data;
647	ofs_lock_exit(&ofs_client->lock);
648
649	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
650	    "ib_get_client_data: device: 0x%p, client: 0x%p",
651	    device, client);
652
653	return (data);
654}
655
656/*
657 * ib_set_client_data - Set IB client context
658 * @device:Device to set context for
659 * @client:Client to set context for
660 * @data:Context to set
661 *
662 * ib_set_client_data() sets client context that can be retrieved with
663 * ib_get_client_data(). If the specified device is not found, the function
664 * returns w/o any operations.
665 */
666void ib_set_client_data(struct ib_device *device, struct ib_client *client,
667    void *data)
668{
669	ofs_client_t		*ofs_client;
670	struct ib_device	*ib_device;
671	boolean_t		found = B_FALSE;
672	llist_head_t		*entry;
673
674	ASSERT(device != 0 && client != 0);
675
676	ofs_client = (ofs_client_t *)client->clnt_hdl;
677	if (ofs_client == 0) {
678		cmn_err(CE_WARN, "No client context found for %s/%s\n",
679		    device->name, client->name);
680		return;
681	}
682
683	ofs_lock_enter(&ofs_client->lock);
684	list_for_each(entry, &ofs_client->device_list) {
685		ib_device = entry->ptr;
686		if (ib_device->node_guid == device->node_guid) {
687			found = B_TRUE;
688			break;
689		}
690	}
691	if (!found) {
692		cmn_err(CE_WARN, "No client context found for %s/%s\n",
693		    device->name, client->name);
694		ofs_lock_exit(&ofs_client->lock);
695		return;
696	}
697	ib_device->data = data;
698	ofs_lock_exit(&ofs_client->lock);
699
700	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
701	    "ib_set_client_data: device: 0x%p, client: 0x%p, "
702	    "data: 0x%p", device, client, data);
703}
704
705/*
706 * ib_query_device - Query IB device attributes
707 * @device:Device to query
708 * @device_attr:Device attributes
709 *
710 * ib_query_device() returns the attributes of a device through the
711 * @device_attr pointer.
712 */
713int
714ib_query_device(struct ib_device *device, struct ib_device_attr *attr)
715{
716	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
717	ibt_hca_attr_t	hattr;
718	int		rtn;
719
720	ofs_lock_enter(&ofs_client->lock);
721	if (device->reg_state != IB_DEV_OPEN) {
722		ofs_lock_exit(&ofs_client->lock);
723		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
724		    "ib_query_device: device: 0x%p => "
725		    "invalid device state (%d)", device, device->reg_state);
726		return (-ENXIO);
727	}
728	if ((rtn = ibt_query_hca(device->hca_hdl, &hattr)) != IBT_SUCCESS) {
729		ofs_lock_exit(&ofs_client->lock);
730		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
731		    "ib_query_device: device: 0x%p => "
732		    "ibt_query_hca failed w/ 0x%x", device, rtn);
733		return (-EIO);
734	}
735	ofs_lock_exit(&ofs_client->lock);
736
737	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
738	    "ib_query_device: device: 0x%p, attr: 0x%p, rtn: 0x%p",
739	    device, attr, rtn);
740
741	/* OF order is major.micro.minor, so keep it here */
742	attr->fw_ver = (uint64_t)hattr.hca_fw_major_version << 32	|
743	    hattr.hca_fw_micro_version << 16 & 0xFFFF0000		|
744	    hattr.hca_fw_minor_version & 0xFFFF;
745
746	attr->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT		|
747	    IB_DEVICE_PORT_ACTIVE_EVENT					|
748	    IB_DEVICE_SYS_IMAGE_GUID					|
749	    IB_DEVICE_RC_RNR_NAK_GEN;
750	if (hattr.hca_flags & IBT_HCA_PKEY_CNTR) {
751		attr->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
752	}
753	if (hattr.hca_flags & IBT_HCA_QKEY_CNTR) {
754		attr->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
755	}
756	if (hattr.hca_flags & IBT_HCA_AUTO_PATH_MIG) {
757		attr->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
758	}
759	if (hattr.hca_flags & IBT_HCA_AH_PORT_CHECK) {
760		attr->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
761	}
762
763	attr->vendor_id		= hattr.hca_vendor_id;
764	attr->vendor_part_id	= hattr.hca_device_id;
765	attr->hw_ver		= hattr.hca_version_id;
766	attr->sys_image_guid	= htonll(hattr.hca_si_guid);
767	attr->max_mr_size	= ~0ull;
768	attr->page_size_cap	= IBTF2OF_PGSZ(hattr.hca_page_sz);
769	attr->max_qp		= hattr.hca_max_qp;
770	attr->max_qp_wr		= hattr.hca_max_qp_sz;
771	attr->max_sge		= hattr.hca_max_sgl;
772	attr->max_sge_rd	= hattr.hca_max_rd_sgl;
773	attr->max_cq		= hattr.hca_max_cq;
774	attr->max_cqe		= hattr.hca_max_cq_sz;
775	attr->max_mr		= hattr.hca_max_memr;
776	attr->max_pd		= hattr.hca_max_pd;
777	attr->max_qp_rd_atom	= hattr.hca_max_rdma_in_qp;
778	attr->max_qp_init_rd_atom	= hattr.hca_max_rdma_in_qp;
779	attr->max_ee_rd_atom	= hattr.hca_max_rdma_in_ee;
780	attr->max_ee_init_rd_atom	= hattr.hca_max_rdma_in_ee;
781	attr->max_res_rd_atom	= hattr.hca_max_rsc;
782	attr->max_srq		= hattr.hca_max_srqs;
783	attr->max_srq_wr	= hattr.hca_max_srqs_sz -1;
784	attr->max_srq_sge	= hattr.hca_max_srq_sgl;
785	attr->local_ca_ack_delay	= hattr.hca_local_ack_delay;
786	attr->atomic_cap = hattr.hca_flags & IBT_HCA_ATOMICS_GLOBAL ?
787	    IB_ATOMIC_GLOB : (hattr.hca_flags & IBT_HCA_ATOMICS_HCA ?
788	    IB_ATOMIC_HCA : IB_ATOMIC_NONE);
789	attr->max_ee		= hattr.hca_max_eec;
790	attr->max_rdd		= hattr.hca_max_rdd;
791	attr->max_mw		= hattr.hca_max_mem_win;
792	attr->max_pkeys		= hattr.hca_max_port_pkey_tbl_sz;
793	attr->max_raw_ipv6_qp	= hattr.hca_max_ipv6_qp;
794	attr->max_raw_ethy_qp	= hattr.hca_max_ether_qp;
795	attr->max_mcast_grp	= hattr.hca_max_mcg;
796	attr->max_mcast_qp_attach	= hattr.hca_max_qp_per_mcg;
797	attr->max_total_mcast_qp_attach = hattr.hca_max_mcg_qps;
798	attr->max_ah		= hattr.hca_max_ah;
799	attr->max_fmr		= hattr.hca_max_fmrs;
800	attr->max_map_per_fmr	= hattr.hca_opaque9; /* hca_max_map_per_fmr */
801
802	return (0);
803}
804
805/* Protection domains */
806struct ib_pd *
807ib_alloc_pd(struct ib_device *device)
808{
809	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
810	struct ib_pd	*pd;
811	int		rtn;
812
813	if ((pd = kmem_alloc(sizeof (struct ib_pd), KM_NOSLEEP)) == NULL) {
814		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
815		    "ib_alloc_pd: device: 0x%p => no sufficient memory",
816		    device);
817		return ((struct ib_pd *)-ENOMEM);
818	}
819
820	ofs_lock_enter(&ofs_client->lock);
821	if (device->reg_state != IB_DEV_OPEN) {
822		ofs_lock_exit(&ofs_client->lock);
823		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
824		    "ib_alloc_pd: device: 0x%p => invalid device state (%d)",
825		    device, device->reg_state);
826		return ((struct ib_pd *)-ENXIO);
827	}
828
829	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
830	    "ib_alloc_pd: device: 0x%p", device);
831
832	rtn = ibt_alloc_pd(device->hca_hdl, IBT_PD_NO_FLAGS, &pd->ibt_pd);
833	ofs_lock_exit(&ofs_client->lock);
834
835	if (rtn == IBT_SUCCESS) {
836		pd->device = device;
837		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
838		    "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p, "
839		    "rtn: 0x%x", device, pd, pd->ibt_pd, rtn);
840		return (pd);
841	}
842	kmem_free(pd, sizeof (struct ib_pd));
843
844	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
845	    "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p => "
846	    "ibt_alloc_pd failed w/ 0x%x", device, pd, pd->ibt_pd, rtn);
847
848	switch (rtn) {
849	case IBT_INSUFF_RESOURCE:
850		return ((struct ib_pd *)-ENOMEM);
851	case IBT_HCA_HDL_INVALID:
852		return ((struct ib_pd *)-EFAULT);
853	default:
854		return ((struct ib_pd *)-EIO);
855	}
856}
857
858int
859ib_dealloc_pd(struct ib_pd *pd)
860{
861	ofs_client_t *ofs_client = (ofs_client_t *)pd->device->clnt_hdl;
862	int rtn;
863
864	ofs_lock_enter(&ofs_client->lock);
865	if (pd->device->reg_state != IB_DEV_OPEN) {
866		ofs_lock_exit(&ofs_client->lock);
867		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
868		    "ib_dealloc_pd: pd: 0x%p => invalid device state (%d)",
869		    pd, pd->device->reg_state);
870		return (-ENXIO);
871	}
872
873	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
874	    "ib_dealloc_pd: pd: 0x%p", pd);
875
876	rtn = ibt_free_pd(pd->device->hca_hdl, pd->ibt_pd);
877	ofs_lock_exit(&ofs_client->lock);
878
879	if (rtn == IBT_SUCCESS) {
880		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
881		    "ib_dealloc_pd: pd: 0x%p, device: 0x%p, ibt_pd: 0x%p, "
882		    "rtn: 0x%x", pd, pd->device, pd->ibt_pd, rtn);
883		kmem_free(pd, sizeof (struct ib_pd));
884		return (0);
885	}
886
887	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
888	    "ib_dealloc_pd: pd: 0x%p => ibt_free_pd failed w/ 0x%x",
889	    pd, rtn);
890
891	switch (rtn) {
892	case IBT_PD_IN_USE:
893		return (-EBUSY);
894	case IBT_HCA_HDL_INVALID:
895		return (-EFAULT);
896	default:
897		return (-EIO);
898	}
899}
900
901/*
902 * ofs_cq_handler() is a delegated function to handle CQ events,
903 * which dispatches them to corresponding cq handlers registered
904 * with ib_create_cq().
905 */
906static void
907ofs_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
908{
909	struct ib_cq *cq = (struct ib_cq *)ibt_get_cq_private(ibt_cq);
910
911	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
912	    "ofs_cq_handler: ibt_cq: 0x%p, ib_cq: 0x%p, comp_handler: 0x%p, "
913	    "arg: 0x%p", ibt_cq, cq, cq->comp_handler, arg);
914
915	if (cq->comp_handler) {
916		cq->comp_handler(cq, cq->cq_context);
917	}
918}
919
920/*
921 * ib_create_cq - Creates a CQ on the specified device.
922 * @device: The device on which to create the CQ.
923 * @comp_handler: A user-specified callback that is invoked when a
924 *   completion event occurs on the CQ.
925 * @event_handler: A user-specified callback that is invoked when an
926 *   asynchronous event not associated with a completion occurs on the CQ.
927 * @cq_context: Context associated with the CQ returned to the user via
928 *   the associated completion and event handlers.
929 * @cqe: The minimum size of the CQ.
930 * @comp_vector - Completion vector used to signal completion events.
931 *     Must be >= 0 and < context->num_comp_vectors.
932 *
933 * Users can examine the cq structure to determine the actual CQ size.
934 *
935 * Note that comp_vector is not supported currently.
936 */
937struct ib_cq *
938ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler,
939    void (*event_handler)(struct ib_event *, void *), void *cq_context,
940    int cqe, void *comp_vector)
941{
942	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
943	ibt_cq_attr_t	cq_attr;
944	uint32_t	real_size;
945	struct ib_cq	*cq;
946	int		rtn;
947
948	if ((cq = kmem_alloc(sizeof (struct ib_cq), KM_NOSLEEP)) == NULL) {
949		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
950		    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
951		    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
952		    "comp_vector: %p => no sufficient memory", device,
953		    comp_handler, event_handler, cq_context, cqe, comp_vector);
954		return ((struct ib_cq *)-ENOMEM);
955	}
956
957	ofs_lock_enter(&ofs_client->lock);
958	if (device->reg_state != IB_DEV_OPEN) {
959		ofs_lock_exit(&ofs_client->lock);
960		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
961		    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
962		    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
963		    "comp_vector: %p => invalid device state (%d)", device,
964		    comp_handler, event_handler, cq_context, cqe, comp_vector,
965		    device->reg_state);
966		return ((struct ib_cq *)-ENXIO);
967	}
968
969	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
970	    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
971	    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
972	    "comp_vector: %d", device, comp_handler, event_handler,
973	    cq_context, cqe, comp_vector);
974
975	cq_attr.cq_size = cqe;
976	cq_attr.cq_sched = comp_vector;
977	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
978	rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size);
979	ofs_lock_exit(&ofs_client->lock);
980
981	if (rtn == IBT_SUCCESS) {
982		cq->device = device;
983		cq->comp_handler = comp_handler;
984		cq->event_handler = event_handler;
985		cq->cq_context = cq_context;
986		cq->cqe = real_size;
987		ibt_set_cq_private(cq->ibt_cq, cq);
988		ibt_set_cq_handler(cq->ibt_cq, ofs_cq_handler, cq_context);
989		mutex_init(&cq->lock, NULL, MUTEX_DEFAULT, NULL);
990		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
991		    "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p, "
992		    "rtn: 0x%x", device, cqe, cq->ibt_cq, rtn);
993		return (cq);
994	}
995	kmem_free(cq, sizeof (struct ib_cq));
996
997	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
998	    "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p => "
999	    "ibt_alloc_cq failed w/ 0x%x", device, cqe, cq->ibt_cq, rtn);
1000
1001	switch (rtn) {
1002	case IBT_HCA_CQ_EXCEEDED:
1003	case IBT_INVALID_PARAM:
1004	case IBT_HCA_HDL_INVALID:
1005		return ((struct ib_cq *)-EINVAL);
1006	case IBT_INSUFF_RESOURCE:
1007		return ((struct ib_cq *)-ENOMEM);
1008	default:
1009		return ((struct ib_cq *)-EIO);
1010	}
1011}
1012
1013int
1014ib_destroy_cq(struct ib_cq *cq)
1015{
1016	ofs_client_t	*ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
1017	int		rtn;
1018
1019	ofs_lock_enter(&ofs_client->lock);
1020	if (cq->device->reg_state != IB_DEV_OPEN) {
1021		ofs_lock_exit(&ofs_client->lock);
1022		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1023		    "ib_destroy_cq: cq: 0x%p => invalid device state (%d)",
1024		    cq, cq->device->reg_state);
1025		return (-ENXIO);
1026	}
1027
1028	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1029	    "ib_destroy_cq: cq: 0x%p", cq);
1030
1031	/*
1032	 * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed
1033	 * at this moment, but yet alive for a while. Then
1034	 * there is a possibility that this qp is used even after
1035	 * ib_destroy_cq() is called. To distinguish this case from
1036	 * others, clear ibt_qp here.
1037	 */
1038	ibt_set_cq_private(cq->ibt_cq, NULL);
1039
1040	rtn = ibt_free_cq(cq->ibt_cq);
1041	if (rtn == IBT_SUCCESS) {
1042		ofs_lock_exit(&ofs_client->lock);
1043		kmem_free(cq, sizeof (struct ib_cq));
1044		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1045		    "ib_destroy_cq: cq: 0x%p, rtn: 0x%x", cq, rtn);
1046		return (0);
1047	}
1048	ibt_set_cq_private(cq->ibt_cq, cq);
1049	ofs_lock_exit(&ofs_client->lock);
1050
1051	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1052	    "ib_destroy_cq: cq: 0x%p => ibt_free_cq failed w/ 0x%x", cq, rtn);
1053
1054	switch (rtn) {
1055	case IBT_CQ_BUSY:
1056		return (-EBUSY);
1057	case IBT_HCA_HDL_INVALID:
1058	case IBT_CQ_HDL_INVALID:
1059		return (-EINVAL);
1060	default:
1061		return (-EIO);
1062	}
1063}
1064
1065struct ib_qp *
1066ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr)
1067{
1068	ofs_client_t		*ofs_client = pd->device->clnt_hdl;
1069	ibt_qp_alloc_attr_t	attrs;
1070	ibt_chan_sizes_t	sizes;
1071	ib_qpn_t		qpn;
1072	ibt_qp_hdl_t		ibt_qp;
1073	struct ib_qp		*qp;
1074	int			rtn;
1075
1076	/* sanity check */
1077	if (!(qp_init_attr->send_cq && qp_init_attr->recv_cq)) {
1078		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1079		    "ib_create_qp: pd: 0x%p => invalid cqs "
1080		    "(send_cq=0x%p, recv_cq=0x%p)", pd,
1081		    qp_init_attr->send_cq, qp_init_attr->recv_cq);
1082		return ((struct ib_qp *)-EINVAL);
1083	}
1084
1085	/* UC, Raw IPv6 and Raw Ethernet are not supported */
1086	if (qp_init_attr->qp_type == IB_QPT_UC ||
1087	    qp_init_attr->qp_type == IB_QPT_RAW_IPV6 ||
1088	    qp_init_attr->qp_type == IB_QPT_RAW_ETY) {
1089		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1090		    "ib_create_qp: pd: 0x%p => invalid qp_type",
1091		    pd, qp_init_attr->qp_type);
1092		return ((struct ib_qp *)-EINVAL);
1093	}
1094
1095	if ((qp = kmem_alloc(sizeof (struct ib_qp), KM_NOSLEEP)) == NULL) {
1096		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1097		    "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1098		    "no sufficient memory", pd, qp_init_attr);
1099		return ((struct ib_qp *)-ENOMEM);
1100	}
1101
1102	ofs_lock_enter(&ofs_client->lock);
1103	if (pd->device->reg_state != IB_DEV_OPEN) {
1104		ofs_lock_exit(&ofs_client->lock);
1105		kmem_free(qp, sizeof (struct ib_qp));
1106		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1107		    "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1108		    "invalid device state (%d)", pd, qp_init_attr,
1109		    pd->device->reg_state);
1110		return ((struct ib_qp *)-ENXIO);
1111	}
1112
1113	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1114	    "ib_create_qp: pd: 0x%p, event_handler: 0x%p, qp_context: 0x%p, "
1115	    "send_cq: 0x%p, recv_cq: 0x%p, srq: 0x%p, max_send_wr: 0x%x, "
1116	    "max_recv_wr: 0x%x, max_send_sge: 0x%x, max_recv_sge: 0x%x, "
1117	    "max_inline_data: 0x%x, sq_sig_type: %d, qp_type: %d, "
1118	    "port_num: %d",
1119	    pd, qp_init_attr->event_handler, qp_init_attr->qp_context,
1120	    qp_init_attr->send_cq, qp_init_attr->recv_cq, qp_init_attr->srq,
1121	    qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr,
1122	    qp_init_attr->cap.max_send_sge, qp_init_attr->cap.max_recv_sge,
1123	    qp_init_attr->cap.max_inline_data, qp_init_attr->sq_sig_type,
1124	    qp_init_attr->qp_type, qp_init_attr->port_num);
1125
1126	attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1127	if (qp_init_attr->srq) {
1128		attrs.qp_alloc_flags |= IBT_QP_USES_SRQ;
1129	}
1130
1131	attrs.qp_flags = IBT_ALL_SIGNALED | IBT_FAST_REG_RES_LKEY;
1132	if (qp_init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) {
1133		attrs.qp_flags |= IBT_WR_SIGNALED;
1134	}
1135
1136	attrs.qp_scq_hdl = qp_init_attr->send_cq->ibt_cq;
1137	attrs.qp_rcq_hdl = qp_init_attr->recv_cq->ibt_cq;
1138	attrs.qp_pd_hdl = pd->ibt_pd;
1139
1140	attrs.qp_sizes.cs_sq = qp_init_attr->cap.max_send_wr;
1141	attrs.qp_sizes.cs_rq = qp_init_attr->cap.max_recv_wr;
1142	attrs.qp_sizes.cs_sq_sgl = qp_init_attr->cap.max_send_sge;
1143	attrs.qp_sizes.cs_rq_sgl = qp_init_attr->cap.max_recv_sge;
1144	attrs.qp_sizes.cs_inline = qp_init_attr->cap.max_inline_data;
1145
1146	switch (qp_init_attr->qp_type) {
1147	case IB_QPT_RC:
1148		rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_RC_RQP, &attrs,
1149		    &sizes, &qpn, &ibt_qp);
1150		break;
1151	case IB_QPT_UD:
1152		rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_UD_RQP, &attrs,
1153		    &sizes, &qpn, &ibt_qp);
1154		break;
1155	case IB_QPT_SMI:
1156		rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1157		    qp_init_attr->port_num, IBT_SMI_SQP, &attrs, &sizes,
1158		    &ibt_qp);
1159		break;
1160	case IB_QPT_GSI:
1161		rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1162		    qp_init_attr->port_num, IBT_GSI_SQP, &attrs, &sizes,
1163		    &ibt_qp);
1164		break;
1165	default:
1166		/* this should never happens */
1167		ofs_lock_exit(&ofs_client->lock);
1168		kmem_free(qp, sizeof (struct ib_qp));
1169		return ((struct ib_qp *)-EINVAL);
1170	}
1171	ofs_lock_exit(&ofs_client->lock);
1172
1173	if (rtn == IBT_SUCCESS) {
1174		/* fill in ib_qp_cap w/ the real values */
1175		qp_init_attr->cap.max_send_wr = sizes.cs_sq;
1176		qp_init_attr->cap.max_recv_wr = sizes.cs_rq;
1177		qp_init_attr->cap.max_send_sge = sizes.cs_sq_sgl;
1178		qp_init_attr->cap.max_recv_sge = sizes.cs_rq_sgl;
1179		/* max_inline_data is not supported */
1180		qp_init_attr->cap.max_inline_data = 0;
1181		/* fill in ib_qp */
1182		qp->device = pd->device;
1183		qp->pd = pd;
1184		qp->send_cq = qp_init_attr->send_cq;
1185		qp->recv_cq = qp_init_attr->recv_cq;
1186		qp->srq = qp_init_attr->srq;
1187		qp->event_handler = qp_init_attr->event_handler;
1188		qp->qp_context = qp_init_attr->qp_context;
1189		qp->qp_num = qp_init_attr->qp_type == IB_QPT_SMI ? 0 :
1190		    qp_init_attr->qp_type == IB_QPT_GSI ? 1 : qpn;
1191		qp->qp_type = qp_init_attr->qp_type;
1192		qp->ibt_qp = ibt_qp;
1193		ibt_set_qp_private(qp->ibt_qp, qp);
1194		mutex_init(&qp->lock, NULL, MUTEX_DEFAULT, NULL);
1195		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1196		    "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p, "
1197		    "rtn: 0x%x", pd->device, pd, qp_init_attr, rtn);
1198		return (qp);
1199	}
1200	kmem_free(qp, sizeof (struct ib_qp));
1201
1202	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1203	    "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p => "
1204	    "ibt_alloc_(special)_qp failed w/ rtn: 0x%x", pd->device, pd,
1205	    qp_init_attr, rtn);
1206
1207	switch (rtn) {
1208	case IBT_NOT_SUPPORTED:
1209	case IBT_QP_SRV_TYPE_INVALID:
1210	case IBT_CQ_HDL_INVALID:
1211	case IBT_HCA_HDL_INVALID:
1212	case IBT_INVALID_PARAM:
1213	case IBT_SRQ_HDL_INVALID:
1214	case IBT_PD_HDL_INVALID:
1215	case IBT_HCA_SGL_EXCEEDED:
1216	case IBT_HCA_WR_EXCEEDED:
1217		return ((struct ib_qp *)-EINVAL);
1218	case IBT_INSUFF_RESOURCE:
1219		return ((struct ib_qp *)-ENOMEM);
1220	default:
1221		return ((struct ib_qp *)-EIO);
1222	}
1223}
1224
1225int
1226ib_destroy_qp(struct ib_qp *qp)
1227{
1228	ofs_client_t	*ofs_client = (ofs_client_t *)qp->device->clnt_hdl;
1229	int		rtn;
1230
1231	ofs_lock_enter(&ofs_client->lock);
1232	if (qp->device->reg_state != IB_DEV_OPEN) {
1233		ofs_lock_exit(&ofs_client->lock);
1234		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1235		    "ib_destroy_qp: qp: 0x%p => invalid device state (%d)",
1236		    qp, qp->device->reg_state);
1237		return (-ENXIO);
1238	}
1239
1240	/*
1241	 * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed
1242	 * at this moment, but yet alive for a while. Then
1243	 * there is a possibility that this qp is used even after
1244	 * ib_destroy_qp() is called. To distinguish this case from
1245	 * others, clear ibt_qp here.
1246	 */
1247	ibt_set_qp_private(qp->ibt_qp, NULL);
1248
1249	rtn = ibt_free_qp(qp->ibt_qp);
1250	if (rtn == IBT_SUCCESS) {
1251		ofs_lock_exit(&ofs_client->lock);
1252		kmem_free(qp, sizeof (struct ib_qp));
1253		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1254		    "ib_destroy_qp: qp: 0x%p, rtn: 0x%x", qp, rtn);
1255		return (0);
1256	}
1257	ibt_set_qp_private(qp->ibt_qp, qp);
1258	ofs_lock_exit(&ofs_client->lock);
1259
1260	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1261	    "ib_destroy_qp: qp: 0x%p => ibt_free_qp failed w/ 0x%x", qp, rtn);
1262
1263	switch (rtn) {
1264	case IBT_CHAN_STATE_INVALID:
1265	case IBT_HCA_HDL_INVALID:
1266	case IBT_QP_HDL_INVALID:
1267		return (-EINVAL);
1268	default:
1269		return (-EIO);
1270	}
1271}
1272
1273/*
1274 * ib_req_notify_cq - Request completion notification on a CQ.
1275 * @cq: The CQ to generate an event for.
1276 * @flags:
1277 *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1278 *   to request an event on the next solicited event or next work
1279 *   completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1280 *   may also be |ed in to request a hint about missed events, as
1281 *   described below.
1282 *
1283 * Return Value:
1284 *    < 0 means an error occurred while requesting notification
1285 *   == 0 means notification was requested successfully, and if
1286 *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1287 *        were missed and it is safe to wait for another event.  In
1288 *        this case is it guaranteed that any work completions added
1289 *        to the CQ since the last CQ poll will trigger a completion
1290 *        notification event.
1291 *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1292 *        in.  It means that the consumer must poll the CQ again to
1293 *        make sure it is empty to avoid missing an event because of a
1294 *        race between requesting notification and an entry being
1295 *        added to the CQ.  This return value means it is possible
1296 *        (but not guaranteed) that a work completion has been added
1297 *        to the CQ since the last poll without triggering a
1298 *        completion notification event.
1299 *
1300 * Note that IB_CQ_REPORT_MISSED_EVENTS is currently not supported.
1301 */
1302int
1303ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
1304{
1305	ibt_cq_notify_flags_t	notify_type;
1306	int			rtn;
1307	ofs_client_t		*ofs_client = cq->device->clnt_hdl;
1308
1309	ofs_lock_enter(&ofs_client->lock);
1310	if (cq->device->reg_state != IB_DEV_OPEN) {
1311		ofs_lock_exit(&ofs_client->lock);
1312		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1313		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1314		return (-ENXIO);
1315	}
1316
1317	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1318	    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1319
1320	switch (flags & IB_CQ_SOLICITED_MASK) {
1321	case IB_CQ_SOLICITED:
1322		notify_type = IBT_NEXT_SOLICITED;
1323		break;
1324	case IB_CQ_NEXT_COMP:
1325		notify_type = IBT_NEXT_COMPLETION;
1326		break;
1327	default:
1328		/* Currently only two flags are supported */
1329		ofs_lock_exit(&ofs_client->lock);
1330		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1331		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => invalid flag",
1332		    cq, flags);
1333		return (-EINVAL);
1334	}
1335
1336	rtn = ibt_enable_cq_notify(cq->ibt_cq, notify_type);
1337	ofs_lock_exit(&ofs_client->lock);
1338
1339	if (rtn == IBT_SUCCESS) {
1340		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1341		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x rtn: 0x%x",
1342		    cq, flags, rtn);
1343		return (0);
1344	}
1345
1346	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1347	    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => ibt_enable_cq_notify "
1348	    "failed w/ 0x%x", cq, flags, rtn);
1349
1350	switch (rtn) {
1351	case IBT_HCA_HDL_INVALID:
1352	case IBT_CQ_HDL_INVALID:
1353	case IBT_CQ_NOTIFY_TYPE_INVALID:
1354		return (-EINVAL);
1355	default:
1356		return (-EIO);
1357	}
1358}
1359
/*
 * QP state transition table, indexed as
 * qp_state_table[current state][next state].
 *
 * Each entry records whether the transition is allowed and, per QP type,
 * which attribute mask bits are required and which are optional for it.
 * Entries and QP types that are not listed are zero-initialized, i.e.
 * the transition is not valid, or no attribute bits are required/optional.
 */
static const struct {
	/* non-zero if the transition is allowed */
	int			valid;
	/* attribute bits that must all be set, indexed by QP type */
	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETY + 1];
	/* attribute bits that may additionally be set, indexed by QP type */
	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETY + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {

	/* transitions out of RESET */
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
	},
	/* transitions out of INIT */
	[IB_QPS_INIT]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
				    IB_QP_MAX_DEST_RD_ATOMIC |
				    IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_RC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of RTR */
	[IB_QPS_RTR]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = IB_QP_SQ_PSN,
				[IB_QPT_UC] = IB_QP_SQ_PSN,
				[IB_QPT_RC] = (IB_QP_TIMEOUT |
				    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
				    IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_SMI] = IB_QP_SQ_PSN,
				[IB_QPT_GSI] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS	|
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of RTS */
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =  { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE	|
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE	|
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE	| IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
			}
		},
	},
	/* transitions out of SQD */
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER	| IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE	| IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE	| IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX	| IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV |
				    IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
				    IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC |
				    IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_MIN_RNR_TIMER	| IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of SQE (no optional bits are listed for RC) */
	[IB_QPS_SQE]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE	| IB_QP_QKEY),
			}
		}
	},
	/* transitions out of ERR */
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =  { .valid = 1 }
	}
};
1528
1529static inline int
1530ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1531    enum ib_qp_type type, enum ib_qp_attr_mask mask)
1532{
1533	enum ib_qp_attr_mask req_param, opt_param;
1534
1535	if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
1536	    next_state < 0 || next_state > IB_QPS_ERR) {
1537		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1538		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1539		    "qp_type: %d, attr_mask: 0x%x => invalid state(1)",
1540		    cur_state, next_state, type, mask);
1541		return (0);
1542	}
1543
1544	if (mask & IB_QP_CUR_STATE &&
1545	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1546	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) {
1547		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1548		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1549		    "qp_type: %d, attr_mask: 0x%x => invalid state(2)",
1550		    cur_state, next_state, type, mask);
1551		return (0);
1552	}
1553
1554	if (!qp_state_table[cur_state][next_state].valid) {
1555		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1556		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1557		    "qp_type: %d, attr_mask: 0x%x => state is not valid",
1558		    cur_state, next_state, type, mask);
1559		return (0);
1560	}
1561
1562	req_param = qp_state_table[cur_state][next_state].req_param[type];
1563	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1564
1565	if ((mask & req_param) != req_param) {
1566		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1567		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1568		    "qp_type: %d, attr_mask: 0x%x => "
1569		    "required param doesn't match. req_param = 0x%x",
1570		    cur_state, next_state, type, mask, req_param);
1571		return (0);
1572	}
1573
1574	if (mask & ~(req_param | opt_param | IB_QP_STATE)) {
1575		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1576		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1577		    "qp_type: %d, attr_mask: 0x%x => "
1578		    "unsupported options. req_param = 0x%x, opt_param = 0x%x",
1579		    cur_state, next_state, type, mask, req_param, opt_param);
1580		return (0);
1581	}
1582
1583	return (1);
1584}
1585
1586static inline enum ib_qp_state
1587qp_current_state(ibt_qp_query_attr_t *qp_attr)
1588{
1589	ASSERT(qp_attr->qp_info.qp_state != IBT_STATE_SQDRAIN);
1590	return (enum ib_qp_state)(qp_attr->qp_info.qp_state);
1591}
1592
1593static inline ibt_tran_srv_t
1594of2ibtf_qp_type(enum ib_qp_type type)
1595{
1596	switch (type) {
1597	case IB_QPT_SMI:
1598	case IB_QPT_GSI:
1599	case IB_QPT_UD:
1600		return (IBT_UD_SRV);
1601	case IB_QPT_RC:
1602		return (IBT_RC_SRV);
1603	case IB_QPT_UC:
1604		return (IBT_UC_SRV);
1605	case IB_QPT_RAW_IPV6:
1606		return (IBT_RAWIP_SRV);
1607	case IB_QPT_RAW_ETY:
1608	default:
1609		ASSERT(type == IB_QPT_RAW_ETY);
1610		return (IBT_RAWETHER_SRV);
1611	}
1612}
1613
1614static inline void
1615set_av(struct ib_ah_attr *attr, ibt_cep_path_t *pathp)
1616{
1617	ibt_adds_vect_t		*av = &pathp->cep_adds_vect;
1618
1619	pathp->cep_hca_port_num = attr->port_num;
1620	av->av_srate = OF2IBTF_SRATE(attr->static_rate);
1621	av->av_srvl = attr->sl & 0xF;
1622	av->av_send_grh = attr->ah_flags & IB_AH_GRH ? 1 : 0;
1623
1624	if (av->av_send_grh) {
1625		av->av_dgid.gid_prefix =
1626		    attr->grh.dgid.global.subnet_prefix;
1627		av->av_dgid.gid_guid =
1628		    attr->grh.dgid.global.interface_id;
1629		av->av_flow = attr->grh.flow_label & 0xFFFFF;
1630		av->av_tclass = attr->grh.traffic_class;
1631		av->av_hop = attr->grh.hop_limit;
1632		av->av_sgid_ix = attr->grh.sgid_index;
1633	}
1634	av->av_dlid = attr->dlid;
1635	av->av_src_path = attr->src_path_bits;
1636}
1637
1638int
1639ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask)
1640{
1641	enum ib_qp_state	cur_state, new_state;
1642	ibt_hca_attr_t		hattr;
1643	ibt_qp_query_attr_t	qp_attr;
1644	ibt_qp_info_t		modify_attr;
1645	ibt_cep_modify_flags_t	flags;
1646	int			rtn;
1647	ofs_client_t		*ofs_client = qp->device->clnt_hdl;
1648
1649	ofs_lock_enter(&ofs_client->lock);
1650	if (qp->device->reg_state != IB_DEV_OPEN) {
1651		ofs_lock_exit(&ofs_client->lock);
1652		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1653		    "ib_modify_qp: qp: 0x%p => invalid device state (%d)",
1654		    qp, qp->device->reg_state);
1655		return (-ENXIO);
1656	}
1657
1658	rtn = ibt_query_hca(qp->device->hca_hdl, &hattr);
1659	if (rtn != IBT_SUCCESS) {
1660		ofs_lock_exit(&ofs_client->lock);
1661		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1662		    "ib_modify_qp: qp: 0x%p, hca_hdl: 0x%p => "
1663		    "ibt_query_hca() failed w/ %d",
1664		    qp, qp->device->hca_hdl, rtn);
1665		return (-EIO);
1666	}
1667
1668	/* only one thread per qp is allowed during the qp modification */
1669	mutex_enter(&qp->lock);
1670
1671	/* Get the current QP attributes first */
1672	bzero(&qp_attr, sizeof (ibt_qp_query_attr_t));
1673	if ((rtn = ibt_query_qp(qp->ibt_qp, &qp_attr)) != IBT_SUCCESS) {
1674		mutex_exit(&qp->lock);
1675		ofs_lock_exit(&ofs_client->lock);
1676		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1677		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1678		    "ibt_query_qp failed w/ 0x%x", qp, attr, attr_mask, rtn);
1679		return (-EIO);
1680	}
1681
1682	/* Get the current and new state for this QP */
1683	cur_state = attr_mask & IB_QP_CUR_STATE ?  attr->cur_qp_state :
1684	    qp_current_state(&qp_attr);
1685	new_state = attr_mask & IB_QP_STATE ? attr->qp_state :
1686	    cur_state;
1687
1688	/* Sanity check of the current/new states */
1689	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1690		/* Linux OF returns 0 in this case */
1691		mutex_exit(&qp->lock);
1692		ofs_lock_exit(&ofs_client->lock);
1693		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1694		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1695		    "invalid state (both of current/new states are RESET)",
1696		    qp, attr, attr_mask);
1697		return (0);
1698	}
1699
1700	/*
1701	 * Check if this modification request is supported with the new
1702	 * and/or current state.
1703	 */
1704	if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type, attr_mask)) {
1705		mutex_exit(&qp->lock);
1706		ofs_lock_exit(&ofs_client->lock);
1707		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1708		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1709		    "invalid arguments",
1710		    qp, attr, attr_mask);
1711		return (-EINVAL);
1712	}
1713
1714	/* Sanity checks */
1715	if (attr_mask & IB_QP_PORT && (attr->port_num == 0 ||
1716	    attr->port_num > hattr.hca_nports)) {
1717		mutex_exit(&qp->lock);
1718		ofs_lock_exit(&ofs_client->lock);
1719		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1720		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1721		    "invalid attr->port_num(%d), max_nports(%d)",
1722		    qp, attr, attr_mask, attr->port_num, hattr.hca_nports);
1723		return (-EINVAL);
1724	}
1725
1726	if (attr_mask & IB_QP_PKEY_INDEX &&
1727	    attr->pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1728		mutex_exit(&qp->lock);
1729		ofs_lock_exit(&ofs_client->lock);
1730		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1731		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1732		    "invalid attr->pkey_index(%d), max_pkey_index(%d)",
1733		    qp, attr, attr_mask, attr->pkey_index,
1734		    hattr.hca_max_port_pkey_tbl_sz);
1735		return (-EINVAL);
1736	}
1737
1738	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1739	    attr->max_rd_atomic > hattr.hca_max_rdma_out_qp) {
1740		mutex_exit(&qp->lock);
1741		ofs_lock_exit(&ofs_client->lock);
1742		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1743		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1744		    "invalid attr->max_rd_atomic(0x%x), max_rdma_out_qp(0x%x)",
1745		    qp, attr, attr_mask, attr->max_rd_atomic,
1746		    hattr.hca_max_rdma_out_qp);
1747		return (-EINVAL);
1748	}
1749
1750	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1751	    attr->max_dest_rd_atomic > hattr.hca_max_rdma_in_qp) {
1752		mutex_exit(&qp->lock);
1753		ofs_lock_exit(&ofs_client->lock);
1754		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1755		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1756		    "invalid attr->max_dest_rd_atomic(0x%x), "
1757		    "max_rdma_in_qp(0x%x)", qp, attr, attr_mask,
1758		    attr->max_dest_rd_atomic, hattr.hca_max_rdma_in_qp);
1759		return (-EINVAL);
1760	}
1761
1762	/* copy the current setting */
1763	modify_attr = qp_attr.qp_info;
1764
1765	/*
1766	 * Since it's already checked if the modification request matches
1767	 * the new and/or current states, just assign both of states to
1768	 * modify_attr here. The current state is required if qp_state
1769	 * is RTR, but it's harmelss otherwise, so it's set always.
1770	 */
1771	modify_attr.qp_current_state = OF2IBTF_STATE(cur_state);
1772	modify_attr.qp_state = OF2IBTF_STATE(new_state);
1773	modify_attr.qp_trans = of2ibtf_qp_type(qp->qp_type);
1774
1775	/* Convert OF modification requests into IBTF ones */
1776	flags = IBT_CEP_SET_STATE;	/* IBTF needs IBT_CEP_SET_STATE */
1777	if (cur_state == IB_QPS_RESET &&
1778	    new_state == IB_QPS_INIT) {
1779		flags |= IBT_CEP_SET_RESET_INIT;
1780	} else if (cur_state == IB_QPS_INIT &&
1781	    new_state == IB_QPS_RTR) {
1782		flags |= IBT_CEP_SET_INIT_RTR;
1783	} else if (cur_state == IB_QPS_RTR &&
1784	    new_state == IB_QPS_RTS) {
1785		flags |= IBT_CEP_SET_RTR_RTS;
1786	}
1787	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
1788		flags |= IBT_CEP_SET_SQD_EVENT;
1789	}
1790	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1791		modify_attr.qp_flags &= ~(IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1792		    IBT_CEP_ATOMIC);
1793		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
1794			flags |= IBT_CEP_SET_RDMA_R;
1795			modify_attr.qp_flags |= IBT_CEP_RDMA_RD;
1796		}
1797		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
1798			flags |= IBT_CEP_SET_RDMA_W;
1799			modify_attr.qp_flags |= IBT_CEP_RDMA_WR;
1800		}
1801		if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
1802			flags |= IBT_CEP_SET_ATOMIC;
1803			modify_attr.qp_flags |= IBT_CEP_ATOMIC;
1804		}
1805	}
1806	if (attr_mask & IB_QP_PKEY_INDEX) {
1807		flags |= IBT_CEP_SET_PKEY_IX;
1808		switch (qp->qp_type)  {
1809		case IB_QPT_SMI:
1810		case IB_QPT_GSI:
1811		case IB_QPT_UD:
1812			modify_attr.qp_transport.ud.ud_pkey_ix =
1813			    attr->pkey_index;
1814			break;
1815		case IB_QPT_RC:
1816			modify_attr.qp_transport.rc.rc_path.cep_pkey_ix =
1817			    attr->pkey_index;
1818			break;
1819		case IB_QPT_UC:
1820			modify_attr.qp_transport.uc.uc_path.cep_pkey_ix =
1821			    attr->pkey_index;
1822			break;
1823		default:
1824			/* This should never happen */
1825			mutex_exit(&qp->lock);
1826			ofs_lock_exit(&ofs_client->lock);
1827			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1828			    "ib_modify_qp(IB_QP_PKEY_INDEX): qp: 0x%p, "
1829			    "attr: 0x%p, attr_mask: 0x%x => "
1830			    "invalid qp->qp_type(%d)",
1831			    qp, attr, attr_mask, qp->qp_type);
1832			return (-EINVAL);
1833		}
1834	}
1835	if (attr_mask & IB_QP_PORT) {
1836		flags |= IBT_CEP_SET_PORT;
1837		switch (qp->qp_type) {
1838		case IB_QPT_SMI:
1839		case IB_QPT_GSI:
1840		case IB_QPT_UD:
1841			modify_attr.qp_transport.ud.ud_port = attr->port_num;
1842			break;
1843		case IB_QPT_RC:
1844			modify_attr.qp_transport.rc.rc_path.cep_hca_port_num =
1845			    attr->port_num;
1846			break;
1847		case IB_QPT_UC:
1848			modify_attr.qp_transport.uc.uc_path.cep_hca_port_num =
1849			    attr->port_num;
1850			break;
1851		default:
1852			/* This should never happen */
1853			mutex_exit(&qp->lock);
1854			ofs_lock_exit(&ofs_client->lock);
1855			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1856			    "ib_modify_qp(IB_QP_PORT): qp: 0x%p, "
1857			    "attr: 0x%p, attr_mask: 0x%x => "
1858			    "invalid qp->qp_type(%d)",
1859			    qp, attr, attr_mask, qp->qp_type);
1860			return (-EINVAL);
1861		}
1862	}
1863	if (attr_mask & IB_QP_QKEY) {
1864		ASSERT(qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_SMI ||
1865		    qp->qp_type == IB_QPT_GSI);
1866		flags |= IBT_CEP_SET_QKEY;
1867		modify_attr.qp_transport.ud.ud_qkey = attr->qkey;
1868	}
1869	if (attr_mask & IB_QP_AV) {
1870		flags |= IBT_CEP_SET_ADDS_VECT;
1871		switch (qp->qp_type) {
1872		case IB_QPT_RC:
1873			set_av(&attr->ah_attr,
1874			    &modify_attr.qp_transport.rc.rc_path);
1875			break;
1876		case IB_QPT_UC:
1877			set_av(&attr->ah_attr,
1878			    &modify_attr.qp_transport.uc.uc_path);
1879			break;
1880		case IB_QPT_SMI:
1881		case IB_QPT_GSI:
1882		case IB_QPT_UD:
1883		default:
1884			/* This should never happen */
1885			mutex_exit(&qp->lock);
1886			ofs_lock_exit(&ofs_client->lock);
1887			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1888			    "ib_modify_qp(IB_QP_AV): qp: 0x%p, "
1889			    "attr: 0x%p, attr_mask: 0x%x => "
1890			    "invalid qp->qp_type(%d)",
1891			    qp, attr, attr_mask, qp->qp_type);
1892			return (-EINVAL);
1893		}
1894	}
1895	if (attr_mask & IB_QP_PATH_MTU) {
1896		switch (qp->qp_type) {
1897		case IB_QPT_RC:
1898			modify_attr.qp_transport.rc.rc_path_mtu =
1899			    OF2IBTF_PATH_MTU(attr->path_mtu);
1900			break;
1901		case IB_QPT_UC:
1902			modify_attr.qp_transport.uc.uc_path_mtu =
1903			    OF2IBTF_PATH_MTU(attr->path_mtu);
1904			break;
1905		case IB_QPT_SMI:
1906		case IB_QPT_GSI:
1907		case IB_QPT_UD:
1908		default:
1909			/* nothing to do */
1910			break;
1911		}
1912	}
1913	if (attr_mask & IB_QP_TIMEOUT && qp->qp_type == IB_QPT_RC) {
1914		flags |= IBT_CEP_SET_TIMEOUT;
1915		modify_attr.qp_transport.rc.rc_path.cep_timeout =
1916		    attr->timeout;
1917	}
1918	if (attr_mask & IB_QP_RETRY_CNT && qp->qp_type == IB_QPT_RC) {
1919		flags |= IBT_CEP_SET_RETRY;
1920		modify_attr.qp_transport.rc.rc_retry_cnt =
1921		    attr->retry_cnt & 0x7;
1922	}
1923	if (attr_mask & IB_QP_RNR_RETRY && qp->qp_type == IB_QPT_RC) {
1924		flags |= IBT_CEP_SET_RNR_NAK_RETRY;
1925		modify_attr.qp_transport.rc.rc_rnr_retry_cnt =
1926		    attr->rnr_retry & 0x7;
1927	}
1928	if (attr_mask & IB_QP_RQ_PSN) {
1929		switch (qp->qp_type) {
1930		case IB_QPT_RC:
1931			modify_attr.qp_transport.rc.rc_rq_psn =
1932			    attr->rq_psn & 0xFFFFFF;
1933			break;
1934		case IB_QPT_UC:
1935			modify_attr.qp_transport.uc.uc_rq_psn =
1936			    attr->rq_psn & 0xFFFFFF;
1937			break;
1938		case IB_QPT_SMI:
1939		case IB_QPT_GSI:
1940		case IB_QPT_UD:
1941		default:
1942			/* nothing to do */
1943			break;
1944		}
1945	}
1946	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
1947		if (attr->max_rd_atomic) {
1948			flags |= IBT_CEP_SET_RDMARA_OUT;
1949			modify_attr.qp_transport.rc.rc_rdma_ra_out =
1950			    attr->max_rd_atomic;
1951		}
1952	}
1953	if (attr_mask & IB_QP_ALT_PATH) {
1954		/* Sanity checks */
1955		if (attr->alt_port_num == 0 ||
1956		    attr->alt_port_num > hattr.hca_nports) {
1957			mutex_exit(&qp->lock);
1958			ofs_lock_exit(&ofs_client->lock);
1959			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1960			    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1961			    "attr_mask: 0x%x => invalid attr->alt_port_num"
1962			    "(%d), max_nports(%d)",
1963			    qp, attr, attr_mask, attr->alt_port_num,
1964			    hattr.hca_nports);
1965			return (-EINVAL);
1966		}
1967		if (attr->alt_pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1968			mutex_exit(&qp->lock);
1969			ofs_lock_exit(&ofs_client->lock);
1970			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1971			    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1972			    "attr_mask: 0x%x => invalid attr->alt_pkey_index"
1973			    "(%d), max_port_key_index(%d)",
1974			    qp, attr, attr_mask, attr->alt_pkey_index,
1975			    hattr.hca_max_port_pkey_tbl_sz);
1976			return (-EINVAL);
1977		}
1978		flags |= IBT_CEP_SET_ALT_PATH;
1979		switch (qp->qp_type) {
1980		case IB_QPT_RC:
1981			modify_attr.qp_transport.rc.rc_alt_path.
1982			    cep_pkey_ix = attr->alt_pkey_index;
1983			modify_attr.qp_transport.rc.rc_alt_path.
1984			    cep_hca_port_num = attr->alt_port_num;
1985			set_av(&attr->alt_ah_attr,
1986			    &modify_attr.qp_transport.rc.rc_alt_path);
1987			modify_attr.qp_transport.rc.rc_alt_path.
1988			    cep_timeout = attr->alt_timeout;
1989			break;
1990		case IB_QPT_UC:
1991			modify_attr.qp_transport.uc.uc_alt_path.
1992			    cep_pkey_ix = attr->alt_pkey_index;
1993			modify_attr.qp_transport.uc.uc_alt_path.
1994			    cep_hca_port_num = attr->alt_port_num;
1995			set_av(&attr->alt_ah_attr,
1996			    &modify_attr.qp_transport.uc.uc_alt_path);
1997			modify_attr.qp_transport.uc.uc_alt_path.
1998			    cep_timeout = attr->alt_timeout;
1999			break;
2000		case IB_QPT_SMI:
2001		case IB_QPT_GSI:
2002		case IB_QPT_UD:
2003		default:
2004			/* This should never happen */
2005			mutex_exit(&qp->lock);
2006			ofs_lock_exit(&ofs_client->lock);
2007			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2008			    "ib_modify_qp(IB_QP_ALT_PATH): qp: 0x%p, "
2009			    "attr: 0x%p, attr_mask: 0x%x => "
2010			    "invalid qp->qp_type(%d)",
2011			    qp, attr, attr_mask, qp->qp_type);
2012			return (-EINVAL);
2013		}
2014	}
2015	if (attr_mask & IB_QP_MIN_RNR_TIMER && qp->qp_type == IB_QPT_RC) {
2016		flags |= IBT_CEP_SET_MIN_RNR_NAK;
2017		modify_attr.qp_transport.rc.rc_min_rnr_nak =
2018		    attr->min_rnr_timer & 0x1F;
2019	}
2020	if (attr_mask & IB_QP_SQ_PSN) {
2021		switch (qp->qp_type)  {
2022		case IB_QPT_SMI:
2023		case IB_QPT_GSI:
2024		case IB_QPT_UD:
2025			modify_attr.qp_transport.ud.ud_sq_psn =
2026			    attr->sq_psn;
2027			break;
2028		case IB_QPT_RC:
2029			modify_attr.qp_transport.rc.rc_sq_psn =
2030			    attr->sq_psn;
2031			break;
2032		case IB_QPT_UC:
2033			modify_attr.qp_transport.uc.uc_sq_psn =
2034			    attr->sq_psn;
2035			break;
2036		default:
2037			/* This should never happen */
2038			mutex_exit(&qp->lock);
2039			ofs_lock_exit(&ofs_client->lock);
2040			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2041			    "ib_modify_qp(IB_QP_SQ_PSN): qp: 0x%p, "
2042			    "attr: 0x%p, attr_mask: 0x%x => "
2043			    "invalid qp->qp_type(%d)",
2044			    qp, attr, attr_mask, qp->qp_type);
2045			return (-EINVAL);
2046		}
2047	}
2048	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
2049		/* Linux OF sets the value if max_dest_rd_atomic is not zero */
2050		if (attr->max_dest_rd_atomic) {
2051			flags |= IBT_CEP_SET_RDMARA_IN;
2052			modify_attr.qp_transport.rc.rc_rdma_ra_in =
2053			    attr->max_dest_rd_atomic;
2054		}
2055	}
2056	if (attr_mask & IB_QP_PATH_MIG_STATE) {
2057		flags |= IBT_CEP_SET_MIG;
2058		switch (qp->qp_type)  {
2059		case IB_QPT_RC:
2060			modify_attr.qp_transport.rc.rc_mig_state =
2061			    OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2062			break;
2063		case IB_QPT_UC:
2064			modify_attr.qp_transport.uc.uc_mig_state =
2065			    OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2066			break;
2067		case IB_QPT_SMI:
2068		case IB_QPT_GSI:
2069		case IB_QPT_UD:
2070		default:
2071			/* This should never happen */
2072			mutex_exit(&qp->lock);
2073			ofs_lock_exit(&ofs_client->lock);
2074			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2075			    "ib_modify_qp(IB_QP_PATH_MIG_STATE): qp: 0x%p, "
2076			    "attr: 0x%p, attr_mask: 0x%x => "
2077			    "invalid qp->qp_type(%d)",
2078			    qp, attr, attr_mask, qp->qp_type);
2079			return (-EINVAL);
2080		}
2081	}
2082	if (attr_mask & IB_QP_CAP) {
2083		/* IB_QP_CAP is not supported */
2084		mutex_exit(&qp->lock);
2085		ofs_lock_exit(&ofs_client->lock);
2086		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2087		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
2088		    "attr_mask: 0x%x => IB_QP_CAP is not supported",
2089		    qp, attr, attr_mask);
2090		return (-EINVAL);
2091	}
2092	if (attr_mask & IB_QP_DEST_QPN) {
2093		switch (qp->qp_type)  {
2094		case IB_QPT_RC:
2095			modify_attr.qp_transport.rc.rc_dst_qpn =
2096			    attr->dest_qp_num;
2097			break;
2098		case IB_QPT_UC:
2099			modify_attr.qp_transport.uc.uc_dst_qpn =
2100			    attr->dest_qp_num;
2101			break;
2102		case IB_QPT_SMI:
2103		case IB_QPT_GSI:
2104		case IB_QPT_UD:
2105		default:
2106			/* This should never happen */
2107			mutex_exit(&qp->lock);
2108			ofs_lock_exit(&ofs_client->lock);
2109			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2110			    "ib_modify_qp(IB_QP_DEST_PSN): qp: 0x%p, "
2111			    "attr: 0x%p, attr_mask: 0x%x => "
2112			    "invalid qp->qp_type(%d)",
2113			    qp, attr, attr_mask, qp->qp_type);
2114			return (-EINVAL);
2115		}
2116	}
2117
2118	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2119	    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x, "
2120	    "flags: 0x%x, modify_attr: 0x%p",
2121	    qp, attr, attr_mask, flags, &modify_attr);
2122
2123	/* Modify the QP attributes */
2124	rtn = ibt_modify_qp(qp->ibt_qp, flags, &modify_attr, NULL);
2125	if (rtn == IBT_SUCCESS) {
2126		mutex_exit(&qp->lock);
2127		ofs_lock_exit(&ofs_client->lock);
2128		return (0);
2129	}
2130	mutex_exit(&qp->lock);
2131	ofs_lock_exit(&ofs_client->lock);
2132
2133	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2134	    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
2135	    "ibt_modify_qp failed w/ %d, flags: 0x%x",
2136	    qp, attr, attr_mask, rtn, flags);
2137
2138	switch (rtn) {
2139	case IBT_HCA_HDL_INVALID:
2140	case IBT_QP_HDL_INVALID:
2141	case IBT_QP_SRV_TYPE_INVALID:
2142	case IBT_QP_STATE_INVALID:
2143	case IBT_HCA_PORT_INVALID:
2144	case IBT_PKEY_IX_ILLEGAL:
2145		return (-EINVAL);
2146	default:
2147		return (-EIO);
2148	}
2149}
2150
2151static inline enum ib_wc_status
2152ibt2of_wc_status(ibt_wc_status_t status)
2153{
2154	switch (status) {
2155	case IBT_WC_LOCAL_LEN_ERR:
2156		return (IB_WC_LOC_LEN_ERR);
2157	case IBT_WC_LOCAL_CHAN_OP_ERR:
2158		return (IB_WC_LOC_QP_OP_ERR);
2159	case IBT_WC_LOCAL_PROTECT_ERR:
2160		return (IB_WC_LOC_PROT_ERR);
2161	case IBT_WC_WR_FLUSHED_ERR:
2162		return (IB_WC_WR_FLUSH_ERR);
2163	case IBT_WC_MEM_WIN_BIND_ERR:
2164		return (IB_WC_MW_BIND_ERR);
2165	case IBT_WC_BAD_RESPONSE_ERR:
2166		return (IB_WC_BAD_RESP_ERR);
2167	case IBT_WC_LOCAL_ACCESS_ERR:
2168		return (IB_WC_LOC_ACCESS_ERR);
2169	case IBT_WC_REMOTE_INVALID_REQ_ERR:
2170		return (IB_WC_REM_INV_REQ_ERR);
2171	case IBT_WC_REMOTE_ACCESS_ERR:
2172		return (IB_WC_REM_ACCESS_ERR);
2173	case IBT_WC_REMOTE_OP_ERR:
2174		return (IB_WC_REM_OP_ERR);
2175	case IBT_WC_TRANS_TIMEOUT_ERR:
2176		return (IB_WC_RETRY_EXC_ERR);
2177	case IBT_WC_RNR_NAK_TIMEOUT_ERR:
2178		return (IB_WC_RNR_RETRY_EXC_ERR);
2179	case IBT_WC_SUCCESS:
2180	default:
2181		/* Hermon doesn't support EEC yet */
2182		ASSERT(status == IBT_WC_SUCCESS);
2183		return (IB_WC_SUCCESS);
2184	}
2185}
2186
2187static inline enum ib_wc_opcode
2188ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)
2189{
2190	switch (wc_type) {
2191	case IBT_WRC_SEND:
2192		return (IB_WC_SEND);
2193	case IBT_WRC_RDMAR:
2194		return (IB_WC_RDMA_READ);
2195	case IBT_WRC_RDMAW:
2196		return (IB_WC_RDMA_WRITE);
2197	case IBT_WRC_CSWAP:
2198		return (IB_WC_COMP_SWAP);
2199	case IBT_WRC_FADD:
2200		return (IB_WC_FETCH_ADD);
2201	case IBT_WRC_BIND:
2202		return (IB_WC_BIND_MW);
2203	case IBT_WRC_RECV:
2204		return (IB_WC_RECV);
2205	case IBT_WRC_RECV_RDMAWI:
2206	default:
2207		ASSERT(wc_type == IBT_WRC_RECV_RDMAWI);
2208		return (IB_WC_RECV_RDMA_WITH_IMM);
2209	}
2210}
2211
2212static inline int
2213ibt2of_wc_flags(ibt_wc_flags_t wc_flags)
2214{
2215	return (wc_flags & ~IBT_WC_CKSUM_OK);
2216}
2217
2218static inline void
2219set_wc(ibt_wc_t *ibt_wc, struct ib_wc *wc)
2220{
2221	wc->wr_id = ibt_wc->wc_id;
2222	wc->status = ibt2of_wc_status(ibt_wc->wc_status);
2223	/* opcode can be undefined if status is not success */
2224	if (wc->status == IB_WC_SUCCESS) {
2225		wc->opcode = ibt2of_wc_opcode(ibt_wc->wc_type);
2226	}
2227	wc->vendor_err = 0;			/* not supported */
2228	wc->byte_len = ibt_wc->wc_bytes_xfer;
2229	wc->qp = NULL;				/* not supported */
2230	wc->imm_data = htonl(ibt_wc->wc_immed_data);
2231	wc->src_qp = ibt_wc->wc_qpn;
2232	wc->wc_flags = ibt2of_wc_flags(ibt_wc->wc_flags);
2233	wc->pkey_index = ibt_wc->wc_pkey_ix;
2234	wc->slid = ibt_wc->wc_slid;
2235	wc->sl = ibt_wc->wc_sl;
2236	wc->dlid_path_bits = ibt_wc->wc_path_bits;
2237	wc->port_num = 0;			/* not supported */
2238}
2239
2240/*
2241 * ib_poll_cq - poll a CQ for completion(s)
2242 * @cq:the CQ being polled
2243 * @num_entries:maximum number of completions to return
2244 * @wc:array of at least @num_entries &struct ib_wc where completions
2245 *   will be returned
2246 *
2247 * Poll a CQ for (possibly multiple) completions.  If the return value
2248 * is < 0, an error occurred.  If the return value is >= 0, it is the
2249 * number of completions returned.  If the return value is
2250 * non-negative and < num_entries, then the CQ was emptied.
2251 *
 * Note that the following three members of struct ib_wc are currently not
 * supported, and their values are always either 0 or NULL.
2254 *	u32			vendor_err;
2255 *	struct ib_qp		*qp;
2256 *	u8			port_num;
2257 */
2258int
2259ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
2260{
2261	ibt_wc_t	ibt_wc;
2262	int		npolled;
2263	ibt_status_t	rtn;
2264	ofs_client_t	*ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
2265
2266	ofs_lock_enter(&ofs_client->lock);
2267	if (cq->device->reg_state != IB_DEV_OPEN) {
2268		ofs_lock_exit(&ofs_client->lock);
2269		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2270		    "ib_poll_cq: cq: 0x%p => invalid device state (%d)",
2271		    cq, cq->device->reg_state);
2272		return (-ENXIO);
2273	}
2274
2275	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2276	    "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p, "
2277	    "ibt_cq: 0x%p, ibt_wc: 0x%p",
2278	    cq, num_entries, wc, cq->ibt_cq, &ibt_wc);
2279
2280	/* only one thread per cq is allowed during ibt_poll_cq() */
2281	mutex_enter(&cq->lock);
2282	for (npolled = 0; npolled < num_entries; ++npolled) {
2283		bzero(&ibt_wc, sizeof (ibt_wc_t));
2284		rtn = ibt_poll_cq(cq->ibt_cq, &ibt_wc, 1, NULL);
2285		if (rtn != IBT_SUCCESS) {
2286			break;
2287		}
2288		/* save this result to struct ib_wc */
2289		set_wc(&ibt_wc, wc + npolled);
2290	}
2291	mutex_exit(&cq->lock);
2292	ofs_lock_exit(&ofs_client->lock);
2293
2294	if (rtn == IBT_SUCCESS || rtn == IBT_CQ_EMPTY) {
2295		return (npolled);
2296	}
2297
2298	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2299	    "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p => "
2300	    "ibt_poll_cq failed w/ %d, npolled = %d",
2301	    cq, num_entries, wc, rtn, npolled);
2302
2303	switch (rtn) {
2304	case IBT_HCA_HDL_INVALID:
2305	case IBT_CQ_HDL_INVALID:
2306	case IBT_INVALID_PARAM:
2307		return (-EINVAL);
2308	default:
2309		return (-EIO);
2310	}
2311}
2312
2313ibt_hca_hdl_t
2314ib_get_ibt_hca_hdl(struct ib_device *device)
2315{
2316	return (device->hca_hdl);
2317}
2318
2319ibt_channel_hdl_t
2320ib_get_ibt_channel_hdl(struct rdma_cm_id *cm)
2321{
2322	return (cm->qp == NULL ? NULL : cm->qp->ibt_qp);
2323}
2324