daplt.c revision 9517:b4839b0aa7a4
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * UDAPL kernel agent
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/taskq.h>
#include <sys/open.h>
#include <sys/uio.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/esunddi.h>
#include <sys/avl.h>
#include <sys/cred.h>
#include <sys/note.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <daplt_if.h>
#include <daplt.h>

/*
 * The following variables support the debug log buffer scheme.
 */
#ifdef	DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t daplka_dbgnext;
static int daplka_dbginit = 0;
static kmutex_t daplka_dbglock;
_NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
    daplka_dbgbuf
    daplka_dbgnext))
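
/*
 * A minimal sketch of the scheme (the daplka_debug body is defined
 * elsewhere in this file): each message is formatted into
 * daplka_dbgbuf at offset daplka_dbgnext while daplka_dbglock is
 * held, and daplka_dbgnext is assumed to wrap back to 0 when it
 * nears daplka_dbgsize, giving a circular in-memory log that can be
 * inspected from a crash dump.
 */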

static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1;			/* default enable */
static int daplka_failback = 0x1;		/* default enable */
static int daplka_query_aft_setaltpath = 10;

#define	DERR				\
	if (daplka_dbg & 0x100) 	\
	    daplka_debug

#ifdef DEBUG

#define	DINFO				\
	daplka_console

#define	D1				\
	if (daplka_dbg & 0x01)		\
	    daplka_debug
#define	D2				\
	if (daplka_dbg & 0x02) 		\
	    daplka_debug
#define	D3				\
	if (daplka_dbg & 0x04) 		\
	    daplka_debug
#define	D4				\
	if (daplka_dbg & 0x08) 		\
	    daplka_debug

#else /* DEBUG */

#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
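
/*
 * These macros expand to a guarded function reference, so they are
 * invoked like printf with the argument list attached, e.g. (usage
 * sketch; the hkey variable is hypothetical):
 *
 *	D3("ep_create: enter\n");
 *	DERR("ep_create: copyin error %d\n", retval);
 *	D2("lookup: hkey 0x%llx\n", (longlong_t)hkey);
 */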

/*
 * driver entry points
 */
static int daplka_open(dev_t *, int, int, struct cred *);
static int daplka_close(dev_t, int, int, struct cred *);
static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * types of ioctls
 */
static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * common ioctls and supporting functions
 */
static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
static int daplka_ia_destroy(daplka_resource_t *);

/*
 * EP ioctls and supporting functions
 */
static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_destroy(daplka_resource_t *);
static void daplka_hash_ep_free(void *);
static int daplka_ep_failback(void *objp, void *arg);
static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);

static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
static void daplka_timer_info_free(daplka_timer_info_t *);
static void daplka_timer_handler(void *);
static void daplka_timer_dispatch(void *);
static void daplka_timer_thread(void *);
static int daplka_cancel_timer(daplka_ep_resource_t *);
static void daplka_hash_timer_free(void *);

/*
 * EVD ioctls and supporting functions
 */
static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_destroy(daplka_resource_t *);
static void daplka_cq_handler(ibt_cq_hdl_t, void *);
static void daplka_evd_wakeup(daplka_evd_resource_t *,
    daplka_evd_event_list_t *, daplka_evd_event_t *);
static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
    daplka_evd_event_t *);
static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
static void daplka_hash_evd_free(void *);


/*
 * SRQ ioctls and supporting functions
 */
static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_destroy(daplka_resource_t *);
static void daplka_hash_srq_free(void *);

/*
 * Miscellaneous ioctls
 */
static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * PD ioctls and supporting functions
 */
static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_destroy(daplka_resource_t *);
static void daplka_hash_pd_free(void *);

/*
 * SP ioctls and supporting functions
 */
static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_destroy(daplka_resource_t *);
static void daplka_hash_sp_free(void *);
static void daplka_hash_sp_unref(void *);

/*
 * MR ioctls and supporting functions
 */
static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_destroy(daplka_resource_t *);
static void daplka_hash_mr_free(void *);
static void daplka_shared_mr_free(daplka_mr_resource_t *);

/*
 * MW ioctls and supporting functions
 */
static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_destroy(daplka_resource_t *);
static void daplka_hash_mw_free(void *);

/*
 * CNO ioctls and supporting functions
 */
static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_destroy(daplka_resource_t *);
static void daplka_hash_cno_free(void *);

/*
 * CM handlers
 */
static  ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static  ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

/*
 * resource management routines
 */
static int daplka_resource_reserve(minor_t *);
static int daplka_resource_insert(minor_t, daplka_resource_t *);
static daplka_resource_t *daplka_resource_remove(minor_t rnum);
static daplka_resource_t *daplka_resource_lookup(minor_t);
static void daplka_resource_init(void);
static void daplka_resource_fini(void);
static struct daplka_resource_table daplka_resource;

/*
 * hash table routines
 */
static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
    void *, krw_t);
static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
static int daplka_hash_create(daplka_hash_table_t *, uint_t,
    void (*)(void *), void (*)(void *));
static void daplka_hash_destroy(daplka_hash_table_t *);
static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
static void daplka_hash_generic_lookup(void *);
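
/*
 * Typical hash table usage in this driver (illustrative sketch; the
 * free/lookup callbacks are the per-type daplka_hash_*_free and
 * daplka_hash_generic_lookup routines registered at create time):
 *
 *	retval = daplka_hash_create(&htbl, nbuckets, free_func, lookup_func);
 *	retval = daplka_hash_insert(&htbl, &hkey, (void *)rp);
 *	rp = daplka_hash_lookup(&htbl, hkey);	(takes a reference)
 *	retval = daplka_hash_remove(&htbl, hkey, (void **)&rp);
 *	daplka_hash_destroy(&htbl);
 */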

static uint32_t daplka_timer_hkey_gen();

/*
 * async event handlers
 */
static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
    uint64_t, daplka_ia_resource_t *);
static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
    ibt_subnet_event_t *event);
static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);

/*
 * IBTF wrappers and default limits used for resource accounting
 */
static boolean_t	daplka_accounting_enabled = B_TRUE;
static uint32_t		daplka_max_qp_percent = 100;
static uint32_t		daplka_max_cq_percent = 100;
static uint32_t		daplka_max_pd_percent = 100;
static uint32_t		daplka_max_mw_percent = 100;
static uint32_t		daplka_max_mr_percent = 100;
static uint32_t		daplka_max_srq_percent = 100;

static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
    ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
    ibt_channel_hdl_t *, ibt_chan_sizes_t *);

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
    ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
    ibt_pd_flags_t, ibt_pd_hdl_t *);

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
    ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
    ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
    ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);

/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

#define	DAPLKA_RS_REF(rp) {			\
	mutex_enter(&(rp)->header.rs_reflock);	\
	(rp)->header.rs_refcnt++;		\
	ASSERT((rp)->header.rs_refcnt != 0);	\
	mutex_exit(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_UNREF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	if (--(rp)->header.rs_refcnt == 0) {			\
		ASSERT((rp)->header.rs_free != NULL);		\
		mutex_exit(&(rp)->header.rs_reflock);		\
		(rp)->header.rs_free((daplka_resource_t *)rp);	\
	} else {						\
		mutex_exit(&(rp)->header.rs_reflock);		\
	}							\
}

#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
	(rp)->header.rs_refcnt = 1;			\
	(rp)->header.rs_type = (type);			\
	(rp)->header.rs_rnum = (rnum); 			\
	(rp)->header.rs_charged = 0;			\
	(rp)->header.rs_free = (free_func);		\
	mutex_init(&(rp)->header.rs_reflock, NULL,	\
	    MUTEX_DRIVER, NULL);			\
}

#define	DAPLKA_RS_FINI(rp) {				\
	mutex_destroy(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
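
/*
 * Typical resource lifecycle (illustrative sketch): a resource is
 * created with refcnt 1 via DAPLKA_RS_INIT, each successful hash
 * lookup adds a reference, and the object is destroyed by its
 * rs_free callback when the last reference is dropped:
 *
 *	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP, rnum, daplka_ep_destroy);
 *	...
 *	ep_rp = daplka_hash_lookup(&ia_rp->ia_ep_htbl, hkey);
 *	...operate on ep_rp...
 *	DAPLKA_RS_UNREF(ep_rp);		(release the lookup reference)
 */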

/*
 * depending on the timeout value, does a cv_wait_sig or cv_timedwait_sig
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))
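
/*
 * Usage sketch (caller-side names are assumed; "timeout" is an
 * absolute time in ticks, as cv_timedwait_sig expects, with LONG_MAX
 * meaning wait forever):
 *
 *	rval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, &evd_rp->evd_lock, timeout);
 *	rval == 0	interrupted by a signal
 *	rval == -1	timed out
 *	rval > 0	woken up normally
 */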

#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_RELE_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_HCA_BUSY(hca)				\
	((hca)->hca_ref_cnt != 0 ||			\
	(hca)->hca_qp_count != 0 ||			\
	(hca)->hca_cq_count != 0 ||			\
	(hca)->hca_pd_count != 0 ||			\
	(hca)->hca_mw_count != 0 ||			\
	(hca)->hca_mr_count != 0)
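
/*
 * Illustrative pairing (sketch): code that uses an HCA takes a hold
 * for the duration of the operation so that daplka_fini_hcas sees the
 * HCA as busy and will not close and free it:
 *
 *	DAPLKA_HOLD_HCA(daplka_dev, hca);
 *	...use hca->hca_hdl...
 *	DAPLKA_RELE_HCA(daplka_dev, hca);
 */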


static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};

/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);
_NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
    daplka_shared_mr_tree))

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};

/*
 * Module Installation
 */
int
_init(void)
{
	int status;

	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
	if (status != 0) {
		return (status);
	}

	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
	daplka_dbgnext = 0;
	daplka_dbginit = 1;

	daplka_resource_init();

	status = mod_install(&modlinkage);
	if (status != DDI_SUCCESS) {
		/* undo inits done before mod_install */
		daplka_resource_fini();
		mutex_destroy(&daplka_dbglock);
		ddi_soft_state_fini(&daplka_state);
	}
	return (status);
}

/*
 * Module Removal
 */
int
_fini(void)
{
	int	status;

	/*
	 * mod_remove causes detach to be called
	 */
	if ((status = mod_remove(&modlinkage)) != 0) {
		DERR("fini: mod_remove failed: 0x%x\n", status);
		return (status);
	}

	daplka_resource_fini();
	mutex_destroy(&daplka_dbglock);
	ddi_soft_state_fini(&daplka_state);

	return (status);
}

/*
 * Return Module Info.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static void
daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == NULL) {
		dp->daplka_hca_list_head = hca;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != NULL)
			h = h->hca_next;

		h->hca_next = hca;
	}
}

static void
daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == hca)
		dp->daplka_hca_list_head = hca->hca_next;
	else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != hca)
			h = h->hca_next;
		h->hca_next = hca->hca_next;
	}
}

static int
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t		*hca;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			size;
	int			j;
	ibt_status_t		status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}

/*
 * this function obtains the list of HCAs from IBTF.
 * the HCAs are then opened and the returned handles
 * and attributes are stored into the global daplka_dev
 * structure.
 */
static int
daplka_init_hcas(daplka_t *dp)
{
	int		i;
	ib_guid_t	*hca_guids;
	uint32_t	hca_count;

	/*
	 * get the num & list of HCAs present
	 */
	hca_count = ibt_get_hca_list(&hca_guids);
	DERR("No. of HCAs present %d\n", hca_count);

	if (hca_count != 0) {
		/*
		 * get the info for each available HCA
		 */
		for (i = 0; i < hca_count; i++)
			(void) daplka_init_hca(dp, hca_guids[i]);

		ibt_free_hca_list(hca_guids, hca_count);
	}

	if (dp->daplka_hca_list_head != NULL)
		return (IBT_SUCCESS);
	else
		return (IBT_FAILURE);
}

static int
daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
{
	ibt_status_t	status;

	if (hca->hca_hdl != NULL) {
		status = ibt_close_hca(hca->hca_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ibt_close_hca returned %d"
			    " (hca_guid 0x%llx)\n", status,
			    (longlong_t)hca->hca_guid);

			mutex_enter(&dp->daplka_mutex);
			daplka_enqueue_hca(dp, hca);
			mutex_exit(&dp->daplka_mutex);

			return (status);
		}
	}

	if (hca->hca_ports != NULL)
		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);

	kmem_free(hca, sizeof (daplka_hca_t));
	return (IBT_SUCCESS);
}

/*
 * closes all HCAs and frees up the HCA list
 */
static int
daplka_fini_hcas(daplka_t *dp)
{
	ibt_status_t	status;
	daplka_hca_t	*hca;

	mutex_enter(&daplka_dev->daplka_mutex);
	while ((hca = dp->daplka_hca_list_head) != NULL) {
		if (DAPLKA_HCA_BUSY(hca)) {
			mutex_exit(&daplka_dev->daplka_mutex);
			return (IBT_HCA_RESOURCES_NOT_FREED);
		}
		daplka_dequeue_hca(daplka_dev, hca);
		mutex_exit(&daplka_dev->daplka_mutex);

		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
			return (status);

		mutex_enter(&daplka_dev->daplka_mutex);
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	DERR("dapl kernel agent unloaded\n");
	return (IBT_SUCCESS);
}


/*
 * Attach the device, create and fill in daplka_dev
 */
static int
daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	daplka_t	*dp;
	int		instance, retval, err;
	boolean_t	sp_htbl_allocated = B_FALSE;
	boolean_t	timer_htbl_allocated = B_FALSE;
	boolean_t	shared_mr_tree_allocated = B_FALSE;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate soft data structure
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
		DERR("attach: bad state zalloc\n");
		return (DDI_FAILURE);
	}

	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		ddi_soft_state_free(daplka_state, instance);
		DERR("attach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/*
	 * Stuff private info into dip.
	 */
	dp->daplka_dip = dip;
	ddi_set_driver_private(dip, dp);
	daplka_dev = dp;
	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Register driver with IBTF
	 */
	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
	    &dp->daplka_clnt_hdl);
	if (retval != IBT_SUCCESS) {
		DERR("attach: ibt_attach failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/* Register to receive SM events */
	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
	    daplka_sm_notice_handler, NULL);

	retval = daplka_init_hcas(dp);
	if (retval != IBT_SUCCESS) {
		DERR("attach: hca_init failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/*
	 * this table is used by cr_handoff
	 */
	retval = daplka_hash_create(&daplka_global_sp_htbl,
	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
	    daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("attach: cannot create sp hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	sp_htbl_allocated = B_TRUE;

	/*
	 * this table stores per EP timer information.
	 * timer_info_t objects are inserted into this table whenever
	 * an EP timer is set. timers get removed when they expire
	 * or when they get cancelled.
	 */
	retval = daplka_hash_create(&daplka_timer_info_htbl,
	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
	if (retval != 0) {
		DERR("attach: cannot create timer hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	timer_htbl_allocated = B_TRUE;

	/*
	 * this taskq is currently only used for processing timers.
	 * other processing may also use this taskq in the future.
	 */
	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
	if (daplka_taskq == NULL) {
		DERR("attach: cannot create daplka_taskq\n");
		retval = DDI_FAILURE;
		goto error;
	}

	/*
	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
	 * get retrieved or created when daplka_mr_register_shared is
	 * called.
	 */
	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);

	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
	shared_mr_tree_allocated = B_TRUE;

	/*
	 * Create the filesystem device node.
	 */
	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
	    0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
		DERR("attach: bad create_minor_node\n");
		retval = DDI_FAILURE;
		goto error;
	}
	dp->daplka_status = DAPLKA_STATE_ATTACHED;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);

error:
	if (shared_mr_tree_allocated) {
		avl_destroy(&daplka_shared_mr_tree);
		mutex_destroy(&daplka_shared_mr_lock);
	}

	if (daplka_taskq) {
		taskq_destroy(daplka_taskq);
		daplka_taskq = NULL;
	}

	if (timer_htbl_allocated) {
		daplka_hash_destroy(&daplka_timer_info_htbl);
	}

	if (sp_htbl_allocated) {
		daplka_hash_destroy(&daplka_global_sp_htbl);
	}

	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("attach: hca_fini returned %d\n", err);
	}

	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);

		if (err != IBT_SUCCESS) {
			DERR("attach: ibt_detach returned %d\n", err);
		}
	}
	mutex_destroy(&dp->daplka_mutex);

	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	ddi_soft_state_free(daplka_state, instance);
	return (retval);
}

/*
 * Detach - Free resources allocated in attach
 */
/* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		instance, err;
	void		*cookie = NULL;
	daplka_t	*dp;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (daplka_dev != NULL) {
			*result = daplka_dev->daplka_dip;
			return (DDI_SUCCESS);
		} else {
			return (DDI_FAILURE);
		}

	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * creates an EP resource.
 * An EP resource contains an RC channel. An EP resource holds a
 * reference to a send_evd (for the send CQ), recv_evd (for the
 * recv CQ), a connection evd and a PD. These references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp;
	daplka_pd_resource_t		*pd_rp;
	dapl_ep_create_t		args;
	ibt_rc_chan_alloc_args_t	chan_args;
	ibt_chan_alloc_flags_t		achan_flags;
	ibt_chan_sizes_t		chan_real_sizes;
	ibt_hca_attr_t			*hca_attrp;
	uint64_t			ep_hkey = 0;
	boolean_t			inserted = B_FALSE;
	uint32_t			old_state, new_state;
	int				retval;
	ibt_status_t			status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * mutex is only needed for race of "destroy" and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * daplka_ep_get_state retrieves the current state of the EP and
 * sets the state to TRANSITIONING. if the current state is already
 * TRANSITIONING, this function will wait until the state becomes one
 * of the other EP states. Most of the EP related ioctls follow the
 * call sequence:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	...
 *	...some code that affects the EP
 *	...
 *	new_state = <NEW_STATE>;
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 *
 * this call sequence ensures that only one thread may access the EP
 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
 * transitions ep_state to new_state and wakes up any waiters blocking
 * on ep_cv.
 *
 */
static uint32_t
daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
{
	uint32_t	old_state = 0;

	mutex_enter(&ep_rp->ep_lock);
	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
		D2("get_state: wait for state transition to complete\n");
		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
	}
	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
	old_state = ep_rp->ep_state;

	/*
	 * an ep that is in the FREED state cannot transition
	 * back to any of the regular states
	 */
	if (old_state != DAPLKA_EP_STATE_FREED) {
		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	}
	mutex_exit(&ep_rp->ep_lock);
	return (old_state);
}

/*
 * EP state transition diagram
 *
 *              CLOSED<-------------------
 *                |                      |
 *                |                      |
 *     ------------------------          |
 *     |                      |          |
 *     |                      |          |
 *     v                      v          |
 *   CONNECTING       ACCEPTING          |
 *     |  |   |       |       |          |
 *     |  |   |       |       |          |
 *     |  |   |       |       |          |
 *     |  |   |_______|_______|          |
 *     |  |           |   |   |          |
 *     |  |___________|   |   |          |
 *     |        |         |   |          |
 *     |        v         |   |---->DISCONNECTED
 *     |     CONNECTED    |              ^
 *     v        |         |              |
 *    ABORTING  |---------|--------------|
 *     |        |         |              |
 *     |        |         v              |
 *     |        |-------->DISCONNECTING--|
 *     |                                 |
 *     |---------------------------------|
 *
 *	*not shown in this diagram:
 *	    -loopback transitions
 *	    -transitions to the FREED state
 */
static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
{
	boolean_t valid = B_FALSE;

	/*
	 * resetting to the same state is a no-op and is always
	 * permitted. transitioning to the FREED state indicates
	 * that the ep is about to be freed and no further operation
	 * is allowed on it. to support abrupt close, the ep is
	 * permitted to transition to the FREED state from any state.
	 */
	if (old_state == new_state ||
	    new_state == DAPLKA_EP_STATE_FREED) {
		return (B_TRUE);
	}

	switch (old_state) {
	case DAPLKA_EP_STATE_CLOSED:
		/*
		 * this is the initial ep_state.
		 * a transition to CONNECTING or ACCEPTING may occur
		 * upon calling daplka_ep_connect or daplka_cr_accept,
		 * respectively.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTING:
		/*
		 * we transition to this state if daplka_ep_connect
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
		 * daplka_cm_rc_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition
		 * to DISCONNECTING. If a timer was set at ep_connect
		 * time and if the timer expires prior to any of the
		 * CM callbacks, we transition to ABORTING and then
		 * to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
		    new_state == DAPLKA_EP_STATE_ABORTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ACCEPTING:
		/*
		 * we transition to this state if daplka_cr_accept
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_service_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
		 * daplka_cm_service_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition to
		 * DISCONNECTING.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTED:
		/*
		 * we transition to this state if an active or passive
		 * connection gets established. if the client calls
		 * daplka_ep_disconnect, we transition to the
		 * DISCONNECTING state. subsequent CM callbacks will
		 * cause ep_state to be set to DISCONNECTED. If the
		 * remote peer terminates the connection before we do,
		 * it is possible for us to transition directly from
		 * CONNECTED to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTING:
		/*
		 * we transition to this state if the client calls
		 * daplka_ep_disconnect.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ABORTING:
		/*
		 * we transition to this state if the active side
		 * EP timer has expired. this is only a transient
		 * state that is set during timer processing. when
		 * timer processing completes, ep_state will become
		 * DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTED:
		/*
		 * we transition to this state if we get a closed
		 * or event_failure CM callback. an expired timer
		 * can also cause us to be in this state. this
		 * is the only state in which we permit the
		 * ep_reinit operation.
		 */
		if (new_state == DAPLKA_EP_STATE_CLOSED) {
			valid = B_TRUE;
		}
		break;
	default:
		break;
	}

	if (!valid) {
		DERR("ep_transition: invalid state change %d -> %d\n",
		    old_state, new_state);
	}
	return (valid);
}

/*
 * first check if the transition is valid. then set ep_state
 * to new_state and wake up all waiters.
 */
static void
daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
	uint32_t new_state)
{
	boolean_t	valid;

	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);

	valid = daplka_ep_transition_is_valid(old_state, new_state);
	mutex_enter(&ep_rp->ep_lock);
	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
		if (valid) {
			ep_rp->ep_state = new_state;
		} else {
			/*
			 * this case is impossible.
			 * we have a serious problem if we get here.
			 * instead of panicking, we reset the state to
			 * old_state. doing this would at least prevent
			 * threads from hanging due to ep_state being
			 * stuck in TRANSITIONING.
			 */
			ep_rp->ep_state = old_state;
			ASSERT(B_FALSE);
		}
	}
	cv_broadcast(&ep_rp->ep_cv);
	mutex_exit(&ep_rp->ep_lock);
}

/*
 * modifies RC channel attributes.
 * currently, only the rdma_in and rdma_out attributes may
 * be modified. the channel must be in quiescent state when
 * this function is called.
 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t		*ep_rp = NULL;
	ibt_cep_modify_flags_t		good_flags;
	ibt_rc_chan_modify_attr_t	rcm_attr;
	ibt_hca_attr_t			*hca_attrp;
	dapl_ep_modify_t		args;
	ibt_status_t			status;
	uint32_t			old_state, new_state;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * Frees an EP resource.
 * An EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
/* ARGSUSED */
static int
daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_free_t		args;
	uint32_t		old_state, new_state;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
	if (retval != 0) {
		DERR("ep_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
	if (ep_rp == NULL) {
		DERR("ep_free: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/*
	 * ep cannot be freed if it is in an invalid state.
	 */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_free: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	ep_rp = NULL;
	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
	    args.epf_hkey, (void **)&ep_rp);
	if (retval != 0 || ep_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling ep_free in parallel.
		 */
		DERR("ep_free: cannot find ep resource\n");
		goto cleanup;
	}
	/* there should not be any outstanding timers */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(ep_rp);
	return (0);

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */
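
/*
 * Call flow (sketch, pieced together from the declarations above and
 * the routines below): ep_connect arms a timer with timeout(); on
 * expiry the callout runs daplka_timer_handler, which hands off to
 * daplka_timer_dispatch, which taskq_dispatches daplka_timer_thread.
 * that thread aborts the connection attempt with ibt_close_rc_channel
 * and posts a DAPL_IB_CME_TIMED_OUT event to the EP's conn_evd.
 */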

/*
 * this is the timer processing thread.
 */
static void
daplka_timer_thread(void *arg)
{
	daplka_timer_info_t	*timerp = (daplka_timer_info_t *)arg;
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev = NULL;
	ibt_status_t		status;
	int			old_state, new_state;

	ep_rp = timerp->ti_ep_res;
	ASSERT(ep_rp != NULL);
	ASSERT(timerp->ti_tmo_id != 0);
	timerp->ti_tmo_id = 0;

	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/* unblock hash_ep_free */
		mutex_enter(&ep_rp->ep_lock);
		ASSERT(ep_rp->ep_timer_hkey != 0);
		ep_rp->ep_timer_hkey = 0;
		cv_broadcast(&ep_rp->ep_cv);
		mutex_exit(&ep_rp->ep_lock);

		/* reset state to original state */
		daplka_ep_set_state(ep_rp, old_state, new_state);

		/* this function will also unref ep_rp */
		daplka_timer_info_free(timerp);
		return;
	}

	ASSERT(ep_rp->ep_timer_hkey != 0);
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we cannot keep ep_state in TRANSITIONING if we call
	 * ibt_close_rc_channel in blocking mode. this would cause
	 * a deadlock because the cm callbacks will be blocked and
	 * will not be able to wake us up.
	 */
	new_state = DAPLKA_EP_STATE_ABORTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * when we return from close_rc_channel, all callbacks should have
	 * completed. we can also be certain that these callbacks did not
	 * enqueue any events to conn_evd.
	 */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
		    status);
	}
	old_state = daplka_ep_get_state(ep_rp);

	/*
	 * this is the only thread that can transition ep_state out
	 * of ABORTING. all other ep operations would fail when
	 * ep_state is in ABORTING.
	 */
	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);

	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
	ASSERT(disc_ev != NULL);

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
	    disc_ev, ep_rp->ep_conn_evd);

	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	/* this function will also unref ep_rp */
	daplka_timer_info_free(timerp);
}

/*
 * dispatches a thread to continue with timer processing.
 */
static void
daplka_timer_dispatch(void *arg)
{
	/*
	 * keep rescheduling this function until
	 * taskq_dispatch succeeds.
	 */
	if (taskq_dispatch(daplka_taskq,
	    daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
		DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
		(void) timeout(daplka_timer_dispatch, arg, 10);
	}
}

/*
 * this function is called by the kernel's callout thread.
 * we first attempt to remove the timer object from the
 * global timer table. if it is found, we dispatch a thread
 * to continue processing the timer object. if it is not
1907 * found, that means the timer has been cancelled by someone
1908 * else.
1909 */
1910static void
1911daplka_timer_handler(void *arg)
1912{
1913	uint64_t		timer_hkey = (uintptr_t)arg;
1914	daplka_timer_info_t	*timerp = NULL;
1915
1916	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1917
1918	(void) daplka_hash_remove(&daplka_timer_info_htbl,
1919	    timer_hkey, (void **)&timerp);
1920	if (timerp == NULL) {
1921		D2("timer_handler: timer already cancelled\n");
1922		return;
1923	}
1924	daplka_timer_dispatch((void *)timerp);
1925}
1926
1927/*
1928 * allocates a timer_info object.
1929 * A reference to an EP is held by this object. This ensures
1930 * that the EP stays valid when a timer is outstanding.
1931 */
1932static daplka_timer_info_t *
1933daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1934{
1935	daplka_timer_info_t	*timerp;
1936
1937	timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1938	if (timerp == NULL) {
1939		DERR("timer_info_alloc: cannot allocate timer info\n");
1940		return (NULL);
1941	}
1942	timerp->ti_ep_res = ep_rp;
1943	timerp->ti_tmo_id = 0;
1944
1945	return (timerp);
1946}
1947
1948/*
1949 * Frees the timer_info object.
1950 * we release the EP reference before freeing the object.
1951 */
1952static void
1953daplka_timer_info_free(daplka_timer_info_t *timerp)
1954{
1955	ASSERT(timerp->ti_ep_res != NULL);
1956	DAPLKA_RS_UNREF(timerp->ti_ep_res);
1957	timerp->ti_ep_res = NULL;
1958	ASSERT(timerp->ti_tmo_id == 0);
1959	kmem_free(timerp, sizeof (*timerp));
1960}
1961
1962/*
1963 * cancels the timer set by ep_connect.
1964 * returns -1 if timer handling is in progress
1965 * and 0 otherwise.
1966 */
1967static int
1968daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
1969{
1970	/*
1971	 * this function can only be called when ep_state
1972	 * is frozen.
1973	 */
1974	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
1975	if (ep_rp->ep_timer_hkey != 0) {
1976		daplka_timer_info_t	*timerp = NULL;
1977
1978		(void) daplka_hash_remove(&daplka_timer_info_htbl,
1979		    ep_rp->ep_timer_hkey, (void **)&timerp);
1980		if (timerp == NULL) {
1981			/*
1982			 * this is possible if the timer_handler has
1983			 * removed the timerp but the taskq thread has
1984			 * not transitioned the ep_state to DISCONNECTED.
1985			 * we need to reset the ep_state to allow the
1986			 * taskq thread to continue with its work. the
1987			 * taskq thread will set the ep_timer_hkey to 0
1988			 * so we don't have to do it here.
1989			 */
1990			DERR("cancel_timer: timer is being processed\n");
1991			return (-1);
1992		}
1993		/*
1994		 * we got the timer object. if the handler fires at
1995		 * this point, it will not be able to find the object
1996		 * and will return immediately. normally, ti_tmo_id gets
1997		 * cleared when the handler fires.
1998		 */
1999		ASSERT(timerp->ti_tmo_id != 0);
2000
2001		/*
2002		 * note that untimeout can possibly call the handler.
2003		 * we are safe because the handler will be a no-op.
2004		 */
2005		(void) untimeout(timerp->ti_tmo_id);
2006		timerp->ti_tmo_id = 0;
2007		daplka_timer_info_free(timerp);
2008		ep_rp->ep_timer_hkey = 0;
2009	}
2010	return (0);
2011}
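
/*
 * Illustrative sketch only (compiled out behind the hypothetical
 * DAPLKA_EXAMPLES guard): how a caller copes with the -1 return,
 * waiting on ep_cv until the timer thread clears ep_timer_hkey.
 * daplka_hash_ep_free below contains the real usage.
 */
#ifdef DAPLKA_EXAMPLES
static void
daplka_example_cancel_wait(daplka_ep_resource_t *ep_rp)
{
	/* ep_state must be frozen at TRANSITIONING by the caller */
	if (daplka_cancel_timer(ep_rp) != 0) {
		mutex_enter(&ep_rp->ep_lock);
		while (ep_rp->ep_timer_hkey != 0) {
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}
}
#endif /* DAPLKA_EXAMPLES */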
2012
2013/*
2014 * this function is called by daplka_hash_destroy for
2015 * freeing timer_info objects
2016 */
2017static void
2018daplka_hash_timer_free(void *obj)
2019{
2020	daplka_timer_info_free((daplka_timer_info_t *)obj);
2021}
2022
2023/* ARGSUSED */
2024static uint16_t
2025daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2026{
2027	uint8_t *bp;
2028	int i;
2029	uint16_t cksum = 0;
2030
2031	bp = (uint8_t *)dp;
2032	for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2033		cksum += bp[i];
2034	}
2035	return (cksum);
2036}
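
/*
 * Illustrative sketch only (compiled out behind the hypothetical
 * DAPLKA_EXAMPLES guard): both ends compute the byte sum with
 * hi_checksum zeroed; ep_connect stores it in network byte order
 * and daplka_crevent_privdata_post verifies it the same way.
 */
#ifdef DAPLKA_EXAMPLES
static boolean_t
daplka_example_hello_verify(DAPL_PRIVATE *dp)
{
	uint16_t	cksum = ntohs(dp->hello_msg.hi_checksum);

	dp->hello_msg.hi_checksum = 0;
	return (daplka_hellomsg_cksum(dp) == cksum);
}
#endif /* DAPLKA_EXAMPLES */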
2037
2038/*
2039 * ep_connect is called by the client to initiate a connection to a
2040 * remote service point. It is a non-blocking call. If a non-zero
2041 * timeout is specified by the client, a timer will be set just before
2042 * returning from ep_connect. Upon a successful return from ep_connect,
2043 * the client will call evd_wait to wait for the connection to complete.
2044 * If the connection is rejected or has failed due to an error, the
2045 * client will be notified with an event containing the appropriate error
2046 * code. If the connection is accepted, the client will be notified with
2047 * the CONN_ESTABLISHED event. If the timer expires before either of the
2048 * above events (error or established), a TIMED_OUT event will be delivered
2049 * to the client.
2050 *
2051 * the complicated part of the timer logic is the handling of race
2052 * conditions with CM callbacks. we need to ensure that either the CM or
2053 * the timer thread gets to deliver an event, but not both. when the
2054 * CM callback is about to deliver an event, it always tries to cancel
2055 * the outstanding timer. if cancel_timer indicates that the timer is
2056 * already being processed, the CM callback will simply return without
2057 * delivering an event. when the timer thread executes, it tries to check
2058 * if the EP is still in CONNECTING state (timers only work on the active
2059 * side). if the EP is not in this state, the timer thread will return
2060 * without delivering an event.
2061 */
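
/*
 * A condensed sketch of that race (compiled out behind the
 * hypothetical DAPLKA_EXAMPLES guard): a CM callback first tries to
 * cancel the timer and delivers its event only if the cancel
 * succeeds. daplka_cm_rc_handler implements the real logic.
 */
#ifdef DAPLKA_EXAMPLES
static boolean_t
daplka_example_cm_wins_race(daplka_ep_resource_t *ep_rp)
{
	/* caller has frozen ep_state at TRANSITIONING */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/* timer thread won; it will deliver TIMED_OUT */
		return (B_FALSE);
	}
	/* CM callback won; safe to enqueue its own event */
	return (B_TRUE);
}
#endif /* DAPLKA_EXAMPLES */
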
2062/* ARGSUSED */
2063static int
2064daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2065	cred_t *cred, int *rvalp)
2066{
2067	daplka_ep_resource_t	*ep_rp = NULL;
2068	dapl_ep_connect_t	args;
2069	daplka_timer_info_t	*timerp = NULL;
2070	uint32_t		old_state, new_state;
2071	boolean_t		timer_inserted = B_FALSE;
2072	uint64_t		timer_hkey = 0;
2073	ibt_path_info_t		path_info;
2074	ibt_path_attr_t		path_attr;
2075	ibt_hca_attr_t		*hca_attrp;
2076	ibt_chan_open_args_t	chan_args;
2077	ibt_status_t		status = IBT_SUCCESS;
2078	uint8_t			num_paths;
2079	void			*priv_data;
2080	DAPL_PRIVATE		*dp;
2081	int			retval = 0;
2082	ib_gid_t		*sgid;
2083	ib_gid_t		*dgid;
2084	uint64_t		dgid_ored;
2085	ibt_ar_t		ar_query_s;
2086	ibt_ar_t		ar_result_s;
2087	ibt_path_flags_t	pathflags;
2088
2089	D3("ep_connect: enter\n");
2090	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
2091	    mode);
2092	if (retval != 0) {
2093		DERR("ep_connect: copyin error %d\n", retval);
2094		return (EFAULT);
2095	}
2096	ep_rp = (daplka_ep_resource_t *)
2097	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
2098	if (ep_rp == NULL) {
2099		DERR("ep_connect: cannot find ep resource\n");
2100		return (EINVAL);
2101	}
2102	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2103
2104	new_state = old_state = daplka_ep_get_state(ep_rp);
2105	if (old_state != DAPLKA_EP_STATE_CLOSED) {
2106		DERR("ep_connect: invalid state %d\n", old_state);
2107		retval = EINVAL;
2108		goto cleanup;
2109	}
2110	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
2111		DERR("ep_connect: private data len (%d) exceeded "
2112		    "max size %d\n", args.epc_priv_sz,
2113		    DAPL_MAX_PRIVATE_DATA_SIZE);
2114		retval = EINVAL;
2115		goto cleanup;
2116	}
2117
2118	/*
2119	 * check whether remote IP address to dgid resolution needs ATS
2120	 */
2121	dgid = &args.epc_dgid;
2122	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
2123#if defined(DAPLKA_DEBUG_FORCE_ATS)
2124	dgid_ored = 0ULL;
2125#endif /* DAPLKA_DEBUG_FORCE_ATS */
2126	/* check for unidentified dgid */
2127	if (dgid_ored == 0ULL) {
2128		/*
2129		 * setup for ibt_query_ar()
2130		 */
2131		sgid = &ia_rp->ia_hca_sgid;
2132		ar_query_s.ar_gid.gid_guid = 0ULL;
2133		ar_query_s.ar_gid.gid_prefix = 0ULL;
2134		ar_query_s.ar_pkey = 0;
2135		bcopy(args.epc_raddr_sadata.iad_sadata,
2136		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
2137#define	UR(b) ar_query_s.ar_data[(b)]
2138		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
2139		    UR(8), UR(9), UR(10), UR(11));
2140		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
2141		    UR(12), UR(13), UR(14), UR(15));
2142		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
2143		if (status != IBT_SUCCESS) {
2144			DERR("ep_connect: ibt_query_ar returned %d\n", status);
2145			*rvalp = (int)status;
2146			retval = 0;
2147			goto cleanup;
2148		}
2149		/*
2150		 * dgid identified from SA record
2151		 */
2152		dgid = &ar_result_s.ar_gid;
2153		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
2154		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
2155	}
2156
2157	bzero(&path_info, sizeof (ibt_path_info_t));
2158	bzero(&path_attr, sizeof (ibt_path_attr_t));
2159	bzero(&chan_args, sizeof (ibt_chan_open_args_t));
2160
2161	path_attr.pa_dgids = dgid;
2162	path_attr.pa_num_dgids = 1;
2163	/*
2164	 * leaving sid unset in path_attr saves one SA query and
2165	 * also keeps the server side from writing the service record
2166	 */
2167	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
2168	path_attr.pa_pkey = ia_rp->ia_port_pkey;
2169
2170	/* save the connection ep  - struct copy */
2171	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
2172	ep_rp->ep_dgid = *dgid;
2173
2174	num_paths = 0;
2175	pathflags = IBT_PATH_PKEY;
2176	/* enable APM on remote port but not on loopback case */
2177	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
2178	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
2179		pathflags |= IBT_PATH_APM;
2180	}
2181	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
2182	    pathflags, &path_attr, 1, &path_info, &num_paths);
2183
2184	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
2185		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
2186		    status, num_paths);
2187		*rvalp = (int)status;
2188		retval = 0;
2189		goto cleanup;
2190	}
2191	/* fill in the sid directly to path_info */
2192	path_info.pi_sid = args.epc_sid;
2193	hca_attrp = &ia_rp->ia_hca->hca_attr;
2194
2195	/* fill in open channel args */
2196	chan_args.oc_path = &path_info;
2197	chan_args.oc_cm_handler = daplka_cm_rc_handler;
2198	chan_args.oc_cm_clnt_private = (void *)ep_rp;
2199	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
2200	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
2201	chan_args.oc_path_retry_cnt = 7;	/* 3-bit field */
2202	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
2203
2204	ASSERT(args.epc_priv_sz > 0);
2205	priv_data = (void *)args.epc_priv;
2206
2207	chan_args.oc_priv_data_len = args.epc_priv_sz;
2208	chan_args.oc_priv_data = priv_data;
2209
2210	/*
2211	 * calculate checksum value of hello message and
2212	 * put hello message in network byte order
2213	 */
2214	dp = (DAPL_PRIVATE *)priv_data;
2215	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
2216	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
2217	dp->hello_msg.hi_checksum = 0;
2218	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
2219	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))
2220
2221	if (args.epc_timeout > 0) {
2222		/*
2223		 * increment refcnt before passing reference to
2224		 * timer_info_alloc.
2225		 */
2226		DAPLKA_RS_REF(ep_rp);
2227		timerp = daplka_timer_info_alloc(ep_rp);
2228		if (timerp == NULL) {
2229			DERR("ep_connect: cannot allocate timer\n");
2230			/*
2231			 * we need to remove the reference if
2232			 * allocation failed.
2233			 */
2234			DAPLKA_RS_UNREF(ep_rp);
2235			retval = ENOMEM;
2236			goto cleanup;
2237		}
2238		/*
2239		 * We generate our own hkeys so that timer_hkey can fit
2240		 * into a pointer and be passed as an arg to timeout()
2241		 */
2242		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
2243		retval = daplka_hash_insert(&daplka_timer_info_htbl,
2244		    &timer_hkey, (void *)timerp);
2245		if (retval != 0) {
2246			DERR("ep_connect: cannot insert timer info\n");
2247			goto cleanup;
2248		}
2249		ASSERT(ep_rp->ep_timer_hkey == 0);
2250		ep_rp->ep_timer_hkey = timer_hkey;
2251		timer_inserted = B_TRUE;
2252		D2("ep_connect: timer_hkey = 0x%llx\n",
2253		    (longlong_t)timer_hkey);
2254	}
2255	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
2256	    IBT_NONBLOCKING, &chan_args, NULL);
2257
2258	if (status != IBT_SUCCESS) {
2259		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
2260		*rvalp = (int)status;
2261		retval = 0;
2262		goto cleanup;
2263	}
2264	/*
2265	 * if a cm callback gets called at this point, it'll have to wait until
2266	 * ep_state becomes connecting (or some other state if another thread
2267	 * manages to get ahead of the callback). this guarantees that the
2268	 * callback will not touch the timer until it gets set.
2269	 */
2270	if (timerp != NULL) {
2271		clock_t		tmo;
2272
2273		tmo = drv_usectohz((clock_t)args.epc_timeout);
2274		/*
2275		 * We generate our own 32 bit timer_hkey so that it can fit
2276		 * into a pointer
2277		 */
2278		ASSERT(timer_hkey != 0);
2279		timerp->ti_tmo_id = timeout(daplka_timer_handler,
2280		    (void *)(uintptr_t)timer_hkey, tmo);
2281	}
2282	new_state = DAPLKA_EP_STATE_CONNECTING;
2283
2284cleanup:;
2285	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
2286		/*
2287		 * if ibt_open_rc_channel failed, the timerp must still
2288		 * be in daplka_timer_info_htbl because neither the cm
2289		 * callback nor the timer_handler will be called.
2290		 */
2291		if (timer_inserted) {
2292			daplka_timer_info_t	*new_timerp = NULL;
2293
2294			ASSERT(timer_hkey != 0);
2295			(void) daplka_hash_remove(&daplka_timer_info_htbl,
2296			    timer_hkey, (void **)&new_timerp);
2297			ASSERT(new_timerp == timerp);
2298			ep_rp->ep_timer_hkey = 0;
2299		}
2300		daplka_timer_info_free(timerp);
2301	}
2302	daplka_ep_set_state(ep_rp, old_state, new_state);
2303	DAPLKA_RS_UNREF(ep_rp);
2304	D3("ep_connect: exit\n");
2305	return (retval);
2306}
2307
2308/*
2309 * ep_disconnect closes a connection with a remote peer.
2310 * if a connection has not been established, ep_disconnect
2311 * will instead flush all recv bufs posted to this channel.
2312 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2313 * entry to ep_disconnect, the EP state will transition to
2314 * DISCONNECTING upon exit. the CM callbacks triggered by
2315 * ibt_close_rc_channel will cause EP state to become
2316 * DISCONNECTED. This function is a no-op if EP state is
2317 * DISCONNECTED.
2318 */
2319/* ARGSUSED */
2320static int
2321daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2322	cred_t *cred, int *rvalp)
2323{
2324	daplka_ep_resource_t	*ep_rp = NULL;
2325	dapl_ep_disconnect_t	args;
2326	ibt_status_t		status;
2327	uint32_t		old_state, new_state;
2328	int			retval = 0;
2329
2330	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
2331	    mode);
2332	if (retval != 0) {
2333		DERR("ep_disconnect: copyin error %d\n", retval);
2334		return (EFAULT);
2335	}
2336	ep_rp = (daplka_ep_resource_t *)
2337	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
2338	if (ep_rp == NULL) {
2339		DERR("ep_disconnect: cannot find ep resource\n");
2340		return (EINVAL);
2341	}
2342	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2343
2344	new_state = old_state = daplka_ep_get_state(ep_rp);
2345	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
2346	    old_state != DAPLKA_EP_STATE_CONNECTING &&
2347	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
2348	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
2349	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
2350	    old_state != DAPLKA_EP_STATE_CLOSED) {
2351		DERR("ep_disconnect: invalid state %d\n", old_state);
2352		retval = EINVAL;
2353		goto cleanup;
2354	}
2355
2356	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
2357	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
2358		D2("ep_disconnect: ep already disconnected\n");
2359		retval = 0;
2360		/* we leave the state as DISCONNECTED */
2361		goto cleanup;
2362	}
2363	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
2364	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
2365		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
2366	}
2367
2368	/*
2369	 * according to the udapl spec, ep_disconnect should
2370	 * flush the channel if the channel is not CONNECTED.
2371	 */
2372	if (old_state == DAPLKA_EP_STATE_CLOSED) {
2373		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
2374		if (status != IBT_SUCCESS) {
2375			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
2376			    status);
2377			*rvalp = (int)status;
2378		}
2379		retval = 0;
2380		/* we leave the state as CLOSED */
2381		goto cleanup;
2382	}
2383
2384	new_state = DAPLKA_EP_STATE_DISCONNECTING;
2385	daplka_ep_set_state(ep_rp, old_state, new_state);
2386	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
2387	    NULL, 0, NULL, NULL, NULL);
2388
2389	if (status == IBT_SUCCESS) {
2390		DAPLKA_RS_UNREF(ep_rp);
2391		return (retval);
2392	} else {
2393		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
2394		    status);
2395		*rvalp = (int)status;
2396		retval = 0;
2397		new_state = old_state;
2398	}
2399
2400cleanup:;
2401	daplka_ep_set_state(ep_rp, old_state, new_state);
2402	DAPLKA_RS_UNREF(ep_rp);
2403	return (retval);
2404}
2405
2406/*
2407 * this function resets the EP to a usable state (i.e. from
2408 * DISCONNECTED to CLOSED). it is implemented with the
2409 * ibt_recycle_rc interface, which returns the existing rc
2410 * channel to a reconnectable state, in place of cloning and
2411 * tearing down the old channel.
2412 */
2413/* ARGSUSED */
2414static int
2415daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2416	cred_t *cred, int *rvalp)
2417{
2418	daplka_ep_resource_t		*ep_rp = NULL;
2419	dapl_ep_reinit_t		args;
2420	ibt_status_t			status;
2421	uint32_t			old_state, new_state;
2422	int				retval = 0;
2423
2424	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2425	    mode);
2426	if (retval != 0) {
2427		DERR("reinit: copyin error %d\n", retval);
2428		return (EFAULT);
2429	}
2430	ep_rp = (daplka_ep_resource_t *)
2431	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2432	if (ep_rp == NULL) {
2433		DERR("reinit: cannot find ep resource\n");
2434		return (EINVAL);
2435	}
2436	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2437	new_state = old_state = daplka_ep_get_state(ep_rp);
2438	if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2439	    (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2440		DERR("reinit: invalid state %d\n", old_state);
2441		retval = EINVAL;
2442		goto cleanup;
2443	}
2444
2445	status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2446	    IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2447	    ia_rp->ia_port_num, NULL, NULL);
2448	if (status != IBT_SUCCESS) {
2449		DERR("reinit: unable to recycle channel\n");
2450		*rvalp = (int)status;
2451		retval = 0;
2452		goto cleanup;
2453	}
2454	new_state = DAPLKA_EP_STATE_CLOSED;
2455
2456cleanup:;
2457	daplka_ep_set_state(ep_rp, old_state, new_state);
2458	DAPLKA_RS_UNREF(ep_rp);
2459	return (retval);
2460}
2461
2462/*
2463 * destroys an EP resource.
2464 * called when refcnt drops to zero.
2465 */
2466static int
2467daplka_ep_destroy(daplka_resource_t *gen_rp)
2468{
2469	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)gen_rp;
2470	ibt_status_t		status;
2471
2472	ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2473	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2474
2475	/*
2476	 * by the time we get here, we can be sure that
2477	 * there is no outstanding timer.
2478	 */
2479	ASSERT(ep_rp->ep_timer_hkey == 0);
2480
2481	D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2482	    ep_rp, DAPLKA_RS_RNUM(ep_rp));
2483	/*
2484	 * free rc channel
2485	 */
2486	if (ep_rp->ep_chan_hdl != NULL) {
2487		mutex_enter(&daplka_dev->daplka_mutex);
2488		ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2489		mutex_exit(&daplka_dev->daplka_mutex);
2490		status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2491		if (status != IBT_SUCCESS) {
2492			DERR("ep_free: ibt_free_channel returned %d\n",
2493			    status);
2494		}
2495		ep_rp->ep_chan_hdl = NULL;
2496		D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2497	}
2498	/*
2499	 * release all references
2500	 */
2501	if (ep_rp->ep_snd_evd != NULL) {
2502		DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2503		ep_rp->ep_snd_evd = NULL;
2504	}
2505	if (ep_rp->ep_rcv_evd != NULL) {
2506		DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2507		ep_rp->ep_rcv_evd = NULL;
2508	}
2509	if (ep_rp->ep_conn_evd != NULL) {
2510		DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2511		ep_rp->ep_conn_evd = NULL;
2512	}
2513	if (ep_rp->ep_srq_res != NULL) {
2514		DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2515		ep_rp->ep_srq_res = NULL;
2516	}
2517	if (ep_rp->ep_pd_res != NULL) {
2518		DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2519		ep_rp->ep_pd_res = NULL;
2520	}
2521	cv_destroy(&ep_rp->ep_cv);
2522	mutex_destroy(&ep_rp->ep_lock);
2523
2524	DAPLKA_RS_FINI(ep_rp);
2525	kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2526	D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2527	return (0);
2528}
2529
2530/*
2531 * this function is called by daplka_hash_destroy for
2532 * freeing EP resource objects
2533 */
2534static void
2535daplka_hash_ep_free(void *obj)
2536{
2537	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
2538	ibt_status_t		status;
2539	uint32_t		old_state, new_state;
2540	int			retval;
2541
2542	old_state = daplka_ep_get_state(ep_rp);
2543	retval = daplka_cancel_timer(ep_rp);
2544	new_state = DAPLKA_EP_STATE_FREED;
2545	daplka_ep_set_state(ep_rp, old_state, new_state);
2546
2547	if (retval != 0) {
2548		D2("hash_ep_free: ep_rp 0x%p "
2549		    "timer is still being processed\n", ep_rp);
2550		mutex_enter(&ep_rp->ep_lock);
2551		if (ep_rp->ep_timer_hkey != 0) {
2552			D2("hash_ep_free: ep_rp 0x%p "
2553			    "waiting for timer_hkey to be 0\n", ep_rp);
2554			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
2555		}
2556		mutex_exit(&ep_rp->ep_lock);
2557	}
2558
2559	/* call ibt_close_rc_channel regardless of what state we are in */
2560	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
2561	    NULL, 0, NULL, NULL, NULL);
2562	if (status != IBT_SUCCESS) {
2563		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
2564		    old_state == DAPLKA_EP_STATE_CONNECTING ||
2565		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
2566			DERR("hash_ep_free: ep_rp 0x%p state %d "
2567			    "unexpected error %d from close_rc_channel\n",
2568			    ep_rp, old_state, status);
2569		}
2570		D2("hash_ep_free: close_rc_channel, status %d\n", status);
2571	}
2572
2573	DAPLKA_RS_UNREF(ep_rp);
2574}
2575
2576/*
2577 * creates an EVD resource.
2578 * An EVD is used by the client to wait for events from one
2579 * or more sources.
2580 */
2581/* ARGSUSED */
2582static int
2583daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2584	cred_t *cred, int *rvalp)
2585{
2586	daplka_evd_resource_t		*evd_rp = NULL;
2587	daplka_async_evd_hkey_t		*async_evd;
2588	ibt_hca_attr_t			*hca_attrp;
2589	ibt_cq_attr_t			cq_attr;
2590	dapl_evd_create_t		args;
2591	uint64_t			evd_hkey = 0;
2592	boolean_t			inserted = B_FALSE;
2593	int				retval = 0;
2594	ibt_status_t			status;
2595
2596	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2597	    mode);
2598	if (retval != 0) {
2599		DERR("evd_create: copyin error %d", retval);
2600		return (EFAULT);
2601	}
2602	if ((args.evd_flags &
2603	    ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2604		DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2605		return (EINVAL);
2606	}
2607
2608	evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2609	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
2610	DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2611	    DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2612
2613	mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2614	cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2615	evd_rp->evd_hca = ia_rp->ia_hca;
2616	evd_rp->evd_flags = args.evd_flags;
2617	evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2618	evd_rp->evd_cookie = args.evd_cookie;
2619	evd_rp->evd_cno_res = NULL;
2620	evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2621	evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2622	evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2623
2624	/*
2625	 * if the client specified a non-zero cno_hkey, we
2626	 * lookup the cno and save the reference for later use.
2627	 */
2628	if (args.evd_cno_hkey > 0) {
2629		daplka_cno_resource_t *cno_rp;
2630
2631		cno_rp = (daplka_cno_resource_t *)
2632		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2633		    args.evd_cno_hkey);
2634		if (cno_rp == NULL) {
2635			DERR("evd_create: cannot find cno resource\n");
2636			retval = EINVAL;
2637			goto cleanup;
2637		}
2638		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2639		evd_rp->evd_cno_res = cno_rp;
2640	}
2641	hca_attrp = &ia_rp->ia_hca->hca_attr;
2642	if ((evd_rp->evd_flags &
2643	    (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2644		if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2645			DERR("evd_create: invalid cq size %d",
2646			    args.evd_cq_size);
2647			retval = EINVAL;
2648			goto cleanup;
2649		}
2650		cq_attr.cq_size = args.evd_cq_size;
2651		cq_attr.cq_sched = NULL;
2652		cq_attr.cq_flags = IBT_CQ_USER_MAP;
2653
2654		status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2655		    &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2656
2657		if (status != IBT_SUCCESS) {
2658			DERR("evd_create: ibt_alloc_cq returned %d", status);
2659			*rvalp = (int)status;
2660			retval = 0;
2661			goto cleanup;
2662		}
2663
2664		/*
2665		 * store evd ptr with cq_hdl
2666		 * mutex is only needed for race of "destroy" and "async"
2667		 */
2668		mutex_enter(&daplka_dev->daplka_mutex);
2669		ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2670		mutex_exit(&daplka_dev->daplka_mutex);
2671
2672		/* Get HCA-specific data_out info */
2673		status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2674		    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2675		    &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2676
2677		if (status != IBT_SUCCESS) {
2678			DERR("evd_create: ibt_ci_data_out error(%d)", status);
2679			*rvalp = (int)status;
2680			retval = 0;
2681			goto cleanup;
2682		}
2683
2684		args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2685
2686		ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2687		    (void *)evd_rp);
2688	}
2689
2690	retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2691	    &evd_hkey, (void *)evd_rp);
2692	if (retval != 0) {
2693		DERR("evd_create: cannot insert evd %d\n", retval);
2694		goto cleanup;
2695	}
2696	inserted = B_TRUE;
2697	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))
2698
2699	/*
2700	 * If this evd handles async events, add it to the IA resource's
2701	 * async evd list
2702	 */
2703	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2704		async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2705		    daplka_km_flags);
2706		/* add the evd to the head of the list */
2707		mutex_enter(&ia_rp->ia_lock);
2708		async_evd->aeh_evd_hkey = evd_hkey;
2709		async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2710		ia_rp->ia_async_evd_hkeys = async_evd;
2711		mutex_exit(&ia_rp->ia_lock);
2712	}
2713
2714	args.evd_hkey = evd_hkey;
2715	retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2716	if (retval != 0) {
2717		DERR("evd_create: copyout error %d\n", retval);
2718		retval = EFAULT;
2719		goto cleanup;
2720	}
2721	return (0);
2722
2723cleanup:;
2724	if (inserted) {
2725		daplka_evd_resource_t *free_rp = NULL;
2726
2727		(void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2728		    (void **)&free_rp);
2729		if (free_rp != evd_rp) {
2730			DERR("evd_create: cannot remove evd\n");
2731			/*
2732			 * we can only get here if another thread
2733			 * has completed the cleanup in evd_free
2734			 */
2735			return (retval);
2736		}
2737	}
2738	DAPLKA_RS_UNREF(evd_rp);
2739	return (retval);
2740}
2741
2742/*
2743 * resizes CQ and returns new mapping info to library.
2744 */
2745/* ARGSUSED */
2746static int
2747daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2748	cred_t *cred, int *rvalp)
2749{
2750	daplka_evd_resource_t		*evd_rp = NULL;
2751	ibt_hca_attr_t			*hca_attrp;
2752	dapl_cq_resize_t		args;
2753	ibt_status_t			status;
2754	int				retval = 0;
2755
2756	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2757	    mode);
2758	if (retval != 0) {
2759		DERR("cq_resize: copyin error %d\n", retval);
2760		return (EFAULT);
2761	}
2762
2763	/* get evd resource */
2764	evd_rp = (daplka_evd_resource_t *)
2765	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2766	if (evd_rp == NULL) {
2767		DERR("cq_resize: cannot find evd resource\n");
2768		return (EINVAL);
2769	}
2770	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2771
2772	hca_attrp = &ia_rp->ia_hca->hca_attr;
2773	if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2774		DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2775		retval = EINVAL;
2776		goto cleanup;
2777	}
2778	/*
2779	 * If ibt_resize_cq fails, it is primarily due to resource
2780	 * shortage. Per the IB spec, a resize will never lose events and
2781	 * a resize error leaves the CQ intact. Therefore even if the
2782	 * resize request fails we proceed and get the mapping data
2783	 * from the CQ so that the library can mmap it.
2784	 */
2785	status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2786	    &args.cqr_cq_real_size);
2787	if (status != IBT_SUCCESS) {
2788		/* we return the size of the old CQ if resize fails */
2789		args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2790		ASSERT(status != IBT_CQ_HDL_INVALID);
2791		DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2792	} else {
2793		mutex_enter(&evd_rp->evd_lock);
2794		evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2795		mutex_exit(&evd_rp->evd_lock);
2796	}
2797
2798	D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2799	    DAPLKA_RS_RNUM(evd_rp),
2800	    args.cqr_cq_new_size, args.cqr_cq_real_size);
2801
2802	/* Get HCA-specific data_out info */
2803	status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2804	    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2805	    &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2806	if (status != IBT_SUCCESS) {
2807		DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2808		/* return ibt_ci_data_out status */
2809		*rvalp = (int)status;
2810		retval = 0;
2811		goto cleanup;
2812	}
2813
2814	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2815	    mode);
2816	if (retval != 0) {
2817		DERR("cq_resize: copyout error %d\n", retval);
2818		retval = EFAULT;
2819		goto cleanup;
2820	}
2821
2822cleanup:;
2823	if (evd_rp != NULL) {
2824		DAPLKA_RS_UNREF(evd_rp);
2825	}
2826	return (retval);
2827}
2828
2829/*
2830 * Routine to copyin the event poll message so that 32 bit libraries
2831 * can be safely supported
2832 */
2833int
2834daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2835{
2836	int	retval;
2837
2838#ifdef _MULTI_DATAMODEL
2839	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2840		dapl_event_poll32_t	args32;
2841
2842		retval = ddi_copyin((void *)inarg, &args32,
2843		    sizeof (dapl_event_poll32_t), mode);
2844		if (retval != 0) {
2845			DERR("event_poll_copyin: 32bit error %d\n", retval);
2846			return (EFAULT);
2847		}
2848
2849		outarg->evp_evd_hkey = args32.evp_evd_hkey;
2850		outarg->evp_threshold = args32.evp_threshold;
2851		outarg->evp_timeout = args32.evp_timeout;
2852		outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2853		outarg->evp_num_ev = args32.evp_num_ev;
2854		outarg->evp_num_polled = args32.evp_num_polled;
2855		return (0);
2856	}
2857#endif
2858	retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2859	    mode);
2860	if (retval != 0) {
2861		DERR("event_poll_copyin: copyin error %d\n", retval);
2862		return (EFAULT);
2863	}
2864
2865	return (0);
2866}
2867
2868/*
2869 * Routine to copyout the event poll message so that 32 bit libraries
2870 * can be safely supported
2871 */
2872int
2873daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2874{
2875	int	retval;
2876
2877#ifdef _MULTI_DATAMODEL
2878	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2879		dapl_event_poll32_t	args32;
2880
2881		args32.evp_evd_hkey = inarg->evp_evd_hkey;
2882		args32.evp_threshold = inarg->evp_threshold;
2883		args32.evp_timeout = inarg->evp_timeout;
2884		args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2885		args32.evp_num_ev = inarg->evp_num_ev;
2886		args32.evp_num_polled = inarg->evp_num_polled;
2887
2888		retval = ddi_copyout((void *)&args32, (void *)outarg,
2889		    sizeof (dapl_event_poll32_t), mode);
2890		if (retval != 0) {
2891			DERR("event_poll_copyout: 32bit error %d\n", retval);
2892			return (EFAULT);
2893		}
2894		return (0);
2895	}
2896#endif
2897	retval = ddi_copyout((void *)inarg, (void *)outarg,
2898	    sizeof (dapl_event_poll_t), mode);
2899	if (retval != 0) {
2900		DERR("event_poll_copyout: error %d\n", retval);
2901		return (EFAULT);
2902	}
2903
2904	return (0);
2905}
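
/*
 * Illustrative sketch only (compiled out behind the hypothetical
 * DAPLKA_EXAMPLES guard): a 32-bit user pointer must be widened
 * through uintptr_t rather than cast to a pointer directly, which
 * is exactly what the copyin/copyout routines above do with evp_ep.
 */
#ifdef DAPLKA_EXAMPLES
static dapl_ib_event_t *
daplka_example_widen_ptr32(caddr32_t uaddr32)
{
	/* widen first, then convert; avoids sign extension issues */
	return ((dapl_ib_event_t *)(uintptr_t)uaddr32);
}
#endif /* DAPLKA_EXAMPLES */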
2906
2907/*
2908 * function to handle CM REQ RCV private data from Solaris or third parties
2909 */
2910/* ARGSUSED */
2911static void
2912daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
2913	dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
2914{
2915	DAPL_PRIVATE	*dp;
2916	ib_gid_t	*lgid;
2917	ibt_ar_t	ar_query_s;
2918	ibt_ar_t	ar_result_s;
2919	DAPL_HELLO_MSG	*hip;
2920	uint32_t	ipaddr_ord;
2921	ibt_priv_data_len_t clen;
2922	ibt_priv_data_len_t olen;
2923	ibt_status_t	status;
2924	uint16_t	cksum;
2925
2926	/*
2927	 * get private data and len
2928	 */
2929	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
2930	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
2931#if defined(DAPLKA_DEBUG_FORCE_ATS)
2932	/* skip the DAPL_PRIVATE checksum check */
2933#else
2934	/* for remote connects */
2935	/* look up hello message in the CM private data area */
2936	if (clen >= sizeof (DAPL_PRIVATE) &&
2937	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
2938		cksum = ntohs(dp->hello_msg.hi_checksum);
2939		dp->hello_msg.hi_checksum = 0;
2940		if (daplka_hellomsg_cksum(dp) == cksum) {
2941			D2("daplka_crevent_privdata_post: Solaris msg\n");
2942			evd_rp->ibe_ce.ibce_priv_data_size = clen;
2943			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
2944			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
2945			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2946			kmem_free(dp, clen);
2947			return;
2948		}
2949	}
2950#endif /* DAPLKA_DEBUG_FORCE_ATS */
2951
2952	D2("daplka_crevent_privdata_post: 3rd party msg\n");
2953	/* transpose CM private data into hello message */
2954	if (clen) {
2955		olen = clen;
2956		if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
2957			clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
2958		}
2959		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2960		kmem_free(dp, olen);
2961	} else {
2962		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
2963		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
2964	}
2965	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
2966	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
2967	/*
2968	 * fill in hello message
2969	 */
2970	hip = &dp->hello_msg;
2971	hip->hi_checksum = DAPL_CHECKSUM;
2972	hip->hi_clen = clen;
2973	hip->hi_mid = 0;
2974	hip->hi_vers = DAPL_HELLO_MSG_VERS;
2975	hip->hi_port = 0;
2976
2977	/* assign sgid and dgid */
2978	lgid = &ia_rp->ia_hca_sgid;
2979	ar_query_s.ar_gid.gid_prefix =
2980	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
2981	ar_query_s.ar_gid.gid_guid =
2982	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
2983	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
2984	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);
2985
2986	/* reverse ip address lookup through ATS */
2987	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
2988	if (status == IBT_SUCCESS) {
2989		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
2990		/* determine the address family */
2991		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
2992		    hip->hi_v4pad[2];
2993		if (ipaddr_ord == 0) {
2994			hip->hi_ipv = AF_INET;
2995		} else {
2996			hip->hi_ipv = AF_INET6;
2997		}
2998
2999#define	UL(b) ar_result_s.ar_data[(b)]
3000		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
3001		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
3002		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
3003		    UL(12), UL(13), UL(14), UL(15));
3004	} else {
3005		/* non-conformed third parties */
3006		hip->hi_ipv = AF_UNSPEC;
3007		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
3008	}
3009}
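
/*
 * Illustrative sketch only (compiled out behind the hypothetical
 * DAPLKA_EXAMPLES guard): the family test used above. ATS always
 * carries an IPv6-sized address; an all-zero hi_v4pad marks an
 * IPv4-mapped address.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_hello_af(DAPL_HELLO_MSG *hip)
{
	uint32_t	ipaddr_ord;

	ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
	    hip->hi_v4pad[2];
	return ((ipaddr_ord == 0) ? AF_INET : AF_INET6);
}
#endif /* DAPLKA_EXAMPLES */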
3010
3011/*
3012 * this function is called by evd_wait and evd_dequeue to wait for
3013 * connection events and CQ notifications. typically this function
3014 * is called when the userland CQ is empty and the client has
3015 * specified a non-zero timeout to evd_wait. if the client is
3016 * interested in CQ events, the CQ must be armed in userland prior
3017 * to calling this function.
3018 */
3019/* ARGSUSED */
3020static int
3021daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3022	cred_t *cred, int *rvalp)
3023{
3024	daplka_evd_resource_t	*evd_rp = NULL;
3025	dapl_event_poll_t	args;
3026	daplka_evd_event_t	*head;
3027	dapl_ib_event_t		evp_arr[NUM_EVENTS_PER_POLL];
3028	dapl_ib_event_t		*evp;
3029	dapl_ib_event_t		*evp_start;
3030	size_t			evp_size;
3031	int			threshold;
3032	clock_t			timeout;
3033	uint32_t		max_events;
3034	uint32_t		num_events = 0;
3035	void			*pd;
3036	ibt_priv_data_len_t	n;
3037	int			retval = 0;
3038	int			rc;
3039
3040	retval = daplka_event_poll_copyin(arg, &args, mode);
3041	if (retval != 0) {
3042		return (EFAULT);
3043	}
3044
3045	if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
3046		DERR("event_poll: evp_ep cannot be NULL if evp_num_ev=%d",
3047		    args.evp_num_ev);
3048		return (EINVAL);
3049	}
3050	/*
3051	 * Note: dequeue requests have a threshold = 0, timeout = 0
3052	 */
3053	threshold = args.evp_threshold;
3054
3055	max_events = args.evp_num_ev;
3056	/* ensure library is passing sensible values */
3057	if (max_events < threshold) {
3058		DERR("event_poll: max_events(%d) < threshold(%d)\n",
3059		    max_events, threshold);
3060		return (EINVAL);
3061	}
3062	/* Do a sanity check to avoid excessive memory allocation */
3063	if (max_events > DAPL_EVD_MAX_EVENTS) {
3064		DERR("event_poll: max_events(%d) > %d",
3065		    max_events, DAPL_EVD_MAX_EVENTS);
3066		return (EINVAL);
3067	}
3068	D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3069	    threshold, (longlong_t)args.evp_timeout, max_events);
3070
3071	/* get evd resource */
3072	evd_rp = (daplka_evd_resource_t *)
3073	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3074	if (evd_rp == NULL) {
3075		DERR("event_poll: cannot find evd resource\n");
3076		return (EINVAL);
3077	}
3078	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3079
3080	/*
3081	 * Use event array on the stack if possible
3082	 */
3083	if (max_events <= NUM_EVENTS_PER_POLL) {
3084		evp_start = evp = &evp_arr[0];
3085	} else {
3086		evp_size = max_events * sizeof (dapl_ib_event_t);
3087		evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3088		if (evp == NULL) {
3089			DERR("event_poll: kmem_zalloc failed, evp_size %d",
3090			    evp_size);
3091			retval = ENOMEM;
3092			goto cleanup;
3093		}
3094	}
3095
3096	/*
3097	 * The Event poll algorithm is as follows -
3098	 * The library passes a buffer big enough to hold "max_events"
3099	 * events. max_events is >= threshold. If at any stage we have
3100	 * gathered max_events events, we bail. The events are polled in
3101	 * the following order -
3102	 * 1) Check for CR events in the evd_cr_events list
3103	 * 2) Check for Connection events in the evd_conn_events list
3104	 * 3) Check for Async events in the evd_async_events list
3105	 *
3106	 * If after the above steps we don't have enough (>= threshold)
3107	 * events, we block for CQ notification and sleep; on wakeup we
3108	 * start at step 1 again.
3108	 */
3109
3110	/*
3111	 * Note: this could be 0 or INFINITE or any other value in microseconds
3112	 */
3113	if (args.evp_timeout > 0) {
3114		if (args.evp_timeout >= LONG_MAX) {
3115			timeout = LONG_MAX;
3116		} else {
3117			clock_t	curr_time = ddi_get_lbolt();
3118
3119			timeout = curr_time +
3120			    drv_usectohz((clock_t)args.evp_timeout);
3121			/*
3122			 * use the max value if we wrapped around
3123			 */
3124			if (timeout <= curr_time) {
3125				timeout = LONG_MAX;
3126			}
3127		}
3128	} else {
3129		timeout = 0;
3130	}
3131
3132	mutex_enter(&evd_rp->evd_lock);
3133	for (;;) {
3134		/*
3135		 * If this evd is waiting for CM events check that now.
3136		 */
3137		if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3138		    (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3139			/* dequeue events from evd_cr_events list */
3140			while ((head = daplka_evd_event_dequeue(
3141			    &evd_rp->evd_cr_events)) != NULL) {
3142				/*
3143				 * populate the evp array
3144				 */
3145				evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3146				evp[num_events].ibe_ce.ibce_event =
3147				    head->ee_cmev.ec_cm_ev_type;
3148				evp[num_events].ibe_ce.ibce_cookie =
3149				    (uint64_t)head->ee_cmev.ec_cm_cookie;
3150				evp[num_events].ibe_ce.ibce_psep_cookie =
3151				    head->ee_cmev.ec_cm_psep_cookie;
3152				daplka_crevent_privdata_post(ia_rp,
3153				    &evp[num_events], head);
3154				kmem_free(head, sizeof (daplka_evd_event_t));
3155
3156				if (++num_events == max_events) {
3157					mutex_exit(&evd_rp->evd_lock);
3158					goto maxevent_reached;
3159				}
3160			}
3161		}
3162
3163		if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3164		    (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3165			/* dequeue events from evd_connection_events list */
3166			while ((head = daplka_evd_event_dequeue
3167			    (&evd_rp->evd_conn_events))) {
3168				/*
3169				 * populate the evp array -
3170				 *
3171				 */
3172				if (head->ee_cmev.ec_cm_is_passive) {
3173					evp[num_events].ibe_ev_family =
3174					    DAPL_PASSIVE_CONNECTION_EVENTS;
3175				} else {
3176					evp[num_events].ibe_ev_family =
3177					    DAPL_ACTIVE_CONNECTION_EVENTS;
3178				}
3179				evp[num_events].ibe_ce.ibce_event =
3180				    head->ee_cmev.ec_cm_ev_type;
3181				evp[num_events].ibe_ce.ibce_cookie =
3182				    (uint64_t)head->ee_cmev.ec_cm_cookie;
3183				evp[num_events].ibe_ce.ibce_psep_cookie =
3184				    head->ee_cmev.ec_cm_psep_cookie;
3185
3186				if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3187					pd = head->ee_cmev.ec_cm_ev_priv_data;
3188					n = head->
3189					    ee_cmev.ec_cm_ev_priv_data_len;
3190					bcopy(pd, (void *)evp[num_events].
3191					    ibe_ce.ibce_priv_data_ptr, n);
3192					evp[num_events].ibe_ce.
3193					    ibce_priv_data_size = n;
3194					kmem_free(pd, n);
3195				}
3196
3197				kmem_free(head, sizeof (daplka_evd_event_t));
3198
3199				if (++num_events == max_events) {
3200					mutex_exit(&evd_rp->evd_lock);
3201					goto maxevent_reached;
3202				}
3203			}
3204		}
3205
3206		if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3207		    (evd_rp->evd_async_events.eel_num_elements > 0)) {
3208			/* dequeue events from evd_async_events list */
3209			while ((head = daplka_evd_event_dequeue(
3210			    &evd_rp->evd_async_events)) != NULL) {
3211				/*
3212				 * populate the evp array
3213				 */
3214				evp[num_events].ibe_ev_family =
3215				    DAPL_ASYNC_EVENTS;
3216				evp[num_events].ibe_async.ibae_type =
3217				    head->ee_aev.ibae_type;
3218				evp[num_events].ibe_async.ibae_hca_guid =
3219				    head->ee_aev.ibae_hca_guid;
3220				evp[num_events].ibe_async.ibae_cookie =
3221				    head->ee_aev.ibae_cookie;
3222				evp[num_events].ibe_async.ibae_port =
3223				    head->ee_aev.ibae_port;
3224
3225				kmem_free(head, sizeof (daplka_evd_event_t));
3226
3227				if (++num_events == max_events) {
3228					break;
3229				}
3230			}
3231		}
3232
3233		/*
3234		 * We have sufficient events for this call so no need to wait
3235		 */
3236		if ((threshold > 0) && (num_events >= threshold)) {
3237			mutex_exit(&evd_rp->evd_lock);
3238			break;
3239		}
3240
3241		evd_rp->evd_waiters++;
3242		/*
3243		 * There are no new events and a timeout was specified.
3244		 * Note: for CQ events threshold is 0 but timeout is
3245		 * not necessarily 0.
3246		 */
3247		while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3248		    timeout) {
3249			retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3250			    &evd_rp->evd_lock, timeout);
3251			if (retval == 0) {
3252				retval = EINTR;
3253				break;
3254			} else if (retval == -1) {
3255				retval = ETIME;
3256				break;
3257			} else {
3258				retval = 0;
3259				continue;
3260			}
3261		}
3262		evd_rp->evd_waiters--;
3263		if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3264			/*
3265			 * We were woken up by the CQ handler because events
3266			 * arrived in the CQ (which userland must reap), or
3267			 * because of S/W events; check which one below.
3268			 */
3269
3270			/* check for userland events only */
3271			if (!(evd_rp->evd_newevents &
3272			    ~DAPLKA_EVD_ULAND_EVENTS)) {
3273				evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3274				mutex_exit(&evd_rp->evd_lock);
3275				break;
3276			}
3277			/*
3278			 * Clear newevents since we are going to loop
3279			 * back and check for both CM and CQ events
3280			 */
3281			evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3282		} else { /* error */
3283			mutex_exit(&evd_rp->evd_lock);
3284			break;
3285		}
3286	}
3287
3288maxevent_reached:
3289	args.evp_num_polled = num_events;
3290
3291	/*
3292	 * At this point retval might have a value that we want to return
3293	 * back to the user. So the copyouts shouldn't tamper retval.
3294	 */
3295	if (args.evp_num_polled > 0) { /* copyout the events */
3296		rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3297		    sizeof (dapl_ib_event_t), mode);
3298		if (rc != 0) { /* XXX: we are losing events here */
3299			DERR("event_poll: event array copyout error %d", rc);
3300			retval = EFAULT;
3301			goto cleanup;
3302		}
3303		rc = daplka_event_poll_copyout(&args, arg, mode);
3304		if (rc != 0) {  /* XXX: we are losing events here */
3305			DERR("event_poll: copyout error %d\n", rc);
3306			retval = EFAULT;
3307			goto cleanup;
3308		}
3309	}
3310
3311cleanup:;
3312	if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3313		kmem_free(evp_start, evp_size);
3314	}
3315
3316	if (evd_rp != NULL) {
3317		DAPLKA_RS_UNREF(evd_rp);
3318	}
3319	return (retval);
3320}
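
/*
 * Userland-side sketch of dequeue semantics (compiled out behind
 * the hypothetical DAPLKA_EXAMPLES guard): threshold and timeout
 * are both zero, so event_poll returns whatever is queued without
 * sleeping. DAPL_EVENT_POLL stands in for whatever ioctl command
 * the library actually uses for this entry point.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_dequeue(int fd, uint64_t evd_hkey,
    dapl_ib_event_t *ev_buf, uint32_t num_ev)
{
	dapl_event_poll_t	args;

	bzero(&args, sizeof (args));
	args.evp_evd_hkey = evd_hkey;
	args.evp_threshold = 0;		/* dequeue, not wait */
	args.evp_timeout = 0;		/* do not block */
	args.evp_ep = ev_buf;
	args.evp_num_ev = num_ev;
	return (ioctl(fd, DAPL_EVENT_POLL, &args));
}
#endif /* DAPLKA_EXAMPLES */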
3321
3322/* ARGSUSED */
3323static int
3324daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3325	cred_t *cred, int *rvalp)
3326{
3327	dapl_event_wakeup_t	args;
3328	daplka_evd_resource_t	*evd_rp;
3329	int			retval;
3330
3331	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3332	    mode);
3333	if (retval != 0) {
3334		DERR("event_wakeup: copyin error %d\n", retval);
3335		return (EFAULT);
3336	}
3337
3338	/* get evd resource */
3339	evd_rp = (daplka_evd_resource_t *)
3340	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3341	if (evd_rp == NULL) {
3342		DERR("event_wakeup: cannot find evd resource\n");
3343		return (EINVAL);
3344	}
3345	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3346
3347	daplka_evd_wakeup(evd_rp, NULL, NULL);
3348
3349	DAPLKA_RS_UNREF(evd_rp);
3350
3351	return (retval);
3352}
3353
3354/* ARGSUSED */
3355static int
3356daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3357	cred_t *cred, int *rvalp)
3358{
3359	dapl_evd_modify_cno_t	args;
3360	daplka_evd_resource_t	*evd_rp;
3361	daplka_cno_resource_t	*cno_rp;
3362	daplka_cno_resource_t	*old_cno_rp;
3363	int			retval;
3364
3365	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
3366	    mode);
3367	if (retval != 0) {
3368		DERR("evd_modify_cno: copyin error %d\n", retval);
3369		return (EFAULT);
3370	}
3371
3372	/* get evd resource */
3373	evd_rp = (daplka_evd_resource_t *)
3374	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
3375	if (evd_rp == NULL) {
3376		DERR("evd_modify_cno: cannot find evd resource\n");
3377		retval = EINVAL;
3378		goto cleanup;
3379	}
3380	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3381
3382	if (args.evmc_cno_hkey > 0) {
3383		/* get cno resource corresponding to the new CNO */
3384		cno_rp = (daplka_cno_resource_t *)
3385		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
3386		    args.evmc_cno_hkey);
3387		if (cno_rp == NULL) {
3388			DERR("evd_modify_cno: cannot find CNO resource\n");
3389			retval = EINVAL;
3390			goto cleanup;
3391		}
3392		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3393	} else {
3394		cno_rp = NULL;
3395	}
3396
3397	mutex_enter(&evd_rp->evd_lock);
3398	old_cno_rp = evd_rp->evd_cno_res;
3399	evd_rp->evd_cno_res = cno_rp;
3400	mutex_exit(&evd_rp->evd_lock);
3401
3402	/*
3403	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
3404	 * retained since the evd holds a reference to it.
3405	 */
3406	if (old_cno_rp) {
3407		DAPLKA_RS_UNREF(old_cno_rp);
3408	}
3409
3410cleanup:
3411	if (evd_rp) {
3412		DAPLKA_RS_UNREF(evd_rp);
3413	}
3414
3415	return (retval);
3416}
3417
3418/*
3419 * Frees the EVD and associated resources.
3420 * If there are other threads still using this EVD, the destruction
3421 * will be deferred until the EVD's refcnt drops to zero.
3422 */
3423/* ARGSUSED */
3424static int
3425daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3426	cred_t *cred, int *rvalp)
3427{
3428	daplka_evd_resource_t	*evd_rp = NULL;
3429	daplka_async_evd_hkey_t	*curr;
3430	daplka_async_evd_hkey_t	*prev;
3431	dapl_evd_free_t		args;
3432	int			retval = 0;
3433
3434	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3435	if (retval != 0) {
3436		DERR("evd_free: copyin error %d\n", retval);
3437		return (EFAULT);
3438	}
3439	retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3440	    (void **)&evd_rp);
3441	if (retval != 0 || evd_rp == NULL) {
3442		DERR("evd_free: cannot find evd resource\n");
3443		return (EINVAL);
3444	}
3445	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3446
3447	/* If this is an async evd remove it from the IA's async evd list */
3448	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3449		mutex_enter(&ia_rp->ia_lock);
3450		curr = prev = ia_rp->ia_async_evd_hkeys;
3451		while (curr != NULL) {
3452			if (curr->aeh_evd_hkey == args.evf_hkey) {
3453				/* unlink curr from the list */
3454				if (curr == prev) {
3455					/*
3456					 * if first element in the list update
3457					 * the list head
3458					 */
3459					ia_rp->ia_async_evd_hkeys =
3460					    curr->aeh_next;
3461				} else {
3462					prev->aeh_next = curr->aeh_next;
3463				}
3464				break;
3465			}
3466			prev = curr;
3467			curr = curr->aeh_next;
3468		}
3469		mutex_exit(&ia_rp->ia_lock);
3470		/* free the curr entry, if the hkey was found */
3471		if (curr != NULL) {
3472			kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3473		}
3472	}
3473
3474	/* UNREF calls the actual free function when refcnt is zero */
3475	DAPLKA_RS_UNREF(evd_rp);
3476	return (0);
3477}
3478
3479/*
3480 * destroys EVD resource.
3481 * called when refcnt drops to zero.
3482 */
3483static int
3484daplka_evd_destroy(daplka_resource_t *gen_rp)
3485{
3486	daplka_evd_resource_t	*evd_rp = (daplka_evd_resource_t *)gen_rp;
3487	ibt_status_t		status;
3488	daplka_evd_event_t	*evt;
3489	ibt_priv_data_len_t	len;
3490
3491	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
3492	D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3493	    evd_rp, DAPLKA_RS_RNUM(evd_rp));
3494	/*
3495	 * free CQ
3496	 */
3497	if (evd_rp->evd_cq_hdl) {
3498		ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3499		mutex_enter(&daplka_dev->daplka_mutex);
3500		ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3501		mutex_exit(&daplka_dev->daplka_mutex);
3502
3503		status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3504		if (status != IBT_SUCCESS) {
3505			DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3506		}
3507		evd_rp->evd_cq_hdl = NULL;
3508		D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3509	}
3510
3511	/*
3512	 * release reference on CNO
3513	 */
3514	if (evd_rp->evd_cno_res != NULL) {
3515		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3516		if (evd_rp->evd_cno_res->cno_evd_cookie ==
3517		    evd_rp->evd_cookie) {
3518			evd_rp->evd_cno_res->cno_evd_cookie = 0;
3519		}
3520		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3521		DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3522		evd_rp->evd_cno_res = NULL;
3523	}
3524
3525	/*
3526	 * discard all remaining events
3527	 */
3528	mutex_enter(&evd_rp->evd_lock);
3529	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3530		D2("evd_destroy: discarding CR event: %d\n",
3531		    evt->ee_cmev.ec_cm_ev_type);
3532		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3533		if (len > 0) {
3534			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3535			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3536			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3537		}
3538		kmem_free(evt, sizeof (*evt));
3539	}
3540	ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3541
3542	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3543		D2("evd_destroy: discarding CONN event: %d\n",
3544		    evt->ee_cmev.ec_cm_ev_type);
3545		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3546		if (len > 0) {
3547			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3548			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3549			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3550		}
3551		kmem_free(evt, sizeof (*evt));
3552	}
3553	ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3554
3555	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3556		DERR("evd_destroy: discarding ASYNC event: %d\n",
3557		    evt->ee_aev.ibae_type);
3558		kmem_free(evt, sizeof (*evt));
3559	}
3560	ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3561	mutex_exit(&evd_rp->evd_lock);
3562
3563	mutex_destroy(&evd_rp->evd_lock);
3564	DAPLKA_RS_FINI(evd_rp);
3565	D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3566	kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3567	return (0);
3568}
3569
3570static void
3571daplka_hash_evd_free(void *obj)
3572{
3573	daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3574
3575	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3576	DAPLKA_RS_UNREF(evd_rp);
3577}
3578
3579/*
3580 * this handler fires when new completions arrive.
3581 */
3582/* ARGSUSED */
3583static void
3584daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3585{
3586	D3("cq_handler: fired setting evd_newevents\n");
3587	daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3588}
3589
3590/*
3591 * this routine wakes up a client from evd_wait. if evtq and evt
3592 * are non-null, the event evt will be enqueued prior to waking
3593 * up the client. if the evd is associated with a CNO and if there
3594 * are no waiters on the evd, the CNO will be notified.
3595 */
3596static void
3597daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3598	daplka_evd_event_t *evt)
3599{
3600	uint32_t waiters = 0;
3601
3602	mutex_enter(&evd_rp->evd_lock);
3603	if (evtq != NULL && evt != NULL) {
3604		ASSERT(evtq == &evd_rp->evd_cr_events ||
3605		    evtq == &evd_rp->evd_conn_events ||
3606		    evtq == &evd_rp->evd_async_events);
3607		daplka_evd_event_enqueue(evtq, evt);
3608		ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3609		    (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3610		evd_rp->evd_newevents |= evtq->eel_event_type;
3611	} else {
3612		evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3613	}
3614	waiters = evd_rp->evd_waiters;
3615	cv_broadcast(&evd_rp->evd_cv);
3616	mutex_exit(&evd_rp->evd_lock);
3617
3618	/*
3619	 * only wakeup the CNO if there are no waiters on this evd.
3620	 */
3621	if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3622		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3623		evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3624		cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3625		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3626	}
3627}
3628
3629/*
3630 * daplka_evd_event_enqueue adds elem to the end of the event list
3631 * The caller is expected to acquire appropriate locks before
3632 * calling enqueue
3633 */
3634static void
3635daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3636    daplka_evd_event_t *elem)
3637{
3638	if (evlist->eel_tail) {
3639		evlist->eel_tail->ee_next = elem;
3640		evlist->eel_tail = elem;
3641	} else {
3642		/* list is empty */
3643		ASSERT(evlist->eel_head == NULL);
3644		evlist->eel_head = elem;
3645		evlist->eel_tail = elem;
3646	}
3647	evlist->eel_num_elements++;
3648}
3649
3650/*
3651 * daplka_evd_event_dequeue removes and returns the first element of event
3652 * list. NULL is returned if the list is empty. The caller is expected to
3653 * acquire appropriate locks before calling dequeue.
3654 */
3655static daplka_evd_event_t *
3656daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3657{
3658	daplka_evd_event_t *head;
3659
3660	head = evlist->eel_head;
3661	if (head == NULL) {
3662		return (NULL);
3663	}
3664
3665	evlist->eel_head = head->ee_next;
3666	evlist->eel_num_elements--;
3667	/* if it was the last element update the tail pointer too */
3668	if (evlist->eel_head == NULL) {
3669		ASSERT(evlist->eel_num_elements == 0);
3670		evlist->eel_tail = NULL;
3671	}
3672	return (head);
3673}
3674
3675/*
3676 * A CNO allows the client to wait for notifications from multiple EVDs.
3677 * To use a CNO, the client needs to follow the procedure below:
3678 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3679 * 2. create one or more EVDs using the returned cno_hkey.
3680 * 3. call cno_wait. when one of the associated EVDs gets notified, the
3681 *    CNO will also get notified. cno_wait will then return with an
3682 *    evd_cookie identifying the EVD that triggered the event.
3683 *
3684 * A note about cno_wait:
3685 * -unlike an EVD, a CNO does not maintain a queue of notifications. For
3686 *  example, suppose multiple EVDs triggered a CNO before the client calls
3687 *  cno_wait; when the client calls cno_wait, it will return with the
3688 *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
3689 *  is the responsibility of the client, upon returning from cno_wait, to
3690 *  check on all EVDs that can potentially trigger the CNO. the returned
3691 *  evd_cookie is only meant to be a hint. there is no guarantee that the
3692 *  EVD identified by the evd_cookie still contains an event or still
3693 *  exists by the time cno_wait returns.
3694 */
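/*
 * A minimal userland sketch of the procedure above. The ioctl command
 * names and the ia_fd descriptor are assumptions (the real commands
 * come from daplt_if.h and are normally issued by the uDAPL library);
 * the struct fields are the ones consumed by the handlers below.
 *
 *	dapl_cno_alloc_t ca;
 *	dapl_cno_wait_t cw;
 *
 *	(void) ioctl(ia_fd, DAPL_CNO_ALLOC, &ca);	step 1
 *	... create EVDs, passing ca.cno_hkey ...	step 2
 *	cw.cnw_hkey = ca.cno_hkey;			step 3
 *	cw.cnw_timeout = 1000000;			in microseconds
 *	if (ioctl(ia_fd, DAPL_CNO_WAIT, &cw) == 0) {
 *		cw.cnw_evd_cookie is only a hint; re-check
 *		every EVD that can trigger this CNO.
 *	}
 */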
3695
3696/*
3697 * allocates a CNO.
3698 * the returned cno_hkey may subsequently be used in evd_create.
3699 */
3700/* ARGSUSED */
3701static int
3702daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3703	cred_t *cred, int *rvalp)
3704{
3705	dapl_cno_alloc_t	args;
3706	daplka_cno_resource_t	*cno_rp = NULL;
3707	uint64_t		cno_hkey = 0;
3708	boolean_t		inserted = B_FALSE;
3709	int			retval = 0;
3710
3711	cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3712	if (cno_rp == NULL) {
3713		DERR("cno_alloc: cannot allocate cno resource\n");
3714		return (ENOMEM);
3715	}
3716	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
3717	DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3718	    DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3719
3720	mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3721	cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3722	cno_rp->cno_evd_cookie = 0;
3723
3724	/* insert into cno hash table */
3725	retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3726	    &cno_hkey, (void *)cno_rp);
3727	if (retval != 0) {
3728		DERR("cno_alloc: cannot insert cno resource\n");
3729		goto cleanup;
3730	}
3731	inserted = B_TRUE;
3732	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))
3733
3734	/* return hkey to library */
3735	args.cno_hkey = cno_hkey;
3736
3737	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3738	    mode);
3739	if (retval != 0) {
3740		DERR("cno_alloc: copyout error %d\n", retval);
3741		retval = EFAULT;
3742		goto cleanup;
3743	}
3744	return (0);
3745
3746cleanup:;
3747	if (inserted) {
3748		daplka_cno_resource_t *free_rp = NULL;
3749
3750		(void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3751		    (void **)&free_rp);
3752		if (free_rp != cno_rp) {
3753			DERR("cno_alloc: cannot remove cno\n");
3754			/*
3755			 * we can only get here if another thread
3756			 * has completed the cleanup in cno_free
3757			 */
3758			return (retval);
3759		}
3760	}
3761	DAPLKA_RS_UNREF(cno_rp);
3762	return (retval);
3763}
3764
3765/*
3766 * destroys a CNO.
3767 * this gets called when a CNO resource's refcnt drops to zero.
3768 */
3769static int
3770daplka_cno_destroy(daplka_resource_t *gen_rp)
3771{
3772	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3773
3774	ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3775	D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3776	    cno_rp, DAPLKA_RS_RNUM(cno_rp));
3777
3778	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3779	cv_destroy(&cno_rp->cno_cv);
3780	mutex_destroy(&cno_rp->cno_lock);
3781
3782	DAPLKA_RS_FINI(cno_rp);
3783	D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3784	kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3785	return (0);
3786}
3787
3788static void
3789daplka_hash_cno_free(void *obj)
3790{
3791	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3792
3793	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3794	DAPLKA_RS_UNREF(cno_rp);
3795}
3796
3797/*
3798 * removes the CNO from the cno hash table and frees the CNO
3799 * if there are no references to it. if there are references to
3800 * it, the CNO will be destroyed when the last of the references
3801 * is released. once the CNO is removed from the cno hash table,
3802 * the client will no longer be able to call cno_wait on the CNO.
3803 */
3804/* ARGSUSED */
3805static int
3806daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3807	cred_t *cred, int *rvalp)
3808{
3809	daplka_cno_resource_t	*cno_rp = NULL;
3810	dapl_cno_free_t		args;
3811	int			retval = 0;
3812
3813	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3814	if (retval != 0) {
3815		DERR("cno_free: copyin error %d\n", retval);
3816		return (EINVAL);
3817	}
3818
3819	retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3820	    args.cnf_hkey, (void **)&cno_rp);
3821	if (retval != 0 || cno_rp == NULL) {
3822		DERR("cno_free: cannot find cno resource\n");
3823		return (EINVAL);
3824	}
3825	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3826
3827	/* UNREF calls the actual free function when refcnt is zero */
3828	DAPLKA_RS_UNREF(cno_rp);
3829	return (0);
3830}
3831
3832/*
3833 * wait for a notification from one of the associated EVDs.
3834 */
3835/* ARGSUSED */
3836static int
3837daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3838	cred_t *cred, int *rvalp)
3839{
3840	daplka_cno_resource_t	*cno_rp = NULL;
3841	dapl_cno_wait_t		args;
3842	int			retval = 0;
3843	uint64_t		evd_cookie = 0;
3844	clock_t			timeout, curr_time;
3845
3846	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3847	if (retval != 0) {
3848		DERR("cno_wait: copyin error %d\n", retval);
3849		return (EINVAL);
3850	}
3851	/* get cno resource */
3852	cno_rp = (daplka_cno_resource_t *)
3853	    daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3854	if (cno_rp == NULL) {
3855		DERR("cno_wait: cannot find cno resource\n");
3856		return (EINVAL);
3857	}
3858	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3859
3860	curr_time = ddi_get_lbolt();
3861	timeout = curr_time + drv_usectohz(args.cnw_timeout);
3862
3863	/*
3864	 * use the max value if we wrapped around
3865	 */
3866	if (args.cnw_timeout > 0 && timeout <= curr_time) {
3867		/*
3868		 * clock_t (size long) changes between 32 and 64-bit kernels
3869		 */
3870		timeout = LONG_MAX >> 4;
3871	}
3872	mutex_enter(&cno_rp->cno_lock);
3873	while (cno_rp->cno_evd_cookie == 0) {
3874		int rval = 0;
3875
3876		rval = cv_timedwait_sig(&cno_rp->cno_cv,
3877		    &cno_rp->cno_lock, timeout);
3878		if (rval == 0) {
3879			DERR("cno_wait: interrupted\n");
3880			mutex_exit(&cno_rp->cno_lock);
3881			retval = EINTR;
3882			goto cleanup;
3883		} else if (rval == -1) {
3884			DERR("cno_wait: timed out\n");
3885			mutex_exit(&cno_rp->cno_lock);
3886			retval = ETIME;
3887			goto cleanup;
3888		}
3889	}
3890	evd_cookie = cno_rp->cno_evd_cookie;
3891	cno_rp->cno_evd_cookie = 0;
3892	mutex_exit(&cno_rp->cno_lock);
3893
3894	ASSERT(evd_cookie != 0);
3895	D2("cno_wait: returning evd_cookie 0x%p\n",
3896	    (void *)(uintptr_t)evd_cookie);
3897	args.cnw_evd_cookie = evd_cookie;
3898	retval = ddi_copyout((void *)&args, (void *)arg,
3899	    sizeof (dapl_cno_wait_t), mode);
3900	if (retval != 0) {
3901		DERR("cno_wait: copyout error %d\n", retval);
3902		retval = EFAULT;
3903		goto cleanup;
3904	}
3905
3906cleanup:;
3907	if (cno_rp != NULL) {
3908		DAPLKA_RS_UNREF(cno_rp);
3909	}
3910	return (retval);
3911}
3912
3913/*
3914 * this function is called by the client when it decides to
3915 * accept a connection request. a connection request is generated
3916 * when the active side generates a REQ MAD to a service point on
3917 * the destination node. this causes the CM service handler
3918 * (daplka_cm_service_req) on the passive side to be called. This
3919 * handler will then enqueue this connection request to the backlog
3920 * array of the service point. A connection event containing the
3921 * backlog array index and connection request private data is passed
3922 * to the client's service point EVD (sp_evd_res). once the event
3923 * is passed up to userland, the client may examine the request
3924 * to decide whether to call daplka_cr_accept or daplka_cr_reject.
3925 */
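/*
 * A hedged sketch of that decision as a userland consumer might issue
 * it (the ioctl command names and ia_fd are assumptions; the fields
 * are the ones read by daplka_cr_accept and daplka_cr_reject below):
 *
 *	dapl_cr_accept_t acc;
 *
 *	acc.cra_sp_hkey = sp_hkey;	service point that got the CR
 *	acc.cra_ep_hkey = ep_hkey;	EP to bind; must be CLOSED
 *	acc.cra_bkl_cookie = psep;	backlog cookie from the conn event
 *	acc.cra_priv_sz = 0;		optional reply private data
 *	(void) ioctl(ia_fd, DAPL_CR_ACCEPT, &acc);
 *
 * to decline instead, fill in a dapl_cr_reject_t (crr_sp_hkey,
 * crr_bkl_cookie, crr_reason) and issue the reject ioctl.
 */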
3926/* ARGSUSED */
3927static int
3928daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3929	cred_t *cred, int *rvalp)
3930{
3931	daplka_ep_resource_t		*ep_rp = NULL;
3932	daplka_sp_resource_t		*sp_rp = NULL;
3933	dapl_cr_accept_t		args;
3934	daplka_sp_conn_pend_t		*conn;
3935	ibt_cm_proceed_reply_t		proc_reply;
3936	ibt_hca_attr_t			*hca_attrp;
3937	ibt_status_t			status;
3938	uint16_t			bkl_index;
3939	uint32_t			old_state, new_state;
3940	int				retval = 0;
3941	void				*priv_data = NULL, *sid;
3942
3943	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3944	    mode);
3945	if (retval != 0) {
3946		DERR("cr_accept: copyin error %d\n", retval);
3947		return (EFAULT);
3948	}
3949	if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3950		DERR("cr_accept: private data len (%d) exceeded "
3951		    "max size %d\n", args.cra_priv_sz,
3952		    DAPL_MAX_PRIVATE_DATA_SIZE);
3953		return (EINVAL);
3954	}
3955	priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3956
3957	D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3958	    args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3959
3960	/* get sp resource */
3961	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3962	    args.cra_sp_hkey);
3963	if (sp_rp == NULL) {
3964		DERR("cr_accept: cannot find sp resource\n");
3965		return (EINVAL);
3966	}
3967	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3968
3969	/* get ep resource */
3970	ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3971	    args.cra_ep_hkey);
3972	if (ep_rp == NULL) {
3973		DERR("cr_accept: cannot find ep resource\n");
3974		retval = EINVAL;
3975		goto cleanup;
3976	}
3977	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3978
3979	/*
3980	 * accept is only allowed if ep_state is CLOSED.
3981	 * note that after this point, the ep_state is frozen
3982	 * (i.e. TRANSITIONING) until we transition ep_state
3983	 * to ACCEPTING or back to CLOSED if we get an error.
3984	 */
3985	new_state = old_state = daplka_ep_get_state(ep_rp);
3986	if (old_state != DAPLKA_EP_STATE_CLOSED) {
3987		DERR("cr_accept: invalid ep state %d\n", old_state);
3988		retval = EINVAL;
3989		goto cleanup;
3990	}
3991
3992	mutex_enter(&sp_rp->sp_lock);
3993	bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3994	/*
3995	 * make sure the backlog index is not bogus.
3996	 */
3997	if (bkl_index >= sp_rp->sp_backlog_size) {
3998		DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3999		    (longlong_t)args.cra_bkl_cookie, bkl_index);
4000		mutex_exit(&sp_rp->sp_lock);
4001		retval = EINVAL;
4002		goto cleanup;
4003	}
4004	/*
4005	 * make sure the backlog index indeed refers
4006	 * to a pending connection.
4007	 */
4008	conn = &sp_rp->sp_backlog[bkl_index];
4009	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4010		DERR("cr_accept: invalid conn state %d\n",
4011		    conn->spcp_state);
4012		mutex_exit(&sp_rp->sp_lock);
4013		retval = EINVAL;
4014		goto cleanup;
4015	}
4016	if (conn->spcp_sid == NULL) {
4017		DERR("cr_accept: sid == NULL\n");
4018		mutex_exit(&sp_rp->sp_lock);
4019		retval = EINVAL;
4020		goto cleanup;
4021	}
4022	if (ep_rp->ep_chan_hdl == NULL) {
4023		/*
4024		 * an ep_rp with a NULL chan_hdl is impossible.
4025		 */
4026		DERR("cr_accept: ep_chan_hdl == NULL\n");
4027		mutex_exit(&sp_rp->sp_lock);
4028		ASSERT(B_FALSE);
4029		retval = EINVAL;
4030		goto cleanup;
4031	}
4032	hca_attrp = &ia_rp->ia_hca->hca_attr;
4033	proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4034	proc_reply.rep.cm_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
4035	proc_reply.rep.cm_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
4036	proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4037	sid = conn->spcp_sid;
4038
4039	/*
4040	 * this clears our slot in the backlog array.
4041	 * this slot may now be used by other pending connections.
4042	 */
4043	conn->spcp_sid = NULL;
4044	conn->spcp_state = DAPLKA_SPCP_INIT;
4045	conn->spcp_req_len = 0;
4046	mutex_exit(&sp_rp->sp_lock);
4047
4048	/*
4049	 * Set the unique cookie corresponding to the CR to this EP
4050	 * so that it can be used in passive side CM callbacks
4051	 */
4052	ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4053
4054	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4055	    &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4056
4057	if (status != IBT_SUCCESS) {
4058		DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4059		*rvalp = (int)status;
4060		retval = 0;
4061	}
4062	/*
4063	 * note that the CM handler may actually be called at this
4064	 * point. but since ep_state is still in TRANSITIONING, the
4065	 * handler will wait until we transition to ACCEPTING. this
4066	 * prevents the case where we set ep_state to ACCEPTING after
4067	 * daplka_service_conn_est sets ep_state to CONNECTED.
4068	 */
4069	new_state = DAPLKA_EP_STATE_ACCEPTING;
4070
4071cleanup:;
4072	if (sp_rp != NULL) {
4073		DAPLKA_RS_UNREF(sp_rp);
4074	}
4075	if (ep_rp != NULL) {
4076		daplka_ep_set_state(ep_rp, old_state, new_state);
4077		DAPLKA_RS_UNREF(ep_rp);
4078	}
4079	return (retval);
4080}
4081
4082/*
4083 * this function is called by the client to reject a
4084 * connection request.
4085 */
4086/* ARGSUSED */
4087static int
4088daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4089	cred_t *cred, int *rvalp)
4090{
4091	dapl_cr_reject_t	args;
4092	daplka_sp_resource_t	*sp_rp = NULL;
4093	daplka_sp_conn_pend_t	*conn;
4094	ibt_cm_proceed_reply_t	proc_reply;
4095	ibt_cm_status_t		proc_status;
4096	ibt_status_t		status;
4097	uint16_t		bkl_index;
4098	int			retval = 0;
4099	void			*sid;
4100
4101	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4102	    mode);
4103	if (retval != 0) {
4104		DERR("cr_reject: copyin error %d\n", retval);
4105		return (EFAULT);
4106	}
4107	/* get sp resource */
4108	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4109	    args.crr_sp_hkey);
4110	if (sp_rp == NULL) {
4111		DERR("cr_reject: cannot find sp resource\n");
4112		return (EINVAL);
4113	}
4114	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4115
4116	D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4117
4118	mutex_enter(&sp_rp->sp_lock);
4119	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4120	/*
4121	 * make sure the backlog index is not bogus.
4122	 */
4123	if (bkl_index >= sp_rp->sp_backlog_size) {
4124		DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4125		    (longlong_t)args.crr_bkl_cookie, bkl_index);
4126		mutex_exit(&sp_rp->sp_lock);
4127		retval = EINVAL;
4128		goto cleanup;
4129	}
4130	/*
4131	 * make sure the backlog index indeed refers
4132	 * to a pending connection.
4133	 */
4134	conn = &sp_rp->sp_backlog[bkl_index];
4135	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4136		DERR("cr_reject: invalid conn state %d\n",
4137		    conn->spcp_state);
4138		mutex_exit(&sp_rp->sp_lock);
4139		retval = EINVAL;
4140		goto cleanup;
4141	}
4142	if (conn->spcp_sid == NULL) {
4143		DERR("cr_reject: sid == NULL\n");
4144		mutex_exit(&sp_rp->sp_lock);
4145		retval = EINVAL;
4146		goto cleanup;
4147	}
4148	bzero(&proc_reply, sizeof (proc_reply));
4149	sid = conn->spcp_sid;
4150
4151	/*
4152	 * this clears our slot in the backlog array.
4153	 * this slot may now be used by other pending connections.
4154	 */
4155	conn->spcp_sid = NULL;
4156	conn->spcp_state = DAPLKA_SPCP_INIT;
4157	conn->spcp_req_len = 0;
4158
4159	switch (args.crr_reason) {
4160	case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4161		/* results in IBT_CM_CONSUMER as the reason for reject */
4162		proc_status = IBT_CM_REJECT;
4163		break;
4164	case DAPL_IB_CME_LOCAL_FAILURE:
4165		/*FALLTHRU*/
4166	case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4167		/* results in IBT_CM_NO_RESC as the reason for reject */
4168		proc_status = IBT_CM_NO_RESOURCE;
4169		break;
4170	default:
4171		/* unexpected reason code */
4172		ASSERT(!"unexpected reject reason code");
4173		proc_status = IBT_CM_NO_RESOURCE;
4174		break;
4175	}
4176
4177	mutex_exit(&sp_rp->sp_lock);
4178
4179	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4180	    &proc_reply, NULL, 0);
4181
4182	if (status != IBT_SUCCESS) {
4183		DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4184		*rvalp = (int)status;
4185		retval = 0;
4186	}
4187
4188cleanup:;
4189	if (sp_rp != NULL) {
4190		DAPLKA_RS_UNREF(sp_rp);
4191	}
4192	return (retval);
4193}
4194
4195
4196/*
4197 * daplka_sp_match is used by daplka_hash_walk for finding SPs
4198 */
4199typedef struct daplka_sp_match_s {
4200	uint64_t		spm_conn_qual;
4201	daplka_sp_resource_t	*spm_sp_rp;
4202} daplka_sp_match_t;
4203_NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))
4204
4205static int
4206daplka_sp_match(void *objp, void *arg)
4207{
4208	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)objp;
4209
4210	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4211	if (sp_rp->sp_conn_qual ==
4212	    ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4213		((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4214		D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4215		    (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4216		DAPLKA_RS_REF(sp_rp);
4217		return (1);
4218	}
4219	return (0);
4220}
4221
4222/*
4223 * cr_handoff allows the client to hand off a connection request from
4224 * one service point to another.
4225 */
4226/* ARGSUSED */
4227static int
4228daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4229	cred_t *cred, int *rvalp)
4230{
4231	dapl_cr_handoff_t		args;
4232	daplka_sp_resource_t		*sp_rp = NULL, *new_sp_rp = NULL;
4233	daplka_sp_conn_pend_t		*conn;
4234	daplka_sp_match_t		sp_match;
4235	ibt_cm_event_t			fake_event;
4236	ibt_cm_status_t			cm_status;
4237	ibt_status_t			status;
4238	uint16_t			bkl_index;
4239	void				*sid, *priv = NULL;
4240	int				retval = 0, priv_len = 0;
4241
4242	D3("cr_handoff: entering\n");
4243	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
4244	    mode);
4245	if (retval != 0) {
4246		DERR("cr_handoff: copyin error %d\n", retval);
4247		return (EFAULT);
4248	}
4249	/* get sp resource */
4250	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4251	    args.crh_sp_hkey);
4252	if (sp_rp == NULL) {
4253		DERR("cr_handoff: cannot find sp resource\n");
4254		return (EINVAL);
4255	}
4256	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4257
4258	/*
4259	 * find the destination service point.
4260	 */
4261	sp_match.spm_conn_qual = args.crh_conn_qual;
4262	sp_match.spm_sp_rp = NULL;
4263	daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
4264	    (void *)&sp_match, RW_READER);
4265
4266	/*
4267	 * return if we cannot find the service point
4268	 */
4269	if (sp_match.spm_sp_rp == NULL) {
4270		DERR("cr_handoff: new sp not found, conn qual = %llu\n",
4271		    (longlong_t)args.crh_conn_qual);
4272		retval = EINVAL;
4273		goto cleanup;
4274	}
4275	new_sp_rp = sp_match.spm_sp_rp;
4276
4277	/*
4278	 * the spec does not discuss the security implications of this
4279	 * function. to be safe, we currently only allow processes
4280	 * owned by the same user to hand off connection requests
4281	 * to each other.
4282	 */
4283	if (crgetruid(cred) != new_sp_rp->sp_ruid) {
4284		DERR("cr_handoff: permission denied\n");
4285		retval = EPERM;
4286		goto cleanup;
4287	}
4288
4289	D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);
4290
4291	mutex_enter(&sp_rp->sp_lock);
4292	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
4293	/*
4294	 * make sure the backlog index is not bogus.
4295	 */
4296	if (bkl_index >= sp_rp->sp_backlog_size) {
4297		DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
4298		    (longlong_t)args.crh_bkl_cookie, bkl_index);
4299		mutex_exit(&sp_rp->sp_lock);
4300		retval = EINVAL;
4301		goto cleanup;
4302	}
4303	/*
4304	 * make sure the backlog index indeed refers
4305	 * to a pending connection.
4306	 */
4307	conn = &sp_rp->sp_backlog[bkl_index];
4308	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4309		DERR("cr_handoff: invalid conn state %d\n",
4310		    conn->spcp_state);
4311		mutex_exit(&sp_rp->sp_lock);
4312		retval = EINVAL;
4313		goto cleanup;
4314	}
4315	if (conn->spcp_sid == NULL) {
4316		DERR("cr_handoff: sid == NULL\n");
4317		mutex_exit(&sp_rp->sp_lock);
4318		retval = EINVAL;
4319		goto cleanup;
4320	}
4321	sid = conn->spcp_sid;
4322	priv = NULL;
4323	priv_len = conn->spcp_req_len;
4324	if (priv_len > 0) {
4325		priv = kmem_zalloc(priv_len, daplka_km_flags);
4326		if (priv == NULL) {
4327			mutex_exit(&sp_rp->sp_lock);
4328			retval = ENOMEM;
4329			goto cleanup;
4330		}
4331		bcopy(conn->spcp_req_data, priv, priv_len);
4332	}
4333	/*
4334	 * this clears our slot in the backlog array.
4335	 * this slot may now be used by other pending connections.
4336	 */
4337	conn->spcp_sid = NULL;
4338	conn->spcp_state = DAPLKA_SPCP_INIT;
4339	conn->spcp_req_len = 0;
4340	mutex_exit(&sp_rp->sp_lock);
4341
4342	/* fill fake_event and call service_req handler */
4343	bzero(&fake_event, sizeof (fake_event));
4344	fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
4345	fake_event.cm_session_id = sid;
4346	fake_event.cm_priv_data_len = priv_len;
4347	fake_event.cm_priv_data = priv;
4348
4349	cm_status = daplka_cm_service_req(new_sp_rp,
4350	    &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
4351	if (cm_status != IBT_CM_DEFER) {
4352		ibt_cm_proceed_reply_t	proc_reply;
4353
4354		DERR("cr_handoff: service_req returned %d\n", cm_status);
4355		/*
4356		 * if for some reason cm_service_req failed, we
4357		 * reject the connection.
4358		 */
4359		bzero(&proc_reply, sizeof (proc_reply));
4360
4361		status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
4362		    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
4363		if (status != IBT_SUCCESS) {
4364			DERR("cr_handoff: ibt_cm_proceed returned %d\n",
4365			    status);
4366		}
4367		*rvalp = (int)status;
4368		retval = 0;
4369	}
4370
4371cleanup:;
4372	if (priv_len > 0 && priv != NULL) {
4373		kmem_free(priv, priv_len);
4374	}
4375	if (new_sp_rp != NULL) {
4376		DAPLKA_RS_UNREF(new_sp_rp);
4377	}
4378	if (sp_rp != NULL) {
4379		DAPLKA_RS_UNREF(sp_rp);
4380	}
4381	D3("cr_handoff: exiting\n");
4382	return (retval);
4383}
4384
4385/*
4386 * returns the hca attributes
4387 */
4388/* ARGSUSED */
4389static int
4390daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4391	cred_t *cred, int *rvalp)
4392{
4393	dapl_ia_query_t		args;
4394	int			retval;
4395	ibt_hca_attr_t		*hcap;
4396
4397	hcap = &ia_rp->ia_hca->hca_attr;
4398
4399	/*
4400	 * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4401	 */
4402	args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4403	args.hca_attr.dhca_device_id = hcap->hca_device_id;
4404	args.hca_attr.dhca_version_id = hcap->hca_version_id;
4405	args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4406	args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4407	args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4408	args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4409	args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4410	args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4411	args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4412	args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4413	args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4414	args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4415	args.hca_attr.dhca_max_partitions  = hcap->hca_max_partitions;
4416	args.hca_attr.dhca_nports  = hcap->hca_nports;
4417	args.hca_attr.dhca_node_guid  = hcap->hca_node_guid;
4418	args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4419	args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4420	args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4421	args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4422
4423	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4424	    mode);
4425	if (retval != 0) {
4426		DERR("ia_query: copyout error %d\n", retval);
4427		return (EFAULT);
4428	}
4429	return (0);
4430}
4431
4432/*
4433 * This routine is passed to hash walk in daplka_pre_mr_cleanup_callback;
4434 * it frees the mw embedded in the mw resource object.
4435 */
4436
4437/* ARGSUSED */
4438static int
4439daplka_mr_cb_freemw(void *objp, void *arg)
4440{
4441	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)objp;
4442	ibt_mw_hdl_t		mw_hdl;
4443	ibt_status_t		status;
4444
4445	D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4446	DAPLKA_RS_REF(mw_rp);
4447
4448	mutex_enter(&mw_rp->mw_lock);
4449	mw_hdl = mw_rp->mw_hdl;
4450	/*
4451	 * we set mw_hdl to NULL so it won't get freed again
4452	 */
4453	mw_rp->mw_hdl = NULL;
4454	mutex_exit(&mw_rp->mw_lock);
4455
4456	if (mw_hdl != NULL) {
4457		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4458		if (status != IBT_SUCCESS) {
4459			DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4460		}
4461		D3("mr_cb_freemw: mw freed\n");
4462	}
4463
4464	DAPLKA_RS_UNREF(mw_rp);
4465	return (0);
4466}
4467
4468/*
4469 * This routine is called from the HCA driver's umem lock undo callback
4470 * when the memory associated with an MR is being unmapped. In this callback
4471 * we free all the MWs associated with the IA and post an unaffiliated
4472 * async event to tell the app that there was a catastrophic event.
4473 * This allows the HCA to deregister the MR in its callback processing.
4474 */
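/*
 * A summary of the ia_state transitions this callback drives (derived
 * from the code below; DAPLKA_IA_MW_ALLOC_IN_PROGRESS is presumably
 * entered by the MW allocation path):
 *
 *	DAPLKA_IA_INIT
 *	    | this callback fires
 *	    v
 *	DAPLKA_IA_MW_FREEZE_IN_PROGRESS	(concurrent callers cv_wait)
 *	    | all MWs freed via daplka_mr_cb_freemw
 *	    v
 *	DAPLKA_IA_MW_FROZEN		(later callbacks are no-ops)
 */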
4475static void
4476daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
4477{
4478	daplka_mr_resource_t	*mr_rp;
4479	daplka_ia_resource_t	*ia_rp;
4480#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4481	ibt_async_event_t	event;
4482	ibt_hca_attr_t		*hca_attrp;
4483#endif
4484	minor_t			rnum;
4485
4486	mr_rp = (daplka_mr_resource_t *)arg1;
4487	rnum = DAPLKA_RS_RNUM(mr_rp);
4488	daplka_shared_mr_free(mr_rp);
4489
4490	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
4491	if (ia_rp == NULL) {
4492		DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
4493		    rnum);
4494		return;
4495	}
4496
4497	DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);
4498
4499	mutex_enter(&ia_rp->ia_lock);
4500	/*
4501	 * MW is being allocated OR MW freeze has already begun. In
4502	 * both these cases we wait for that to complete before
4503	 * continuing.
4504	 */
4505	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
4506	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
4507		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
4508	}
4509
4510	switch (ia_rp->ia_state) {
4511	case DAPLKA_IA_INIT:
4512		ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
4513		mutex_exit(&ia_rp->ia_lock);
4514		break;
4515	case DAPLKA_IA_MW_FROZEN:
4516		/* the mws on this ia have been freed */
4517		D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
4518		    ia_rp->ia_state);
4519		mutex_exit(&ia_rp->ia_lock);
4520		goto cleanup;
4521	default:
4522		ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
4523		DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
4524		    ia_rp->ia_state);
4525		mutex_exit(&ia_rp->ia_lock);
4526		goto cleanup;
4527	}
4528
4529	/*
4530	 * Walk the mw hash table and free the mws. Acquire a writer
4531	 * lock since we don't want anyone else traversing this tree
4532	 * while we are freeing the MW.
4533	 */
4534	daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
4535	    RW_WRITER);
4536
4537	mutex_enter(&ia_rp->ia_lock);
4538	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
4539	ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
4540	cv_broadcast(&ia_rp->ia_cv);
4541	mutex_exit(&ia_rp->ia_lock);
4542
4543	/*
4544	 * Currently commented out because Oracle skgxp is incapable
4545	 * of handling async events correctly.
4546	 */
4547#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4548	/*
4549	 * Enqueue an unaffiliated async error event to indicate this
4550	 * IA has encountered a problem that caused the MWs to be freed up
4551	 */
4552
4553	/* Create a fake event; the only relevant field is the hca_guid */
4554	bzero(&event, sizeof (ibt_async_event_t));
4555	hca_attrp = &ia_rp->ia_hca->hca_attr;
4556	event.ev_hca_guid = hca_attrp->hca_node_guid;
4557
4558	daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
4559	    ia_rp);
4560#endif	/* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */
4561
4562cleanup:;
4563	D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
4564	DAPLKA_RS_UNREF(ia_rp);
4565}
4566
4567/*
4568 * registers a memory region.
4569 * memory locking will be done by the HCA driver.
4570 */
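/*
 * A sketch of the expected input and output, as consumed and produced
 * by the code below (DAPL_MR_REGISTER and ia_fd are assumed names;
 * IBT_MR_ENABLE_LOCAL_WRITE is just an example access flag):
 *
 *	dapl_mr_register_t mr;
 *
 *	mr.mr_pd_hkey = pd_hkey;	PD from an earlier pd_alloc
 *	mr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 *	mr.mr_len = buf_len;
 *	mr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
 *	if (ioctl(ia_fd, DAPL_MR_REGISTER, &mr) == 0)
 *		mr.mr_lkey, mr.mr_rkey and mr.mr_hkey are now valid
 */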
4571/* ARGSUSED */
4572static int
4573daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4574	cred_t *cred, int *rvalp)
4575{
4576	boolean_t			inserted = B_FALSE;
4577	daplka_mr_resource_t		*mr_rp;
4578	daplka_pd_resource_t		*pd_rp;
4579	dapl_mr_register_t		args;
4580	ibt_mr_data_in_t		mr_cb_data_in;
4581	uint64_t			mr_hkey = 0;
4582	ibt_status_t			status;
4583	int				retval;
4584
4585	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
4586	    mode);
4587	if (retval != 0) {
4588		DERR("mr_register: copyin error %d\n", retval);
4589		return (EINVAL);
4590	}
4591	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4592	if (mr_rp == NULL) {
4593		DERR("mr_register: cannot allocate mr resource\n");
4594		return (ENOMEM);
4595	}
4596	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4597	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4598	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4599
4600	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4601	mr_rp->mr_hca = ia_rp->ia_hca;
4602	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4603	mr_rp->mr_next = NULL;
4604	mr_rp->mr_shared_mr = NULL;
4605
4606	/* get pd handle */
4607	pd_rp = (daplka_pd_resource_t *)
4608	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
4609	if (pd_rp == NULL) {
4610		DERR("mr_register: cannot find pd resource\n");
4611		retval = EINVAL;
4612		goto cleanup;
4613	}
4614	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4615	mr_rp->mr_pd_res = pd_rp;
4616
4617	mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
4618	mr_rp->mr_attr.mr_len = args.mr_len;
4619	mr_rp->mr_attr.mr_as = curproc->p_as;
4620	mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;
4621
4622	D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
4623	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4624	    (longlong_t)mr_rp->mr_attr.mr_len,
4625	    mr_rp->mr_attr.mr_flags);
4626
4627	status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4628	    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
4629	    &mr_rp->mr_desc);
4630
4631	if (status != IBT_SUCCESS) {
4632		DERR("mr_register: ibt_register_mr error %d\n", status);
4633		*rvalp = (int)status;
4634		retval = 0;
4635		goto cleanup;
4636	}
4637
4638	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4639	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4640	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4641	mr_cb_data_in.mr_arg2 = NULL;
4642
4643	/* Pass the service driver mr cleanup handler to the hca driver */
4644	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4645	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4646	    &mr_cb_data_in, sizeof (mr_cb_data_in));
4647
4648	if (status != IBT_SUCCESS) {
4649		DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
4650		    status, mr_cb_data_in.mr_rev);
4651		*rvalp = (int)status;
4652		retval = 0;
4653		goto cleanup;
4654	}
4655
4656	/* insert into mr hash table */
4657	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4658	    &mr_hkey, (void *)mr_rp);
4659	if (retval != 0) {
4660		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
4661		goto cleanup;
4662	}
4663	inserted = B_TRUE;
4664	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4665
4666	args.mr_lkey = mr_rp->mr_desc.md_lkey;
4667	args.mr_rkey = mr_rp->mr_desc.md_rkey;
4668	args.mr_hkey = mr_hkey;
4669
4670	retval = ddi_copyout((void *)&args, (void *)arg,
4671	    sizeof (dapl_mr_register_t), mode);
4672	if (retval != 0) {
4673		DERR("mr_register: copyout error %d\n", retval);
4674		retval = EFAULT;
4675		goto cleanup;
4676	}
4677	return (0);
4678
4679cleanup:;
4680	if (inserted) {
4681		daplka_mr_resource_t *free_rp = NULL;
4682
4683		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4684		    (void **)&free_rp);
4685		if (free_rp != mr_rp) {
4686			DERR("mr_register: cannot remove mr from hash table\n");
4687			/*
4688			 * we can only get here if another thread
4689			 * has completed the cleanup in mr_deregister
4690			 */
4691			return (retval);
4692		}
4693	}
4694	DAPLKA_RS_UNREF(mr_rp);
4695	return (retval);
4696}
4697
4698/*
4699 * registers a shared memory region.
4700 * the client calls this function with the intention to share the memory
4701 * region with other clients. it is assumed that, prior to calling this
4702 * function, the client(s) are already sharing parts of their address
4703 * space using a mechanism such as SYSV shared memory. the first client
4704 * that calls this function will create and insert a daplka_shared_mr_t
4705 * object into the global daplka_shared_mr_tree. this shared mr object
4706 * will be identified by a unique 40-byte key and will maintain a list
4707 * of mr resources. every time this function gets called with the same
4708 * 40-byte key, a new mr resource (containing a new mr handle generated
4709 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4710 * into this list. similarly, every time a shared mr gets deregistered
4711 * or invalidated by a callback, the mr resource gets removed from this
4712 * list. the shared mr object has a reference count. when it drops to
4713 * zero, the shared mr object will be removed from the global avl tree
4714 * and be freed.
4715 */
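/*
 * A sketch of the data structure just described:
 *
 *	daplka_shared_mr_tree (global avl tree, keyed by the 40-byte
 *	    |			smr_cookie)
 *	    +-- daplka_shared_mr_t (smr_refcnt, smr_state, smr_cv)
 *		    |
 *		    +-- smr_mr_list: mr_rp -> mr_rp -> ... -> NULL
 *
 * one daplka_mr_resource_t is linked per registering client. the
 * first element's mr_hdl seeds later ibt_register_shared_mr calls,
 * and smr_state (READY/TRANSITIONING/FREED) serializes access to the
 * list under daplka_shared_mr_lock.
 */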
4716/* ARGSUSED */
4717static int
4718daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4719	cred_t *cred, int *rvalp)
4720{
4721	dapl_mr_register_shared_t	args;
4722	daplka_shared_mr_t		*smrp = NULL;
4723	daplka_shared_mr_t		tmp_smr;
4724	ibt_mr_data_in_t		mr_cb_data_in;
4725	avl_index_t			where;
4726	boolean_t			inserted = B_FALSE;
4727	daplka_mr_resource_t		*mr_rp = NULL;
4728	daplka_pd_resource_t		*pd_rp;
4729	uint64_t			mr_hkey = 0;
4730	ibt_status_t			status;
4731	int				retval;
4732
4733	retval = ddi_copyin((void *)arg, &args,
4734	    sizeof (dapl_mr_register_shared_t), mode);
4735	if (retval != 0) {
4736		DERR("mr_register_shared: copyin error %d\n", retval);
4737		return (EINVAL);
4738	}
4739
4740	mutex_enter(&daplka_shared_mr_lock);
4741	/*
4742	 * find smrp from the global avl tree.
4743	 * the 40-byte key is used as the lookup key.
4744	 */
4745	tmp_smr.smr_cookie = args.mrs_shm_cookie;
4746	smrp = (daplka_shared_mr_t *)
4747	    avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4748	if (smrp != NULL) {
4749		D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4750		    "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4751		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4752		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4753		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4754		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4755		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4756
4757		/*
4758		 * if the smrp exists, other threads could still be
4759		 * accessing it. we wait until they are done before
4760		 * we continue.
4761		 */
4762		smrp->smr_refcnt++;
4763		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4764			D2("mr_register_shared: smrp 0x%p, "
4765			    "waiting in transitioning state, refcnt %d\n",
4766			    smrp, smrp->smr_refcnt);
4767			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4768		}
4769		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4770		D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4771		    smrp, smrp->smr_refcnt);
4772
4773		/*
4774		 * we set smr_state to TRANSITIONING to temporarily
4775		 * prevent other threads from trying to access smrp.
4776		 */
4777		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4778	} else {
4779		D2("mr_register_shared: cannot find cookie:\n"
4780		    "0x%016llx%016llx%016llx%016llx%016llx\n",
4781		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4782		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4783		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4784		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4785		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4786
4787		/*
4788		 * if we cannot find smrp, we need to create and
4789		 * insert one into daplka_shared_mr_tree
4790		 */
4791		smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4792		    daplka_km_flags);
4793		if (smrp == NULL) {
4794			retval = ENOMEM;
4795			mutex_exit(&daplka_shared_mr_lock);
4796			goto cleanup;
4797		}
4798		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4799		smrp->smr_refcnt = 1;
4800		smrp->smr_cookie = args.mrs_shm_cookie;
4801		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4802		smrp->smr_mr_list = NULL;
4803		cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4804		avl_insert(&daplka_shared_mr_tree, smrp, where);
4805		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
4806	}
4807	mutex_exit(&daplka_shared_mr_lock);
4808
4809	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4810	if (mr_rp == NULL) {
4811		DERR("mr_register_shared: cannot allocate mr resource\n");
		retval = ENOMEM;
4812		goto cleanup;
4813	}
4814	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4815	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4816	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4817
4818	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4819	mr_rp->mr_hca = ia_rp->ia_hca;
4820	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4821	mr_rp->mr_next = NULL;
4822	mr_rp->mr_shared_mr = NULL;
4823
4824	/* get pd handle */
4825	pd_rp = (daplka_pd_resource_t *)
4826	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4827	if (pd_rp == NULL) {
4828		DERR("mr_register_shared: cannot find pd resource\n");
4829		retval = EINVAL;
4830		goto cleanup;
4831	}
4832	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4833	mr_rp->mr_pd_res = pd_rp;
4834
4835	mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4836	mr_rp->mr_attr.mr_len = args.mrs_len;
4837	mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4838	mr_rp->mr_attr.mr_as = curproc->p_as;
4839
4840	D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4841	    "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4842	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4843	    (longlong_t)mr_rp->mr_attr.mr_len,
4844	    mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4845	    (int)(smrp->smr_mr_list != NULL), smrp);
4846
4847	/*
4848	 * since we are in TRANSITIONING state, we are guaranteed
4849	 * that we have exclusive access to smr_mr_list.
4850	 */
4851	if (smrp->smr_mr_list != NULL) {
4852		ibt_smr_attr_t	mem_sattr;
4853
4854		/*
4855		 * a non-null smr_mr_list indicates that someone
4856		 * else has already inserted an mr_resource into
4857		 * smr_mr_list. we use the mr_handle from the first
4858		 * element as an arg to ibt_register_shared_mr.
4859		 */
4860		mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4861		mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4862
4863		D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4864		    (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4865		status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4866		    smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4867		    &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4868
4869		if (status != IBT_SUCCESS) {
4870			DERR("mr_register_shared: "
4871			    "ibt_register_shared_mr error %d\n", status);
4872			*rvalp = (int)status;
4873			retval = 0;
4874			goto cleanup;
4875		}
4876	} else {
4877		/*
4878		 * an mr does not exist yet. we need to create one
4879		 * using ibt_register_mr.
4880		 */
4881		status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4882		    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4883		    &mr_rp->mr_hdl, &mr_rp->mr_desc);
4884
4885		if (status != IBT_SUCCESS) {
4886			DERR("mr_register_shared: "
4887			    "ibt_register_mr error %d\n", status);
4888			*rvalp = (int)status;
4889			retval = 0;
4890			goto cleanup;
4891		}
4892	}
4893
4894	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4895	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4896	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4897	mr_cb_data_in.mr_arg2 = NULL;
4898
4899	/* Pass the service driver mr cleanup handler to the hca driver */
4900	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4901	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4902	    &mr_cb_data_in, sizeof (mr_cb_data_in));
4903
4904	if (status != IBT_SUCCESS) {
4905		DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4906		    status, mr_cb_data_in.mr_rev);
4907		*rvalp = (int)status;
4908		retval = 0;
4909		goto cleanup;
4910	}
4911
4912	/*
4913	 * we bump the refcnt of mr_rp and enqueue it onto smrp's list.
4914	 */
4915	DAPLKA_RS_REF(mr_rp);
4916	mr_rp->mr_next = smrp->smr_mr_list;
4917	smrp->smr_mr_list = mr_rp;
4918	mr_rp->mr_shared_mr = smrp;
4919
4920	/* insert into mr hash table */
4921	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4922	    &mr_hkey, (void *)mr_rp);
4923	if (retval != 0) {
4924		DERR("mr_register_shared: cannot insert mr resource\n");
4925		goto cleanup;
4926	}
4927	inserted = B_TRUE;
4928	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4929
4930	/*
4931	 * at this point, there are two references to our mr resource.
4932	 * one is kept in ia_mr_htbl. the other is kept in the list
4933	 * within this shared mr object (smrp). when we deregister this
4934	 * mr or when a callback invalidates this mr, the reference kept
4935	 * by this shared mr object will be removed.
4936	 */
4937
4938	args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4939	args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4940	args.mrs_hkey = mr_hkey;
4941
4942	retval = ddi_copyout((void *)&args, (void *)arg,
4943	    sizeof (dapl_mr_register_shared_t), mode);
4944	if (retval != 0) {
4945		DERR("mr_register_shared: copyout error %d\n", retval);
4946		retval = EFAULT;
4947		goto cleanup;
4948	}
4949
4950	/*
4951	 * set the state to READY to allow others to continue
4952	 */
4953	mutex_enter(&daplka_shared_mr_lock);
4954	smrp->smr_state = DAPLKA_SMR_READY;
4955	cv_broadcast(&smrp->smr_cv);
4956	mutex_exit(&daplka_shared_mr_lock);
4957	return (0);
4958
4959cleanup:;
4960	if (inserted) {
4961		daplka_mr_resource_t *free_rp = NULL;
4962
4963		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4964		    (void **)&free_rp);
4965		if (free_rp != mr_rp) {
4966			DERR("mr_register_shared: "
4967			    "cannot remove mr from hash table\n");
4968			/*
4969			 * we can only get here if another thread
4970			 * has completed the cleanup in mr_deregister
4971			 */
4972			return (retval);
4973		}
4974	}
4975	if (smrp != NULL) {
4976		mutex_enter(&daplka_shared_mr_lock);
4977		ASSERT(smrp->smr_refcnt > 0);
4978		smrp->smr_refcnt--;
4979
4980		if (smrp->smr_refcnt == 0) {
4981			DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4982			avl_remove(&daplka_shared_mr_tree, smrp);
4983			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4984			if (smrp->smr_mr_list != NULL) {
4985				/*
4986				 * the refcnt is 0. if there is anything
4987				 * left on the list, it must be ours.
4988				 */
4989				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4990				ASSERT(smrp->smr_mr_list == mr_rp);
4991				DAPLKA_RS_UNREF(mr_rp);
4992				smrp->smr_mr_list = NULL;
4993				ASSERT(mr_rp->mr_shared_mr == smrp);
4994				mr_rp->mr_shared_mr = NULL;
4995				ASSERT(mr_rp->mr_next == NULL);
4996			}
4997			smrp->smr_state = DAPLKA_SMR_FREED;
4998			cv_destroy(&smrp->smr_cv);
4999			kmem_free(smrp, sizeof (daplka_shared_mr_t));
5000		} else {
5001			DERR("mr_register_shared: resetting smr_state "
5002			    "smrp 0x%p, %d waiters remain\n", smrp,
5003			    smrp->smr_refcnt);
5004			ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
5005			if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
5006				daplka_mr_resource_t	**mpp;
5007
5008				/*
5009				 * search and remove mr_rp from smr_mr_list
5010				 */
5011				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5012				mpp = &smrp->smr_mr_list;
5013				while (*mpp != NULL) {
5014					if (*mpp == mr_rp) {
5015						*mpp = (*mpp)->mr_next;
5016						DAPLKA_RS_UNREF(mr_rp);
5017						ASSERT(mr_rp->mr_shared_mr ==
5018						    smrp);
5019						mr_rp->mr_shared_mr = NULL;
5020						mr_rp->mr_next = NULL;
5021						break;
5022					}
5023					mpp = &(*mpp)->mr_next;
5024				}
5025			}
5026			/*
5027			 * note that smr_state == READY does not necessarily
5028			 * mean that smr_mr_list is non-empty. for this case,
5029			 * we are doing cleanup because of a failure. we set
5030			 * the state to READY to allow other threads to
5031			 * continue.
5032			 */
5033			smrp->smr_state = DAPLKA_SMR_READY;
5034			cv_broadcast(&smrp->smr_cv);
5035		}
5036		mutex_exit(&daplka_shared_mr_lock);
5037	}
5038	if (mr_rp != NULL) {
5039		DAPLKA_RS_UNREF(mr_rp);
5040	}
5041	return (retval);
5042}
5043
5044/*
5045 * registers a memory region using the attributes of an
5046 * existing region.
5047 */
5048/* ARGSUSED */
5049static int
5050daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5051	cred_t *cred, int *rvalp)
5052{
5053	boolean_t 			inserted = B_FALSE;
5054	dapl_mr_register_lmr_t		args;
5055	ibt_mr_data_in_t		mr_cb_data_in;
5056	daplka_mr_resource_t		*orig_mr_rp = NULL;
5057	daplka_mr_resource_t		*mr_rp;
5058	ibt_smr_attr_t			mem_sattr;
5059	uint64_t			mr_hkey = 0;
5060	ibt_status_t			status;
5061	int				retval;
5062
5063	retval = ddi_copyin((void *)arg, &args,
5064	    sizeof (dapl_mr_register_lmr_t), mode);
5065	if (retval != 0) {
5066		DERR("mr_register_lmr: copyin error %d\n", retval);
5067		return (EINVAL);
5068	}
5069	orig_mr_rp = (daplka_mr_resource_t *)
5070	    daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5071	if (orig_mr_rp == NULL) {
5072		DERR("mr_register_lmr: cannot find mr resource\n");
5073		return (EINVAL);
5074	}
5075	ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5076
5077	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5078	if (mr_rp == NULL) {
5079		DERR("mr_register_lmr: cannot allocate mr resource\n");
5080		retval = ENOMEM;
5081		goto cleanup;
5082	}
5083	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5084	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5085	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5086
5087	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5088	mr_rp->mr_hca = ia_rp->ia_hca;
5089	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5090	mr_rp->mr_next = NULL;
5091	mr_rp->mr_shared_mr = NULL;
5092
5093	DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5094	mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5095	mr_rp->mr_attr = orig_mr_rp->mr_attr;
5096
5097	/* Pass the IO addr that was returned while allocating the orig MR */
5098	mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5099	mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5100
5101	status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5102	    orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5103	    &mr_rp->mr_hdl, &mr_rp->mr_desc);
5104
5105	if (status != IBT_SUCCESS) {
5106		DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5107		    status);
5108		*rvalp = (int)status;
5109		retval = 0;
5110		goto cleanup;
5111	}
5112
5113	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5114	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5115	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5116	mr_cb_data_in.mr_arg2 = NULL;
5117
5118	/* Pass the service driver mr cleanup handler to the hca driver */
5119	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5120	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5121	    &mr_cb_data_in, sizeof (mr_cb_data_in));
5122
5123	if (status != IBT_SUCCESS) {
5124		DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5125		    status, mr_cb_data_in.mr_rev);
5126		*rvalp = (int)status;
5127		retval = 0;
5128		goto cleanup;
5129	}
5130	mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5131	mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5132
5133	/* insert into mr hash table */
5134	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5135	    (void *)mr_rp);
5136	if (retval != 0) {
5137		DERR("mr_register_lmr: cannot insert mr resource into mr_htbl\n");
5138		goto cleanup;
5139	}
5140	inserted = B_TRUE;
5141	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
5142
5143	args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5144	args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5145	args.mrl_hkey = mr_hkey;
5146
5147	retval = ddi_copyout((void *)&args, (void *)arg,
5148	    sizeof (dapl_mr_register_lmr_t), mode);
5149	if (retval != 0) {
5150		DERR("mr_register_lmr: copyout error %d\n", retval);
5151		retval = EFAULT;
5152		goto cleanup;
5153	}
5154	if (orig_mr_rp != NULL) {
5155		DAPLKA_RS_UNREF(orig_mr_rp);
5156	}
5157	return (0);
5158
5159cleanup:;
5160	if (inserted) {
5161		daplka_mr_resource_t *free_rp = NULL;
5162
5163		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5164		    (void **)&free_rp);
5165		if (free_rp != mr_rp) {
5166			DERR("mr_register_lmr: cannot remove mr from hash table\n");
5167			/*
5168			 * we can only get here if another thread
5169			 * has completed the cleanup in mr_deregister
5170			 */
5171			return (retval);
5172		}
5173	}
5174	if (orig_mr_rp != NULL) {
5175		DAPLKA_RS_UNREF(orig_mr_rp);
5176	}
5177	if (mr_rp != NULL) {
5178		DAPLKA_RS_UNREF(mr_rp);
5179	}
5180	return (retval);
5181}
5182
5183/*
5184 * this function is called by mr_deregister and mr_cleanup_callback to
5185 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
5186 * if mr_shared_mr is already NULL, that means the region being
5187 * deregistered or invalidated is not a shared mr region and we can
5188 * return immediately.
5189 */
5190static void
5191daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
5192{
5193	daplka_shared_mr_t	*smrp;
5194
5195	/*
5196	 * we need a lock because mr_callback also checks this field.
5197	 * for the rare case that mr_deregister and mr_cleanup_callback
5198	 * get called simultaneously, we are guaranteed that smrp won't
5199	 * be dereferenced twice because either function will find
5200	 * mr_shared_mr to be NULL.
5201	 */
5202	mutex_enter(&mr_rp->mr_lock);
5203	smrp = mr_rp->mr_shared_mr;
5204	mr_rp->mr_shared_mr = NULL;
5205	mutex_exit(&mr_rp->mr_lock);
5206
5207	if (smrp != NULL) {
5208		daplka_mr_resource_t	**mpp;
5209		boolean_t		mr_found = B_FALSE;
5210
5211		mutex_enter(&daplka_shared_mr_lock);
5212		ASSERT(smrp->smr_refcnt > 0);
5213		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
5214			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
5215		}
5216		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
5217		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
5218		smrp->smr_refcnt--;
5219
5220		/*
5221		 * search and remove mr_rp from smr_mr_list.
5222		 * also UNREF mr_rp because it is no longer
5223		 * on the list.
5224		 */
5225		mpp = &smrp->smr_mr_list;
5226		while (*mpp != NULL) {
5227			if (*mpp == mr_rp) {
5228				*mpp = (*mpp)->mr_next;
5229				DAPLKA_RS_UNREF(mr_rp);
5230				mr_rp->mr_next = NULL;
5231				mr_found = B_TRUE;
5232				break;
5233			}
5234			mpp = &(*mpp)->mr_next;
5235		}
5236		/*
5237		 * since mr_cleanup_callback may not touch smr_mr_list
5238		 * at this time (due to smr_state), we can be sure
5239		 * that we can find and remove mr_rp from smr_mr_list
5240		 */
5241		ASSERT(mr_found);
5242		if (smrp->smr_refcnt == 0) {
5243			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
5244			avl_remove(&daplka_shared_mr_tree, smrp);
5245			ASSERT(smrp->smr_mr_list == NULL);
5246			smrp->smr_state = DAPLKA_SMR_FREED;
5247			cv_destroy(&smrp->smr_cv);
5248			kmem_free(smrp, sizeof (daplka_shared_mr_t));
5249		} else {
5250			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
5251			    smrp, smrp->smr_refcnt);
5252			smrp->smr_state = DAPLKA_SMR_READY;
5253			cv_broadcast(&smrp->smr_cv);
5254		}
5255		mutex_exit(&daplka_shared_mr_lock);
5256	}
5257}
5258
5259/*
5260 * deregisters a memory region.
5261 * if mr is shared, remove reference from global shared mr object.
5262 * release the initial reference to the mr. if the mr's refcnt is
5263 * zero, call mr_destroy to free mr.
5264 */
5265/* ARGSUSED */
5266static int
5267daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5268	cred_t *cred, int *rvalp)
5269{
5270	daplka_mr_resource_t	*mr_rp;
5271	dapl_mr_deregister_t	args;
5272	int 			retval;
5273
5274	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5275	    mode);
5276	if (retval != 0) {
5277		DERR("mr_deregister: copyin error %d\n", retval);
5278		return (EINVAL);
5279	}
5280	retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5281	    args.mrd_hkey, (void **)&mr_rp);
5282	if (retval != 0 || mr_rp == NULL) {
5283		DERR("mr_deregister: cannot find mr resource\n");
5284		return (EINVAL);
5285	}
5286	ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5287
5288	daplka_shared_mr_free(mr_rp);
5289	DAPLKA_RS_UNREF(mr_rp);
5290	return (0);
5291}
5292
5293/*
5294 * sync local memory regions on RDMA read or write.
5295 */
5296/* ARGSUSED */
5297static int
5298daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5299	cred_t *cred, int *rvalp)
5300{
5301	dapl_mr_sync_t	args;
5302	daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5303	ibt_mr_sync_t	mrs[DAPL_MR_PER_SYNC];
5304	uint32_t	sync_direction_flags;
5305	ibt_status_t	status;
5306	int		i, j;
5307	int		retval;
5308
5309	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5310	if (retval != 0) {
5311		DERR("mr_sync: copyin error %d\n", retval);
5312		return (EFAULT);
5313	}
5314
	/* bounds check on the number of segments */
5316	if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5317		DERR("mr_sync: number of segments too large\n");
5318		return (EINVAL);
5319	}
5320
5321	/* translate MR sync direction flag */
5322	if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5323		sync_direction_flags = IBT_SYNC_READ;
5324	} else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5325		sync_direction_flags = IBT_SYNC_WRITE;
5326	} else {
5327		DERR("mr_sync: unknown flags\n");
5328		return (EINVAL);
5329	}
5330
5331	/*
5332	 * all the segments are going to be sync'd by ibtl together
5333	 */
5334	for (i = 0; i < args.mrs_numseg; i++) {
5335		mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5336		    &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5337		if (mr_rp[i] == NULL) {
5338			for (j = 0; j < i; j++) {
5339				DAPLKA_RS_UNREF(mr_rp[j]);
5340			}
5341			DERR("mr_sync: lookup error\n");
5342			return (EINVAL);
5343		}
5344		ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5345		mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5346		mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5347		mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5348		mrs[i].ms_flags = sync_direction_flags;
5349	}
5350
5351	status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5352	if (status != IBT_SUCCESS) {
5353		DERR("mr_sync: ibt_sync_mr error %d\n", status);
5354		*rvalp = (int)status;
5355	}
5356	for (i = 0; i < args.mrs_numseg; i++) {
5357		DAPLKA_RS_UNREF(mr_rp[i]);
5358	}
5359	return (0);
5360}
5361
5362/*
5363 * destroys a memory region.
5364 * called when refcnt drops to zero.
5365 */
5366static int
5367daplka_mr_destroy(daplka_resource_t *gen_rp)
5368{
5369	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)gen_rp;
5370	ibt_status_t		status;
5371
5372	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5373	ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5374	ASSERT(mr_rp->mr_shared_mr == NULL);
5375	D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5376	    mr_rp, DAPLKA_RS_RNUM(mr_rp));
5377
5378	/*
5379	 * deregister mr
5380	 */
5381	if (mr_rp->mr_hdl) {
5382		status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5383		    mr_rp->mr_hdl);
5384		if (status != IBT_SUCCESS) {
5385			DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5386			    status);
5387		}
5388		mr_rp->mr_hdl = NULL;
5389		D3("mr_destroy: mr deregistered\n");
5390	}
5391	mr_rp->mr_attr.mr_vaddr = NULL;
5392
5393	/*
5394	 * release reference on PD
5395	 */
5396	if (mr_rp->mr_pd_res != NULL) {
5397		DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5398		mr_rp->mr_pd_res = NULL;
5399	}
5400	mutex_destroy(&mr_rp->mr_lock);
5401	DAPLKA_RS_FINI(mr_rp);
5402	kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5403	D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5404	return (0);
5405}
5406
5407/*
5408 * this function is called by daplka_hash_destroy for
5409 * freeing MR resource objects
5410 */
5411static void
5412daplka_hash_mr_free(void *obj)
5413{
5414	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)obj;
5415
5416	daplka_shared_mr_free(mr_rp);
5417	DAPLKA_RS_UNREF(mr_rp);
5418}
5419
5420/*
5421 * comparison function used for finding a shared mr object
5422 * from the global shared mr avl tree.
5423 */
5424static int
5425daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5426{
5427	daplka_shared_mr_t	*s1 = (daplka_shared_mr_t *)smr1;
5428	daplka_shared_mr_t	*s2 = (daplka_shared_mr_t *)smr2;
5429	int i;
5430
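	/*
	 * compare the shared mr cookies word by word, starting from the
	 * highest-indexed word, to impose a total ordering on the
	 * entries of the avl tree.
	 */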
5431	for (i = 4; i >= 0; i--) {
5432		if (s1->smr_cookie.mc_uint_arr[i] <
5433		    s2->smr_cookie.mc_uint_arr[i]) {
5434			return (-1);
5435		}
5436		if (s1->smr_cookie.mc_uint_arr[i] >
5437		    s2->smr_cookie.mc_uint_arr[i]) {
5438			return (1);
5439		}
5440	}
5441	return (0);
5442}
5443
5444/*
5445 * allocates a protection domain.
5446 */
5447/* ARGSUSED */
5448static int
5449daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5450	cred_t *cred, int *rvalp)
5451{
5452	dapl_pd_alloc_t		args;
5453	daplka_pd_resource_t	*pd_rp;
5454	ibt_status_t		status;
5455	uint64_t		pd_hkey = 0;
5456	boolean_t		inserted = B_FALSE;
5457	int			retval;
5458
5459	pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
5460	if (pd_rp == NULL) {
5461		DERR("pd_alloc: cannot allocate pd resource\n");
5462		return (ENOMEM);
5463	}
5464	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5465	DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
5466	    DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);
5467
5468	pd_rp->pd_hca = ia_rp->ia_hca;
5469	pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
5470	status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
5471	    IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
5472	if (status != IBT_SUCCESS) {
5473		DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
5474		*rvalp = (int)status;
5475		retval = 0;
5476		goto cleanup;
5477	}
5478
5479	/* insert into pd hash table */
5480	retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
5481	    &pd_hkey, (void *)pd_rp);
5482	if (retval != 0) {
5483		DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
5484		goto cleanup;
5485	}
5486	inserted = B_TRUE;
5487	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))
5488
5489	/* return hkey to library */
5490	args.pda_hkey = pd_hkey;
5491
5492	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
5493	    mode);
5494	if (retval != 0) {
5495		DERR("pd_alloc: copyout error %d\n", retval);
5496		retval = EFAULT;
5497		goto cleanup;
5498	}
5499	return (0);
5500
5501cleanup:;
5502	if (inserted) {
5503		daplka_pd_resource_t *free_rp = NULL;
5504
5505		(void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
5506		    (void **)&free_rp);
5507		if (free_rp != pd_rp) {
5508			DERR("pd_alloc: cannot remove pd from hash table\n");
5509			/*
5510			 * we can only get here if another thread
5511			 * has completed the cleanup in pd_free
5512			 */
5513			return (retval);
5514		}
5515	}
5516	DAPLKA_RS_UNREF(pd_rp);
5517	return (retval);
5518}
5519
5520/*
5521 * destroys a protection domain.
5522 * called when refcnt drops to zero.
5523 */
5524static int
5525daplka_pd_destroy(daplka_resource_t *gen_rp)
5526{
5527	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5528	ibt_status_t status;
5529
5530	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5531	ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5532	D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5533	    pd_rp, DAPLKA_RS_RNUM(pd_rp));
5534
5535	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5536	if (pd_rp->pd_hdl != NULL) {
5537		status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5538		    pd_rp->pd_hdl);
5539		if (status != IBT_SUCCESS) {
5540			DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5541		}
5542	}
5543	DAPLKA_RS_FINI(pd_rp);
5544	kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5545	D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5546	return (0);
5547}
5548
5549static void
5550daplka_hash_pd_free(void *obj)
5551{
5552	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5553
5554	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5555	DAPLKA_RS_UNREF(pd_rp);
5556}
5557
5558/*
5559 * removes the pd reference from ia_pd_htbl and releases the
5560 * initial reference to the pd. also destroys the pd if the refcnt
5561 * is zero.
5562 */
5563/* ARGSUSED */
5564static int
5565daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5566	cred_t *cred, int *rvalp)
5567{
5568	daplka_pd_resource_t *pd_rp;
5569	dapl_pd_free_t args;
5570	int retval;
5571
5572	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5573	if (retval != 0) {
5574		DERR("pd_free: copyin error %d\n", retval);
5575		return (EINVAL);
5576	}
5577
5578	retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5579	    args.pdf_hkey, (void **)&pd_rp);
5580	if (retval != 0 || pd_rp == NULL) {
5581		DERR("pd_free: cannot find pd resource\n");
5582		return (EINVAL);
5583	}
5584	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5585
5586	/* UNREF calls the actual free function when refcnt is zero */
5587	DAPLKA_RS_UNREF(pd_rp);
5588	return (0);
5589}
5590
5591/*
5592 * allocates a memory window
5593 */
5594/* ARGSUSED */
5595static int
5596daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5597	cred_t *cred, int *rvalp)
5598{
5599	daplka_pd_resource_t	*pd_rp;
5600	daplka_mw_resource_t	*mw_rp;
5601	dapl_mw_alloc_t		args;
5602	ibt_status_t		status;
5603	boolean_t		inserted = B_FALSE;
5604	uint64_t		mw_hkey;
5605	ibt_rkey_t		mw_rkey;
5606	int			retval;
5607
5608	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5609	if (retval != 0) {
5610		DERR("mw_alloc: copyin error %d\n", retval);
5611		return (EFAULT);
5612	}
5613
5614	/*
5615	 * Allocate and initialize a MW resource
5616	 */
5617	mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5618	if (mw_rp == NULL) {
5619		DERR("mw_alloc: cannot allocate mw resource\n");
5620		return (ENOMEM);
5621	}
5622	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5623	DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5624	    DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5625
5626	mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5627	mw_rp->mw_hca = ia_rp->ia_hca;
5628	mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5629
5630	/* get pd handle */
5631	pd_rp = (daplka_pd_resource_t *)
5632	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5633	if (pd_rp == NULL) {
5634		DERR("mw_alloc: cannot find pd resource\n");
5635		goto cleanup;
5636	}
5637	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5638
5639	mw_rp->mw_pd_res = pd_rp;
5640
5641	status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5642	    pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5643
5644	if (status != IBT_SUCCESS) {
5645		DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5646		*rvalp = (int)status;
5647		retval = 0;
5648		goto cleanup;
5649	}
5650
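	/*
	 * account for this mw_alloc in ia_mw_alloccnt so that an MW
	 * freeze (DAPLKA_IA_MW_FREEZE_IN_PROGRESS) can wait for all
	 * in-flight allocations to drain before proceeding.
	 */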
5651	mutex_enter(&ia_rp->ia_lock);
5652	switch (ia_rp->ia_state) {
5653	case DAPLKA_IA_INIT:
5654		ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5655		ia_rp->ia_mw_alloccnt++;
5656		retval = 0;
5657		break;
5658	case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
		/* another mw_alloc is already in progress; increase the count */
5660		ia_rp->ia_mw_alloccnt++;
5661		retval = 0;
5662		break;
5663	case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5664		/* FALLTHRU */
5665	case DAPLKA_IA_MW_FROZEN:
5666		/*
		 * the IA is being frozen or is already frozen; don't
		 * allow more MWs to be allocated.
5669		 */
5670		DERR("mw_alloc:	IA is freezing MWs (state=%d)\n",
5671		    ia_rp->ia_state);
5672		retval = EINVAL;
5673		break;
5674	default:
5675		ASSERT(!"Invalid IA state in mw_alloc");
5676		DERR("mw_alloc:	IA state=%d invalid\n", ia_rp->ia_state);
5677		retval = EINVAL;
5678		break;
5679	}
5680	mutex_exit(&ia_rp->ia_lock);
5681	/* retval is 0 when ia_mw_alloccnt is incremented */
5682	if (retval != 0) {
5683		goto cleanup;
5684	}
5685
5686	/* insert into mw hash table */
5687	mw_hkey = 0;
5688	retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5689	    (void *)mw_rp);
5690	if (retval != 0) {
5691		DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5692		mutex_enter(&ia_rp->ia_lock);
5693		ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5694		ia_rp->ia_mw_alloccnt--;
5695		if (ia_rp->ia_mw_alloccnt == 0) {
5696			ia_rp->ia_state = DAPLKA_IA_INIT;
5697			cv_broadcast(&ia_rp->ia_cv);
5698		}
5699		mutex_exit(&ia_rp->ia_lock);
5700		goto cleanup;
5701	}
5702	inserted = B_TRUE;
5703	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))
5704
5705	D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5706	    mw_rp->mw_hdl, (longlong_t)mw_rkey);
5707
5708	mutex_enter(&ia_rp->ia_lock);
5709	/*
	 * We are done with mw_alloc. If this was the last mw_alloc,
	 * change state back to DAPLKA_IA_INIT and wake up waiters,
	 * specifically the unlock callback.
5713	 */
5714	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5715	ia_rp->ia_mw_alloccnt--;
5716	if (ia_rp->ia_mw_alloccnt == 0) {
5717		ia_rp->ia_state = DAPLKA_IA_INIT;
5718		cv_broadcast(&ia_rp->ia_cv);
5719	}
5720	mutex_exit(&ia_rp->ia_lock);
5721
5722	args.mw_hkey = mw_hkey;
5723	args.mw_rkey = mw_rkey;
5724
5725	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5726	    mode);
5727	if (retval != 0) {
5728		DERR("mw_alloc: copyout error %d\n", retval);
5729		retval = EFAULT;
5730		goto cleanup;
5731	}
5732	return (0);
5733
5734cleanup:;
5735	if (inserted) {
5736		daplka_mw_resource_t *free_rp = NULL;
5737
5738		(void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5739		    (void **)&free_rp);
5740		if (free_rp != mw_rp) {
5741			DERR("mw_alloc: cannot remove mw from hash table\n");
5742			/*
5743			 * we can only get here if another thread
5744			 * has completed the cleanup in mw_free
5745			 */
5746			return (retval);
5747		}
5748	}
5749	DAPLKA_RS_UNREF(mw_rp);
5750	return (retval);
5751}
5752
5753/*
5754 * removes the mw reference from ia_mw_htbl and releases the
5755 * initial reference to the mw. also destroys the mw if the refcnt
5756 * is zero.
5757 */
5758/* ARGSUSED */
5759static int
5760daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5761	cred_t *cred, int *rvalp)
5762{
5763	daplka_mw_resource_t	*mw_rp = NULL;
5764	dapl_mw_free_t		args;
5765	int			retval = 0;
5766
5767	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5768	if (retval != 0) {
5769		DERR("mw_free: copyin error %d\n", retval);
5770		return (EFAULT);
5771	}
5772
5773	retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5774	    (void **)&mw_rp);
5775	if (retval != 0 || mw_rp == NULL) {
5776		DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5777		    (longlong_t)args.mw_hkey);
5778		return (EINVAL);
5779	}
5780
5781	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5782
5783	/* UNREF calls the actual free function when refcnt is zero */
5784	DAPLKA_RS_UNREF(mw_rp);
5785	return (retval);
5786}
5787
5788/*
5789 * destroys the memory window.
5790 * called when refcnt drops to zero.
5791 */
5792static int
5793daplka_mw_destroy(daplka_resource_t *gen_rp)
5794{
5795	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)gen_rp;
5796	ibt_status_t		status;
5797
5798	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5799	ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5800	D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5801	    mw_rp, DAPLKA_RS_RNUM(mw_rp));
5802
5803	/*
5804	 * free memory window
5805	 */
5806	if (mw_rp->mw_hdl) {
5807		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5808		    mw_rp->mw_hdl);
5809		if (status != IBT_SUCCESS) {
5810			DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5811		}
5812		mw_rp->mw_hdl = NULL;
5813		D3("mw_destroy: mw freed\n");
5814	}
5815
5816	/*
5817	 * release reference on PD
5818	 */
5819	if (mw_rp->mw_pd_res != NULL) {
5820		DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5821		mw_rp->mw_pd_res = NULL;
5822	}
5823	mutex_destroy(&mw_rp->mw_lock);
5824	DAPLKA_RS_FINI(mw_rp);
5825	kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5826	D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5827	return (0);
5828}
5829
5830static void
5831daplka_hash_mw_free(void *obj)
5832{
5833	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5834
5835	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5836	DAPLKA_RS_UNREF(mw_rp);
5837}
5838
5839/*
5840 * SRQ ioctls and supporting functions
5841 */
5842/* ARGSUSED */
5843static int
5844daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5845    cred_t *cred, int *rvalp)
5846{
5847	daplka_srq_resource_t		*srq_rp;
5848	daplka_pd_resource_t		*pd_rp;
5849	dapl_srq_create_t		args;
5850	ibt_srq_sizes_t			srq_sizes;
5851	ibt_srq_sizes_t			srq_real_sizes;
5852	ibt_hca_attr_t			*hca_attrp;
5853	uint64_t			srq_hkey = 0;
5854	boolean_t			inserted = B_FALSE;
5855	int				retval;
5856	ibt_status_t			status;
5857
5858	D3("srq_create: enter\n");
5859	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5860	    mode);
5861	if (retval != 0) {
5862		DERR("srq_create: copyin error %d\n", retval);
5863		return (EFAULT);
5864	}
5865	srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5866	if (srq_rp == NULL) {
5867		DERR("srq_create: cannot allocate ep_rp\n");
5868		return (ENOMEM);
5869	}
5870	DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5871	    DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5872
5873	srq_rp->srq_hca = ia_rp->ia_hca;
5874	srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5875	mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5876
5877	/* get pd handle */
5878	pd_rp = (daplka_pd_resource_t *)
5879	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5880	if (pd_rp == NULL) {
5881		DERR("srq_create: cannot find pd resource\n");
5882		retval = EINVAL;
5883		goto cleanup;
5884	}
5885	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5886	srq_rp->srq_pd_res = pd_rp;
5887
5888	/*
5889	 * these checks ensure that the requested SRQ sizes
5890	 * are within the limits supported by the chosen HCA.
5891	 */
5892	hca_attrp = &ia_rp->ia_hca->hca_attr;
5893	if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5894		DERR("srq_create: invalid srqs_sz %d\n",
5895		    args.srqc_sizes.srqs_sz);
5896		retval = EINVAL;
5897		goto cleanup;
5898	}
5899	if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5900		DERR("srq_create: invalid srqs_sgl %d\n",
5901		    args.srqc_sizes.srqs_sgl);
5902		retval = EINVAL;
5903		goto cleanup;
5904	}
5905
5906	D3("srq_create: srq_sgl %d, srq_sz %d\n",
5907	    args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5908
5909	srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5910	srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5911
5912	/* create srq */
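	/*
	 * IBT_SRQ_USER_MAP requests a user-mappable SRQ; the library
	 * mmaps the queue using the data_out info obtained below.
	 */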
5913	status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5914	    IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5915	    &srq_real_sizes);
5916	if (status != IBT_SUCCESS) {
5917		DERR("srq_create: alloc_srq returned %d\n", status);
5918		*rvalp = (int)status;
5919		retval = 0;
5920		goto cleanup;
5921	}
5922
5926	/* Get HCA-specific data_out info */
5927	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5928	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5929	    &args.srqc_data_out, sizeof (args.srqc_data_out));
5930
5931	if (status != IBT_SUCCESS) {
5932		DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5933		*rvalp = (int)status;
5934		retval = 0;
5935		goto cleanup;
5936	}
5937
5938	srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5939
	/* prepare the real sizes for copyout back to the library */
5941	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5942	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5943
5944	/* insert into srq hash table */
5945	retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5946	    &srq_hkey, (void *)srq_rp);
5947	if (retval != 0) {
5948		DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5949		goto cleanup;
5950	}
5951	inserted = B_TRUE;
5952
5953	/* return hkey to library */
5954	args.srqc_hkey = srq_hkey;
5955
5956	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5957	    mode);
5958	if (retval != 0) {
5959		DERR("srq_create: copyout error %d\n", retval);
5960		retval = EFAULT;
5961		goto cleanup;
5962	}
5963
5964	D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5965	D3("	sz(%d) sgl(%d)\n",
5966	    args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5967	D3("srq_create: exit\n");
5968	return (0);
5969
5970cleanup:
5971	if (inserted) {
5972		daplka_srq_resource_t *free_rp = NULL;
5973
5974		(void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5975		    (void **)&free_rp);
5976		if (free_rp != srq_rp) {
5977			/*
			 * this case should be impossible: the hkey has not
			 * been returned to the library, so srq_free cannot
			 * have removed the srq from the hash table.
5980			 */
5981			DERR("srq_create: cannot remove srq from hash table\n");
5982			ASSERT(B_FALSE);
5983			return (retval);
5984		}
5985	}
5986	DAPLKA_RS_UNREF(srq_rp);
5987	return (retval);
5988}
5989
5990/*
5991 * Resize an existing SRQ
5992 */
5993/* ARGSUSED */
5994static int
5995daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5996    cred_t *cred, int *rvalp)
5997{
5998	daplka_srq_resource_t		*srq_rp = NULL;
5999	ibt_hca_attr_t			*hca_attrp;
6000	dapl_srq_resize_t		args;
6001	ibt_status_t			status;
6002	int				retval = 0;
6003
6004	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
6005	    mode);
6006	if (retval != 0) {
6007		DERR("srq_resize: copyin error %d\n", retval);
6008		return (EFAULT);
6009	}
6010
6011	/* get srq resource */
6012	srq_rp = (daplka_srq_resource_t *)
6013	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
6014	if (srq_rp == NULL) {
6015		DERR("srq_resize: cannot find srq resource\n");
6016		return (EINVAL);
6017	}
6018	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6019
6020	hca_attrp = &ia_rp->ia_hca->hca_attr;
6021	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
6022		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
6023		retval = EINVAL;
6024		goto cleanup;
6025	}
6026
6027	mutex_enter(&srq_rp->srq_lock);
6028	/*
	 * If ibt_modify_srq fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize will never lose events
	 * and a resize error leaves the SRQ intact. Therefore, even if
	 * the resize request fails, we proceed and get the mapping
	 * data from the SRQ so that the library can mmap it.
6034	 */
6035	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
6036	    args.srqr_new_size, 0, &args.srqr_real_size);
6037	if (status != IBT_SUCCESS) {
6038		/* we return the size of the old CQ if resize fails */
6039		args.srqr_real_size = srq_rp->srq_real_size;
6040		ASSERT(status != IBT_SRQ_HDL_INVALID);
6041		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
6042	} else {
6043		srq_rp->srq_real_size = args.srqr_real_size;
6044	}
6045	mutex_exit(&srq_rp->srq_lock);
6046
6048	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
6049	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);
6050
6051	/* Get HCA-specific data_out info */
6052	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
6053	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
6054	    &args.srqr_data_out, sizeof (args.srqr_data_out));
6055	if (status != IBT_SUCCESS) {
6056		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
6057		/* return ibt_ci_data_out status */
6058		*rvalp = (int)status;
6059		retval = 0;
6060		goto cleanup;
6061	}
6062
6063	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
6064	    mode);
6065	if (retval != 0) {
6066		DERR("srq_resize: copyout error %d\n", retval);
6067		retval = EFAULT;
6068		goto cleanup;
6069	}
6070
6071cleanup:;
6072	if (srq_rp != NULL) {
6073		DAPLKA_RS_UNREF(srq_rp);
6074	}
6075	return (retval);
6076}
6077
6078/*
6079 * Frees an SRQ resource.
6080 */
6081/* ARGSUSED */
6082static int
6083daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6084    cred_t *cred, int *rvalp)
6085{
6086	daplka_srq_resource_t	*srq_rp = NULL;
6087	dapl_srq_free_t		args;
6088	int			retval;
6089
6090	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6091	if (retval != 0) {
6092		DERR("srq_free: copyin error %d\n", retval);
6093		return (EFAULT);
6094	}
6095
6096	retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6097	    args.srqf_hkey, (void **)&srq_rp);
6098	if (retval != 0 || srq_rp == NULL) {
6099		/*
6100		 * this is only possible if we have two threads
		 * calling srq_free in parallel.
6102		 */
6103		DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6104		    retval, args.srqf_hkey);
6105		return (EINVAL);
6106	}
6107
6108	/* UNREF calls the actual free function when refcnt is zero */
6109	DAPLKA_RS_UNREF(srq_rp);
6110	return (0);
6111}
6112
6113/*
6114 * destroys a SRQ resource.
6115 * called when refcnt drops to zero.
6116 */
6117static int
6118daplka_srq_destroy(daplka_resource_t *gen_rp)
6119{
6120	daplka_srq_resource_t	*srq_rp = (daplka_srq_resource_t *)gen_rp;
6121	ibt_status_t		status;
6122
6123	ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6124
6125	D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6126	    srq_rp, DAPLKA_RS_RNUM(srq_rp));
6127	/*
6128	 * destroy the srq
6129	 */
6130	if (srq_rp->srq_hdl != NULL) {
6131		status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6132		if (status != IBT_SUCCESS) {
6133			DERR("srq_destroy: ibt_free_srq returned %d\n",
6134			    status);
6135		}
6136		srq_rp->srq_hdl = NULL;
6137		D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6138	}
6139	/*
6140	 * release all references
6141	 */
6142	if (srq_rp->srq_pd_res != NULL) {
6143		DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6144		srq_rp->srq_pd_res = NULL;
6145	}
6146
6147	mutex_destroy(&srq_rp->srq_lock);
6148	DAPLKA_RS_FINI(srq_rp);
6149	kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6150	D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6151	return (0);
6152}
6153
6154static void
6155daplka_hash_srq_free(void *obj)
6156{
6157	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6158
6159	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6160	DAPLKA_RS_UNREF(srq_rp);
6161}
6162
6163/*
6164 * This function tells the CM to start listening on a service id.
6165 * It must be called by the passive side client before the client
6166 * can receive connection requests from remote endpoints. If the
6167 * client specifies a non-zero service id (connection qualifier in
6168 * dapl terms), this function will attempt to bind to this service
6169 * id and return an error if the id is already in use. If the client
6170 * specifies zero as the service id, this function will try to find
 * the next available service id and return it to the client.
6172 * To support the cr_handoff function, this function will, in addition
6173 * to creating and inserting an SP resource into the per-IA SP hash
6174 * table, insert the SP resource into a global SP table. This table
6175 * maintains all active service points created by all dapl clients.
6176 * CR handoff locates the target SP by iterating through this global
6177 * table.
6178 */
6179/* ARGSUSED */
6180static int
6181daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6182	cred_t *cred, int *rvalp)
6183{
6184	daplka_evd_resource_t	*evd_rp = NULL;
6185	daplka_sp_resource_t	*sp_rp = NULL;
6186	dapl_service_register_t	args;
6187	ibt_srv_desc_t		sd_args;
6188	ibt_srv_bind_t		sb_args;
6189	ibt_status_t		status;
6190	ib_svc_id_t		retsid = 0;
6191	uint64_t		sp_hkey = 0;
6192	boolean_t		bumped = B_FALSE;
6193	int			backlog_size;
6194	int			retval = 0;
6195
6196	retval = ddi_copyin((void *)arg, &args,
6197	    sizeof (dapl_service_register_t), mode);
6198	if (retval != 0) {
6199		DERR("service_register: copyin error %d\n", retval);
6200		return (EINVAL);
6201	}
6202
6203	sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6204	if (sp_rp == NULL) {
6205		DERR("service_register: cannot allocate sp resource\n");
6206		return (ENOMEM);
6207	}
6208	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6209	DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6210	    DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6211
6212	/* check if evd exists */
6213	evd_rp = (daplka_evd_resource_t *)
6214	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6215	if (evd_rp == NULL) {
6216		DERR("service_register: evd resource not found\n");
6217		retval = EINVAL;
6218		goto cleanup;
6219	}
6220	/*
6221	 * initialize backlog size
6222	 */
6223	if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6224		backlog_size = evd_rp->evd_cq_real_size + 1;
6225	} else {
6226		backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6227	}
6228	D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6229
6230	/* save the userland sp ptr */
6231	sp_rp->sp_cookie = args.sr_sp_cookie;
6232	sp_rp->sp_backlog_size = backlog_size;
6233	D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6234	sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6235	    sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6236
6237	/* save evd resource pointer */
6238	sp_rp->sp_evd_res = evd_rp;
6239
6240	/*
6241	 * save ruid here so that we can do a comparison later
6242	 * when someone does cr_handoff. the check will prevent
6243	 * a malicious app from passing a CR to us.
6244	 */
6245	sp_rp->sp_ruid = crgetruid(cred);
6246
6247	/* fill in args for register_service */
6248	sd_args.sd_ud_handler = NULL;
6249	sd_args.sd_handler = daplka_cm_service_handler;
6250	sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6251
6252	status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6253	    &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6254
6255	if (status != IBT_SUCCESS) {
6256		DERR("service_register: ibt_register_service returned %d\n",
6257		    status);
6258		*rvalp = (int)status;
6259		retval = 0;
6260		goto cleanup;
6261	}
6262	/* save returned sid */
6263	sp_rp->sp_conn_qual = retsid;
6264	args.sr_retsid = retsid;
6265
6266	/* fill in args for bind_service */
6267	sb_args.sb_pkey = ia_rp->ia_port_pkey;
6268	sb_args.sb_lease = 0xffffffff;
6269	sb_args.sb_key[0] = 0x1234;
6270	sb_args.sb_key[1] = 0x5678;
6271	sb_args.sb_name = DAPLKA_DRV_NAME;
6272
6273	D2("service_register: bind(0x%llx:0x%llx)\n",
6274	    (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6275	    (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6276
6277	status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6278	    &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6279	if (status != IBT_SUCCESS) {
6280		DERR("service_register: ibt_bind_service returned %d\n",
6281		    status);
6282		*rvalp = (int)status;
6283		retval = 0;
6284		goto cleanup;
6285	}
6286
6287	/*
6288	 * need to bump refcnt because the global hash table will
6289	 * have a reference to sp_rp
6290	 */
6291	DAPLKA_RS_REF(sp_rp);
6292	bumped = B_TRUE;
6293
6294	/* insert into global sp hash table */
6295	sp_rp->sp_global_hkey = 0;
6296	retval = daplka_hash_insert(&daplka_global_sp_htbl,
6297	    &sp_rp->sp_global_hkey, (void *)sp_rp);
6298	if (retval != 0) {
6299		DERR("service_register: cannot insert sp resource\n");
6300		goto cleanup;
6301	}
6302	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))
6303
6304	/* insert into per-IA sp hash table */
6305	retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6306	    &sp_hkey, (void *)sp_rp);
6307	if (retval != 0) {
6308		DERR("service_register: cannot insert sp resource\n");
6309		goto cleanup;
6310	}
6311
6312	/* pass index to application */
6313	args.sr_sp_hkey = sp_hkey;
6314	retval = ddi_copyout(&args, (void *)arg,
6315	    sizeof (dapl_service_register_t), mode);
6316	if (retval != 0) {
6317		DERR("service_register: copyout error %d\n", retval);
6318		retval = EFAULT;
6319		goto cleanup;
6320	}
6321	return (0);
6322
6323cleanup:;
6324	ASSERT(sp_rp != NULL);
6325	/* remove from ia table */
6326	if (sp_hkey != 0) {
6327		daplka_sp_resource_t *free_rp = NULL;
6328
6329		(void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6330		    sp_hkey, (void **)&free_rp);
6331		if (free_rp != sp_rp) {
6332			DERR("service_register: cannot remove sp\n");
6333			/*
6334			 * we can only get here if another thread
6335			 * has completed the cleanup in svc_deregister
6336			 */
6337			return (retval);
6338		}
6339	}
6340
6341	/* remove from global table */
6342	if (sp_rp->sp_global_hkey != 0) {
6343		daplka_sp_resource_t *free_rp = NULL;
6344
6345		/*
6346		 * we get here if either the hash_insert into
6347		 * ia_sp_htbl failed or the ddi_copyout failed.
6348		 * hash_insert failure implies that we are the
6349		 * only thread with a reference to sp. ddi_copyout
6350		 * failure implies that svc_deregister could have
6351		 * picked up the sp and destroyed it. but since
6352		 * we got to this point, we must have removed
		 * the sp ourselves in the hash_remove above, so
		 * the sp can be destroyed by us.
6355		 */
6356		(void) daplka_hash_remove(&daplka_global_sp_htbl,
6357		    sp_rp->sp_global_hkey, (void **)&free_rp);
6358		if (free_rp != sp_rp) {
6359			DERR("service_register: cannot remove sp\n");
6360			/*
6361			 * this case is impossible. see explanation above.
6362			 */
6363			ASSERT(B_FALSE);
6364			return (retval);
6365		}
6366		sp_rp->sp_global_hkey = 0;
6367	}
6368	/* unreference sp */
6369	if (bumped) {
6370		DAPLKA_RS_UNREF(sp_rp);
6371	}
6372
6373	/* destroy sp resource */
6374	DAPLKA_RS_UNREF(sp_rp);
6375	return (retval);
6376}
6377
6378/*
6379 * deregisters the service and removes SP from the global table.
6380 */
6381/* ARGSUSED */
6382static int
6383daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6384	cred_t *cred, int *rvalp)
6385{
6386	dapl_service_deregister_t	args;
6387	daplka_sp_resource_t		*sp_rp = NULL, *g_sp_rp = NULL;
6388	int				retval;
6389
6390	retval = ddi_copyin((void *)arg, &args,
6391	    sizeof (dapl_service_deregister_t), mode);
6392
6393	if (retval != 0) {
6394		DERR("service_deregister: copyin error %d\n", retval);
6395		return (EINVAL);
6396	}
6397
6398	retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6399	    args.sdr_sp_hkey, (void **)&sp_rp);
6400	if (retval != 0 || sp_rp == NULL) {
6401		DERR("service_deregister: cannot find sp resource\n");
6402		return (EINVAL);
6403	}
6404
6405	retval = daplka_hash_remove(&daplka_global_sp_htbl,
6406	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6407	if (retval != 0 || g_sp_rp == NULL) {
6408		DERR("service_deregister: cannot find sp resource\n");
6409	}
6410
6411	/* remove the global reference */
6412	if (g_sp_rp == sp_rp) {
6413		DAPLKA_RS_UNREF(g_sp_rp);
6414	}
6415
6416	DAPLKA_RS_UNREF(sp_rp);
6417	return (0);
6418}
6419
6420/*
6421 * destroys a service point.
6422 * called when the refcnt drops to zero.
6423 */
6424static int
6425daplka_sp_destroy(daplka_resource_t *gen_rp)
6426{
6427	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6428	ibt_status_t status;
6429
6430	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6431	ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6432	D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6433	    sp_rp, DAPLKA_RS_RNUM(sp_rp));
6434
6435	/*
6436	 * it is possible for pending connections to remain
6437	 * on an SP. We need to clean them up here.
6438	 */
6439	if (sp_rp->sp_backlog != NULL) {
6440		ibt_cm_proceed_reply_t proc_reply;
6441		int i, cnt = 0;
6442		void *spcp_sidp;
6443
6444		for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6445			if (sp_rp->sp_backlog[i].spcp_state ==
6446			    DAPLKA_SPCP_PENDING) {
6447				cnt++;
6448				if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6449					DERR("sp_destroy: "
6450					    "spcp_sid == NULL!\n");
6451					continue;
6452				}
6453				mutex_enter(&sp_rp->sp_lock);
6454				spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6455				sp_rp->sp_backlog[i].spcp_state =
6456				    DAPLKA_SPCP_INIT;
6457				sp_rp->sp_backlog[i].spcp_sid = NULL;
6458				sp_rp->sp_backlog[i].spcp_req_len = 0;
6459				mutex_exit(&sp_rp->sp_lock);
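				/*
				 * reject the deferred session with
				 * IBT_CM_NO_RESOURCE so that the
				 * active side is not left waiting.
				 */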
6460				status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6461				    spcp_sidp,
6462				    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6463				if (status != IBT_SUCCESS) {
6464					DERR("sp_destroy: proceed failed %d\n",
6465					    status);
6466				}
6467			}
6468		}
6469		if (cnt > 0) {
6470			DERR("sp_destroy: found %d pending "
6471			    "connections\n", cnt);
6472		}
6473	}
6474
6475	if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6476		status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6477		    sp_rp->sp_bind_hdl);
6478		if (status != IBT_SUCCESS) {
6479			DERR("sp_destroy: ibt_unbind_service "
6480			    "failed: %d\n", status);
6481		}
6482	}
6483
6484	if (sp_rp->sp_srv_hdl != NULL) {
6485		status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6486		    sp_rp->sp_srv_hdl);
6487		if (status != IBT_SUCCESS) {
6488			DERR("sp_destroy: ibt_deregister_service "
6489			    "failed: %d\n", status);
6490		}
6491	}
6492	if (sp_rp->sp_backlog != NULL) {
6493		kmem_free(sp_rp->sp_backlog,
6494		    sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6495		sp_rp->sp_backlog = NULL;
6496		sp_rp->sp_backlog_size = 0;
6497	}
6498
6499	/*
6500	 * release reference to evd
6501	 */
6502	if (sp_rp->sp_evd_res != NULL) {
6503		DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6504	}
6505	sp_rp->sp_bind_hdl = NULL;
6506	sp_rp->sp_srv_hdl = NULL;
6507	DAPLKA_RS_FINI(sp_rp);
6508	kmem_free(sp_rp, sizeof (*sp_rp));
6509	D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6510	return (0);
6511}
6512
6513/*
6514 * this function is called by daplka_hash_destroy for
6515 * freeing SP resource objects
6516 */
6517static void
6518daplka_hash_sp_free(void *obj)
6519{
6520	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6521	daplka_sp_resource_t *g_sp_rp;
6522	int retval;
6523
6524	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6525
6526	retval = daplka_hash_remove(&daplka_global_sp_htbl,
6527	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6528	if (retval != 0 || g_sp_rp == NULL) {
6529		DERR("sp_free: cannot find sp resource\n");
6530	}
6531	if (g_sp_rp == sp_rp) {
6532		DAPLKA_RS_UNREF(g_sp_rp);
6533	}
6534
6535	DAPLKA_RS_UNREF(sp_rp);
6536}
6537
6538static void
6539daplka_hash_sp_unref(void *obj)
6540{
6541	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6542
6543	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6544	DAPLKA_RS_UNREF(sp_rp);
6545}
6546
6547/*
6548 * Passive side CM handlers
6549 */
6550
6551/*
6552 * processes the REQ_RCV event
6553 */
6554/* ARGSUSED */
6555static ibt_cm_status_t
6556daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
6557    ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
6558{
6559	daplka_sp_conn_pend_t	*conn = NULL;
6560	daplka_evd_event_t	*cr_ev = NULL;
6561	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
6562	uint16_t		bkl_index;
6563	ibt_status_t		status;
6564
6565	/*
6566	 * acquire a slot in the connection backlog of this service point
6567	 */
6568	mutex_enter(&spp->sp_lock);
6569	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
6570		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
6571			conn = &spp->sp_backlog[bkl_index];
6572			ASSERT(conn->spcp_sid == NULL);
6573			conn->spcp_state = DAPLKA_SPCP_PENDING;
6574			conn->spcp_sid = event->cm_session_id;
6575			break;
6576		}
6577	}
6578	mutex_exit(&spp->sp_lock);
6579
6580	/*
6581	 * too many pending connections
6582	 */
6583	if (bkl_index == spp->sp_backlog_size) {
6584		DERR("service_req: connection pending exceeded %d limit\n",
6585		    spp->sp_backlog_size);
6586		return (IBT_CM_NO_RESOURCE);
6587	}
6588	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))
6589
6590	/*
6591	 * save data for cr_handoff
6592	 */
6593	if (pr_data != NULL && pr_len > 0) {
6594		int trunc_len = pr_len;
6595
6596		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
6597			DERR("service_req: private data truncated\n");
6598			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
6599		}
6600		conn->spcp_req_len = trunc_len;
6601		bcopy(pr_data, conn->spcp_req_data, trunc_len);
6602	} else {
6603		conn->spcp_req_len = 0;
6604	}
6605
6606	/*
6607	 * create a CR event
6608	 */
6609	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6610	if (cr_ev == NULL) {
6611		DERR("service_req: could not alloc cr_ev\n");
6612		cm_status = IBT_CM_NO_RESOURCE;
6613		goto cleanup;
6614	}
6615
6616	cr_ev->ee_next = NULL;
6617	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
6618	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
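	/*
	 * the psep cookie encodes the backlog index so that later
	 * processing of this CR can locate the pending connection.
	 */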
6619	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
6620	/*
6621	 * save the requestor gid
6622	 * daplka_event_poll needs this if this is a third party REQ_RCV
6623	 */
6624	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
6625	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
6626	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
6627	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;
6628
6629	/*
6630	 * set event type
6631	 */
6632	if (pr_len == 0) {
6633		cr_ev->ee_cmev.ec_cm_ev_type =
6634		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
6635	} else {
6636		cr_ev->ee_cmev.ec_cm_ev_priv_data =
6637		    kmem_zalloc(pr_len, KM_NOSLEEP);
6638		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6639			DERR("service_req: could not alloc priv\n");
6640			cm_status = IBT_CM_NO_RESOURCE;
6641			goto cleanup;
6642		}
6643		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6644		cr_ev->ee_cmev.ec_cm_ev_type =
6645		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
6646	}
6647	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6648
6649	/*
6650	 * tell the active side to expect the processing time to be
6651	 * at most equal to daplka_cm_delay
6652	 */
6653	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
6654	    daplka_cm_delay, NULL, 0);
6655	if (status != IBT_SUCCESS) {
6656		DERR("service_req: ibt_cm_delay failed %d\n", status);
6657		cm_status = IBT_CM_NO_RESOURCE;
6658		goto cleanup;
6659	}
6660
6661	/*
6662	 * enqueue cr_ev onto the cr_events list of the EVD
6663	 * corresponding to the SP
6664	 */
6665	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
6666	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
6667	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
6668	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
6669	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);
6670
6671	daplka_evd_wakeup(spp->sp_evd_res,
6672	    &spp->sp_evd_res->evd_cr_events, cr_ev);
6673
6674	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
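	/*
	 * defer the CM response; the handshake is completed later via
	 * ibt_cm_proceed once the client accepts or rejects the CR.
	 */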
6675	return (IBT_CM_DEFER);
6676
6677cleanup:;
6678	/*
6679	 * free the cr event
6680	 */
6681	if (cr_ev != NULL) {
6682		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
6683			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6684			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6685			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6686		}
6687		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
6688	}
6689	/*
6690	 * release our slot in the backlog array
6691	 */
6692	if (conn != NULL) {
6693		mutex_enter(&spp->sp_lock);
6694		ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
6695		ASSERT(conn->spcp_sid == event->cm_session_id);
6696		conn->spcp_state = DAPLKA_SPCP_INIT;
6697		conn->spcp_req_len = 0;
6698		conn->spcp_sid = NULL;
6699		mutex_exit(&spp->sp_lock);
6700	}
6701	return (cm_status);
6702}
6703
6704/*
6705 * processes the CONN_CLOSED event
6706 */
6707/* ARGSUSED */
6708static ibt_cm_status_t
6709daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6710    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6711    void *priv_data, ibt_priv_data_len_t len)
6712{
6713	daplka_ep_resource_t	*ep_rp;
6714	daplka_evd_event_t	*disc_ev;
6715	uint32_t		old_state, new_state;
6716
6717	ep_rp = (daplka_ep_resource_t *)
6718	    ibt_get_chan_private(event->cm_channel);
6719	if (ep_rp == NULL) {
6720		DERR("service_conn_closed: ep_rp == NULL\n");
6721		return (IBT_CM_ACCEPT);
6722	}
6723
6724	/*
6725	 * verify that the ep_state is either CONNECTED or
	 * DISCONNECTING. if it is not in either state, return
6727	 * without generating an event.
6728	 */
6729	new_state = old_state = daplka_ep_get_state(ep_rp);
6730	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6731	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6732		/*
6733		 * we can get here if the connection is being aborted
6734		 */
6735		D2("service_conn_closed: conn aborted, state = %d, "
6736		    "closed = %d\n", old_state, (int)event->cm_event.closed);
6737		daplka_ep_set_state(ep_rp, old_state, new_state);
6738		return (IBT_CM_ACCEPT);
6739	}
6740
6741	/*
6742	 * create a DAPL_IB_CME_DISCONNECTED event
6743	 */
6744	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6745	if (disc_ev == NULL) {
6746		DERR("service_conn_closed: cannot alloc disc_ev\n");
6747		daplka_ep_set_state(ep_rp, old_state, new_state);
6748		return (IBT_CM_ACCEPT);
6749	}
6750
6751	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6752	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6753	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6754	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6755	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6756	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6757
6758	D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6759	    disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6760
6761	/*
6762	 * transition ep_state to DISCONNECTED
6763	 */
6764	new_state = DAPLKA_EP_STATE_DISCONNECTED;
6765	daplka_ep_set_state(ep_rp, old_state, new_state);
6766
6767	/*
6768	 * enqueue event onto the conn_evd owned by ep_rp
6769	 */
6770	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6771	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6772
6773	return (IBT_CM_ACCEPT);
6774}
6775
6776/*
6777 * processes the CONN_EST event
6778 */
6779/* ARGSUSED */
6780static ibt_cm_status_t
6781daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
6782    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6783{
6784	daplka_ep_resource_t	*ep_rp;
6785	daplka_evd_event_t	*conn_ev;
6786	void			*pr_data = event->cm_priv_data;
6787	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
6788	uint32_t		old_state, new_state;
6789
6790	ep_rp = (daplka_ep_resource_t *)
6791	    ibt_get_chan_private(event->cm_channel);
6792	if (ep_rp == NULL) {
6793		DERR("service_conn_est: ep_rp == NULL\n");
6794		return (IBT_CM_ACCEPT);
6795	}
6796
6797	/*
6798	 * verify that ep_state is ACCEPTING. if it is not in this
6799	 * state, return without generating an event.
6800	 */
6801	new_state = old_state = daplka_ep_get_state(ep_rp);
6802	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
6803		/*
6804		 * we can get here if the connection is being aborted
6805		 */
6806		DERR("service_conn_est: conn aborted, state = %d\n",
6807		    old_state);
6808		daplka_ep_set_state(ep_rp, old_state, new_state);
6809		return (IBT_CM_ACCEPT);
6810	}
6811
6812	/*
6813	 * create a DAPL_IB_CME_CONNECTED event
6814	 */
6815	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6816	if (conn_ev == NULL) {
6817		DERR("service_conn_est: conn_ev alloc failed\n");
6818		daplka_ep_set_state(ep_rp, old_state, new_state);
6819		return (IBT_CM_ACCEPT);
6820	}
6821
6822	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
6823	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6824	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6825	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6826
6827	/*
6828	 * copy private data into event
6829	 */
6830	if (pr_len > 0) {
6831		conn_ev->ee_cmev.ec_cm_ev_priv_data =
6832		    kmem_zalloc(pr_len, KM_NOSLEEP);
6833		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6834			DERR("service_conn_est: pr_data alloc failed\n");
6835			daplka_ep_set_state(ep_rp, old_state, new_state);
6836			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
6837			return (IBT_CM_ACCEPT);
6838		}
6839		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6840	}
6841	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6842
6843	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
6844	    conn_ev, ep_rp->ep_conn_evd);
6845
6846	/*
6847	 * transition ep_state to CONNECTED
6848	 */
6849	new_state = DAPLKA_EP_STATE_CONNECTED;
6850	daplka_ep_set_state(ep_rp, old_state, new_state);
6851
6852	/*
6853	 * enqueue event onto the conn_evd owned by ep_rp
6854	 */
6855	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6856	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
6857
6858	return (IBT_CM_ACCEPT);
6859}
6860
6861/*
6862 * processes the FAILURE event
6863 */
6864/* ARGSUSED */
6865static ibt_cm_status_t
6866daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6867    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6868    ibt_priv_data_len_t len)
6869{
6870	daplka_evd_event_t	*disc_ev;
6871	daplka_ep_resource_t	*ep_rp;
6872	uint32_t		old_state, new_state;
6873	ibt_rc_chan_query_attr_t chan_attrs;
6874	ibt_status_t		status;
6875
6876	/*
6877	 * check that we still have a valid cm_channel before continuing
6878	 */
6879	if (event->cm_channel == NULL) {
6880		DERR("serice_event_failure: event->cm_channel == NULL\n");
6881		return (IBT_CM_ACCEPT);
6882	}
6883	ep_rp = (daplka_ep_resource_t *)
6884	    ibt_get_chan_private(event->cm_channel);
6885	if (ep_rp == NULL) {
6886		DERR("service_event_failure: ep_rp == NULL\n");
6887		return (IBT_CM_ACCEPT);
6888	}
6889
6890	/*
6891	 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6892	 * is not in either state, return without generating an event.
6893	 */
6894	new_state = old_state = daplka_ep_get_state(ep_rp);
6895	if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6896	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6897		/*
6898		 * we can get here if the connection is being aborted
6899		 */
6900		DERR("service_event_failure: conn aborted, state = %d, "
6901		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6902		    (int)event->cm_event.failed.cf_code,
6903		    (int)event->cm_event.failed.cf_msg,
6904		    (int)event->cm_event.failed.cf_reason);
6905
6906		daplka_ep_set_state(ep_rp, old_state, new_state);
6907		return (IBT_CM_ACCEPT);
6908	}
6909
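	/*
	 * if the channel is not already in the ERROR state, move it
	 * there so that any outstanding work requests get flushed.
	 */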
6910	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6911	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6912
6913	if ((status == IBT_SUCCESS) &&
6914	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6915		DERR("service_event_failure: conn abort qpn %d state %d\n",
6916		    chan_attrs.rc_qpn, chan_attrs.rc_state);
6917
		/* explicitly transition the QP to the ERROR state */
6919		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6920	}
6921
6922	/*
6923	 * create an event
6924	 */
6925	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6926	if (disc_ev == NULL) {
6927		DERR("service_event_failure: cannot alloc disc_ev\n");
6928		daplka_ep_set_state(ep_rp, old_state, new_state);
6929		return (IBT_CM_ACCEPT);
6930	}
6931
6932	/*
6933	 * fill in the appropriate event type
6934	 */
6935	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6936		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6937	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6938		switch (event->cm_event.failed.cf_reason) {
6939		case IBT_CM_INVALID_CID:
6940			disc_ev->ee_cmev.ec_cm_ev_type =
6941			    DAPL_IB_CME_DESTINATION_REJECT;
6942			break;
6943		default:
6944			disc_ev->ee_cmev.ec_cm_ev_type =
6945			    DAPL_IB_CME_LOCAL_FAILURE;
6946			break;
6947		}
6948	} else {
6949		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6950	}
6951	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6952	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6953	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6954	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6955	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6956
6957	D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6958	    "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6959	    ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6960	    (int)event->cm_event.failed.cf_msg,
6961	    (int)event->cm_event.failed.cf_reason,
6962	    (longlong_t)ep_rp->ep_psep_cookie);
6963
6964	/*
6965	 * transition ep_state to DISCONNECTED
6966	 */
6967	new_state = DAPLKA_EP_STATE_DISCONNECTED;
6968	daplka_ep_set_state(ep_rp, old_state, new_state);
6969
6970	/*
6971	 * enqueue event onto the conn_evd owned by ep_rp
6972	 */
6973	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6974	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6975
6976	return (IBT_CM_ACCEPT);
6977}
6978
6979/*
6980 * this is the passive side CM handler. it gets registered
6981 * when an SP resource is created in daplka_service_register.
6982 */
6983static ibt_cm_status_t
6984daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6986{
6987	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)cm_private;
6988
6989	if (sp_rp == NULL) {
6990		DERR("service_handler: sp_rp == NULL\n");
6991		return (IBT_CM_NO_RESOURCE);
6992	}
6993	/*
6994	 * default is not to return priv data
6995	 */
6996	if (ret_args != NULL) {
6997		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
6998		ret_args->cm_ret_len = 0;
6999	}
7000
7001	switch (event->cm_type) {
7002	case IBT_CM_EVENT_REQ_RCV:
7003		D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
7004		return (daplka_cm_service_req(sp_rp, event, ret_args,
7005		    event->cm_priv_data, event->cm_priv_data_len));
7006
7007	case IBT_CM_EVENT_REP_RCV:
7008		/* passive side should not receive this event */
7009		D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
7010		return (IBT_CM_DEFAULT);
7011
7012	case IBT_CM_EVENT_CONN_CLOSED:
7013		D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7014		    event->cm_event.closed);
7015		return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
7016		    priv_data, len));
7017
7018	case IBT_CM_EVENT_MRA_RCV:
		/* passive side does default processing of the MRA event */
7020		D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
7021		return (IBT_CM_DEFAULT);
7022
7023	case IBT_CM_EVENT_CONN_EST:
7024		D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
7025		return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
7026		    priv_data, len));
7027
7028	case IBT_CM_EVENT_FAILURE:
7029		D2("service_handler: IBT_CM_EVENT_FAILURE\n");
7030		return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
7031		    priv_data, len));
7032	case IBT_CM_EVENT_LAP_RCV:
		/* the active side has initiated a path migration operation */
7034		D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
7035		return (IBT_CM_ACCEPT);
7036	default:
7037		DERR("service_handler: invalid event %d\n", event->cm_type);
7038		break;
7039	}
7040	return (IBT_CM_DEFAULT);
7041}
7042
7043/*
7044 * Active side CM handlers
7045 */
7046
7047/*
7048 * Processes the REP_RCV event. When the passive side accepts the
7049 * connection, this handler is called. We make a copy of the private
 * data into the ep so that it can be passed back to userland when
7051 * the CONN_EST event occurs.
7052 */
7053/* ARGSUSED */
7054static ibt_cm_status_t
7055daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7056    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7057{
7058	void			*pr_data = event->cm_priv_data;
7059	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
7060	uint32_t		old_state, new_state;
7061
7062	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
7063	    (int)pr_len);
7064
7065	ASSERT(ep_rp != NULL);
7066	new_state = old_state = daplka_ep_get_state(ep_rp);
7067	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7068		/*
7069		 * we can get here if the connection is being aborted
7070		 */
7071		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
7072		daplka_ep_set_state(ep_rp, old_state, new_state);
7073		return (IBT_CM_NO_CHANNEL);
7074	}
7075
7076	/*
7077	 * we do not cancel the timer here because the connection
7078	 * handshake is still in progress.
7079	 */
7080
7081	/*
7082	 * save the private data. it will be passed up when
7083	 * the connection is established.
7084	 */
7085	if (pr_len > 0) {
7086		ep_rp->ep_priv_len = pr_len;
7087		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
7088	}
7089
7090	/*
7091	 * we do not actually transition to a different state.
7092	 * the state will change when we get a conn_est, failure,
7093	 * closed, or timeout event.
7094	 */
7095	daplka_ep_set_state(ep_rp, old_state, new_state);
7096	return (IBT_CM_ACCEPT);
7097}
7098
7099/*
7100 * Processes the CONN_CLOSED event. This gets called when either
7101 * the active or passive side closes the rc channel.
7102 */
7103/* ARGSUSED */
7104static ibt_cm_status_t
7105daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7106    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7107{
7108	daplka_evd_event_t	*disc_ev;
7109	uint32_t		old_state, new_state;
7110
7111	ASSERT(ep_rp != NULL);
7112	old_state = new_state = daplka_ep_get_state(ep_rp);
7113	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
7114	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7115		/*
7116		 * we can get here if the connection is being aborted
7117		 */
7118		D2("rc_conn_closed: conn aborted, state = %d, "
7119		    "closed = %d\n", old_state, (int)event->cm_event.closed);
7120		daplka_ep_set_state(ep_rp, old_state, new_state);
7121		return (IBT_CM_ACCEPT);
7122	}
7123
7124	/*
7125	 * it's ok for the timer to fire at this point. the
7126	 * taskq thread that processes the timer will just wait
7127	 * until we are done with our state transition.
7128	 */
7129	if (daplka_cancel_timer(ep_rp) != 0) {
7130		/*
7131		 * daplka_cancel_timer returns -1 if the timer is
7132		 * being processed and 0 for all other cases.
7133		 * we need to reset ep_state to allow timer processing
7134		 * to continue.
7135		 */
7136		DERR("rc_conn_closed: timer is being processed\n");
7137		daplka_ep_set_state(ep_rp, old_state, new_state);
7138		return (IBT_CM_ACCEPT);
7139	}
7140
7141	/*
7142	 * create a DAPL_IB_CME_DISCONNECTED event
7143	 */
7144	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7145	if (disc_ev == NULL) {
7146		DERR("rc_conn_closed: could not alloc ev\n");
7147		daplka_ep_set_state(ep_rp, old_state, new_state);
7148		return (IBT_CM_ACCEPT);
7149	}
7150
7151	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
7152	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7153	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7154	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7155	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
7156	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
7157
7158	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
7159	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);
7160
7161	/*
7162	 * transition ep_state to DISCONNECTED
7163	 */
7164	new_state = DAPLKA_EP_STATE_DISCONNECTED;
7165	daplka_ep_set_state(ep_rp, old_state, new_state);
7166
7167	/*
7168	 * enqueue event onto the conn_evd owned by ep_rp
7169	 */
7170	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7171	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7172
7173	return (IBT_CM_ACCEPT);
7174}
7175
7176/*
7177 * processes the CONN_EST event
7178 */
7179/* ARGSUSED */
7180static ibt_cm_status_t
7181daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7182    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7183{
7184	daplka_evd_event_t	*conn_ev;
7185	uint32_t		old_state, new_state;
7186
7187	ASSERT(ep_rp != NULL);
7188	old_state = new_state = daplka_ep_get_state(ep_rp);
7189	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7190		/*
7191		 * we can get here if the connection is being aborted
7192		 */
7193		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
7194		daplka_ep_set_state(ep_rp, old_state, new_state);
7195		return (IBT_CM_ACCEPT);
7196	}
7197
7198	/*
7199	 * it's ok for the timer to fire at this point. the
7200	 * taskq thread that processes the timer will just wait
7201	 * until we are done with our state transition.
7202	 */
7203	if (daplka_cancel_timer(ep_rp) != 0) {
7204		/*
7205		 * daplka_cancel_timer returns -1 if the timer is
7206		 * being processed and 0 for all other cases.
7207		 * we need to reset ep_state to allow timer processing
7208		 * to continue.
7209		 */
7210		DERR("rc_conn_est: timer is being processed\n");
7211		daplka_ep_set_state(ep_rp, old_state, new_state);
7212		return (IBT_CM_ACCEPT);
7213	}
7214
7215	/*
7216	 * create a DAPL_IB_CME_CONNECTED event
7217	 */
7218	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7219	if (conn_ev == NULL) {
7220		DERR("rc_conn_est: could not alloc ev\n");
7221		daplka_ep_set_state(ep_rp, old_state, new_state);
7222		return (IBT_CM_ACCEPT);
7223	}
7224
7225	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
7226	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7227	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7228	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;
7229
7230	/*
7231	 * The private data passed back in the connection established
7232	 * event is what was received in the daplka_cm_rc_rep_rcv handler
7233	 * and saved in the ep resource structure.
7234	 */
7235	if (ep_rp->ep_priv_len > 0) {
7236		conn_ev->ee_cmev.ec_cm_ev_priv_data =
7237		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);
7238
7239		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7240			DERR("rc_conn_est: could not alloc pr_data\n");
7241			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
7242			daplka_ep_set_state(ep_rp, old_state, new_state);
7243			return (IBT_CM_ACCEPT);
7244		}
7245		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
7246		    ep_rp->ep_priv_len);
7247	}
7248	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;
7249
7250	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
7251	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
7252	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
7253	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);
7254
7255	/*
7256	 * transition ep_state to CONNECTED
7257	 */
7258	new_state = DAPLKA_EP_STATE_CONNECTED;
7259	daplka_ep_set_state(ep_rp, old_state, new_state);
7260
7261	/*
7262	 * enqueue event onto the conn_evd owned by ep_rp
7263	 */
7264	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7265	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
7266
7267	return (IBT_CM_ACCEPT);
7268}
7269
7270/*
7271 * processes the FAILURE event
7272 */
7273/* ARGSUSED */
7274static ibt_cm_status_t
7275daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7276    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7277{
7278	daplka_evd_event_t	*disc_ev;
7279	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
7280	void			*pr_data = event->cm_priv_data;
7281	uint32_t		old_state, new_state;
7282	ibt_rc_chan_query_attr_t chan_attrs;
7283	ibt_status_t		status;
7284
7285	ASSERT(ep_rp != NULL);
7286	old_state = new_state = daplka_ep_get_state(ep_rp);
7287	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
7288	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7289		/*
7290		 * we can get here if the connection is being aborted
7291		 */
7292		DERR("rc_event_failure: conn aborted, state = %d, "
7293		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
7294		    (int)event->cm_event.failed.cf_code,
7295		    (int)event->cm_event.failed.cf_msg,
7296		    (int)event->cm_event.failed.cf_reason);
7297
7298		daplka_ep_set_state(ep_rp, old_state, new_state);
7299		return (IBT_CM_ACCEPT);
7300	}
7301
7302	/*
7303	 * it's ok for the timer to fire at this point. the
7304	 * taskq thread that processes the timer will just wait
7305	 * until we are done with our state transition.
7306	 */
7307	if (daplka_cancel_timer(ep_rp) != 0) {
7308		/*
7309		 * daplka_cancel_timer returns -1 if the timer is
7310		 * being processed and 0 for all other cases.
7311		 * we need to reset ep_state to allow timer processing
7312		 * to continue.
7313		 */
7314		DERR("rc_event_failure: timer is being processed\n");
7315		daplka_ep_set_state(ep_rp, old_state, new_state);
7316		return (IBT_CM_ACCEPT);
7317	}
7318
7319	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
7320	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
7321
7322	if ((status == IBT_SUCCESS) &&
7323	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
7324		DERR("rc_event_failure: conn abort qpn %d state %d\n",
7325		    chan_attrs.rc_qpn, chan_attrs.rc_state);
7326
7327		/* explicitly transition the QP to the ERROR state */
7328		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
7329	}
7330
7331	/*
7332	 * create an event
7333	 */
7334	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7335	if (disc_ev == NULL) {
7336		DERR("rc_event_failure: cannot alloc disc_ev\n");
7337		daplka_ep_set_state(ep_rp, old_state, new_state);
7338		return (IBT_CM_ACCEPT);
7339	}
7340
7341	/*
7342	 * copy private data into event
7343	 */
7344	if (pr_len > 0) {
7345		disc_ev->ee_cmev.ec_cm_ev_priv_data =
7346		    kmem_zalloc(pr_len, KM_NOSLEEP);
7347
7348		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7349			DERR("rc_event_failure: cannot alloc pr data\n");
7350			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
7351			daplka_ep_set_state(ep_rp, old_state, new_state);
7352			return (IBT_CM_ACCEPT);
7353		}
7354		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
7355	}
7356	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
7357
7358	/*
7359	 * fill in the appropriate event type
7360	 */
7361	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
7362		switch (event->cm_event.failed.cf_reason) {
7363		case IBT_CM_CONSUMER:
7364			disc_ev->ee_cmev.ec_cm_ev_type =
7365			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
7366			break;
7367		case IBT_CM_NO_CHAN:
7368		case IBT_CM_NO_RESC:
7369			disc_ev->ee_cmev.ec_cm_ev_type =
7370			    DAPL_IB_CME_DESTINATION_REJECT;
7371			break;
7372		default:
7373			disc_ev->ee_cmev.ec_cm_ev_type =
7374			    DAPL_IB_CME_DESTINATION_REJECT;
7375			break;
7376		}
7377	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
7378		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
7379	} else {
7380		/* others we'll mark as local failure */
7381		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
7382	}
7383	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7384	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7385	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7386
7387	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
7388	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
7389	    (int)event->cm_event.failed.cf_code,
7390	    (int)event->cm_event.failed.cf_msg,
7391	    (int)event->cm_event.failed.cf_reason);
7392
7393	/*
7394	 * transition ep_state to DISCONNECTED
7395	 */
7396	new_state = DAPLKA_EP_STATE_DISCONNECTED;
7397	daplka_ep_set_state(ep_rp, old_state, new_state);
7398
7399	/*
7400	 * enqueue event onto the conn_evd owned by ep_rp
7401	 */
7402	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7403	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7404
7405	return (IBT_CM_ACCEPT);
7406}
7407
7408/*
7409 * This is the active side CM handler. It gets registered when
7410 * ibt_open_rc_channel is called.
7411 */
7412static ibt_cm_status_t
7413daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7414    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7415{
7416	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7417
7418	if (ep_rp == NULL) {
7419		DERR("rc_handler: ep_rp == NULL\n");
7420		return (IBT_CM_NO_CHANNEL);
7421	}
7422	/*
7423	 * default is not to return priv data
7424	 */
7425	if (ret_args != NULL) {
7426		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
7427		ret_args->cm_ret_len = 0;
7428	}
7429
7430	switch (event->cm_type) {
7431	case IBT_CM_EVENT_REQ_RCV:
7432		/* active side should not receive this event */
7433		D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7434		break;
7435
7436	case IBT_CM_EVENT_REP_RCV:
7437		/* connection accepted by passive side */
7438		D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7439		return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7440		    priv_data, len));
7441
7442	case IBT_CM_EVENT_CONN_CLOSED:
7443		D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7444		    event->cm_event.closed);
7445		return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7446		    priv_data, len));
7447
7448	case IBT_CM_EVENT_MRA_RCV:
7449		/* active side does default processing of the MRA event */
7450		D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7451		return (IBT_CM_DEFAULT);
7452
7453	case IBT_CM_EVENT_CONN_EST:
7454		D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7455		return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7456		    priv_data, len));
7457
7458	case IBT_CM_EVENT_FAILURE:
7459		D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7460		return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7461		    priv_data, len));
7462
7463	default:
7464		D2("rc_handler: invalid event %d\n", event->cm_type);
7465		break;
7466	}
7467	return (IBT_CM_DEFAULT);
7468}
7469
7470/*
7471 * creates an IA resource and inserts it into the global resource table.
7472 */
7473/* ARGSUSED */
7474static int
7475daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7476	cred_t *cred, int *rvalp)
7477{
7478	daplka_ia_resource_t	*ia_rp, *tmp_rp;
7479	boolean_t		inserted = B_FALSE;
7480	dapl_ia_create_t	args;
7481	ibt_hca_hdl_t		hca_hdl;
7482	ibt_status_t		status;
7483	ib_gid_t		sgid;
7484	int			retval;
7485	ibt_hca_portinfo_t	*pinfop;
7486	uint_t			pinfon;
7487	uint_t			size;
7488	ibt_ar_t		ar_s;
7489	daplka_hca_t		*hca;
7490
7491	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7492	    mode);
7493	if (retval != 0) {
7494		DERR("ia_create: copyin error %d\n", retval);
7495		return (EFAULT);
7496	}
7497	if (args.ia_version != DAPL_IF_VERSION) {
7498		DERR("ia_create: invalid version %d, expected version %d\n",
7499		    args.ia_version, DAPL_IF_VERSION);
7500		return (EINVAL);
7501	}
7502
7503	/*
7504	 * find the hca with the matching guid
7505	 */
7506	mutex_enter(&daplka_dev->daplka_mutex);
7507	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7508	    hca = hca->hca_next) {
7509		if (hca->hca_guid == args.ia_guid) {
7510			DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7511			break;
7512		}
7513	}
7514	mutex_exit(&daplka_dev->daplka_mutex);
7515
7516	if (hca == NULL) {
7517		DERR("ia_create: guid 0x%016llx not found\n",
7518		    (longlong_t)args.ia_guid);
7519		return (EINVAL);
7520	}
7521
7522	/*
7523	 * check whether the port number is valid
7524	 */
7525	if (args.ia_port > hca->hca_nports) {
7526		DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7527		DAPLKA_RELE_HCA(daplka_dev, hca);
7528		return (EINVAL);
7529	}
7530	hca_hdl = hca->hca_hdl;
7531	if (hca_hdl == NULL) {
7532		DERR("ia_create: hca_hdl == NULL\n");
7533		DAPLKA_RELE_HCA(daplka_dev, hca);
7534		return (EINVAL);
7535	}
7536	status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7537	    &pinfop, &pinfon, &size);
7538	if (status != IBT_SUCCESS) {
7539		DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7540		*rvalp = (int)status;
7541		DAPLKA_RELE_HCA(daplka_dev, hca);
7542		return (0);
7543	}
7544	sgid = pinfop->p_sgid_tbl[0];
7545	ibt_free_portinfo(pinfop, size);
7546
7547	ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7548	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7549	DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7550
7551	mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7552	cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7553	ia_rp->ia_hca_hdl = hca_hdl;
7554	ia_rp->ia_hca_sgid = sgid;
7555	ia_rp->ia_hca = hca;
7556	ia_rp->ia_port_num = args.ia_port;
7557	ia_rp->ia_port_pkey = args.ia_pkey;
7558	ia_rp->ia_pid = ddi_get_pid();
7559	ia_rp->ia_async_evd_hkeys = NULL;
7560	ia_rp->ia_ar_registered = B_FALSE;
7561	bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7562
7563	/* register Address Record */
7564	ar_s.ar_gid = ia_rp->ia_hca_sgid;
7565	ar_s.ar_pkey = ia_rp->ia_port_pkey;
7566	bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
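	/* debug dump of SA data bytes 8-15 */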
7567#define	UC(b) ar_s.ar_data[(b)]
7568	D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7569	    UC(8), UC(9), UC(10), UC(11));
7570	D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7571	    UC(12), UC(13), UC(14), UC(15));
7572	retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7573	if (retval != IBT_SUCCESS) {
7574		DERR("ia_create: failed to register Address Record.\n");
7575		retval = EINVAL;
7576		goto cleanup;
7577	}
7578	ia_rp->ia_ar_registered = B_TRUE;
7579
7580	/*
7581	 * create hash tables for all object types
7582	 */
7583	retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7584	    daplka_hash_ep_free, daplka_hash_generic_lookup);
7585	if (retval != 0) {
7586		DERR("ia_create: cannot create ep hash table\n");
7587		goto cleanup;
7588	}
7589	retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7590	    daplka_hash_mr_free, daplka_hash_generic_lookup);
7591	if (retval != 0) {
7592		DERR("ia_create: cannot create mr hash table\n");
7593		goto cleanup;
7594	}
7595	retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7596	    daplka_hash_mw_free, daplka_hash_generic_lookup);
7597	if (retval != 0) {
7598		DERR("ia_create: cannot create mw hash table\n");
7599		goto cleanup;
7600	}
7601	retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7602	    daplka_hash_pd_free, daplka_hash_generic_lookup);
7603	if (retval != 0) {
7604		DERR("ia_create: cannot create pd hash table\n");
7605		goto cleanup;
7606	}
7607	retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7608	    daplka_hash_evd_free, daplka_hash_generic_lookup);
7609	if (retval != 0) {
7610		DERR("ia_create: cannot create evd hash table\n");
7611		goto cleanup;
7612	}
7613	retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7614	    daplka_hash_cno_free, daplka_hash_generic_lookup);
7615	if (retval != 0) {
7616		DERR("ia_create: cannot create cno hash table\n");
7617		goto cleanup;
7618	}
7619	retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7620	    daplka_hash_sp_free, daplka_hash_generic_lookup);
7621	if (retval != 0) {
7622		DERR("ia_create: cannot create sp hash table\n");
7623		goto cleanup;
7624	}
7625	retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7626	    daplka_hash_srq_free, daplka_hash_generic_lookup);
7627	if (retval != 0) {
7628		DERR("ia_create: cannot create srq hash table\n");
7629		goto cleanup;
7630	}
7631	/*
7632	 * insert ia_rp into the global resource table
7633	 */
7634	retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7635	if (retval != 0) {
7636		DERR("ia_create: cannot insert resource\n");
7637		goto cleanup;
7638	}
7639	inserted = B_TRUE;
7640	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))
7641
7642	args.ia_resnum = rnum;
7643	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_create_t), mode);
7644	if (retval != 0) {
7645		DERR("ia_create: copyout error %d\n", retval);
7646		retval = EFAULT;
7647		goto cleanup;
7648	}
7649	return (0);
7650
7651cleanup:;
7652	if (inserted) {
7653		tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7654		if (tmp_rp != ia_rp) {
7655			/*
7656			 * we can return here because another thread must
7657			 * have freed up the resource
7658			 */
7659			DERR("ia_create: cannot remove resource\n");
7660			return (retval);
7661		}
7662	}
7663	DAPLKA_RS_UNREF(ia_rp);
7664	return (retval);
7665}
7666
7667/*
7668 * destroys an IA resource
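 * (registered as the destructor via DAPLKA_RS_INIT; it runs when the
 * last reference on the IA is released)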
7669 */
7670static int
7671daplka_ia_destroy(daplka_resource_t *gen_rp)
7672{
7673	daplka_ia_resource_t	*ia_rp = (daplka_ia_resource_t *)gen_rp;
7674	daplka_async_evd_hkey_t *hkp;
7675	int			cnt;
7676	ibt_ar_t		ar_s;
7677
7678	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7679	D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7680
7681	/* deregister Address Record */
7682	if (ia_rp->ia_ar_registered) {
7683		ar_s.ar_gid = ia_rp->ia_hca_sgid;
7684		ar_s.ar_pkey = ia_rp->ia_port_pkey;
7685		bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7686		(void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7687		ia_rp->ia_ar_registered = B_FALSE;
7688	}
7689
7690	/*
7691	 * destroy hash tables. make sure resources are
7692	 * destroyed in the correct order.
7693	 */
7694	daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7695	daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7696	daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7697	daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7698	daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7699	daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7700	daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7701	daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7702
7703	/*
7704	 * free the async evd list
7705	 */
7706	cnt = 0;
7707	hkp = ia_rp->ia_async_evd_hkeys;
7708	while (hkp != NULL) {
7709		daplka_async_evd_hkey_t	*free_hkp;
7710
7711		cnt++;
7712		free_hkp = hkp;
7713		hkp = hkp->aeh_next;
7714		kmem_free(free_hkp, sizeof (*free_hkp));
7715	}
7716	if (cnt > 0) {
7717		D3("ia_destroy: freed %d hkeys\n", cnt);
7718	}
7719	mutex_destroy(&ia_rp->ia_lock);
7720	cv_destroy(&ia_rp->ia_cv);
7721	ia_rp->ia_hca_hdl = NULL;
7722
7723	DAPLKA_RS_FINI(ia_rp);
7724
7725	if (ia_rp->ia_hca)
7726		DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7727
7728	D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7729	kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7730	return (0);
7731}
7732
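/*
 * daplka_async_event_create: for each async evd registered with this
 * IA, allocates a copy of the async event and enqueues it onto that
 * evd, waking up any waiters.
 */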
7733static void
7734daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7735    uint64_t cookie, daplka_ia_resource_t *ia_rp)
7736{
7737	daplka_evd_event_t	*evp;
7738	daplka_evd_resource_t	*async_evd;
7739	daplka_async_evd_hkey_t	*curr;
7740
7741	mutex_enter(&ia_rp->ia_lock);
7742	curr = ia_rp->ia_async_evd_hkeys;
7743	while (curr != NULL) {
7744		/*
7745		 * Note: this allocation does not zero out the buffer
7746		 * since we init all the fields.
7747		 */
7748		evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7749		if (evp == NULL) {
7750			DERR("async_event_enqueue: event alloc failed, ia_rp(%p) "
7751			    "hkey(%llx)\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7752			curr = curr->aeh_next;
7753			continue;
7754		}
7755		evp->ee_next = NULL;
7756		evp->ee_aev.ibae_type = code;
7757		evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7758		evp->ee_aev.ibae_cookie = cookie;
7759		evp->ee_aev.ibae_port = event->ev_port;
7760
7761		/*
7762		 * Lookup the async evd corresponding to this ia and enqueue
7763		 * Look up the async evd corresponding to this ia; enqueue
7764		 * evp and wake up any waiter.
7765		async_evd = (daplka_evd_resource_t *)
7766		    daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7767		if (async_evd == NULL) { /* async evd is being freed */
7768			DERR("async_event_enqueue: ia_rp(%p) async_evd %llx "
7769			    "not found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7770			kmem_free(evp, sizeof (daplka_evd_event_t));
7771			curr = curr->aeh_next;
7772			continue;
7773		}
7774		daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7775
7776		/* decrement refcnt on async_evd */
7777		DAPLKA_RS_UNREF(async_evd);
7778		curr = curr->aeh_next;
7779	}
7780	mutex_exit(&ia_rp->ia_lock);
7781}
7782/*
7783 * This routine is called in kernel context
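 * to handle channel affiliated async errors. It maps the channel back
 * to its ep and ia, then enqueues an async event on the ia's async evds.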
7784 */
7785
7786/* ARGSUSED */
7787static void
7788daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7789    ibt_async_code_t code, ibt_async_event_t *event)
7790{
7791	daplka_ep_resource_t		*epp;
7792	daplka_ia_resource_t		*ia_rp;
7793	minor_t				ia_rnum;
7794
7795	if (event->ev_chan_hdl == NULL) {
7796		DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7797		return;
7798	}
7799
7800	mutex_enter(&daplka_dev->daplka_mutex);
7801	epp = ibt_get_chan_private(event->ev_chan_hdl);
7802	if (epp == NULL) {
7803		mutex_exit(&daplka_dev->daplka_mutex);
7804		DERR("daplka_rc_async_handler: chan_private is NULL\n");
7805		return;
7806	}
7807
7808	/* grab a reference to this ep */
7809	DAPLKA_RS_REF(epp);
7810	mutex_exit(&daplka_dev->daplka_mutex);
7811
7812	/*
7813	 * The endpoint resource has the resource number corresponding to
7814	 * the IA resource. Use that to look up the ia resource entry.
7815	 */
7816	ia_rnum = DAPLKA_RS_RNUM(epp);
7817	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7818	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7819		D2("daplka_rc_async_handler: resource (%d) not found\n",
7820		    ia_rnum);
7821		DAPLKA_RS_UNREF(epp);
7822		return;
7823	}
7824
7825	/*
7826	 * Create an async event and chain it to the async evd
7827	 */
7828	daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7829
7830	DAPLKA_RS_UNREF(ia_rp);
7831	DAPLKA_RS_UNREF(epp);
7832}
7833
7834/*
7835 * This routine is called in kernel context
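 * to handle CQ affiliated async errors. It maps the CQ back to its
 * evd and ia, then enqueues an async event on the ia's async evds.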
7836 */
7837
7838/* ARGSUSED */
7839static void
7840daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7841    ibt_async_code_t code, ibt_async_event_t *event)
7842{
7843	daplka_evd_resource_t		*evdp;
7844	daplka_ia_resource_t		*ia_rp;
7845	minor_t				ia_rnum;
7846
7847	if (event->ev_cq_hdl == NULL)
7848		return;
7849
7850	mutex_enter(&daplka_dev->daplka_mutex);
7851	evdp = ibt_get_cq_private(event->ev_cq_hdl);
7852	if (evdp == NULL) {
7853		mutex_exit(&daplka_dev->daplka_mutex);
7854		DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7855		    event->ev_cq_hdl);
7856		return;
7857	}
7858	/* grab a reference to this evd resource */
7859	DAPLKA_RS_REF(evdp);
7860	mutex_exit(&daplka_dev->daplka_mutex);
7861
7862	/*
7863	 * The evd resource has the resource number corresponding to
7864	 * the IA resource. Use that to look up the ia resource entry.
7865	 */
7866	ia_rnum = DAPLKA_RS_RNUM(evdp);
7867	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7868	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7869		DERR("daplka_cq_async_handler: resource (%d) not found\n",
7870		    ia_rnum);
7871		DAPLKA_RS_UNREF(evdp);
7872		return;
7873	}
7874
7875	/*
7876	 * Create an async event and chain it to the async evd
7877	 */
7878	daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7879
7880	/* release all the references that were acquired */
7881	DAPLKA_RS_UNREF(ia_rp);
7882	DAPLKA_RS_UNREF(evdp);
7883}
7884
7885/*
7886 * This routine is called in kernel context; it handles unaffiliated async errors
7887 */
7888
7889/* ARGSUSED */
7890static void
7891daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7892    ibt_async_code_t code, ibt_async_event_t *event)
7893{
7894	int			i, j;
7895	daplka_resource_blk_t	*blk;
7896	daplka_resource_t	*rp;
7897	daplka_ia_resource_t	*ia_rp;
7898
7899	/*
7900	 * Walk the resource table looking for an ia that matches the
7901	 * hca_hdl.
7902	 */
7903	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7904	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7905		blk = daplka_resource.daplka_rc_root[i];
7906		if (blk == NULL)
7907			continue;
7908		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7909			rp = blk->daplka_rcblk_blks[j];
7910			if ((rp == NULL) ||
7911			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7912			    (rp->rs_type != DAPL_TYPE_IA)) {
7913				continue;
7914			}
7915			/*
7916			 * rp is an IA resource; check if it belongs
7917			 * to the hca/port for which we got the event.
7918			 */
7919			ia_rp = (daplka_ia_resource_t *)rp;
7920			DAPLKA_RS_REF(ia_rp);
7921			if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7922			    (event->ev_port == ia_rp->ia_port_num)) {
7923				/*
7924				 * walk the ep hash table. Acquire a
7925				 * reader lock. NULL dgid indicates
7926				 * local port up event.
7927				 */
7928				daplka_hash_walk(&ia_rp->ia_ep_htbl,
7929				    daplka_ep_failback, NULL, RW_READER);
7930			}
7931			DAPLKA_RS_UNREF(ia_rp);
7932		}
7933	}
7934	rw_exit(&daplka_resource.daplka_rct_lock);
7935}
7936
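/*
 * Handles the HCA detach event. The HCA is dequeued and freed only if
 * none of its resources are in use; otherwise IBTF is told that the
 * resources have not been freed.
 */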
7937static int
7938daplka_handle_hca_detach_event(ibt_async_event_t *event)
7939{
7940	daplka_hca_t	*hca;
7941
7942	/*
7943	 * find the hca with the matching guid
7944	 */
7945	mutex_enter(&daplka_dev->daplka_mutex);
7946	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7947	    hca = hca->hca_next) {
7948		if (hca->hca_guid == event->ev_hca_guid) {
7949			if (DAPLKA_HCA_BUSY(hca)) {
7950				mutex_exit(&daplka_dev->daplka_mutex);
7951				return (IBT_HCA_RESOURCES_NOT_FREED);
7952			}
7953			daplka_dequeue_hca(daplka_dev, hca);
7954			break;
7955		}
7956	}
7957	mutex_exit(&daplka_dev->daplka_mutex);
7958
7959	if (hca == NULL)
7960		return (IBT_FAILURE);
7961
7962	return (daplka_fini_hca(daplka_dev, hca));
7963}
7964
7965/*
7966 * This routine is called in kernel context
7967 */
7968static void
7969daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7970    ibt_async_code_t code, ibt_async_event_t *event)
7971{
7972	switch (code) {
7973	case IBT_ERROR_CATASTROPHIC_CHAN:
7974	case IBT_ERROR_INVALID_REQUEST_CHAN:
7975	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7976	case IBT_ERROR_PATH_MIGRATE_REQ:
7977		D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
7978		/* These events are affiliated with the RC channel */
7979		daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7980		break;
7981	case IBT_ERROR_CQ:
7982		/* This event is affiliated with the CQ */
7983		D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7984		daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7985		break;
7986	case IBT_ERROR_PORT_DOWN:
7987		D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7988		break;
7989	case IBT_EVENT_PORT_UP:
7990		D2("daplka_async_handler(): IBT_PORT_UP\n");
7991		if (daplka_apm) {
7992			daplka_un_async_handler(clnt_private, hca_hdl, code,
7993			    event);
7994		}
7995		break;
7996	case IBT_HCA_ATTACH_EVENT:
7997		/*
7998		 * NOTE: In some error recovery paths, it is possible to
7999		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
8000		 */
8001		D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
8002		(void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
8003		break;
8004	case IBT_HCA_DETACH_EVENT:
8005		D2("daplka_async_handler(): IBT_HCA_DETACH\n");
8006		/* Free all hca resources and close the HCA. */
8007		(void) daplka_handle_hca_detach_event(event);
8008		break;
8009	case IBT_EVENT_PATH_MIGRATED:
8010		/* This event is affiliated with APM */
8011		D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
8012		break;
8013	default:
8014		D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
8015		break;
8016	}
8017}
8018
8019/*
8020 * This routine is called in kernel context related to Subnet events
8021 */
8022/*ARGSUSED*/
8023static void
8024daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
8025	ibt_subnet_event_t *event)
8026{
8027	ib_gid_t *sgid = &gid;
8028	ib_gid_t *dgid;
8029
8030	dgid = &event->sm_notice_gid;
8031	switch (code) {
8032	case IBT_SM_EVENT_GID_AVAIL:
8033		/* This event is affiliated with remote port up */
8034		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
8035		if (daplka_apm)
8036			daplka_sm_gid_avail(sgid, dgid);
8037		return;
8038	case IBT_SM_EVENT_GID_UNAVAIL:
8039		/* This event is affiliated with remote port down */
8040		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8041		return;
8042	default:
8043		D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8044		    code);
8045		return;
8046	}
8047}
8048
8049/*
8050 * This routine is called in kernel context, handles Subnet GID avail events
8051 * which correspond to remote port up. Setting up alternate path or path
8052 * migration (failback) has to be initiated from the active side of the
8053 * original connect.
8054 */
8055static void
8056daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8057{
8058	int			i, j;
8059	daplka_resource_blk_t	*blk;
8060	daplka_resource_t	*rp;
8061	daplka_ia_resource_t	*ia_rp;
8062
8063	D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8064	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8065	    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8066
8067	/*
8068	 * Walk the resource table looking for an ia that matches the sgid
8069	 */
8070	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8071	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8072		blk = daplka_resource.daplka_rc_root[i];
8073		if (blk == NULL)
8074			continue;
8075		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8076			rp = blk->daplka_rcblk_blks[j];
8077			if ((rp == NULL) ||
8078			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8079			    (rp->rs_type != DAPL_TYPE_IA)) {
8080				continue;
8081			}
8082			/*
8083			 * rp is an IA resource; check if its gid
8084			 * matches the calling sgid.
8085			 */
8086			ia_rp = (daplka_ia_resource_t *)rp;
8087			DAPLKA_RS_REF(ia_rp);
8088			if ((sgid->gid_prefix ==
8089			    ia_rp->ia_hca_sgid.gid_prefix) &&
8090			    (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8091				/*
8092				 * walk the ep hash table. Acquire a
8093				 * reader lock.
8094				 */
8095				daplka_hash_walk(&ia_rp->ia_ep_htbl,
8096				    daplka_ep_failback,
8097				    (void *)dgid, RW_READER);
8098			}
8099			DAPLKA_RS_UNREF(ia_rp);
8100		}
8101	}
8102	rw_exit(&daplka_resource.daplka_rct_lock);
8103}
8104
8105/*
8106 * This routine is called in kernel context to get and set an alternate path
8107 */
8108static int
8109daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8110{
8111	ibt_alt_path_info_t path_info;
8112	ibt_alt_path_attr_t path_attr;
8113	ibt_ap_returns_t ap_rets;
8114	ibt_status_t status;
8115
8116	D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8117	bzero(&path_info, sizeof (ibt_alt_path_info_t));
8118	bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8119	if (dgid != NULL) {
8120		path_attr.apa_sgid = ep_rp->ep_sgid;
8121		path_attr.apa_dgid = *dgid;
8122	}
8123	status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8124	    &path_attr, &path_info);
8125	if (status != IBT_SUCCESS) {
8126		DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8127		    status);
8128		return (1);
8129	}
8130
8131	D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8132	bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8133	status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8134	    &path_info, NULL, 0, &ap_rets);
8135	if ((status != IBT_SUCCESS) ||
8136	    (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8137		DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8138		    "status %d ap_status %d\n", status, ap_rets.ap_status);
8139		return (1);
8140	}
8141	return (0);
8142}
8143
8144/*
8145 * This routine is called in kernel context to failback to the original path
8146 */
8147static int
8148daplka_ep_failback(void *objp, void *arg)
8149{
8150	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
8151	ib_gid_t *dgid;
8152	ibt_status_t status;
8153	ibt_rc_chan_query_attr_t chan_attrs;
8154	int i;
8155
8156	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
8157	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
8158	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
8159	    (longlong_t)ep_rp->ep_sgid.gid_guid,
8160	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
8161	    (longlong_t)ep_rp->ep_dgid.gid_guid);
8162
8163	/*
8164	 * daplka_ep_failback is called from daplka_hash_walk
8165	 * which holds the read lock on the hash table to protect
8166	 * the endpoint resource from removal
8167	 */
8168	mutex_enter(&ep_rp->ep_lock);
8169	/* check for unconnected endpoints */
8170	/* first check for ep state */
8171	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
8172		mutex_exit(&ep_rp->ep_lock);
8173		D2("daplka_ep_failback : endpoints not connected\n");
8174		return (0);
8175	}
8176
8177	/* second check for gids */
8178	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
8179	    (ep_rp->ep_sgid.gid_guid == 0)) ||
8180	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
8181	    (ep_rp->ep_dgid.gid_guid == 0))) {
8182		mutex_exit(&ep_rp->ep_lock);
8183		D2("daplka_ep_failback : skip unconnected endpoints\n");
8184		return (0);
8185	}
8186
8187	/*
8188	 * matching destination ep
8189	 * when dgid is NULL, the async event is a local port up.
8190	 * dgid becomes wild card, i.e. all endpoints match
8191	 */
8192	dgid = (ib_gid_t *)arg;
8193	if (dgid == NULL) {
8194		/* ignore loopback ep */
8195		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
8196		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
8197			mutex_exit(&ep_rp->ep_lock);
8198			D2("daplka_ep_failback : skip loopback endpoints\n");
8199			return (0);
8200		}
8201	} else {
8202		/* matching remote ep */
8203		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
8204		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
8205			mutex_exit(&ep_rp->ep_lock);
8206			D2("daplka_ep_failback : unrelated endpoints\n");
8207			return (0);
8208		}
8209	}
8210
8211	/* call get and set altpath with original dgid used in ep_connect */
8212	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
8213		mutex_exit(&ep_rp->ep_lock);
8214		return (0);
8215	}
8216
8217	/*
8218	 * wait for migration state to be ARMed
8219	 * e.g. a post_send msg will transition mig_state from REARM to ARM
8220	 */
8221	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
8222		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
8223		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
8224		if (status != IBT_SUCCESS) {
8225			mutex_exit(&ep_rp->ep_lock);
8226			DERR("daplka_ep_failback : ibt_query_rc_channel err\n");
8227			return (0);
8228		}
8229		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
8230			break;
8231	}
8232
8233	D2("daplka_ep_failback : query[%d] mig_st=%d\n",
8234	    i, chan_attrs.rc_mig_state);
8235	D2("daplka_ep_failback : P sgid=%llx:%llx dgid=%llx:%llx\n",
8236	    (longlong_t)
8237	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
8238	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
8239	    (longlong_t)
8240	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
8241	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
8242	D2("daplka_ep_failback : A sgid=%llx:%llx dgid=%llx:%llx\n",
8243	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
8244	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
8245	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
8246	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);
8247
8248	/* skip failback if the ARMed state was not reached, or if disabled */
8249	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
8250		mutex_exit(&ep_rp->ep_lock);
8251		DERR("daplka_ep_failback : ARMed state not reached\n");
8252		return (0);
8253	}
8254
8255	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
8256	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
8257	if (status != IBT_SUCCESS) {
8258		mutex_exit(&ep_rp->ep_lock);
8259		DERR("daplka_ep_failback : migration failed "
8260		    "status %d\n", status);
8261		return (0);
8262	}
8263
8264	/* get and set altpath with NULL dgid to indicate unspecified dgid */
8265	(void) daplka_ep_altpath(ep_rp, NULL);
8266	mutex_exit(&ep_rp->ep_lock);
8267	return (0);
8268}
8269
8270/*
8271 * IBTF wrappers used for resource accounting
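 *
 * When accounting is enabled, each alloc/register wrapper charges
 * the per-HCA count for that resource type before calling into IBTF
 * and backs the charge out if the call fails; the common shape is:
 *
 *	if (acct_enabled) {
 *		<check tunable limit>
 *		DAPLKA_RS_ACCT_INC(rp, 1);
 *		atomic_add_32(&hca_p->hca_xx_count, 1);
 *	}
 *	status = ibt_alloc_xx(...);
 *	if (status != IBT_SUCCESS && acct_enabled)
 *		<back out the charge>
 *
 * The limit is a tunable percentage of the HCA capacity for that
 * resource type. The free/deregister wrappers only uncharge what
 * was actually charged.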
8272 */
8273static ibt_status_t
8274daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8275    ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8276    ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8277{
8278	daplka_hca_t	*hca_p;
8279	uint32_t	max_qps;
8280	boolean_t	acct_enabled;
8281	ibt_status_t	status;
8282
8283	acct_enabled = daplka_accounting_enabled;
8284	hca_p = ep_rp->ep_hca;
8285	max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8286
8287	if (acct_enabled) {
8288		if (daplka_max_qp_percent != 0 &&
8289		    max_qps <= hca_p->hca_qp_count) {
8290			DERR("ibt_alloc_rc_channel: resource limit exceeded "
8291			    "(limit %d, count %d)\n", max_qps,
8292			    hca_p->hca_qp_count);
8293			return (IBT_INSUFF_RESOURCE);
8294		}
8295		DAPLKA_RS_ACCT_INC(ep_rp, 1);
8296		atomic_add_32(&hca_p->hca_qp_count, 1);
8297	}
8298	status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8299
8300	if (status != IBT_SUCCESS && acct_enabled) {
8301		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8302		atomic_add_32(&hca_p->hca_qp_count, -1);
8303	}
8304	return (status);
8305}
8306
8307static ibt_status_t
8308daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8309{
8310	daplka_hca_t	*hca_p;
8311	ibt_status_t	status;
8312
8313	hca_p = ep_rp->ep_hca;
8314
8315	status = ibt_free_channel(chan_hdl);
8316	if (status != IBT_SUCCESS) {
8317		return (status);
8318	}
8319	if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8320		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8321		atomic_add_32(&hca_p->hca_qp_count, -1);
8322	}
8323	return (status);
8324}
8325
8326static ibt_status_t
8327daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8328    ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8329{
8330	daplka_hca_t	*hca_p;
8331	uint32_t	max_cqs;
8332	boolean_t	acct_enabled;
8333	ibt_status_t	status;
8334
8335	acct_enabled = daplka_accounting_enabled;
8336	hca_p = evd_rp->evd_hca;
8337	max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8338
8339	if (acct_enabled) {
8340		if (daplka_max_cq_percent != 0 &&
8341		    max_cqs <= hca_p->hca_cq_count) {
8342			DERR("ibt_alloc_cq: resource limit exceeded "
8343			    "(limit %d, count %d)\n", max_cqs,
8344			    hca_p->hca_cq_count);
8345			return (IBT_INSUFF_RESOURCE);
8346		}
8347		DAPLKA_RS_ACCT_INC(evd_rp, 1);
8348		atomic_add_32(&hca_p->hca_cq_count, 1);
8349	}
8350	status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8351
8352	if (status != IBT_SUCCESS && acct_enabled) {
8353		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8354		atomic_add_32(&hca_p->hca_cq_count, -1);
8355	}
8356	return (status);
8357}
8358
8359static ibt_status_t
8360daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8361{
8362	daplka_hca_t	*hca_p;
8363	ibt_status_t	status;
8364
8365	hca_p = evd_rp->evd_hca;
8366
8367	status = ibt_free_cq(cq_hdl);
8368	if (status != IBT_SUCCESS) {
8369		return (status);
8370	}
8371	if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8372		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8373		atomic_add_32(&hca_p->hca_cq_count, -1);
8374	}
8375	return (status);
8376}
8377
8378static ibt_status_t
8379daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8380    ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8381{
8382	daplka_hca_t	*hca_p;
8383	uint32_t	max_pds;
8384	boolean_t	acct_enabled;
8385	ibt_status_t	status;
8386
8387	acct_enabled = daplka_accounting_enabled;
8388	hca_p = pd_rp->pd_hca;
8389	max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8390
8391	if (acct_enabled) {
8392		if (daplka_max_pd_percent != 0 &&
8393		    max_pds <= hca_p->hca_pd_count) {
8394			DERR("ibt_alloc_pd: resource limit exceeded "
8395			    "(limit %d, count %d)\n", max_pds,
8396			    hca_p->hca_pd_count);
8397			return (IBT_INSUFF_RESOURCE);
8398		}
8399		DAPLKA_RS_ACCT_INC(pd_rp, 1);
8400		atomic_add_32(&hca_p->hca_pd_count, 1);
8401	}
8402	status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8403
8404	if (status != IBT_SUCCESS && acct_enabled) {
8405		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8406		atomic_add_32(&hca_p->hca_pd_count, -1);
8407	}
8408	return (status);
8409}
8410
8411static ibt_status_t
8412daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8413    ibt_pd_hdl_t pd_hdl)
8414{
8415	daplka_hca_t	*hca_p;
8416	ibt_status_t	status;
8417
8418	hca_p = pd_rp->pd_hca;
8419
8420	status = ibt_free_pd(hca_hdl, pd_hdl);
8421	if (status != IBT_SUCCESS) {
8422		return (status);
8423	}
8424	if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8425		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8426		atomic_add_32(&hca_p->hca_pd_count, -1);
8427	}
8428	return (status);
8429}
8430
8431static ibt_status_t
8432daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8433    ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8434    ibt_rkey_t *rkey_p)
8435{
8436	daplka_hca_t	*hca_p;
8437	uint32_t	max_mws;
8438	boolean_t	acct_enabled;
8439	ibt_status_t	status;
8440
8441	acct_enabled = daplka_accounting_enabled;
8442	hca_p = mw_rp->mw_hca;
8443	max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8444
8445	if (acct_enabled) {
8446		if (daplka_max_mw_percent != 0 &&
8447		    max_mws <= hca_p->hca_mw_count) {
8448			DERR("ibt_alloc_mw: resource limit exceeded "
8449			    "(limit %d, count %d)\n", max_mws,
8450			    hca_p->hca_mw_count);
8451			return (IBT_INSUFF_RESOURCE);
8452		}
8453		DAPLKA_RS_ACCT_INC(mw_rp, 1);
8454		atomic_add_32(&hca_p->hca_mw_count, 1);
8455	}
8456	status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8457
8458	if (status != IBT_SUCCESS && acct_enabled) {
8459		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8460		atomic_add_32(&hca_p->hca_mw_count, -1);
8461	}
8462	return (status);
8463}
8464
8465static ibt_status_t
8466daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8467    ibt_mw_hdl_t mw_hdl)
8468{
8469	daplka_hca_t	*hca_p;
8470	ibt_status_t	status;
8471
8472	hca_p = mw_rp->mw_hca;
8473
8474	status = ibt_free_mw(hca_hdl, mw_hdl);
8475	if (status != IBT_SUCCESS) {
8476		return (status);
8477	}
8478	if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8479		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8480		atomic_add_32(&hca_p->hca_mw_count, -1);
8481	}
8482	return (status);
8483}
8484
8485static ibt_status_t
8486daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8487    ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8488    ibt_mr_desc_t *mr_desc_p)
8489{
8490	daplka_hca_t	*hca_p;
8491	uint32_t	max_mrs;
8492	boolean_t	acct_enabled;
8493	ibt_status_t	status;
8494
8495	acct_enabled = daplka_accounting_enabled;
8496	hca_p = mr_rp->mr_hca;
8497	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8498
8499	if (acct_enabled) {
8500		if (daplka_max_mr_percent != 0 &&
8501		    max_mrs <= hca_p->hca_mr_count) {
8502			DERR("ibt_register_mr: resource limit exceeded "
8503			    "(limit %d, count %d)\n", max_mrs,
8504			    hca_p->hca_mr_count);
8505			return (IBT_INSUFF_RESOURCE);
8506		}
8507		DAPLKA_RS_ACCT_INC(mr_rp, 1);
8508		atomic_add_32(&hca_p->hca_mr_count, 1);
8509	}
8510	status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8511
8512	if (status != IBT_SUCCESS && acct_enabled) {
8513		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8514		atomic_add_32(&hca_p->hca_mr_count, -1);
8515	}
8516	return (status);
8517}
8518
8519static ibt_status_t
8520daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8521    ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8522    ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8523    ibt_mr_desc_t *mr_desc_p)
8524{
8525	daplka_hca_t	*hca_p;
8526	uint32_t	max_mrs;
8527	boolean_t	acct_enabled;
8528	ibt_status_t	status;
8529
8530	acct_enabled = daplka_accounting_enabled;
8531	hca_p = mr_rp->mr_hca;
8532	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8533
8534	if (acct_enabled) {
8535		if (daplka_max_mr_percent != 0 &&
8536		    max_mrs <= hca_p->hca_mr_count) {
8537			DERR("ibt_register_shared_mr: resource limit exceeded "
8538			    "(limit %d, count %d)\n", max_mrs,
8539			    hca_p->hca_mr_count);
8540			return (IBT_INSUFF_RESOURCE);
8541		}
8542		DAPLKA_RS_ACCT_INC(mr_rp, 1);
8543		atomic_add_32(&hca_p->hca_mr_count, 1);
8544	}
8545	status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8546	    smr_attr_p, mr_hdl_p, mr_desc_p);
8547
8548	if (status != IBT_SUCCESS && acct_enabled) {
8549		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8550		atomic_add_32(&hca_p->hca_mr_count, -1);
8551	}
8552	return (status);
8553}
8554
8555static ibt_status_t
8556daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8557    ibt_mr_hdl_t mr_hdl)
8558{
8559	daplka_hca_t	*hca_p;
8560	ibt_status_t	status;
8561
8562	hca_p = mr_rp->mr_hca;
8563
8564	status = ibt_deregister_mr(hca_hdl, mr_hdl);
8565	if (status != IBT_SUCCESS) {
8566		return (status);
8567	}
8568	if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8569		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8570		atomic_add_32(&hca_p->hca_mr_count, -1);
8571	}
8572	return (status);
8573}
8574
8575static ibt_status_t
8576daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8577    ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8578    ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8579{
8580	daplka_hca_t	*hca_p;
8581	uint32_t	max_srqs;
8582	boolean_t	acct_enabled;
8583	ibt_status_t	status;
8584
8585	acct_enabled = daplka_accounting_enabled;
8586	hca_p = srq_rp->srq_hca;
8587	max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8588
8589	if (acct_enabled) {
8590		if (daplka_max_srq_percent != 0 &&
8591		    max_srqs <= hca_p->hca_srq_count) {
8592			DERR("ibt_alloc_srq: resource limit exceeded "
8593			    "(limit %d, count %d)\n", max_srqs,
8594			    hca_p->hca_srq_count);
8595			return (IBT_INSUFF_RESOURCE);
8596		}
8597		DAPLKA_RS_ACCT_INC(srq_rp, 1);
8598		atomic_add_32(&hca_p->hca_srq_count, 1);
8599	}
8600	status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8601
8602	if (status != IBT_SUCCESS && acct_enabled) {
8603		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8604		atomic_add_32(&hca_p->hca_srq_count, -1);
8605	}
8606	return (status);
8607}
8608
8609static ibt_status_t
8610daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8611{
8612	daplka_hca_t	*hca_p;
8613	ibt_status_t	status;
8614
8615	hca_p = srq_rp->srq_hca;
8616
8617	D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8618
8619	status = ibt_free_srq(srq_hdl);
8620	if (status != IBT_SUCCESS) {
8621		return (status);
8622	}
8623	if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8624		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8625		atomic_add_32(&hca_p->hca_srq_count, -1);
8626	}
8627	return (status);
8628}
8629
8630
8631static int
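/*
 * ioctl dispatch routines: each of the following routines dispatches
 * one group of related DAPL commands to its handler. daplka_common_ioctl
 * handles the commands (currently only DAPL_IA_CREATE) that are issued
 * on a reserved resource slot before an IA exists.
 */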
8632daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8633	cred_t *cred, int *rvalp)
8634{
8635	int error;
8636
8637	switch (cmd) {
8638	case DAPL_IA_CREATE:
8639		error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8640		break;
8641
8642	/* can potentially add other commands here */
8643
8644	default:
8645		DERR("daplka_common_ioctl: cmd not supported\n");
8646		error = DDI_FAILURE;
8647	}
8648	return (error);
8649}
8650
8651static int
8652daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8653	cred_t *cred, int *rvalp)
8654{
8655	int error;
8656
8657	switch (cmd) {
8658	case DAPL_EVD_CREATE:
8659		error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8660		break;
8661
8662	case DAPL_CQ_RESIZE:
8663		error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8664		break;
8665
8666	case DAPL_EVENT_POLL:
8667		error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8668		break;
8669
8670	case DAPL_EVENT_WAKEUP:
8671		error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8672		break;
8673
8674	case DAPL_EVD_MODIFY_CNO:
8675		error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8676		break;
8677
8678	case DAPL_EVD_FREE:
8679		error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8680		break;
8681
8682	default:
8683		DERR("daplka_evd_ioctl: cmd not supported\n");
8684		error = DDI_FAILURE;
8685	}
8686	return (error);
8687}
8688
8689static int
8690daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8691	cred_t *cred, int *rvalp)
8692{
8693	int error;
8694
8695	switch (cmd) {
8696	case DAPL_EP_MODIFY:
8697		error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8698		break;
8699
8700	case DAPL_EP_FREE:
8701		error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8702		break;
8703
8704	case DAPL_EP_CONNECT:
8705		error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8706		break;
8707
8708	case DAPL_EP_DISCONNECT:
8709		error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8710		break;
8711
8712	case DAPL_EP_REINIT:
8713		error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8714		break;
8715
8716	case DAPL_EP_CREATE:
8717		error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8718		break;
8719
8720	default:
8721		DERR("daplka_ep_ioctl: cmd not supported\n");
8722		error = DDI_FAILURE;
8723	}
8724	return (error);
8725}
8726
8727static int
8728daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8729	cred_t *cred, int *rvalp)
8730{
8731	int error;
8732
8733	switch (cmd) {
8734	case DAPL_MR_REGISTER:
8735		error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8736		break;
8737
8738	case DAPL_MR_REGISTER_LMR:
8739		error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8740		break;
8741
8742	case DAPL_MR_REGISTER_SHARED:
8743		error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8744		break;
8745
8746	case DAPL_MR_DEREGISTER:
8747		error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8748		break;
8749
8750	case DAPL_MR_SYNC:
8751		error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8752		break;
8753
8754	default:
8755		DERR("daplka_mr_ioctl: cmd not supported\n");
8756		error = DDI_FAILURE;
8757	}
8758	return (error);
8759}
8760
8761static int
8762daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8763	cred_t *cred, int *rvalp)
8764{
8765	int error;
8766
8767	switch (cmd) {
8768	case DAPL_MW_ALLOC:
8769		error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8770		break;
8771
8772	case DAPL_MW_FREE:
8773		error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8774		break;
8775
8776	default:
8777		DERR("daplka_mw_ioctl: cmd not supported\n");
8778		error = DDI_FAILURE;
8779	}
8780	return (error);
8781}
8782
8783static int
8784daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8785	cred_t *cred, int *rvalp)
8786{
8787	int error;
8788
8789	switch (cmd) {
8790	case DAPL_CNO_ALLOC:
8791		error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8792		break;
8793
8794	case DAPL_CNO_FREE:
8795		error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8796		break;
8797
8798	case DAPL_CNO_WAIT:
8799		error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8800		break;
8801
8802	default:
8803		DERR("daplka_cno_ioctl: cmd not supported\n");
8804		error = DDI_FAILURE;
8805	}
8806	return (error);
8807}
8808
8809static int
8810daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8811	cred_t *cred, int *rvalp)
8812{
8813	int error;
8814
8815	switch (cmd) {
8816	case DAPL_PD_ALLOC:
8817		error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8818		break;
8819
8820	case DAPL_PD_FREE:
8821		error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8822		break;
8823
8824	default:
8825		DERR("daplka_pd_ioctl: cmd not supported\n");
8826		error = DDI_FAILURE;
8827	}
8828	return (error);
8829}
8830
8831static int
8832daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8833	cred_t *cred, int *rvalp)
8834{
8835	int error;
8836
8837	switch (cmd) {
8838	case DAPL_SERVICE_REGISTER:
8839		error = daplka_service_register(rp, arg, mode, cred, rvalp);
8840		break;
8841
8842	case DAPL_SERVICE_DEREGISTER:
8843		error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8844		break;
8845
8846	default:
8847		DERR("daplka_sp_ioctl: cmd not supported\n");
8848		error = DDI_FAILURE;
8849	}
8850	return (error);
8851}
8852
8853static int
8854daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8855	cred_t *cred, int *rvalp)
8856{
8857	int error;
8858
8859	switch (cmd) {
8860	case DAPL_SRQ_CREATE:
8861		error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8862		break;
8863
8864	case DAPL_SRQ_RESIZE:
8865		error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8866		break;
8867
8868	case DAPL_SRQ_FREE:
8869		error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8870		break;
8871
8872	default:
8873		DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8874		error = DDI_FAILURE;
8875		break;
8876	}
8877	return (error);
8878}
8879
8880static int
8881daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8882	cred_t *cred, int *rvalp)
8883{
8884	int error;
8885
8886	switch (cmd) {
8887	case DAPL_CR_ACCEPT:
8888		error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8889		break;
8890
8891	case DAPL_CR_REJECT:
8892		error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8893		break;
8894
8895	case DAPL_IA_QUERY:
8896		error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8897		break;
8898
8899	case DAPL_CR_HANDOFF:
8900		error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8901		break;
8902
8903	default:
8904		DERR("daplka_misc_ioctl: cmd not supported\n");
8905		error = DDI_FAILURE;
8906	}
8907	return (error);
8908}
8909
8910/*ARGSUSED*/
8911static int
8912daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
8913	int *rvalp)
8914{
8915	daplka_ia_resource_t	*ia_rp;
8916	minor_t			rnum;
8917	int			error = 0;
8918
8919	rnum = getminor(dev);
8920	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
8921	if (ia_rp == NULL) {
8922		DERR("ioctl: resource not found, rnum %d\n", rnum);
8923		return (ENXIO);
8924	}
8925
8926	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
8927	if (DAPLKA_RS_RESERVED(ia_rp)) {
8928		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
8929		return (error);
8930	}
8931	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
8932		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
8933		error = EINVAL;
8934		goto cleanup;
8935	}
8936	if (ia_rp->ia_pid != ddi_get_pid()) {
8937		DERR("ioctl: ia_pid %d != pid %d\n",
8938		    ia_rp->ia_pid, ddi_get_pid());
8939		error = EINVAL;
8940		goto cleanup;
8941	}
8942
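	/*
	 * each DAPL ioctl encodes the type of its target resource in
	 * the DAPL_TYPE_MASK bits of the command; dispatch on those.
	 */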
8943	switch (cmd & DAPL_TYPE_MASK) {
8944	case DAPL_TYPE_EVD:
8945		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8946		break;
8947
8948	case DAPL_TYPE_EP:
8949		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8950		break;
8951
8952	case DAPL_TYPE_MR:
8953		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8954		break;
8955
8956	case DAPL_TYPE_MW:
8957		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8958		break;
8959
8960	case DAPL_TYPE_PD:
8961		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8962		break;
8963
8964	case DAPL_TYPE_SP:
8965		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8966		break;
8967
8968	case DAPL_TYPE_CNO:
8969		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8970		break;
8971
8972	case DAPL_TYPE_MISC:
8973		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8974		break;
8975
8976	case DAPL_TYPE_SRQ:
8977		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8978		break;
8979
8980	default:
8981		DERR("ioctl: invalid cmd type = %d\n", cmd & DAPL_TYPE_MASK);
8982		error = DDI_FAILURE;
8983	}
8984
8985	cleanup:
8986	DAPLKA_RS_UNREF(ia_rp);
8987	return (error);
8988}
8989
8990/* ARGSUSED */
8991static int
8992daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8993{
8994	minor_t rnum;
8995
8996	/*
8997	 * Char only
8998	 */
8999	if (otyp != OTYP_CHR) {
9000		return (EINVAL);
9001	}
9002
9003	/*
9004	 * Only minor number zero can be opened; clones are used for resources.
9005	 */
9006	if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
9007		DERR("daplka_open: bad minor %d\n", getminor(*devp));
9008		return (ENODEV);
9009	}
9010
9011	/*
9012	 * - allocate new minor number
9013	 * - update devp argument to new device
9014	 */
9015	if (daplka_resource_reserve(&rnum) == 0) {
9016		*devp = makedevice(getmajor(*devp), rnum);
9017	} else {
9018		return (ENOMEM);
9019	}
9020
9021	return (DDI_SUCCESS);
9022}
9023
9024/* ARGSUSED */
9025static int
9026daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
9027{
9028	daplka_ia_resource_t	*ia_rp;
9029	minor_t			rnum = getminor(dev);
9030
9031	/*
9032	 * Char only
9033	 */
9034	if (otyp != OTYP_CHR) {
9035		return (EINVAL);
9036	}
9037	D2("daplka_close: closing rnum = %d\n", rnum);
9038	atomic_add_32(&daplka_pending_close, 1);
9039
9040	/*
9041	 * remove from resource table.
9042	 */
9043	ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9044
9045	/*
9046	 * remove the initial reference
9047	 */
9048	if (ia_rp != NULL) {
9049		DAPLKA_RS_UNREF(ia_rp);
9050	}
9051	atomic_add_32(&daplka_pending_close, -1);
9052	return (DDI_SUCCESS);
9053}
9054
9055
9056/*
9057 * Resource management routines
9058 *
9059 * We start with no resource array. Each time we run out of slots, we
9060 * reallocate a larger root array, copy the existing blk pointers into
9061 * the new array, and add a newly allocated resource blk to the table.
9062 *
9063 * The resource control block contains:
9064 *      root    - array of pointers to resource blks
9065 *      sz      - current size of the array.
9066 *      len     - last valid entry in the array.
9067 *
9068 * A search operation based on a resource number is as follows:
9069 *      index = rnum / DAPLKA_RC_BLKSZ;
9070 *      ASSERT(index < resource_block.len);
9071 *      ASSERT(index < resource_block.sz);
9072 *      offset = rnum % DAPLKA_RC_BLKSZ;
9073 *      ASSERT(offset >= 0);
9074 *      ASSERT(offset < DAPLKA_RC_BLKSZ);
9075 *      return resource_block.root[index]->blks[offset];
9076 *
9077 * A resource blk is freed when its used count reaches zero.
9078 */
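/*
 * For example (a sketch; the real block size comes from daplt.h): if
 * DAPLKA_RC_BLKSZ were 16, rnum 37 would resolve to index 2 (37 / 16)
 * and offset 5 (37 % 16), i.e. root[2]->blks[5].
 */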
9079
9080/*
9081 * initializes the global resource table
9082 */
9083static void
9084daplka_resource_init(void)
9085{
9086	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
9087	rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9088	daplka_resource.daplka_rc_len = 0;
9089	daplka_resource.daplka_rc_sz = 0;
9090	daplka_resource.daplka_rc_cnt = 0;
9091	daplka_resource.daplka_rc_flag = 0;
9092	daplka_resource.daplka_rc_root = NULL;
9093	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
9094}
9095
9096/*
9097 * destroys the global resource table
9098 */
9099static void
9100daplka_resource_fini(void)
9101{
9102	int	i;
9103
9104	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9105	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9106		daplka_resource_blk_t	*blk;
9107		int			j;
9108
9109		blk = daplka_resource.daplka_rc_root[i];
9110		if (blk == NULL) {
9111			continue;
9112		}
9113		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9114			if (blk->daplka_rcblk_blks[j] != NULL) {
9115				DERR("resource_fini: non-null slot %d, %p\n",
9116				    j, blk->daplka_rcblk_blks[j]);
9117			}
9118		}
9119		kmem_free(blk, sizeof (*blk));
9120		daplka_resource.daplka_rc_root[i] = NULL;
9121	}
9122	if (daplka_resource.daplka_rc_root != NULL) {
9123		uint_t	sz;
9124
9125		sz = daplka_resource.daplka_rc_sz *
9126		    sizeof (daplka_resource_blk_t *);
9127		kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9128		daplka_resource.daplka_rc_root = NULL;
9129		daplka_resource.daplka_rc_len = 0;
9130		daplka_resource.daplka_rc_sz = 0;
9131	}
9132	rw_exit(&daplka_resource.daplka_rct_lock);
9133	rw_destroy(&daplka_resource.daplka_rct_lock);
9134}
9135
9136/*
9137 * reserves a slot in the global resource table.
9138 * this is called by the open() syscall. it is needed because
9139 * at open() time, we do not have sufficient information to
9140 * create an IA resource. the library needs to subsequently
9141 * call daplka_ia_create to insert an IA resource into this
9142 * reserved slot.
9143 */
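/*
 * slot lifecycle, as a sketch:
 *
 *	open()			daplka_resource_reserve()  slot = RESERVED
 *	daplka_ia_create	daplka_resource_insert()   slot = IA resource
 *	close()			daplka_resource_remove()   slot = NULL
 */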
9144static int
9145daplka_resource_reserve(minor_t *rnum)
9146{
9147	int i, j, empty = -1;
9148	daplka_resource_blk_t *blk;
9149
9150	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9151	/*
9152	 * Try to find an empty slot
9153	 */
9154	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9155		blk = daplka_resource.daplka_rc_root[i];
9156		if (blk != NULL && blk->daplka_rcblk_avail > 0) {
9157
9158			D3("resource_alloc: available blks %d\n",
9159			    blk->daplka_rcblk_avail);
9160
9161			/*
9162			 * found an empty slot in this blk
9163			 */
9164			for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9165				if (blk->daplka_rcblk_blks[j] == NULL) {
9166					*rnum = (minor_t)
9167					    (j + (i * DAPLKA_RC_BLKSZ));
9168					blk->daplka_rcblk_blks[j] =
9169					    (daplka_resource_t *)
9170					    DAPLKA_RC_RESERVED;
9171					blk->daplka_rcblk_avail--;
9172					daplka_resource.daplka_rc_cnt++;
9173					rw_exit(&daplka_resource.
9174					    daplka_rct_lock);
9175					return (0);
9176				}
9177			}
9178		} else if (blk == NULL && empty < 0) {
9179			/*
9180			 * remember first empty slot
9181			 */
9182			empty = i;
9183		}
9184	}
9185
9186	/*
9187	 * Couldn't find an empty slot; allocate a new blk.
9188	 * First, reallocate the root array if it is full.
9189	 */
9190	if (empty < 0) {
9191		if (daplka_resource.daplka_rc_len ==
9192		    daplka_resource.daplka_rc_sz) {
9193			/*
9194			 * Allocate new array and copy current stuff into it
9195			 */
9196			daplka_resource_blk_t	**p;
9197			uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
9198			    DAPLKA_RC_BLKSZ;
9199
9200			D3("resource_alloc: increasing no. of buckets to %d\n",
9201			    newsz);
9202
9203			p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);
9204
9205			if (daplka_resource.daplka_rc_root) {
9206				uint_t oldsz;
9207
9208				oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
9209				    (int)sizeof (*p));
9210
9211				/*
9212				 * Copy old data into new space and
9213				 * free old stuff
9214				 */
9215				bcopy(daplka_resource.daplka_rc_root, p, oldsz);
9216				kmem_free(daplka_resource.daplka_rc_root,
9217				    oldsz);
9218			}
9219
9220			daplka_resource.daplka_rc_root = p;
9221			daplka_resource.daplka_rc_sz = (int)newsz;
9222		}
9223
9224		empty = daplka_resource.daplka_rc_len;
9225		daplka_resource.daplka_rc_len++;
9226
9227		D3("resource_alloc: daplka_rc_len %d\n",
9228		    daplka_resource.daplka_rc_len);
9229	}
9230
9231	/*
9232	 * Allocate a new blk
9233	 */
9234	blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
9235	ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
9236	daplka_resource.daplka_rc_root[empty] = blk;
9237	blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;
9238
9239	/*
9240	 * Allocate slot
9241	 */
9242	*rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
9243	blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
9244	daplka_resource.daplka_rc_cnt++;
9245	rw_exit(&daplka_resource.daplka_rct_lock);
9246
9247	return (0);
9248}
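/*
 * A sketch of the growth pattern, assuming (for illustration only) that
 * DAPLKA_RC_BLKSZ is 16: the first reserve allocates a 16-entry root
 * array plus one blk and hands out rnum 0; later reserves fill the
 * 16 * 16 = 256 slots before the root array is grown to 32 entries.
 */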
9249
9250/*
9251 * removes resource from global resource table
9252 */
9253static daplka_resource_t *
9254daplka_resource_remove(minor_t rnum)
9255{
9256	int i, j;
9257	daplka_resource_blk_t *blk;
9258	daplka_resource_t *p;
9259
9260	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9261	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9262
9263	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9264	if (i >= daplka_resource.daplka_rc_len) {
9265		rw_exit(&daplka_resource.daplka_rct_lock);
9266		DERR("resource_remove: invalid rnum %d\n", rnum);
9267		return (NULL);
9268	}
9269
9270	ASSERT(daplka_resource.daplka_rc_root);
9271	ASSERT(i < daplka_resource.daplka_rc_len);
9272	ASSERT(i < daplka_resource.daplka_rc_sz);
9273	blk = daplka_resource.daplka_rc_root[i];
9274	if (blk == NULL) {
9275		rw_exit(&daplka_resource.daplka_rct_lock);
9276		DERR("resource_remove: invalid rnum %d\n", rnum);
9277		return (NULL);
9278	}
9279
9280	if (blk->daplka_rcblk_blks[j] == NULL) {
9281		rw_exit(&daplka_resource.daplka_rct_lock);
9282		DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
9283		return (NULL);
9284	}
9285	p = blk->daplka_rcblk_blks[j];
9286	blk->daplka_rcblk_blks[j] = NULL;
9287	blk->daplka_rcblk_avail++;
9288	if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
9289		/*
9290		 * free this blk
9291		 */
9292		kmem_free(blk, sizeof (*blk));
9293		daplka_resource.daplka_rc_root[i] = NULL;
9294	}
9295	daplka_resource.daplka_rc_cnt--;
9296	rw_exit(&daplka_resource.daplka_rct_lock);
9297
9298	if ((intptr_t)p == DAPLKA_RC_RESERVED) {
9299		return (NULL);
9300	} else {
9301		return (p);
9302	}
9303}
9304
9305/*
9306 * inserts resource into the slot designated by rnum
9307 */
9308static int
9309daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9310{
9311	int i, j, error = -1;
9312	daplka_resource_blk_t *blk;
9313
9314	/*
9315	 * Lock the table in WRITER mode and locate the slot
9316	 * that was reserved for this rnum.
9317	 */
9318
9319	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9320	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9321
9322	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9323	if (i >= daplka_resource.daplka_rc_len) {
9324		rw_exit(&daplka_resource.daplka_rct_lock);
9325		DERR("resource_insert: resource %d not found\n", rnum);
9326		return (-1);
9327	}
9328
9329	blk = daplka_resource.daplka_rc_root[i];
9330	if (blk != NULL) {
9331		ASSERT(i < daplka_resource.daplka_rc_len);
9332		ASSERT(i < daplka_resource.daplka_rc_sz);
9333
9334		if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9335			blk->daplka_rcblk_blks[j] = rp;
9336			error = 0;
9337		} else {
9338			DERR("resource_insert: %d not reserved, blk = %p\n",
9339			    rnum, blk->daplka_rcblk_blks[j]);
9340		}
9341	} else {
9342		DERR("resource_insert: resource %d not found\n", rnum);
9343	}
9344	rw_exit(&daplka_resource.daplka_rct_lock);
9345	return (error);
9346}
9347
9348/*
9349 * finds resource using minor device number
9350 */
9351static daplka_resource_t *
9352daplka_resource_lookup(minor_t rnum)
9353{
9354	int i, j;
9355	daplka_resource_blk_t *blk;
9356	daplka_resource_t *rp;
9357
9358	/*
9359	 * Lock the table in READER mode and look up the
9360	 * resource associated with this rnum.
9361	 */
9362
9363	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9364	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9365
9366	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
9367	if (i >= daplka_resource.daplka_rc_len) {
9368		rw_exit(&daplka_resource.daplka_rct_lock);
9369		DERR("resource_lookup: resource %d not found\n", rnum);
9370		return (NULL);
9371	}
9372
9373	blk = daplka_resource.daplka_rc_root[i];
9374	if (blk != NULL) {
9375		ASSERT(i < daplka_resource.daplka_rc_len);
9376		ASSERT(i < daplka_resource.daplka_rc_sz);
9377
9378		rp = blk->daplka_rcblk_blks[j];
9379		if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
9380			D3("resource_lookup: %d not found, blk = %p\n",
9381			    rnum, blk->daplka_rcblk_blks[j]);
9382		} else {
9383			DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
9384		}
9385	} else {
9386		DERR("resource_lookup: resource %d not found\n", rnum);
9387		rp = NULL;
9388	}
9389	rw_exit(&daplka_resource.daplka_rct_lock);
9390	return (rp);
9391}
9392
9393/*
9394 * generic hash table implementation
9395 */
9396
9397/*
9398 * daplka_hash_create:
9399 *	initializes a hash table with the specified parameters
9400 *
9401 * input:
9402 *	htblp			pointer to hash table
9403 *
9404 *	nbuckets		number of buckets (must be power of 2)
9405 *
9406 *	free_func		this function is called on each hash
9407 *				table element when daplka_hash_destroy
9408 *				is called
9409 *
9410 *	lookup_func		if daplka_hash_lookup is able to find
9411 *				the desired object, this function is
9412 *				applied on the object before
9413 *				daplka_hash_lookup returns
9414 * output:
9415 *	none
9416 *
9417 * return value(s):
9418 *	EINVAL			nbuckets is not a power of 2
9419 *	ENOMEM			cannot allocate buckets
9420 *	0			success
9421 */
9422static int
9423daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9424	void (*free_func)(void *), void (*lookup_func)(void *))
9425{
9426	int i;
9427
9428	if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9429		DERR("hash_create: nbuckets not power of 2\n");
9430		return (EINVAL);
9431	}
9432	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))
9433
9434	htblp->ht_buckets =
9435	    kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9436	    daplka_km_flags);
9437	if (htblp->ht_buckets == NULL) {
9438		DERR("hash_create: cannot allocate buckets\n");
9439		return (ENOMEM);
9440	}
9441	for (i = 0; i < nbuckets; i++) {
9442		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9443		htblp->ht_buckets[i].hb_count = 0;
9444		htblp->ht_buckets[i].hb_entries = NULL;
9445		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9446	}
9447	rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9448	mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9449
9450	htblp->ht_count = 0;
9451	htblp->ht_next_hkey = (uint64_t)gethrtime();
9452	htblp->ht_nbuckets = nbuckets;
9453	htblp->ht_free_func = free_func;
9454	htblp->ht_lookup_func = lookup_func;
9455	htblp->ht_initialized = B_TRUE;
9456	D3("hash_create: done, buckets = %d\n", nbuckets);
9457	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
9458	return (0);
9459}
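/*
 * typical usage, as a sketch (my_free, my_lookup and objp are
 * hypothetical; 64 satisfies the power-of-2 requirement):
 *
 *	daplka_hash_table_t htbl;
 *	uint64_t hkey = 0;			zero asks insert to gen a key
 *
 *	(void) daplka_hash_create(&htbl, 64, my_free, my_lookup);
 *	(void) daplka_hash_insert(&htbl, &hkey, objp);
 *	objp = daplka_hash_lookup(&htbl, hkey);		applies my_lookup
 *	(void) daplka_hash_remove(&htbl, hkey, &objp);
 *	daplka_hash_destroy(&htbl);			my_free on leftovers
 */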
9460
9461/*
9462 * daplka_hash_insert:
9463 *	inserts an object into a hash table
9464 *
9465 * input:
9466 *	htblp			pointer to hash table
9467 *
9468 *	hkeyp			pointer to hash key.
9469 *				*hkeyp being non-zero means that the caller
9470 *				has generated its own hkey. if *hkeyp is zero,
9471 *				this function will generate an hkey for the
9472 *				caller. it is recommended that the caller
9473 *				leave the hkey generation to this function
9474 *				because the hkey is more likely to be evenly
9475 *				distributed.
9476 *
9477 *	objp			pointer to object to be inserted into
9478 *				hash table
9479 *
9480 * output:
9481 *	hkeyp			the generated hkey is returned via this pointer
9482 *
9483 * return value(s):
9484 *	EINVAL			invalid parameter
9485 *	ENOMEM			cannot allocate hash entry
9486 *	0			successful
9487 */
9488static int
9489daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
9490{
9491	daplka_hash_entry_t *hep, *curr_hep;
9492	daplka_hash_bucket_t *hbp;
9493	uint32_t bucket;
9494	uint64_t hkey;
9495
9496	if (hkeyp == NULL) {
9497		DERR("hash_insert: hkeyp == NULL\n");
9498		return (EINVAL);
9499	}
9500	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
9501	if (hep == NULL) {
9502		DERR("hash_insert: cannot alloc hash_entry\n");
9503		return (ENOMEM);
9504	}
9505	if (*hkeyp == 0) {
9506		/* generate a new key */
9507		mutex_enter(&htblp->ht_key_lock);
9508		hkey = ++htblp->ht_next_hkey;
9509		if (hkey == 0) {
9510			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
9511		}
9512		mutex_exit(&htblp->ht_key_lock);
9513	} else {
9514		/* use user generated key */
9515		hkey = *hkeyp;
9516	}
9517
9518	/* only works if ht_nbuckets is a power of 2 */
9519	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
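	/*
	 * e.g. with ht_nbuckets == 64, (hkey & 63) == (hkey % 64), so
	 * the mask selects a bucket without a division.
	 */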
9520	ASSERT(objp != NULL);
9521	ASSERT(bucket < htblp->ht_nbuckets);
9522
9523	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9524	hep->he_hkey = hkey;
9525	hep->he_objp = objp;
9526
9527	/* look for duplicate entries */
9528	hbp = &htblp->ht_buckets[bucket];
9529	curr_hep = hbp->hb_entries;
9530	while (curr_hep != NULL) {
9531		if (curr_hep->he_hkey == hep->he_hkey) {
9532			break;
9533		}
9534		curr_hep = curr_hep->he_next;
9535	}
9536	if (curr_hep != NULL) {
9537		DERR("hash_insert: found duplicate hash entry: "
9538		    "bucket %d, hkey 0x%016llx\n",
9539		    bucket, (longlong_t)hep->he_hkey);
9540		kmem_free(hep, sizeof (*hep));
9541		rw_exit(&htblp->ht_table_lock);
9542		return (EINVAL);
9543	}
9544	hep->he_next = hbp->hb_entries;
9545	hbp->hb_entries = hep;
9546	hbp->hb_count++;
9547	htblp->ht_count++;
9548	rw_exit(&htblp->ht_table_lock);
9549
9550	if (*hkeyp == 0) {
9551		*hkeyp = hkey;
9552		ASSERT(*hkeyp != 0);
9553	}
9554	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
9555	    htblp, (longlong_t)*hkeyp, bucket);
9556	return (0);
9557}
9558
9559/*
9560 * daplka_hash_remove:
9561 *	removes object identified by hkey from hash table
9562 *
9563 * input:
9564 *	htblp			pointer to hash table
9565 *
9566 *	hkey			hkey that identifies the object to be removed
9567 *
9568 * output:
9569 *	objpp			pointer to pointer to object.
9570 *				if remove is successful, the removed object
9571 *				will be returned via *objpp.
9572 *
9573 * return value(s):
9574 *	EINVAL			cannot find hash entry
9575 *	0			successful
9576 */
9577static int
9578daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9579{
9580	daplka_hash_entry_t	*free_hep, **curr_hepp;
9581	daplka_hash_bucket_t	*hbp;
9582	uint32_t		bucket;
9583
9584	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9585
9586	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9587	hbp = &htblp->ht_buckets[bucket];
9588
9589	curr_hepp = &hbp->hb_entries;
9590	while (*curr_hepp != NULL) {
9591		if ((*curr_hepp)->he_hkey == hkey) {
9592			break;
9593		}
9594		curr_hepp = &(*curr_hepp)->he_next;
9595	}
9596	if (*curr_hepp == NULL) {
9597		DERR("hash_remove: cannot find hash entry: "
9598		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9599		rw_exit(&htblp->ht_table_lock);
9600		return (EINVAL);
9601	} else {
9602		if (objpp != NULL) {
9603			*objpp = (*curr_hepp)->he_objp;
9604		}
9605		free_hep = *curr_hepp;
9606		*curr_hepp = (*curr_hepp)->he_next;
9607		kmem_free(free_hep, sizeof (*free_hep));
9608	}
9609	hbp->hb_count--;
9610	htblp->ht_count--;
9611	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9612	    "hb_count %d, ht_count %d\n",
9613	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9614	rw_exit(&htblp->ht_table_lock);
9615	return (0);
9616}
9617
9618/*
9619 * daplka_hash_walk:
9620 *	walks through the entire hash table. applying func on each of
9621 *	walks through the entire hash table, applying func on each of
9622 *
9623 * input:
9624 *	htblp			pointer to hash table
9625 *
9626 *	func			function to be applied on each object
9627 *
9628 *	farg			second argument to func
9629 *
9630 *	lockmode		can be RW_WRITER or RW_READER. this
9631 *				allows the caller to choose what type
9632 *				of lock to acquire before walking the
9633 *				table.
9634 *
9635 * output:
9636 *	none
9637 *
9638 * return value(s):
9639 *	none
9640 */
9641static void
9642daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9643	void *farg, krw_t lockmode)
9644{
9645	daplka_hash_entry_t *curr_hep;
9646	daplka_hash_bucket_t *hbp;
9647	uint32_t bucket, retval = 0;
9648
9649	ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9650
9651	/* needed for warlock */
9652	if (lockmode == RW_WRITER) {
9653		rw_enter(&htblp->ht_table_lock, RW_WRITER);
9654	} else {
9655		rw_enter(&htblp->ht_table_lock, RW_READER);
9656	}
9657	for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9658		hbp = &htblp->ht_buckets[bucket];
9659		curr_hep = hbp->hb_entries;
9660		while (curr_hep != NULL) {
9661			retval = (*func)(curr_hep->he_objp, farg);
9662			if (retval != 0) {
9663				break;
9664			}
9665			curr_hep = curr_hep->he_next;
9666		}
9667	}
9668	rw_exit(&htblp->ht_table_lock);
9669}
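/*
 * example walk function, as a sketch (my_count is hypothetical):
 *
 *	static int
 *	my_count(void *objp, void *arg)
 *	{
 *		(*(uint32_t *)arg)++;
 *		return (0);		non-zero would stop the walk
 *	}
 *
 *	uint32_t nobjs = 0;
 *	daplka_hash_walk(&htbl, my_count, &nobjs, RW_READER);
 */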
9670
9671/*
9672 * daplka_hash_lookup:
9673 *	finds object from hkey
9674 *
9675 * input:
9676 *	htblp			pointer to hash table
9677 *
9678 *	hkey			hkey that identifies the object to be looked up
9679 *
9680 * output:
9681 *	none
9682 *
9683 * return value(s):
9684 *	NULL			if not found
9685 *	object pointer		if found
9686 */
9687static void *
9688daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9689{
9690	daplka_hash_entry_t *curr_hep;
9691	uint32_t bucket;
9692	void *objp;
9693
9694	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9695
9696	rw_enter(&htblp->ht_table_lock, RW_READER);
9697	curr_hep = htblp->ht_buckets[bucket].hb_entries;
9698	while (curr_hep != NULL) {
9699		if (curr_hep->he_hkey == hkey) {
9700			break;
9701		}
9702		curr_hep = curr_hep->he_next;
9703	}
9704	if (curr_hep == NULL) {
9705		DERR("hash_lookup: cannot find hash entry: "
9706		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9707		rw_exit(&htblp->ht_table_lock);
9708		return (NULL);
9709	}
9710	objp = curr_hep->he_objp;
9711	ASSERT(objp != NULL);
9712	if (htblp->ht_lookup_func != NULL) {
9713		(*htblp->ht_lookup_func)(objp);
9714	}
9715	rw_exit(&htblp->ht_table_lock);
9716	return (objp);
9717}
9718
9719/*
9720 * daplka_hash_destroy:
9721 *	destroys hash table. applies free_func on all inserted objects.
9722 *
9723 * input:
9724 *	htblp			pointer to hash table
9725 *
9726 * output:
9727 *	none
9728 *
9729 * return value(s):
9730 *	none
9731 */
9732static void
9733daplka_hash_destroy(daplka_hash_table_t *htblp)
9734{
9735	daplka_hash_entry_t *curr_hep, *free_hep;
9736	daplka_hash_entry_t *free_list = NULL;
9737	daplka_hash_bucket_t *hbp;
9738	uint32_t bucket, cnt, total = 0;
9739
9740	if (!htblp->ht_initialized) {
9741		DERR("hash_destroy: not initialized\n");
9742		return;
9743	}
9744	/* free all elements from hash table */
9745	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9746	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
9747		hbp = &htblp->ht_buckets[bucket];
9748
9749		/* build list of elements to be freed */
9750		curr_hep = hbp->hb_entries;
9751		cnt = 0;
9752		while (curr_hep != NULL) {
9753			cnt++;
9754			free_hep = curr_hep;
9755			curr_hep = curr_hep->he_next;
9756
9757			free_hep->he_next = free_list;
9758			free_list = free_hep;
9759		}
9760		ASSERT(cnt == hbp->hb_count);
9761		total += cnt;
9762		hbp->hb_count = 0;
9763		hbp->hb_entries = NULL;
9764	}
9765	ASSERT(total == htblp->ht_count);
9766	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
9767	    htblp, htblp->ht_nbuckets, total);
9768	rw_exit(&htblp->ht_table_lock);
9769
9770	/* free all objects, now without holding the hash table lock */
9771	cnt = 0;
9772	while (free_list != NULL) {
9773		cnt++;
9774		free_hep = free_list;
9775		free_list = free_list->he_next;
9776		if (htblp->ht_free_func != NULL) {
9777			(*htblp->ht_free_func)(free_hep->he_objp);
9778		}
9779		kmem_free(free_hep, sizeof (*free_hep));
9780	}
9781	ASSERT(total == cnt);
9782
9783	/* free hash buckets and destroy locks */
9784	kmem_free(htblp->ht_buckets,
9785	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);
9786
9787	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9788	htblp->ht_buckets = NULL;
9789	htblp->ht_count = 0;
9790	htblp->ht_nbuckets = 0;
9791	htblp->ht_free_func = NULL;
9792	htblp->ht_lookup_func = NULL;
9793	htblp->ht_initialized = B_FALSE;
9794	rw_exit(&htblp->ht_table_lock);
9795
9796	mutex_destroy(&htblp->ht_key_lock);
9797	rw_destroy(&htblp->ht_table_lock);
9798}
9799
9800/*
9801 * daplka_hash_getsize:
9802 *	return the number of objects in hash table
9803 *
9804 * input:
9805 *	htblp			pointer to hash table
9806 *
9807 * output:
9808 *	none
9809 *
9810 * return value(s):
9811 *	number of objects in hash table
9812 */
9813static uint32_t
9814daplka_hash_getsize(daplka_hash_table_t *htblp)
9815{
9816	uint32_t sz;
9817
9818	rw_enter(&htblp->ht_table_lock, RW_READER);
9819	sz = htblp->ht_count;
9820	rw_exit(&htblp->ht_table_lock);
9821
9822	return (sz);
9823}
9824
9825/*
9826 * this function is used as the ht_lookup_func above; it takes a
9827 * reference on each object found. others may use a more elaborate one.
9828 */
9829static void
9830daplka_hash_generic_lookup(void *obj)
9831{
9832	daplka_resource_t	*rp = (daplka_resource_t *)obj;
9833
9834	mutex_enter(&rp->rs_reflock);
9835	rp->rs_refcnt++;
9836	ASSERT(rp->rs_refcnt != 0);
9837	mutex_exit(&rp->rs_reflock);
9838}
9839
9840/*
9841 * Generates a non-zero 32 bit hash key used for the timer hash table.
9842 */
9843static uint32_t
9844daplka_timer_hkey_gen()
9845{
9846	uint32_t new_hkey;
9847
9848	do {
9849		new_hkey = atomic_add_32_nv(&daplka_timer_hkey, 1);
9850	} while (new_hkey == 0);
9851
9852	return (new_hkey);
9853}
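/*
 * e.g. when the 32-bit counter wraps around, atomic_add_32_nv returns
 * zero exactly once; the loop retries so callers never see a zero hkey.
 */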
9854
9855
9856/*
9857 * The DAPL KA debug logging routines
9858 */
9859
9860/*
9861 * Add the string str to the end of the debug log, followed by a newline.
9862 */
9863static void
9864daplka_dbglog(char *str)
9865{
9866	size_t	length;
9867	size_t	remlen;
9868
9869	/*
9870	 * If the log has not been initialized, drop the message.
9871	 */
9872	if (!daplka_dbginit) {
9873		return;
9874	}
9875	mutex_enter(&daplka_dbglock);
9876	/*
9877	 * Note the log is circular; if this string would run over the end,
9878	 * we copy the first piece to the end and then the last piece to
9879	 * the beginning of the log.
9880	 */
9881	length = strlen(str);
9882
9883	remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9884
9885	if (length > remlen) {
9886		if (remlen)
9887			bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9888		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0';
9889		str += remlen;
9890		length -= remlen;
9891		daplka_dbgnext = 0;
9892	}
9893	bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9894	daplka_dbgnext += length;
9895
9896	if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9897		daplka_dbgnext = 0;
9898	mutex_exit(&daplka_dbglock);
9899}
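/*
 * worked example, as a sketch: with a 16-byte buffer, daplka_dbgnext
 * at 12 and an 8-byte string, remlen is 16 - 12 - 1 = 3; the first 3
 * bytes land at offsets 12-14, offset 15 holds the NUL, and the
 * remaining 5 bytes restart at offset 0, leaving daplka_dbgnext at 5.
 */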
9900
9901
9902/*
9903 * Add a printf-style message to whichever debug logs we're currently using.
9904 */
9905static void
9906daplka_debug(const char *fmt, ...)
9907{
9908	char	buff[512];
9909	va_list	ap;
9910	/*
9911	 * Prepend the thread id and a high-resolution timestamp
9912	 * (nanoseconds and the uppermost digits are dropped)
9913	 * to the specified string.
9914	 * The timestamp unit is 10 microseconds, so the 9-digit value
9915	 * wraps around every 10^9 * 10 us = 10000 seconds.
9916	 * Ex: gethrtime() = X ns = X/1000 us = X/10000 ticks of 10 us.
9917	 */
9918	int	micro_time = (int)((gethrtime() / 10000) % 1000000000);
9919	(void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9920
9921	va_start(ap, fmt);
9922	(void) vsnprintf(buff+strlen(buff), sizeof (buff)-strlen(buff), fmt, ap);
9923	va_end(ap);
9924
9925	daplka_dbglog(buff);
9926}
9927
9928static void
9929daplka_console(const char *fmt, ...)
9930{
9931	char buff[512];
9932	va_list ap;
9933
9934	va_start(ap, fmt);
9935	(void) vsnprintf(buff, sizeof (buff), fmt, ap);
9936	va_end(ap);
9937
9938	cmn_err(CE_CONT, "%s", buff);
9939}
9940