tavor_umap.c revision 9517:b4839b0aa7a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * tavor_umap.c
29 *    Tavor Userland Mapping Routines
30 *
31 *    Implements all the routines necessary for enabling direct userland
32 *    access to the Tavor hardware.  This includes all routines necessary for
33 *    maintaining the "userland resources database" and all the support routines
34 *    for the devmap calls.
35 */
36
37#include <sys/types.h>
38#include <sys/conf.h>
39#include <sys/ddi.h>
40#include <sys/sunddi.h>
41#include <sys/modctl.h>
42#include <sys/file.h>
43#include <sys/avl.h>
44#include <sys/sysmacros.h>
45
46#include <sys/ib/adapters/tavor/tavor.h>
47
48/* Tavor HCA state pointer (extern) */
49extern void *tavor_statep;
50
51/* Tavor HCA Userland Resource Database (extern) */
52extern tavor_umap_db_t tavor_userland_rsrc_db;
53
54static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55    tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63    offset_t off, size_t len, void **pvtp);
64static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65    devmap_cookie_t new_dhp, void **new_pvtp);
66static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67    offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68    devmap_cookie_t new_dhp2, void **pvtp2);
69static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70    offset_t off, size_t len, void **pvtp);
71static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72    devmap_cookie_t new_dhp, void **new_pvtp);
73static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74    offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75    devmap_cookie_t new_dhp2, void **pvtp2);
76static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77    ibt_mr_data_in_t *data, size_t data_sz);
78static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79    mlnx_umap_cq_data_out_t *data, size_t data_sz);
80static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81    mlnx_umap_qp_data_out_t *data, size_t data_sz);
82static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83    mlnx_umap_srq_data_out_t *data, size_t data_sz);
84static int tavor_umap_db_compare(const void *query, const void *entry);
85static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86    mlnx_umap_pd_data_out_t *data, size_t data_sz);
87
88
89/*
90 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91 * respectively.  They are used to handle (among other things) partial
92 * unmappings and to provide a method for invalidating mappings inherited
93 * as a result of a fork(2) system call.
94 */
95static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
96	DEVMAP_OPS_REV,
97	tavor_devmap_umem_map,
98	NULL,
99	tavor_devmap_umem_dup,
100	tavor_devmap_umem_unmap
101};
102static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
103	DEVMAP_OPS_REV,
104	tavor_devmap_devmem_map,
105	NULL,
106	tavor_devmap_devmem_dup,
107	tavor_devmap_devmem_unmap
108};
109
110/*
111 * tavor_devmap()
112 *    Context: Can be called from user context.
113 */
114/* ARGSUSED */
115int
116tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117    size_t *maplen, uint_t model)
118{
119	tavor_state_t	*state;
120	tavor_rsrc_t 	*rsrcp;
121	minor_t		instance;
122	uint64_t	key, value;
123	uint_t		type;
124	int		err, status;
125
126	TAVOR_TNF_ENTER(tavor_devmap);
127
128	/* Get Tavor softstate structure from instance */
129	instance = TAVOR_DEV_INSTANCE(dev);
130	state = ddi_get_soft_state(tavor_statep, instance);
131	if (state == NULL) {
132		TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
133		TAVOR_TNF_EXIT(tavor_devmap);
134		return (ENXIO);
135	}
136
137	/*
138	 * Access to the Tavor devmap interface is not allowed in
139	 * "maintenance mode".
140	 */
141	if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
142		TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
143		    TAVOR_TNF_ERROR, "");
144		TAVOR_TNF_EXIT(tavor_devmap);
145		return (EFAULT);
146	}
147
148	/*
149	 * The bottom bits of "offset" are undefined (number depends on
150	 * system PAGESIZE).  Shifting these off leaves us with a "key".
151	 * The "key" is actually a combination of both a real key value
152	 * (for the purpose of database lookup) and a "type" value.  We
153	 * extract this information before doing the database lookup.
154	 */
155	key  = off >> PAGESHIFT;
156	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
157	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
158	status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
159	if (status == DDI_SUCCESS) {
160		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
161
162		switch (type) {
163		case MLNX_UMAP_UARPG_RSRC:
164			/*
165			 * Double check that the process that open()'d Tavor is
166			 * the same process attempting to mmap() the UAR page.
167			 */
168			if (key != ddi_get_pid()) {
169				TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
170				    TAVOR_TNF_ERROR, "");
171				TAVOR_TNF_EXIT(tavor_devmap);
172				return (EINVAL);
173			}
174
175			/* Map the UAR page out for userland access */
176			status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
177			    &err);
178			if (status != DDI_SUCCESS) {
179				TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
180				    TAVOR_TNF_ERROR, "");
181				TAVOR_TNF_EXIT(tavor_devmap);
182				return (err);
183			}
184			break;
185
186		case MLNX_UMAP_CQMEM_RSRC:
187			/* Map the CQ memory out for userland access */
188			status = tavor_umap_cqmem(state, dhp, rsrcp, off,
189			    maplen, &err);
190			if (status != DDI_SUCCESS) {
191				TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
192				    TAVOR_TNF_ERROR, "");
193				TAVOR_TNF_EXIT(tavor_devmap);
194				return (err);
195			}
196			break;
197
198		case MLNX_UMAP_QPMEM_RSRC:
199			/* Map the QP memory out for userland access */
200			status = tavor_umap_qpmem(state, dhp, rsrcp, off,
201			    maplen, &err);
202			if (status != DDI_SUCCESS) {
203				TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
204				    TAVOR_TNF_ERROR, "");
205				TAVOR_TNF_EXIT(tavor_devmap);
206				return (err);
207			}
208			break;
209
210		case MLNX_UMAP_SRQMEM_RSRC:
211			/* Map the SRQ memory out for userland access */
212			status = tavor_umap_srqmem(state, dhp, rsrcp, off,
213			    maplen, &err);
214			if (status != DDI_SUCCESS) {
215				TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
216				    TAVOR_TNF_ERROR, "");
217				TAVOR_TNF_EXIT(tavor_devmap);
218				return (err);
219			}
220			break;
221
222		default:
223			TAVOR_WARNING(state, "unexpected rsrc type in devmap");
224			TNF_PROBE_0(tavor_devmap_invrsrc_fail,
225			    TAVOR_TNF_ERROR, "");
226			TAVOR_TNF_EXIT(tavor_devmap);
227			return (EINVAL);
228		}
229	} else {
230		TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
231		TAVOR_TNF_EXIT(tavor_devmap);
232		return (EINVAL);
233	}
234
235	TAVOR_TNF_EXIT(tavor_devmap);
236	return (0);
237}
238
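/*
 * Illustrative sketch (not part of the driver):  the "offset" handled above
 * is built (for CQ, QP, and SRQ memory) by the *_data_out() routines later
 * in this file as ((resource_number << MLNX_UMAP_RSRC_TYPE_SHIFT) | type)
 * << PAGESHIFT, and tavor_devmap() inverts that encoding exactly as
 * restated below.  The helper name is hypothetical and exists only to show
 * the bit layout in one place.
 */
#if 0	/* example only; not compiled into the driver */
static void
tavor_umap_offset_decode_example(offset_t off, uint64_t *key, uint_t *type)
{
	uint64_t	tmp;

	tmp   = (uint64_t)off >> PAGESHIFT;
	*type = (uint_t)(tmp & MLNX_UMAP_RSRC_TYPE_MASK);
	*key  = tmp >> MLNX_UMAP_RSRC_TYPE_SHIFT;
}
#endif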
239
240/*
241 * tavor_umap_uarpg()
242 *    Context: Can be called from user context.
243 */
244static int
245tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
246    tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
247{
248	int		status;
249	uint_t		maxprot;
250
251	TAVOR_TNF_ENTER(tavor_umap_uarpg);
252
253	/* Map out the UAR page (doorbell page) */
254	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
255	status = devmap_devmem_setup(dhp, state->ts_dip,
256	    &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
257	    PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
258	    &state->ts_reg_accattr);
259	if (status < 0) {
260		*err = status;
261		TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
262		TAVOR_TNF_EXIT(tavor_umap_uarpg);
263		return (DDI_FAILURE);
264	}
265
266	*maplen = PAGESIZE;
267	TAVOR_TNF_EXIT(tavor_umap_uarpg);
268	return (DDI_SUCCESS);
269}
270
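/*
 * Worked example (illustrative values only):  the UAR mapping is always a
 * single page, and its devmap "offset" uses the opening process's PID as
 * the key (see the ddi_get_pid() check in tavor_devmap() above) together
 * with the MLNX_UMAP_UARPG_RSRC type.  Assuming a 4 KB PAGESIZE and a
 * process with PID 1234, the offset such a process would hand to mmap(2)
 * works out to ((1234 << MLNX_UMAP_RSRC_TYPE_SHIFT) |
 * MLNX_UMAP_UARPG_RSRC) << 12.
 */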
271
272/*
273 * tavor_umap_cqmem()
274 *    Context: Can be called from user context.
275 */
276/* ARGSUSED */
277static int
278tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
279    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
280{
281	tavor_cqhdl_t	cq;
282	size_t		size;
283	uint_t		maxprot;
284	int		status;
285
286	TAVOR_TNF_ENTER(tavor_umap_cqmem);
287
288	/* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
289	cq = (tavor_cqhdl_t)rsrcp->tr_addr;
290
291	/* Round-up the CQ size to system page size */
292	size = ptob(btopr(cq->cq_cqinfo.qa_size));
293
294	/* Map out the CQ memory */
295	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
296	status = devmap_umem_setup(dhp, state->ts_dip,
297	    &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
298	    maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
299	if (status < 0) {
300		*err = status;
301		TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
302		TAVOR_TNF_EXIT(tavor_umap_cqmem);
303		return (DDI_FAILURE);
304	}
305	*maplen = size;
306
307	TAVOR_TNF_EXIT(tavor_umap_cqmem);
308	return (DDI_SUCCESS);
309}
310
311
312/*
313 * tavor_umap_qpmem()
314 *    Context: Can be called from user context.
315 */
316/* ARGSUSED */
317static int
318tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
319    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
320{
321	tavor_qphdl_t	qp;
322	offset_t	offset;
323	size_t		size;
324	uint_t		maxprot;
325	int		status;
326
327	TAVOR_TNF_ENTER(tavor_umap_qpmem);
328
329	/* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
330	qp = (tavor_qphdl_t)rsrcp->tr_addr;
331
332	/*
333	 * Calculate the offset of the first work queue (send or recv) into
334	 * the memory (ddi_umem_alloc()) allocated previously for the QP.
335	 */
336	offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
337	    (uintptr_t)qp->qp_wqinfo.qa_buf_real);
338
339	/* Round-up the QP work queue sizes to system page size */
340	size = ptob(btopr(qp->qp_wqinfo.qa_size));
341
342	/* Map out the QP memory */
343	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
344	status = devmap_umem_setup(dhp, state->ts_dip,
345	    &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
346	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
347	if (status < 0) {
348		*err = status;
349		TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
350		TAVOR_TNF_EXIT(tavor_umap_qpmem);
351		return (DDI_FAILURE);
352	}
353	*maplen = size;
354
355	TAVOR_TNF_EXIT(tavor_umap_qpmem);
356	return (DDI_SUCCESS);
357}
358
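/*
 * Worked example (illustrative values only, assuming a 4 KB PAGESIZE):  for
 * a QP whose work queue allocation has qa_size = 5632 bytes,
 * ptob(btopr(qa_size)) rounds the mapped length up to 8192 bytes (two
 * pages).  If qa_buf_aligned sits 512 bytes past qa_buf_real, the offset
 * passed to devmap_umem_setup() above is 512, so the userland mapping
 * begins at the aligned start of the first work queue rather than at the
 * start of the raw ddi_umem_alloc() buffer.
 */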
359
360/*
361 * tavor_umap_srqmem()
362 *    Context: Can be called from user context.
363 */
364/* ARGSUSED */
365static int
366tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
367    tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
368{
369	tavor_srqhdl_t	srq;
370	offset_t	offset;
371	size_t		size;
372	uint_t		maxprot;
373	int		status;
374
375	TAVOR_TNF_ENTER(tavor_umap_srqmem);
376
377	/* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
378	srq = (tavor_srqhdl_t)rsrcp->tr_addr;
379
380	/*
381	 * Calculate the offset of the first shared recv queue into the memory
382	 * (ddi_umem_alloc()) allocated previously for the SRQ.
383	 */
384	offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
385	    (uintptr_t)srq->srq_wqinfo.qa_buf_real);
386
387	/* Round-up the SRQ work queue sizes to system page size */
388	size = ptob(btopr(srq->srq_wqinfo.qa_size));
389
390	/* Map out the SRQ memory */
391	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
392	status = devmap_umem_setup(dhp, state->ts_dip,
393	    &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
394	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
395	if (status < 0) {
396		*err = status;
397		TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
398		TAVOR_TNF_EXIT(tavor_umap_srqmem);
399		return (DDI_FAILURE);
400	}
401	*maplen = size;
402
403	TAVOR_TNF_EXIT(tavor_umap_srqmem);
404	return (DDI_SUCCESS);
405}
406
407
408/*
409 * tavor_devmap_umem_map()
410 *    Context: Can be called from kernel context.
411 */
412/* ARGSUSED */
413static int
414tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
415    offset_t off, size_t len, void **pvtp)
416{
417	tavor_state_t		*state;
418	tavor_devmap_track_t	*dvm_track;
419	tavor_cqhdl_t		cq;
420	tavor_qphdl_t		qp;
421	tavor_srqhdl_t		srq;
422	minor_t			instance;
423	uint64_t		key;
424	uint_t			type;
425
426	TAVOR_TNF_ENTER(tavor_devmap_umem_map);
427
428	/* Get Tavor softstate structure from instance */
429	instance = TAVOR_DEV_INSTANCE(dev);
430	state = ddi_get_soft_state(tavor_statep, instance);
431	if (state == NULL) {
432		TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
433		    "");
434		TAVOR_TNF_EXIT(tavor_devmap_umem_map);
435		return (ENXIO);
436	}
437
438	/*
439	 * The bottom bits of "offset" are undefined (number depends on
440	 * system PAGESIZE).  Shifting these off leaves us with a "key".
441	 * The "key" is actually a combination of both a real key value
442	 * (for the purpose of database lookup) and a "type" value.  Although
443	 * we are not going to do any database lookup per se, we do want
444	 * to extract the "key" and the "type" (to enable faster lookup of
445	 * the appropriate CQ or QP handle).
446	 */
447	key  = off >> PAGESHIFT;
448	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450
451	/*
452	 * Allocate an entry to track the mapping and unmapping (specifically,
453	 * partial unmapping) of this resource.
454	 */
455	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456	    sizeof (tavor_devmap_track_t), KM_SLEEP);
457	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
458	dvm_track->tdt_offset = off;
459	dvm_track->tdt_state  = state;
460	dvm_track->tdt_refcnt = 1;
461	mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
462	    DDI_INTR_PRI(state->ts_intrmsi_pri));
463
464	/*
465	 * Depending on the type of resource that has been mapped out, we
466	 * need to update the QP or CQ handle to reflect that it has, in
467	 * fact, been mapped.  This allows the driver code which frees a QP
468	 * or a CQ to know whether it is appropriate to do a
469	 * devmap_devmem_remap() to invalidate the userland mapping for the
470	 * corresponding queue's memory.
471	 */
472	if (type == MLNX_UMAP_CQMEM_RSRC) {
473
474		/* Use "key" (CQ number) to do fast lookup of CQ handle */
475		cq = tavor_cqhdl_from_cqnum(state, key);
476
477		/*
478		 * Update the handle to the userland mapping.  Note:  If
479		 * the CQ already has a valid userland mapping, then stop
480		 * and return failure.
481		 */
482		mutex_enter(&cq->cq_lock);
483		if (cq->cq_umap_dhp == NULL) {
484			cq->cq_umap_dhp = dhp;
485			dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
486			mutex_exit(&cq->cq_lock);
487		} else {
488			mutex_exit(&cq->cq_lock);
489			goto umem_map_fail;
490		}
491
492	} else if (type == MLNX_UMAP_QPMEM_RSRC) {
493
494		/* Use "key" (QP number) to do fast lookup of QP handle */
495		qp = tavor_qphdl_from_qpnum(state, key);
496
497		/*
498		 * Update the handle to the userland mapping.  Note:  If
499		 * the QP already has a valid userland mapping, then stop
500		 * and return failure.
501		 */
502		mutex_enter(&qp->qp_lock);
503		if (qp->qp_umap_dhp == NULL) {
504			qp->qp_umap_dhp = dhp;
505			dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
506			mutex_exit(&qp->qp_lock);
507		} else {
508			mutex_exit(&qp->qp_lock);
509			goto umem_map_fail;
510		}
511
512	} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
513
514		/* Use "key" (SRQ number) to do fast lookup on SRQ handle */
515		srq = tavor_srqhdl_from_srqnum(state, key);
516
517		/*
518		 * Update the handle to the userland mapping.  Note:  If the
519		 * SRQ already has a valid userland mapping, then stop and
520		 * return failure.
521		 */
522		mutex_enter(&srq->srq_lock);
523		if (srq->srq_umap_dhp == NULL) {
524			srq->srq_umap_dhp = dhp;
525			dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
526			mutex_exit(&srq->srq_lock);
527		} else {
528			mutex_exit(&srq->srq_lock);
529			goto umem_map_fail;
530		}
531	}
532
533	/*
534	 * Pass the private "Tavor devmap tracking structure" back.  This
535	 * pointer will be returned in subsequent "unmap" callbacks.
536	 */
537	*pvtp = dvm_track;
538
539	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
540	return (DDI_SUCCESS);
541
542umem_map_fail:
543	mutex_destroy(&dvm_track->tdt_lock);
544	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
545	TAVOR_TNF_EXIT(tavor_devmap_umem_map);
546	return (DDI_FAILURE);
547}
548
549
550/*
551 * tavor_devmap_umem_dup()
552 *    Context: Can be called from kernel context.
553 */
554/* ARGSUSED */
555static int
556tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
557    void **new_pvtp)
558{
559	tavor_state_t		*state;
560	tavor_devmap_track_t	*dvm_track, *new_dvm_track;
561	uint_t			maxprot;
562	int			status;
563
564	TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
565
566	/*
567	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
568	 * structure" (in "pvtp").
569	 */
570	dvm_track = (tavor_devmap_track_t *)pvtp;
571	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
572	state = dvm_track->tdt_state;
573
574	/*
575	 * Since this devmap_dup() entry point is generally called
576	 * when a process does fork(2), it is incumbent upon the driver
577	 * to ensure that the child does not inherit a valid copy of
578	 * the parent's QP or CQ resource.  This is accomplished by using
579	 * devmap_devmem_remap() to invalidate the child's mapping to the
580	 * kernel memory.
581	 */
582	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
583	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
584	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
585	if (status != DDI_SUCCESS) {
586		TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
587		TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
588		return (status);
589	}
590
591	/*
592	 * Allocate a new entry to track the subsequent unmapping
593	 * (specifically, all partial unmappings) of the child's newly
594	 * invalidated resource.  Note: Setting the "tdt_size" field to
595	 * zero here is an indication to the devmap_unmap() entry point
596	 * that this mapping is invalid, and that its subsequent unmapping
597	 * should not affect any of the parent's CQ or QP resources.
598	 */
599	new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
600	    sizeof (tavor_devmap_track_t), KM_SLEEP);
601	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
602	new_dvm_track->tdt_offset = 0;
603	new_dvm_track->tdt_state  = state;
604	new_dvm_track->tdt_refcnt = 1;
605	new_dvm_track->tdt_size	  = 0;
606	mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
607	    DDI_INTR_PRI(state->ts_intrmsi_pri));
608	*new_pvtp = new_dvm_track;
609
610	TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
611	return (DDI_SUCCESS);
612}
613
614
615/*
616 * tavor_devmap_umem_unmap()
617 *    Context: Can be called from kernel context.
618 */
619/* ARGSUSED */
620static void
621tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
622    size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
623    devmap_cookie_t new_dhp2, void **pvtp2)
624{
625	tavor_state_t 		*state;
626	tavor_rsrc_t 		*rsrcp;
627	tavor_devmap_track_t	*dvm_track;
628	tavor_cqhdl_t		cq;
629	tavor_qphdl_t		qp;
630	tavor_srqhdl_t		srq;
631	uint64_t		key, value;
632	uint_t			type;
633	uint_t			size;
634	int			status;
635
636	TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
637
638	/*
639	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
640	 * structure" (in "pvtp").
641	 */
642	dvm_track = (tavor_devmap_track_t *)pvtp;
643	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
644	state	  = dvm_track->tdt_state;
645
646	/*
647	 * Extract the "offset" from the "Tavor devmap tracking structure".
648	 * Note: The input argument "off" is ignored here because the
649	 * Tavor mapping interfaces define a very specific meaning to
650	 * each "logical offset".  Also extract the "key" and "type" encoded
651	 * in the logical offset.
652	 */
653	key  = dvm_track->tdt_offset >> PAGESHIFT;
654	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
655	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
656
657	/*
658	 * Extract the "size" of the mapping.  If this size is determined
659	 * to be zero, then it is an indication of a previously invalidated
660	 * mapping, and no CQ or QP resources should be affected.
661	 */
662	size = dvm_track->tdt_size;
663
664	/*
665	 * If only the "middle" portion of a given mapping is being unmapped,
666	 * then we are effectively creating one new piece of mapped memory.
667	 * (The original region is divided into three pieces, of which the
668	 * middle piece is being removed.  This leaves two pieces.)  Since we
669	 * started with one piece and now have two pieces, we need to increment
670	 * the counter in the "Tavor devmap tracking structure".
671	 *
672	 * If, however, the whole mapped region is being unmapped, then we
673	 * have started with one region which we are completely removing.
674	 * In this case, we need to decrement the counter in the "Tavor
675	 * devmap tracking structure".
676	 *
677	 * In each of the remaining cases, we will have started with one
678	 * mapped region and ended with one (different) region.  So no counter
679	 * modification is necessary.
680	 */
681	mutex_enter(&dvm_track->tdt_lock);
682	if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
683		dvm_track->tdt_refcnt--;
684	} else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
685		dvm_track->tdt_refcnt++;
686	}
687	mutex_exit(&dvm_track->tdt_lock);
688
689	/*
690	 * For each of the cases where the region is being divided, we
691	 * need to pass back the "Tavor devmap tracking structure".  This way
692	 * we get it back when each of the remaining pieces is subsequently
693	 * unmapped.
694	 */
695	if (new_dhp1 != NULL) {
696		*pvtp1 = pvtp;
697	}
698	if (new_dhp2 != NULL) {
699		*pvtp2 = pvtp;
700	}
701
702	/*
703	 * If the "Tavor devmap tracking structure" is no longer being
704	 * referenced, then free it up.  Otherwise, return.
705	 */
706	if (dvm_track->tdt_refcnt == 0) {
707		mutex_destroy(&dvm_track->tdt_lock);
708		kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
709
710		/*
711		 * If the mapping was invalid (see explanation above), then
712		 * no further processing is necessary.
713		 */
714		if (size == 0) {
715			TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
716			return;
717		}
718	} else {
719		TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
720		return;
721	}
722
723	/*
724	 * Now that we can guarantee that the user memory is fully unmapped,
725	 * we can use the "key" and "type" values to try to find the entry
726	 * in the "userland resources database".  If it's found, then it
727	 * indicates that the queue memory (CQ or QP) has not yet been freed.
728	 * In this case, we update the corresponding CQ or QP handle to
729	 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
730	 * If it's _not_ found, then it indicates that the CQ or QP memory
731	 * was, in fact, freed before it was unmapped (thus requiring a
732	 * previous invalidation by remapping - which will already have
733	 * been done in the free routine).
734	 */
735	status = tavor_umap_db_find(state->ts_instance, key, type, &value,
736	    0, NULL);
737	if (status == DDI_SUCCESS) {
738		/*
739		 * Depending on the type of the mapped resource (CQ or QP),
740		 * update handle to indicate that no invalidation remapping
741		 * will be necessary.
742		 */
743		if (type == MLNX_UMAP_CQMEM_RSRC) {
744
745			/* Use "value" to convert to CQ handle */
746			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
747			cq = (tavor_cqhdl_t)rsrcp->tr_addr;
748
749			/*
750			 * Invalidate the handle to the userland mapping.
751			 * Note: We must ensure that the mapping being
752			 * unmapped here is the current one for the CQ.  It
753			 * is possible that it might not be if this CQ has
754			 * been resized and the previous CQ memory has not
755			 * yet been unmapped.  But in that case, because of
756			 * the devmap_devmem_remap(), there is no longer any
757			 * association between the mapping and the real CQ
758			 * kernel memory.
759			 */
760			mutex_enter(&cq->cq_lock);
761			if (cq->cq_umap_dhp == dhp) {
762				cq->cq_umap_dhp = (devmap_cookie_t)NULL;
763			}
764			mutex_exit(&cq->cq_lock);
765
766		} else if (type == MLNX_UMAP_QPMEM_RSRC) {
767
768			/* Use "value" to convert to QP handle */
769			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
770			qp = (tavor_qphdl_t)rsrcp->tr_addr;
771
772			/*
773			 * Invalidate the handle to the userland mapping.
774			 * Note: we ensure that the mapping being unmapped
775			 * here is the current one for the QP.  This is
776			 * more of a sanity check here since, unlike CQs
777			 * (above) we do not support resize of QPs.
778			 */
779			mutex_enter(&qp->qp_lock);
780			if (qp->qp_umap_dhp == dhp) {
781				qp->qp_umap_dhp = (devmap_cookie_t)NULL;
782			}
783			mutex_exit(&qp->qp_lock);
784
785		} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
786
787			/* Use "value" to convert to SRQ handle */
788			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
789			srq = (tavor_srqhdl_t)rsrcp->tr_addr;
790
791			/*
792			 * Invalidate the handle to the userland mapping.
793			 * Note: we ensure that the mapping being unmapped
794			 * here is the current one for the SRQ.  This is
795			 * more of a sanity check here since, unlike CQs
796			 * (above), we do not support resize of SRQs.
797			 */
798			mutex_enter(&srq->srq_lock);
799			if (srq->srq_umap_dhp == dhp) {
800				srq->srq_umap_dhp = (devmap_cookie_t)NULL;
801			}
802			mutex_exit(&srq->srq_lock);
803		}
804	}
805
806	TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
807}
808
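/*
 * Worked example (illustrative only):  suppose a three-page QP mapping is
 * torn down in pieces.  Unmapping just the middle page leaves two live
 * pieces (new_dhp1 and new_dhp2 both non-NULL), so "tdt_refcnt" goes from
 * 1 to 2.  Each remaining piece is later unmapped completely (both new
 * handles NULL), so the count drops to 1 and then to 0, at which point the
 * tracking structure is freed and the CQ/QP/SRQ handle is updated as
 * described above.
 */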
809
810/*
811 * tavor_devmap_devmem_map()
812 *    Context: Can be called from kernel context.
813 */
814/* ARGSUSED */
815static int
816tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
817    offset_t off, size_t len, void **pvtp)
818{
819	tavor_state_t		*state;
820	tavor_devmap_track_t	*dvm_track;
821	minor_t			instance;
822
823	TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
824
825	/* Get Tavor softstate structure from instance */
826	instance = TAVOR_DEV_INSTANCE(dev);
827	state = ddi_get_soft_state(tavor_statep, instance);
828	if (state == NULL) {
829		TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
830		    "");
831		TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
832		return (ENXIO);
833	}
834
835	/*
836	 * Allocate an entry to track the mapping and unmapping of this
837	 * resource.  Note:  We don't need to initialize the "refcnt" or
838	 * "offset" fields here, nor do we need to initialize the mutex
839	 * used with the "refcnt".  Since UAR pages are single pages, they
840	 * are not subject to "partial" unmappings.  This makes these other
841	 * fields unnecessary.
842	 */
843	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
844	    sizeof (tavor_devmap_track_t), KM_SLEEP);
845	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
846	dvm_track->tdt_state  = state;
847	dvm_track->tdt_size   = PAGESIZE;
848
849	/*
850	 * Pass the private "Tavor devmap tracking structure" back.  This
851	 * pointer will be returned in a subsequent "unmap" callback.
852	 */
853	*pvtp = dvm_track;
854
855	TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
856	return (DDI_SUCCESS);
857}
858
859
860/*
861 * tavor_devmap_devmem_dup()
862 *    Context: Can be called from kernel context.
863 */
864/* ARGSUSED */
865static int
866tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
867    devmap_cookie_t new_dhp, void **new_pvtp)
868{
869	tavor_state_t		*state;
870	tavor_devmap_track_t	*dvm_track;
871	uint_t			maxprot;
872	int			status;
873
874	TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
875
876	/*
877	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
878	 * structure" (in "pvtp").  Note: If the tracking structure is NULL
879	 * here, it means that the mapping corresponds to an invalid mapping.
880	 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
881	 */
882	dvm_track = (tavor_devmap_track_t *)pvtp;
883	if (dvm_track == NULL) {
884		*new_pvtp = NULL;
885		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
886		return (DDI_SUCCESS);
887	}
888
889	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
890	state = dvm_track->tdt_state;
891
892	/*
893	 * Since this devmap_dup() entry point is generally called
894	 * when a process does fork(2), it is incumbent upon the driver
895	 * to ensure that the child does not inherit a valid copy of
896	 * the parent's resource.  This is accomplished by using
897	 * devmap_devmem_remap() to invalidate the child's mapping to the
898	 * kernel memory.
899	 */
900	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
901	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
902	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
903	if (status != DDI_SUCCESS) {
904		TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
905		TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
906		return (status);
907	}
908
909	/*
910	 * Since the region is invalid, there is no need for us to
911	 * allocate and continue to track an additional "Tavor devmap
912	 * tracking structure".  Instead we return NULL here, which is an
913	 * indication to the devmap_unmap() entry point that this entry
914	 * can be safely ignored.
915	 */
916	*new_pvtp = NULL;
917
918	TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
919	return (DDI_SUCCESS);
920}
921
922
923/*
924 * tavor_devmap_devmem_unmap()
925 *    Context: Can be called from kernel context.
926 */
927/* ARGSUSED */
928static void
929tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
930    size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
931    devmap_cookie_t new_dhp2, void **pvtp2)
932{
933	tavor_devmap_track_t	*dvm_track;
934
935	TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
936
937	/*
938	 * Free up the "Tavor devmap tracking structure" (in "pvtp").
939	 * There cannot be "partial" unmappings here because all UAR pages
940	 * are single pages.  Note: If the tracking structure is NULL here,
941	 * it means that the mapping corresponds to an invalid mapping.  In
942	 * this case, it can be safely ignored.
943	 */
944	dvm_track = (tavor_devmap_track_t *)pvtp;
945	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
946	if (dvm_track == NULL) {
947		TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
948		return;
949	}
950
951	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
952	TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
953}
954
955
956/*
957 * tavor_umap_ci_data_in()
958 *    Context: Can be called from user or kernel context.
959 */
960/* ARGSUSED */
961ibt_status_t
962tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
963    ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
964{
965	int	status;
966
967	TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
968
969	/*
970	 * Depending on the type of object about which additional information
971	 * is being provided (currently only MR is supported), we call the
972	 * appropriate resource-specific function.
973	 */
974	switch (object) {
975	case IBT_HDL_MR:
976		status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
977		    (ibt_mr_data_in_t *)data_p, data_sz);
978		if (status != DDI_SUCCESS) {
979			TNF_PROBE_0(tavor_umap_mr_data_in_fail,
980			    TAVOR_TNF_ERROR, "");
981			TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
982			return (status);
983		}
984		break;
985
986	/*
987	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
988	 * since the Tavor driver does not support these.
989	 */
990	case IBT_HDL_HCA:
991	case IBT_HDL_QP:
992	case IBT_HDL_CQ:
993	case IBT_HDL_PD:
994	case IBT_HDL_MW:
995	case IBT_HDL_AH:
996	case IBT_HDL_SCHED:
997	case IBT_HDL_EEC:
998	case IBT_HDL_RDD:
999	case IBT_HDL_SRQ:
1000		TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
1001		    TAVOR_TNF_ERROR, "");
1002		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1003		return (IBT_NOT_SUPPORTED);
1004
1005	/*
1006	 * Any other types are invalid.
1007	 */
1008	default:
1009		TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1010		    TAVOR_TNF_ERROR, "");
1011		TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1012		return (IBT_INVALID_PARAM);
1013	}
1014
1015	TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1016	return (DDI_SUCCESS);
1017}
1018
1019
1020/*
1021 * tavor_umap_mr_data_in()
1022 *    Context: Can be called from user or kernel context.
1023 */
1024static ibt_status_t
1025tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1026    size_t data_sz)
1027{
1028	TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1029
1030	if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1031		TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1032		    TAVOR_TNF_ERROR, "");
1033		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1034		return (IBT_NOT_SUPPORTED);
1035	}
1036
1037	/* Check for valid MR handle pointer */
1038	if (mr == NULL) {
1039		TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1040		    TAVOR_TNF_ERROR, "");
1041		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1042		return (IBT_MR_HDL_INVALID);
1043	}
1044
1045	/* Check for valid MR input structure size */
1046	if (data_sz < sizeof (ibt_mr_data_in_t)) {
1047		TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1048		    TAVOR_TNF_ERROR, "");
1049		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1050		return (IBT_INSUFF_RESOURCE);
1051	}
1052	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1053
1054	/*
1055	 * Ensure that the MR corresponds to userland memory and that it is
1056	 * a currently valid memory region as well.
1057	 */
1058	mutex_enter(&mr->mr_lock);
1059	if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1060		mutex_exit(&mr->mr_lock);
1061		TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1062		    TAVOR_TNF_ERROR, "");
1063		TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1064		return (IBT_MR_HDL_INVALID);
1065	}
1066
1067	/*
1068	 * If it has passed all the above checks, then extract the callback
1069	 * function and arguments from the input structure.  Copy them into
1070	 * the MR handle.  This function will be called only if the memory
1071	 * corresponding to the MR handle gets a umem_lockmemory() callback.
1072	 */
1073	mr->mr_umem_cbfunc = data->mr_func;
1074	mr->mr_umem_cbarg1 = data->mr_arg1;
1075	mr->mr_umem_cbarg2 = data->mr_arg2;
1076	mutex_exit(&mr->mr_lock);
1077
1078	TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1079	return (DDI_SUCCESS);
1080}
1081
1082
1083/*
1084 * tavor_umap_ci_data_out()
1085 *    Context: Can be called from user or kernel context.
1086 */
1087/* ARGSUSED */
1088ibt_status_t
1089tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1090    ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1091{
1092	int	status;
1093
1094	TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1095
1096	/*
1097	 * Depending on the type of object about which additional information
1098	 * is being requested (CQ, QP, SRQ, or PD), we call the appropriate
1099	 * resource-specific mapping function.
1100	 */
1101	switch (object) {
1102	case IBT_HDL_CQ:
1103		status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1104		    (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1105		if (status != DDI_SUCCESS) {
1106			TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1107			    TAVOR_TNF_ERROR, "");
1108			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1109			return (status);
1110		}
1111		break;
1112
1113	case IBT_HDL_QP:
1114		status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1115		    (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1116		if (status != DDI_SUCCESS) {
1117			TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1118			    TAVOR_TNF_ERROR, "");
1119			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1120			return (status);
1121		}
1122		break;
1123
1124	case IBT_HDL_SRQ:
1125		status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1126		    (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1127		if (status != DDI_SUCCESS) {
1128			TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1129			    TAVOR_TNF_ERROR, "");
1130			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1131			return (status);
1132		}
1133		break;
1134
1135	case IBT_HDL_PD:
1136		status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1137		    (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1138		if (status != DDI_SUCCESS) {
1139			TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1140			    TAVOR_TNF_ERROR, "");
1141			TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1142			return (status);
1143		}
1144		break;
1145
1146	/*
1147	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1148	 * since the Tavor driver does not support these.
1149	 */
1150	case IBT_HDL_HCA:
1151	case IBT_HDL_MR:
1152	case IBT_HDL_MW:
1153	case IBT_HDL_AH:
1154	case IBT_HDL_SCHED:
1155	case IBT_HDL_EEC:
1156	case IBT_HDL_RDD:
1157		TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1158		    TAVOR_TNF_ERROR, "");
1159		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1160		return (IBT_NOT_SUPPORTED);
1161
1162	/*
1163	 * Any other types are invalid.
1164	 */
1165	default:
1166		TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1167		    TAVOR_TNF_ERROR, "");
1168		TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1169		return (IBT_INVALID_PARAM);
1170	}
1171
1172	TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1173	return (DDI_SUCCESS);
1174}
1175
1176
1177/*
1178 * tavor_umap_cq_data_out()
1179 *    Context: Can be called from user or kernel context.
1180 */
1181static ibt_status_t
1182tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1183    size_t data_sz)
1184{
1185	TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1186
1187	/* Check for valid CQ handle pointer */
1188	if (cq == NULL) {
1189		TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1190		    TAVOR_TNF_ERROR, "");
1191		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192		return (IBT_CQ_HDL_INVALID);
1193	}
1194
1195	/* Check for valid CQ mapping structure size */
1196	if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1197		TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1198		    TAVOR_TNF_ERROR, "");
1199		TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1200		return (IBT_INSUFF_RESOURCE);
1201	}
1202	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1203
1204	/*
1205	 * If it has passed all the above checks, then fill in all the useful
1206	 * mapping information (including the mapping offset that will be
1207	 * passed back to the devmap() interface during a subsequent mmap()
1208	 * call).
1209	 *
1210	 * The "offset" for CQ mmap()'s looks like this:
1211	 * +----------------------------------------+--------+--------------+
1212	 * |		   CQ Number		    |  0x33  | Reserved (0) |
1213	 * +----------------------------------------+--------+--------------+
1214	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1215	 *
1216	 * This returns information about the mapping offset, the length of
1217	 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1218	 * number of CQEs the CQ memory can hold, and the size of each CQE.
1219	 */
1220	data->mcq_rev		= MLNX_UMAP_IF_VERSION;
1221	data->mcq_mapoffset	= ((((uint64_t)cq->cq_cqnum <<
1222	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1223	data->mcq_maplen	= cq->cq_cqinfo.qa_size;
1224	data->mcq_cqnum		= cq->cq_cqnum;
1225	data->mcq_numcqe	= cq->cq_bufsz;
1226	data->mcq_cqesz		= sizeof (tavor_hw_cqe_t);
1227
1228	TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1229	return (DDI_SUCCESS);
1230}
1231
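/*
 * Illustrative userland-side sketch (not part of the driver):  once a
 * consumer has obtained the mlnx_umap_cq_data_out_t contents for a CQ and
 * holds an open file descriptor on the Tavor device node (both of which
 * happen outside this file), the mcq_mapoffset/mcq_maplen pair is simply
 * handed to mmap(2), which in turn drives tavor_devmap() above.  The
 * function and variable names below are hypothetical.
 */
#if 0	/* example only; not compiled into the driver */
#include <sys/mman.h>

static void *
map_cq_ring_example(int hca_fd, const mlnx_umap_cq_data_out_t *cqinfo)
{
	/* Map the CQE ring read/write into this process's address space */
	return (mmap(NULL, (size_t)cqinfo->mcq_maplen,
	    PROT_READ | PROT_WRITE, MAP_SHARED, hca_fd,
	    (off_t)cqinfo->mcq_mapoffset));
}
#endif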
1232
1233/*
1234 * tavor_umap_qp_data_out()
1235 *    Context: Can be called from user or kernel context.
1236 */
1237static ibt_status_t
1238tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1239    size_t data_sz)
1240{
1241	TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1242
1243	/* Check for valid QP handle pointer */
1244	if (qp == NULL) {
1245		TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1246		    TAVOR_TNF_ERROR, "");
1247		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1248		return (IBT_QP_HDL_INVALID);
1249	}
1250
1251	/* Check for valid QP mapping structure size */
1252	if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1253		TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1254		    TAVOR_TNF_ERROR, "");
1255		TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1256		return (IBT_INSUFF_RESOURCE);
1257	}
1258	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1259
1260	/*
1261	 * If it has passed all the checks, then fill in all the useful
1262	 * mapping information (including the mapping offset that will be
1263	 * passed back to the devmap() interface during a subsequent mmap()
1264	 * call).
1265	 *
1266	 * The "offset" for QP mmap()'s looks like this:
1267	 * +----------------------------------------+--------+--------------+
1268	 * |		   QP Number		    |  0x44  | Reserved (0) |
1269	 * +----------------------------------------+--------+--------------+
1270	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1271	 *
1272	 * This returns information about the mapping offset, the length of
1273	 * the QP memory, and the QP number (for use in later send and recv
1274	 * doorbells).  It also returns the following information for both
1275	 * the receive work queue and the send work queue, respectively:  the
1276	 * offset (from the base mapped address) of the start of the given
1277	 * work queue, the 64-bit IB virtual address that corresponds to
1278	 * the base mapped address (needed for posting WQEs through the
1279	 * QP doorbells), the number of WQEs the given work queue can hold,
1280	 * and the size of each WQE for the given work queue.
1281	 */
1282	data->mqp_rev		= MLNX_UMAP_IF_VERSION;
1283	data->mqp_mapoffset	= ((((uint64_t)qp->qp_qpnum <<
1284	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1285	data->mqp_maplen	= qp->qp_wqinfo.qa_size;
1286	data->mqp_qpnum		= qp->qp_qpnum;
1287
1288	/*
1289	 * If this QP is associated with a shared receive queue (SRQ),
1290	 * then return invalid RecvQ parameters.  Otherwise, return
1291	 * the proper parameter values.
1292	 */
1293	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1294		data->mqp_rq_off	= (uint32_t)qp->qp_wqinfo.qa_size;
1295		data->mqp_rq_desc_addr	= (uint32_t)qp->qp_wqinfo.qa_size;
1296		data->mqp_rq_numwqe	= 0;
1297		data->mqp_rq_wqesz	= 0;
1298	} else {
1299		data->mqp_rq_off	= (uintptr_t)qp->qp_rq_buf -
1300		    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1301		data->mqp_rq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_rq_buf -
1302		    qp->qp_desc_off);
1303		data->mqp_rq_numwqe	= qp->qp_rq_bufsz;
1304		data->mqp_rq_wqesz	= (1 << qp->qp_rq_log_wqesz);
1305	}
1306	data->mqp_sq_off	= (uintptr_t)qp->qp_sq_buf -
1307	    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1308	data->mqp_sq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_sq_buf -
1309	    qp->qp_desc_off);
1310	data->mqp_sq_numwqe	= qp->qp_sq_bufsz;
1311	data->mqp_sq_wqesz	= (1 << qp->qp_sq_log_wqesz);
1312
1313	TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1314	return (DDI_SUCCESS);
1315}
1316
1317
1318/*
1319 * tavor_umap_srq_data_out()
1320 *    Context: Can be called from user or kernel context.
1321 */
1322static ibt_status_t
1323tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1324    size_t data_sz)
1325{
1326	TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1327
1328	/* Check for valid SRQ handle pointer */
1329	if (srq == NULL) {
1330		TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1331		    TAVOR_TNF_ERROR, "");
1332		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1333		return (IBT_SRQ_HDL_INVALID);
1334	}
1335
1336	/* Check for valid SRQ mapping structure size */
1337	if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1338		TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1339		    TAVOR_TNF_ERROR, "");
1340		TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1341		return (IBT_INSUFF_RESOURCE);
1342	}
1343	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1344
1345	/*
1346	 * If it has passed all the checks, then fill in all the useful
1347	 * mapping information (including the mapping offset that will be
1348	 * passed back to the devmap() interface during a subsequent mmap()
1349	 * call).
1350	 *
1351	 * The "offset" for SRQ mmap()'s looks like this:
1352	 * +----------------------------------------+--------+--------------+
1353	 * |		   SRQ Number		    |  0x66  | Reserved (0) |
1354	 * +----------------------------------------+--------+--------------+
1355	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1356	 *
1357	 * This returns information about the mapping offset, the length of the
1358	 * SRQ memory, and the SRQ number (for use in later receive
1359	 * doorbells).  It also returns the following information for the
1360	 * shared receive queue: the offset (from the base mapped address) of
1361	 * the start of the given work queue, the 64-bit IB virtual address
1362	 * that corresponds to the base mapped address (needed for posting WQEs
1363	 * though the QP doorbells), the number of WQEs the given work queue
1364	 * can hold, and the size of each WQE for the given work queue.
1365	 */
1366	data->msrq_rev		= MLNX_UMAP_IF_VERSION;
1367	data->msrq_mapoffset	= ((((uint64_t)srq->srq_srqnum <<
1368	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1369	data->msrq_maplen	= srq->srq_wqinfo.qa_size;
1370	data->msrq_srqnum	= srq->srq_srqnum;
1371
1372	data->msrq_desc_addr	= (uint32_t)((uintptr_t)srq->srq_wq_buf -
1373	    srq->srq_desc_off);
1374	data->msrq_numwqe	= srq->srq_wq_bufsz;
1375	data->msrq_wqesz	= (1 << srq->srq_wq_log_wqesz);
1376
1377	TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1378	return (DDI_SUCCESS);
1379}
1380
1381/*
1382 * tavor_umap_pd_data_out()
1383 *    Context: Can be called from user or kernel context.
1384 */
1385static ibt_status_t
1386tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1387    size_t data_sz)
1388{
1389	TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1390
1391	/* Check for valid PD handle pointer */
1392	if (pd == NULL) {
1393		TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1394		    TAVOR_TNF_ERROR, "");
1395		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1396		return (IBT_PD_HDL_INVALID);
1397	}
1398
1399	/* Check for valid PD mapping structure size */
1400	if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1401		TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1402		    TAVOR_TNF_ERROR, "");
1403		TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404		return (IBT_INSUFF_RESOURCE);
1405	}
1406	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1407
1408	/*
1409	 * If it has passed all the checks, then fill in the PD number (the
1410	 * index that was allocated for this PD in the PD table).
1411	 */
1412	data->mpd_rev	= MLNX_UMAP_IF_VERSION;
1413	data->mpd_pdnum	= pd->pd_pdnum;
1414
1415	TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1416	return (DDI_SUCCESS);
1417}
1418
1419/*
1420 * tavor_umap_db_init()
1421 *    Context: Only called from attach() path context
1422 */
1423void
1424tavor_umap_db_init(void)
1425{
1426	TAVOR_TNF_ENTER(tavor_umap_db_init);
1427
1428	/*
1429	 * Initialize the lock used by the Tavor "userland resources database"
1430	 * This is used to ensure atomic access to add, remove, and find
1431	 * entries in the database.
1432	 */
1433	mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1434	    MUTEX_DRIVER, NULL);
1435
1436	/*
1437	 * Initialize the AVL tree used for the "userland resources
1438	 * database".  Using an AVL tree here provides the ability to
1439	 * scale the database size to large numbers of resources.  The
1440	 * entries in the tree are "tavor_umap_db_entry_t".
1441	 * The tree is searched with the help of the
1442	 * tavor_umap_db_compare() routine.
1443	 */
1444	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1445	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1446	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1447
1448	TAVOR_TNF_EXIT(tavor_umap_db_init);
1449}
1450
1451
1452/*
1453 * tavor_umap_db_fini()
1454 *    Context: Only called from attach() and/or detach() path contexts
1455 */
1456void
1457tavor_umap_db_fini(void)
1458{
1459	TAVOR_TNF_ENTER(tavor_umap_db_fini);
1460
1461	/* Destroy the AVL tree for the "userland resources database" */
1462	avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1463
1464	/* Destroy the lock for the "userland resources database" */
1465	mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1466
1467	TAVOR_TNF_EXIT(tavor_umap_db_fini);
1468}
1469
1470
1471/*
1472 * tavor_umap_db_alloc()
1473 *    Context: Can be called from user or kernel context.
1474 */
1475tavor_umap_db_entry_t *
1476tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1477{
1478	tavor_umap_db_entry_t	*umapdb;
1479
1480	TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1481
1482	/* Allocate an entry to add to the "userland resources database" */
1483	umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1484	if (umapdb == NULL) {
1485		TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1486		TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1487		return (NULL);
1488	}
1489	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1490
1491	/* Fill in the fields in the database entry */
1492	umapdb->tdbe_common.tdb_instance  = instance;
1493	umapdb->tdbe_common.tdb_type	  = type;
1494	umapdb->tdbe_common.tdb_key	  = key;
1495	umapdb->tdbe_common.tdb_value	  = value;
1496
1497	TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1498	return (umapdb);
1499}
1500
1501
1502/*
1503 * tavor_umap_db_free()
1504 *    Context: Can be called from user or kernel context.
1505 */
1506void
1507tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1508{
1509	TAVOR_TNF_ENTER(tavor_umap_db_free);
1510
1511	/* Free the database entry */
1512	kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1513
1514	TAVOR_TNF_EXIT(tavor_umap_db_free);
1515}
1516
1517
1518/*
1519 * tavor_umap_db_add()
1520 *    Context: Can be called from user or kernel context.
1521 */
1522void
1523tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1524{
1525	TAVOR_TNF_ENTER(tavor_umap_db_add);
1526
1527	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1528	tavor_umap_db_add_nolock(umapdb);
1529	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1530
1531	TAVOR_TNF_EXIT(tavor_umap_db_add);
1532}
1533
1534
1535/*
1536 * tavor_umap_db_add_nolock()
1537 *    Context: Can be called from user or kernel context.
1538 */
1539void
1540tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1541{
1542	tavor_umap_db_query_t	query;
1543	avl_index_t		where;
1544
1545	TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1546
1547	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1548
1549	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1550
1551	/*
1552	 * Copy the common portion of the "to-be-added" database entry
1553	 * into the "tavor_umap_db_query_t" structure.  We use this structure
1554	 * (with no flags set) to find the appropriate location in the
1555	 * "userland resources database" for the new entry to be added.
1556	 *
1557	 * Note: we expect that this entry should not be found in the
1558	 * database (unless something bad has happened).
1559	 */
1560	query.tqdb_common = umapdb->tdbe_common;
1561	query.tqdb_flags  = 0;
1562	(void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1563	    &where);
1564
1565	/*
1566	 * Now, using the "where" field from the avl_find() operation
1567	 * above, we will insert the new database entry ("umapdb").
1568	 */
1569	avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1570	    where);
1571
1572	TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1573}
1574
1575
1576/*
1577 * tavor_umap_db_find()
1578 *    Context: Can be called from user or kernel context.
1579 */
1580int
1581tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1582    uint64_t *value, uint_t flag, tavor_umap_db_entry_t	**umapdb)
1583{
1584	int	status;
1585
1586	TAVOR_TNF_ENTER(tavor_umap_db_find);
1587
1588	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1589	status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1590	    umapdb);
1591	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1592
1593	TAVOR_TNF_EXIT(tavor_umap_db_find);
1594	return (status);
1595}
1596
1597
1598/*
1599 * tavor_umap_db_find_nolock()
1600 *    Context: Can be called from user or kernel context.
1601 */
1602int
1603tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1604    uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1605{
1606	tavor_umap_db_query_t	query;
1607	tavor_umap_db_entry_t	*entry;
1608	avl_index_t		where;
1609
1610	TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1611
1612	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1613
1614	/*
1615	 * Fill in key, type, instance, and flags values of the
1616	 * tavor_umap_db_query_t in preparation for the database
1617	 * lookup.
1618	 */
1619	query.tqdb_flags		= flags;
1620	query.tqdb_common.tdb_key	= key;
1621	query.tqdb_common.tdb_type	= type;
1622	query.tqdb_common.tdb_instance	= instance;
1623
1624	/*
1625	 * Perform the database query.  If no entry is found, then
1626	 * return failure, else continue.
1627	 */
1628	entry = (tavor_umap_db_entry_t *)avl_find(
1629	    &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1630	if (entry == NULL) {
1631		TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1632		return (DDI_FAILURE);
1633	}
1634	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1635
1636	/*
1637	 * If the flags argument specifies that the entry should
1638	 * be removed if found, then call avl_remove() to remove
1639	 * the entry from the database.
1640	 */
1641	if (flags & TAVOR_UMAP_DB_REMOVE) {
1642
1643		avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1644
1645		/*
1646		 * The database entry is returned with the expectation
1647		 * that the caller will use tavor_umap_db_free() to
1648		 * free the entry's memory.  We ASSERT that "umapdb" is
1649		 * non-NULL here; a NULL pointer should never be passed
1650		 * for the TAVOR_UMAP_DB_REMOVE case.
1651		 */
1652		ASSERT(umapdb != NULL);
1653	}
1654
1655	/*
1656	 * If the caller would like visibility to the database entry
1657	 * (indicated through the use of a non-NULL "umapdb" argument),
1658	 * then fill it in.
1659	 */
1660	if (umapdb != NULL) {
1661		*umapdb = entry;
1662	}
1663
1664	/* Extract value field from database entry and return success */
1665	*value = entry->tdbe_common.tdb_value;
1666
1667	TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1668	return (DDI_SUCCESS);
1669}
1670
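/*
 * Illustrative sketch (not part of the driver):  a typical lifecycle for a
 * database entry, using only the routines defined above.  The use of the
 * owning process's PID as the key for a UAR page mirrors what
 * tavor_devmap() expects; the surrounding function itself is hypothetical.
 */
#if 0	/* example only; not compiled into the driver */
static void
tavor_umap_db_lifecycle_example(uint_t instance, tavor_rsrc_t *rsrcp)
{
	tavor_umap_db_entry_t	*umapdb;
	uint64_t		value;
	int			status;

	/* Create and publish an entry keyed by the owning process's PID */
	umapdb = tavor_umap_db_alloc(instance, ddi_get_pid(),
	    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
	if (umapdb == NULL)
		return;
	tavor_umap_db_add(umapdb);

	/* Later: look the entry up, remove it from the tree, and free it */
	status = tavor_umap_db_find(instance, ddi_get_pid(),
	    MLNX_UMAP_UARPG_RSRC, &value, TAVOR_UMAP_DB_REMOVE, &umapdb);
	if (status == DDI_SUCCESS)
		tavor_umap_db_free(umapdb);
}
#endif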
1671
1672/*
1673 * tavor_umap_umemlock_cb()
1674 *    Context: Can be called from callback context.
1675 */
1676void
1677tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1678{
1679	tavor_umap_db_entry_t	*umapdb;
1680	tavor_state_t		*state;
1681	tavor_rsrc_t 		*rsrcp;
1682	tavor_mrhdl_t		mr;
1683	uint64_t		value;
1684	uint_t			instance;
1685	int			status;
1686	void			(*mr_callback)(void *, void *);
1687	void			*mr_cbarg1, *mr_cbarg2;
1688
1689	TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1690
1691	/*
1692	 * If this was userland memory, then we need to remove its entry
1693	 * from the "userland resources database".  Note:  We use the
1694	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1695	 * which instance was used when the entry was added (but we want
1696	 * to know after the entry is found using the other search criteria).
1697	 */
1698	status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1699	    MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1700	    TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1701	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1702	if (status == DDI_SUCCESS) {
1703		instance = umapdb->tdbe_common.tdb_instance;
1704		state = ddi_get_soft_state(tavor_statep, instance);
1705		if (state == NULL) {
1706			cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1707			TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1708			    TAVOR_TNF_ERROR, "");
			/* Free the now-orphaned database entry */
			tavor_umap_db_free(umapdb);
1709			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1710			return;
1711		}
1712
1713		/* Free the database entry */
1714		tavor_umap_db_free(umapdb);
1715
1716		/* Use "value" to convert to an MR handle */
1717		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1718		mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1719
1720		/*
1721		 * If a callback has been provided, call it first.  This
1722		 * callback is expected to do any cleanup necessary to
1723		 * guarantee that the subsequent MR deregister (below)
1724		 * will succeed.  Specifically, this means freeing up memory
1725		 * windows which might have been associated with the MR.
1726		 */
1727		mutex_enter(&mr->mr_lock);
1728		mr_callback = mr->mr_umem_cbfunc;
1729		mr_cbarg1   = mr->mr_umem_cbarg1;
1730		mr_cbarg2   = mr->mr_umem_cbarg2;
1731		mutex_exit(&mr->mr_lock);
1732		if (mr_callback != NULL) {
1733			mr_callback(mr_cbarg1, mr_cbarg2);
1734		}
1735
1736		/*
1737		 * Then call tavor_mr_deregister() to release the resources
1738		 * associated with the MR handle.  Note: Because this routine
1739		 * will also check for whether the ddi_umem_cookie_t is in the
1740		 * database, it will take responsibility for disabling the
1741		 * memory region and calling ddi_umem_unlock().
1742		 */
1743		status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1744		    TAVOR_SLEEP);
1745		if (status != DDI_SUCCESS) {
1746			TAVOR_WARNING(state, "Unexpected failure in "
1747			    "deregister from callback\n");
1748			TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1749			    TAVOR_TNF_ERROR, "");
1750			TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1751		}
1752	}
1753
1754	TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1755}
1756
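/*
 * Hedged sketch of how a cleanup callback of this shape is typically
 * registered (illustrative only; the driver's real registration lives
 * in the memory registration path, and the exact umem_lockmemory()/
 * umem_callback_ops_t usage shown here is an assumption to be checked
 * against ddi_umem_lock(9F) and that code):
 *
 *	static struct umem_callback_ops umem_cbops = {
 *		UMEM_CALLBACK_VERSION,
 *		tavor_umap_umemlock_cb,
 *	};
 *
 *	status = umem_lockmemory(vaddr, len,
 *	    DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ, &umem_cookie,
 *	    &umem_cbops, NULL);
 *
 * When the locked pages are later torn down (e.g. at process exit), the
 * framework invokes the callback with the ddi_umem_cookie_t, which is
 * the same value used as the database key in the routine above.
 */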
1757
1758/*
1759 * tavor_umap_db_compare()
1760 *    Context: Can be called from user or kernel context.
1761 */
1762static int
1763tavor_umap_db_compare(const void *q, const void *e)
1764{
1765	tavor_umap_db_common_t	*entry_common, *query_common;
1766	uint_t			query_flags;
1767
1768	TAVOR_TNF_ENTER(tavor_umap_db_compare);
1769
1770	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1771
1772	entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1773	query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1774	query_flags  = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1775
1776	/*
1777	 * The first comparison is done on the "key" value in "query"
1778	 * and "entry".  If they are not equal, then the appropriate
1779	 * search direction is returned.  Else, we continue by
1780	 * comparing "type".
1781	 */
1782	if (query_common->tdb_key < entry_common->tdb_key) {
1783		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1784		return (-1);
1785	} else if (query_common->tdb_key > entry_common->tdb_key) {
1786		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1787		return (+1);
1788	}
1789
1790	/*
1791	 * If the search reaches this point, then "query" and "entry"
1792	 * have equal key values.  So we continue by comparing their
1793	 * "type" values.  Again, if they are not equal, then the
1794	 * appropriate search direction is returned.  Else, we continue
1795	 * by comparing "instance".
1796	 */
1797	if (query_common->tdb_type < entry_common->tdb_type) {
1798		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1799		return (-1);
1800	} else if (query_common->tdb_type > entry_common->tdb_type) {
1801		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1802		return (+1);
1803	}
1804
1805	/*
1806	 * If the search reaches this point, then "query" and "entry"
1807	 * have exactly the same key and type values.  Now we consult
1808	 * the "flags" field in the query to determine whether the
1809	 * "instance" is relevant to the search.  If the
1810	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag is set, then return
1811	 * success (0) here.  Otherwise, continue the search by comparing
1812	 * instance values and returning the appropriate search direction.
1813	 */
1814	if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1815		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816		return (0);
1817	}
1818
1819	/*
1820	 * If the search has reached this point, then "query" and "entry"
1821	 * can only be differentiated by their instance values.  If these
1822	 * are not equal, then return the appropriate search direction.
1823	 * Else, we return success (0).
1824	 */
1825	if (query_common->tdb_instance < entry_common->tdb_instance) {
1826		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1827		return (-1);
1828	} else if (query_common->tdb_instance > entry_common->tdb_instance) {
1829		TAVOR_TNF_EXIT(tavor_umap_db_compare);
1830		return (+1);
1831	}
1832
1833	/* Everything matches... so return success */
1834	TAVOR_TNF_EXIT(tavor_umap_db_compare);
1835	return (0);
1836}
1837
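/*
 * Hedged sketch of how a three-level comparator like the one above is
 * wired into the AVL tree (illustrative only; the database is actually
 * initialized elsewhere, and the "tdbe_avlnode" field name used for the
 * offsetof() below is an assumption):
 *
 *	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
 *	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
 *	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
 *
 * avl_find() then uses the comparator's -1/0/+1 result to walk left,
 * stop, or walk right, which is why equal keys must fall through to the
 * "type" and (optionally) "instance" comparisons above.
 */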
1838
1839/*
1840 * tavor_umap_db_set_onclose_cb()
1841 *    Context: Can be called from user or kernel context.
1842 */
1843int
1844tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1845    void (*callback)(void *), void *arg)
1846{
1847	tavor_umap_db_priv_t	*priv;
1848	tavor_umap_db_entry_t	*umapdb;
1849	minor_t			instance;
1850	uint64_t		value;
1851	int			status;
1852
1853	TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1854
1855	instance = TAVOR_DEV_INSTANCE(dev);
1856	if (instance == -1) {
1857		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1858		    TAVOR_TNF_ERROR, "");
1859		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1860		return (DDI_FAILURE);
1861	}
1862
1863	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1864		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1865		    TAVOR_TNF_ERROR, "");
1866		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1867		return (DDI_FAILURE);
1868	}
1869
1870	/*
1871	 * Grab the lock for the "userland resources database" and find
1872	 * the entry corresponding to this minor number.  Once it's found,
1873	 * allocate (if necessary) and add an entry (in the "tdb_priv"
1874	 * field) to indicate that further processing may be needed during
1875	 * Tavor's close() handling.
1876	 */
1877	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1878	status = tavor_umap_db_find_nolock(instance, dev,
1879	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1880	if (status != DDI_SUCCESS) {
1881		TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1882		    TAVOR_TNF_ERROR, "");
1883		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1884		TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1885		return (DDI_FAILURE);
1886	}
1887
1888	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1889	if (priv == NULL) {
1890		priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1891		    sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1892		if (priv == NULL) {
1893			TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1894			    TAVOR_TNF_ERROR, "");
1895			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1896			TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1897			return (DDI_FAILURE);
1898		}
1899	}
1900
1901	/*
1902	 * Save away the callback and argument to be used during Tavor's
1903	 * close() processing.
1904	 */
1905	priv->tdp_cb	= callback;
1906	priv->tdp_arg	= arg;
1907
1908	umapdb->tdbe_common.tdb_priv = (void *)priv;
1909	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1910
1911	TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1912	return (DDI_SUCCESS);
1913}
1914
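/*
 * Hedged usage sketch (illustrative only): a firmware-flash ioctl path
 * might register a cleanup handler to run if the process closes the
 * device while a flash operation is still in progress, and remove it
 * again once the operation completes.  "tavor_flash_cleanup" and
 * "flash_state" are hypothetical names.
 *
 *	if (tavor_umap_db_set_onclose_cb(dev,
 *	    TAVOR_ONCLOSE_FLASH_INPROGRESS, tavor_flash_cleanup,
 *	    flash_state) != DDI_SUCCESS) {
 *		... fail the ioctl ...
 *	}
 *
 *	... perform the flash operation ...
 *
 *	(void) tavor_umap_db_clear_onclose_cb(dev,
 *	    TAVOR_ONCLOSE_FLASH_INPROGRESS);
 */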
1915
1916/*
1917 * tavor_umap_db_clear_onclose_cb()
1918 *    Context: Can be called from user or kernel context.
1919 */
1920int
1921tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1922{
1923	tavor_umap_db_priv_t	*priv;
1924	tavor_umap_db_entry_t	*umapdb;
1925	minor_t			instance;
1926	uint64_t		value;
1927	int			status;
1928
1929	TAVOR_TNF_ENTER(tavor_umap_db_clear_onclose_cb);
1930
1931	instance = TAVOR_DEV_INSTANCE(dev);
1932	if (instance == -1) {
1933		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1934		    TAVOR_TNF_ERROR, "");
1935		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1936		return (DDI_FAILURE);
1937	}
1938
1939	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1940		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1941		    TAVOR_TNF_ERROR, "");
1942		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1943		return (DDI_FAILURE);
1944	}
1945
1946	/*
1947	 * Grab the lock for the "userland resources database" and find
1948	 * the entry corresponding to this minor number.  Once it's found,
1949	 * remove the entry (in the "tdb_priv" field) that indicated the
1950	 * need for further processing during Tavor's close().  Free the
1951	 * entry, if appropriate.
1952	 */
1953	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1954	status = tavor_umap_db_find_nolock(instance, dev,
1955	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1956	if (status != DDI_SUCCESS) {
1957		TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1958		    TAVOR_TNF_ERROR, "");
1959		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1960		TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1961		return (DDI_FAILURE);
1962	}
1963
1964	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1965	if (priv != NULL) {
1966		kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1967		priv = NULL;
1968	}
1969
1970	umapdb->tdbe_common.tdb_priv = (void *)priv;
1971	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
	TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1972	return (DDI_SUCCESS);
1973}
1974
1975
1976/*
1977 * tavor_umap_db_handle_onclose_cb()
1978 *    Context: Can be called from user or kernel context.
1979 */
1980void
1981tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1982{
1983	void	(*callback)(void *);
1984
1985	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1986
1987	/*
1988	 * Call the callback.
1989	 *    Note: Currently there is only one callback (in "tdp_cb"), but
1990	 *    in the future there may be more, depending on what other types
1991	 *    of interaction there are between userland processes and the
1992	 *    driver.
1993	 */
1994	callback = priv->tdp_cb;
1995	callback(priv->tdp_arg);
1996}
1997
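/*
 * Hedged sketch of how a close() path might dispatch the saved callback
 * (illustrative only; the real close handling does more than this).
 * The entry is looked up with the same (dev, MLNX_UMAP_PID_RSRC) key
 * used by the set/clear routines above, and the handler is called with
 * the database lock held, as the ASSERT above requires.
 *
 *	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
 *	if (tavor_umap_db_find_nolock(instance, dev, MLNX_UMAP_PID_RSRC,
 *	    &value, 0, &umapdb) == DDI_SUCCESS) {
 *		priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
 *		if (priv != NULL) {
 *			tavor_umap_db_handle_onclose_cb(priv);
 *		}
 *	}
 *	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
 */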