hermon_mr.c revision 9517:b4839b0aa7a4
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * hermon_mr.c
 *    Hermon Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys)
 */
static	uint_t hermon_memkey_cnt = 0x00;
#define	HERMON_MEMKEY_SHIFT	 24
#define	HERMON_MPT_SW_OWNERSHIP	 0xF

static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);


/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	hermon_umap_umemlock_cb,
};


/*
 * hermon_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.
	 */
	bind.bi_type  = HERMON_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
	    mpt_type);
	return (status);
}


/*
 * hermon_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
    hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = HERMON_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as    = NULL;
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
	return (status);
}


/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}
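
	/*
	 * Note: HERMON_SLEEPFLAG_FOR_CONTEXT() presumably evaluates to
	 * HERMON_NOSLEEP when the current thread is running in interrupt
	 * context.  A caller that requests a sleeping allocation from
	 * interrupt context therefore fails the check above and gets
	 * IBT_INVALID_PARAM rather than being allowed to block.  The same
	 * pattern guards every other entry point in this file.
	 */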

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, curproc);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where the hermon_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping.
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));
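
	/*
	 * Worked example (hypothetical values): with a 4 KB MTT page size
	 * (mr_logmttpgsz == 12), pgsize_msk is 0xFFF.  If the original
	 * region began at 0x10234 (page offset 0x234) and the new vaddr is
	 * 0x25000, the result is (0x25000 & ~0xFFF) | (0x10234 & 0xFFF),
	 * i.e. 0x25234.  The new mapping thus reuses the existing MTT
	 * entries while preserving the original in-page offset.
	 */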

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;
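
	/*
	 * Note on the split above: the MTT index is converted to a byte
	 * offset (HERMON_MTT_SIZE_SHIFT presumably reflects the 8-byte
	 * size of a single MTT entry), and the dMPT stores that offset as
	 * a high 32-bit word plus a low word expressed in 8-byte units
	 * (hence the ">> 3").  For example, MTT index 0x1000 gives a byte
	 * offset of 0x8000, so mtt_addr_h == 0 and mtt_addr_l == 0x1000.
	 */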

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_mpt_type	  = HERMON_MPT_DMPT;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}

/*
 * hermon_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	bind;
	uint64_t		mtt_addr;
	uint64_t		nummtt;
	uint_t			sleep, mtt_pgsize_bits;
	int			status;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
	    HERMON_NOSLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the FMR.  Specifically, it will be
	 * made to reference the currently existing MTT entries and ownership
	 * of the MPT will be passed to the hardware in the last step below.
	 * If we fail here, we must undo the protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the FMR memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine the number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 */
	/* Assume the address will be page aligned later */
	bind.bi_addr = 0;
	/* Calculate the size based on the given maximum page count */
	bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
	nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
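
	/*
	 * Worked example (hypothetical values): with fmr_max_pages == 16
	 * and 4 KB system pages (PAGESHIFT == 12), bi_len comes out to
	 * 64 KB, so hermon_mr_nummtt_needed() should report at least 16
	 * MTT entries for a 4 KB MTT page size.  The table is sized once,
	 * up front, for the largest mapping this FMR will ever hold.
	 */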

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto fmralloc_fail3;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.pd		= pd->pd_pdnum;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;
	mpt_entry.mem_key = mr->mr_lkey;

	/*
	 * FMR sets these to 0 for now.  Later, during the actual FMR
	 * registration, these values are filled in.
	 */
	mpt_entry.start_addr	= 0;
	mpt_entry.reg_win_len	= 0;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto fmralloc_fail4;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.  Also,
	 * mark this as an FMR region.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_mpt_type   = HERMON_MPT_DMPT;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_fmr	  = 1;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
	(void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
	hermon_rsrc_free(state, &mtt);
fmralloc_fail3:
	hermon_rsrc_free(state, &rsrc);
fmralloc_fail2:
	hermon_rsrc_free(state, &mpt);
fmralloc_fail1:
	hermon_pd_refcnt_dec(pd);
fmralloc_fail:
	return (status);
}

/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0],
	    HERMON_MPT_SW_OWNERSHIP);
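
	/*
	 * Note on the handshake used here and below: writing the status
	 * byte as HERMON_MPT_SW_OWNERSHIP (0xF) moves the MPT entry to
	 * software ownership so its fields can safely be rewritten in
	 * place; the final write of 0x0 below hands the updated entry
	 * back to the hardware.
	 */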

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the FMR-specific fast MTT
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p,
	    mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* write mem key value */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey);

	/* write length value */
	ddi_put64(mpt->hr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len);

	/* write start addr value */
	ddi_put64(mpt->hr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova);

	/* write lkey value */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey);

	/* Write MPT status to HW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in the MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note: we fail here and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (DDI_FAILURE);
}


/*
 * hermon_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	hermon_umap_db_entry_t	*umapdb;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	uint64_t		value;
	int			status;
	uint_t			shared_mtt;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this deregistration.
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	bind	= &mr->mr_bindinfo;

	/*
	 * Check here if the memory region is really an FMR.  If so, this is a
	 * bad thing and we shouldn't be here.  Return failure.
	 */
	if (mr->mr_is_fmr) {
		mutex_exit(&mr->mr_lock);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attempting to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * hermon_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it.
	 */
	if (level >= HERMON_MR_DEREG_ALL) {
		if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
			status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
			    NULL, 0, mpt->hr_indx, sleep);
			if (status != HERMON_CMD_SUCCESS) {
				if (status == HERMON_CMD_REG_BOUND) {
					return (IBT_MR_IN_USE);
				} else {
					cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
					    "command failed: %08x\n", status);
					if (status ==
					    HERMON_CMD_INVALID_STATUS) {
						hermon_fm_ereport(state,
						    HCA_SYS_ERR,
						    HCA_ERR_SRV_LOST);
					}
					return (IBT_INVALID_PARAM);
				}
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleanup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = hermon_umap_db_find(state->hs_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			hermon_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/*
	 * Decrement the MTT reference count.  Since the MTT resource
	 * may be shared between multiple memory regions (as a result
	 * of a "RegisterSharedMR" verb) it is important that we not
	 * free up or unbind resources prematurely.  If it's not shared (as
	 * indicated by the return status), then free the resource.
	 */
	shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
	if (!shared_mtt) {
		hermon_rsrc_free(state, &mtt_refcnt);
	}

	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	if (!shared_mtt) {
		if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
			hermon_mr_mem_unbind(state, bind);
		}
		hermon_rsrc_free(state, &mtt);
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent hermon_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this deallocation.
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	mutex_exit(&mr->mr_lock);

	/* Free the MTT entries */
	hermon_rsrc_free(state, &mtt);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_invalidate_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0],
	    HERMON_MPT_SW_OWNERSHIP);

	/* invalidate mem key value */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], 0);

	/* invalidate lkey value */
	ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], 0);

	/* Write MPT status to HW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0);

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_deregister_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/* Write MPT status to SW bit */
	ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0],
	    HERMON_MPT_SW_OWNERSHIP);
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd	= (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If the region is mapped for streaming (i.e. noncoherent), then a
	 * sync is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type  = HERMON_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = HERMON_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as    = NULL;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	hermon_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * hermon_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len	  = mr_segs[i].ms_len;
		seg_end	  = seg_vaddr + seg_len - 1;
		mr_start  = mrhdl->mr_bindinfo.bi_addr;
		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_VA_INVALID;
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_LEN_INVALID;
			goto mrsync_fail;
		}
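
		/*
		 * Worked example (hypothetical values): for a region bound
		 * at 0x10000 with length 0x4000, mr_start is 0x10000 and
		 * mr_end is 0x13FFF.  A request for vaddr 0x11000 and
		 * length 0x1000 passes both checks, while vaddr 0x13800
		 * with length 0x1000 yields seg_end 0x147FF and fails the
		 * length check with IBT_MR_LEN_INVALID.
		 */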

		/* Determine what type (i.e. direction) for sync */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INVALID_PARAM;
			goto mrsync_fail;
		}

		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);

		mutex_exit(&mrhdl->mr_lock);
	}

	return (DDI_SUCCESS);

mrsync_fail:
	return (status);
}


/*
 * hermon_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mwhdl_t		mw;
	uint_t			sleep;
	int			status;

	/* XXX - this test is always TRUE; it disables MW allocation */
	if (state != NULL)
		return (IBT_INSUFF_RESOURCE);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" portion (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.lr		= 1;
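
	/*
	 * Note: start_addr and reg_win_len are deliberately left zeroed
	 * here.  A memory window presumably acquires its virtual address
	 * and length only later, when it is bound to a memory region
	 * (hence the "fewer entries" remark above); allocation itself
	 * establishes only the RKey and the owning PD.
	 */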

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}


/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_mwhdl_t		mw;
	int			status;
	hermon_pdhdl_t		pd;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of this operation.
	 */
	mw	= *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt	= mw->mr_mptrsrcp;
	rsrc	= mw->mr_rsrcp;
	pd	= mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/* Free the Hermon Memory Window handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *		KKKKKKKK IIIIIIII IIIIIIII IIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
	uint32_t tmp_key, tmp_indx;

	/*
	 * Generate a simple key from the counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make
	 * a best effort to ensure that they are).  Third, the window for
	 * the race (where both threads read and update the counter at the
	 * same time) is incredibly small.  And, lastly, we'd like to make
	 * this into a "random" key.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
	tmp_indx = indx & 0xffffff;
	return (tmp_key | tmp_indx);
}
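
/*
 * Worked example (hypothetical values): if hermon_memkey_cnt is 0x5A and
 * the MPT index is 0x000012, then tmp_key is (0x5A << 24) == 0x5a000000,
 * tmp_indx is 0x000012, and the resulting key is 0x5a000012 -- the
 * arbitrary byte in bits 31:24 and the MPT index in bits 23:0.
 */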


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *		IIIIIIII IIIIIIII IIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
	/*
	 * The memory key format to pass down to the hardware is
	 * (key[7:0],index[23:0]), which defines the index to the
	 * hardware resource. When the driver passes this as a memory
	 * key, (i.e. to retrieve a resource) the format is
	 * (index[23:0],key[7:0]).
	 */
	return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
}
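
/*
 * Worked example: continuing the hermon_mr_keycalc() example above, key
 * 0x5a000012 becomes ((0x5a000012 >> 24) & 0xff) | ((0x5a000012 << 8) &
 * 0xffffff00) == 0x0000125a: the MPT index 0x000012 moves to bits 31:8
 * and the arbitrary byte 0x5a moves to bits 7:0.
 */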
1462
1463/*
1464 * hermon_mr_common_reg()
1465 *    Context: Can be called from interrupt or base context.
1466 */
1467static int
1468hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1469    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1470    hermon_mpt_rsrc_type_t mpt_type)
1471{
1472	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
1473	hermon_umap_db_entry_t	*umapdb;
1474	hermon_sw_refcnt_t	*swrc_tmp;
1475	hermon_hw_dmpt_t	mpt_entry;
1476	hermon_mrhdl_t		mr;
1477	ibt_mr_flags_t		flags;
1478	hermon_bind_info_t	*bh;
1479	ddi_dma_handle_t	bind_dmahdl;
1480	ddi_umem_cookie_t	umem_cookie;
1481	size_t			umem_len;
1482	caddr_t			umem_addr;
1483	uint64_t		mtt_addr, max_sz;
1484	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1485	int			status, umem_flags, bind_override_addr;
1486
1487	/*
1488	 * Check the "options" flag.  Currently this flag tells the driver
1489	 * whether or not the region should be bound normally (i.e. with
1490	 * entries written into the PCI IOMMU), whether it should be
1491	 * registered to bypass the IOMMU, and whether or not the resulting
1492	 * address should be "zero-based" (to aid the alignment restrictions
1493	 * for QPs).
1494	 */
1495	if (op == NULL) {
1496		bind_type   = HERMON_BINDMEM_NORMAL;
1497		bind_dmahdl = NULL;
1498		bind_override_addr = 0;
1499	} else {
1500		bind_type	   = op->mro_bind_type;
1501		bind_dmahdl	   = op->mro_bind_dmahdl;
1502		bind_override_addr = op->mro_bind_override_addr;
1503	}
1504
1505	/* check what kind of mpt to use */
1506
1507	/* Extract the flags field from the hermon_bind_info_t */
1508	flags = bind->bi_flags;
1509
1510	/*
1511	 * Check for invalid length.  Check is the length is zero or if the
1512	 * length is larger than the maximum configured value.  Return error
1513	 * if it is.
1514	 */
1515	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1516	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1517		status = IBT_MR_LEN_INVALID;
1518		goto mrcommon_fail;
1519	}
1520
1521	/*
1522	 * Check the sleep flag.  Ensure that it is consistent with the
1523	 * current thread context (i.e. if we are currently in the interrupt
1524	 * context, then we shouldn't be attempting to sleep).
1525	 */
1526	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1527	if ((sleep == HERMON_SLEEP) &&
1528	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1529		status = IBT_INVALID_PARAM;
1530		goto mrcommon_fail;
1531	}
1532
1533	/* Increment the reference count on the protection domain (PD) */
1534	hermon_pd_refcnt_inc(pd);
1535
1536	/*
1537	 * Allocate an MPT entry.  This will be filled in with all the
1538	 * necessary parameters to define the memory region.  And then
1539	 * ownership will be passed to the hardware in the final step
1540	 * below.  If we fail here, we must undo the protection domain
1541	 * reference count.
1542	 */
1543	if (mpt_type == HERMON_MPT_DMPT) {
1544		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1545		if (status != DDI_SUCCESS) {
1546			status = IBT_INSUFF_RESOURCE;
1547			goto mrcommon_fail1;
1548		}
1549	} else {
1550		mpt = NULL;
1551	}
1552
1553	/*
1554	 * Allocate the software structure for tracking the memory region (i.e.
1555	 * the Hermon Memory Region handle).  If we fail here, we must undo
1556	 * the protection domain reference count and the previous resource
1557	 * allocation.
1558	 */
1559	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1560	if (status != DDI_SUCCESS) {
1561		status = IBT_INSUFF_RESOURCE;
1562		goto mrcommon_fail2;
1563	}
1564	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1565	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1566
1567	/*
1568	 * Setup and validate the memory region access flags.  This means
1569	 * translating the IBTF's enable flags into the access flags that
1570	 * will be used in later operations.
1571	 */
1572	mr->mr_accflag = 0;
1573	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1574		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1575	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1576		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1577	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1578		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1579	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1580		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1581	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1582		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1583
1584	/*
1585	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1586	 * from a certain number of "constrained" bits (the least significant
1587	 * bits) and some number of "unconstrained" bits.  The constrained
1588	 * bits must be set to the index of the entry in the MPT table, but
1589	 * the unconstrained bits can be set to any value we wish.  Note:
1590	 * if no remote access is required, then the RKey value is not filled
1591	 * in.  Otherwise both Rkey and LKey are given the same value.
1592	 */
1593	if (mpt)
1594		mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1595
1596	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1597	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1598	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1599		mr->mr_rkey = mr->mr_lkey;
1600	}
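
	/*
	 * For illustration (assuming hermon_mr_keycalc() simply
	 * concatenates the running counter with the MPT index, i.e.
	 * ((counter << HERMON_MEMKEY_SHIFT) | index)): with the shift
	 * of 24 defined above, an MPT index of 0x001234 and a counter
	 * value of 0x5A would yield the key 0x5A001234.
	 */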
1601
1602	/*
1603	 * Determine if the memory is from userland and pin the pages
1604	 * with umem_lockmemory() if necessary.
1605	 * Then, if this is userland memory, allocate an entry in the
1606	 * "userland resources database".  This will later be added to
1607	 * the database (after all further memory registration operations are
1608	 * successful).  If we fail here, we must undo the reference counts
1609	 * and the previous resource allocations.
1610	 */
1611	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1612	if (mr_is_umem) {
1613		umem_len   = ptob(btopr(bind->bi_len +
1614		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1615		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1616		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1617		    DDI_UMEMLOCK_LONGTERM);
1618		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1619		    &umem_cookie, &hermon_umem_cbops, curproc);
1620		if (status != 0) {
1621			status = IBT_INSUFF_RESOURCE;
1622			goto mrcommon_fail3;
1623		}
1624
1625		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1626		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1627
1628		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1629		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1630		if (bind->bi_buf == NULL) {
1631			status = IBT_INSUFF_RESOURCE;
1632			goto mrcommon_fail3;
1633		}
1634		bind->bi_type = HERMON_BINDHDL_UBUF;
1635		bind->bi_buf->b_flags |= B_READ;
1636
1637		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1638		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1639
1640		umapdb = hermon_umap_db_alloc(state->hs_instance,
1641		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1642		    (uint64_t)(uintptr_t)rsrc);
1643		if (umapdb == NULL) {
1644			status = IBT_INSUFF_RESOURCE;
1645			goto mrcommon_fail4;
1646		}
1647	}
1648
1649	/*
1650	 * Setup the bindinfo for the mtt bind call
1651	 */
1652	bh = &mr->mr_bindinfo;
1653	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1654	bcopy(bind, bh, sizeof (hermon_bind_info_t));
1655	bh->bi_bypass = bind_type;
1656	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1657	    &mtt_pgsize_bits, mpt != NULL);
1658	if (status != DDI_SUCCESS) {
1659		goto mrcommon_fail5;
1660	}
1661	mr->mr_logmttpgsz = mtt_pgsize_bits;
1662
1663	/*
1664	 * Allocate MTT reference count (to track shared memory regions).
1665	 * This reference count resource may never be used on the given
1666	 * memory region, but if it is ever later registered as a "shared"
1667	 * memory region then this resource will be necessary.  If we fail
1668	 * here, we do pretty much the same as above to clean up.
1669	 */
1670	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1671	    &mtt_refcnt);
1672	if (status != DDI_SUCCESS) {
1673		status = IBT_INSUFF_RESOURCE;
1674		goto mrcommon_fail6;
1675	}
1676	mr->mr_mttrefcntp = mtt_refcnt;
1677	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1678	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1679	HERMON_MTT_REFCNT_INIT(swrc_tmp);
1680
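	/*
	 * The MTT address computed below is the entry's byte offset into
	 * the MTT table, i.e. the MTT index scaled by the size of a
	 * single MTT entry.
	 */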
1681	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1682
1683	/*
1684	 * Fill in the MPT entry.  This is the final step before passing
1685	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1686	 * the information collected/calculated above to fill in the
1687	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1688	 */
1689	if (mpt == NULL)
1690		goto no_passown;
1691
1692	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1693
1694	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1695	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1696	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1697	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1698	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1699	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1700	mpt_entry.lr	  = 1;
1701	mpt_entry.phys_addr = 0;
1702	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1703
1704	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1705	mpt_entry.mem_key	= mr->mr_lkey;
1706	mpt_entry.pd		= pd->pd_pdnum;
1707	mpt_entry.rem_acc_en = 0;
1708	mpt_entry.fast_reg_en = 0;
1709	mpt_entry.en_inval = 0;
1710	mpt_entry.lkey = 0;
1711	mpt_entry.win_cnt = 0;
1712
1713	if (bind_override_addr == 0) {
1714		mpt_entry.start_addr = bh->bi_addr;
1715	} else {
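		/*
		 * The mask below keeps only the low mr_logmttpgsz bits of
		 * the address (the offset into the first MTT page), so
		 * that start_addr carries just that page offset when the
		 * base address is overridden.
		 */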
1716		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1717		mpt_entry.start_addr = bh->bi_addr;
1718	}
1719	mpt_entry.reg_win_len	= bh->bi_len;
1720
1721	mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1722	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
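
	/*
	 * Note: mtt_addr is a multiple of the MTT entry size, so its low
	 * three bits are zero.  The two hardware fields above therefore
	 * hold bits 39:32 (mtt_addr_h) and bits 31:3 (mtt_addr_l) of the
	 * address.
	 */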
1723
1724	/*
1725	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1726	 * the entry to the hardware if needed.  Note: in general, this
1727	 * operation shouldn't fail.  But if it does, we have to undo
1728	 * everything we've done above before returning an error.
1729	 *
1730	 * For Hermon, this routine (which is common to the contexts) will only
1731	 * set the ownership if needed - the process of passing the context
1732	 * itself to HW will take care of setting up the MPT (based on type
1733	 * and index).
1734	 */
1735
1736	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1737	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1738	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1739	if (status != HERMON_CMD_SUCCESS) {
1740		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1741		    status);
1742		if (status == HERMON_CMD_INVALID_STATUS) {
1743			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1744		}
1745		status = ibc_get_ci_failure(0);
1746		goto mrcommon_fail7;
1747	}
1748no_passown:
1749
1750	/*
1751	 * Fill in the rest of the Hermon Memory Region handle.  Having
1752	 * successfully transferred ownership of the MPT, we can update the
1753	 * following fields for use in further operations on the MR.
1754	 */
1755	mr->mr_mttaddr	   = mtt_addr;
1756
1757	mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1758	mr->mr_mptrsrcp	   = mpt;
1759	mr->mr_mttrsrcp	   = mtt;
1760	mr->mr_pdhdl	   = pd;
1761	mr->mr_rsrcp	   = rsrc;
1762	mr->mr_is_umem	   = mr_is_umem;
1763	mr->mr_is_fmr	   = 0;
1764	mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1765	mr->mr_umem_cbfunc = NULL;
1766	mr->mr_umem_cbarg1 = NULL;
1767	mr->mr_umem_cbarg2 = NULL;
1768	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1769	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1770	mr->mr_mpt_type	   = mpt_type;
1771
1772	/*
1773	 * If this is userland memory, then we need to insert the previously
1774	 * allocated entry into the "userland resources database".  This will
1775	 * allow for later coordination between the hermon_umap_umemlock_cb()
1776	 * callback and hermon_mr_deregister().
1777	 */
1778	if (mr_is_umem) {
1779		hermon_umap_db_add(umapdb);
1780	}
1781
1782	*mrhdl = mr;
1783
1784	return (DDI_SUCCESS);
1785
1786/*
1787 * The following is cleanup for all possible failure cases in this routine
1788 */
1789mrcommon_fail7:
1790	hermon_rsrc_free(state, &mtt_refcnt);
1791mrcommon_fail6:
1792	hermon_mr_mem_unbind(state, bh);
1793mrcommon_fail5:
1794	if (mr_is_umem) {
1795		hermon_umap_db_free(umapdb);
1796	}
1797mrcommon_fail4:
1798	if (mr_is_umem) {
1799		/*
1800		 * Free up the memory ddi_umem_iosetup() allocates
1801		 * internally.
1802		 */
1803		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1804			freerbuf(bind->bi_buf);
1805			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1806			bind->bi_type = HERMON_BINDHDL_NONE;
1807			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1808		}
1809		ddi_umem_unlock(umem_cookie);
1810	}
1811mrcommon_fail3:
1812	hermon_rsrc_free(state, &rsrc);
1813mrcommon_fail2:
1814	if (mpt != NULL)
1815		hermon_rsrc_free(state, &mpt);
1816mrcommon_fail1:
1817	hermon_pd_refcnt_dec(pd);
1818mrcommon_fail:
1819	return (status);
1820}
1821
1822/*
1823 * hermon_mr_mtt_bind()
1824 *    Context: Can be called from interrupt or base context.
1825 */
1826int
1827hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
1828    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
1829    uint_t is_buffer)
1830{
1831	uint64_t		nummtt;
1832	uint_t			sleep;
1833	int			status;
1834
1835	/*
1836	 * Check the sleep flag.  Ensure that it is consistent with the
1837	 * current thread context (i.e. if we are currently in the interrupt
1838	 * context, then we shouldn't be attempting to sleep).
1839	 */
1840	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
1841	    HERMON_NOSLEEP : HERMON_SLEEP;
1842	if ((sleep == HERMON_SLEEP) &&
1843	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1844		status = IBT_INVALID_PARAM;
1845		goto mrmttbind_fail;
1846	}
1847
1848	/*
1849	 * Bind the memory and determine the mapped addresses.  This is
1850	 * the first of two routines that do all the "heavy lifting" for
1851	 * the Hermon memory registration routines.  The hermon_mr_mem_bind()
1852	 * routine takes the "bind" struct with all its fields filled
1853	 * in and returns a list of DMA cookies (for the PCI mapped addresses
1854	 * corresponding to the specified address region) which are used by
1855	 * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
1856	 * must undo all the previous resource allocation (and PD reference
1857	 * count).
1858	 */
1859	status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
1860	if (status != DDI_SUCCESS) {
1861		status = IBT_INSUFF_RESOURCE;
1862		goto mrmttbind_fail;
1863	}
1864
1865	/*
1866	 * Determine number of pages spanned.  This routine uses the
1867	 * information in the "bind" struct to determine the required
1868	 * number of MTT entries needed (and returns the suggested page size -
1869	 * as a "power-of-2" - for each MTT entry).
1870	 */
1871	nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
1872
1873	/*
1874	 * Allocate the MTT entries.  Use the calculations performed above to
1875	 * allocate the required number of MTT entries. If we fail here, we
1876	 * must not only undo all the previous resource allocation (and PD
1877	 * reference count), but we must also unbind the memory.
1878	 */
1879	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
1880	if (status != DDI_SUCCESS) {
1881		status = IBT_INSUFF_RESOURCE;
1882		goto mrmttbind_fail2;
1883	}
1884
1885	/*
1886	 * Write the mapped addresses into the MTT entries.  This is part two
1887	 * of the "heavy lifting" routines that we talked about above.  Note:
1888	 * we pass the suggested page size from the earlier operation here.
1889	 * And if we fail here, we again do pretty much the same huge clean up.
1890	 */
1891	status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
1892	if (status != DDI_SUCCESS) {
1893		/*
1894		 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
1895		 * only if it detects a HW error during DMA.
1896		 */
1897		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1898		status = ibc_get_ci_failure(0);
1899		goto mrmttbind_fail3;
1900	}
1901	return (DDI_SUCCESS);
1902
1903/*
1904 * The following is cleanup for all possible failure cases in this routine
1905 */
1906mrmttbind_fail3:
1907	hermon_rsrc_free(state, mtt);
1908mrmttbind_fail2:
1909	hermon_mr_mem_unbind(state, bind);
1910mrmttbind_fail:
1911	return (status);
1912}
1913
1914
1915/*
1916 * hermon_mr_mtt_unbind()
1917 *    Context: Can be called from interrupt or base context.
1918 */
1919int
1920hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
1921    hermon_rsrc_t *mtt)
1922{
1923	/*
1924	 * Free up the MTT entries and unbind the memory.  Here, as above, we
1925	 * attempt to free these resources only if it is appropriate to do so.
1926	 */
1927	hermon_mr_mem_unbind(state, bind);
1928	hermon_rsrc_free(state, &mtt);
1929
1930	return (DDI_SUCCESS);
1931}
1932
1933
1934/*
1935 * hermon_mr_common_rereg()
1936 *    Context: Can be called from interrupt or base context.
1937 */
1938static int
1939hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
1940    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
1941    hermon_mr_options_t *op)
1942{
1943	hermon_rsrc_t		*mpt;
1944	ibt_mr_attr_flags_t	acc_flags_to_use;
1945	ibt_mr_flags_t		flags;
1946	hermon_pdhdl_t		pd_to_use;
1947	hermon_hw_dmpt_t	mpt_entry;
1948	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
1949	uint_t			sleep, dereg_level;
1950	int			status;
1951
1952	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1953
1954	/*
1955	 * Check here to see if the memory region corresponds to a userland
1956	 * mapping.  Reregistration of userland memory regions is not
1957	 * currently supported.  Return failure.
1958	 */
1959	if (mr->mr_is_umem) {
1960		status = IBT_MR_HDL_INVALID;
1961		goto mrrereg_fail;
1962	}
1963
1964	mutex_enter(&mr->mr_lock);
1965
1966	/* Pull MPT resource pointer from the Hermon Memory Region handle */
1967	mpt = mr->mr_mptrsrcp;
1968
1969	/* Extract the flags field from the hermon_bind_info_t */
1970	flags = bind->bi_flags;
1971
1972	/*
1973	 * Check the sleep flag.  Ensure that it is consistent with the
1974	 * current thread context (i.e. if we are currently in the interrupt
1975	 * context, then we shouldn't be attempting to sleep).
1976	 */
1977	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
1978	if ((sleep == HERMON_SLEEP) &&
1979	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1980		mutex_exit(&mr->mr_lock);
1981		status = IBT_INVALID_PARAM;
1982		goto mrrereg_fail;
1983	}
1984
1985	/*
1986	 * First step is to temporarily invalidate the MPT entry.  This
1987	 * regains ownership from the hardware, and gives us the opportunity
1988	 * to modify the entry.  Note: The HW2SW_MPT command returns the
1989	 * current MPT entry contents.  These are saved away here because
1990	 * they will be reused in a later step below.  If the region has
1991	 * bound memory windows, we fail and return an "in use" error code.
1992	 * Otherwise, the failure is unexpected and we deregister the
1993	 * memory region and return an error.
1994	 *
1995	 * We always use HERMON_CMD_NOSLEEP_SPIN here because we may be
1996	 * holding "mr_lock" across this call in any context and must not sleep.
1997	 */
1998	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
1999	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2000	if (status != HERMON_CMD_SUCCESS) {
2001		mutex_exit(&mr->mr_lock);
2002		if (status == HERMON_CMD_REG_BOUND) {
2003			return (IBT_MR_IN_USE);
2004		} else {
2005			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2006			    "%08x\n", status);
2007			if (status == HERMON_CMD_INVALID_STATUS) {
2008				hermon_fm_ereport(state, HCA_SYS_ERR,
2009				    HCA_ERR_SRV_LOST);
2010			}
2011			/*
2012			 * Call deregister and ensure that all current
2013			 * resources get freed up
2014			 */
2015			if (hermon_mr_deregister(state, &mr,
2016			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2017				HERMON_WARNING(state, "failed to deregister "
2018				    "memory region");
2019			}
2020			return (ibc_get_ci_failure(0));
2021		}
2022	}
2023
2024	/*
2025	 * If we're changing the protection domain, then validate the new one
2026	 */
2027	if (flags & IBT_MR_CHANGE_PD) {
2028
2029		/* Check for valid PD handle pointer */
2030		if (pd == NULL) {
2031			mutex_exit(&mr->mr_lock);
2032			/*
2033			 * Call deregister and ensure that all current
2034			 * resources get properly freed up. Unnecessary
2035			 * here to attempt to regain software ownership
2036			 * of the MPT entry as that has already been
2037			 * done above.
2038			 */
2039			if (hermon_mr_deregister(state, &mr,
2040			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2041			    DDI_SUCCESS) {
2042				HERMON_WARNING(state, "failed to deregister "
2043				    "memory region");
2044			}
2045			status = IBT_PD_HDL_INVALID;
2046			goto mrrereg_fail;
2047		}
2048
2049		/* Use the new PD handle in all operations below */
2050		pd_to_use = pd;
2051
2052	} else {
2053		/* Use the current PD handle in all operations below */
2054		pd_to_use = mr->mr_pdhdl;
2055	}
2056
2057	/*
2058	 * If we're changing access permissions, then validate the new ones
2059	 */
2060	if (flags & IBT_MR_CHANGE_ACCESS) {
2061		/*
2062		 * Validate the access flags.  Both remote write and remote
2063		 * atomic require the local write flag to be set
2064		 */
2065		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2066		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2067		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2068			mutex_exit(&mr->mr_lock);
2069			/*
2070			 * Call deregister and ensure that all current
2071			 * resources get properly freed up. Unnecessary
2072			 * here to attempt to regain software ownership
2073			 * of the MPT entry as that has already been
2074			 * done above.
2075			 */
2076			if (hermon_mr_deregister(state, &mr,
2077			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2078			    DDI_SUCCESS) {
2079				HERMON_WARNING(state, "failed to deregister "
2080				    "memory region");
2081			}
2082			status = IBT_MR_ACCESS_REQ_INVALID;
2083			goto mrrereg_fail;
2084		}
2085
2086		/*
2087		 * Setup and validate the memory region access flags.  This
2088		 * means translating the IBTF's enable flags into the access
2089		 * flags that will be used in later operations.
2090		 */
2091		acc_flags_to_use = 0;
2092		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2093			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2094		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2095			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2096		if (flags & IBT_MR_ENABLE_REMOTE_READ)
2097			acc_flags_to_use |= IBT_MR_REMOTE_READ;
2098		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2099			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2100		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2101			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2102
2103	} else {
2104		acc_flags_to_use = mr->mr_accflag;
2105	}
2106
2107	/*
2108	 * If we're modifying the translation, then figure out whether
2109	 * we can reuse the current MTT resources.  This means calling
2110	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2111	 * for the reregistration.  If the current memory region contains
2112	 * sufficient MTT entries for the new regions, then it will be
2113	 * reused and filled in.  Otherwise, new entries will be allocated,
2114	 * the old ones will be freed, and the new entries will be filled
2115	 * in.  Note:  If we're not modifying the translation, then we
2116	 * should already have all the information we need to update the MPT.
2117	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2118	 * a "dereg_level" which is the level of cleanup that needs to be
2119	 * passed to hermon_mr_deregister() to finish the cleanup.
2120	 */
2121	if (flags & IBT_MR_CHANGE_TRANSLATION) {
2122		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2123		    &mtt_addr_to_use, sleep, &dereg_level);
2124		if (status != DDI_SUCCESS) {
2125			mutex_exit(&mr->mr_lock);
2126			/*
2127			 * Call deregister and ensure that all resources get
2128			 * properly freed up.
2129			 */
2130			if (hermon_mr_deregister(state, &mr, dereg_level,
2131			    sleep) != DDI_SUCCESS) {
2132				HERMON_WARNING(state, "failed to deregister "
2133				    "memory region");
2134			}
2135			goto mrrereg_fail;
2136		}
2137		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2138		len_to_use   = mr->mr_bindinfo.bi_len;
2139	} else {
2140		mtt_addr_to_use = mr->mr_mttaddr;
2141		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2142		len_to_use   = mr->mr_bindinfo.bi_len;
2143	}
2144
2145	/*
2146	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
2147	 * when the region was first registered, each key is formed from
2148	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
2149	 * access is required, then the RKey value is not filled in.  Otherwise
2150	 * both Rkey and LKey are given the same value.
2151	 */
2152	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2153	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2154	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2155	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2156		mr->mr_rkey = mr->mr_lkey;
2157	} else
2158		mr->mr_rkey = 0;
2159
2160	/*
2161	 * Fill in the MPT entry.  This is the final step before passing
2162	 * ownership of the MPT entry to the Hermon hardware.  We use all of
2163	 * the information collected/calculated above to fill in the
2164	 * requisite portions of the MPT.
2165	 */
2166	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2167
2168	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
2169	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
2170	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2171	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
2172	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
2173	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
2174	mpt_entry.lr	  = 1;
2175	mpt_entry.phys_addr = 0;
2176	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2177
2178	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
2179	mpt_entry.mem_key	= mr->mr_lkey;
2180	mpt_entry.pd		= pd_to_use->pd_pdnum;
2181
2182	mpt_entry.start_addr	= vaddr_to_use;
2183	mpt_entry.reg_win_len	= len_to_use;
2184	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2185	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2186
2187	/*
2188	 * Write the updated MPT entry to hardware
2189	 *
2190	 * We always use HERMON_CMD_NOSLEEP_SPIN here because we may be
2191	 * holding "mr_lock" across this call in any context and must not sleep.
2192	 */
2193	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2194	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2195	if (status != HERMON_CMD_SUCCESS) {
2196		mutex_exit(&mr->mr_lock);
2197		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2198		    status);
2199		if (status == HERMON_CMD_INVALID_STATUS) {
2200			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2201		}
2202		/*
2203		 * Call deregister and ensure that all current resources get
2204		 * properly freed up. Unnecessary here to attempt to regain
2205		 * software ownership of the MPT entry as that has already
2206		 * been done above.
2207		 */
2208		if (hermon_mr_deregister(state, &mr,
2209		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2210			HERMON_WARNING(state, "failed to deregister memory "
2211			    "region");
2212		}
2213		return (ibc_get_ci_failure(0));
2214	}
2215
2216	/*
2217	 * If we're changing the PD, then update the reference counts now.
2218	 * This means decrementing the reference count on the old PD and
2219	 * incrementing the reference count on the new PD.
2220	 */
2221	if (flags & IBT_MR_CHANGE_PD) {
2222		hermon_pd_refcnt_dec(mr->mr_pdhdl);
2223		hermon_pd_refcnt_inc(pd);
2224	}
2225
2226	/*
2227	 * Update the contents of the Hermon Memory Region handle to reflect
2228	 * what has been changed.
2229	 */
2230	mr->mr_pdhdl	  = pd_to_use;
2231	mr->mr_accflag	  = acc_flags_to_use;
2232	mr->mr_is_umem	  = 0;
2233	mr->mr_is_fmr	  = 0;
2234	mr->mr_umemcookie = NULL;
2235	mr->mr_lkey	  = hermon_mr_key_swap(mr->mr_lkey);
2236	mr->mr_rkey	  = hermon_mr_key_swap(mr->mr_rkey);
2237
2238	/* New MR handle is same as the old */
2239	*mrhdl_new = mr;
2240	mutex_exit(&mr->mr_lock);
2241
2242	return (DDI_SUCCESS);
2243
2244mrrereg_fail:
2245	return (status);
2246}
2247
2248
2249/*
2250 * hermon_mr_rereg_xlat_helper
2251 *    Context: Can be called from interrupt or base context.
2252 *    Note: This routine expects the "mr_lock" to be held when it
2253 *    is called.  Upon returning failure, this routine passes information
2254 *    about what "dereg_level" should be passed to hermon_mr_deregister().
2255 */
2256static int
2257hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2258    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2259    uint_t sleep, uint_t *dereg_level)
2260{
2261	hermon_rsrc_t		*mtt, *mtt_refcnt;
2262	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
2263	ddi_dma_handle_t	dmahdl;
2264	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
2265	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
2266	int			status;
2267
2268	ASSERT(MUTEX_HELD(&mr->mr_lock));
2269
2270	/*
2271	 * Check the "options" flag.  Currently this flag tells the driver
2272	 * whether the region should be bound normally (i.e. with entries
2273	 * written into the PCI IOMMU) or whether it should instead be
2274	 * registered so as to bypass the IOMMU.
2275	 */
2276	if (op == NULL) {
2277		bind_type = HERMON_BINDMEM_NORMAL;
2278	} else {
2279		bind_type = op->mro_bind_type;
2280	}
2281
2282	/*
2283	 * Check for invalid length.  Check if the length is zero or if the
2284	 * length is larger than the maximum configured value.  Return an
2285	 * error if it is.
2286	 */
2287	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2288	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2289		/*
2290		 * Deregister will be called upon returning failure from this
2291		 * routine. This will ensure that all current resources get
2292		 * properly freed up. Unnecessary to attempt to regain
2293		 * software ownership of the MPT entry as that has already
2294		 * been done above (in hermon_mr_reregister())
2295		 */
2296		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2297
2298		status = IBT_MR_LEN_INVALID;
2299		goto mrrereghelp_fail;
2300	}
2301
2302	/*
2303	 * Determine the number of pages necessary for new region and the
2304	 * number of pages supported by the current MTT resources
2305	 */
2306	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2307	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2308
2309	/*
2310	 * Depending on whether we have enough pages or not, the next step is
2311	 * to fill in a set of MTT entries that reflect the new mapping.  In
2312	 * the first case below, we already have enough entries.  This means
2313	 * we need to unbind the memory from the previous mapping, bind the
2314	 * memory for the new mapping, write the new MTT entries, and update
2315	 * the mr to reflect the changes.
2316	 * In the second case below, we do not have enough entries in the
2317	 * current mapping.  So, in this case, we need not only to unbind the
2318	 * current mapping, but we need to free up the MTT resources associated
2319	 * with that mapping.  After we've successfully done that, we continue
2320	 * by binding the new memory, allocating new MTT entries, writing the
2321	 * new MTT entries, and updating the mr to reflect the changes.
2322	 */
2323
2324	/*
2325	 * If this region is being shared (i.e. MTT refcount != 1), then we
2326	 * can't reuse the current MTT resources regardless of their size.
2327	 * Instead we'll need to alloc new ones (below) just as if there
2328	 * hadn't been enough room in the current entries.
2329	 */
2330	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2331	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2332	    (nummtt_needed <= nummtt_in_currrsrc)) {
2333
2334		/*
2335		 * Unbind the old mapping for this memory region, but retain
2336		 * the ddi_dma_handle_t (if possible) for reuse in the bind
2337		 * operation below.  Note:  If the original memory region was
2338		 * bound for IOMMU bypass and the new region cannot use
2339		 * bypass, then a new DMA handle will be necessary.
2340		 */
2341		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2342			mr->mr_bindinfo.bi_free_dmahdl = 0;
2343			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2344			dmahdl = mr->mr_bindinfo.bi_dmahdl;
2345			reuse_dmahdl = 1;
2346		} else {
2347			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2348			dmahdl = NULL;
2349			reuse_dmahdl = 0;
2350		}
2351
2352		/*
2353		 * Bind the new memory and determine the mapped addresses.
2354		 * As described, this routine and hermon_mr_fast_mtt_write()
2355		 * do the majority of the work for the memory registration
2356		 * operations.  Note:  When we successfully finish the binding,
2357		 * we will set the "bi_free_dmahdl" flag to indicate that
2358		 * even though we may have reused the ddi_dma_handle_t we do
2359		 * wish it to be freed up at some later time.  Note also that
2360		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2361		 */
2362		bind->bi_bypass	= bind_type;
2363		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2364		if (status != DDI_SUCCESS) {
2365			if (reuse_dmahdl) {
2366				ddi_dma_free_handle(&dmahdl);
2367			}
2368
2369			/*
2370			 * Deregister will be called upon returning failure
2371			 * from this routine. This will ensure that all
2372			 * current resources get properly freed up.
2373			 * Unnecessary to attempt to regain software ownership
2374			 * of the MPT entry as that has already been done
2375			 * above (in hermon_mr_reregister()).  Also unnecessary
2376			 * to attempt to unbind the memory.
2377			 */
2378			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2379
2380			status = IBT_INSUFF_RESOURCE;
2381			goto mrrereghelp_fail;
2382		}
2383		if (reuse_dmahdl) {
2384			bind->bi_free_dmahdl = 1;
2385		}
2386
2387		/*
2388		 * Using the new mapping, but reusing the current MTT
2389		 * resources, write the updated entries to MTT
2390		 */
2391		mtt    = mr->mr_mttrsrcp;
2392		status = hermon_mr_fast_mtt_write(state, mtt, bind,
2393		    mtt_pgsize_bits);
2394		if (status != DDI_SUCCESS) {
2395			/*
2396			 * Deregister will be called upon returning failure
2397			 * from this routine. This will ensure that all
2398			 * current resources get properly freed up.
2399			 * Unnecessary to attempt to regain software ownership
2400			 * of the MPT entry as that has already been done
2401			 * above (in hermon_mr_reregister()).  Also unnecessary
2402			 * to attempt to unbind the memory.
2403			 *
2404			 * But we do need to unbind the newly bound memory
2405			 * before returning.
2406			 */
2407			hermon_mr_mem_unbind(state, bind);
2408			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2409
2410			/*
2411			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2412			 * only if it detects a HW error during DMA.
2413			 */
2414			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2415			status = ibc_get_ci_failure(0);
2416			goto mrrereghelp_fail;
2417		}
2418
2419		/* Put the updated information into the Mem Region handle */
2420		mr->mr_bindinfo	  = *bind;
2421		mr->mr_logmttpgsz = mtt_pgsize_bits;
2422
2423	} else {
2424		/*
2425		 * Check if the memory region MTT is shared by any other MRs.
2426		 * Since the resource may be shared between multiple memory
2427		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2428		 * important that we not unbind any resources prematurely.
2429		 */
2430		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2431			/*
2432			 * Unbind the old mapping for this memory region, but
2433			 * retain the ddi_dma_handle_t for reuse in the bind
2434			 * operation below. Note: This can only be done here
2435			 * because the region being reregistered is not
2436			 * currently shared.  Also, if the original memory region
2437			 * was bound for IOMMU bypass and the new region cannot
2438			 * use bypass, then a new DMA handle will be
2439			 * necessary.
2440			 */
2441			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2442				mr->mr_bindinfo.bi_free_dmahdl = 0;
2443				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2444				dmahdl = mr->mr_bindinfo.bi_dmahdl;
2445				reuse_dmahdl = 1;
2446			} else {
2447				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2448				dmahdl = NULL;
2449				reuse_dmahdl = 0;
2450			}
2451		} else {
2452			dmahdl = NULL;
2453			reuse_dmahdl = 0;
2454		}
2455
2456		/*
2457		 * Bind the new memory and determine the mapped addresses.
2458		 * As described, this routine and hermon_mr_fast_mtt_write()
2459		 * do the majority of the work for the memory registration
2460		 * operations.  Note:  When we successfully finish the binding,
2461		 * we will set the "bi_free_dmahdl" flag to indicate that
2462		 * even though we may have reused the ddi_dma_handle_t we do
2463		 * wish it to be freed up at some later time.  Note also that
2464		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2465		 */
2466		bind->bi_bypass	= bind_type;
2467		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2468		if (status != DDI_SUCCESS) {
2469			if (reuse_dmahdl) {
2470				ddi_dma_free_handle(&dmahdl);
2471			}
2472
2473			/*
2474			 * Deregister will be called upon returning failure
2475			 * from this routine. This will ensure that all
2476			 * current resources get properly freed up.
2477			 * Unnecessary to attempt to regain software ownership
2478			 * of the MPT entry as that has already been done
2479			 * above (in hermon_mr_reregister()).  Also unnecessary
2480			 * to attempt to unbind the memory.
2481			 */
2482			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2483
2484			status = IBT_INSUFF_RESOURCE;
2485			goto mrrereghelp_fail;
2486		}
2487		if (reuse_dmahdl) {
2488			bind->bi_free_dmahdl = 1;
2489		}
2490
2491		/*
2492		 * Allocate the new MTT entries resource
2493		 */
2494		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2495		    sleep, &mtt);
2496		if (status != DDI_SUCCESS) {
2497			/*
2498			 * Deregister will be called upon returning failure
2499			 * from this routine. This will ensure that all
2500			 * current resources get properly freed up.
2501			 * Unnecessary to attempt to regain software ownership
2502			 * of the MPT entry as that has already been done
2503			 * above (in hermon_mr_reregister()).  Also unnecessary
2504			 * to attempt to unbind the memory.
2505			 *
2506			 * But we do need to unbind the newly bound memory
2507			 * before returning.
2508			 */
2509			hermon_mr_mem_unbind(state, bind);
2510			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2511
2512			status = IBT_INSUFF_RESOURCE;
2513			goto mrrereghelp_fail;
2514		}
2515
2516		/*
2517		 * Allocate MTT reference count (to track shared memory
2518		 * regions).  As mentioned elsewhere above, this reference
2519		 * count resource may never be used on the given memory region,
2520		 * but if it is ever later registered as a "shared" memory
2521		 * region then this resource will be necessary.  Note:  This
2522		 * is only necessary here if the existing memory region is
2523		 * already being shared (because otherwise we already have
2524		 * a useable reference count resource).
2525		 */
2526		if (HERMON_MTT_IS_SHARED(swrc_old)) {
2527			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2528			    sleep, &mtt_refcnt);
2529			if (status != DDI_SUCCESS) {
2530				/*
2531				 * Deregister will be called upon returning
2532				 * failure from this routine. This will ensure
2533				 * that all current resources get properly
2534				 * freed up.  Unnecessary to attempt to regain
2535				 * software ownership of the MPT entry as that
2536				 * has already been done above (in
2537				 * hermon_mr_reregister()).  Also unnecessary
2538				 * to attempt to unbind the memory.
2539				 *
2540				 * But we need to unbind the newly bound
2541				 * memory and free up the newly allocated MTT
2542				 * entries before returning.
2543				 */
2544				hermon_mr_mem_unbind(state, bind);
2545				hermon_rsrc_free(state, &mtt);
2546				*dereg_level =
2547				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2548
2549				status = IBT_INSUFF_RESOURCE;
2550				goto mrrereghelp_fail;
2551			}
2552			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2553			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2554			HERMON_MTT_REFCNT_INIT(swrc_new);
2555		} else {
2556			mtt_refcnt = mr->mr_mttrefcntp;
2557		}
2558
2559		/*
2560		 * Using the new mapping and the new MTT resources, write the
2561		 * updated entries to MTT
2562		 */
2563		status = hermon_mr_fast_mtt_write(state, mtt, bind,
2564		    mtt_pgsize_bits);
2565		if (status != DDI_SUCCESS) {
2566			/*
2567			 * Deregister will be called upon returning failure
2568			 * from this routine. This will ensure that all
2569			 * current resources get properly freed up.
2570			 * Unnecessary to attempt to regain software ownership
2571			 * of the MPT entry as that has already been done
2572			 * above (in hermon_mr_reregister()).  Also unnecessary
2573			 * to attempt to unbind the memory.
2574			 *
2575			 * But we need to unbind the newly bound memory,
2576			 * free up the newly allocated MTT entries, and
2577			 * (possibly) free the new MTT reference count
2578			 * resource before returning.
2579			 */
2580			if (HERMON_MTT_IS_SHARED(swrc_old)) {
2581				hermon_rsrc_free(state, &mtt_refcnt);
2582			}
2583			hermon_mr_mem_unbind(state, bind);
2584			hermon_rsrc_free(state, &mtt);
2585			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2586
2587			status = IBT_INSUFF_RESOURCE;
2588			goto mrrereghelp_fail;
2589		}
2590
2591		/*
2592		 * Check if the memory region MTT is shared by any other MRs.
2593		 * Since the resource may be shared between multiple memory
2594		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2595		 * important that we not free up any resources prematurely.
2596		 */
2597		if (HERMON_MTT_IS_SHARED(swrc_old)) {
2598			/* Decrement MTT reference count for "old" region */
2599			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
2600		} else {
2601			/* Free up the old MTT entries resource */
2602			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
2603		}
2604
2605		/* Put the updated information into the mrhdl */
2606		mr->mr_bindinfo	  = *bind;
2607		mr->mr_logmttpgsz = mtt_pgsize_bits;
2608		mr->mr_mttrsrcp   = mtt;
2609		mr->mr_mttrefcntp = mtt_refcnt;
2610	}
2611
2612	/*
2613	 * Calculate and return the updated MTT address (in the DDR address
2614	 * space).  This will be used by the caller (hermon_mr_reregister) in
2615	 * the updated MPT entry
2616	 */
2617	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
2618
2619	return (DDI_SUCCESS);
2620
2621mrrereghelp_fail:
2622	return (status);
2623}
2624
2625
2626/*
2627 * hermon_mr_nummtt_needed()
2628 *    Context: Can be called from interrupt or base context.
2629 */
2630/* ARGSUSED */
2631static uint64_t
2632hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
2633    uint_t *mtt_pgsize_bits)
2634{
2635	uint64_t	pg_offset_mask;
2636	uint64_t	pg_offset, tmp_length;
2637
2638	/*
2639	 * For now we specify the page size as 8Kb (the default page size for
2640	 * the sun4u architecture), or 4Kb for x86.  A future enhancement
2641	 * would be to determine the optimal page size from the dmacookies.
2642	 */
2643	*mtt_pgsize_bits = PAGESHIFT;
2644
2645	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
2646	pg_offset = bind->bi_addr & pg_offset_mask;
2647	tmp_length = pg_offset + (bind->bi_len - 1);
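	/*
	 * Worked example (hypothetical values): with a 4Kb page size,
	 * bi_addr = 0x1003 and bi_len = 0x2000 give pg_offset = 0x3 and
	 * tmp_length = 0x2002, so (0x2002 >> 12) + 1 = 3 MTT entries are
	 * needed (covering the pages at 0x1000, 0x2000, and 0x3000).
	 */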
2648	return ((tmp_length >> *mtt_pgsize_bits) + 1);
2649}
2650
2651
2652/*
2653 * hermon_mr_mem_bind()
2654 *    Context: Can be called from interrupt or base context.
2655 */
2656static int
2657hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2658    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
2659{
2660	ddi_dma_attr_t	dma_attr;
2661	int		(*callback)(caddr_t);
2662	int		status;
2663
2664	/* bi_type must be set to a meaningful value to get a bind handle */
2665	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
2666	    bind->bi_type == HERMON_BINDHDL_BUF ||
2667	    bind->bi_type == HERMON_BINDHDL_UBUF);
2668
2669	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2670
2671	/* Set the callback flag appropriately */
2672	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
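	/*
	 * Note: DDI_DMA_SLEEP allows the DDI DMA routines below to block
	 * waiting for resources, while DDI_DMA_DONTWAIT causes them to
	 * fail immediately if resources are unavailable.
	 */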
2673
2674	/*
2675	 * Initialize many of the default DMA attributes.  Then, if we're
2676	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2677	 */
2678	if (dmahdl == NULL) {
2679		hermon_dma_attr_init(state, &dma_attr);
2680#ifdef	__sparc
2681		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
2682			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2683		}
2684#endif
2685
2686		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
2687		if (is_buffer) {
2688			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
2689				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
2690				    (hermon_kernel_data_ro ==
2691				    HERMON_RO_ENABLED)) {
2692					dma_attr.dma_attr_flags |=
2693					    DDI_DMA_RELAXED_ORDERING;
2694				}
2695				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
2696				    (hermon_user_data_ro ==
2697				    HERMON_RO_ENABLED))) {
2698					dma_attr.dma_attr_flags |=
2699					    DDI_DMA_RELAXED_ORDERING;
2700				}
2701			}
2702		}
2703
2704		/* Allocate a DMA handle for the binding */
2705		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
2706		    callback, NULL, &bind->bi_dmahdl);
2707		if (status != DDI_SUCCESS) {
2708			return (status);
2709		}
2710		bind->bi_free_dmahdl = 1;
2711
2712	} else  {
2713		bind->bi_dmahdl = dmahdl;
2714		bind->bi_free_dmahdl = 0;
2715	}
2716
2717
2718	/*
2719	 * Bind the memory to get the PCI mapped addresses.  The decision
2720	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
2721	 * is determined by the "bi_type" flag.  Note: if the bind operation
2722	 * fails then we have to free up the DMA handle and return error.
2723	 */
2724	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
2725		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
2726		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
2727		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
2728		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
2729
2730	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
2731
2732		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
2733		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
2734		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
2735	}
2736	if (status != DDI_DMA_MAPPED) {
2737		if (bind->bi_free_dmahdl != 0) {
2738			ddi_dma_free_handle(&bind->bi_dmahdl);
2739		}
2740		return (status);
2741	}
2742
2743	return (DDI_SUCCESS);
2744}
2745
2746
2747/*
2748 * hermon_mr_mem_unbind()
2749 *    Context: Can be called from interrupt or base context.
2750 */
2751static void
2752hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
2753{
2754	int	status;
2755
2756	/*
2757	 * In the HERMON_BINDHDL_UBUF case, the memory that bi_buf points to
2758	 * was allocated internally by ddi_umem_iosetup(), so it must be
2759	 * freed here.  Reset bi_type to HERMON_BINDHDL_NONE so that it is
2760	 * not freed again later.
2761	 */
2762	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2763	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
2764		freerbuf(bind->bi_buf);
2765		bind->bi_type = HERMON_BINDHDL_NONE;
2766	}
2767	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2768
2769	/*
2770	 * Unbind the DMA memory for the region
2771	 *
2772	 * Note: The only way ddi_dma_unbind_handle() currently
2773	 * can return an error is if the handle passed in is invalid.
2774	 * Since this should never happen, we choose to return void
2775	 * from this function!  If this does return an error, however,
2776	 * then we print a warning message to the console.
2777	 */
2778	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2779	if (status != DDI_SUCCESS) {
2780		HERMON_WARNING(state, "failed to unbind DMA mapping");
2781		return;
2782	}
2783
2784	/* Free up the DMA handle */
2785	if (bind->bi_free_dmahdl != 0) {
2786		ddi_dma_free_handle(&bind->bi_dmahdl);
2787	}
2788}
2789
2790
2791/*
2792 * hermon_mr_fast_mtt_write()
2793 *    Context: Can be called from interrupt or base context.
2794 */
2795static int
2796hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
2797    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
2798{
2799	hermon_icm_table_t	*icm_table;
2800	hermon_dma_info_t	*dma_info;
2801	uint32_t		index1, index2, rindx;
2802	ddi_dma_cookie_t	dmacookie;
2803	uint_t			cookie_cnt;
2804	uint64_t		*mtt_table;
2805	uint64_t		mtt_entry;
2806	uint64_t		addr, endaddr;
2807	uint64_t		pagesize;
2808	offset_t		i, start;
2809	uint_t			per_span;
2810	int			sync_needed;
2811
2812	/*
2813	 * XXX According to the PRM, we are to use the WRITE_MTT
2814	 * command to write out MTTs. Tavor does not do this,
2815	 * instead taking advantage of direct access to the MTTs,
2816	 * and knowledge that Mellanox FMR relies on our ability
2817	 * to write directly to the MTTs without any further
2818	 * notification to the firmware. Likewise, we will choose
2819	 * to not use the WRITE_MTT command, but to simply write
2820	 * out the MTTs.
2821	 */
2822
2823	/* Calculate page size from the suggested value passed in */
2824	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
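	/* e.g. a mtt_pgsize_bits value of 12 yields a 4Kb page size */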
2825
2826	/* Walk the "cookie list" and fill in the MTT table entries */
2827	dmacookie  = bind->bi_dmacookie;
2828	cookie_cnt = bind->bi_cookiecnt;
2829
2830	icm_table = &state->hs_icm[HERMON_MTT];
2831	rindx = mtt->hr_indx;
2832	hermon_index(index1, index2, rindx, icm_table, i);
2833	start = i;
2834
2835	per_span   = icm_table->span;
2836	dma_info   = icm_table->icm_dma[index1] + index2;
2837	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
2838
2839	sync_needed = 0;
2840	while (cookie_cnt-- > 0) {
2841		addr    = dmacookie.dmac_laddress;
2842		endaddr = addr + (dmacookie.dmac_size - 1);
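		/* Round the cookie's starting address down to a page boundary */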
2843		addr    = addr & ~((uint64_t)pagesize - 1);
2844
2845		while (addr <= endaddr) {
2846
2847			/*
2848			 * Fill in the mapped addresses (calculated above) and
2849			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
2850			 */
2851			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
2852			mtt_table[i] = htonll(mtt_entry);
2853			i++;
2854			rindx++;
2855
2856			if (i == per_span) {
2857
2858				(void) ddi_dma_sync(dma_info->dma_hdl,
2859				    start * sizeof (hermon_hw_mtt_t),
2860				    (i - start) * sizeof (hermon_hw_mtt_t),
2861				    DDI_DMA_SYNC_FORDEV);
2862
2863				hermon_index(index1, index2, rindx, icm_table,
2864				    i);
2865				start = i;
2866				dma_info = icm_table->icm_dma[index1] + index2;
2867				mtt_table =
2868				    (uint64_t *)(uintptr_t)dma_info->vaddr;
2869
2870				sync_needed = 0;
2871			} else {
2872				sync_needed = 1;
2873			}
2874
2875			addr += pagesize;
2876			if (addr == 0) {
2877				static int do_once = 1;
2878				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2879				    do_once))
2880				if (do_once) {
2881					do_once = 0;
2882					cmn_err(CE_NOTE, "probable error in "
2883					    "dma_cookie address from caller\n");
2884				}
2885				break;
2886			}
2887		}
2888
2889		/*
2890		 * When we've reached the end of the current DMA cookie,
2891		 * jump to the next cookie (if there are more)
2892		 */
2893		if (cookie_cnt != 0) {
2894			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2895		}
2896	}
2897
2898	/* done all the cookies, now sync the memory for the device */
2899	if (sync_needed)
2900		(void) ddi_dma_sync(dma_info->dma_hdl,
2901		    start * sizeof (hermon_hw_mtt_t),
2902		    (i - start) * sizeof (hermon_hw_mtt_t),
2903		    DDI_DMA_SYNC_FORDEV);
2904
2905	return (DDI_SUCCESS);
2906}
2907
2908/*
2909 * hermon_mr_fast_mtt_write_fmr()
2910 *    Context: Can be called from interrupt or base context.
2911 */
2912static int
2913hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr,
2914    uint32_t mtt_pgsize_bits)
2915{
2916	uint64_t		*mtt_table;
2917	ibt_phys_addr_t		*buf;
2918	uint64_t		mtt_entry;
2919	uint64_t		addr, first_addr, endaddr;
2920	uint64_t		pagesize;
2921	int			i, j;
2922
2923	/* Calculate page size from the suggested value passed in */
2924	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
2925
2926	/*
2927	 * Walk the "addr list" and fill in the MTT table entries
2928	 */
2929	mtt_table  = (uint64_t *)mtt->hr_addr;
2930	for (i = 0, j = 0; i < mem_pattr->pmr_num_buf; i++) {
2931		buf = &mem_pattr->pmr_addr_list[i];
2932
2933		/*
2934		 * For first cookie, use the offset field to determine where
2935		 * the buffer starts.  The end addr is then calculated with the
2936		 * offset in mind.
2937		 */
2938		if (i == 0) {
2939			first_addr = addr = buf->p_laddr +
2940			    mem_pattr->pmr_offset;
2941			endaddr = addr + (mem_pattr->pmr_buf_sz - 1) -
2942			    mem_pattr->pmr_offset;
2943		/*
2944		 * For last cookie, determine end addr based on starting
2945		 * address and size of the total buffer
2946		 */
2947		} else if (i == mem_pattr->pmr_num_buf - 1) {
2948			addr = buf->p_laddr;
2949			endaddr = addr + ((first_addr + mem_pattr->pmr_len) &
2950			    (mem_pattr->pmr_buf_sz - 1));
2951		/*
2952		 * For the middle cookies case, start and end addr are
2953		 * straightforward.  Just use the laddr, and the size, as all
2954		 * middle cookies are a set size.
2955		 */
2956		} else {
2957			addr = buf->p_laddr;
2958			endaddr = addr + (mem_pattr->pmr_buf_sz - 1);
2959		}
2960
2961		addr	= addr & ~((uint64_t)pagesize - 1);
2962		while (addr <= endaddr) {
2963			/*
2964			 * Fill in the mapped addresses (calculated above) and
2965			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
2966			 */
2967			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
2968			mtt_table[j++] = htonll(mtt_entry);
2969			addr += pagesize;
2970		}
2971	}
2972
2973	return (DDI_SUCCESS);
2974}
2975
2976
2977/*
2978 * hermon_mtt_refcnt_inc()
2979 *    Context: Can be called from interrupt or base context.
2980 */
2981static uint_t
2982hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
2983{
2984	hermon_sw_refcnt_t *rc;
2985
2986	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
2987	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
2988}
2989
2990
2991/*
2992 * hermon_mtt_refcnt_dec()
2993 *    Context: Can be called from interrupt or base context.
2994 */
2995static uint_t
2996hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
2997{
2998	hermon_sw_refcnt_t *rc;
2999
3000	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3001	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3002}
3003