1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 * hermon_mr.c
28 *    Hermon Memory Region/Window Routines
29 *
30 *    Implements all the routines necessary to provide the requisite memory
31 *    registration verbs.  These include operations like RegisterMemRegion(),
32 *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
33 *    etc., that affect Memory Regions.  It also includes the verbs that
34 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
35 *    and QueryMemWindow().
36 */
37
38#include <sys/types.h>
39#include <sys/conf.h>
40#include <sys/ddi.h>
41#include <sys/sunddi.h>
42#include <sys/modctl.h>
43#include <sys/esunddi.h>
44
45#include <sys/ib/adapters/hermon/hermon.h>
46
47extern uint32_t hermon_kernel_data_ro;
48extern uint32_t hermon_user_data_ro;
49extern int hermon_rdma_debug;
50
51/*
52 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
53 * of Hermon memory keys (LKeys and RKeys)
54 */
55static	uint_t hermon_memkey_cnt = 0x00;
56#define	HERMON_MEMKEY_SHIFT	24
57
58/* initial state of an MPT */
59#define	HERMON_MPT_SW_OWNERSHIP	0xF	/* memory regions */
60#define	HERMON_MPT_FREE		0x3	/* allocate lkey */
61
62static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
63    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
64    hermon_mpt_rsrc_type_t mpt_type);
65static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
66    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
67    hermon_mr_options_t *op);
68static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
69    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
70    uint_t sleep, uint_t *dereg_level);
71static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
72    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
73static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
74    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
75static void hermon_mr_mem_unbind(hermon_state_t *state,
76    hermon_bind_info_t *bind);
77static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
78    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
79static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
80    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
81static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
82static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
83
84
85/*
86 * The Hermon umem_lockmemory() callback ops.  When userland memory is
87 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
88 * callback will be called whenever the memory for the corresponding
89 * ddi_umem_cookie_t is being freed.
90 */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,		/* version of the umem callback ABI */
	hermon_umap_umemlock_cb,	/* invoked when the locked memory is freed */
};
95
96
97
98/*
99 * hermon_mr_register()
100 *    Context: Can be called from interrupt or base context.
101 */
102int
103hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
104    ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
105    hermon_mpt_rsrc_type_t mpt_type)
106{
107	hermon_bind_info_t	bind;
108	int			status;
109
110	/*
111	 * Fill in the "bind" struct.  This struct provides the majority
112	 * of the information that will be used to distinguish between an
113	 * "addr" binding (as is the case here) and a "buf" binding (see
114	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
115	 * which does most of the "heavy lifting" for the Hermon memory
116	 * registration routines.
117	 */
118	bind.bi_type  = HERMON_BINDHDL_VADDR;
119	bind.bi_addr  = mr_attr->mr_vaddr;
120	bind.bi_len   = mr_attr->mr_len;
121	bind.bi_as    = mr_attr->mr_as;
122	bind.bi_flags = mr_attr->mr_flags;
123	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
124	    mpt_type);
125	return (status);
126}
127
128
129/*
130 * hermon_mr_register_buf()
131 *    Context: Can be called from interrupt or base context.
132 */
133int
134hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
135    ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
136    hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
137{
138	hermon_bind_info_t	bind;
139	int			status;
140
141	/*
142	 * Fill in the "bind" struct.  This struct provides the majority
143	 * of the information that will be used to distinguish between an
144	 * "addr" binding (see above) and a "buf" binding (as is the case
145	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
146	 * which does most of the "heavy lifting" for the Hermon memory
147	 * registration routines.  Note: We have chosen to provide
148	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
149	 * not set).  It is not critical what value we choose here as it need
150	 * only be unique for the given RKey (which will happen by default),
151	 * so the choice here is somewhat arbitrary.
152	 */
153	bind.bi_type  = HERMON_BINDHDL_BUF;
154	bind.bi_buf   = buf;
155	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
156		bind.bi_addr  = mr_attr->mr_vaddr;
157	} else {
158		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
159	}
160	bind.bi_as    = NULL;
161	bind.bi_len   = (uint64_t)buf->b_bcount;
162	bind.bi_flags = mr_attr->mr_flags;
163	status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
164	return (status);
165}
166
167
168/*
169 * hermon_mr_register_shared()
170 *    Context: Can be called from interrupt or base context.
171 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * both the RKey and LKey are unconditionally given the same value
	 * here; the access bits programmed into the MPT entry below govern
	 * what remote use (if any) is actually permitted.
	 */
	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Round the original binding out to whole pages */
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where the hermon_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	/*
	 * Everything needed from the original handle has been copied, so
	 * the original MR's lock can be dropped before touching hardware.
	 */
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;	/* local read is always enabled */
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	/* Split the MTT table offset into the high/low MPT address fields */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	/* low word drops the 3 always-zero bits (8-byte aligned MTTs) */
	mpt_entry.mtt_addr_l = mtt_addr >> 3;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_mpt_type	  = HERMON_MPT_DMPT;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_is_fmr	  = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Each label undoes everything set up after the corresponding step above;
 * control falls through each successive label to unwind earlier steps.
 */
mrshared_fail5:
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}
437
438/*
439 * hermon_mr_alloc_fmr()
440 *    Context: Can be called from interrupt or base context.
441 */
442int
443hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
444    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
445{
446	hermon_rsrc_t		*mpt, *mtt, *rsrc;
447	hermon_hw_dmpt_t	mpt_entry;
448	hermon_mrhdl_t		mr;
449	hermon_bind_info_t	bind;
450	uint64_t		mtt_addr;
451	uint64_t		nummtt;
452	uint_t			sleep, mtt_pgsize_bits;
453	int			status;
454	offset_t		i;
455	hermon_icm_table_t	*icm_table;
456	hermon_dma_info_t	*dma_info;
457	uint32_t		index1, index2, rindx;
458
459	/*
460	 * Check the sleep flag.  Ensure that it is consistent with the
461	 * current thread context (i.e. if we are currently in the interrupt
462	 * context, then we shouldn't be attempting to sleep).
463	 */
464	sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
465	    HERMON_NOSLEEP;
466	if ((sleep == HERMON_SLEEP) &&
467	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
468		return (IBT_INVALID_PARAM);
469	}
470
471	/* Increment the reference count on the protection domain (PD) */
472	hermon_pd_refcnt_inc(pd);
473
474	/*
475	 * Allocate an MPT entry.  This will be filled in with all the
476	 * necessary parameters to define the FMR.  Specifically, it will be
477	 * made to reference the currently existing MTT entries and ownership
478	 * of the MPT will be passed to the hardware in the last step below.
479	 * If we fail here, we must undo the protection domain reference count.
480	 */
481
482	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
483	if (status != DDI_SUCCESS) {
484		status = IBT_INSUFF_RESOURCE;
485		goto fmralloc_fail1;
486	}
487
488	/*
489	 * Allocate the software structure for tracking the fmr memory
490	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
491	 * must undo the protection domain reference count and the previous
492	 * resource allocation.
493	 */
494	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
495	if (status != DDI_SUCCESS) {
496		status = IBT_INSUFF_RESOURCE;
497		goto fmralloc_fail2;
498	}
499	mr = (hermon_mrhdl_t)rsrc->hr_addr;
500	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
501
502	/*
503	 * Setup and validate the memory region access flags.  This means
504	 * translating the IBTF's enable flags into the access flags that
505	 * will be used in later operations.
506	 */
507	mr->mr_accflag = 0;
508	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
509		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
510	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
511		mr->mr_accflag |= IBT_MR_REMOTE_READ;
512	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
513		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
514	if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
515		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
516
517	/*
518	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
519	 * from a certain number of "constrained" bits (the least significant
520	 * bits) and some number of "unconstrained" bits.  The constrained
521	 * bits must be set to the index of the entry in the MPT table, but
522	 * the unconstrained bits can be set to any value we wish.  Note:
523	 * if no remote access is required, then the RKey value is not filled
524	 * in.  Otherwise both Rkey and LKey are given the same value.
525	 */
526	mr->mr_fmr_key = 1;	/* ready for the next reload */
527	mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;
528
529	/*
530	 * Determine number of pages spanned.  This routine uses the
531	 * information in the "bind" struct to determine the required
532	 * number of MTT entries needed (and returns the suggested page size -
533	 * as a "power-of-2" - for each MTT entry).
534	 */
535	/* Assume address will be page aligned later */
536	bind.bi_addr = 0;
537	/* Calculate size based on given max pages */
538	bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
539	nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
540
541	/*
542	 * Allocate the MTT entries.  Use the calculations performed above to
543	 * allocate the required number of MTT entries.  If we fail here, we
544	 * must not only undo all the previous resource allocation (and PD
545	 * reference count), but we must also unbind the memory.
546	 */
547	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
548	if (status != DDI_SUCCESS) {
549		IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
550		status = IBT_INSUFF_RESOURCE;
551		goto fmralloc_fail3;
552	}
553	mr->mr_logmttpgsz = mtt_pgsize_bits;
554
555	/*
556	 * Fill in the MPT entry.  This is the final step before passing
557	 * ownership of the MPT entry to the Hermon hardware.  We use all of
558	 * the information collected/calculated above to fill in the
559	 * requisite portions of the MPT.
560	 */
561	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
562	mpt_entry.en_bind = 0;
563	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
564	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
565	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
566	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
567	mpt_entry.lr	  = 1;
568	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
569	mpt_entry.pd		= pd->pd_pdnum;
570
571	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
572	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
573	mpt_entry.fast_reg_en = 1;
574	mpt_entry.mtt_size = (uint_t)nummtt;
575	mpt_entry.mtt_addr_h = mtt_addr >> 32;
576	mpt_entry.mtt_addr_l = mtt_addr >> 3;
577	mpt_entry.mem_key = mr->mr_lkey;
578
579	/*
580	 * FMR sets these to 0 for now.  Later during actual fmr registration
581	 * these values are filled in.
582	 */
583	mpt_entry.start_addr	= 0;
584	mpt_entry.reg_win_len	= 0;
585
586	/*
587	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
588	 * the entry to the hardware.  Note: in general, this operation
589	 * shouldn't fail.  But if it does, we have to undo everything we've
590	 * done above before returning error.
591	 */
592	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
593	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
594	if (status != HERMON_CMD_SUCCESS) {
595		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
596		    status);
597		if (status == HERMON_CMD_INVALID_STATUS) {
598			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
599		}
600		status = ibc_get_ci_failure(0);
601		goto fmralloc_fail4;
602	}
603
604	/*
605	 * Fill in the rest of the Hermon Memory Region handle.  Having
606	 * successfully transferred ownership of the MPT, we can update the
607	 * following fields for use in further operations on the MR.  Also, set
608	 * that this is an FMR region.
609	 */
610	mr->mr_mptrsrcp	  = mpt;
611	mr->mr_mttrsrcp	  = mtt;
612
613	mr->mr_mpt_type   = HERMON_MPT_DMPT;
614	mr->mr_pdhdl	  = pd;
615	mr->mr_rsrcp	  = rsrc;
616	mr->mr_is_fmr	  = 1;
617	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
618	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
619	mr->mr_mttaddr	   = mtt_addr;
620	(void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));
621
622	/* initialize hr_addr for use during register/deregister/invalidate */
623	icm_table = &state->hs_icm[HERMON_DMPT];
624	rindx = mpt->hr_indx;
625	hermon_index(index1, index2, rindx, icm_table, i);
626	dma_info = icm_table->icm_dma[index1] + index2;
627	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
628	mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));
629
630	*mrhdl = mr;
631
632	return (DDI_SUCCESS);
633
634/*
635 * The following is cleanup for all possible failure cases in this routine
636 */
637fmralloc_fail4:
638	kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
639fmralloc_fail3:
640	hermon_rsrc_free(state, &rsrc);
641fmralloc_fail2:
642	hermon_rsrc_free(state, &mpt);
643fmralloc_fail1:
644	hermon_pd_refcnt_dec(pd);
645fmralloc_fail:
646	return (status);
647}
648
649
650/*
651 * hermon_mr_register_physical_fmr()
652 *    Context: Can be called from interrupt or base context.
653 */
654/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;
	uint32_t		key;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	/*
	 * hr_addr is the mapped address of the hardware dMPT entry (set up
	 * by hermon_mr_alloc_fmr()); the entry is updated in place here,
	 * viewed as an array of 64-bit words.
	 */
	mpt_table = (uint64_t *)mpt->hr_addr;

	/*
	 * Write MPT status to SW bit.  Putting the entry into software
	 * ownership prevents the hardware from using it while the fields
	 * below are rewritten.
	 */
	*(uint8_t *)mpt_table = 0xF0;

	/* order the ownership write before the MTT/MPT field updates */
	membar_producer();

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  The constrained low
	 * bits are the MPT index; the unconstrained high bits are taken from
	 * "mr_fmr_key", which is bumped on every reload so that stale keys
	 * from a previous FMR mapping are invalidated.  Both the RKey and
	 * LKey are given the same value.
	 */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/*
	 * Update the individual dMPT fields in place (big-endian, hence the
	 * htonl/htonll).  NOTE(review): the word offsets below (mem_key at
	 * word 1, start_addr at word 2, len at word 3, lkey at word 4) must
	 * match the hardware dMPT layout -- verify against hermon_hw_dmpt_t.
	 */
	/* write mem key value */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* make all field updates visible before returning ownership to HW */
	membar_producer();

	/* Write MPT status to HW bit */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (status);
}
737
738
739/*
740 * hermon_mr_deregister()
741 *    Context: Can be called from interrupt or base context.
742 */
743/* ARGSUSED */
744int
745hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
746    uint_t sleep)
747{
748	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
749	hermon_umap_db_entry_t	*umapdb;
750	hermon_pdhdl_t		pd;
751	hermon_mrhdl_t		mr;
752	hermon_bind_info_t	*bind;
753	uint64_t		value;
754	int			status;
755	uint_t			shared_mtt;
756
757	/*
758	 * Check the sleep flag.  Ensure that it is consistent with the
759	 * current thread context (i.e. if we are currently in the interrupt
760	 * context, then we shouldn't be attempting to sleep).
761	 */
762	if ((sleep == HERMON_SLEEP) &&
763	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
764		status = IBT_INVALID_PARAM;
765		return (status);
766	}
767
768	/*
769	 * Pull all the necessary information from the Hermon Memory Region
770	 * handle.  This is necessary here because the resource for the
771	 * MR handle is going to be freed up as part of the this
772	 * deregistration
773	 */
774	mr	= *mrhdl;
775	mutex_enter(&mr->mr_lock);
776	mpt	= mr->mr_mptrsrcp;
777	mtt	= mr->mr_mttrsrcp;
778	mtt_refcnt = mr->mr_mttrefcntp;
779	rsrc	= mr->mr_rsrcp;
780	pd	= mr->mr_pdhdl;
781	bind	= &mr->mr_bindinfo;
782
783	/*
784	 * Check here if the memory region is really an FMR.  If so, this is a
785	 * bad thing and we shouldn't be here.  Return failure.
786	 */
787	if (mr->mr_is_fmr) {
788		mutex_exit(&mr->mr_lock);
789		return (IBT_INVALID_PARAM);
790	}
791
792	/*
793	 * Check here to see if the memory region has already been partially
794	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
795	 * If so, then jump to the end and free the remaining resources.
796	 */
797	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
798		goto mrdereg_finish_cleanup;
799	}
800	if (hermon_rdma_debug & 0x4)
801		IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
802		    mr, mr->mr_rkey);
803
804	/*
805	 * We must drop the "mr_lock" here to ensure that both SLEEP and
806	 * NOSLEEP calls into the firmware work as expected.  Also, if two
807	 * threads are attemping to access this MR (via de-register,
808	 * re-register, or otherwise), then we allow the firmware to enforce
809	 * the checking, that only one deregister is valid.
810	 */
811	mutex_exit(&mr->mr_lock);
812
813	/*
814	 * Reclaim MPT entry from hardware (if necessary).  Since the
815	 * hermon_mr_deregister() routine is used in the memory region
816	 * reregistration process as well, it is possible that we will
817	 * not always wish to reclaim ownership of the MPT.  Check the
818	 * "level" arg and, if necessary, attempt to reclaim it.  If
819	 * the ownership transfer fails for any reason, we check to see
820	 * what command status was returned from the hardware.  The only
821	 * "expected" error status is the one that indicates an attempt to
822	 * deregister a memory region that has memory windows bound to it
823	 */
824	if (level >= HERMON_MR_DEREG_ALL) {
825		if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
826			status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
827			    NULL, 0, mpt->hr_indx, sleep);
828			if (status != HERMON_CMD_SUCCESS) {
829				if (status == HERMON_CMD_REG_BOUND) {
830					return (IBT_MR_IN_USE);
831				} else {
832					cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
833					    "command failed: %08x\n", status);
834					if (status ==
835					    HERMON_CMD_INVALID_STATUS) {
836						hermon_fm_ereport(state,
837						    HCA_SYS_ERR,
838						    DDI_SERVICE_LOST);
839					}
840					return (IBT_INVALID_PARAM);
841				}
842			}
843		}
844	}
845
846	/*
847	 * Re-grab the mr_lock here.  Since further access to the protected
848	 * 'mr' structure is needed, and we would have returned previously for
849	 * the multiple deregistration case, we can safely grab the lock here.
850	 */
851	mutex_enter(&mr->mr_lock);
852
853	/*
854	 * If the memory had come from userland, then we do a lookup in the
855	 * "userland resources database".  On success, we free the entry, call
856	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
857	 * an indication that the umem_lockmemory() callback has called
858	 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
859	 * the "mr_umemcookie" field in the MR handle (this will be used
860	 * later to detect that only partial cleaup still remains to be done
861	 * on the MR handle).
862	 */
863	if (mr->mr_is_umem) {
864		status = hermon_umap_db_find(state->hs_instance,
865		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
866		    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
867		    &umapdb);
868		if (status == DDI_SUCCESS) {
869			hermon_umap_db_free(umapdb);
870			ddi_umem_unlock(mr->mr_umemcookie);
871		} else {
872			ddi_umem_unlock(mr->mr_umemcookie);
873			mr->mr_umemcookie = NULL;
874		}
875	}
876
877	/* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
878	if (mtt_refcnt != NULL) {
879		/*
880		 * Decrement the MTT reference count.  Since the MTT resource
881		 * may be shared between multiple memory regions (as a result
882		 * of a "RegisterSharedMR" verb) it is important that we not
883		 * free up or unbind resources prematurely.  If it's not shared
884		 * (as indicated by the return status), then free the resource.
885		 */
886		shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
887		if (!shared_mtt) {
888			hermon_rsrc_free(state, &mtt_refcnt);
889		}
890
891		/*
892		 * Free up the MTT entries and unbind the memory.  Here,
893		 * as above, we attempt to free these resources only if
894		 * it is appropriate to do so.
895		 * Note, 'bind' is NULL in the alloc_lkey case.
896		 */
897		if (!shared_mtt) {
898			if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
899				hermon_mr_mem_unbind(state, bind);
900			}
901			hermon_rsrc_free(state, &mtt);
902		}
903	}
904
905	/*
906	 * If the MR handle has been invalidated, then drop the
907	 * lock and return success.  Note: This only happens because
908	 * the umem_lockmemory() callback has been triggered.  The
909	 * cleanup here is partial, and further cleanup (in a
910	 * subsequent hermon_mr_deregister() call) will be necessary.
911	 */
912	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
913		mutex_exit(&mr->mr_lock);
914		return (DDI_SUCCESS);
915	}
916
917mrdereg_finish_cleanup:
918	mutex_exit(&mr->mr_lock);
919
920	/* Free the Hermon Memory Region handle */
921	hermon_rsrc_free(state, &rsrc);
922
923	/* Free up the MPT entry resource */
924	if (mpt != NULL)
925		hermon_rsrc_free(state, &mpt);
926
927	/* Decrement the reference count on the protection domain (PD) */
928	hermon_pd_refcnt_dec(pd);
929
930	/* Set the mrhdl pointer to NULL and return success */
931	*mrhdl = NULL;
932
933	return (DDI_SUCCESS);
934}
935
936/*
937 * hermon_mr_dealloc_fmr()
938 *    Context: Can be called from interrupt or base context.
939 */
940/* ARGSUSED */
941int
942hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
943{
944	hermon_rsrc_t		*mpt, *mtt, *rsrc;
945	hermon_pdhdl_t		pd;
946	hermon_mrhdl_t		mr;
947
948	/*
949	 * Pull all the necessary information from the Hermon Memory Region
950	 * handle.  This is necessary here because the resource for the
951	 * MR handle is going to be freed up as part of the this
952	 * deregistration
953	 */
954	mr	= *mrhdl;
955	mutex_enter(&mr->mr_lock);
956	mpt	= mr->mr_mptrsrcp;
957	mtt	= mr->mr_mttrsrcp;
958	rsrc	= mr->mr_rsrcp;
959	pd	= mr->mr_pdhdl;
960	mutex_exit(&mr->mr_lock);
961
962	/* Free the MTT entries */
963	hermon_rsrc_free(state, &mtt);
964
965	/* Free the Hermon Memory Region handle */
966	hermon_rsrc_free(state, &rsrc);
967
968	/* Free up the MPT entry resource */
969	hermon_rsrc_free(state, &mpt);
970
971	/* Decrement the reference count on the protection domain (PD) */
972	hermon_pd_refcnt_dec(pd);
973
974	/* Set the mrhdl pointer to NULL and return success */
975	*mrhdl = NULL;
976
977	return (DDI_SUCCESS);
978}
979
980
981/*
982 * hermon_mr_query()
983 *    Context: Can be called from interrupt or base context.
984 */
985/* ARGSUSED */
986int
987hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
988    ibt_mr_query_attr_t *attr)
989{
990	int			status;
991	hermon_hw_dmpt_t	mpt_entry;
992	uint32_t		lkey;
993
994	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
995
996	mutex_enter(&mr->mr_lock);
997
998	/*
999	 * Check here to see if the memory region has already been partially
1000	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
1001	 * If so, this is an error, return failure.
1002	 */
1003	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
1004		mutex_exit(&mr->mr_lock);
1005		return (IBT_MR_HDL_INVALID);
1006	}
1007
1008	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
1009	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
1010	    HERMON_NOSLEEP);
1011	if (status != HERMON_CMD_SUCCESS) {
1012		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
1013		mutex_exit(&mr->mr_lock);
1014		return (ibc_get_ci_failure(0));
1015	}
1016
1017	/* Update the mr sw struct from the hw struct. */
1018	lkey = mpt_entry.mem_key;
1019	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
1020	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
1021	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
1022	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
1023	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
1024	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
1025	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
1026	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
1027	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
1028	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
1029	    (mpt_entry.mtt_addr_l << 3);
1030	mr->mr_logmttpgsz = mpt_entry.entity_sz;
1031
1032	/* Fill in the queried attributes */
1033	attr->mr_lkey_state =
1034	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
1035	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
1036	    IBT_KEY_VALID;
1037	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
1038	attr->mr_attr_flags = mr->mr_accflag;
1039	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
1040
1041	/* Fill in the "local" attributes */
1042	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
1043	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1044	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1045
1046	/*
1047	 * Fill in the "remote" attributes (if necessary).  Note: the
1048	 * remote attributes are only valid if the memory region has one
1049	 * or more of the remote access flags set.
1050	 */
1051	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1052	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1053	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1054		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
1055		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1056		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1057	}
1058
1059	/*
1060	 * If region is mapped for streaming (i.e. noncoherent), then set sync
1061	 * is required
1062	 */
1063	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
1064	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;
1065
1066	mutex_exit(&mr->mr_lock);
1067	return (DDI_SUCCESS);
1068}
1069
1070
1071/*
1072 * hermon_mr_reregister()
1073 *    Context: Can be called from interrupt or base context.
1074 */
1075int
1076hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1077    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1078    hermon_mr_options_t *op)
1079{
1080	hermon_bind_info_t	bind;
1081	int			status;
1082
1083	/*
1084	 * Fill in the "bind" struct.  This struct provides the majority
1085	 * of the information that will be used to distinguish between an
1086	 * "addr" binding (as is the case here) and a "buf" binding (see
1087	 * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
1088	 * which does most of the "heavy lifting" for the Hermon memory
1089	 * registration (and reregistration) routines.
1090	 */
1091	bind.bi_type  = HERMON_BINDHDL_VADDR;
1092	bind.bi_addr  = mr_attr->mr_vaddr;
1093	bind.bi_len   = mr_attr->mr_len;
1094	bind.bi_as    = mr_attr->mr_as;
1095	bind.bi_flags = mr_attr->mr_flags;
1096	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1097	return (status);
1098}
1099
1100
1101/*
1102 * hermon_mr_reregister_buf()
1103 *    Context: Can be called from interrupt or base context.
1104 */
1105int
1106hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1107    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1108    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1109{
1110	hermon_bind_info_t	bind;
1111	int			status;
1112
1113	/*
1114	 * Fill in the "bind" struct.  This struct provides the majority
1115	 * of the information that will be used to distinguish between an
1116	 * "addr" binding (see above) and a "buf" binding (as is the case
1117	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
1118	 * which does most of the "heavy lifting" for the Hermon memory
1119	 * registration routines.  Note: We have chosen to provide
1120	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1121	 * not set).  It is not critical what value we choose here as it need
1122	 * only be unique for the given RKey (which will happen by default),
1123	 * so the choice here is somewhat arbitrary.
1124	 */
1125	bind.bi_type  = HERMON_BINDHDL_BUF;
1126	bind.bi_buf   = buf;
1127	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1128		bind.bi_addr  = mr_attr->mr_vaddr;
1129	} else {
1130		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1131	}
1132	bind.bi_len   = (uint64_t)buf->b_bcount;
1133	bind.bi_flags = mr_attr->mr_flags;
1134	bind.bi_as    = NULL;
1135	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1136	return (status);
1137}
1138
1139
1140/*
1141 * hermon_mr_sync()
1142 *    Context: Can be called from interrupt or base context.
1143 */
1144/* ARGSUSED */
1145int
1146hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1147{
1148	hermon_mrhdl_t		mrhdl;
1149	uint64_t		seg_vaddr, seg_len, seg_end;
1150	uint64_t		mr_start, mr_end;
1151	uint_t			type;
1152	int			status, i;
1153
1154	/* Process each of the ibt_mr_sync_t's */
1155	for (i = 0; i < num_segs; i++) {
1156		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1157
1158		/* Check for valid memory region handle */
1159		if (mrhdl == NULL) {
1160			status = IBT_MR_HDL_INVALID;
1161			goto mrsync_fail;
1162		}
1163
1164		mutex_enter(&mrhdl->mr_lock);
1165
1166		/*
1167		 * Check here to see if the memory region has already been
1168		 * partially deregistered as a result of a
1169		 * hermon_umap_umemlock_cb() callback.  If so, this is an
1170		 * error, return failure.
1171		 */
1172		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1173			mutex_exit(&mrhdl->mr_lock);
1174			status = IBT_MR_HDL_INVALID;
1175			goto mrsync_fail;
1176		}
1177
1178		/* Check for valid bounds on sync request */
1179		seg_vaddr = mr_segs[i].ms_vaddr;
1180		seg_len	  = mr_segs[i].ms_len;
1181		seg_end	  = seg_vaddr + seg_len - 1;
1182		mr_start  = mrhdl->mr_bindinfo.bi_addr;
1183		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1184		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1185			mutex_exit(&mrhdl->mr_lock);
1186			status = IBT_MR_VA_INVALID;
1187			goto mrsync_fail;
1188		}
1189		if ((seg_end < mr_start) || (seg_end > mr_end)) {
1190			mutex_exit(&mrhdl->mr_lock);
1191			status = IBT_MR_LEN_INVALID;
1192			goto mrsync_fail;
1193		}
1194
1195		/* Determine what type (i.e. direction) for sync */
1196		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1197			type = DDI_DMA_SYNC_FORDEV;
1198		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1199			type = DDI_DMA_SYNC_FORCPU;
1200		} else {
1201			mutex_exit(&mrhdl->mr_lock);
1202			status = IBT_INVALID_PARAM;
1203			goto mrsync_fail;
1204		}
1205
1206		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1207		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1208
1209		mutex_exit(&mrhdl->mr_lock);
1210	}
1211
1212	return (DDI_SUCCESS);
1213
1214mrsync_fail:
1215	return (status);
1216}
1217
1218
1219/*
1220 * hermon_mw_alloc()
1221 *    Context: Can be called from interrupt or base context.
1222 */
1223int
1224hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
1225    hermon_mwhdl_t *mwhdl)
1226{
1227	hermon_rsrc_t		*mpt, *rsrc;
1228	hermon_hw_dmpt_t		mpt_entry;
1229	hermon_mwhdl_t		mw;
1230	uint_t			sleep;
1231	int			status;
1232
1233	if (state != NULL)	/* XXX - bogus test that is always TRUE */
1234		return (IBT_INSUFF_RESOURCE);
1235
1236	/*
1237	 * Check the sleep flag.  Ensure that it is consistent with the
1238	 * current thread context (i.e. if we are currently in the interrupt
1239	 * context, then we shouldn't be attempting to sleep).
1240	 */
1241	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
1242	if ((sleep == HERMON_SLEEP) &&
1243	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1244		status = IBT_INVALID_PARAM;
1245		goto mwalloc_fail;
1246	}
1247
1248	/* Increment the reference count on the protection domain (PD) */
1249	hermon_pd_refcnt_inc(pd);
1250
1251	/*
1252	 * Allocate an MPT entry (for use as a memory window).  Since the
1253	 * Hermon hardware uses the MPT entry for memory regions and for
1254	 * memory windows, we will fill in this MPT with all the necessary
1255	 * parameters for the memory window.  And then (just as we do for
1256	 * memory regions) ownership will be passed to the hardware in the
1257	 * final step below.  If we fail here, we must undo the protection
1258	 * domain reference count.
1259	 */
1260	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1261	if (status != DDI_SUCCESS) {
1262		status = IBT_INSUFF_RESOURCE;
1263		goto mwalloc_fail1;
1264	}
1265
1266	/*
1267	 * Allocate the software structure for tracking the memory window (i.e.
1268	 * the Hermon Memory Window handle).  Note: This is actually the same
1269	 * software structure used for tracking memory regions, but since many
1270	 * of the same properties are needed, only a single structure is
1271	 * necessary.  If we fail here, we must undo the protection domain
1272	 * reference count and the previous resource allocation.
1273	 */
1274	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1275	if (status != DDI_SUCCESS) {
1276		status = IBT_INSUFF_RESOURCE;
1277		goto mwalloc_fail2;
1278	}
1279	mw = (hermon_mwhdl_t)rsrc->hr_addr;
1280	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1281
1282	/*
1283	 * Calculate an "unbound" RKey from MPT index.  In much the same way
1284	 * as we do for memory regions (above), this key is constructed from
1285	 * a "constrained" (which depends on the MPT index) and an
1286	 * "unconstrained" portion (which may be arbitrarily chosen).
1287	 */
1288	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);
1289
1290	/*
1291	 * Fill in the MPT entry.  This is the final step before passing
1292	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1293	 * the information collected/calculated above to fill in the
1294	 * requisite portions of the MPT.  Note: fewer entries in the MPT
1295	 * entry are necessary to allocate a memory window.
1296	 */
1297	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1298	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
1299	mpt_entry.mem_key	= mw->mr_rkey;
1300	mpt_entry.pd		= pd->pd_pdnum;
1301	mpt_entry.lr		= 1;
1302
1303	/*
1304	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1305	 * the entry to the hardware.  Note: in general, this operation
1306	 * shouldn't fail.  But if it does, we have to undo everything we've
1307	 * done above before returning error.
1308	 */
1309	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1310	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1311	if (status != HERMON_CMD_SUCCESS) {
1312		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1313		    status);
1314		if (status == HERMON_CMD_INVALID_STATUS) {
1315			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1316		}
1317		status = ibc_get_ci_failure(0);
1318		goto mwalloc_fail3;
1319	}
1320
1321	/*
1322	 * Fill in the rest of the Hermon Memory Window handle.  Having
1323	 * successfully transferred ownership of the MPT, we can update the
1324	 * following fields for use in further operations on the MW.
1325	 */
1326	mw->mr_mptrsrcp	= mpt;
1327	mw->mr_pdhdl	= pd;
1328	mw->mr_rsrcp	= rsrc;
1329	mw->mr_rkey	= hermon_mr_key_swap(mw->mr_rkey);
1330	*mwhdl = mw;
1331
1332	return (DDI_SUCCESS);
1333
1334mwalloc_fail3:
1335	hermon_rsrc_free(state, &rsrc);
1336mwalloc_fail2:
1337	hermon_rsrc_free(state, &mpt);
1338mwalloc_fail1:
1339	hermon_pd_refcnt_dec(pd);
1340mwalloc_fail:
1341	return (status);
1342}
1343
1344
1345/*
1346 * hermon_mw_free()
1347 *    Context: Can be called from interrupt or base context.
1348 */
1349int
1350hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1351{
1352	hermon_rsrc_t		*mpt, *rsrc;
1353	hermon_mwhdl_t		mw;
1354	int			status;
1355	hermon_pdhdl_t		pd;
1356
1357	/*
1358	 * Check the sleep flag.  Ensure that it is consistent with the
1359	 * current thread context (i.e. if we are currently in the interrupt
1360	 * context, then we shouldn't be attempting to sleep).
1361	 */
1362	if ((sleep == HERMON_SLEEP) &&
1363	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1364		status = IBT_INVALID_PARAM;
1365		return (status);
1366	}
1367
1368	/*
1369	 * Pull all the necessary information from the Hermon Memory Window
1370	 * handle.  This is necessary here because the resource for the
1371	 * MW handle is going to be freed up as part of the this operation.
1372	 */
1373	mw	= *mwhdl;
1374	mutex_enter(&mw->mr_lock);
1375	mpt	= mw->mr_mptrsrcp;
1376	rsrc	= mw->mr_rsrcp;
1377	pd	= mw->mr_pdhdl;
1378	mutex_exit(&mw->mr_lock);
1379	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1380
1381	/*
1382	 * Reclaim the MPT entry from hardware.  Note: in general, it is
1383	 * unexpected for this operation to return an error.
1384	 */
1385	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1386	    0, mpt->hr_indx, sleep);
1387	if (status != HERMON_CMD_SUCCESS) {
1388		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1389		    status);
1390		if (status == HERMON_CMD_INVALID_STATUS) {
1391			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1392		}
1393		return (ibc_get_ci_failure(0));
1394	}
1395
1396	/* Free the Hermon Memory Window handle */
1397	hermon_rsrc_free(state, &rsrc);
1398
1399	/* Free up the MPT entry resource */
1400	hermon_rsrc_free(state, &mpt);
1401
1402	/* Decrement the reference count on the protection domain (PD) */
1403	hermon_pd_refcnt_dec(pd);
1404
1405	/* Set the mwhdl pointer to NULL and return success */
1406	*mwhdl = NULL;
1407
1408	return (DDI_SUCCESS);
1409}
1410
1411
1412/*
1413 * hermon_mr_keycalc()
1414 *    Context: Can be called from interrupt or base context.
1415 *    NOTE:  Produces a key in the form of
1416 *		KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1417 *    where K == the arbitrary bits and I == the index
1418 */
1419uint32_t
1420hermon_mr_keycalc(uint32_t indx)
1421{
1422	uint32_t tmp_key, tmp_indx;
1423
1424	/*
1425	 * Generate a simple key from counter.  Note:  We increment this
1426	 * static variable _intentionally_ without any kind of mutex around
1427	 * it.  First, single-threading all operations through a single lock
1428	 * would be a bad idea (from a performance point-of-view).  Second,
1429	 * the upper "unconstrained" bits don't really have to be unique
1430	 * because the lower bits are guaranteed to be (although we do make a
1431	 * best effort to ensure that they are).  Third, the window for the
1432	 * race (where both threads read and update the counter at the same
1433	 * time) is incredibly small.
1434	 * And, lastly, we'd like to make this into a "random" key
1435	 */
1436	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
1437	tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1438	tmp_indx = indx & 0xffffff;
1439	return (tmp_key | tmp_indx);
1440}
1441
1442
1443/*
1444 * hermon_mr_key_swap()
1445 *    Context: Can be called from interrupt or base context.
1446 *    NOTE:  Produces a key in the form of
1447 *		IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
1448 *    where K == the arbitrary bits and I == the index
1449 */
1450uint32_t
1451hermon_mr_key_swap(uint32_t indx)
1452{
1453	/*
1454	 * The memory key format to pass down to the hardware is
1455	 * (key[7:0],index[23:0]), which defines the index to the
1456	 * hardware resource. When the driver passes this as a memory
1457	 * key, (i.e. to retrieve a resource) the format is
1458	 * (index[23:0],key[7:0]).
1459	 */
1460	return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
1461}
1462
1463/*
1464 * hermon_mr_common_reg()
1465 *    Context: Can be called from interrupt or base context.
1466 */
1467static int
1468hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1469    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1470    hermon_mpt_rsrc_type_t mpt_type)
1471{
1472	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
1473	hermon_umap_db_entry_t	*umapdb;
1474	hermon_sw_refcnt_t	*swrc_tmp;
1475	hermon_hw_dmpt_t	mpt_entry;
1476	hermon_mrhdl_t		mr;
1477	ibt_mr_flags_t		flags;
1478	hermon_bind_info_t	*bh;
1479	ddi_dma_handle_t	bind_dmahdl;
1480	ddi_umem_cookie_t	umem_cookie;
1481	size_t			umem_len;
1482	caddr_t			umem_addr;
1483	uint64_t		mtt_addr, max_sz;
1484	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1485	int			status, umem_flags, bind_override_addr;
1486
1487	/*
1488	 * Check the "options" flag.  Currently this flag tells the driver
1489	 * whether or not the region should be bound normally (i.e. with
1490	 * entries written into the PCI IOMMU), whether it should be
1491	 * registered to bypass the IOMMU, and whether or not the resulting
1492	 * address should be "zero-based" (to aid the alignment restrictions
1493	 * for QPs).
1494	 */
1495	if (op == NULL) {
1496		bind_type   = HERMON_BINDMEM_NORMAL;
1497		bind_dmahdl = NULL;
1498		bind_override_addr = 0;
1499	} else {
1500		bind_type	   = op->mro_bind_type;
1501		bind_dmahdl	   = op->mro_bind_dmahdl;
1502		bind_override_addr = op->mro_bind_override_addr;
1503	}
1504
1505	/* check what kind of mpt to use */
1506
1507	/* Extract the flags field from the hermon_bind_info_t */
1508	flags = bind->bi_flags;
1509
1510	/*
1511	 * Check for invalid length.  Check is the length is zero or if the
1512	 * length is larger than the maximum configured value.  Return error
1513	 * if it is.
1514	 */
1515	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1516	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1517		status = IBT_MR_LEN_INVALID;
1518		goto mrcommon_fail;
1519	}
1520
1521	/*
1522	 * Check the sleep flag.  Ensure that it is consistent with the
1523	 * current thread context (i.e. if we are currently in the interrupt
1524	 * context, then we shouldn't be attempting to sleep).
1525	 */
1526	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1527	if ((sleep == HERMON_SLEEP) &&
1528	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1529		status = IBT_INVALID_PARAM;
1530		goto mrcommon_fail;
1531	}
1532
1533	/* Increment the reference count on the protection domain (PD) */
1534	hermon_pd_refcnt_inc(pd);
1535
1536	/*
1537	 * Allocate an MPT entry.  This will be filled in with all the
1538	 * necessary parameters to define the memory region.  And then
1539	 * ownership will be passed to the hardware in the final step
1540	 * below.  If we fail here, we must undo the protection domain
1541	 * reference count.
1542	 */
1543	if (mpt_type == HERMON_MPT_DMPT) {
1544		status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1545		if (status != DDI_SUCCESS) {
1546			status = IBT_INSUFF_RESOURCE;
1547			goto mrcommon_fail1;
1548		}
1549	} else {
1550		mpt = NULL;
1551	}
1552
1553	/*
1554	 * Allocate the software structure for tracking the memory region (i.e.
1555	 * the Hermon Memory Region handle).  If we fail here, we must undo
1556	 * the protection domain reference count and the previous resource
1557	 * allocation.
1558	 */
1559	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1560	if (status != DDI_SUCCESS) {
1561		status = IBT_INSUFF_RESOURCE;
1562		goto mrcommon_fail2;
1563	}
1564	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1565	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1566
1567	/*
1568	 * Setup and validate the memory region access flags.  This means
1569	 * translating the IBTF's enable flags into the access flags that
1570	 * will be used in later operations.
1571	 */
1572	mr->mr_accflag = 0;
1573	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1574		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1575	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1576		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1577	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1578		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1579	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1580		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1581	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1582		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1583
1584	/*
1585	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1586	 * from a certain number of "constrained" bits (the least significant
1587	 * bits) and some number of "unconstrained" bits.  The constrained
1588	 * bits must be set to the index of the entry in the MPT table, but
1589	 * the unconstrained bits can be set to any value we wish.  Note:
1590	 * if no remote access is required, then the RKey value is not filled
1591	 * in.  Otherwise both Rkey and LKey are given the same value.
1592	 */
1593	if (mpt)
1594		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1595
1596	/*
1597	 * Determine if the memory is from userland and pin the pages
1598	 * with umem_lockmemory() if necessary.
1599	 * Then, if this is userland memory, allocate an entry in the
1600	 * "userland resources database".  This will later be added to
1601	 * the database (after all further memory registration operations are
1602	 * successful).  If we fail here, we must undo the reference counts
1603	 * and the previous resource allocations.
1604	 */
1605	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1606	if (mr_is_umem) {
1607		umem_len   = ptob(btopr(bind->bi_len +
1608		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1609		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1610		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1611		    DDI_UMEMLOCK_LONGTERM);
1612		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1613		    &umem_cookie, &hermon_umem_cbops, NULL);
1614		if (status != 0) {
1615			status = IBT_INSUFF_RESOURCE;
1616			goto mrcommon_fail3;
1617		}
1618
1619		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1620		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1621
1622		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1623		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1624		if (bind->bi_buf == NULL) {
1625			status = IBT_INSUFF_RESOURCE;
1626			goto mrcommon_fail3;
1627		}
1628		bind->bi_type = HERMON_BINDHDL_UBUF;
1629		bind->bi_buf->b_flags |= B_READ;
1630
1631		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1632		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1633
1634		umapdb = hermon_umap_db_alloc(state->hs_instance,
1635		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1636		    (uint64_t)(uintptr_t)rsrc);
1637		if (umapdb == NULL) {
1638			status = IBT_INSUFF_RESOURCE;
1639			goto mrcommon_fail4;
1640		}
1641	}
1642
1643	/*
1644	 * Setup the bindinfo for the mtt bind call
1645	 */
1646	bh = &mr->mr_bindinfo;
1647	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1648	bcopy(bind, bh, sizeof (hermon_bind_info_t));
1649	bh->bi_bypass = bind_type;
1650	status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1651	    &mtt_pgsize_bits, mpt != NULL);
1652	if (status != DDI_SUCCESS) {
1653		/*
1654		 * When mtt_bind fails, freerbuf has already been done,
1655		 * so make sure not to call it again.
1656		 */
1657		bind->bi_type = bh->bi_type;
1658		goto mrcommon_fail5;
1659	}
1660	mr->mr_logmttpgsz = mtt_pgsize_bits;
1661
1662	/*
1663	 * Allocate MTT reference count (to track shared memory regions).
1664	 * This reference count resource may never be used on the given
1665	 * memory region, but if it is ever later registered as "shared"
1666	 * memory region then this resource will be necessary.  If we fail
1667	 * here, we do pretty much the same as above to clean up.
1668	 */
1669	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1670	    &mtt_refcnt);
1671	if (status != DDI_SUCCESS) {
1672		status = IBT_INSUFF_RESOURCE;
1673		goto mrcommon_fail6;
1674	}
1675	mr->mr_mttrefcntp = mtt_refcnt;
1676	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1677	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1678	HERMON_MTT_REFCNT_INIT(swrc_tmp);
1679
1680	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1681
1682	/*
1683	 * Fill in the MPT entry.  This is the final step before passing
1684	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1685	 * the information collected/calculated above to fill in the
1686	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1687	 */
1688	if (mpt == NULL)
1689		goto no_passown;
1690
1691	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1692
1693	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1694	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1695	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1696	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1697	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1698	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1699	mpt_entry.lr	  = 1;
1700	mpt_entry.phys_addr = 0;
1701	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1702
1703	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1704	mpt_entry.mem_key	= mr->mr_lkey;
1705	mpt_entry.pd		= pd->pd_pdnum;
1706	mpt_entry.rem_acc_en = 0;
1707	mpt_entry.fast_reg_en = 0;
1708	mpt_entry.en_inval = 0;
1709	mpt_entry.lkey = 0;
1710	mpt_entry.win_cnt = 0;
1711
1712	if (bind_override_addr == 0) {
1713		mpt_entry.start_addr = bh->bi_addr;
1714	} else {
1715		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1716		mpt_entry.start_addr = bh->bi_addr;
1717	}
1718	mpt_entry.reg_win_len	= bh->bi_len;
1719
1720	mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1721	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
1722
1723	/*
1724	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1725	 * the entry to the hardware if needed.  Note: in general, this
1726	 * operation shouldn't fail.  But if it does, we have to undo
1727	 * everything we've done above before returning error.
1728	 *
1729	 * For Hermon, this routine (which is common to the contexts) will only
1730	 * set the ownership if needed - the process of passing the context
1731	 * itself to HW will take care of setting up the MPT (based on type
1732	 * and index).
1733	 */
1734
1735	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1736	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1737	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1738	if (status != HERMON_CMD_SUCCESS) {
1739		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1740		    status);
1741		if (status == HERMON_CMD_INVALID_STATUS) {
1742			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1743		}
1744		status = ibc_get_ci_failure(0);
1745		goto mrcommon_fail7;
1746	}
1747	if (hermon_rdma_debug & 0x4)
1748		IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
1749		    mr, hermon_mr_key_swap(mr->mr_rkey));
1750no_passown:
1751
1752	/*
1753	 * Fill in the rest of the Hermon Memory Region handle.  Having
1754	 * successfully transferred ownership of the MPT, we can update the
1755	 * following fields for use in further operations on the MR.
1756	 */
1757	mr->mr_mttaddr	   = mtt_addr;
1758
1759	mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1760	mr->mr_mptrsrcp	   = mpt;
1761	mr->mr_mttrsrcp	   = mtt;
1762	mr->mr_pdhdl	   = pd;
1763	mr->mr_rsrcp	   = rsrc;
1764	mr->mr_is_umem	   = mr_is_umem;
1765	mr->mr_is_fmr	   = 0;
1766	mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1767	mr->mr_umem_cbfunc = NULL;
1768	mr->mr_umem_cbarg1 = NULL;
1769	mr->mr_umem_cbarg2 = NULL;
1770	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1771	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1772	mr->mr_mpt_type	   = mpt_type;
1773
1774	/*
1775	 * If this is userland memory, then we need to insert the previously
1776	 * allocated entry into the "userland resources database".  This will
1777	 * allow for later coordination between the hermon_umap_umemlock_cb()
1778	 * callback and hermon_mr_deregister().
1779	 */
1780	if (mr_is_umem) {
1781		hermon_umap_db_add(umapdb);
1782	}
1783
1784	*mrhdl = mr;
1785
1786	return (DDI_SUCCESS);
1787
1788/*
1789 * The following is cleanup for all possible failure cases in this routine
1790 */
1791mrcommon_fail7:
1792	hermon_rsrc_free(state, &mtt_refcnt);
1793mrcommon_fail6:
1794	hermon_mr_mem_unbind(state, bh);
1795	bind->bi_type = bh->bi_type;
1796mrcommon_fail5:
1797	if (mr_is_umem) {
1798		hermon_umap_db_free(umapdb);
1799	}
1800mrcommon_fail4:
1801	if (mr_is_umem) {
1802		/*
1803		 * Free up the memory ddi_umem_iosetup() allocates
1804		 * internally.
1805		 */
1806		if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1807			freerbuf(bind->bi_buf);
1808			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1809			bind->bi_type = HERMON_BINDHDL_NONE;
1810			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1811		}
1812		ddi_umem_unlock(umem_cookie);
1813	}
1814mrcommon_fail3:
1815	hermon_rsrc_free(state, &rsrc);
1816mrcommon_fail2:
1817	if (mpt != NULL)
1818		hermon_rsrc_free(state, &mpt);
1819mrcommon_fail1:
1820	hermon_pd_refcnt_dec(pd);
1821mrcommon_fail:
1822	return (status);
1823}
1824
1825/*
1826 * hermon_dma_mr_register()
1827 *    Context: Can be called from base context.
1828 */
1829int
1830hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1831    ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1832{
1833	hermon_rsrc_t		*mpt, *rsrc;
1834	hermon_hw_dmpt_t	mpt_entry;
1835	hermon_mrhdl_t		mr;
1836	ibt_mr_flags_t		flags;
1837	uint_t			sleep;
1838	int			status;
1839
1840	/* Extract the flags field */
1841	flags = mr_attr->dmr_flags;
1842
1843	/*
1844	 * Check the sleep flag.  Ensure that it is consistent with the
1845	 * current thread context (i.e. if we are currently in the interrupt
1846	 * context, then we shouldn't be attempting to sleep).
1847	 */
1848	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1849	if ((sleep == HERMON_SLEEP) &&
1850	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1851		status = IBT_INVALID_PARAM;
1852		goto mrcommon_fail;
1853	}
1854
1855	/* Increment the reference count on the protection domain (PD) */
1856	hermon_pd_refcnt_inc(pd);
1857
1858	/*
1859	 * Allocate an MPT entry.  This will be filled in with all the
1860	 * necessary parameters to define the memory region.  And then
1861	 * ownership will be passed to the hardware in the final step
1862	 * below.  If we fail here, we must undo the protection domain
1863	 * reference count.
1864	 */
1865	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1866	if (status != DDI_SUCCESS) {
1867		status = IBT_INSUFF_RESOURCE;
1868		goto mrcommon_fail1;
1869	}
1870
1871	/*
1872	 * Allocate the software structure for tracking the memory region (i.e.
1873	 * the Hermon Memory Region handle).  If we fail here, we must undo
1874	 * the protection domain reference count and the previous resource
1875	 * allocation.
1876	 */
1877	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1878	if (status != DDI_SUCCESS) {
1879		status = IBT_INSUFF_RESOURCE;
1880		goto mrcommon_fail2;
1881	}
1882	mr = (hermon_mrhdl_t)rsrc->hr_addr;
1883	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1884	bzero(mr, sizeof (*mr));
1885
1886	/*
1887	 * Setup and validate the memory region access flags.  This means
1888	 * translating the IBTF's enable flags into the access flags that
1889	 * will be used in later operations.
1890	 */
1891	mr->mr_accflag = 0;
1892	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1893		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1894	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1895		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1896	if (flags & IBT_MR_ENABLE_REMOTE_READ)
1897		mr->mr_accflag |= IBT_MR_REMOTE_READ;
1898	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1899		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1900	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1901		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1902
1903	/*
1904	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1905	 * from a certain number of "constrained" bits (the least significant
1906	 * bits) and some number of "unconstrained" bits.  The constrained
1907	 * bits must be set to the index of the entry in the MPT table, but
1908	 * the unconstrained bits can be set to any value we wish.  Note:
1909	 * if no remote access is required, then the RKey value is not filled
1910	 * in.  Otherwise both Rkey and LKey are given the same value.
1911	 */
1912	if (mpt)
1913		mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1914
1915	/*
1916	 * Fill in the MPT entry.  This is the final step before passing
1917	 * ownership of the MPT entry to the Hermon hardware.  We use all of
1918	 * the information collected/calculated above to fill in the
1919	 * requisite portions of the MPT.  Do this ONLY for DMPTs.
1920	 */
1921	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1922
1923	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1924	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1925	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1926	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1927	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1928	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1929	mpt_entry.lr	  = 1;
1930	mpt_entry.phys_addr = 1;	/* critical bit for this */
1931	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1932
1933	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
1934	mpt_entry.mem_key	= mr->mr_lkey;
1935	mpt_entry.pd		= pd->pd_pdnum;
1936	mpt_entry.rem_acc_en = 0;
1937	mpt_entry.fast_reg_en = 0;
1938	mpt_entry.en_inval = 0;
1939	mpt_entry.lkey = 0;
1940	mpt_entry.win_cnt = 0;
1941
1942	mpt_entry.start_addr = mr_attr->dmr_paddr;
1943	mpt_entry.reg_win_len = mr_attr->dmr_len;
1944	if (mr_attr->dmr_len == 0)
1945		mpt_entry.len_b64 = 1;	/* needed for 2^^64 length */
1946
1947	mpt_entry.mtt_addr_h = 0;
1948	mpt_entry.mtt_addr_l = 0;
1949
1950	/*
1951	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1952	 * the entry to the hardware if needed.  Note: in general, this
1953	 * operation shouldn't fail.  But if it does, we have to undo
1954	 * everything we've done above before returning error.
1955	 *
1956	 * For Hermon, this routine (which is common to the contexts) will only
1957	 * set the ownership if needed - the process of passing the context
1958	 * itself to HW will take care of setting up the MPT (based on type
1959	 * and index).
1960	 */
1961
1962	mpt_entry.bnd_qp = 0;	/* dMPT for a qp, check for window */
1963	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1964	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1965	if (status != HERMON_CMD_SUCCESS) {
1966		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1967		    status);
1968		if (status == HERMON_CMD_INVALID_STATUS) {
1969			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1970		}
1971		status = ibc_get_ci_failure(0);
1972		goto mrcommon_fail7;
1973	}
1974
1975	/*
1976	 * Fill in the rest of the Hermon Memory Region handle.  Having
1977	 * successfully transferred ownership of the MPT, we can update the
1978	 * following fields for use in further operations on the MR.
1979	 */
1980	mr->mr_mttaddr	   = 0;
1981
1982	mr->mr_log2_pgsz   = 0;
1983	mr->mr_mptrsrcp	   = mpt;
1984	mr->mr_mttrsrcp	   = NULL;
1985	mr->mr_pdhdl	   = pd;
1986	mr->mr_rsrcp	   = rsrc;
1987	mr->mr_is_umem	   = 0;
1988	mr->mr_is_fmr	   = 0;
1989	mr->mr_umemcookie  = NULL;
1990	mr->mr_umem_cbfunc = NULL;
1991	mr->mr_umem_cbarg1 = NULL;
1992	mr->mr_umem_cbarg2 = NULL;
1993	mr->mr_lkey	   = hermon_mr_key_swap(mr->mr_lkey);
1994	mr->mr_rkey	   = hermon_mr_key_swap(mr->mr_rkey);
1995	mr->mr_mpt_type	   = HERMON_MPT_DMPT;
1996
1997	*mrhdl = mr;
1998
1999	return (DDI_SUCCESS);
2000
2001/*
2002 * The following is cleanup for all possible failure cases in this routine
2003 */
2004mrcommon_fail7:
2005	hermon_rsrc_free(state, &rsrc);
2006mrcommon_fail2:
2007	hermon_rsrc_free(state, &mpt);
2008mrcommon_fail1:
2009	hermon_pd_refcnt_dec(pd);
2010mrcommon_fail:
2011	return (status);
2012}
2013
2014/*
2015 * hermon_mr_alloc_lkey()
2016 *    Context: Can be called from base context.
2017 */
2018int
2019hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2020    ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2021{
2022	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
2023	hermon_sw_refcnt_t	*swrc_tmp;
2024	hermon_hw_dmpt_t	mpt_entry;
2025	hermon_mrhdl_t		mr;
2026	uint64_t		mtt_addr;
2027	uint_t			sleep;
2028	int			status;
2029
2030	/* Increment the reference count on the protection domain (PD) */
2031	hermon_pd_refcnt_inc(pd);
2032
2033	sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2034
2035	/*
2036	 * Allocate an MPT entry.  This will be filled in with "some" of the
2037	 * necessary parameters to define the memory region.  And then
2038	 * ownership will be passed to the hardware in the final step
2039	 * below.  If we fail here, we must undo the protection domain
2040	 * reference count.
2041	 *
2042	 * The MTTs will get filled in when the FRWR is processed.
2043	 */
2044	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2045	if (status != DDI_SUCCESS) {
2046		status = IBT_INSUFF_RESOURCE;
2047		goto alloclkey_fail1;
2048	}
2049
2050	/*
2051	 * Allocate the software structure for tracking the memory region (i.e.
2052	 * the Hermon Memory Region handle).  If we fail here, we must undo
2053	 * the protection domain reference count and the previous resource
2054	 * allocation.
2055	 */
2056	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2057	if (status != DDI_SUCCESS) {
2058		status = IBT_INSUFF_RESOURCE;
2059		goto alloclkey_fail2;
2060	}
2061	mr = (hermon_mrhdl_t)rsrc->hr_addr;
2062	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2063	bzero(mr, sizeof (*mr));
2064	mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2065
2066	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2067
2068	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2069	if (status != DDI_SUCCESS) {
2070		status = IBT_INSUFF_RESOURCE;
2071		goto alloclkey_fail3;
2072	}
2073	mr->mr_logmttpgsz = PAGESHIFT;
2074
2075	/*
2076	 * Allocate MTT reference count (to track shared memory regions).
2077	 * This reference count resource may never be used on the given
2078	 * memory region, but if it is ever later registered as "shared"
2079	 * memory region then this resource will be necessary.  If we fail
2080	 * here, we do pretty much the same as above to clean up.
2081	 */
2082	status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2083	    &mtt_refcnt);
2084	if (status != DDI_SUCCESS) {
2085		status = IBT_INSUFF_RESOURCE;
2086		goto alloclkey_fail4;
2087	}
2088	mr->mr_mttrefcntp = mtt_refcnt;
2089	swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2090	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2091	HERMON_MTT_REFCNT_INIT(swrc_tmp);
2092
2093	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2094
2095	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2096	mpt_entry.status = HERMON_MPT_FREE;
2097	mpt_entry.lw = 1;
2098	mpt_entry.lr = 1;
2099	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2100	mpt_entry.entity_sz = mr->mr_logmttpgsz;
2101	mpt_entry.mem_key = mr->mr_lkey;
2102	mpt_entry.pd = pd->pd_pdnum;
2103	mpt_entry.fast_reg_en = 1;
2104	mpt_entry.rem_acc_en = 1;
2105	mpt_entry.en_inval = 1;
2106	if (flags & IBT_KEY_REMOTE) {
2107		mpt_entry.ren_inval = 1;
2108	}
2109	mpt_entry.mtt_size = nummtt;
2110	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2111	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2112
2113	/*
2114	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2115	 * the entry to the hardware if needed.  Note: in general, this
2116	 * operation shouldn't fail.  But if it does, we have to undo
2117	 * everything we've done above before returning error.
2118	 *
2119	 * For Hermon, this routine (which is common to the contexts) will only
2120	 * set the ownership if needed - the process of passing the context
2121	 * itself to HW will take care of setting up the MPT (based on type
2122	 * and index).
2123	 */
2124	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2125	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2126	if (status != HERMON_CMD_SUCCESS) {
2127		cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2128		    "failed: %08x\n", status);
2129		if (status == HERMON_CMD_INVALID_STATUS) {
2130			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2131		}
2132		status = ibc_get_ci_failure(0);
2133		goto alloclkey_fail5;
2134	}
2135
2136	/*
2137	 * Fill in the rest of the Hermon Memory Region handle.  Having
2138	 * successfully transferred ownership of the MPT, we can update the
2139	 * following fields for use in further operations on the MR.
2140	 */
2141	mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2142	mr->mr_mttaddr = mtt_addr;
2143	mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2144	mr->mr_mptrsrcp = mpt;
2145	mr->mr_mttrsrcp = mtt;
2146	mr->mr_pdhdl = pd;
2147	mr->mr_rsrcp = rsrc;
2148	mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2149	mr->mr_rkey = mr->mr_lkey;
2150	mr->mr_mpt_type = HERMON_MPT_DMPT;
2151
2152	*mrhdl = mr;
2153	return (DDI_SUCCESS);
2154
2155alloclkey_fail5:
2156	hermon_rsrc_free(state, &mtt_refcnt);
2157alloclkey_fail4:
2158	hermon_rsrc_free(state, &mtt);
2159alloclkey_fail3:
2160	hermon_rsrc_free(state, &rsrc);
2161alloclkey_fail2:
2162	hermon_rsrc_free(state, &mpt);
2163alloclkey_fail1:
2164	hermon_pd_refcnt_dec(pd);
2165	return (status);
2166}
2167
2168/*
2169 * hermon_mr_fexch_mpt_init()
2170 *    Context: Can be called from base context.
2171 *
2172 * This is the same as alloc_lkey, but not returning an mrhdl.
2173 */
2174int
2175hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2176    uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2177{
2178	hermon_hw_dmpt_t	mpt_entry;
2179	int			status;
2180
2181	/*
2182	 * The MTTs will get filled in when the FRWR is processed.
2183	 */
2184
2185	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2186	mpt_entry.status = HERMON_MPT_FREE;
2187	mpt_entry.lw = 1;
2188	mpt_entry.lr = 1;
2189	mpt_entry.rw = 1;
2190	mpt_entry.rr = 1;
2191	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2192	mpt_entry.entity_sz = PAGESHIFT;
2193	mpt_entry.mem_key = mpt_indx;
2194	mpt_entry.pd = pd->pd_pdnum;
2195	mpt_entry.fast_reg_en = 1;
2196	mpt_entry.rem_acc_en = 1;
2197	mpt_entry.en_inval = 1;
2198	mpt_entry.ren_inval = 1;
2199	mpt_entry.mtt_size = nummtt;
2200	mpt_entry.mtt_addr_h = mtt_addr >> 32;	/* only 8 more bits */
2201	mpt_entry.mtt_addr_l = mtt_addr >> 3;	/* only 29 bits */
2202
2203	/*
2204	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
2205	 * the entry to the hardware if needed.  Note: in general, this
2206	 * operation shouldn't fail.  But if it does, we have to undo
2207	 * everything we've done above before returning error.
2208	 *
2209	 * For Hermon, this routine (which is common to the contexts) will only
2210	 * set the ownership if needed - the process of passing the context
2211	 * itself to HW will take care of setting up the MPT (based on type
2212	 * and index).
2213	 */
2214	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2215	    sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2216	if (status != HERMON_CMD_SUCCESS) {
2217		cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2218		    "failed: %08x\n", status);
2219		if (status == HERMON_CMD_INVALID_STATUS) {
2220			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2221		}
2222		status = ibc_get_ci_failure(0);
2223		return (status);
2224	}
2225	/* Increment the reference count on the protection domain (PD) */
2226	hermon_pd_refcnt_inc(pd);
2227
2228	return (DDI_SUCCESS);
2229}
2230
2231/*
2232 * hermon_mr_fexch_mpt_fini()
2233 *    Context: Can be called from base context.
2234 *
2235 * This is the same as deregister_mr, without an mrhdl.
2236 */
2237int
2238hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2239    uint32_t mpt_indx, uint_t sleep)
2240{
2241	int			status;
2242
2243	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2244	    NULL, 0, mpt_indx, sleep);
2245	if (status != DDI_SUCCESS) {
2246		cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2247		    "failed: %08x\n", status);
2248		if (status == HERMON_CMD_INVALID_STATUS) {
2249			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2250		}
2251		status = ibc_get_ci_failure(0);
2252		return (status);
2253	}
2254
2255	/* Decrement the reference count on the protection domain (PD) */
2256	hermon_pd_refcnt_dec(pd);
2257
2258	return (DDI_SUCCESS);
2259}
2260
2261/*
2262 * hermon_mr_mtt_bind()
2263 *    Context: Can be called from interrupt or base context.
2264 */
2265int
2266hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2267    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2268    uint_t is_buffer)
2269{
2270	uint64_t		nummtt;
2271	uint_t			sleep;
2272	int			status;
2273
2274	/*
2275	 * Check the sleep flag.  Ensure that it is consistent with the
2276	 * current thread context (i.e. if we are currently in the interrupt
2277	 * context, then we shouldn't be attempting to sleep).
2278	 */
2279	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2280	    HERMON_NOSLEEP : HERMON_SLEEP;
2281	if ((sleep == HERMON_SLEEP) &&
2282	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2283		status = IBT_INVALID_PARAM;
2284		goto mrmttbind_fail;
2285	}
2286
2287	/*
2288	 * Bind the memory and determine the mapped addresses.  This is
2289	 * the first of two routines that do all the "heavy lifting" for
2290	 * the Hermon memory registration routines.  The hermon_mr_mem_bind()
2291	 * routine takes the "bind" struct with all its fields filled
2292	 * in and returns a list of DMA cookies (for the PCI mapped addresses
2293	 * corresponding to the specified address region) which are used by
2294	 * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
2295	 * must undo all the previous resource allocation (and PD reference
2296	 * count).
2297	 */
2298	status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2299	if (status != DDI_SUCCESS) {
2300		status = IBT_INSUFF_RESOURCE;
2301		goto mrmttbind_fail;
2302	}
2303
2304	/*
2305	 * Determine number of pages spanned.  This routine uses the
2306	 * information in the "bind" struct to determine the required
2307	 * number of MTT entries needed (and returns the suggested page size -
2308	 * as a "power-of-2" - for each MTT entry).
2309	 */
2310	nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2311
2312	/*
2313	 * Allocate the MTT entries.  Use the calculations performed above to
2314	 * allocate the required number of MTT entries. If we fail here, we
2315	 * must not only undo all the previous resource allocation (and PD
2316	 * reference count), but we must also unbind the memory.
2317	 */
2318	status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2319	if (status != DDI_SUCCESS) {
2320		status = IBT_INSUFF_RESOURCE;
2321		goto mrmttbind_fail2;
2322	}
2323
2324	/*
2325	 * Write the mapped addresses into the MTT entries.  This is part two
2326	 * of the "heavy lifting" routines that we talked about above.  Note:
2327	 * we pass the suggested page size from the earlier operation here.
2328	 * And if we fail here, we again do pretty much the same huge clean up.
2329	 */
2330	status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2331	if (status != DDI_SUCCESS) {
2332		/*
2333		 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2334		 * only if it detects a HW error during DMA.
2335		 */
2336		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2337		status = ibc_get_ci_failure(0);
2338		goto mrmttbind_fail3;
2339	}
2340	return (DDI_SUCCESS);
2341
2342/*
2343 * The following is cleanup for all possible failure cases in this routine
2344 */
2345mrmttbind_fail3:
2346	hermon_rsrc_free(state, mtt);
2347mrmttbind_fail2:
2348	hermon_mr_mem_unbind(state, bind);
2349mrmttbind_fail:
2350	return (status);
2351}
2352
2353
2354/*
2355 * hermon_mr_mtt_unbind()
2356 *    Context: Can be called from interrupt or base context.
2357 */
2358int
2359hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2360    hermon_rsrc_t *mtt)
2361{
2362	/*
2363	 * Free up the MTT entries and unbind the memory.  Here, as above, we
2364	 * attempt to free these resources only if it is appropriate to do so.
2365	 */
2366	hermon_mr_mem_unbind(state, bind);
2367	hermon_rsrc_free(state, &mtt);
2368
2369	return (DDI_SUCCESS);
2370}
2371
2372
2373/*
2374 * hermon_mr_common_rereg()
2375 *    Context: Can be called from interrupt or base context.
2376 */
2377static int
2378hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
2379    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
2380    hermon_mr_options_t *op)
2381{
2382	hermon_rsrc_t		*mpt;
2383	ibt_mr_attr_flags_t	acc_flags_to_use;
2384	ibt_mr_flags_t		flags;
2385	hermon_pdhdl_t		pd_to_use;
2386	hermon_hw_dmpt_t	mpt_entry;
2387	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
2388	uint_t			sleep, dereg_level;
2389	int			status;
2390
2391	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2392
2393	/*
2394	 * Check here to see if the memory region corresponds to a userland
2395	 * mapping.  Reregistration of userland memory regions is not
2396	 * currently supported.  Return failure.
2397	 */
2398	if (mr->mr_is_umem) {
2399		status = IBT_MR_HDL_INVALID;
2400		goto mrrereg_fail;
2401	}
2402
2403	mutex_enter(&mr->mr_lock);
2404
2405	/* Pull MPT resource pointer from the Hermon Memory Region handle */
2406	mpt = mr->mr_mptrsrcp;
2407
2408	/* Extract the flags field from the hermon_bind_info_t */
2409	flags = bind->bi_flags;
2410
2411	/*
2412	 * Check the sleep flag.  Ensure that it is consistent with the
2413	 * current thread context (i.e. if we are currently in the interrupt
2414	 * context, then we shouldn't be attempting to sleep).
2415	 */
2416	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2417	if ((sleep == HERMON_SLEEP) &&
2418	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2419		mutex_exit(&mr->mr_lock);
2420		status = IBT_INVALID_PARAM;
2421		goto mrrereg_fail;
2422	}
2423
2424	/*
2425	 * First step is to temporarily invalidate the MPT entry.  This
2426	 * regains ownership from the hardware, and gives us the opportunity
2427	 * to modify the entry.  Note: The HW2SW_MPT command returns the
2428	 * current MPT entry contents.  These are saved away here because
2429	 * they will be reused in a later step below.  If the region has
2430	 * bound memory windows that we fail returning an "in use" error code.
2431	 * Otherwise, this is an unexpected error and we deregister the
2432	 * memory region and return error.
2433	 *
2434	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2435	 * against holding the lock around this rereg call in all contexts.
2436	 */
2437	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
2438	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2439	if (status != HERMON_CMD_SUCCESS) {
2440		mutex_exit(&mr->mr_lock);
2441		if (status == HERMON_CMD_REG_BOUND) {
2442			return (IBT_MR_IN_USE);
2443		} else {
2444			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2445			    "%08x\n", status);
2446			if (status == HERMON_CMD_INVALID_STATUS) {
2447				hermon_fm_ereport(state, HCA_SYS_ERR,
2448				    HCA_ERR_SRV_LOST);
2449			}
2450			/*
2451			 * Call deregister and ensure that all current
2452			 * resources get freed up
2453			 */
2454			if (hermon_mr_deregister(state, &mr,
2455			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2456				HERMON_WARNING(state, "failed to deregister "
2457				    "memory region");
2458			}
2459			return (ibc_get_ci_failure(0));
2460		}
2461	}
2462
2463	/*
2464	 * If we're changing the protection domain, then validate the new one
2465	 */
2466	if (flags & IBT_MR_CHANGE_PD) {
2467
2468		/* Check for valid PD handle pointer */
2469		if (pd == NULL) {
2470			mutex_exit(&mr->mr_lock);
2471			/*
2472			 * Call deregister and ensure that all current
2473			 * resources get properly freed up. Unnecessary
2474			 * here to attempt to regain software ownership
2475			 * of the MPT entry as that has already been
2476			 * done above.
2477			 */
2478			if (hermon_mr_deregister(state, &mr,
2479			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2480			    DDI_SUCCESS) {
2481				HERMON_WARNING(state, "failed to deregister "
2482				    "memory region");
2483			}
2484			status = IBT_PD_HDL_INVALID;
2485			goto mrrereg_fail;
2486		}
2487
2488		/* Use the new PD handle in all operations below */
2489		pd_to_use = pd;
2490
2491	} else {
2492		/* Use the current PD handle in all operations below */
2493		pd_to_use = mr->mr_pdhdl;
2494	}
2495
2496	/*
2497	 * If we're changing access permissions, then validate the new ones
2498	 */
2499	if (flags & IBT_MR_CHANGE_ACCESS) {
2500		/*
2501		 * Validate the access flags.  Both remote write and remote
2502		 * atomic require the local write flag to be set
2503		 */
2504		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2505		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2506		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2507			mutex_exit(&mr->mr_lock);
2508			/*
2509			 * Call deregister and ensure that all current
2510			 * resources get properly freed up. Unnecessary
2511			 * here to attempt to regain software ownership
2512			 * of the MPT entry as that has already been
2513			 * done above.
2514			 */
2515			if (hermon_mr_deregister(state, &mr,
2516			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2517			    DDI_SUCCESS) {
2518				HERMON_WARNING(state, "failed to deregister "
2519				    "memory region");
2520			}
2521			status = IBT_MR_ACCESS_REQ_INVALID;
2522			goto mrrereg_fail;
2523		}
2524
2525		/*
2526		 * Setup and validate the memory region access flags.  This
2527		 * means translating the IBTF's enable flags into the access
2528		 * flags that will be used in later operations.
2529		 */
2530		acc_flags_to_use = 0;
2531		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2532			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2533		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2534			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2535		if (flags & IBT_MR_ENABLE_REMOTE_READ)
2536			acc_flags_to_use |= IBT_MR_REMOTE_READ;
2537		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2538			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2539		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2540			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2541
2542	} else {
2543		acc_flags_to_use = mr->mr_accflag;
2544	}
2545
2546	/*
2547	 * If we're modifying the translation, then figure out whether
2548	 * we can reuse the current MTT resources.  This means calling
2549	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2550	 * for the reregistration.  If the current memory region contains
2551	 * sufficient MTT entries for the new regions, then it will be
2552	 * reused and filled in.  Otherwise, new entries will be allocated,
2553	 * the old ones will be freed, and the new entries will be filled
2554	 * in.  Note:  If we're not modifying the translation, then we
2555	 * should already have all the information we need to update the MPT.
2556	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2557	 * a "dereg_level" which is the level of cleanup that needs to be
2558	 * passed to hermon_mr_deregister() to finish the cleanup.
2559	 */
2560	if (flags & IBT_MR_CHANGE_TRANSLATION) {
2561		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2562		    &mtt_addr_to_use, sleep, &dereg_level);
2563		if (status != DDI_SUCCESS) {
2564			mutex_exit(&mr->mr_lock);
2565			/*
2566			 * Call deregister and ensure that all resources get
2567			 * properly freed up.
2568			 */
2569			if (hermon_mr_deregister(state, &mr, dereg_level,
2570			    sleep) != DDI_SUCCESS) {
2571				HERMON_WARNING(state, "failed to deregister "
2572				    "memory region");
2573			}
2574			goto mrrereg_fail;
2575		}
2576		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2577		len_to_use   = mr->mr_bindinfo.bi_len;
2578	} else {
2579		mtt_addr_to_use = mr->mr_mttaddr;
2580		vaddr_to_use = mr->mr_bindinfo.bi_addr;
2581		len_to_use   = mr->mr_bindinfo.bi_len;
2582	}
2583
2584	/*
2585	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
2586	 * when the region was first registered, each key is formed from
2587	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
2588	 * access is required, then the RKey value is not filled in.  Otherwise
2589	 * both Rkey and LKey are given the same value.
2590	 */
2591	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2592	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2593	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2594	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2595		mr->mr_rkey = mr->mr_lkey;
2596	} else
2597		mr->mr_rkey = 0;
2598
2599	/*
2600	 * Fill in the MPT entry.  This is the final step before passing
2601	 * ownership of the MPT entry to the Hermon hardware.  We use all of
2602	 * the information collected/calculated above to fill in the
2603	 * requisite portions of the MPT.
2604	 */
2605	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2606
2607	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
2608	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
2609	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2610	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
2611	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
2612	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
2613	mpt_entry.lr	  = 1;
2614	mpt_entry.phys_addr = 0;
2615	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2616
2617	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
2618	mpt_entry.mem_key	= mr->mr_lkey;
2619	mpt_entry.pd		= pd_to_use->pd_pdnum;
2620
2621	mpt_entry.start_addr	= vaddr_to_use;
2622	mpt_entry.reg_win_len	= len_to_use;
2623	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2624	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2625
2626	/*
2627	 * Write the updated MPT entry to hardware
2628	 *
2629	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2630	 * against holding the lock around this rereg call in all contexts.
2631	 */
2632	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2633	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2634	if (status != HERMON_CMD_SUCCESS) {
2635		mutex_exit(&mr->mr_lock);
2636		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2637		    status);
2638		if (status == HERMON_CMD_INVALID_STATUS) {
2639			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2640		}
2641		/*
2642		 * Call deregister and ensure that all current resources get
2643		 * properly freed up. Unnecessary here to attempt to regain
2644		 * software ownership of the MPT entry as that has already
2645		 * been done above.
2646		 */
2647		if (hermon_mr_deregister(state, &mr,
2648		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2649			HERMON_WARNING(state, "failed to deregister memory "
2650			    "region");
2651		}
2652		return (ibc_get_ci_failure(0));
2653	}
2654
2655	/*
2656	 * If we're changing PD, then update their reference counts now.
2657	 * This means decrementing the reference count on the old PD and
2658	 * incrementing the reference count on the new PD.
2659	 */
2660	if (flags & IBT_MR_CHANGE_PD) {
2661		hermon_pd_refcnt_dec(mr->mr_pdhdl);
2662		hermon_pd_refcnt_inc(pd);
2663	}
2664
2665	/*
2666	 * Update the contents of the Hermon Memory Region handle to reflect
2667	 * what has been changed.
2668	 */
2669	mr->mr_pdhdl	  = pd_to_use;
2670	mr->mr_accflag	  = acc_flags_to_use;
2671	mr->mr_is_umem	  = 0;
2672	mr->mr_is_fmr	  = 0;
2673	mr->mr_umemcookie = NULL;
2674	mr->mr_lkey	  = hermon_mr_key_swap(mr->mr_lkey);
2675	mr->mr_rkey	  = hermon_mr_key_swap(mr->mr_rkey);
2676
2677	/* New MR handle is same as the old */
2678	*mrhdl_new = mr;
2679	mutex_exit(&mr->mr_lock);
2680
2681	return (DDI_SUCCESS);
2682
2683mrrereg_fail:
2684	return (status);
2685}
2686
2687
2688/*
2689 * hermon_mr_rereg_xlat_helper
2690 *    Context: Can be called from interrupt or base context.
2691 *    Note: This routine expects the "mr_lock" to be held when it
2692 *    is called.  Upon returning failure, this routine passes information
2693 *    about what "dereg_level" should be passed to hermon_mr_deregister().
2694 */
2695static int
2696hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2697    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2698    uint_t sleep, uint_t *dereg_level)
2699{
2700	hermon_rsrc_t		*mtt, *mtt_refcnt;
2701	hermon_sw_refcnt_t	*swrc_old, *swrc_new;
2702	ddi_dma_handle_t	dmahdl;
2703	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
2704	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
2705	int			status;
2706
2707	ASSERT(MUTEX_HELD(&mr->mr_lock));
2708
2709	/*
2710	 * Check the "options" flag.  Currently this flag tells the driver
2711	 * whether or not the region should be bound normally (i.e. with
2712	 * entries written into the PCI IOMMU) or whether it should be
2713	 * registered to bypass the IOMMU.
2714	 */
2715	if (op == NULL) {
2716		bind_type = HERMON_BINDMEM_NORMAL;
2717	} else {
2718		bind_type = op->mro_bind_type;
2719	}
2720
2721	/*
2722	 * Check for invalid length.  Check is the length is zero or if the
2723	 * length is larger than the maximum configured value.  Return error
2724	 * if it is.
2725	 */
2726	max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2727	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2728		/*
2729		 * Deregister will be called upon returning failure from this
2730		 * routine. This will ensure that all current resources get
2731		 * properly freed up. Unnecessary to attempt to regain
2732		 * software ownership of the MPT entry as that has already
2733		 * been done above (in hermon_mr_reregister())
2734		 */
2735		*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2736
2737		status = IBT_MR_LEN_INVALID;
2738		goto mrrereghelp_fail;
2739	}
2740
2741	/*
2742	 * Determine the number of pages necessary for new region and the
2743	 * number of pages supported by the current MTT resources
2744	 */
2745	nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2746	nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2747
2748	/*
2749	 * Depending on whether we have enough pages or not, the next step is
2750	 * to fill in a set of MTT entries that reflect the new mapping.  In
2751	 * the first case below, we already have enough entries.  This means
2752	 * we need to unbind the memory from the previous mapping, bind the
2753	 * memory for the new mapping, write the new MTT entries, and update
2754	 * the mr to reflect the changes.
2755	 * In the second case below, we do not have enough entries in the
2756	 * current mapping.  So, in this case, we need not only to unbind the
2757	 * current mapping, but we need to free up the MTT resources associated
2758	 * with that mapping.  After we've successfully done that, we continue
2759	 * by binding the new memory, allocating new MTT entries, writing the
2760	 * new MTT entries, and updating the mr to reflect the changes.
2761	 */
2762
2763	/*
2764	 * If this region is being shared (i.e. MTT refcount != 1), then we
2765	 * can't reuse the current MTT resources regardless of their size.
2766	 * Instead we'll need to alloc new ones (below) just as if there
2767	 * hadn't been enough room in the current entries.
2768	 */
2769	swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2770	if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2771	    (nummtt_needed <= nummtt_in_currrsrc)) {
2772
2773		/*
2774		 * Unbind the old mapping for this memory region, but retain
2775		 * the ddi_dma_handle_t (if possible) for reuse in the bind
2776		 * operation below.  Note:  If original memory region was
2777		 * bound for IOMMU bypass and the new region can not use
2778		 * bypass, then a new DMA handle will be necessary.
2779		 */
2780		if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2781			mr->mr_bindinfo.bi_free_dmahdl = 0;
2782			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2783			dmahdl = mr->mr_bindinfo.bi_dmahdl;
2784			reuse_dmahdl = 1;
2785		} else {
2786			hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2787			dmahdl = NULL;
2788			reuse_dmahdl = 0;
2789		}
2790
2791		/*
2792		 * Bind the new memory and determine the mapped addresses.
2793		 * As described, this routine and hermon_mr_fast_mtt_write()
2794		 * do the majority of the work for the memory registration
2795		 * operations.  Note:  When we successfully finish the binding,
2796		 * we will set the "bi_free_dmahdl" flag to indicate that
2797		 * even though we may have reused the ddi_dma_handle_t we do
2798		 * wish it to be freed up at some later time.  Note also that
2799		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2800		 */
2801		bind->bi_bypass	= bind_type;
2802		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2803		if (status != DDI_SUCCESS) {
2804			if (reuse_dmahdl) {
2805				ddi_dma_free_handle(&dmahdl);
2806			}
2807
2808			/*
2809			 * Deregister will be called upon returning failure
2810			 * from this routine. This will ensure that all
2811			 * current resources get properly freed up.
2812			 * Unnecessary to attempt to regain software ownership
2813			 * of the MPT entry as that has already been done
2814			 * above (in hermon_mr_reregister()).  Also unnecessary
2815			 * to attempt to unbind the memory.
2816			 */
2817			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2818
2819			status = IBT_INSUFF_RESOURCE;
2820			goto mrrereghelp_fail;
2821		}
2822		if (reuse_dmahdl) {
2823			bind->bi_free_dmahdl = 1;
2824		}
2825
2826		/*
2827		 * Using the new mapping, but reusing the current MTT
2828		 * resources, write the updated entries to MTT
2829		 */
2830		mtt    = mr->mr_mttrsrcp;
2831		status = hermon_mr_fast_mtt_write(state, mtt, bind,
2832		    mtt_pgsize_bits);
2833		if (status != DDI_SUCCESS) {
2834			/*
2835			 * Deregister will be called upon returning failure
2836			 * from this routine. This will ensure that all
2837			 * current resources get properly freed up.
2838			 * Unnecessary to attempt to regain software ownership
2839			 * of the MPT entry as that has already been done
2840			 * above (in hermon_mr_reregister()).  Also unnecessary
2841			 * to attempt to unbind the memory.
2842			 *
2843			 * But we do need to unbind the newly bound memory
2844			 * before returning.
2845			 */
2846			hermon_mr_mem_unbind(state, bind);
2847			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2848
2849			/*
2850			 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2851			 * only if it detects a HW error during DMA.
2852			 */
2853			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2854			status = ibc_get_ci_failure(0);
2855			goto mrrereghelp_fail;
2856		}
2857
2858		/* Put the updated information into the Mem Region handle */
2859		mr->mr_bindinfo	  = *bind;
2860		mr->mr_logmttpgsz = mtt_pgsize_bits;
2861
2862	} else {
2863		/*
2864		 * Check if the memory region MTT is shared by any other MRs.
2865		 * Since the resource may be shared between multiple memory
2866		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2867		 * important that we not unbind any resources prematurely.
2868		 */
2869		if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2870			/*
2871			 * Unbind the old mapping for this memory region, but
2872			 * retain the ddi_dma_handle_t for reuse in the bind
2873			 * operation below. Note: This can only be done here
2874			 * because the region being reregistered is not
2875			 * currently shared.  Also if original memory region
2876			 * was bound for IOMMU bypass and the new region can
2877			 * not use bypass, then a new DMA handle will be
2878			 * necessary.
2879			 */
2880			if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2881				mr->mr_bindinfo.bi_free_dmahdl = 0;
2882				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2883				dmahdl = mr->mr_bindinfo.bi_dmahdl;
2884				reuse_dmahdl = 1;
2885			} else {
2886				hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2887				dmahdl = NULL;
2888				reuse_dmahdl = 0;
2889			}
2890		} else {
2891			dmahdl = NULL;
2892			reuse_dmahdl = 0;
2893		}
2894
2895		/*
2896		 * Bind the new memory and determine the mapped addresses.
2897		 * As described, this routine and hermon_mr_fast_mtt_write()
2898		 * do the majority of the work for the memory registration
2899		 * operations.  Note:  When we successfully finish the binding,
2900		 * we will set the "bi_free_dmahdl" flag to indicate that
2901		 * even though we may have reused the ddi_dma_handle_t we do
2902		 * wish it to be freed up at some later time.  Note also that
2903		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2904		 */
2905		bind->bi_bypass	= bind_type;
2906		status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2907		if (status != DDI_SUCCESS) {
2908			if (reuse_dmahdl) {
2909				ddi_dma_free_handle(&dmahdl);
2910			}
2911
2912			/*
2913			 * Deregister will be called upon returning failure
2914			 * from this routine. This will ensure that all
2915			 * current resources get properly freed up.
2916			 * Unnecessary to attempt to regain software ownership
2917			 * of the MPT entry as that has already been done
2918			 * above (in hermon_mr_reregister()).  Also unnecessary
2919			 * to attempt to unbind the memory.
2920			 */
2921			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2922
2923			status = IBT_INSUFF_RESOURCE;
2924			goto mrrereghelp_fail;
2925		}
2926		if (reuse_dmahdl) {
2927			bind->bi_free_dmahdl = 1;
2928		}
2929
2930		/*
2931		 * Allocate the new MTT entries resource
2932		 */
2933		status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2934		    sleep, &mtt);
2935		if (status != DDI_SUCCESS) {
2936			/*
2937			 * Deregister will be called upon returning failure
2938			 * from this routine. This will ensure that all
2939			 * current resources get properly freed up.
2940			 * Unnecessary to attempt to regain software ownership
2941			 * of the MPT entry as that has already been done
2942			 * above (in hermon_mr_reregister()).  Also unnecessary
2943			 * to attempt to unbind the memory.
2944			 *
2945			 * But we do need to unbind the newly bound memory
2946			 * before returning.
2947			 */
2948			hermon_mr_mem_unbind(state, bind);
2949			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2950
2951			status = IBT_INSUFF_RESOURCE;
2952			goto mrrereghelp_fail;
2953		}
2954
2955		/*
2956		 * Allocate MTT reference count (to track shared memory
2957		 * regions).  As mentioned elsewhere above, this reference
2958		 * count resource may never be used on the given memory region,
2959		 * but if it is ever later registered as a "shared" memory
2960		 * region then this resource will be necessary.  Note:  This
2961		 * is only necessary here if the existing memory region is
2962		 * already being shared (because otherwise we already have
2963		 * a useable reference count resource).
2964		 */
2965		if (HERMON_MTT_IS_SHARED(swrc_old)) {
2966			status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2967			    sleep, &mtt_refcnt);
2968			if (status != DDI_SUCCESS) {
2969				/*
2970				 * Deregister will be called upon returning
2971				 * failure from this routine. This will ensure
2972				 * that all current resources get properly
2973				 * freed up.  Unnecessary to attempt to regain
2974				 * software ownership of the MPT entry as that
2975				 * has already been done above (in
2976				 * hermon_mr_reregister()).  Also unnecessary
2977				 * to attempt to unbind the memory.
2978				 *
2979				 * But we need to unbind the newly bound
2980				 * memory and free up the newly allocated MTT
2981				 * entries before returning.
2982				 */
2983				hermon_mr_mem_unbind(state, bind);
2984				hermon_rsrc_free(state, &mtt);
2985				*dereg_level =
2986				    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2987
2988				status = IBT_INSUFF_RESOURCE;
2989				goto mrrereghelp_fail;
2990			}
2991			swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2992			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2993			HERMON_MTT_REFCNT_INIT(swrc_new);
2994		} else {
2995			mtt_refcnt = mr->mr_mttrefcntp;
2996		}
2997
2998		/*
2999		 * Using the new mapping and the new MTT resources, write the
3000		 * updated entries to MTT
3001		 */
3002		status = hermon_mr_fast_mtt_write(state, mtt, bind,
3003		    mtt_pgsize_bits);
3004		if (status != DDI_SUCCESS) {
3005			/*
3006			 * Deregister will be called upon returning failure
3007			 * from this routine. This will ensure that all
3008			 * current resources get properly freed up.
3009			 * Unnecessary to attempt to regain software ownership
3010			 * of the MPT entry as that has already been done
3011			 * above (in hermon_mr_reregister()).  Also unnecessary
3012			 * to attempt to unbind the memory.
3013			 *
3014			 * But we need to unbind the newly bound memory,
3015			 * free up the newly allocated MTT entries, and
3016			 * (possibly) free the new MTT reference count
3017			 * resource before returning.
3018			 */
3019			if (HERMON_MTT_IS_SHARED(swrc_old)) {
3020				hermon_rsrc_free(state, &mtt_refcnt);
3021			}
3022			hermon_mr_mem_unbind(state, bind);
3023			hermon_rsrc_free(state, &mtt);
3024			*dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
3025
3026			status = IBT_INSUFF_RESOURCE;
3027			goto mrrereghelp_fail;
3028		}
3029
3030		/*
3031		 * Check if the memory region MTT is shared by any other MRs.
3032		 * Since the resource may be shared between multiple memory
3033		 * regions (as a result of a "RegisterSharedMR()" verb) it is
3034		 * important that we not free up any resources prematurely.
3035		 */
3036		if (HERMON_MTT_IS_SHARED(swrc_old)) {
3037			/* Decrement MTT reference count for "old" region */
3038			(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
3039		} else {
3040			/* Free up the old MTT entries resource */
3041			hermon_rsrc_free(state, &mr->mr_mttrsrcp);
3042		}
3043
3044		/* Put the updated information into the mrhdl */
3045		mr->mr_bindinfo	  = *bind;
3046		mr->mr_logmttpgsz = mtt_pgsize_bits;
3047		mr->mr_mttrsrcp   = mtt;
3048		mr->mr_mttrefcntp = mtt_refcnt;
3049	}
3050
3051	/*
3052	 * Calculate and return the updated MTT address (in the DDR address
3053	 * space).  This will be used by the caller (hermon_mr_reregister) in
3054	 * the updated MPT entry
3055	 */
3056	*mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
3057
3058	return (DDI_SUCCESS);
3059
3060mrrereghelp_fail:
3061	return (status);
3062}
3063
3064
3065/*
3066 * hermon_mr_nummtt_needed()
3067 *    Context: Can be called from interrupt or base context.
3068 */
3069/* ARGSUSED */
3070static uint64_t
3071hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3072    uint_t *mtt_pgsize_bits)
3073{
3074	uint64_t	pg_offset_mask;
3075	uint64_t	pg_offset, tmp_length;
3076
3077	/*
3078	 * For now we specify the page size as 8Kb (the default page size for
3079	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
3080	 * size by examining the dmacookies
3081	 */
3082	*mtt_pgsize_bits = PAGESHIFT;
3083
3084	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3085	pg_offset = bind->bi_addr & pg_offset_mask;
3086	tmp_length = pg_offset + (bind->bi_len - 1);
3087	return ((tmp_length >> *mtt_pgsize_bits) + 1);
3088}
3089
3090
3091/*
3092 * hermon_mr_mem_bind()
3093 *    Context: Can be called from interrupt or base context.
3094 */
3095static int
3096hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3097    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3098{
3099	ddi_dma_attr_t	dma_attr;
3100	int		(*callback)(caddr_t);
3101	int		status;
3102
3103	/* bi_type must be set to a meaningful value to get a bind handle */
3104	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3105	    bind->bi_type == HERMON_BINDHDL_BUF ||
3106	    bind->bi_type == HERMON_BINDHDL_UBUF);
3107
3108	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3109
3110	/* Set the callback flag appropriately */
3111	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3112
3113	/*
3114	 * Initialize many of the default DMA attributes.  Then, if we're
3115	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3116	 */
3117	if (dmahdl == NULL) {
3118		hermon_dma_attr_init(state, &dma_attr);
3119#ifdef	__sparc
3120		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3121			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3122		}
3123#endif
3124
3125		/* set RO if needed - tunable set and 'is_buffer' is non-0 */
3126		if (is_buffer) {
3127			if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3128				if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3129				    (hermon_kernel_data_ro ==
3130				    HERMON_RO_ENABLED)) {
3131					dma_attr.dma_attr_flags |=
3132					    DDI_DMA_RELAXED_ORDERING;
3133				}
3134				if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3135				    (hermon_user_data_ro ==
3136				    HERMON_RO_ENABLED))) {
3137					dma_attr.dma_attr_flags |=
3138					    DDI_DMA_RELAXED_ORDERING;
3139				}
3140			}
3141		}
3142
3143		/* Allocate a DMA handle for the binding */
3144		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3145		    callback, NULL, &bind->bi_dmahdl);
3146		if (status != DDI_SUCCESS) {
3147			return (status);
3148		}
3149		bind->bi_free_dmahdl = 1;
3150
3151	} else  {
3152		bind->bi_dmahdl = dmahdl;
3153		bind->bi_free_dmahdl = 0;
3154	}
3155
3156
3157	/*
3158	 * Bind the memory to get the PCI mapped addresses.  The decision
3159	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
3160	 * is determined by the "bi_type" flag.  Note: if the bind operation
3161	 * fails then we have to free up the DMA handle and return error.
3162	 */
3163	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
3164		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
3165		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
3166		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
3167		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
3168
3169	} else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
3170
3171		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
3172		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
3173		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
3174	}
3175	if (status != DDI_DMA_MAPPED) {
3176		if (bind->bi_free_dmahdl != 0) {
3177			ddi_dma_free_handle(&bind->bi_dmahdl);
3178		}
3179		return (status);
3180	}
3181
3182	return (DDI_SUCCESS);
3183}
3184
3185
3186/*
3187 * hermon_mr_mem_unbind()
3188 *    Context: Can be called from interrupt or base context.
3189 */
3190static void
3191hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
3192{
3193	int	status;
3194
3195	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3196	/* there is nothing to unbind for alloc_lkey */
3197	if (bind->bi_type == HERMON_BINDHDL_LKEY)
3198		return;
3199
3200	/*
3201	 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
3202	 * is actually allocated by ddi_umem_iosetup() internally, then
3203	 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
3204	 * not to free it again later.
3205	 */
3206	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3207	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
3208		freerbuf(bind->bi_buf);
3209		bind->bi_type = HERMON_BINDHDL_NONE;
3210	}
3211	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
3212
3213	/*
3214	 * Unbind the DMA memory for the region
3215	 *
3216	 * Note: The only way ddi_dma_unbind_handle() currently
3217	 * can return an error is if the handle passed in is invalid.
3218	 * Since this should never happen, we choose to return void
3219	 * from this function!  If this does return an error, however,
3220	 * then we print a warning message to the console.
3221	 */
3222	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
3223	if (status != DDI_SUCCESS) {
3224		HERMON_WARNING(state, "failed to unbind DMA mapping");
3225		return;
3226	}
3227
3228	/* Free up the DMA handle */
3229	if (bind->bi_free_dmahdl != 0) {
3230		ddi_dma_free_handle(&bind->bi_dmahdl);
3231	}
3232}
3233
3234
3235/*
3236 * hermon_mr_fast_mtt_write()
3237 *    Context: Can be called from interrupt or base context.
3238 */
3239static int
3240hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3241    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3242{
3243	hermon_icm_table_t	*icm_table;
3244	hermon_dma_info_t	*dma_info;
3245	uint32_t		index1, index2, rindx;
3246	ddi_dma_cookie_t	dmacookie;
3247	uint_t			cookie_cnt;
3248	uint64_t		*mtt_table;
3249	uint64_t		mtt_entry;
3250	uint64_t		addr, endaddr;
3251	uint64_t		pagesize;
3252	offset_t		i, start;
3253	uint_t			per_span;
3254	int			sync_needed;
3255
3256	/*
3257	 * XXX According to the PRM, we are to use the WRITE_MTT
3258	 * command to write out MTTs. Tavor does not do this,
3259	 * instead taking advantage of direct access to the MTTs,
3260	 * and knowledge that Mellanox FMR relies on our ability
3261	 * to write directly to the MTTs without any further
3262	 * notification to the firmware. Likewise, we will choose
3263	 * to not use the WRITE_MTT command, but to simply write
3264	 * out the MTTs.
3265	 */
3266
3267	/* Calculate page size from the suggested value passed in */
3268	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3269
3270	/* Walk the "cookie list" and fill in the MTT table entries */
3271	dmacookie  = bind->bi_dmacookie;
3272	cookie_cnt = bind->bi_cookiecnt;
3273
3274	icm_table = &state->hs_icm[HERMON_MTT];
3275	rindx = mtt->hr_indx;
3276	hermon_index(index1, index2, rindx, icm_table, i);
3277	start = i;
3278
3279	per_span   = icm_table->span;
3280	dma_info   = icm_table->icm_dma[index1] + index2;
3281	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3282
3283	sync_needed = 0;
3284	while (cookie_cnt-- > 0) {
3285		addr    = dmacookie.dmac_laddress;
3286		endaddr = addr + (dmacookie.dmac_size - 1);
3287		addr    = addr & ~((uint64_t)pagesize - 1);
3288
3289		while (addr <= endaddr) {
3290
3291			/*
3292			 * Fill in the mapped addresses (calculated above) and
3293			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3294			 */
3295			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3296			mtt_table[i] = htonll(mtt_entry);
3297			i++;
3298			rindx++;
3299
3300			if (i == per_span) {
3301
3302				(void) ddi_dma_sync(dma_info->dma_hdl,
3303				    start * sizeof (hermon_hw_mtt_t),
3304				    (i - start) * sizeof (hermon_hw_mtt_t),
3305				    DDI_DMA_SYNC_FORDEV);
3306
3307				if ((addr + pagesize > endaddr) &&
3308				    (cookie_cnt == 0))
3309					return (DDI_SUCCESS);
3310
3311				hermon_index(index1, index2, rindx, icm_table,
3312				    i);
3313				start = i * sizeof (hermon_hw_mtt_t);
3314				dma_info = icm_table->icm_dma[index1] + index2;
3315				mtt_table =
3316				    (uint64_t *)(uintptr_t)dma_info->vaddr;
3317
3318				sync_needed = 0;
3319			} else {
3320				sync_needed = 1;
3321			}
3322
3323			addr += pagesize;
3324			if (addr == 0) {
3325				static int do_once = 1;
3326				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
3327				    do_once))
3328				if (do_once) {
3329					do_once = 0;
3330					cmn_err(CE_NOTE, "probable error in "
3331					    "dma_cookie address from caller\n");
3332				}
3333				break;
3334			}
3335		}
3336
3337		/*
3338		 * When we've reached the end of the current DMA cookie,
3339		 * jump to the next cookie (if there are more)
3340		 */
3341		if (cookie_cnt != 0) {
3342			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3343		}
3344	}
3345
3346	/* done all the cookies, now sync the memory for the device */
3347	if (sync_needed)
3348		(void) ddi_dma_sync(dma_info->dma_hdl,
3349		    start * sizeof (hermon_hw_mtt_t),
3350		    (i - start) * sizeof (hermon_hw_mtt_t),
3351		    DDI_DMA_SYNC_FORDEV);
3352
3353	return (DDI_SUCCESS);
3354}
3355
3356/*
3357 * hermon_mr_fast_mtt_write_fmr()
3358 *    Context: Can be called from interrupt or base context.
3359 */
3360/* ARGSUSED */
3361static int
3362hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
3363    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
3364{
3365	hermon_icm_table_t	*icm_table;
3366	hermon_dma_info_t	*dma_info;
3367	uint32_t		index1, index2, rindx;
3368	uint64_t		*mtt_table;
3369	offset_t		i, j;
3370	uint_t			per_span;
3371
3372	icm_table = &state->hs_icm[HERMON_MTT];
3373	rindx = mtt->hr_indx;
3374	hermon_index(index1, index2, rindx, icm_table, i);
3375	per_span   = icm_table->span;
3376	dma_info   = icm_table->icm_dma[index1] + index2;
3377	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3378
3379	/*
3380	 * Fill in the MTT table entries
3381	 */
3382	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
3383		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
3384		i++;
3385		rindx++;
3386		if (i == per_span) {
3387			hermon_index(index1, index2, rindx, icm_table, i);
3388			dma_info = icm_table->icm_dma[index1] + index2;
3389			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3390		}
3391	}
3392
3393	return (DDI_SUCCESS);
3394}
3395
3396
3397/*
3398 * hermon_mtt_refcnt_inc()
3399 *    Context: Can be called from interrupt or base context.
3400 */
3401static uint_t
3402hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3403{
3404	hermon_sw_refcnt_t *rc;
3405
3406	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3407	return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3408}
3409
3410
3411/*
3412 * hermon_mtt_refcnt_dec()
3413 *    Context: Can be called from interrupt or base context.
3414 */
3415static uint_t
3416hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3417{
3418	hermon_sw_refcnt_t *rc;
3419
3420	rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3421	return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3422}
3423