tavor_mr.h revision 9517:b4839b0aa7a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#ifndef	_SYS_IB_ADAPTERS_TAVOR_MR_H
28#define	_SYS_IB_ADAPTERS_TAVOR_MR_H
29
30/*
31 * tavor_mr.h
32 *    Contains all of the prototypes, #defines, and structures necessary
33 *    for the Tavor Memory Region/Window routines.
34 *    Specifically it contains #defines, macros, and prototypes for each of
35 *    the required memory region/window verbs that can be accessed through
36 *    the IBTF's CI interfaces.  In particular each of the prototypes defined
37 *    below is called from a corresponding CI interface routine (as specified
38 *    in the tavor_ci.c file).
39 */
40
41#include <sys/types.h>
42#include <sys/conf.h>
43#include <sys/ddi.h>
44#include <sys/sunddi.h>
45
46#ifdef __cplusplus
47extern "C" {
48#endif
49
50/*
51 * The following defines specify the default number of MPT entries and their
52 * individual entry size.  Settings exist for the supported DDR DIMM sizes of
53 * 128MB and 256MB.  If a DIMM greater than 256MB is found, then the 256MB
54 * profile is used.  See tavor_cfg.c for more discussion on config profiles.
55 *
56 * For manual configuration (not using config profiles), this value is
57 * controllable through the "tavor_log_num_mpt" configuration variable.  To
58 * override config profile settings the 'tavor_alt_config_enable' configuration
59 * variable must first be set.
60 */
61#define	TAVOR_NUM_MPT_SHIFT_128		0x14	/* 2^20 (1M) MPT entries */
62#define	TAVOR_NUM_MPT_SHIFT_256		0x15	/* 2^21 (2M) MPT entries */
63#define	TAVOR_MPT_SIZE_SHIFT		0x6	/* log2 of one MPT entry's size */
64#define	TAVOR_MPT_SIZE			(1 << TAVOR_MPT_SIZE_SHIFT)	/* 64 (presumably bytes) */
65
66/*
67 * Minimal configuration value.
68 */
69#define	TAVOR_NUM_MPT_SHIFT_MIN		0xD	/* 2^13 (8K) MPT entries */
70
71/*
72 * The following defines specify the size of each individual MTT entry and
73 * the number of MTT entries that make up an MTT segment (TAVOR_MTTSEG_SIZE)
74 */
75#define	TAVOR_MTT_SIZE_SHIFT		0x3	/* log2 of one MTT entry's size */
76#define	TAVOR_MTT_SIZE			(1 << TAVOR_MTT_SIZE_SHIFT)	/* 8 (presumably bytes) */
77#define	TAVOR_MTTSEG_SIZE_SHIFT		0x0	/* scales the 8-entry base segment */
78#define	TAVOR_MTTSEG_SIZE		(8 << TAVOR_MTTSEG_SIZE_SHIFT)	/* 8 MTT entries per segment */
79
80/*
81 * These define the total number of MTT segments.  By default we are setting
82 * this number of MTT segments (the MTT table size) to 2M segments.  This
83 * default value is used to initialize the "tavor_log_num_mttseg" config
84 * variable.
85 * Note: Each segment is currently set to 8 MTT entries (TAVOR_MTTSEG_SIZE).
86 * This means that we can support up to 16M MTT entries (i.e. "pages").
87 */
88#define	TAVOR_NUM_MTTSEG_SHIFT		0x15	/* 2^21 (2M) MTT segments */
89#define	TAVOR_NUM_MTTSEG		(1 << TAVOR_NUM_MTTSEG_SHIFT)
90
91/*
92 * Minimal configuration value.
93 */
94#define	TAVOR_NUM_MTTSEG_SHIFT_MIN	0x11	/* 2^17 (128K) MTT segments */
95
96/*
97 * Macro to round a number of MTT entries to the number of MTT segments.
98 */
99#define	TAVOR_NUMMTT_TO_MTTSEG(num)		\
100	(((num) + TAVOR_MTTSEG_SIZE - 1) >>	\
101	(TAVOR_MTTSEG_SIZE_SHIFT + TAVOR_MTT_SIZE_SHIFT))	/* NOTE: shift == log2(TAVOR_MTTSEG_SIZE) only while TAVOR_MTT_SIZE_SHIFT is 3 */
102
103/*
104 * This define is used to specify the "MTT page walk version" in the Tavor
105 * INIT_HCA command.
106 */
107#define	TAVOR_MTT_PG_WALK_VER		0	/* value supplied to the INIT_HCA command */
108
109/*
110 * This define is the maximum size of a memory region or window (log 2).  It is
111 * set depending on size of the DDR being either 128MB or 256MB.  These defines
112 * are used to initialize the "tavor_log_max_mrw_sz" configuration variable,
113 * and are proportional to the max MPT size set above.
114 */
115#define	TAVOR_MAX_MEM_MPT_SHIFT_128		0x23	/* log2 max size: 2^35 (32G, presumably bytes) */
116#define	TAVOR_MAX_MEM_MPT_SHIFT_256		0x24	/* log2 max size: 2^36 (64G, presumably bytes) */
117
118/*
119 * Minimal configuration value.
120 */
121#define	TAVOR_MAX_MEM_MPT_SHIFT_MIN		0x1E	/* log2 max size: 2^30 (1G, presumably bytes) */
122
123/*
124 * Defines used by tavor_mr_deregister() to specify how much/to what extent
125 * a given memory region's resources should be freed up.  TAVOR_MR_DEREG_ALL
126 * says what it means, free up all the resources associated with the region.
127 * TAVOR_MR_DEREG_NO_HW2SW_MPT indicates that it is unnecessary to attempt
128 * the ownership transfer (from hardware to software) for the given MPT entry.
129 * And TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND indicates that it is not only
130 * unnecessary to attempt the ownership transfer for MPT, but it is also
131 * unnecessary to attempt to unbind the memory.
132 * In general, these last two are specified when tavor_mr_deregister() is
133 * called from tavor_mr_reregister(), where the MPT ownership transfer or
134 * memory unbinding may have already been successfully performed.
135 */
136#define	TAVOR_MR_DEREG_ALL			3	/* free all of the region's resources */
137#define	TAVOR_MR_DEREG_NO_HW2SW_MPT		2	/* skip the MPT hw-to-sw ownership transfer */
138#define	TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND	1	/* skip the ownership transfer and the unbind */
139
140/*
141 * The following define is used by tavor_mr_rereg_xlat_helper() to determine
142 * whether or not a given DMA handle can be reused.  If the DMA handle was
143 * previously initialized for IOMMU bypass mapping, then it can not be reused
144 * to reregister a region for DDI_DMA_STREAMING access.
145 */
146#define	TAVOR_MR_REUSE_DMAHDL(mr, flags)				\
147	(((mr)->mr_bindinfo.bi_bypass != TAVOR_BINDMEM_BYPASS) ||	\
148	    !((flags) & IBT_MR_NONCOHERENT))	/* false only for a bypass handle when IBT_MR_NONCOHERENT is set */
149
150/*
151 * The tavor_sw_refcnt_t structure is used internally by the Tavor driver to
152 * track all the information necessary to manage shared memory regions.  Since
153 * a shared memory region _will_ have its own distinct MPT entry, but will
154 * _share_ its MTT entries with another region, it is necessary to track the
155 * number of times a given MTT structure is shared.  This ensures that it will
156 * not be prematurely freed up and that it can be destroyed only when it is
157 * appropriate to do so.
158 *
159 * Each tavor_sw_refcnt_t structure contains a lock and a reference count
160 * variable which are used to track the necessary information.
161 *
162 * The following macros (below) are used to manipulate and query the MTT
163 * reference count parameters.  TAVOR_MTT_REFCNT_INIT() is used to initialize
164 * a newly allocated tavor_sw_refcnt_t struct (setting the "swrc_refcnt" to 1).
165 * And the TAVOR_MTT_IS_NOT_SHARED() and TAVOR_MTT_IS_SHARED() macros are
166 * used to query the current status of tavor_sw_refcnt_t struct to determine
167 * if its "swrc_refcnt" is one or not.
168 */
169typedef struct tavor_sw_refcnt_s {
170	kmutex_t		swrc_lock;	/* protects swrc_refcnt (see _NOTE below) */
171	uint_t			swrc_refcnt;	/* MTT share count; 1 means not shared */
172} tavor_sw_refcnt_t;
173_NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_refcnt_t::swrc_refcnt))
174_NOTE(MUTEX_PROTECTS_DATA(tavor_sw_refcnt_t::swrc_lock,
175    tavor_sw_refcnt_t::swrc_refcnt))
176#define	TAVOR_MTT_REFCNT_INIT(swrc_tmp)		((swrc_tmp)->swrc_refcnt = 1)	/* does not take swrc_lock itself */
177#define	TAVOR_MTT_IS_NOT_SHARED(swrc_tmp)	((swrc_tmp)->swrc_refcnt == 1)
178#define	TAVOR_MTT_IS_SHARED(swrc_tmp)		((swrc_tmp)->swrc_refcnt != 1)	/* also true when the count is 0 */
179
180
181/*
182 * The tavor_bind_info_t structure is used internally by the Tavor driver to
183 * track all the information necessary to perform the DMA mappings necessary
184 * for memory registration.  It is specifically passed into both the
185 * tavor_mr_mem_bind() and tavor_mr_mtt_write() functions which perform most
186 * of the necessary operations for Tavor memory registration.
187 *
188 * This structure is used to pass all the information necessary for a call
189 * to either ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle().  Note:
190 * the fields which need to be valid for each type of binding are slightly
191 * different and that is indicated by the value in the "bi_type" field.  The
192 * "bi_type" field may be set to either of the following defined values:
193 * TAVOR_BINDHDL_VADDR (to indicate an "addr" bind) or TAVOR_BINDHDL_BUF (to
194 * indicate a "buf" bind).
195 *
196 * Upon return from tavor_mr_mem_bind(), the tavor_bind_info_t struct will
197 * have its "bi_dmahdl", "bi_dmacookie", and "bi_cookiecnt" fields filled in.
198 * It is these values which are of particular interest to the
199 * tavor_mr_mtt_write() routine (they hold the PCI mapped addresses).
200 *
201 * Once initialized and used in this way, the tavor_bind_info_t will not be
202 * modified in any way until it is subsequently passed to tavor_mr_mem_unbind()
203 * where the memory and resources will be unbound and reclaimed.  Note:  the
204 * "bi_free_dmahdl" flag indicates whether the ddi_dma_handle_t should be
205 * freed as part of the tavor_mr_mem_unbind() operation or whether it will
206 * be freed later elsewhere.
207 */
208typedef struct tavor_bind_info_s {
209	uint64_t		bi_addr;	/* presumably the start address to bind -- confirm */
210	uint64_t		bi_len;	/* presumably the length to bind -- confirm */
211	struct as		*bi_as;
212	struct buf		*bi_buf;	/* presumably used for "buf" binds -- confirm */
213	ddi_dma_handle_t	bi_dmahdl;	/* filled in by tavor_mr_mem_bind() */
214	ddi_dma_cookie_t	bi_dmacookie;	/* filled in by tavor_mr_mem_bind() */
215	uint_t			bi_cookiecnt;	/* filled in by tavor_mr_mem_bind() */
216	uint_t			bi_type;	/* one of TAVOR_BINDHDL_* below */
217	uint_t			bi_flags;
218	uint_t			bi_bypass;	/* tested against TAVOR_BINDMEM_BYPASS */
219	uint_t			bi_free_dmahdl;	/* free bi_dmahdl during unbind, or later elsewhere */
220} tavor_bind_info_t;
221#define	TAVOR_BINDHDL_NONE		0
222#define	TAVOR_BINDHDL_VADDR		1	/* "addr" bind */
223#define	TAVOR_BINDHDL_BUF		2	/* "buf" bind */
224#define	TAVOR_BINDHDL_UBUF		3
225
226/*
227 * The tavor_sw_mr_s structure is also referred to using the "tavor_mrhdl_t"
228 * typedef (see tavor_typedef.h).  It encodes all the information necessary
229 * to track the various resources needed to register, reregister, deregister,
230 * and perform all the myriad other operations on both memory regions _and_
231 * memory windows.
232 *
233 * A pointer to this structure is returned from many of the IBTF's CI verbs
234 * interfaces for memory registration.
235 *
236 * It contains pointers to the various resources allocated for a memory
237 * region, i.e. MPT resource, MTT resource, and MTT reference count resource.
238 * In addition it contains the tavor_bind_info_t struct used for the memory
239 * bind operation on a given memory region.
240 *
241 * It also has a pointer to the associated PD handle, placeholders for access
242 * flags, memory keys, and suggested page size for the region.  It also has
243 * the necessary backpointer to the resource that corresponds to the structure
244 * itself.  And lastly, it contains a placeholder for a callback which should
245 * be called on memory region unpinning.
246 */
247struct tavor_sw_mr_s {
248	kmutex_t		mr_lock;	/* protects the fields listed in the _NOTE below */
249	tavor_rsrc_t		*mr_mptrsrcp;	/* MPT resource */
250	tavor_rsrc_t		*mr_mttrsrcp;	/* MTT resource */
251	tavor_rsrc_t		*mr_mttrefcntp;	/* MTT reference count resource */
252	tavor_pdhdl_t		mr_pdhdl;	/* associated PD handle */
253	tavor_bind_info_t	mr_bindinfo;	/* state for the memory bind operation */
254	ibt_mr_attr_flags_t	mr_accflag;	/* access flags */
255	uint32_t		mr_lkey;	/* memory key */
256	uint32_t		mr_rkey;	/* memory key */
257	uint32_t		mr_logmttpgsz;	/* suggested page size (presumably log2) */
258	tavor_rsrc_t		*mr_rsrcp;	/* backpointer to this struct's own resource */
259	uint_t			mr_is_fmr;
260	tavor_fmr_list_t	*mr_fmr;
261	uint_t			mr_is_umem;
262	ddi_umem_cookie_t	mr_umemcookie;
263	void 			(*mr_umem_cbfunc)(void *, void *);	/* callback for region unpinning */
264	void			*mr_umem_cbarg1;	/* presumably 1st arg to mr_umem_cbfunc */
265	void			*mr_umem_cbarg2;	/* presumably 2nd arg to mr_umem_cbfunc */
266};
267_NOTE(DATA_READABLE_WITHOUT_LOCK(tavor_sw_mr_s::mr_bindinfo
268    tavor_sw_mr_s::mr_lkey
269    tavor_sw_mr_s::mr_is_umem
270    tavor_sw_mr_s::mr_is_fmr
271    tavor_sw_mr_s::mr_fmr))
272_NOTE(MUTEX_PROTECTS_DATA(tavor_sw_mr_s::mr_lock,
273    tavor_sw_mr_s::mr_mptrsrcp
274    tavor_sw_mr_s::mr_mttrsrcp
275    tavor_sw_mr_s::mr_mttrefcntp
276    tavor_sw_mr_s::mr_bindinfo
277    tavor_sw_mr_s::mr_lkey
278    tavor_sw_mr_s::mr_rkey
279    tavor_sw_mr_s::mr_logmttpgsz
280    tavor_sw_mr_s::mr_rsrcp
281    tavor_sw_mr_s::mr_is_umem
282    tavor_sw_mr_s::mr_umemcookie
283    tavor_sw_mr_s::mr_umem_cbfunc
284    tavor_sw_mr_s::mr_umem_cbarg1
285    tavor_sw_mr_s::mr_umem_cbarg2))
286
287/*
288 * The tavor_mr_options_t structure is used in several of the Tavor memory
289 * registration routines to provide additional option functionality.  When
290 * a NULL pointer is passed in place of a pointer to this struct, it is a
291 * way of specifying the "default" behavior.  Using this structure, however,
292 * is a way of controlling any extended behavior.
293 *
294 * Currently, the only defined "extended" behaviors are for specifying whether
295 * a given memory region should bypass the PCI IOMMU (TAVOR_BINDMEM_BYPASS)
296 * or be mapped into the IOMMU (TAVOR_BINDMEM_NORMAL), for specifying whether
297 * a given ddi_dma_handle_t should be used in the bind operation, and for
298 * specifying whether a memory registration should attempt to return an IB
299 * vaddr which is "zero-based" (aids in alignment constraints for QPs).
300 *
301 * This defaults today to always bypassing the IOMMU (can be changed by using
302 * the "tavor_iommu_bypass" configuration variable), to always allocating
303 * a new dma handle, and to using the virtual address passed in (i.e. not
304 * "zero-based").
305 */
306typedef struct tavor_mr_options_s {
307	ddi_dma_handle_t	mro_bind_dmahdl;	/* dma handle to use for the bind, if any */
308	uint_t			mro_bind_type;	/* presumably TAVOR_BINDMEM_* -- confirm */
309	uint_t			mro_bind_override_addr;	/* presumably requests a "zero-based" IB vaddr */
310} tavor_mr_options_t;
311#define	TAVOR_BINDMEM_NORMAL		1	/* map into the IOMMU */
312#define	TAVOR_BINDMEM_BYPASS		0	/* bypass the PCI IOMMU */
313
314int tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pdhdl,
315    ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);	/* op: NULL presumably selects the defaults */
316int tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pdhdl,
317    ibt_smr_attr_t *attrp, struct buf *buf, tavor_mrhdl_t *mrhdl,
318    tavor_mr_options_t *op);	/* op: NULL presumably selects the defaults */
319int tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
320    ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsz_bits);
321int tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
322    tavor_rsrc_t *mtt);
323int tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
324    tavor_pdhdl_t pdhdl, ibt_smr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new);
325int tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl,
326    uint_t level, uint_t sleep);	/* level: one of TAVOR_MR_DEREG_* */
327int tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mrhdl,
328    ibt_mr_query_attr_t *attr);
329int tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mrhdl,
330    tavor_pdhdl_t pdhdl, ibt_mr_attr_t *attr_p, tavor_mrhdl_t *mrhdl_new,
331    tavor_mr_options_t *op);	/* op: NULL presumably selects the defaults */
332int tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
333    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
334    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op);	/* op: NULL presumably selects the defaults */
335int tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs,
336    size_t num_segs);
337int tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pdhdl,
338    ibt_mw_flags_t flags, tavor_mwhdl_t *mwhdl);
339int tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep);
340void tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key);
341int tavor_mr_alloc_fmr(tavor_state_t *state, tavor_pdhdl_t pd,
342    tavor_fmrhdl_t fmr_pool, tavor_mrhdl_t *mrhdl);
343int tavor_mr_dealloc_fmr(tavor_state_t *state, tavor_mrhdl_t *mrhdl);
344int tavor_mr_register_physical_fmr(tavor_state_t *state,
345    ibt_pmr_attr_t *mem_pattr_p, tavor_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p);
346int tavor_mr_invalidate_fmr(tavor_state_t *state, tavor_mrhdl_t mr);
347int tavor_mr_deregister_fmr(tavor_state_t *state, tavor_mrhdl_t mr);
348
349
350#ifdef __cplusplus
351}
352#endif
353
354#endif	/* _SYS_IB_ADAPTERS_TAVOR_MR_H */
355