/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_srq.c
 *    Tavor Shared Receive Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, querying,
 *    modifying and posting shared receive queues.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>

#include <sys/ib/adapters/tavor/tavor.h>

static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);

/*
 * tavor_srq_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo,
    uint_t sleepflag, tavor_srq_options_t *op)
{
	ibt_srq_hdl_t		ibt_srqhdl;
	tavor_pdhdl_t		pd;
	ibt_srq_sizes_t		*sizes;
	ibt_srq_sizes_t		*real_sizes;
	tavor_srqhdl_t		*srqhdl;
	ibt_srq_flags_t		flags;
	tavor_rsrc_t		*srqc, *rsrc;
	tavor_hw_srqc_t		srqc_entry;
	uint32_t		*buf;
	tavor_srqhdl_t		srq;
	tavor_umap_db_entry_t	*umapdb;
	ibt_mr_attr_t		mr_attr;
	tavor_mr_options_t	mr_op;
	tavor_mrhdl_t		mr;
	uint64_t		addr;
	uint64_t		value, srq_desc_off;
	uint32_t		lkey;
	uint32_t		log_srq_size;
	uint32_t		uarpg;
	uint_t			wq_location, dma_xfer_mode, srq_is_umap;
	int			flag, status;
	char			*errormsg;
	uint_t			max_sgl;
	uint_t			wqesz;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))

	TAVOR_TNF_ENTER(tavor_srq_alloc);

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the SRQ's work queues should come from normal
	 * system memory or whether they should be allocated from DDR memory.
	 */
	if (op == NULL) {
		wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
	} else {
		wq_location = op->srqo_wq_loc;
	}

	/*
	 * Extract the necessary info from the tavor_srq_info_t structure
	 */
	real_sizes = srqinfo->srqi_real_sizes;
	sizes	   = srqinfo->srqi_sizes;
	pd	   = srqinfo->srqi_pd;
	ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
	flags	   = srqinfo->srqi_flags;
	srqhdl	   = srqinfo->srqi_srqhdl;

	/*
	 * Determine whether SRQ is being allocated for userland access or
	 * whether it is being allocated for kernel access.  If the SRQ is
	 * being allocated for userland access, then lookup the UAR doorbell
	 * page number for the current process.  Note:  If this is not found
	 * (e.g. if the process has not previously open()'d the Tavor driver),
	 * then an error is returned.
	 */
	srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
	if (srq_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
		if (status != DDI_SUCCESS) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
			/* Nothing has been allocated yet, so fail directly */
			goto srqalloc_fail;
		}
		uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
	}

	/* Increase PD refcnt */
	tavor_pd_refcnt_inc(pd);

	/* Allocate an SRQ context entry */
	status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
		goto srqalloc_fail1;
	}

	/* Allocate the SRQ Handle entry */
	status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
		goto srqalloc_fail2;
	}

	srq = (tavor_srqhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))

	srq->srq_srqnum = srqc->tr_indx;	/* just use index */

	/*
	 * If this will be a user-mappable SRQ, then allocate an entry for
	 * the "userland resources database".  This will later be added to
	 * the database (after all further SRQ operations are successful).
	 * If we fail here, we must undo the reference counts and the
	 * previous resource allocation.
	 */
	if (srq_is_umap) {
		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto srqalloc_fail3;
		}
	}

	/*
	 * Calculate the appropriate size for the SRQ.
	 * Note:  All Tavor SRQs must be a power-of-2 in size.  Also
	 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE.  This step
	 * is to round the requested size up to the next highest power-of-2
	 */
	sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE);
	log_srq_size = highbit(sizes->srq_wr_sz);
	if ((sizes->srq_wr_sz & (sizes->srq_wr_sz - 1)) == 0) {
		log_srq_size = log_srq_size - 1;
	}
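	/*
	 * For example: a request for 1000 WRs yields log_srq_size = 10
	 * (i.e. 1024 entries), while a request for exactly 512 WRs is
	 * already a power-of-2 and yields log_srq_size = 9.
	 */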

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits).  If not,
	 * then obviously we have a lot of cleanup to do before returning.
	 */
	if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
		goto srqalloc_fail4;
	}

	/*
	 * Next we verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits).  If not, then obviously the same cleanup needs to be done.
	 */
	max_sgl = state->ts_cfg_profile->cp_srq_max_sgl;
	if (sizes->srq_sgl_sz > max_sgl) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL");
		goto srqalloc_fail4;
	}

	/*
	 * Determine the SRQ's WQE sizes.  This depends on the requested
	 * number of SGLs.  Note: This also has the side-effect of
	 * calculating the real number of SGLs (for the calculated WQE size)
	 */
	tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
	    TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
	    &srq->srq_wq_sgl);

	/*
	 * Allocate the memory for SRQ work queues.  Note:  The location from
	 * which we will allocate these work queues has been passed in through
	 * the tavor_srq_options_t structure.  Since Tavor work queues are not
	 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
	 * queue memory is very important.  We used to allocate work queues
	 * (the combined receive and send queues) so that they would be aligned
	 * on their combined size.  That alignment guaranteed that they would
	 * never cross the 4GB boundary (Tavor work queues are on the order of
	 * MBs at maximum).  Now we are able to relax this alignment constraint
	 * by ensuring that the IB address assigned to the queue memory (as a
	 * result of the tavor_mr_register() call) is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment that
	 * was more restrictive than the system page size.  So we avoided this
	 * constraint by passing two alignment values, one for the memory
	 * allocation itself and the other for the DMA handle (for later bind).
	 * This used to cause more memory than necessary to be allocated (in
	 * order to guarantee the more restrictive alignment constraint).  But
	 * by guaranteeing the zero-based IB virtual address for the queue, we
	 * are able to conserve this memory.
	 *
	 * Note: If SRQ is not user-mappable, then it may come from either
	 * kernel system memory or from HCA-attached local DDR memory.
	 *
	 * Note2: We align this queue on a pagesize boundary.  This is required
	 * to make sure that all the resulting IB addresses will start at 0, for
	 * a zero-based queue.  By making sure we are aligned on at least a
	 * page, any offset we use into our queue will be the same as when we
	 * perform tavor_srq_modify() operations later.
	 */
	wqesz = (1 << srq->srq_wq_log_wqesz);
	srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
	srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
	srq->srq_wqinfo.qa_bind_align = PAGESIZE;
	if (srq_is_umap) {
		srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
	} else {
		srq->srq_wqinfo.qa_location = wq_location;
	}
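	/*
	 * As an illustration: with log_srq_size = 10 and a 64-byte WQE
	 * (srq_wq_log_wqesz = 6), qa_size above works out to 1024 * 64 =
	 * 64KB of work queue memory, page-aligned for both the allocation
	 * and the later DMA bind.
	 */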
	status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
		goto srqalloc_fail4;
	}
	buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

	/*
	 * Register the memory for the SRQ work queues.  The memory for the SRQ
	 * must be registered in the Tavor TPT tables.  This gives us the LKey
	 * to specify in the SRQ context later.  Note: If the work queue is to
	 * be allocated from DDR memory, then only a "bypass" mapping is
	 * appropriate.  And if the SRQ memory is user-mappable, then we force
	 * DDI_DMA_CONSISTENT mapping.  Also, in order to meet the alignment
	 * restriction, we pass the "mro_bind_override_addr" flag in the call
	 * to tavor_mr_register().  This guarantees that the resulting IB vaddr
	 * will be zero-based (modulo the offset into the first page).  If we
	 * fail here, we still have the bunch of resource and reference count
	 * cleanup to do.
	 */
	flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
	    IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len   = srq->srq_wqinfo.qa_size;
	mr_attr.mr_as    = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	if (srq_is_umap) {
		mr_op.mro_bind_type   = state->ts_cfg_profile->cp_iommu_bypass;
	} else {
		if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
			mr_op.mro_bind_type =
			    state->ts_cfg_profile->cp_iommu_bypass;
			dma_xfer_mode =
			    state->ts_cfg_profile->cp_streaming_consistent;
			if (dma_xfer_mode == DDI_DMA_STREAMING) {
				mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
			}
		} else {
			mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
		}
	}
	mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
	mr_op.mro_bind_override_addr = 1;
	status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
		goto srqalloc_fail5;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
	addr = mr->mr_bindinfo.bi_addr;
	lkey = mr->mr_lkey;

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space.  This will be used when
	 * posting work requests to properly initialize each WQE.
	 */
	srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
	    (uint64_t)mr->mr_bindinfo.bi_addr;
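	/*
	 * Because the registered IB address is zero-based (modulo the offset
	 * into the first page, see above), srq_desc_off is essentially the
	 * kernel virtual address of the queue buffer; subtracting it from a
	 * WQE's kernel address recovers the queue-relative IB address used
	 * when chaining WQEs together.
	 */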

	/*
	 * Create WQL and Wridlist for use by this SRQ
	 */
	srq->srq_wrid_wql = tavor_wrid_wql_create(state);
	if (srq->srq_wrid_wql == NULL) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
		goto srqalloc_fail6;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql)))

	srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
	if (srq->srq_wridlist == NULL) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
		goto srqalloc_fail7;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist)))

	srq->srq_wridlist->wl_srq_en = 1;
	srq->srq_wridlist->wl_free_list_indx = -1;
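	/*
	 * The free list itself is built later by tavor_wrid_list_srq_init()
	 * (see the non-umap case at the end of this routine and the resize
	 * path in tavor_srq_modify()); until then wl_free_list_indx stays
	 * at -1.
	 */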

	/*
	 * Fill in all the return arguments (if necessary).  This includes
	 * real queue size and real SGLs.
	 */
	if (real_sizes != NULL) {
		real_sizes->srq_wr_sz = (1 << log_srq_size);
		real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
	}

	/*
	 * Fill in the SRQC entry.  This is the final step before passing
	 * ownership of the SRQC entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the SRQC.  Note: If this SRQ is going to be
	 * used for userland access, then we need to set the UAR page number
	 * appropriately (otherwise it's a "don't care")
	 */
	bzero(&srqc_entry, sizeof (tavor_hw_srqc_t));
	srqc_entry.wqe_addr_h	   = (addr >> 32);
	srqc_entry.next_wqe_addr_l = 0;
	srqc_entry.ds		   = (wqesz >> 4);
	srqc_entry.state	   = TAVOR_SRQ_STATE_HW_OWNER;
	srqc_entry.pd		   = pd->pd_pdnum;
	srqc_entry.lkey		   = lkey;
	srqc_entry.wqe_cnt	   = 0;
	if (srq_is_umap) {
		srqc_entry.uar	   = uarpg;
	} else {
		srqc_entry.uar	   = 0;
	}
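	/*
	 * Note that the "ds" (descriptor size) field above is expressed in
	 * 16-byte units (hence the shift by 4), so, for example, a 64-byte
	 * WQE is programmed as ds = 4.
	 */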

	/*
	 * Write the SRQC entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware (using the Tavor SW2HW_SRQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
	    sizeof (tavor_hw_srqc_t), srq->srq_srqnum,
	    sleepflag);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command");
		goto srqalloc_fail8;
	}

	/*
	 * Fill in the rest of the Tavor SRQ handle.  We can update
	 * the following fields for use in further operations on the SRQ.
	 */
	srq->srq_srqcrsrcp = srqc;
	srq->srq_rsrcp	   = rsrc;
	srq->srq_mrhdl	   = mr;
	srq->srq_refcnt	   = 0;
	srq->srq_is_umap   = srq_is_umap;
	srq->srq_uarpg	   = (srq->srq_is_umap) ? uarpg : 0;
	srq->srq_umap_dhp  = (devmap_cookie_t)NULL;
	srq->srq_pdhdl	   = pd;
	srq->srq_wq_lastwqeindx = -1;
	srq->srq_wq_bufsz  = (1 << log_srq_size);
	srq->srq_wq_buf	   = buf;
	srq->srq_desc_off  = srq_desc_off;
	srq->srq_hdlrarg   = (void *)ibt_srqhdl;
	srq->srq_state	   = 0;
	srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
	srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;

	/* Determine if later ddi_dma_sync will be necessary */
	srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);

	/*
	 * Put SRQ handle in Tavor SRQNum-to-SRQhdl list.  Then fill in the
	 * "srqhdl" and return success
	 */
	ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL);
	state->ts_srqhdl[srqc->tr_indx] = srq;

	/*
	 * If this is a user-mappable SRQ, then we need to insert the
	 * previously allocated entry into the "userland resources database".
	 * This will allow for later lookup during devmap() (i.e. mmap())
	 * calls.
	 */
	if (srq->srq_is_umap) {
		tavor_umap_db_add(umapdb);
	} else {
		mutex_enter(&srq->srq_wrid_wql->wql_lock);
		tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0);
		mutex_exit(&srq->srq_wrid_wql->wql_lock);
	}

	*srqhdl = srq;

	TAVOR_TNF_EXIT(tavor_srq_alloc);
	return (status);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
srqalloc_fail8:
	kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size *
	    sizeof (tavor_wrid_entry_t));
	kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
srqalloc_fail7:
	tavor_wql_refcnt_dec(srq->srq_wrid_wql);
srqalloc_fail6:
	if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to deregister SRQ memory");
	}
srqalloc_fail5:
	tavor_queue_free(state, &srq->srq_wqinfo);
srqalloc_fail4:
	if (srq_is_umap) {
		tavor_umap_db_free(umapdb);
	}
srqalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
srqalloc_fail2:
	tavor_rsrc_free(state, &srqc);
srqalloc_fail1:
	tavor_pd_refcnt_dec(pd);
srqalloc_fail:
	TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_srq_alloc);
	return (status);
}


/*
 * tavor_srq_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag)
{
	tavor_rsrc_t		*srqc, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	uint64_t		value;
	tavor_srqhdl_t		srq;
	tavor_mrhdl_t		mr;
	tavor_pdhdl_t		pd;
	tavor_hw_srqc_t		srqc_entry;
	uint32_t		srqnum;
	uint32_t		size;
	uint_t			maxprot;
	int			status;

	TAVOR_TNF_ENTER(tavor_srq_free);

	/*
	 * Pull all the necessary information from the Tavor Shared Receive
	 * Queue handle.  This is necessary here because the resource for the
	 * SRQ handle is going to be freed up as part of this operation.
	 */
	srq	= *srqhdl;
	mutex_enter(&srq->srq_lock);
	srqc	= srq->srq_srqcrsrcp;
	rsrc	= srq->srq_rsrcp;
	pd	= srq->srq_pdhdl;
	mr	= srq->srq_mrhdl;
	srqnum	= srq->srq_srqnum;

	/*
	 * If there are work queues still associated with the SRQ, then return
	 * an error.  Otherwise, we will be holding the SRQ lock.
	 */
	if (srq->srq_refcnt != 0) {
		mutex_exit(&srq->srq_lock);
		TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "",
		    tnf_int, refcnt, srq->srq_refcnt);
		TAVOR_TNF_EXIT(tavor_srq_free);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * If this was a user-mappable SRQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
	 * We also need to invalidate the SRQ tracking information for the
	 * user mapping.
	 */
	if (srq->srq_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum,
		    MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&srq->srq_lock);
			TAVOR_WARNING(state, "failed to find in database");
			TAVOR_TNF_EXIT(tavor_srq_free);
			return (ibc_get_ci_failure(0));
		}
		tavor_umap_db_free(umapdb);
		if (srq->srq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(srq->srq_umap_dhp,
			    state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&srq->srq_lock);
				TAVOR_WARNING(state, "failed in SRQ memory "
				    "devmap_devmem_remap()");
				TAVOR_TNF_EXIT(tavor_srq_free);
				return (ibc_get_ci_failure(0));
			}
			srq->srq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Tavor SRQNum-to-SRQHdl list.  This will allow any
	 * in-progress events to detect that the SRQ corresponding to this
	 * number has been freed.
	 */
	state->ts_srqhdl[srqc->tr_indx] = NULL;

	mutex_exit(&srq->srq_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist));

	/*
	 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
	 * firmware command).  If the ownership transfer fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
	    sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
	if (status != TAVOR_CMD_SUCCESS) {
		TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
		cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_srq_free);
		return (IBT_FAILURE);
	}

	/*
	 * Deregister the memory for the Shared Receive Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to deregister SRQ memory");
		TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_srq_free);
		return (IBT_FAILURE);
	}

	/* Calculate the size and free the wridlist container */
	if (srq->srq_wridlist != NULL) {
		size = (srq->srq_wridlist->wl_size *
		    sizeof (tavor_wrid_entry_t));
		kmem_free(srq->srq_wridlist->wl_wre, size);
		kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));

		/*
		 * Release reference to WQL; If this is the last reference,
		 * this call also has the side effect of freeing up the
		 * 'srq_wrid_wql' memory.
		 */
		tavor_wql_refcnt_dec(srq->srq_wrid_wql);
	}

	/* Free the memory for the SRQ */
	tavor_queue_free(state, &srq->srq_wqinfo);

	/* Free the Tavor SRQ Handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free the SRQC entry resource */
	tavor_rsrc_free(state, &srqc);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the srqhdl pointer to NULL and return success */
	*srqhdl = NULL;

	TAVOR_TNF_EXIT(tavor_srq_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_srq_modify()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size,
    uint_t *real_size, uint_t sleepflag)
{
	tavor_qalloc_info_t	new_srqinfo, old_srqinfo;
	tavor_rsrc_t		*mtt, *mpt, *old_mtt;
	tavor_bind_info_t	bind;
	tavor_bind_info_t	old_bind;
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_mrhdl_t		mr;
	tavor_hw_mpt_t		mpt_entry;
	tavor_wrid_entry_t	*wre_new, *wre_old;
	uint64_t		mtt_ddrbaseaddr, mtt_addr;
	uint64_t		srq_desc_off;
	uint32_t		*buf, srq_old_bufsz;
	uint32_t		wqesz;
	uint_t			max_srq_size;
	uint_t			dma_xfer_mode, mtt_pgsize_bits;
	uint_t			srq_sync, log_srq_size, maxprot;
	uint_t			wq_location;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_srq_modify);

	/*
	 * Check the "inddr" flag.  This flag tells the driver whether the
	 * SRQ's work queues should come from normal system memory or be
	 * allocated from DDR memory.
	 */
	wq_location = state->ts_cfg_profile->cp_srq_wq_inddr;

	/*
	 * If size requested is larger than device capability, return
	 * Insufficient Resources
	 */
	max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz);
	if (size > max_srq_size) {
		TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_srq_modify);
		return (IBT_HCA_WR_EXCEEDED);
	}

	/*
	 * Calculate the appropriate size for the SRQ.
	 * Note:  All Tavor SRQs must be a power-of-2 in size.  Also
	 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE.  This step
	 * is to round the requested size up to the next highest power-of-2
	 */
	size = max(size, TAVOR_SRQ_MIN_SIZE);
	log_srq_size = highbit(size);
	if ((size & (size - 1)) == 0) {
		log_srq_size = log_srq_size - 1;
	}

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits).
	 */
	if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
		goto srqmodify_fail;
	}

	/*
	 * Allocate the memory for newly resized Shared Receive Queue.
	 *
	 * Note: If SRQ is not user-mappable, then it may come from either
	 * kernel system memory or from HCA-attached local DDR memory.
	 *
	 * Note2: We align this queue on a pagesize boundary.  This is required
	 * to make sure that all the resulting IB addresses will start at 0,
	 * for a zero-based queue.  By making sure we are aligned on at least a
	 * page, any offset we use into our queue will be the same as it was
	 * when we allocated it at tavor_srq_alloc() time.
	 */
	wqesz = (1 << srq->srq_wq_log_wqesz);
	new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
	new_srqinfo.qa_alloc_align = PAGESIZE;
	new_srqinfo.qa_bind_align  = PAGESIZE;
	if (srq->srq_is_umap) {
		new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
	} else {
		new_srqinfo.qa_location = wq_location;
	}
	status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
		goto srqmodify_fail;
	}
	buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

	/*
	 * Allocate the memory for the new WRE list.  This will be used later
	 * when we resize the wridlist based on the new SRQ size.
	 */
	wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
	    sizeof (tavor_wrid_entry_t), sleepflag);
	if (wre_new == NULL) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
		    "failed wre_new alloc");
		goto srqmodify_fail;
	}
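	/*
	 * The new WRE array is sized for the resized queue (1 << log_srq_size
	 * entries); the old entries are copied into it further below, after
	 * the hardware has been updated with the new queue location.
	 */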

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
	bzero(&bind, sizeof (tavor_bind_info_t));
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = (uint64_t)(uintptr_t)buf;
	bind.bi_len   = new_srqinfo.qa_size;
	bind.bi_as    = NULL;
	bind.bi_flags = ((sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
	    IBT_MR_NOSLEEP) | IBT_MR_ENABLE_LOCAL_WRITE;
	if (srq->srq_is_umap) {
		bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
	} else {
		if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
			bind.bi_bypass =
			    state->ts_cfg_profile->cp_iommu_bypass;
			dma_xfer_mode =
			    state->ts_cfg_profile->cp_streaming_consistent;
			if (dma_xfer_mode == DDI_DMA_STREAMING) {
				bind.bi_flags |= IBT_MR_NONCOHERENT;
			}
		} else {
			bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
		}
	}
	status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		kmem_free(wre_new, (1 << log_srq_size) *
		    sizeof (tavor_wrid_entry_t));
		tavor_queue_free(state, &new_srqinfo);
		goto srqmodify_fail;
	}

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space.  This will be used when
	 * posting work requests to properly initialize each WQE.
	 *
	 * Note: bind addr is zero-based (from alloc) so we calculate the
	 * correct new offset here.
	 */
	bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
	srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
	    (uint64_t)bind.bi_addr;
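	/*
	 * For example, with a zero offset into the first MTT page the
	 * resulting srq_desc_off is simply the kernel virtual address of the
	 * new buffer, matching the way the offset was originally computed
	 * from the zero-based registered address in tavor_srq_alloc().
	 */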

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are modifying the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win_len	= bind.bi_len;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;
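	/*
	 * The low word of the MTT segment address is programmed in 64-byte
	 * units, hence the shift by 6 (assuming 8-byte MTT entries, i.e.
	 * TAVOR_MTT_SIZE_SHIFT of 3, a segment covers eight entries).
	 */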

	/*
	 * Now we grab the SRQ lock.  Since we will be updating the actual
	 * SRQ location and the producer/consumer indexes, we should hold
	 * the lock.
	 *
	 * We do a TAVOR_NOSLEEP here (and below), though, because we are
	 * holding the "srq_lock" and if we got raised to interrupt level
	 * by priority inversion, we would not want to block in this routine
	 * waiting for success.
	 */
	mutex_enter(&srq->srq_lock);

	/*
	 * Copy old entries to new buffer
	 */
	srq_old_bufsz = srq->srq_wq_bufsz;
	bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);

	/* Determine if later ddi_dma_sync will be necessary */
	srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);

	/* Sync entire "new" SRQ for use by hardware (if necessary) */
	if (srq_sync) {
		(void) ddi_dma_sync(bind.bi_dmahdl, 0,
		    new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Setup MPT information for use in the MODIFY_MPT command
	 */
	mr = srq->srq_mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = srq->srq_mrhdl->mr_mptrsrcp;

	/*
	 * MODIFY_MPT
	 *
	 * If this fails for any reason, then it is an indication that
	 * something (either in HW or SW) has gone seriously wrong.  So we
	 * print a warning message and return.
	 */
	status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx,
	    TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed");
		(void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo,
		    srq->srq_mrhdl->mr_mttrsrcp);
		kmem_free(wre_new, (1 << log_srq_size) *
		    sizeof (tavor_wrid_entry_t));
		tavor_queue_free(state, &new_srqinfo);
		mutex_exit(&mr->mr_lock);
		mutex_exit(&srq->srq_lock);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Update the Tavor Shared Receive Queue handle with all the new
	 * information.  At the same time, save away all the necessary
	 * information for freeing up the old resources
	 */
	old_srqinfo	   = srq->srq_wqinfo;
	old_mtt		   = srq->srq_mrhdl->mr_mttrsrcp;
	bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
	    sizeof (tavor_bind_info_t));

	/* Now set the new info */
	srq->srq_wqinfo	   = new_srqinfo;
	srq->srq_wq_buf	   = buf;
	srq->srq_wq_bufsz  = (1 << log_srq_size);
	bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
	srq->srq_mrhdl->mr_mttrsrcp = mtt;
	srq->srq_desc_off  = srq_desc_off;
	srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);

	/* Update MR mtt pagesize */
	mr->mr_logmttpgsz = mtt_pgsize_bits;
	mutex_exit(&mr->mr_lock);

#ifdef __lock_lint
	mutex_enter(&srq->srq_wrid_wql->wql_lock);
#else
	if (srq->srq_wrid_wql != NULL) {
		mutex_enter(&srq->srq_wrid_wql->wql_lock);
	}
#endif

	/*
	 * Initialize new wridlist, if needed.
	 *
	 * If a wridlist already is setup on an SRQ (the QP associated with an
	 * SRQ has moved "from_reset") then we must update this wridlist based
	 * on the new SRQ size.  We allocate the new size of Work Request ID
	 * Entries, copy over the old entries to the new list, and
	 * re-initialize the srq wridlist in non-umap case
	 */
	wre_old = NULL;
	if (srq->srq_wridlist != NULL) {
		wre_old = srq->srq_wridlist->wl_wre;

		bcopy(wre_old, wre_new, srq_old_bufsz *
		    sizeof (tavor_wrid_entry_t));

		/* Setup new sizes in wre */
		srq->srq_wridlist->wl_wre = wre_new;
		srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;

		if (!srq->srq_is_umap) {
			tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
			    srq_old_bufsz);
		}
	}

#ifdef __lock_lint
	mutex_exit(&srq->srq_wrid_wql->wql_lock);
#else
	if (srq->srq_wrid_wql != NULL) {
		mutex_exit(&srq->srq_wrid_wql->wql_lock);
	}
#endif

	/*
	 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
	 * to a user process, then we need to call devmap_devmem_remap() to
	 * invalidate the mapping to the SRQ memory.  We also need to
	 * invalidate the SRQ tracking information for the user mapping.
	 *
	 * Note: On failure, the remap really shouldn't ever happen.  So, if it
	 * does, it is an indication that something has gone seriously wrong.
	 * So we print a warning message and return error (knowing, of course,
	 * that the "old" SRQ memory will be leaked)
	 */
	if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
		maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
		status = devmap_devmem_remap(srq->srq_umap_dhp,
		    state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
		    DEVMAP_MAPPING_INVALID, NULL);
		if (status != DDI_SUCCESS) {
			mutex_exit(&srq->srq_lock);
			TAVOR_WARNING(state, "failed in SRQ memory "
			    "devmap_devmem_remap()");
			/* We can, however, free the memory for old wre */
			if (wre_old != NULL) {
				kmem_free(wre_old, srq_old_bufsz *
				    sizeof (tavor_wrid_entry_t));
			}
			TAVOR_TNF_EXIT(tavor_srq_modify);
			return (ibc_get_ci_failure(0));
		}
		srq->srq_umap_dhp = (devmap_cookie_t)NULL;
	}

	/*
	 * Drop the SRQ lock now.  The only thing left to do is to free up
	 * the old resources.
	 */
	mutex_exit(&srq->srq_lock);

	/*
	 * Unbind the MTT entries.
	 */
	status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind old SRQ memory");
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "failed to unbind (old)");
		goto srqmodify_fail;
	}

	/* Free the memory for old wre */
	if (wre_old != NULL) {
		kmem_free(wre_old, srq_old_bufsz *
		    sizeof (tavor_wrid_entry_t));
	}

	/* Free the memory for the old SRQ */
	tavor_queue_free(state, &old_srqinfo);

	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real new SRQ size.
	 */
	if (real_size != NULL) {
		*real_size = (1 << log_srq_size);
	}

	TAVOR_TNF_EXIT(tavor_srq_modify);
	return (DDI_SUCCESS);

srqmodify_fail:
	TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_srq_modify);
	return (status);
}


/*
 * tavor_srq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_srq_refcnt_inc(tavor_srqhdl_t srq)
{
	mutex_enter(&srq->srq_lock);
	TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, srq->srq_refcnt);
	srq->srq_refcnt++;
	mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_srq_refcnt_dec(tavor_srqhdl_t srq)
{
	mutex_enter(&srq->srq_lock);
	srq->srq_refcnt--;
	TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, srq->srq_refcnt);
	mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srqhdl_from_srqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the SRQ number is critical to the detection of a
 *    potential race condition in the SRQ handler code (i.e. the case
 *    where a SRQ is freed and alloc'd again before an event for the
 *    "old" SRQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new SRQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported SRQ grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
tavor_srqhdl_t
tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum)
{
	uint_t	srqindx, srqmask;

	/* Calculate the SRQ table index from the srqnum */
	srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1;
	srqindx = srqnum & srqmask;
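	/*
	 * Example: with cp_log_num_srq = 10 the mask is 0x3FF, so SRQ
	 * numbers 0x012 and 0x412 both map to table index 0x012; the
	 * unconstrained upper bits are what allow stale events to be
	 * detected, as described above.
	 */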
	return (state->ts_srqhdl[srqindx]);
}


/*
 * tavor_srq_sgl_to_logwqesz()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
{
	uint_t	max_size, log2, actual_sgl;

	TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz);

	switch (wq_type) {
	case TAVOR_QP_WQ_TYPE_RECVQ:
		/*
		 * Use requested maximum SGL to calculate max descriptor size
		 * (while guaranteeing that the descriptor size is a
		 * power-of-2 cachelines).
		 */
		max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
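		/*
		 * Worked example (assuming, for illustration, 32-byte receive
		 * WQE headers): num_sgl = 4 gives max_size = 32 + 64 = 96,
		 * which rounds up to a 128-byte descriptor (log2 = 7), so the
		 * WQE actually has room for (128 - 32) >> 4 = 6 SGL entries.
		 */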
		break;

	default:
		TAVOR_WARNING(state, "unexpected work queue type");
		TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail,
		    TAVOR_TNF_ERROR, "");
		break;
	}

	/* Fill in the return values */
	*logwqesz = log2;
	*max_sgl  = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl);

	TAVOR_TNF_EXIT(tavor_srq_sgl_to_logwqesz);
}