hermon_wr.c revision 9517:b4839b0aa7a4
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * hermon_wr.c
 *    Hermon Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Hermon WRID tracking mechanism.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static void hermon_wqe_sync(void *hdl, uint_t sync_from,
    uint_t sync_to, uint_t sync_type, uint_t flag);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

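/*
 * Null scatter entry used to terminate a receive WQE's scatter list when
 * fewer SGL entries are posted than the queue was sized for.  The zero
 * byte count plus the 0x00000100 key (the hardware's reserved "invalid
 * lkey" value) tells the HCA to stop scattering at this entry.
 */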
static	ibt_wr_ds_t	null_sgl = { 0, 0x00000100, 0 };

static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds, dnds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */
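	/*
	 * Note: despite its name, "sectperwqe" here is the WQE size in
	 * 32-bit words (the WQE size in bytes divided by 4); the headroom
	 * stamp loops below step through it 16 words (one 64-byte section)
	 * at a time.
	 */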
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

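	/*
	 * Ring-buffer arithmetic: the queue depth is a power of two, so
	 * indices wrap by masking with "qsize_msk".  Illustration (with
	 * hypothetical sizes): for qsize_msk = 0xF (16 entries), hdrmwqes
	 * = 2, and head = 5, posting at tail = 3 gives (3 + 2) & 0xF == 5
	 * == head, so this WQE takes the last slot that still leaves the
	 * required headroom and the queue is marked full.
	 */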
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
	    sizeof (hermon_hw_snd_wqe_ud_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/* need to know the count of destination nds for backward loop */
	for (dnds = 0, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			dnds++;
	}

	/*
	 * Build a Send or Send_LSO WQE
	 */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		int total_len;
		hermon_hw_wqe_sgl_t *old_ds;

		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

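		/*
		 * The inline LSO area is 4 bytes of segment header plus the
		 * packet headers, rounded up to the next 16-byte boundary;
		 * e.g. a 54-byte header gives (4 + 0xf + 54) & ~0xf == 64.
		 */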
		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		old_ds = ds;
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		for (i = 0; i < nds; i++) {
			if (sgl[i].ds_len == 0)
				continue;
			HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);
			num_ds++;
			i++;
			break;
		}
		membar_producer();
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	} else if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		dest = wr->wr.ud.udwr_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
	} else {
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
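	/*
	 * Walk the SGL backward from the last entry so zero-length entries
	 * can be skipped while the real segments stay packed contiguously;
	 * "last_ds" (computed above) starts one past the final data segment
	 * and is decremented as each segment is filled in.
	 */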
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

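	/*
	 * Publish all WQE contents before handing ownership to the HW:
	 * once the ownership bit below is flipped, the hardware is free to
	 * fetch the descriptor at any time.
	 */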
	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
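		/*
		 * Stamping 0xFFFFFFFF into the first word of each 64-byte
		 * section of the headroom WQEs marks them as invalid
		 * (software-owned), so the hardware's descriptor prefetch
		 * stops cleanly at the end of what was just posted.
		 */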
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e.
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is a memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values set up above (i.e. "sgl", "nds", and the "ds"
	 * pointer).  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == HERMON_QP_RESET) ||
	    (qp->qp_state == HERMON_QP_INIT) ||
	    (qp->qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}
	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_sq_lock);

	if (qp->qp_is_special)
		goto post_many;

	/* Use these optimized functions most of the time */
	if (qp->qp_serv_type == HERMON_QP_UD)
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_RC)
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs  */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then in the final step
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless. NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from the
	 * Work Request List passed in; and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in
	 * fact, chained together for Hermon
	 */

	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

	/*
	 * unlike Tavor & Arbel, tail will maintain the number of the
	 * next (this) WQE to be posted.  Since there is no backward linking
	 * in Hermon, we can always just look ahead
	 */
		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		/* NOTE: don't need to go back one like arbel/tavor */
		sync_from = tail;

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index". Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 1;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 1;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, curr_wr->wr_flags &
				    IBT_WR_SEND_CKSUM, qp);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW
		 * The steps are:
		 * 	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/*
			 * Save away updated "tail index" for the DMA sync
			 * including the headroom that will be needed
			 */
			sync_to = (tail + hdrmwqes) & qsize_msk;

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Do a DMA sync for current send WQE(s) */
			hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_serv_type != HERMON_QP_UD) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_rq_lock);

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;

	wrindx = 0;
	status	  = DDI_SUCCESS;
	/*
	 * Before we begin, save the current "tail index" for later
	 * DMA sync
	 */
	sync_from = tail;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {
		/* Save away updated "tail index" for the DMA sync */
		sync_to = tail;

		hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV,
		    DDI_DMA_SYNC_FORDEV);

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

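		/*
		 * The receive "doorbell" is not a UAR register write; it is
		 * a 16-bit cumulative WQE counter written to the QP's
		 * doorbell record in memory, which the hardware compares
		 * against its own consumer count to see how many new WQEs
		 * are available.
		 */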
		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}


	mutex_exit(&qp->qp_rq_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable SRQ memory.  Note:  We do not allow kernel
	 * clients to post to SRQ memory that is accessible directly by the
	 * user.  If the SRQ memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		hermon_wqe_sync(srq, indx, indx + 1,
		    HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV);
		posted_cnt++;
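		/*
		 * SRQ WQEs are kept on a free list linked through the
		 * "next wqe index" field in each descriptor's header; that
		 * index is stored big-endian, so htons() here acts as the
		 * byte-swap to host order.  Advance the head to the next
		 * free entry.
		 */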
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of the Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e.
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request).
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is a memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is a memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 *
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request. The UDAV will
	 * be used to answer some questions about the request.
	 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note:  By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * of the descriptor).  This has the advantage of both speeding things
	 * up and of not requiring the driver to allocate/register any
	 * additional memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
	desc_sz += 4;

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are set up with VL hardcoded to zero below.
	 *
	 *    Notice also that Source LID is hardcoded to the Permissive LID
	 *    (0xFFFF).  This is also not a problem because if the Destination
	 *    LID is not the Permissive LID, then the "slr" value in the MLX
	 *    transport "next/ctrl" header will be set to zero and the hardware
	 *    will pull the LID from the value in the port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note:  We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	mutex_exit(&ah->ah_lock);


	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management
	 *    Datagram MADs, whether GSI or SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packet headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate
	 * ICRC).
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}


/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_rq_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE - we don't
	 * need to reserve space for a ctrl segment, since there is none
	 * on the recv queue for hermon, but we will need to put an invalid
	 * (null) scatter pointer per the PRM
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/* put the null sgl pointer as well if needed */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wqe_srq_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

1825	/* Fill in the Data Segments (SGL) for the Recv WQE */
1826	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
1827	    sizeof (hermon_hw_srq_wqe_next_t));
1828	num_ds = 0;
1829
1830	/* Check for valid number of SGL entries */
1831	if (wr->wr_nds > srq->srq_wq_sgl) {
1832		return (IBT_QP_SGL_LEN_INVALID);
1833	}
1834
1835	/*
1836	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
1837	 * segments.  Note: We skip any SGL with zero size because Hermon
1838	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
1839	 * the encoding for zero means a 2GB transfer.  Because of this special
1840	 * encoding in the hardware, we mask the requested length with
1841	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1842	 * zero.)
1843	 */
1844	for (i = 0; i < wr->wr_nds; i++) {
1845		if (wr->wr_sgl[i].ds_len == 0) {
1846			continue;
1847		}
1848
1849		/*
1850		 * Fill in the Data Segment(s) for the receive WQE, using the
1851		 * information contained in the scatter-gather list of the
1852		 * work request.
1853		 */
1854		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
1855		num_ds++;
1856	}
1857
	/*
	 * Append the invalid (null) scatter pointer, if needed.
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {  /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {  /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 *    Fills in the headroom WQEs for the Send Queue.
 *    Context: Can be called from interrupt or base context (though it is
 *    currently called only from base context).
 */
static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize	 = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6; 	/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base  = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top	  = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	for (i = 0; i < hdrmwqes; i++)	{
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {		/* 1st section of wqe */
				/* preserve the ownership bit */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}
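
/*
 * Illustration (not part of the driver): the stamping above sets every bit
 * of each headroom WQE except the ownership bit (bit 31) of its first
 * dword, which the OR with 0x7FFFFFFF preserves.  For example:
 *
 *	0x80001234 | 0x7FFFFFFF == 0xFFFFFFFF	(ownership bit stays set)
 *	0x00001234 | 0x7FFFFFFF == 0x7FFFFFFF	(ownership bit stays clear)
 */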

/*
 * hermon_wqe_sync()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
    uint_t sync_type, uint_t flag)
{
	hermon_qphdl_t		qp;
	hermon_srqhdl_t		srq;
	uint64_t		*wqe_from, *wqe_to;
	uint64_t		*wq_base, *wq_top, *qp_base;
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	size_t			length;
	uint32_t		qsize;
	int			status;

	if (sync_type == HERMON_WR_SRQ) {
		srq = (hermon_srqhdl_t)hdl;
		/* Get the DMA handle from SRQ context */
		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
		/* get base addr of the buffer */
		qp_base = (uint64_t *)(void *)srq->srq_wq_buf;
	} else {
		qp = (hermon_qphdl_t)hdl;
		/* Get the DMA handle from QP context */
		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
		/* Determine the base address of the QP buffer */
		if (qp->qp_sq_baseaddr == 0) {
			qp_base = (uint64_t *)(void *)(qp->qp_sq_buf);
		} else {
			qp_base = (uint64_t *)(void *)(qp->qp_rq_buf);
		}
	}

	/*
	 * Depending on the type of the work queue, we grab information
	 * about the address ranges we need to DMA sync.
	 */

	if (sync_type == HERMON_WR_SEND) {
		wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_SQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_sq_bufsz;

		wq_base = HERMON_QP_SQ_ENTRY(qp, 0);
		wq_top	 = HERMON_QP_SQ_ENTRY(qp, qsize);
	} else if (sync_type == HERMON_WR_RECV) {
		wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_RQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_rq_bufsz;

		wq_base = HERMON_QP_RQ_ENTRY(qp, 0);
		wq_top	 = HERMON_QP_RQ_ENTRY(qp, qsize);
	} else {
		wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from);
		wqe_to   = HERMON_SRQ_WQ_ENTRY(srq, sync_to);
		qsize	 = srq->srq_wq_bufsz;

		wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0);
		wq_top	 = HERMON_SRQ_WQ_ENTRY(srq, qsize);
	}

	/*
	 * There are two possible cases for the beginning and end of the WQE
	 * chain we are trying to sync.  Either this is the simple case, where
	 * the end of the chain is at a higher address than the beginning of
	 * the chain, or it is the "wrap-around" case, where the end of the
	 * chain has wrapped past the end of the queue.  In the former case,
	 * we simply need to calculate the span from beginning to end and
	 * sync it.  In the latter case, however, we need to calculate the
	 * span from the top of the work queue to the end of the chain and
	 * sync that, and then we need to find the other portion (from the
	 * beginning of the chain to the end of the queue) and sync that as
	 * well.  Note: if the "top to end" span is actually zero length,
	 * then we don't do a DMA sync because a zero length DMA sync
	 * unnecessarily syncs the entire work queue.
	 */
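	/*
	 * Worked example (illustrative only): with qsize == 64, sync_from ==
	 * 60 and sync_to == 4, the chain wraps, so entries 0-3 are synced
	 * first ("top to end") and entries 60-63 after that ("beginning to
	 * bottom").  With sync_from == 10 and sync_to == 20, a single sync
	 * covering entries 10-19 suffices.
	 */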
	if (wqe_to > wqe_from) {
		/* "From Beginning to End" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);

		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	} else {
		/* "From Top to End" */

		offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base);
		if (length) {
			status = ddi_dma_sync(dmahdl, offset, length, flag);
			if (status != DDI_SUCCESS) {
				return;
			}
		}

		/* "From Beginning to Bottom" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from);
		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	}
}


/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr   = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}
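
	/*
	 * Worked example (illustrative only): for a region registered at
	 * bi_addr == 0x10000 with bi_len == 0x1000, reg_end_addr is 0x10FFF.
	 * A bind with bind_va == 0x10800 and bind_len == 0x800 passes both
	 * checks above, while bind_len == 0x900 fails the length check
	 * because (0x10800 + 0x900) - 1 == 0x110FF lies past the end of the
	 * region.
	 */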

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;
	uint_t			qp_srq_en;

	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */
	qp_srq_en = qp->qp_srq_en;

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#endif
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	uint_t			qp_srq_en;

	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	qp_srq_en = qp->qp_srq_en;
#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#endif
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);

	return (IBT_SUCCESS);
}


/*
 * hermon_wrid_get_entry()
 *    Context: Can be called from interrupt or base context.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t	*wqa;
	hermon_workq_hdr_t	*wq;
	uint64_t		wrid;
	uint_t			send_or_recv, qpnum;
	uint32_t		indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size).  Once we find it, we pull out
	 * the "wrid" field and return it (see below).  XXX Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * entry on success.
	 */
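	/*
	 * Illustration (not part of the driver): the WRID array is a
	 * power-of-two ring indexed by the low bits of the CQE's WQE
	 * address/size field.  For example, with wq_size == 256 the mask is
	 * 0xFF, so a WQEADDRSZ value of 0x10305 selects wq_wrid[0x05].
	 */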
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s	*srq;
		uint64_t		*desc;

		/* put wqe back on the srq free list */
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		wq->wq_head = (indx + 1) & wq->wq_mask;
		wq->wq_full = 0;
	}

	return (wrid);
}


int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t	*cmpp;
	hermon_workq_avl_t	*curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;

	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}


/*
 * hermon_wrid_wqavl_find()
 *    Context: Can be called from interrupt or base context.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t	*curr;
	hermon_workq_compare_t	cmp;

	/*
	 * Look up the send or recv work queue header for the given QP number
	 * in the CQ's AVL tree of work queue headers.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}


/*
 * hermon_wrid_wqhdr_create()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t	*wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;
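
	/*
	 * Note (illustrative): the mask computed above only works as a ring
	 * index if "bufsz" is a power of two (e.g. bufsz == 256 yields
	 * wq_mask == 0xFF); the queue sizes passed in here are assumed to
	 * have been rounded up to a power of two already.
	 */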

	return (wqhdr);
}

void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}


/*
 * hermon_cq_workq_add()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t	cmp;
	avl_index_t		where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
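	/*
	 * Note (illustrative): avl_find() is expected to return NULL here
	 * (this wqavl is not yet in the tree); its side effect is to fill in
	 * "where" with the insertion point that avl_insert() then uses.
	 */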
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}


/*
 * hermon_cq_workq_remove()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}
