dapl_hermon_hw.c revision 9517:b4839b0aa7a4
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_tavor_hw.h"
#include "dapl_tavor_wr.h"
#include "dapl_tavor_ibtf_impl.h"

#define	HERMON_WQE_SGL_INVALID_LKEY	0x00000100
#define	HERMON_WQE_SEND_FENCE_MASK	0x40
#define	HERMON_WQE_NDS_MASK		0x3F

#define	HERMON_CQDB_NOTIFY_CQ_SOLICIT	(0x1 << 24)
#define	HERMON_CQDB_NOTIFY_CQ		(0x2 << 24)

#define	HERMON_CQE_RCV_SEND		0x1
#define	HERMON_CQE_ERR_OPCODE		0x1E
#define	HERMON_CQE_RESIZE_OPCODE	0x16
#define	HERMON_CQE_OPCODE_GET(cqe)	(((uint8_t *)cqe)[31] & 0x1F)
#define	HERMON_CQE_SENDRECV_GET(cqe)	(((uint8_t *)cqe)[31] & 0x40)
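/*
 * Ownership test (derived from the macro below): a CQE belongs to
 * software when its owner bit (bit 7 of byte 31) equals the wrap-count
 * parity of the consumer index.  Since cq_size is a power of two,
 * (cq_consindx & cq_size) >> cq_log_cqsz extracts exactly that parity
 * bit from the running, non-wrapped consumer index.
 */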
#define	HERMON_CQE_OWNER_IS_SW(cq, cqe)	((((uint8_t *)cqe)[31] >> 7) == \
			((cq->cq_consindx & cq->cq_size) >> cq->cq_log_cqsz))

#define	HERMON_QP_WQEADDRSZ(wcnt)	((uint32_t)(wcnt << 6))

#define	HERMON_WQE_SEND_SIGNALED_MASK	0x0000000C00000000ull
#define	HERMON_WQE_SEND_SOLICIT_MASK	0x0000000200000000ull
#define	HERMON_WQE_SETCTRL(desc, ctrl)	\
	((uint64_t *)(desc))[1] = HTOBE_64(ctrl)
#define	HERMON_WQE_SETNEXT(desc, nopcode, size, fence)			\
	((uint64_t *)(desc))[0] = HTOBE_64((nopcode) | (size) | (fence) | \
	(((uint64_t)((uint8_t *)desc)[0] & 0x80) << 56))
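/*
 * Data segment builder.  The ordering below is deliberate: the virtual
 * address and key are stored first, and the byte count last, behind
 * membar_producer(), so the hardware can never observe a nonzero byte
 * count for a segment whose address and key are not yet visible.
 */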
#define	HERMON_WQE_BUILD_DATA_SEG(ds, sgl)				\
{									\
	uint64_t		*tmp;					\
									\
	tmp	= (uint64_t *)(ds);					\
	tmp[1]	= HTOBE_64((sgl)->ds_va);				\
	((uint32_t *)tmp)[1] = HTOBE_32((sgl)->ds_key);			\
	membar_producer();						\
	((uint32_t *)tmp)[0] = HTOBE_32((sgl)->ds_len);			\
}


/*
 * Handy macro for the CQ wrap-around mask; cq_size can change while a
 * cq_resize is in flight, so the mask is recomputed from cq_size on
 * each use rather than cached.
 */
#define	cq_wrap_around_mask	(cq->cq_size - 1)

pthread_spinlock_t hermon_bf_lock;

/*
 * Function signatures
 */
extern uint64_t dapls_tavor_wrid_get_entry(ib_cq_handle_t, tavor_hw_cqe_t *,
    uint_t, uint_t, dapls_tavor_wrid_entry_t *);
extern void dapls_tavor_wrid_cq_reap(ib_cq_handle_t);
extern DAPL_OS_LOCK g_tavor_uar_lock;

#ifndef	_LP64
extern void dapls_atomic_assign_64(uint64_t, uint64_t *);
#endif

static int dapli_hermon_wqe_send_build(ib_qp_handle_t, ibt_send_wr_t *,
    uint64_t *, uint_t *);
static DAT_RETURN dapli_hermon_wqe_recv_build(ib_qp_handle_t, ibt_recv_wr_t *,
    uint64_t *, uint_t *);
static int dapli_hermon_cq_cqe_consume(ib_cq_handle_t, uint32_t *, ibt_wc_t *);
static int dapli_hermon_cq_errcqe_consume(ib_cq_handle_t, uint32_t *,
    ibt_wc_t *);
extern void dapli_tavor_wrid_add_entry(dapls_tavor_workq_hdr_t *, uint64_t,
    uint32_t, uint_t);
extern void dapli_tavor_wrid_add_entry_srq(ib_srq_handle_t, uint64_t, uint32_t);

/*
 * Note: The 64-bit doorbells need to be written atomically.
 * In 32-bit libraries we need to use a special assembly routine,
 * because compiler-generated code would split the store into two
 * 32-bit word writes.
 */

/*
 * dapli_hermon_cq_doorbell()
 * Takes the specified cq cmd and cq number and rings the cq doorbell
 */
static void
dapli_hermon_cq_doorbell(dapls_hw_uar_t ia_uar, uint32_t cq_cmd, uint32_t cqn,
    uint32_t cmd_sn, uint32_t cq_param)
{
	uint64_t doorbell;

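	/*
	 * Doorbell layout, as assembled below: the upper 32 bits carry
	 * cmd_sn (bits 29:28 of that word), the CQ command (bits 26:24)
	 * and the CQ number (bits 23:0); the lower 32 bits carry
	 * cq_param, which callers pass as the current consumer index.
	 */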
	/* Build the doorbell from the parameters */
	doorbell = (cmd_sn | cq_cmd | cqn);
	doorbell = (doorbell << 32) | cq_param;

	/* Write the doorbell to UAR */
#ifdef _LP64
	((tavor_hw_uar_t *)ia_uar)->cq = HTOBE_64(doorbell);
	/* 32 bit version */
#elif defined(i386)
	dapl_os_lock(&g_tavor_uar_lock);
	/*
	 * For 32-bit Intel we assign the doorbell in the order
	 * prescribed by the Tavor PRM, lower to upper addresses
	 */
	((tavor_hw_uar32_t *)ia_uar)->cq[0] =
	    (uint32_t)HTOBE_32(doorbell >> 32);
	((tavor_hw_uar32_t *)ia_uar)->cq[1] =
	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
	dapl_os_unlock(&g_tavor_uar_lock);
#else
	dapls_atomic_assign_64(HTOBE_64(doorbell),
	    &((tavor_hw_uar_t *)ia_uar)->cq);
#endif
}

/*
 * dapli_hermon_sq_dbreg()
 * Takes the specified qp number and rings the send doorbell.
 */
static void
dapli_hermon_sq_dbreg(dapls_hw_uar_t ia_uar, uint32_t qpn)
{
	uint64_t doorbell;

	doorbell = qpn << 8;

	/* Write the doorbell to UAR */
#ifdef _LP64
	((tavor_hw_uar_t *)ia_uar)->send = HTOBE_64(doorbell);
#else
#if defined(i386)
	dapl_os_lock(&g_tavor_uar_lock);
	/*
	 * For 32-bit Intel we assign the doorbell in the order
	 * prescribed by the Tavor PRM, lower to upper addresses
	 */
	((tavor_hw_uar32_t *)ia_uar)->send[0] =
	    (uint32_t)HTOBE_32(doorbell >> 32);
	((tavor_hw_uar32_t *)ia_uar)->send[1] =
	    (uint32_t)HTOBE_32(doorbell & 0x00000000ffffffff);
	dapl_os_unlock(&g_tavor_uar_lock);
#else
	dapls_atomic_assign_64(HTOBE_64(doorbell),
	    &((tavor_hw_uar_t *)ia_uar)->send);
#endif
#endif
}

/*
 * dapli_hermon_wqe_send_build()
 * Constructs a WQE for a given ibt_send_wr_t
 */
static int
dapli_hermon_wqe_send_build(ib_qp_handle_t qp, ibt_send_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
	tavor_hw_snd_wqe_remaddr_t	*rc;
	tavor_hw_snd_wqe_bind_t		*bn;
	tavor_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint8_t				*src, *dst, *maxdst;
	uint32_t			nds;
	int				len, thislen, maxlen;
	uint32_t			new_rkey;
	uint32_t			old_rkey;
	int				i, num_ds;
	int				max_inline_bytes = -1;
	uint64_t			ctrl;
	uint64_t			nopcode;
	uint_t				my_size;

	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	ctrl = ((wr->wr_flags & IBT_WR_SEND_SIGNAL) ?
	    HERMON_WQE_SEND_SIGNALED_MASK : 0) |
	    ((wr->wr_flags & IBT_WR_SEND_SOLICIT) ?
	    HERMON_WQE_SEND_SOLICIT_MASK : 0);

	/*
	 * RC is the only supported transport in UDAPL.
	 * For RC requests, we allow "Send", "RDMA Read", "RDMA Write"
	 */
	switch (wr->wr_opcode) {
	case IBT_WRC_SEND:
		/*
		 * If this is a Send request, then all we need is
		 * the Data Segment processing below.
		 * Initialize the information for the Data Segments
		 */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));
		if (qp->qp_sq_inline != 0)
			max_inline_bytes =
			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_SEND;
		nopcode = TAVOR_WQE_SEND_NOPCODE_SEND;
		break;
	case IBT_WRC_RDMAW:
		if (qp->qp_sq_inline != 0)
			max_inline_bytes =
			    qp->qp_sq_wqesz - TAVOR_INLINE_HEADER_SIZE_RDMAW;
		nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAW;
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR) {
			if (qp->qp_sq_inline < 0)
				qp->qp_sq_inline = 0;
			nopcode = TAVOR_WQE_SEND_NOPCODE_RDMAR;
		}
		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		rc = (tavor_hw_snd_wqe_remaddr_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		TAVOR_WQE_BUILD_REMADDR(rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (tavor_hw_snd_wqe_remaddr_t));
		break;
	case IBT_WRC_BIND:
		/*
		 * Generate a new R_key.  Keep the upper "constrained"
		 * bits, which represent the MPT index, the same, and
		 * increment the lower 8 "unconstrained" key bits.
		 */
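		/*
		 * Worked example of the computation below: an old R_key
		 * of 0x001234FF splits into MPT index 0x001234 and key
		 * byte 0xFF; the key byte increments (wrapping) to 0x00,
		 * so the new R_key becomes 0x00123400.
		 */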
#if 0
	/* XXX - need equiv of "hermon_wr_bind_check(state, wr);" */
	/* XXX - uses hermon_mr_keycalc - what about Sinai vs. Arbel??? */
#endif
		old_rkey = wr->wr.rc.rcwr.bind->bind_rkey;
		new_rkey = old_rkey >> 8;	/* index */
		old_rkey = (old_rkey + 1) & 0xff; /* incremented key */
		new_rkey = (new_rkey << 8) | old_rkey;

		wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;

		bn = (tavor_hw_snd_wqe_bind_t *)((uintptr_t)addr +
		    sizeof (tavor_hw_snd_wqe_nextctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		TAVOR_WQE_BUILD_BIND(bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (tavor_hw_snd_wqe_bind_t));
		nds = 0;
		nopcode = TAVOR_WQE_SEND_NOPCODE_BIND;
		break;
	default:
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "dapli_hermon_wqe_send_build: invalid wr_opcode=%d\n",
		    wr->wr_opcode);
		return (DAT_INTERNAL_ERROR);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries.
	 */
	if (nds > qp->qp_sq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	if (max_inline_bytes != -1) {		/* compute total_len */
		len = 0;
		for (i = 0; i < nds; i++)
			len += sgl[i].ds_len;
		if (len == 0)
			max_inline_bytes = -1; /* do not inline */
		else {
			/* need to reduce the length by dword "len" fields */
			max_inline_bytes -= (len / 64) * sizeof (uint32_t);
			if (len > max_inline_bytes)
				max_inline_bytes = -1;	/* too big for inline */
		}
	}
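	/*
	 * Worked example of the reduction above: for len = 200 bytes of
	 * inline data, (200 / 64) * 4 = 12 bytes of the inline budget are
	 * consumed by the additional per-64-byte-chunk length dwords, so
	 * max_inline_bytes shrinks by 12 before the final size check.
	 */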
	if (max_inline_bytes != -1) {		/* do "inline" */

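		/*
		 * Inline copy overview (a reading of the loop below):
		 * data is laid down in chunks that end on 64-byte
		 * boundaries, each introduced by a dword holding the
		 * chunk's byte count OR'd with TAVOR_WQE_SGL_INLINE_MASK.
		 * Each length dword is stored only after its payload,
		 * behind membar_producer(), so the hardware never sees a
		 * partially written chunk.
		 */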
		dst = (uint8_t *)((uint32_t *)ds + 1);
		maxdst = (uint8_t *)(((uintptr_t)dst + 64) & ~(64 - 1));
		maxlen = maxdst - dst;
		thislen = 0;
		i = 0;
		src = (uint8_t *)(uintptr_t)sgl[i].ds_va;
		len = sgl[i].ds_len;
		do {
			/* if this sgl overflows the inline segment */
			if (len > maxlen) {
				if (maxlen) /* might be 0 */
					(void) dapl_os_memcpy(dst,
					    src, maxlen);
				membar_producer();
				*(uint32_t *)ds =
				    HTOBE_32((thislen + maxlen) |
				    TAVOR_WQE_SGL_INLINE_MASK);
				thislen = 0;
				len -= maxlen;
				src += maxlen;
				dst = maxdst + sizeof (uint32_t);
				ds = (tavor_hw_wqe_sgl_t *)(void *)maxdst;
				maxdst += 64;
				maxlen = 64 - sizeof (uint32_t);
			} else { /* this sgl fully fits */
				(void) dapl_os_memcpy(dst,
				    src, len);
				maxlen -= len;  /* room left */
				thislen += len;
				dst += len;
				while (++i < nds)
					if (sgl[i].ds_len)
						break;
				if (i >= nds)
					break;
				src = (uint8_t *)(uintptr_t)sgl[i].ds_va;
				len = sgl[i].ds_len;
			}
		} while (i < nds);
		membar_producer();
		*(uint32_t *)ds = HTOBE_32(thislen |
		    TAVOR_WQE_SGL_INLINE_MASK);

		/* Return the size of descriptor (in 16-byte chunks) */
		my_size = ((uintptr_t)dst - (uintptr_t)addr + 15) >> 4;
		if (my_size <= (256 >> 4))
			*size = my_size;	/* use Hermon Blueflame */
		else
			*size = 0;
	} else {
		for (i = 0; i < nds; i++) {
			if (sgl[i].ds_len == 0) {
				continue;
			}

			/*
			 * Fill in the Data Segment(s) for the current WQE,
			 * using the information contained in the
			 * scatter-gather list of the work request.
			 */
			HERMON_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl[i]);
			num_ds++;
		}

		/* Return the size of descriptor (in 16-byte chunks) */
		my_size = ((uintptr_t)&ds[num_ds] - (uintptr_t)addr) >> 4;
		*size = 0;	/* do not use Hermon Blueflame */
	}
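	/*
	 * Publish the WQE.  The control quadword is stored first; then,
	 * behind membar_producer(), the first quadword (opcode, size and
	 * fence) is stored.  HERMON_WQE_SETNEXT preserves bit 0x80 of
	 * byte 0, the owner bit, which the post routine toggles last.
	 */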
	HERMON_WQE_SETCTRL(addr, ctrl);
	membar_producer();
	HERMON_WQE_SETNEXT(addr, nopcode << 32, my_size,
	    (wr->wr_flags & IBT_WR_SEND_FENCE) ?
	    HERMON_WQE_SEND_FENCE_MASK : 0);

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_wqe_recv_build()
 * Builds the recv WQE for a given ibt_recv_wr_t
 */
static DAT_RETURN
dapli_hermon_wqe_recv_build(ib_qp_handle_t qp, ibt_recv_wr_t *wr,
    uint64_t *addr, uint_t *size)
{
	tavor_hw_wqe_sgl_t	*ds;
	int			i;
	int			num_ds;

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (tavor_hw_wqe_sgl_t *)addr;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}
	/*
	 * If there is room after the last valid data segment, terminate
	 * the list with the special invalid-lkey entry (testing num_ds,
	 * not i, since zero-length SGLs are skipped above; this matches
	 * the SRQ variant below).
	 */
	if (num_ds < qp->qp_rq_sgl) {
		ibt_wr_ds_t sgl;
		sgl.ds_va  = (ib_vaddr_t)0;
		sgl.ds_len = (ib_msglen_t)0;
		sgl.ds_key = (ibt_lkey_t)HERMON_WQE_SGL_INVALID_LKEY;
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &sgl);
	}

	/* Return the size of descriptor (in 16-byte chunks) */
	*size = qp->qp_rq_wqesz >> 4;

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_wqe_srq_build()
 * Builds an SRQ recv WQE for a given ibt_recv_wr_t
 */
static DAT_RETURN
dapli_hermon_wqe_srq_build(ib_srq_handle_t srq, ibt_recv_wr_t *wr,
    uint64_t *addr)
{
	tavor_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		end_sgl;
	int			i;
	int			num_ds;

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (tavor_hw_wqe_sgl_t *)((uintptr_t)addr +
	    sizeof (tavor_hw_rcv_wqe_nextctrl_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (DAT_INVALID_PARAMETER);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Tavor
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * TAVOR_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * For SRQ, if the number of data segments is less than the maximum
	 * specified at alloc, then we have to fill in a special "key" entry in
	 * the sgl entry after the last valid one in this post request.  We do
	 * that here.
	 */
	if (num_ds < srq->srq_wq_sgl) {
		end_sgl.ds_va  = (ib_vaddr_t)0;
		end_sgl.ds_len = (ib_msglen_t)0;
		end_sgl.ds_key = (ibt_lkey_t)HERMON_WQE_SGL_INVALID_LKEY;
		TAVOR_WQE_BUILD_DATA_SEG(&ds[num_ds], &end_sgl);
	}

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_cq_peek()
 * Peeks into a given CQ to check if there are any events that can be
 * polled. It returns the number of CQEs that can be polled.
 */
static void
dapli_hermon_cq_peek(ib_cq_handle_t cq, int *num_cqe)
{
	uint32_t		*cqe;
	uint32_t		imm_eth_pkey_cred;
	uint32_t		cons_indx;
	int			polled_cnt;
	uint_t			doorbell_cnt;
	uint_t			opcode;

	/* Get the consumer index */
	cons_indx = cq->cq_consindx & cq_wrap_around_mask;

	/* Calculate the pointer to the first CQ entry */
	cqe = (uint32_t *)&cq->cq_addr[cons_indx];

	/*
	 * Count entries in the CQ until we find an entry owned by
	 * the hardware.
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
		opcode = HERMON_CQE_OPCODE_GET(cqe);
		/* Error CQEs map to multiple work completions */
		if (opcode == HERMON_CQE_ERR_OPCODE) {
			imm_eth_pkey_cred =
			    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe);
			doorbell_cnt =
			    imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;
			polled_cnt += (doorbell_cnt + 1);
		} else {
			polled_cnt++;
		}
		/* Increment the consumer index */
		cons_indx = (cons_indx + 1) & cq_wrap_around_mask;

		/* Update the pointer to the next CQ entry */
		cqe = (uint32_t *)&cq->cq_addr[cons_indx];
	}

	*num_cqe = polled_cnt;
}

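/*
 * Update the CQ's software doorbell record with the low 24 bits of the
 * running consumer index.  For Hermon this record, rather than a UAR
 * doorbell write, is how the consumer index is communicated back to
 * the hardware (an assumption based on how this code uses it).
 */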
#define	dapli_hermon_cq_update_ci(cq, dbp) \
	(dbp)[0] = HTOBE_32(cq->cq_consindx & 0xFFFFFF)

/*
 * dapli_hermon_cq_resize_helper()
 * This routine switches from the pre-cq_resize buffer to the new buffer.
 */
static int
dapli_hermon_cq_resize_helper(ib_cq_handle_t cq)
{
	int i;

	if ((cq->cq_resize_addr == 0) ||
	    (munmap((char *)cq->cq_addr, cq->cq_map_len) < 0)) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "cq_resize_helper: "
		    "munmap(%p:0x%llx) failed(%d)\n", cq->cq_addr,
		    cq->cq_map_len, errno);
		return (1);	/* FAILED */
	}
	cq->cq_addr		= cq->cq_resize_addr;
	cq->cq_map_offset	= cq->cq_resize_map_offset;
	cq->cq_map_len		= cq->cq_resize_map_len;
	cq->cq_size		= cq->cq_resize_size;
	cq->cq_cqesz		= cq->cq_resize_cqesz;
	cq->cq_resize_addr	= 0;
	cq->cq_resize_map_offset = 0;
	cq->cq_resize_map_len	= 0;
	cq->cq_resize_size	= 0;
	cq->cq_resize_cqesz	= 0;
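	/*
	 * Recompute cq_log_cqsz = log2 of the new cq_size; the CQE
	 * ownership test (HERMON_CQE_OWNER_IS_SW) depends on it.  For
	 * example, a resized cq_size of 256 yields cq_log_cqsz = 8.
	 */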
	for (i = 0; (1 << i) < cq->cq_size; i++)
		;
	cq->cq_log_cqsz = i;

	cq->cq_consindx++;	/* consume the RESIZE cqe */

	return (0);	/* SUCCESS */
}

/*
 * dapli_hermon_cq_poll()
 * This routine polls CQEs out of a CQ and puts them into the ibt_wc_t
 * array that is passed in.
 */
static DAT_RETURN
dapli_hermon_cq_poll(ib_cq_handle_t cq, ibt_wc_t *wc_p, uint_t num_wc,
    uint_t *num_polled)
{
	uint32_t		*cqe;
	uint32_t		cons_indx;
	uint32_t		polled_cnt;
	DAT_RETURN		dat_status;
	int			status;

	/* Get the consumer index */
	cons_indx = cq->cq_consindx & cq_wrap_around_mask;

	/* Calculate the pointer to the first CQ entry */
	cqe = (uint32_t *)&cq->cq_addr[cons_indx];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as the CQEs are owned by SW, process
	 * each entry by calling dapli_hermon_cq_cqe_consume() and updating the
	 * CQ consumer index.  Note:  We only update the consumer index if
	 * dapli_hermon_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
	 * Otherwise, it indicates that we are going to "recycle" the CQE
	 * (probably because it is an error CQE and corresponds to more than
	 * one completion).
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
		if (HERMON_CQE_OPCODE_GET(cqe) == HERMON_CQE_RESIZE_OPCODE) {
			if (dapli_hermon_cq_resize_helper(cq))
				return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
			cons_indx = cq->cq_consindx & cq_wrap_around_mask;
			cqe = (uint32_t *)&cq->cq_addr[cons_indx];
			continue;
		}
		status = dapli_hermon_cq_cqe_consume(cq, cqe,
		    &wc_p[polled_cnt++]);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset to hardware ownership is implicit in Hermon */
			cq->cq_consindx++;	/* incr the total counter */

			/* Increment the consumer index */
			cons_indx = (cons_indx + 1) & cq_wrap_around_mask;

			/* Update the pointer to the next CQ entry */
			cqe = (uint32_t *)&cq->cq_addr[cons_indx];
		}

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled off the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	dat_status = DAT_SUCCESS;
	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we have, for example,
	 * pulled from a CQE that we are still in the process of "recycling"
	 * for error purposes, then we would not update the consumer index.
	 */
	if (polled_cnt != 0) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
	} else if (polled_cnt == 0) {
		/*
		 * If the CQ is empty, we can try to free up some of the WRID
		 * list containers.
		 */
		if (cq->cq_wrid_reap_head)	/* look before leaping */
			dapls_tavor_wrid_cq_reap(cq);
		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
	}

	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	return (dat_status);
}

/*
 * dapli_hermon_cq_poll_one()
 * This routine polls one CQE out of a CQ and puts it into the ibt_wc_t
 * that is passed in.  See above for more comments/details.
 */
static DAT_RETURN
dapli_hermon_cq_poll_one(ib_cq_handle_t cq, ibt_wc_t *wc_p)
{
	uint32_t		*cqe;
	uint32_t		cons_indx;
	DAT_RETURN		dat_status;
	int			status;

start_over:
	/* Get the consumer index */
	cons_indx = cq->cq_consindx & cq_wrap_around_mask;

	/* Calculate the pointer to the first CQ entry */
	cqe = (uint32_t *)&cq->cq_addr[cons_indx];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as the CQEs are owned by SW, process
	 * each entry by calling dapli_hermon_cq_cqe_consume() and updating the
	 * CQ consumer index.  Note:  We only update the consumer index if
	 * dapli_hermon_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.
	 * Otherwise, it indicates that we are going to "recycle" the CQE
	 * (probably because it is an error CQE and corresponds to more than
	 * one completion).
	 */
	if (HERMON_CQE_OWNER_IS_SW(cq, cqe)) {
		if (HERMON_CQE_OPCODE_GET(cqe) == HERMON_CQE_RESIZE_OPCODE) {
			if (dapli_hermon_cq_resize_helper(cq))
				return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
			goto start_over;
		}
		status = dapli_hermon_cq_cqe_consume(cq, cqe, wc_p);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset to hardware ownership is implicit in Hermon */

			/* Increment the consumer index */
			cq->cq_consindx++;
			dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
		}
		dat_status = DAT_SUCCESS;
	} else {
		if (cq->cq_wrid_reap_head)	/* look before leaping */
			dapls_tavor_wrid_cq_reap(cq);
		dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
	}
	return (dat_status);
}

/*
 * dapli_hermon_cq_cqe_consume()
 * Converts a given CQE into an ibt_wc_t object
 */
static int
dapli_hermon_cq_cqe_consume(ib_cq_handle_t cqhdl, uint32_t *cqe,
    ibt_wc_t *wc)
{
	uint_t		flags;
	uint_t		type;
	uint_t		opcode;
	int		status;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call dapli_hermon_cq_errcqe_consume() and
	 * return whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cqe);
	if (opcode == HERMON_CQE_ERR_OPCODE) {
		status = dapli_hermon_cq_errcqe_consume(cqhdl, cqe, wc);
		return (status);
	}
	TAVOR_CQE_WQEADDRSZ_SET(cqe, (HTOBE_32(cqe[6]) >> 10) &
	    ~HERMON_WQE_NDS_MASK);

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, (tavor_hw_cqe_t *)cqe,
	    HERMON_CQE_SENDRECV_GET(cqe) >> 6, 0, NULL);
	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (HERMON_CQE_SENDRECV_GET(cqe) != TAVOR_COMPLETION_RECV) {

		/*
		 * Send CQE
		 *
		 * The following opcodes will not be generated in uDAPL
		 * case TAVOR_CQE_SND_RDMAWR_IMM:
		 * case TAVOR_CQE_SND_SEND_IMM:
		 * case TAVOR_CQE_SND_ATOMIC_CS:
		 * case TAVOR_CQE_SND_ATOMIC_FA:
		 */
		switch (opcode) {
		case TAVOR_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case TAVOR_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case TAVOR_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
			break;

		case TAVOR_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		default:
			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	} else {

		/*
		 * Receive CQE
		 *
		 * The following opcodes will not be generated in uDAPL
		 *
		 * case TAVOR_CQE_RCV_RECV_IMM:
		 * case TAVOR_CQE_RCV_RECV_IMM2:
		 * case TAVOR_CQE_RCV_RDMAWR_IMM:
		 * case TAVOR_CQE_RCV_RDMAWR_IMM2:
		 */
		switch (opcode) {
		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cqe);
			break;
		default:
			wc->wc_status = IBT_WC_LOCAL_CHAN_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	}
	wc->wc_type = type;
	wc->wc_flags = flags;
	/* If we got here, completion status must be success */
	wc->wc_status = IBT_WC_SUCCESS;

	return (TAVOR_CQ_SYNC_AND_DB);
}

/*
 * dapli_hermon_cq_errcqe_consume()
 */
static int
dapli_hermon_cq_errcqe_consume(ib_cq_handle_t cqhdl, uint32_t *cqe,
    ibt_wc_t *wc)
{
	dapls_tavor_wrid_entry_t	wre;
	uint_t			status;
	uint_t			send_or_recv;

	dapl_dbg_log(DAPL_DBG_TYPE_EVD, "errcqe_consume:cqe.eth=%x, wqe=%x\n",
	    TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe),
	    TAVOR_CQE_WQEADDRSZ_GET(cqe));

	status = ((uint8_t *)cqe)[0x1B];
	TAVOR_CQE_WQEADDRSZ_SET(cqe, (HTOBE_32(cqe[6]) >> 10) &
	    ~HERMON_WQE_NDS_MASK);
	if (HERMON_CQE_SENDRECV_GET(cqe) == 0) {
		send_or_recv = 0;
	} else {
		send_or_recv = 1;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = dapls_tavor_wrid_get_entry(cqhdl, (tavor_hw_cqe_t *)cqe,
	    send_or_recv, 1, &wre);
	wc->wc_qpn = TAVOR_CQE_QPNUM_GET(cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status here.
	 */
	switch (status) {
	case TAVOR_CQE_LOC_LEN_ERR:
		status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case TAVOR_CQE_LOC_OP_ERR:
		status = IBT_WC_LOCAL_CHAN_OP_ERR;
		break;

	case TAVOR_CQE_LOC_PROT_ERR:
		status = IBT_WC_LOCAL_PROTECT_ERR;
		break;

	case TAVOR_CQE_WR_FLUSHED_ERR:
		status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case TAVOR_CQE_MW_BIND_ERR:
		status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case TAVOR_CQE_BAD_RESPONSE_ERR:
		status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case TAVOR_CQE_LOCAL_ACCESS_ERR:
		status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_INV_REQ_ERR:
		status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case TAVOR_CQE_REM_ACC_ERR:
		status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_OP_ERR:
		status = IBT_WC_REMOTE_OP_ERR;
		break;

	case TAVOR_CQE_TRANS_TO_ERR:
		status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case TAVOR_CQE_RNRNAK_TO_ERR:
		status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Tavor driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
	 *    case TAVOR_CQE_REM_INV_RD_REQ_ERR:
	 *    case TAVOR_CQE_EEC_REM_ABORTED_ERR:
	 *    case TAVOR_CQE_INV_EEC_NUM_ERR:
	 *    case TAVOR_CQE_INV_EEC_STATE_ERR:
	 *    case TAVOR_CQE_LOC_EEC_ERR:
	 */

	default:
		status = IBT_WC_LOCAL_CHAN_OP_ERR;
		break;
	}
	wc->wc_status = status;
	wc->wc_type = 0;

	/*
	 * Consume the CQE
	 *    Return status to indicate that doorbell and sync may be
	 *    necessary.
	 */
	return (TAVOR_CQ_SYNC_AND_DB);
}

/*
 * dapli_hermon_cq_notify()
 * This function is used for arming the CQ by ringing the CQ doorbell.
 *
 * Note: there is something very subtle here.  This code assumes a very
 * specific behavior of the kernel driver.  The cmd_sn field of the
 * arm_dbr is updated by the kernel driver whenever a notification
 * event for the cq is received.  This code extracts the cmd_sn field
 * from the arm_dbr to know the right value to use.  The arm_dbr is
 * always updated atomically so that neither the kernel driver nor this
 * library will get confused about what the other is doing.
 *
 * Note: param is not used here.  It is necessary for arming a CQ for
 * N completions (param is N), but no uDAPL API supports this for now.
 * Thus, we declare ARGSUSED to make lint happy.
 */
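/*
 * Layout of the 32-bit arm_dbr word as read below (derived from this
 * code): bits 29:28 hold cmd_sn, bits 26:24 hold the last requested
 * command, and bits 23:0 hold the consumer index recorded at arm time.
 */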
/*ARGSUSED*/
static DAT_RETURN
dapli_hermon_cq_notify(ib_cq_handle_t cq, int flags, uint32_t param)
{
	uint32_t	cqnum;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;

	/*
	 * Determine if we are trying to get the next completion or the next
	 * "solicited" completion.  Then hit the appropriate doorbell.
	 */
	cqnum = cq->cq_num;
	target = cq->cq_arm_dbp;
retry:
	cmp = *target;
	tmp = HTOBE_32(cmp);
	old_cmd = tmp & (0x7 << 24);
	cmd_sn = tmp & (0x3 << 28);

	if (flags == IB_NOTIFY_ON_NEXT_COMP) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			new = HTOBE_32(cmd_sn | HERMON_CQDB_NOTIFY_CQ |
			    (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			dapli_hermon_cq_doorbell(cq->cq_iauar,
			    HERMON_CQDB_NOTIFY_CQ, cqnum,
			    cmd_sn, cq->cq_consindx);
		} /* else it's already armed */
	} else if (flags == IB_NOTIFY_ON_NEXT_SOLICITED) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			new = HTOBE_32(cmd_sn | HERMON_CQDB_NOTIFY_CQ_SOLICIT |
			    (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			dapli_hermon_cq_doorbell(cq->cq_iauar,
			    HERMON_CQDB_NOTIFY_CQ_SOLICIT, cqnum,
			    cmd_sn, cq->cq_consindx);
		} /* else it's already armed */
	} else {
		return (DAT_INVALID_PARAMETER);
	}

	return (DAT_SUCCESS);
}

/*
 * Since uDAPL posts 1 wqe per request, we
 * only need to do stores for the last one.
 */
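/*
 * dapli_hermon_wqe_headroom() stamps the WQE that lies qp_sq_headroom
 * entries beyond "start": the first dword of its first 64-byte section
 * is set to all-ones except for the preserved owner bit, and the first
 * dword of every following section is set to 0xFFFFFFFF.  The intent
 * (as this code reads) is to keep the hardware from fetching past the
 * valid WQEs into stale descriptors.
 */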
static void
dapli_hermon_wqe_headroom(ib_qp_handle_t qp, uint32_t start)
{
	uint32_t *wqe_start, *wqe_top, *wqe_base, qsize, invalue;
	int hdrmwqes, wqesizebytes, sectperwqe, i, j;

	qsize = qp->qp_sq_numwqe;
	wqesizebytes = qp->qp_sq_wqesz;
	sectperwqe = wqesizebytes >> 6;
	hdrmwqes = qp->qp_sq_headroom;
	wqe_base = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)TAVOR_QP_SQ_ENTRY(qp, start);

	for (i = 0; i < hdrmwqes - 1; i++) {
		wqe_start += sectperwqe * 16;
		if (wqe_start == wqe_top)
			wqe_start = wqe_base;
	}
	invalue = HTOBE_32(*wqe_start);
	invalue |= 0x7FFFFFFF;
	*wqe_start = HTOBE_32(invalue);
	wqe_start += 16;
	for (j = 1; j < sectperwqe; j++) {
		*wqe_start = 0xFFFFFFFF;
		wqe_start += 16;
	}
}

/*
 * dapli_hermon_post_send()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_send(DAPL_EP *ep, ibt_send_wr_t *wr, boolean_t ns)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*wre_last;
	uint64_t			*desc;
	uint64_t			*wqe_addr;
	uint32_t			desc_sz;
	uint32_t			wqeaddrsz, signaled_dbd = 0;
	uint32_t			head, tail, next_tail, qsize_msk;
	int				status;
	ib_qp_handle_t			qp;

	if ((ep->qp_state == IBT_STATE_RESET) ||
	    (ep->qp_state == IBT_STATE_INIT) ||
	    (ep->qp_state == IBT_STATE_RTR)) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "post_send: invalid qp_state %d\n", ep->qp_state);
		return (DAT_INVALID_STATE);
	}

	qp = ep->qp_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
	wridlist  = qp->qp_sq_wqhdr->wq_wrid_post;

	/* Save away some initial QP state */
	qsize_msk = qp->qp_sq_wqhdr->wq_size - 1;
	tail	  = qp->qp_sq_wqhdr->wq_tail;
	head	  = qp->qp_sq_wqhdr->wq_head;

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * then no more WQEs can be posted; return an error
	 */
	if (qp->qp_sq_wqhdr->wq_full != 0) {
		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/*
	 * Increment the "tail index" and check for "queue full" condition.
	 * If we detect that the current work request is going to fill the
	 * work queue, then we mark this condition and continue.
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (next_tail == head) {
		qp->qp_sq_wqhdr->wq_full = 1;
	}

	/*
	 * Get the user virtual address of the location where the next
	 * Send WQE should be built
	 */
	wqe_addr = TAVOR_QP_SQ_ENTRY(qp, tail);

	/*
	 * Call dapli_hermon_wqe_send_build() to build the WQE at the given
	 * address.  This routine uses the information in the ibt_send_wr_t
	 * and returns the size of the WQE.
	 */
	status = dapli_hermon_wqe_send_build(qp, wr, wqe_addr, &desc_sz);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);
		return (status);
	}

	/*
	 * Get the descriptor (io address) corresponding to the location
	 * where the Send WQE was built.
	 */
	desc = TAVOR_QP_SQ_ENTRY(qp, tail);

	/*
	 * Add a WRID entry to the WRID list.  Need to calculate the
	 * "wqeaddr" to pass to dapli_tavor_wrid_add_entry().
	 * signaled_dbd is still calculated, but ignored.
	 */
	wqeaddrsz = HERMON_QP_WQEADDRSZ(qp->qp_sq_counter);

	if (wr->wr_flags & IBT_WR_SEND_SIGNAL) {
		signaled_dbd = TAVOR_WRID_ENTRY_SIGNALED;
	}

	dapli_tavor_wrid_add_entry(qp->qp_sq_wqhdr, wr->wr_id, wqeaddrsz,
	    signaled_dbd);

	dapli_hermon_wqe_headroom(qp, next_tail);
	*(uint8_t *)desc ^= 0x80;	/* toggle the owner bit */

	/*
	 * Now if the WRID tail entry is non-NULL, then this
	 * represents the entry to which we are chaining the
	 * new entries.  Since we are going to ring the
	 * doorbell for this WQE, we want to set its "dbd" bit.
	 *
	 * On the other hand, if the tail is NULL, even though
	 * we will have rung the doorbell for the previous WQE
	 * (for the hardware's sake) it is irrelevant to our
	 * purposes (for tracking WRIDs) because we know the
	 * request must have already completed.
	 */
	wre_last = wridlist->wl_wre_old_tail;
	if (wre_last != NULL) {
		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
	}

	/* Update some of the state in the QP */
	qp->qp_sq_lastwqeaddr	 = wqe_addr;
	qp->qp_sq_wqhdr->wq_tail = next_tail;

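	/*
	 * BlueFlame path (as this code reads): instead of ringing the
	 * send doorbell, the whole descriptor is copied into one of two
	 * 256-byte BlueFlame buffers in the UAR page, alternating via
	 * qp_ia_bf_toggle, after patching the send counter and QP number
	 * into the control segment.  Descriptors too large for BlueFlame
	 * (desc_sz == 0) fall back to the regular doorbell below.
	 */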
	if (desc_sz && qp->qp_ia_bf != NULL) {	/* use Hermon Blueflame */
		uint64_t *bf_dest, *src64;
		uint8_t *src8;
		int i;

		(void) pthread_spin_lock(&hermon_bf_lock);

		src8 = (uint8_t *)desc;
		src8[1] = (uint8_t)(qp->qp_sq_counter >> 8);
		src8[2] = (uint8_t)qp->qp_sq_counter;
		src8[4] = (uint8_t)(qp->qp_num >> 16);
		src8[5] = (uint8_t)(qp->qp_num >> 8);
		src8[6] = (uint8_t)qp->qp_num;

		src64 = (uint64_t *)desc;
		bf_dest = (uint64_t *)((uintptr_t)qp->qp_ia_bf +
		    *qp->qp_ia_bf_toggle);
		*qp->qp_ia_bf_toggle ^= 256;	/* 2 256-byte buffers */
		for (i = 0; i < desc_sz * 2; i += 2) {
			bf_dest[i] = src64[i];
			bf_dest[i + 1] = src64[i + 1];
		}
		(void) pthread_spin_unlock(&hermon_bf_lock);
	} else {
		/* Ring the doorbell */
		dapli_hermon_sq_dbreg(qp->qp_iauar, qp->qp_num);
	}
	qp->qp_sq_counter++;

	dapl_os_unlock(&qp->qp_sq_wqhdr->wq_wrid_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_post_recv()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_recv(DAPL_EP	*ep, ibt_recv_wr_t *wr, boolean_t ns)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*wre_last;
	ib_qp_handle_t			qp;
	DAT_RETURN			status;
	uint64_t			*wqe_addr;
	uint32_t			desc_sz;
	uint32_t			wqeaddrsz;
	uint32_t			head, tail, next_tail, qsize_msk;

	if (ep->qp_state == IBT_STATE_RESET) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "post_recv: invalid qp_state %d\n", ep->qp_state);
		return (DAT_INVALID_STATE);
	}
	qp = ep->qp_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
	wridlist  = qp->qp_rq_wqhdr->wq_wrid_post;

	/* Save away some initial QP state */
	qsize_msk = qp->qp_rq_wqhdr->wq_size - 1;
	tail	  = qp->qp_rq_wqhdr->wq_tail;
	head	  = qp->qp_rq_wqhdr->wq_head;

	/*
	 * For the ibt_recv_wr_t passed in, parse the request and build a
	 * Recv WQE. Link the WQE with the previous WQE and ring the
	 * doorbell.
	 */

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * then no more WQEs can be posted. So return an error.
	 */
	if (qp->qp_rq_wqhdr->wq_full != 0) {
		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/*
	 * Increment the "tail index" and check for "queue
	 * full" condition.  If we detect that the current
	 * work request is going to fill the work queue, then
	 * we mark this condition and continue.
	 */
	next_tail = (tail + 1) & qsize_msk;
	if (next_tail == head) {
		qp->qp_rq_wqhdr->wq_full = 1;
	}

	/* The user virtual address of the WQE to be built */
	wqe_addr = TAVOR_QP_RQ_ENTRY(qp, tail);

	/*
	 * Call dapli_hermon_wqe_recv_build() to build the WQE at the given
	 * address. This routine uses the information in the
	 * ibt_recv_wr_t and returns the size of the WQE.
	 */
	status = dapli_hermon_wqe_recv_build(qp, wr, wqe_addr, &desc_sz);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
		return (DAT_INTERNAL_ERROR);
	}

	/*
	 * Add a WRID entry to the WRID list.  Need to calculate the
	 * "wqeaddr" and "signaled_dbd" values to pass to
	 * dapli_tavor_wrid_add_entry().
	 * Note: all Recv WQEs are essentially "signaled"
	 */
	wqeaddrsz = HERMON_QP_WQEADDRSZ(qp->qp_rq_counter);
	dapli_tavor_wrid_add_entry(qp->qp_rq_wqhdr, wr->wr_id, wqeaddrsz,
	    (uint32_t)TAVOR_WRID_ENTRY_SIGNALED);

	/*
	 * Now if the WRID tail entry is non-NULL, then this
	 * represents the entry to which we are chaining the
	 * new entries.  Since we are going to ring the
	 * doorbell for this WQE, we want to set its "dbd" bit.
	 *
	 * On the other hand, if the tail is NULL, even though
	 * we will have rung the doorbell for the previous WQE
	 * (for the hardware's sake) it is irrelevant to our
	 * purposes (for tracking WRIDs) because we know the
	 * request must have already completed.
	 */
	wre_last = wridlist->wl_wre_old_tail;
	if (wre_last != NULL) {
		wre_last->wr_signaled_dbd |= TAVOR_WRID_ENTRY_DOORBELLED;
	}

	/* Update some of the state in the QP */
	qp->qp_rq_lastwqeaddr	 = wqe_addr;
	qp->qp_rq_wqhdr->wq_tail = next_tail;

	/* Update the doorbell record */
	qp->qp_rq_counter++;
	(qp->qp_rq_dbp)[0] = HTOBE_32(qp->qp_rq_counter);

	dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_post_srq()
 */
/* ARGSUSED */
static DAT_RETURN
dapli_hermon_post_srq(DAPL_SRQ *srqp, ibt_recv_wr_t *wr, boolean_t ns)
{
	ib_srq_handle_t			srq;
	DAT_RETURN			status;
	uint32_t			desc;
	uint64_t			*wqe_addr;
	uint32_t			head, next_head, qsize_msk;
	uint32_t			wqe_index;


	srq = srqp->srq_handle;

	/* Grab the lock for the WRID list */
	dapl_os_lock(&srq->srq_wridlist->wl_lock->wrl_lock);

	/*
	 * For the ibt_recv_wr_t passed in, parse the request and build a
	 * Recv WQE. Link the WQE with the previous WQE and ring the
	 * doorbell.
	 */

	/*
	 * Check for "queue full" condition.  If the queue is already full,
	 * i.e. there are no free entries, then no more WQEs can be posted.
	 * So return an error.
	 */
	if (srq->srq_wridlist->wl_freel_entries == 0) {
		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
		return (DAT_INSUFFICIENT_RESOURCES);
	}

	/* Save away some initial SRQ state */
	qsize_msk = srq->srq_wridlist->wl_size - 1;
	head	  = srq->srq_wridlist->wl_freel_head;

	next_head = (head + 1) & qsize_msk;

	/* Get the descriptor (IO Address) of the WQE to be built */
	desc = srq->srq_wridlist->wl_free_list[head];

	wqe_index = TAVOR_SRQ_WQ_INDEX(srq->srq_wq_desc_addr, desc,
	    srq->srq_wq_wqesz);

	/* The user virtual address of the WQE to be built */
	wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq, wqe_index);

	/*
	 * Call dapli_hermon_wqe_srq_build() to build the WQE at the given
	 * address. This routine uses the information in the
	 * ibt_recv_wr_t and returns the size of the WQE.
	 */
	status = dapli_hermon_wqe_srq_build(srq, wr, wqe_addr);
	if (status != DAT_SUCCESS) {
		dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);
		return (status);
	}

	/*
	 * Add a WRID entry to the WRID list.
	 */
	dapli_tavor_wrid_add_entry_srq(srq, wr->wr_id, wqe_index);

#if 0
	if (srq->srq_wq_lastwqeindex == -1) {
		last_wqe_addr = NULL;
	} else {
		last_wqe_addr = TAVOR_SRQ_WQ_ENTRY(srq,
		    srq->srq_wq_lastwqeindex);
	}
	/*
	 * Now link the chain to the old chain (if there was one)
	 * and update the wqe_counter in the doorbell record.
	 */
XXX
	dapli_tavor_wqe_srq_linknext(wqe_addr, ns, desc, last_wqe_addr);
#endif

	/* Update some of the state in the SRQ */
	srq->srq_wq_lastwqeindex	 = wqe_index;
	srq->srq_wridlist->wl_freel_head = next_head;
	srq->srq_wridlist->wl_freel_entries--;
	dapl_os_assert(srq->srq_wridlist->wl_freel_entries <=
	    srq->srq_wridlist->wl_size);

	/* Update the doorbell record */
	srq->srq_counter++;
	(srq->srq_dbp)[0] = HTOBE_32(srq->srq_counter);

	dapl_os_unlock(&srq->srq_wridlist->wl_lock->wrl_lock);

	return (DAT_SUCCESS);
}

/*
 * dapli_hermon_cq_srq_entries_flush()
 */
static void
dapli_hermon_cq_srq_entries_flush(ib_qp_handle_t qp)
{
	ib_cq_handle_t		cq;
	dapls_tavor_workq_hdr_t	*wqhdr;
	tavor_hw_cqe_t		*cqe;
	tavor_hw_cqe_t		*next_cqe;
	uint32_t		cons_indx, tail_cons_indx;
	uint32_t		new_indx, check_indx, indx;
	int			cqe_qpnum, cqe_type;
	int			outstanding_cqes, removed_cqes;
	int			i;

	/* ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock)); */

	cq = qp->qp_rq_cqhdl;
	wqhdr = qp->qp_rq_wqhdr;

	dapl_os_assert(wqhdr->wq_wrid_post != NULL);
	dapl_os_assert(wqhdr->wq_wrid_post->wl_srq_en != 0);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_addr[cons_indx];

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (TAVOR_CQE_OWNER_IS_SW(cqe)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx = (tail_cons_indx + 1) & cq_wrap_around_mask;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_addr[tail_cons_indx];
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1) & cq_wrap_around_mask;

	for (i = 0; i < outstanding_cqes; i++) {
		cqe = &cq->cq_addr[check_indx];

		/* Grab QP number from CQE */
		cqe_qpnum = TAVOR_CQE_QPNUM_GET(cqe);
		cqe_type = HERMON_CQE_SENDRECV_GET(cqe);

		/*
		 * If the QP number is the same in the CQE as the QP that we
		 * have on this SRQ, then we must free up the entry off the
		 * SRQ.  We also make sure that the completion type is of the
		 * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
		 * this CQ will be left as-is.  The handling of returning
		 * entries back to HW ownership happens further down.
		 */
		if (cqe_qpnum == qp->qp_num &&
		    cqe_type == TAVOR_COMPLETION_RECV) {
			/* Add back to SRQ free list */
			(void) dapli_tavor_wrid_find_match_srq(
			    wqhdr->wq_wrid_post, cqe);
		} else {
			/* Do Copy */
			if (check_indx != new_indx) {
				next_cqe = &cq->cq_addr[new_indx];
				/*
				 * Copy the CQE into the "next_cqe"
				 * pointer.
				 */
				(void) dapl_os_memcpy(next_cqe, cqe,
				    sizeof (tavor_hw_cqe_t));
			}
			new_indx = (new_indx - 1) & cq_wrap_around_mask;
		}
		/* Move index to next CQE to check */
		check_indx = (check_indx - 1) & cq_wrap_around_mask;
	}

	/* Initialize removed cqes count */
	removed_cqes = 0;

	/* If an entry was removed */
	if (check_indx != new_indx) {

		/*
		 * Set current pointer back to the beginning consumer index.
		 * At this point, all unclaimed entries have been copied to the
		 * index specified by 'new_indx'.  This 'new_indx' will be used
		 * as the new consumer index after we mark all freed entries as
		 * having HW ownership.  We do that here.
		 */

		/* Loop through all entries until we reach our new pointer */
		for (indx = cons_indx; indx <= new_indx;
		    indx = (indx + 1) & cq_wrap_around_mask) {
			removed_cqes++;
			cqe = &cq->cq_addr[indx];

			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cqe);
		}
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1) & cq_wrap_around_mask;

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		dapli_hermon_cq_update_ci(cq, cq->cq_poll_dbp);
	}
}

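/*
 * Pre-link a (shared) receive queue: each WQE's first dword is pointed
 * at the next WQE's descriptor offset (the last links back to the
 * first), and each WQE is given an invalid-lkey terminator segment.
 */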
static void
dapli_hermon_rq_prelink(caddr_t first, uint32_t desc_off, uint32_t wqesz,
    uint32_t numwqe, uint32_t nds)
{
	int i;
	uint32_t *p = (uint32_t *)(uintptr_t)first;
	uint32_t off = desc_off;
	uint32_t pincr = wqesz / sizeof (uint32_t);
	ibt_wr_ds_t sgl;

	sgl.ds_va = (ib_vaddr_t)0;
	sgl.ds_key = HERMON_WQE_SGL_INVALID_LKEY;
	sgl.ds_len = (ib_msglen_t)0;

	for (i = 0; i < numwqe - 1; i++, p += pincr) {
		off += wqesz;
		p[0] = HTOBE_32(off);	/* link curr to next */
		p[1] = nds;		/* nds is 0 for SRQ */
		TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
	}
	p[0] = HTOBE_32(desc_off); /* link last to first */
	p[1] = nds;
	TAVOR_WQE_BUILD_DATA_SEG((void *)&p[2], &sgl);
}

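/*
 * Initialize a send queue by stamping the first dword of every 64-byte
 * WQE section with 0xFFFFFFFF, the same invalid pattern used by the
 * headroom stamping above, so the queue starts out with no entry the
 * hardware could mistake for a valid descriptor (a reading of this
 * code, not a PRM citation).
 */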
static void
dapli_hermon_sq_init(caddr_t first, uint32_t wqesz, uint32_t numwqe)
{
	int i, j;
	uint64_t *wqe = (uint64_t *)(uintptr_t)first;

	for (i = 0; i < numwqe; i++) {
		for (j = 0; j < wqesz; j += 64, wqe += 8)
			*(uint32_t *)wqe = 0xFFFFFFFF;
	}
}

static void
dapli_hermon_qp_init(ib_qp_handle_t qp)
{
	dapli_hermon_sq_init(qp->qp_sq_buf, qp->qp_sq_wqesz, qp->qp_sq_numwqe);
	qp->qp_rq_counter = 0;
	qp->qp_sq_counter = 0;
}

static void
dapli_hermon_cq_init(ib_cq_handle_t cq)
{
	uint32_t i;

	(cq->cq_arm_dbp)[0] = HTOBE_32(1 << 28);
	for (i = 0; (1 << i) < cq->cq_size; i++)
		;
	cq->cq_log_cqsz = i;
	cq->cq_consindx = 0;

	/* cq_resize -- needs testing */
}

static void
dapli_hermon_srq_init(ib_srq_handle_t srq)
{
	/* pre-link the whole shared receive queue */
	dapli_hermon_rq_prelink(srq->srq_addr, srq->srq_wq_desc_addr,
	    srq->srq_wq_wqesz, srq->srq_wq_numwqe, 0);
	srq->srq_counter = 0;

	/* needs testing */
}

void
dapls_init_funcs_hermon(DAPL_HCA *hca_ptr)
{
	hca_ptr->post_send = dapli_hermon_post_send;
	hca_ptr->post_recv = dapli_hermon_post_recv;
	hca_ptr->post_srq = dapli_hermon_post_srq;
	hca_ptr->cq_peek = dapli_hermon_cq_peek;
	hca_ptr->cq_poll = dapli_hermon_cq_poll;
	hca_ptr->cq_poll_one = dapli_hermon_cq_poll_one;
	hca_ptr->cq_notify = dapli_hermon_cq_notify;
	hca_ptr->srq_flush = dapli_hermon_cq_srq_entries_flush;
	hca_ptr->qp_init = dapli_hermon_qp_init;
	hca_ptr->cq_init = dapli_hermon_cq_init;
	hca_ptr->srq_init = dapli_hermon_srq_init;
	hca_ptr->hermon_resize_cq = 1;

	(void) pthread_spin_init(&hermon_bf_lock, 0);
}