/*
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c 306664 2016-10-03 23:49:05Z jhb $");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/sbuf.h>

#include "iw_cxgbe.h"
#include "user.h"

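/*
 * Issue a FW_RI_RES_WR to reset the hardware CQ, then release the software
 * queue, the contiguous queue memory, and the CQ id.
 */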
static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
		      struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	struct c4iw_wr_wait wr_wait;
	struct wrqe *wr;

	wr_len = sizeof *res_wr + sizeof *res;
	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL)
		return (-ENOMEM);
	res_wr = wrtod(wr);
	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_RESET;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);

	kfree(cq->sw_queue);
	contigfree(cq->queue, cq->memsize, M_DEVBUF);
	c4iw_put_cqid(rdev, cq->cqid, uctx);
	return 0;
}

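/*
 * Allocate a CQ id and queue memory for the CQ, then issue a FW_RI_RES_WR
 * to program the hardware ingress queue.  On success the kernel GTS pointer
 * (and, for user CQs, the user doorbell offset) is set up.
 */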
static int
create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
    struct c4iw_dev_ucontext *uctx)
{
	struct adapter *sc = rdev->adap;
	struct fw_ri_res_wr *res_wr;
	struct fw_ri_res *res;
	int wr_len;
	int user = (uctx != &rdev->uctx);
	struct c4iw_wr_wait wr_wait;
	int ret;
	struct wrqe *wr;

	cq->cqid = c4iw_get_cqid(rdev, uctx);
	if (!cq->cqid) {
		ret = -ENOMEM;
		goto err1;
	}

	if (!user) {
		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
		if (!cq->sw_queue) {
			ret = -ENOMEM;
			goto err2;
		}
	}

	cq->queue = contigmalloc(cq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul,
	    PAGE_SIZE, 0);
	if (cq->queue)
		cq->dma_addr = vtophys(cq->queue);
	else {
		ret = -ENOMEM;
		goto err3;
	}

	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
	memset(cq->queue, 0, cq->memsize);

	/* build fw_ri_res_wr */
	wr_len = sizeof *res_wr + sizeof *res;

	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL) {
		ret = -ENOMEM;
		goto err4;
	}
	res_wr = wrtod(wr);

	memset(res_wr, 0, wr_len);
	res_wr->op_nres = cpu_to_be32(
			V_FW_WR_OP(FW_RI_RES_WR) |
			V_FW_RI_RES_WR_NRES(1) |
			F_FW_WR_COMPL);
	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
	res_wr->cookie = (unsigned long) &wr_wait;
	res = res_wr->res;
	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
	res->u.cq.op = FW_RI_RES_OP_WRITE;
	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	/*
	 * FIXME: Always use the first queue id for IQANDSTINDEX.  Linux does
	 * the same.
	 */
	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
			V_FW_RI_RES_WR_IQANUS(0) |
			V_FW_RI_RES_WR_IQANUD(1) |
			F_FW_RI_RES_WR_IQANDST |
			V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
			F_FW_RI_RES_WR_IQDROPRSS |
			V_FW_RI_RES_WR_IQPCIECH(2) |
			V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
			F_FW_RI_RES_WR_IQO |
			V_FW_RI_RES_WR_IQESIZE(1));
	res->u.cq.iqsize = cpu_to_be16(cq->size);
	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);

	c4iw_init_wr_wait(&wr_wait);

	t4_wrq_tx(sc, wr);

	CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
	if (ret)
		goto err4;

	cq->gen = 1;
	cq->gts = (void *)((unsigned long)rman_get_virtual(sc->regs_res) +
	    sc->sge_gts_reg);
	cq->rdev = rdev;

	if (user) {
		cq->ugts = (u64)((char*)rman_get_virtual(sc->udbs_res) +
		    (cq->cqid << rdev->cqshift));
		cq->ugts &= PAGE_MASK;
		CTR5(KTR_IW_CXGBE,
		    "%s: UGTS %p cqid %x cqshift %d page_mask %x", __func__,
		    cq->ugts, cq->cqid, rdev->cqshift, PAGE_MASK);
	}
	return 0;
err4:
	contigfree(cq->queue, cq->memsize, M_DEVBUF);
err3:
	kfree(cq->sw_queue);
err2:
	c4iw_put_cqid(rdev, cq->cqid, uctx);
err1:
	return ret;
}

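/*
 * Synthesize a T4_ERR_SWFLUSH RQ completion for this WQ and place it on the
 * software CQ.
 */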
static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(FW_RI_SEND) |
				 V_CQE_TYPE(0) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

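/*
 * Flush the RQ: insert a software flush CQE for each RQ entry still in use,
 * skipping the first 'count' entries already accounted for.  Returns the
 * number of entries flushed.
 */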
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	int in_use = wq->rq.in_use - count;

	BUG_ON(in_use < 0);
	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
	    __func__, wq, cq, wq->rq.in_use, count);
	while (in_use--) {
		insert_recv_cqe(wq, cq);
		flushed++;
	}
	return flushed;
}

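/*
 * Synthesize a T4_ERR_SWFLUSH completion for an SQ work request and place it
 * on the software CQ.
 */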
static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
			  struct t4_swsqe *swcqe)
{
	struct t4_cqe cqe;

	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
	    cq, cq->sw_cidx, cq->sw_pidx);
	memset(&cqe, 0, sizeof(cqe));
	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
				 V_CQE_OPCODE(swcqe->opcode) |
				 V_CQE_TYPE(1) |
				 V_CQE_SWCQE(1) |
				 V_CQE_QPID(wq->sq.qid));
	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
	cq->sw_queue[cq->sw_pidx] = cqe;
	t4_swcq_produce(cq);
}

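/*
 * Flush the SQ: insert a software flush CQE for each SQ entry still in use,
 * starting 'count' entries past the current cidx.  Returns the number of
 * entries flushed.
 */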
int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
{
	int flushed = 0;
	struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
	int in_use = wq->sq.in_use - count;

	BUG_ON(in_use < 0);
	while (in_use--) {
		swsqe->signaled = 0;
		insert_sq_cqe(wq, cq, swsqe);
		swsqe++;
		if (swsqe == (wq->sq.sw_sq + wq->sq.size))
			swsqe = wq->sq.sw_sq;
		flushed++;
	}
	return flushed;
}

/*
 * Move all CQEs from the HWCQ into the SWCQ.
 */
void c4iw_flush_hw_cq(struct t4_cq *cq)
{
	struct t4_cqe *cqe = NULL, *swcqe;
	int ret;

	CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, cq, cq->cqid);
	ret = t4_next_hw_cqe(cq, &cqe);
	while (!ret) {
		CTR3(KTR_IW_CXGBE, "%s flushing hwcq cidx 0x%x swcq pidx 0x%x",
		    __func__, cq->cidx, cq->sw_pidx);
		swcqe = &cq->sw_queue[cq->sw_pidx];
		*swcqe = *cqe;
		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
		t4_swcq_produce(cq);
		t4_hwcq_consume(cq);
		ret = t4_next_hw_cqe(cq, &cqe);
	}
}

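/*
 * Return 1 if this CQE completes a work request posted on the given WQ, 0 for
 * CQEs that do not (terminates, RQ-side RDMA writes, SQ-side read responses,
 * and RQ-side sends seen while the RQ is empty).
 */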
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
		return 0;

	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
		return 0;

	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
		return 0;
	return 1;
}

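/*
 * Count the SQ completions (and read responses, while a read is outstanding)
 * in the software CQ that belong to this WQ.
 */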
void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
				      wq->sq.oldest_read)) &&
		    (CQE_QPID(cqe) == wq->sq.qid))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

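/*
 * Count the RQ completions in the software CQ that belong to this WQ and
 * actually complete a posted work request.
 */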
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
{
	struct t4_cqe *cqe;
	u32 ptr;

	*count = 0;
	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
	ptr = cq->sw_cidx;
	while (ptr != cq->sw_pidx) {
		cqe = &cq->sw_queue[ptr];
		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
			(*count)++;
		if (++ptr == cq->size)
			ptr = 0;
	}
	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
}

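/*
 * Walk the software SQ from cidx, skipping unsignaled WRs.  When a completed
 * signaled WR is found, move its CQE into the software CQ and reclaim the
 * skipped unsignaled entries.
 */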
static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
{
	struct t4_swsqe *swsqe;
	u16 ptr = wq->sq.cidx;
	int count = wq->sq.in_use;
	int unsignaled = 0;

	swsqe = &wq->sq.sw_sq[ptr];
	while (count--)
		if (!swsqe->signaled) {
			if (++ptr == wq->sq.size)
				ptr = 0;
			swsqe = &wq->sq.sw_sq[ptr];
			unsignaled++;
		} else if (swsqe->complete) {

			/*
			 * Insert this completed cqe into the swcq.
			 */
			CTR3(KTR_IW_CXGBE,
			    "%s moving cqe into swcq sq idx %u cq idx %u",
			    __func__, ptr, cq->sw_pidx);
			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
			swsqe->signaled = 0;
			wq->sq.in_use -= unsignaled;
			break;
		} else
			break;
}

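/*
 * Build, in local memory, the CQE for the read request that matches this read
 * response.  The hardware CQE does not carry the SQ index, opcode, or length
 * of the original read WR, so they are filled in from the oldest pending read.
 */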
static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
				struct t4_cqe *read_cqe)
{
	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
	read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
				 V_CQE_SWCQE(SW_CQE(hw_cqe)) |
				 V_CQE_OPCODE(FW_RI_READ_REQ) |
				 V_CQE_TYPE(1));
	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
}

/*
 * Advance wq->sq.oldest_read to the next read wr in the SWSQ, or set it to
 * NULL if there is none.
 */
static void advance_oldest_read(struct t4_wq *wq)
{
	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;

	if (rptr == wq->sq.size)
		rptr = 0;
	while (rptr != wq->sq.pidx) {
		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];

		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
			return;
		if (++rptr == wq->sq.size)
			rptr = 0;
	}
	wq->sq.oldest_read = NULL;
}

/*
 * poll_cq
 *
 * Caller must:
 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
 *
 * credit: cq credit to return to sge.
 * cqe_flushed: 1 iff the CQE is flushed.
 * cqe: copy of the polled CQE.
 *
 * return value:
 *    0		    CQE returned ok.
 *    -EAGAIN       CQE skipped, try again.
 *    -EOVERFLOW    CQ overflow detected.
 */
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
{
	int ret = 0;
	struct t4_cqe *hw_cqe, read_cqe;

	*cqe_flushed = 0;
	*credit = 0;
	ret = t4_next_cqe(cq, &hw_cqe);
	if (ret)
		return ret;

	CTR6(KTR_IW_CXGBE,
	    "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
	    CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
	    CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
	CTR5(KTR_IW_CXGBE,
	    "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
	    __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
	    CQE_WRID_LOW(hw_cqe));

	/*
	 * skip cqe's not affiliated with a QP.
	 */
	if (wq == NULL) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * Gotta tweak READ completions:
	 *	1) the cqe doesn't contain the sq_wptr from the wr.
	 *	2) opcode not reflected from the wr.
	 *	3) read_len not reflected from the wr.
	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
	 */
	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {

		/*
		 * If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup.  So ignore the completion.
		 */
		if (!wq->sq.oldest_read) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}

		/*
		 * Don't write to the HWCQ, so create a new read req CQE
		 * in local memory.
		 */
		create_read_req_cqe(wq, hw_cqe, &read_cqe);
		hw_cqe = &read_cqe;
		advance_oldest_read(wq);
	}

	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
		*cqe_flushed = t4_wq_in_error(wq);
		t4_set_wq_in_error(wq);
		goto proc_cqe;
	}

	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
		ret = -EAGAIN;
		goto skip_cqe;
	}

	/*
	 * RECV completion.
	 */
	if (RQ_TYPE(hw_cqe)) {

		/*
		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */

		if (t4_rq_empty(wq)) {
			t4_set_wq_in_error(wq);
			ret = -EAGAIN;
			goto skip_cqe;
		}
		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
			t4_set_wq_in_error(wq);
			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
			goto proc_cqe;
		}
		goto proc_cqe;
	}

	/*
	 * If we get here, it's a send completion.
	 *
	 * Handle out of order completion. These get stuffed
	 * in the SW SQ. Then the SW SQ is walked to move any
	 * now in-order completions into the SW CQ.  This handles
	 * 2 cases:
	 *	1) reaping unsignaled WRs when the first subsequent
	 *	   signaled WR is completed.
	 *	2) out of order read completions.
	 */
	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
		struct t4_swsqe *swsqe;

		CTR2(KTR_IW_CXGBE,
		    "%s out of order completion going in sw_sq at idx %u",
		    __func__, CQE_WRID_SQ_IDX(hw_cqe));
		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
		swsqe->cqe = *hw_cqe;
		swsqe->complete = 1;
		ret = -EAGAIN;
		goto flush_wq;
	}

proc_cqe:
	*cqe = *hw_cqe;

	/*
	 * Reap the associated WR(s) that are freed up with this
	 * completion.
	 */
	if (SQ_TYPE(hw_cqe)) {
		wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u",
		     __func__, wq->sq.cidx);
		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
		t4_sq_consume(wq);
	} else {
		CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
		     __func__, wq->rq.cidx);
		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
		BUG_ON(t4_rq_empty(wq));
		t4_rq_consume(wq);
	}

flush_wq:
	/*
	 * Flush any completed cqes that are now in-order.
	 */
	flush_completed_wrs(wq, cq);

skip_cqe:
	if (SW_CQE(hw_cqe)) {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->sw_cidx);
		t4_swcq_consume(cq);
	} else {
		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
		     __func__, cq, cq->cqid, cq->cidx);
		t4_hwcq_consume(cq);
	}
	return ret;
}

/*
 * Get one cq entry from c4iw and map it to openib.
 *
 * Returns:
 *	0			cqe returned
 *	-ENODATA		EMPTY;
 *	-EAGAIN			caller must try again
 *	any other -errno	fatal error
 */
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
	struct c4iw_qp *qhp = NULL;
	struct t4_cqe cqe = {0, 0}, *rd_cqe;
	struct t4_wq *wq;
	u32 credit = 0;
	u8 cqe_flushed;
	u64 cookie = 0;
	int ret;

	ret = t4_next_cqe(&chp->cq, &rd_cqe);

	if (ret)
		return ret;

	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
	if (!qhp)
		wq = NULL;
	else {
		spin_lock(&qhp->lock);
		wq = &(qhp->wq);
	}
	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
	if (ret)
		goto out;

	wc->wr_id = cookie;
	wc->qp = &qhp->ibqp;
	wc->vendor_err = CQE_STATUS(&cqe);
	wc->wc_flags = 0;

	CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
	    __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
	    CQE_STATUS(&cqe));
	CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
	    __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
	    (unsigned long long)cookie);

	if (CQE_TYPE(&cqe) == 0) {
		if (!CQE_STATUS(&cqe))
			wc->byte_len = CQE_LEN(&cqe);
		else
			wc->byte_len = 0;
		wc->opcode = IB_WC_RECV;
		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
		}
	} else {
		switch (CQE_OPCODE(&cqe)) {
		case FW_RI_RDMA_WRITE:
			wc->opcode = IB_WC_RDMA_WRITE;
			break;
		case FW_RI_READ_REQ:
			wc->opcode = IB_WC_RDMA_READ;
			wc->byte_len = CQE_LEN(&cqe);
			break;
		case FW_RI_SEND_WITH_INV:
		case FW_RI_SEND_WITH_SE_INV:
			wc->opcode = IB_WC_SEND;
			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
			break;
		case FW_RI_SEND:
		case FW_RI_SEND_WITH_SE:
			wc->opcode = IB_WC_SEND;
			break;
		case FW_RI_BIND_MW:
			wc->opcode = IB_WC_BIND_MW;
			break;
		case FW_RI_LOCAL_INV:
			wc->opcode = IB_WC_LOCAL_INV;
			break;
		case FW_RI_FAST_REGISTER:
			wc->opcode = IB_WC_FAST_REG_MR;
			break;
		default:
			printf("Unexpected opcode %d "
			       "in the CQE received for QPID = 0x%0x\n",
			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
			ret = -EINVAL;
			goto out;
		}
	}

	if (cqe_flushed)
		wc->status = IB_WC_WR_FLUSH_ERR;
	else {
		switch (CQE_STATUS(&cqe)) {
		case T4_ERR_SUCCESS:
			wc->status = IB_WC_SUCCESS;
			break;
		case T4_ERR_STAG:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_PDID:
			wc->status = IB_WC_LOC_PROT_ERR;
			break;
		case T4_ERR_QPID:
		case T4_ERR_ACCESS:
			wc->status = IB_WC_LOC_ACCESS_ERR;
			break;
		case T4_ERR_WRAP:
			wc->status = IB_WC_GENERAL_ERR;
			break;
		case T4_ERR_BOUND:
			wc->status = IB_WC_LOC_LEN_ERR;
			break;
		case T4_ERR_INVALIDATE_SHARED_MR:
		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
			wc->status = IB_WC_MW_BIND_ERR;
			break;
		case T4_ERR_CRC:
		case T4_ERR_MARKER:
		case T4_ERR_PDU_LEN_ERR:
		case T4_ERR_OUT_OF_RQE:
		case T4_ERR_DDP_VERSION:
		case T4_ERR_RDMA_VERSION:
		case T4_ERR_DDP_QUEUE_NUM:
		case T4_ERR_MSN:
		case T4_ERR_TBIT:
		case T4_ERR_MO:
		case T4_ERR_MSN_RANGE:
		case T4_ERR_IRD_OVERFLOW:
		case T4_ERR_OPCODE:
		case T4_ERR_INTERNAL_ERR:
			wc->status = IB_WC_FATAL_ERR;
			break;
		case T4_ERR_SWFLUSH:
			wc->status = IB_WC_WR_FLUSH_ERR;
			break;
		default:
			printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
			wc->status = IB_WC_FATAL_ERR;
		}
	}
out:
	if (wq)
		spin_unlock(&qhp->lock);
	return ret;
}

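/*
 * Poll up to num_entries completions from the CQ into wc[].  Returns the
 * number of entries polled, or a negative errno on a fatal error.
 */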
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct c4iw_cq *chp;
	unsigned long flags;
	int npolled;
	int err = 0;

	chp = to_c4iw_cq(ibcq);

	spin_lock_irqsave(&chp->lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		do {
			err = c4iw_poll_cq_one(chp, wc + npolled);
		} while (err == -EAGAIN);
		if (err)
			break;
	}
	spin_unlock_irqrestore(&chp->lock, flags);
	return !err || err == -ENODATA ? npolled : err;
}

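/*
 * Destroy a CQ: remove it from the cqid table, wait for all references to be
 * released, then reset the hardware CQ and free its resources.
 */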
int c4iw_destroy_cq(struct ib_cq *ib_cq)
{
	struct c4iw_cq *chp;
	struct c4iw_ucontext *ucontext;

	CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
	chp = to_c4iw_cq(ib_cq);

	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
	atomic_dec(&chp->refcnt);
	wait_event(chp->wait, !atomic_read(&chp->refcnt));

	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
				  : NULL;
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
	kfree(chp);
	return 0;
}

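/*
 * Create a CQ: size the hardware ingress queue (status page, full/empty
 * marker, 16-entry alignment, 2x headroom for cidx_inc), allocate it via
 * create_cq(), and, for user CQs, export mmap keys for the queue memory and
 * the GTS doorbell page.
 */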
struct ib_cq *
c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
    struct ib_ucontext *ib_context, struct ib_udata *udata)
{
	struct c4iw_dev *rhp;
	struct c4iw_cq *chp;
	struct c4iw_create_cq_resp uresp;
	struct c4iw_ucontext *ucontext = NULL;
	int ret;
	size_t memsize, hwentries;
	struct c4iw_mm_entry *mm, *mm2;
	int entries = attr->cqe;

	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);

	rhp = to_c4iw_dev(ibdev);

	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
	if (!chp)
		return ERR_PTR(-ENOMEM);

	if (ib_context)
		ucontext = to_c4iw_ucontext(ib_context);

	/* account for the status page. */
	entries++;

	/* IQ needs one extra entry to differentiate full vs empty. */
	entries++;

	/*
	 * entries must be multiple of 16 for HW.
	 */
	entries = roundup(entries, 16);

	/*
	 * Make actual HW queue 2x to avoid cidx_inc overflows.
	 */
	hwentries = entries * 2;

	/*
	 * Make HW queue at least 64 entries so GTS updates aren't too
	 * frequent.
	 */
	if (hwentries < 64)
		hwentries = 64;

	memsize = hwentries * sizeof *chp->cq.queue;

	/*
	 * memsize must be a multiple of the page size if it's a user cq.
	 */
	if (ucontext) {
		memsize = roundup(memsize, PAGE_SIZE);
		hwentries = memsize / sizeof *chp->cq.queue;
		while (hwentries > T4_MAX_IQ_SIZE) {
			memsize -= PAGE_SIZE;
			hwentries = memsize / sizeof *chp->cq.queue;
		}
	}
	chp->cq.size = hwentries;
	chp->cq.memsize = memsize;

	ret = create_cq(&rhp->rdev, &chp->cq,
			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
	if (ret)
		goto err1;

	chp->rhp = rhp;
	chp->cq.size--;				/* status page */
	chp->ibcq.cqe = entries - 2;
	spin_lock_init(&chp->lock);
	spin_lock_init(&chp->comp_handler_lock);
	atomic_set(&chp->refcnt, 1);
	init_waitqueue_head(&chp->wait);
	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
	if (ret)
		goto err2;

	if (ucontext) {
		mm = kmalloc(sizeof *mm, GFP_KERNEL);
		if (!mm) {
			ret = -ENOMEM;
			goto err3;
		}
		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
		if (!mm2) {
			ret = -ENOMEM;
			goto err4;
		}

		memset(&uresp, 0, sizeof(uresp));
		uresp.qid_mask = rhp->rdev.cqmask;
		uresp.cqid = chp->cq.cqid;
		uresp.size = chp->cq.size;
		uresp.memsize = chp->cq.memsize;
		spin_lock(&ucontext->mmap_lock);
		uresp.key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		uresp.gts_key = ucontext->key;
		ucontext->key += PAGE_SIZE;
		spin_unlock(&ucontext->mmap_lock);
		ret = ib_copy_to_udata(udata, &uresp,
					sizeof(uresp) - sizeof(uresp.reserved));
		if (ret)
			goto err5;

		mm->key = uresp.key;
		mm->addr = vtophys(chp->cq.queue);
		mm->len = chp->cq.memsize;
		insert_mmap(ucontext, mm);

		mm2->key = uresp.gts_key;
		mm2->addr = chp->cq.ugts;
		mm2->len = PAGE_SIZE;
		insert_mmap(ucontext, mm2);
	}
	CTR6(KTR_IW_CXGBE,
	    "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
	    __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
	    (unsigned long long) chp->cq.dma_addr);
	return &chp->ibcq;
err5:
	kfree(mm2);
err4:
	kfree(mm);
err3:
	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
err2:
	destroy_cq(&chp->rhp->rdev, &chp->cq,
		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
err1:
	kfree(chp);
	return ERR_PTR(ret);
}

int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return -ENOSYS;
}

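/*
 * Request a completion notification on the CQ: solicited-only or next
 * completion, depending on flags.
 */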
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct c4iw_cq *chp;
	int ret;
	unsigned long flag;

	chp = to_c4iw_cq(ibcq);
	spin_lock_irqsave(&chp->lock, flag);
	ret = t4_arm_cq(&chp->cq,
			(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
	spin_unlock_irqrestore(&chp->lock, flag);
	if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
		ret = 0;
	return ret;
}
#endif