1/*
2 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c 331769 2018-03-30 18:06:29Z hselasky $");
34
35#include "opt_inet.h"
36
37#ifdef TCP_OFFLOAD
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/ktr.h>
42#include <sys/bus.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/rwlock.h>
46#include <sys/socket.h>
47#include <sys/sbuf.h>
48
49#include "iw_cxgbe.h"
50#include "user.h"
51
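/*
 * CQ create/reset is done by handing firmware a FW_RI_RES_WR on the
 * adapter's management queue.  The address of a c4iw_wr_wait is carried
 * in the WR cookie; the firmware reply (handled elsewhere in the driver)
 * uses that cookie to wake the thread sleeping in c4iw_wait_for_reply().
 */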
52static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
53		      struct c4iw_dev_ucontext *uctx)
54{
55	struct adapter *sc = rdev->adap;
56	struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
57	struct fw_ri_res_wr *res_wr;
58	struct fw_ri_res *res;
59	int wr_len;
60	struct c4iw_wr_wait wr_wait;
61	struct wrqe *wr;
62
63	wr_len = sizeof *res_wr + sizeof *res;
64	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL)
		return (0);
	res_wr = wrtod(wr);
68	memset(res_wr, 0, wr_len);
69	res_wr->op_nres = cpu_to_be32(
70			V_FW_WR_OP(FW_RI_RES_WR) |
71			V_FW_RI_RES_WR_NRES(1) |
72			F_FW_WR_COMPL);
73	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
74	res_wr->cookie = (unsigned long) &wr_wait;
75	res = res_wr->res;
76	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
77	res->u.cq.op = FW_RI_RES_OP_RESET;
78	res->u.cq.iqid = cpu_to_be32(cq->cqid);
79
80	c4iw_init_wr_wait(&wr_wait);
81
82	t4_wrq_tx(sc, wr);
83
84	c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
85
86	kfree(cq->sw_queue);
87	dma_free_coherent(rhp->ibdev.dma_device,
88			  cq->memsize, cq->queue,
89			  dma_unmap_addr(cq, mapping));
90	c4iw_put_cqid(rdev, cq->cqid, uctx);
91	return 0;
92}
93
94static int
95create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
96    struct c4iw_dev_ucontext *uctx)
97{
98	struct adapter *sc = rdev->adap;
99	struct c4iw_dev *rhp = rdev_to_c4iw_dev(rdev);
100	struct fw_ri_res_wr *res_wr;
101	struct fw_ri_res *res;
102	int wr_len;
103	int user = (uctx != &rdev->uctx);
104	struct c4iw_wr_wait wr_wait;
105	int ret;
106	struct wrqe *wr;
107	u64 cq_bar2_qoffset = 0;
108
109	cq->cqid = c4iw_get_cqid(rdev, uctx);
110	if (!cq->cqid) {
111		ret = -ENOMEM;
112		goto err1;
113	}
114
115	if (!user) {
116		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
117		if (!cq->sw_queue) {
118			ret = -ENOMEM;
119			goto err2;
120		}
121	}
122	cq->queue = dma_alloc_coherent(rhp->ibdev.dma_device, cq->memsize,
123				       &cq->dma_addr, GFP_KERNEL);
124	if (!cq->queue) {
125		ret = -ENOMEM;
126		goto err3;
127	}
128	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
129	memset(cq->queue, 0, cq->memsize);
130
131	/* build fw_ri_res_wr */
132	wr_len = sizeof *res_wr + sizeof *res;
133
134	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
	if (wr == NULL) {
		ret = -ENOMEM;
		goto err4;
	}
	res_wr = wrtod(wr);
138
139	memset(res_wr, 0, wr_len);
140	res_wr->op_nres = cpu_to_be32(
141			V_FW_WR_OP(FW_RI_RES_WR) |
142			V_FW_RI_RES_WR_NRES(1) |
143			F_FW_WR_COMPL);
144	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
145	res_wr->cookie = (unsigned long) &wr_wait;
146	res = res_wr->res;
147	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
148	res->u.cq.op = FW_RI_RES_OP_WRITE;
149	res->u.cq.iqid = cpu_to_be32(cq->cqid);
	/*
	 * FIXME: Always use first queue id for IQANDSTINDEX.  Linux does
	 * the same.
	 */
151	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
152			V_FW_RI_RES_WR_IQANUS(0) |
153			V_FW_RI_RES_WR_IQANUD(1) |
154			F_FW_RI_RES_WR_IQANDST |
155			V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
156	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
157			F_FW_RI_RES_WR_IQDROPRSS |
158			V_FW_RI_RES_WR_IQPCIECH(2) |
159			V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
160			F_FW_RI_RES_WR_IQO |
161			V_FW_RI_RES_WR_IQESIZE(1));
162	res->u.cq.iqsize = cpu_to_be16(cq->size);
163	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
164
165	c4iw_init_wr_wait(&wr_wait);
166
167	t4_wrq_tx(sc, wr);
168
169	CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
170	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, NULL, __func__);
171	if (ret)
172		goto err4;
173
174	cq->gen = 1;
175	cq->rdev = rdev;
176
177	/* Determine the BAR2 queue offset and qid. */
178	t4_bar2_sge_qregs(rdev->adap, cq->cqid, T4_BAR2_QTYPE_INGRESS, user,
179			&cq_bar2_qoffset, &cq->bar2_qid);
180
	/*
	 * If this is a user mapping, compute the page-aligned physical
	 * address for the mmap.
	 */
184	if (user)
185		cq->bar2_pa = (rdev->bar2_pa + cq_bar2_qoffset) & PAGE_MASK;
186	else
187		cq->bar2_va = (void __iomem *)((u64)rdev->bar2_kva +
188			cq_bar2_qoffset);
189
190	return 0;
191err4:
192	dma_free_coherent(rhp->ibdev.dma_device, cq->memsize, cq->queue,
193			  dma_unmap_addr(cq, mapping));
194err3:
195	kfree(cq->sw_queue);
196err2:
197	c4iw_put_cqid(rdev, cq->cqid, uctx);
198err1:
199	return ret;
200}
201
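/*
 * Build a software-only CQE that completes one RQ WR with status
 * T4_ERR_SWFLUSH and push it onto the SW CQ.  Used by c4iw_flush_rq()
 * when a QP is being flushed.
 */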
202static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
203{
204	struct t4_cqe cqe;
205
206	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
207	    cq, cq->sw_cidx, cq->sw_pidx);
208	memset(&cqe, 0, sizeof(cqe));
209	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
210				 V_CQE_OPCODE(FW_RI_SEND) |
211				 V_CQE_TYPE(0) |
212				 V_CQE_SWCQE(1) |
213				 V_CQE_QPID(wq->sq.qid));
214	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
215	cq->sw_queue[cq->sw_pidx] = cqe;
216	t4_swcq_produce(cq);
217}
218
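/*
 * Insert flush CQEs for every RQ WR still outstanding on this QP, except
 * for the 'count' WRs whose real completions are already queued in the
 * SW CQ (see c4iw_count_rcqes()).  Returns the number of CQEs inserted.
 */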
219int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
220{
221	int flushed = 0;
222	int in_use = wq->rq.in_use - count;
223
224	BUG_ON(in_use < 0);
225	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
226	    __func__, wq, cq, wq->rq.in_use, count);
227	while (in_use--) {
228		insert_recv_cqe(wq, cq);
229		flushed++;
230	}
231	return flushed;
232}
233
234static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
235			  struct t4_swsqe *swcqe)
236{
237	struct t4_cqe cqe;
238
239	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
240	    cq, cq->sw_cidx, cq->sw_pidx);
241	memset(&cqe, 0, sizeof(cqe));
242	cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) |
243				 V_CQE_OPCODE(swcqe->opcode) |
244				 V_CQE_TYPE(1) |
245				 V_CQE_SWCQE(1) |
246				 V_CQE_QPID(wq->sq.qid));
247	CQE_WRID_SQ_IDX(&cqe) = swcqe->idx;
248	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
249	cq->sw_queue[cq->sw_pidx] = cqe;
250	t4_swcq_produce(cq);
251}
252
253static void advance_oldest_read(struct t4_wq *wq);
254
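/*
 * Insert flush CQEs for every SQ WR from flush_cidx up to (but not
 * including) pidx, advancing the oldest-read pointer past any flushed
 * READ REQ.  Returns the number of CQEs inserted.
 */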
255int c4iw_flush_sq(struct c4iw_qp *qhp)
256{
257	int flushed = 0;
258	struct t4_wq *wq = &qhp->wq;
259	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
260	struct t4_cq *cq = &chp->cq;
261	int idx;
262	struct t4_swsqe *swsqe;
263
264	if (wq->sq.flush_cidx == -1)
265		wq->sq.flush_cidx = wq->sq.cidx;
266	idx = wq->sq.flush_cidx;
267	BUG_ON(idx >= wq->sq.size);
268	while (idx != wq->sq.pidx) {
269		swsqe = &wq->sq.sw_sq[idx];
270		BUG_ON(swsqe->flushed);
271		swsqe->flushed = 1;
272		insert_sq_cqe(wq, cq, swsqe);
273		if (wq->sq.oldest_read == swsqe) {
274			BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
275			advance_oldest_read(wq);
276		}
277		flushed++;
278		if (++idx == wq->sq.size)
279			idx = 0;
280	}
281	wq->sq.flush_cidx += flushed;
282	if (wq->sq.flush_cidx >= wq->sq.size)
283		wq->sq.flush_cidx -= wq->sq.size;
284	return flushed;
285}
286
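/*
 * Walk the SW SQ from flush_cidx and move the CQEs of WRs that are both
 * signaled and already complete into the SW CQ.  Unsignaled WRs are
 * skipped, and the walk stops at the first signaled WR that has not yet
 * completed, so completions are delivered in order.
 */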
287static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
288{
289	struct t4_swsqe *swsqe;
290	int cidx;
291
292	if (wq->sq.flush_cidx == -1)
293		wq->sq.flush_cidx = wq->sq.cidx;
294	cidx = wq->sq.flush_cidx;
	BUG_ON(cidx >= wq->sq.size);
296
297	while (cidx != wq->sq.pidx) {
298		swsqe = &wq->sq.sw_sq[cidx];
299		if (!swsqe->signaled) {
300			if (++cidx == wq->sq.size)
301				cidx = 0;
302		} else if (swsqe->complete) {
303
304			BUG_ON(swsqe->flushed);
305
306			/*
307			 * Insert this completed cqe into the swcq.
308			 */
309			CTR3(KTR_IW_CXGBE,
310				"%s moving cqe into swcq sq idx %u cq idx %u\n",
311				__func__, cidx, cq->sw_pidx);
312			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
313			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
314			t4_swcq_produce(cq);
315			swsqe->flushed = 1;
316			if (++cidx == wq->sq.size)
317				cidx = 0;
318			wq->sq.flush_cidx = cidx;
319		} else
320			break;
321	}
322}
323
324static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
325		struct t4_cqe *read_cqe)
326{
327	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
328	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
329	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
330			V_CQE_SWCQE(SW_CQE(hw_cqe)) |
331			V_CQE_OPCODE(FW_RI_READ_REQ) |
332			V_CQE_TYPE(1));
333	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
334}
335
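/*
 * Advance wq->sq.oldest_read to the next outstanding READ REQ WR in the
 * SW SQ, or set it to NULL if no outstanding reads remain.
 */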
336static void advance_oldest_read(struct t4_wq *wq)
337{
338
339	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
340
341	if (rptr == wq->sq.size)
342		rptr = 0;
343	while (rptr != wq->sq.pidx) {
344		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
345
346		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
347			return;
348		if (++rptr == wq->sq.size)
349			rptr = 0;
350	}
351	wq->sq.oldest_read = NULL;
352}
353
354/*
355 * Move all CQEs from the HWCQ into the SWCQ.
 * Deal with out-of-order completions and/or completions that complete
 * prior unsignaled WRs.
358 */
359void c4iw_flush_hw_cq(struct c4iw_cq *chp)
360{
361	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
362	struct c4iw_qp *qhp;
363	struct t4_swsqe *swsqe;
364	int ret;
365
366	CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, &chp->cq,
367			chp->cq.cqid);
368	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
369
370	/*
371	 * This logic is similar to poll_cq(), but not quite the same
372	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
373	 * also do any translation magic that poll_cq() normally does.
374	 */
375	while (!ret) {
376		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
377
378		/*
379		 * drop CQEs with no associated QP
380		 */
381		if (qhp == NULL)
382			goto next_cqe;
383
384		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
385			goto next_cqe;
386
387		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
388
			/*
			 * If we have reached here because of an async event
			 * or some other error, and there is an egress error,
			 * then drop the CQE.
			 */
393			if (CQE_TYPE(hw_cqe) == 1)
394				goto next_cqe;
395
396			/* drop peer2peer RTR reads.
397			 */
398			if (CQE_WRID_STAG(hw_cqe) == 1)
399				goto next_cqe;
400
401			/*
402			 * Eat completions for unsignaled read WRs.
403			 */
404			if (!qhp->wq.sq.oldest_read->signaled) {
405				advance_oldest_read(&qhp->wq);
406				goto next_cqe;
407			}
408
409			/*
410			 * Don't write to the HWCQ, create a new read req CQE
411			 * in local memory and move it into the swcq.
412			 */
413			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
414			hw_cqe = &read_cqe;
415			advance_oldest_read(&qhp->wq);
416		}
417
		/*
		 * If it's an SQ completion, then do the magic to move all
		 * the unsignaled and now in-order completions into the swcq.
		 */
421		if (SQ_TYPE(hw_cqe)) {
422			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
423			swsqe->cqe = *hw_cqe;
424			swsqe->complete = 1;
425			flush_completed_wrs(&qhp->wq, &chp->cq);
426		} else {
427			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
428			*swcqe = *hw_cqe;
429			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
430			t4_swcq_produce(&chp->cq);
431		}
432next_cqe:
433		t4_hwcq_consume(&chp->cq);
434		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
435	}
436}
437
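/*
 * Return 1 if this CQE actually completes a consumer WR.  TERMINATE
 * CQEs, RQ-side RDMA WRITE CQEs, SQ-side READ RESPONSE CQEs, and SEND
 * CQEs arriving when the RQ is already empty do not.
 */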
438static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
439{
440	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
441		return 0;
442
443	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
444		return 0;
445
446	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
447		return 0;
448
449	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
450		return 0;
451	return 1;
452}
453
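/*
 * Count the CQEs already sitting in the SW CQ (from sw_cidx to sw_pidx)
 * that belong to this QP and will complete an RQ WR.  The QP flush path
 * passes this count to c4iw_flush_rq() so that it does not insert flush
 * CQEs for WRs that already have real completions queued.
 */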
454void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
455{
456	struct t4_cqe *cqe;
457	u32 ptr;
458
459	*count = 0;
460	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
461	ptr = cq->sw_cidx;
462	while (ptr != cq->sw_pidx) {
463		cqe = &cq->sw_queue[ptr];
464		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
465		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
466			(*count)++;
467		if (++ptr == cq->size)
468			ptr = 0;
469	}
470	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
471}
472
473/*
474 * poll_cq
475 *
476 * Caller must:
477 *     check the validity of the first CQE,
 *     supply the wq associated with the qpid.
479 *
480 * credit: cq credit to return to sge.
481 * cqe_flushed: 1 iff the CQE is flushed.
482 * cqe: copy of the polled CQE.
483 *
484 * return value:
485 *    0		    CQE returned ok.
486 *    -EAGAIN       CQE skipped, try again.
487 *    -EOVERFLOW    CQ overflow detected.
488 */
489static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
490		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
491{
492	int ret = 0;
493	struct t4_cqe *hw_cqe, read_cqe;
494
495	*cqe_flushed = 0;
496	*credit = 0;
497	ret = t4_next_cqe(cq, &hw_cqe);
498	if (ret)
499		return ret;
500
501	CTR6(KTR_IW_CXGBE,
502	    "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
503	    CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
504	    CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
505	CTR5(KTR_IW_CXGBE,
506	    "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
507	    __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
508	    CQE_WRID_LOW(hw_cqe));
509
510	/*
511	 * skip cqe's not affiliated with a QP.
512	 */
513	if (wq == NULL) {
514		ret = -EAGAIN;
515		goto skip_cqe;
516	}
517
	/*
	 * Skip HW CQEs if the wq is flushed.
	 */
521	if (wq->flushed && !SW_CQE(hw_cqe)) {
522		ret = -EAGAIN;
523		goto skip_cqe;
524	}
525
526	/*
527	 * skip TERMINATE cqes...
528	 */
529	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
530		ret = -EAGAIN;
531		goto skip_cqe;
532	}
533
534	/*
535	 * Special cqe for drain WR completions...
536	 */
537	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
538		*cookie = CQE_DRAIN_COOKIE(hw_cqe);
539		*cqe = *hw_cqe;
540		goto skip_cqe;
541	}
542
543	/*
544	 * Gotta tweak READ completions:
545	 *	1) the cqe doesn't contain the sq_wptr from the wr.
546	 *	2) opcode not reflected from the wr.
547	 *	3) read_len not reflected from the wr.
548	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
549	 */
550	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
551
		/*
		 * If we have reached here because of an async event or some
		 * other error, and there is an egress error, then drop the
		 * CQE.
		 */
556		if (CQE_TYPE(hw_cqe) == 1) {
557			if (CQE_STATUS(hw_cqe))
558				t4_set_wq_in_error(wq);
559			ret = -EAGAIN;
560			goto skip_cqe;
561		}
562
563		/* If this is an unsolicited read response, then the read
564		 * was generated by the kernel driver as part of peer-2-peer
565		 * connection setup.  So ignore the completion.
566		 */
567		if (CQE_WRID_STAG(hw_cqe) == 1) {
568			if (CQE_STATUS(hw_cqe))
569				t4_set_wq_in_error(wq);
570			ret = -EAGAIN;
571			goto skip_cqe;
572		}
573
574		/*
575		 * Eat completions for unsignaled read WRs.
576		 */
577		if (!wq->sq.oldest_read->signaled) {
578			advance_oldest_read(wq);
579			ret = -EAGAIN;
580			goto skip_cqe;
581		}
582
583		/*
584		 * Don't write to the HWCQ, so create a new read req CQE
585		 * in local memory.
586		 */
587		create_read_req_cqe(wq, hw_cqe, &read_cqe);
588		hw_cqe = &read_cqe;
589		advance_oldest_read(wq);
590	}
591
592	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
593		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
594		t4_set_wq_in_error(wq);
595	}
596
597	/*
598	 * RECV completion.
599	 */
600	if (RQ_TYPE(hw_cqe)) {
601
602		/*
603		 * HW only validates 4 bits of MSN.  So we must validate that
		 * the MSN in the SEND is the next expected MSN.  If it's not,
605		 * then we complete this with T4_ERR_MSN and mark the wq in
606		 * error.
607		 */
608
609		if (t4_rq_empty(wq)) {
610			t4_set_wq_in_error(wq);
611			ret = -EAGAIN;
612			goto skip_cqe;
613		}
614		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
615			t4_set_wq_in_error(wq);
616			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
617			goto proc_cqe;
618		}
619		goto proc_cqe;
620	}
621
622	/*
	 * If we get here it's a send completion.
624	 *
625	 * Handle out of order completion. These get stuffed
626	 * in the SW SQ. Then the SW SQ is walked to move any
627	 * now in-order completions into the SW CQ.  This handles
628	 * 2 cases:
629	 *	1) reaping unsignaled WRs when the first subsequent
630	 *	   signaled WR is completed.
631	 *	2) out of order read completions.
632	 */
633	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
634		struct t4_swsqe *swsqe;
635
636		CTR2(KTR_IW_CXGBE,
637		    "%s out of order completion going in sw_sq at idx %u",
638		    __func__, CQE_WRID_SQ_IDX(hw_cqe));
639		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
640		swsqe->cqe = *hw_cqe;
641		swsqe->complete = 1;
642		ret = -EAGAIN;
643		goto flush_wq;
644	}
645
646proc_cqe:
647	*cqe = *hw_cqe;
648
649	/*
650	 * Reap the associated WR(s) that are freed up with this
651	 * completion.
652	 */
653	if (SQ_TYPE(hw_cqe)) {
654		int idx = CQE_WRID_SQ_IDX(hw_cqe);
655		BUG_ON(idx >= wq->sq.size);
656
		/*
		 * Account for any unsignaled completions completed by
		 * this signaled completion.  In this case, cidx points
		 * to the first unsignaled one, and idx points to the
		 * signaled one.  So adjust in_use based on this delta.
		 * If this is not completing any unsignaled WRs, then the
		 * delta will be 0.  Handle wrapping also!
		 */
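		/*
		 * Worked example (hypothetical numbers): with sq.size = 16,
		 * cidx = 14 and a completion for idx = 2, in_use drops by
		 * 16 + 2 - 14 = 4, covering the unsignaled WRs at indices
		 * 14, 15, 0 and 1.  The signaled WR at index 2 itself is
		 * retired by t4_sq_consume() below.
		 */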
665		if (idx < wq->sq.cidx)
666			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
667		else
668			wq->sq.in_use -= idx - wq->sq.cidx;
		BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size);
670
671		wq->sq.cidx = (uint16_t)idx;
672		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n",
673				__func__, wq->sq.cidx);
674		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
675		t4_sq_consume(wq);
676	} else {
677		CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
678		     __func__, wq->rq.cidx);
679		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
680		BUG_ON(t4_rq_empty(wq));
681		t4_rq_consume(wq);
682		goto skip_cqe;
683	}
684
685flush_wq:
686	/*
687	 * Flush any completed cqes that are now in-order.
688	 */
689	flush_completed_wrs(wq, cq);
690
691skip_cqe:
692	if (SW_CQE(hw_cqe)) {
693		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
694		     __func__, cq, cq->cqid, cq->sw_cidx);
695		t4_swcq_consume(cq);
696	} else {
697		CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
698		     __func__, cq, cq->cqid, cq->cidx);
699		t4_hwcq_consume(cq);
700	}
701	return ret;
702}
703
704/*
705 * Get one cq entry from c4iw and map it to openib.
706 *
707 * Returns:
708 *	0			cqe returned
 *	-ENODATA		CQ is empty
710 *	-EAGAIN			caller must try again
711 *	any other -errno	fatal error
712 */
713static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
714{
715	struct c4iw_qp *qhp = NULL;
716	struct t4_cqe cqe = {0, 0}, *rd_cqe;
717	struct t4_wq *wq;
718	u32 credit = 0;
719	u8 cqe_flushed;
720	u64 cookie = 0;
721	int ret;
722
723	ret = t4_next_cqe(&chp->cq, &rd_cqe);
724
725	if (ret)
726		return ret;
727
728	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
729	if (!qhp)
730		wq = NULL;
731	else {
732		spin_lock(&qhp->lock);
733		wq = &(qhp->wq);
734	}
735	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
736	if (ret)
737		goto out;
738
739	wc->wr_id = cookie;
740	wc->qp = &qhp->ibqp;
741	wc->vendor_err = CQE_STATUS(&cqe);
742	wc->wc_flags = 0;
743
744	CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
745	    __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
746	    CQE_STATUS(&cqe));
747	CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
748	    __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
749	    (unsigned long long)cookie);
750
751	if (CQE_TYPE(&cqe) == 0) {
752		if (!CQE_STATUS(&cqe))
753			wc->byte_len = CQE_LEN(&cqe);
754		else
755			wc->byte_len = 0;
756		wc->opcode = IB_WC_RECV;
757		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
758		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
759			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
760			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
761			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
762		}
763	} else {
764		switch (CQE_OPCODE(&cqe)) {
765		case FW_RI_RDMA_WRITE:
766			wc->opcode = IB_WC_RDMA_WRITE;
767			break;
768		case FW_RI_READ_REQ:
769			wc->opcode = IB_WC_RDMA_READ;
770			wc->byte_len = CQE_LEN(&cqe);
771			break;
772		case FW_RI_SEND_WITH_INV:
773		case FW_RI_SEND_WITH_SE_INV:
774			wc->opcode = IB_WC_SEND;
775			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
776			break;
777		case FW_RI_SEND:
778		case FW_RI_SEND_WITH_SE:
779			wc->opcode = IB_WC_SEND;
780			break;
781		case FW_RI_LOCAL_INV:
782			wc->opcode = IB_WC_LOCAL_INV;
783			break;
784		case FW_RI_FAST_REGISTER:
785			wc->opcode = IB_WC_REG_MR;
786
787			/* Invalidate the MR if the fastreg failed */
788			if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
789				c4iw_invalidate_mr(qhp->rhp,
790						   CQE_WRID_FR_STAG(&cqe));
791			break;
792		case C4IW_DRAIN_OPCODE:
793			wc->opcode = IB_WC_SEND;
794			break;
795		default:
796			printf("Unexpected opcode %d "
797			       "in the CQE received for QPID = 0x%0x\n",
798			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
799			ret = -EINVAL;
800			goto out;
801		}
802	}
803
804	if (cqe_flushed)
805		wc->status = IB_WC_WR_FLUSH_ERR;
806	else {
807
808		switch (CQE_STATUS(&cqe)) {
809		case T4_ERR_SUCCESS:
810			wc->status = IB_WC_SUCCESS;
811			break;
812		case T4_ERR_STAG:
813			wc->status = IB_WC_LOC_ACCESS_ERR;
814			break;
815		case T4_ERR_PDID:
816			wc->status = IB_WC_LOC_PROT_ERR;
817			break;
818		case T4_ERR_QPID:
819		case T4_ERR_ACCESS:
820			wc->status = IB_WC_LOC_ACCESS_ERR;
821			break;
822		case T4_ERR_WRAP:
823			wc->status = IB_WC_GENERAL_ERR;
824			break;
825		case T4_ERR_BOUND:
826			wc->status = IB_WC_LOC_LEN_ERR;
827			break;
828		case T4_ERR_INVALIDATE_SHARED_MR:
829		case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
830			wc->status = IB_WC_MW_BIND_ERR;
831			break;
832		case T4_ERR_CRC:
833		case T4_ERR_MARKER:
834		case T4_ERR_PDU_LEN_ERR:
835		case T4_ERR_OUT_OF_RQE:
836		case T4_ERR_DDP_VERSION:
837		case T4_ERR_RDMA_VERSION:
838		case T4_ERR_DDP_QUEUE_NUM:
839		case T4_ERR_MSN:
840		case T4_ERR_TBIT:
841		case T4_ERR_MO:
842		case T4_ERR_MSN_RANGE:
843		case T4_ERR_IRD_OVERFLOW:
844		case T4_ERR_OPCODE:
845		case T4_ERR_INTERNAL_ERR:
846			wc->status = IB_WC_FATAL_ERR;
847			break;
848		case T4_ERR_SWFLUSH:
849			wc->status = IB_WC_WR_FLUSH_ERR;
850			break;
851		default:
852			printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
853			       CQE_STATUS(&cqe), CQE_QPID(&cqe));
854			wc->status = IB_WC_FATAL_ERR;
855		}
856	}
857out:
858	if (wq)
859		spin_unlock(&qhp->lock);
860	return ret;
861}
862
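/*
 * ib_poll_cq handler.  Under the CQ lock, repeatedly call
 * c4iw_poll_cq_one(), retrying transparently when a CQE is skipped
 * (-EAGAIN) and stopping when the CQ is empty (-ENODATA) or num_entries
 * work completions have been returned.
 */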
863int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
864{
865	struct c4iw_cq *chp;
866	unsigned long flags;
867	int npolled;
868	int err = 0;
869
870	chp = to_c4iw_cq(ibcq);
871
872	spin_lock_irqsave(&chp->lock, flags);
873	for (npolled = 0; npolled < num_entries; ++npolled) {
874		do {
875			err = c4iw_poll_cq_one(chp, wc + npolled);
876		} while (err == -EAGAIN);
877		if (err)
878			break;
879	}
880	spin_unlock_irqrestore(&chp->lock, flags);
881	return !err || err == -ENODATA ? npolled : err;
882}
883
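/*
 * ib_destroy_cq handler.  Remove the CQID from the idr, drop the initial
 * reference and wait for any remaining holders, then hand the HW queue
 * back to firmware and free host memory via destroy_cq().  User CQs
 * release their CQID to the ucontext's pool rather than the device's.
 */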
884int c4iw_destroy_cq(struct ib_cq *ib_cq)
885{
886	struct c4iw_cq *chp;
887	struct c4iw_ucontext *ucontext;
888
889	CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
890	chp = to_c4iw_cq(ib_cq);
891
892	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
893	atomic_dec(&chp->refcnt);
894	wait_event(chp->wait, !atomic_read(&chp->refcnt));
895
896	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
897				  : NULL;
898	destroy_cq(&chp->rhp->rdev, &chp->cq,
899		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
900	kfree(chp);
901	return 0;
902}
903
904struct ib_cq *
905c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
906    struct ib_ucontext *ib_context, struct ib_udata *udata)
907{
908	int entries = attr->cqe;
909	int vector = attr->comp_vector;
910	struct c4iw_dev *rhp;
911	struct c4iw_cq *chp;
912	struct c4iw_create_cq_resp uresp;
913	struct c4iw_ucontext *ucontext = NULL;
914	int ret;
915	size_t memsize, hwentries;
916	struct c4iw_mm_entry *mm, *mm2;
917
918	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
919	if (attr->flags)
920		return ERR_PTR(-EINVAL);
921
922	rhp = to_c4iw_dev(ibdev);
923
924	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
925	if (!chp)
926		return ERR_PTR(-ENOMEM);
927
928
929	if (ib_context)
930		ucontext = to_c4iw_ucontext(ib_context);
931
932	/* account for the status page. */
933	entries++;
934
935	/* IQ needs one extra entry to differentiate full vs empty. */
936	entries++;
937
938	/*
	 * entries must be a multiple of 16 for HW.
940	 */
941	entries = roundup(entries, 16);
942
943	/*
	 * Make the actual HW queue 2x this size to avoid cidx_inc overflows.
945	 */
946	hwentries = min(entries * 2, rhp->rdev.hw_queue.t4_max_iq_size);
947
948	/*
949	 * Make HW queue at least 64 entries so GTS updates aren't too
950	 * frequent.
951	 */
952	if (hwentries < 64)
953		hwentries = 64;
954
955	memsize = hwentries * sizeof *chp->cq.queue;
956
957	/*
	 * memsize must be a multiple of the page size if it's a user cq.
959	 */
960	if (ucontext)
961		memsize = roundup(memsize, PAGE_SIZE);
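	/*
	 * Worked sizing example (hypothetical numbers): a request for 61
	 * CQEs becomes 63 after the status-page and full-vs-empty entries
	 * are added, rounds up to 64, and is doubled to 128 hwentries
	 * (assuming that stays under t4_max_iq_size).  With 32-byte CQEs
	 * that is a 4KB queue, already page aligned for the user case.
	 */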
962	chp->cq.size = hwentries;
963	chp->cq.memsize = memsize;
964	chp->cq.vector = vector;
965
966	ret = create_cq(&rhp->rdev, &chp->cq,
967			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
968	if (ret)
969		goto err1;
970
971	chp->rhp = rhp;
972	chp->cq.size--;				/* status page */
973	chp->ibcq.cqe = entries - 2;
974	spin_lock_init(&chp->lock);
975	spin_lock_init(&chp->comp_handler_lock);
976	atomic_set(&chp->refcnt, 1);
977	init_waitqueue_head(&chp->wait);
978	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
979	if (ret)
980		goto err2;
981
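	/*
	 * For user CQs, hand back two mmap keys in the create response:
	 * uresp.key maps the CQ memory itself and uresp.gts_key maps the
	 * BAR2/GTS doorbell page (chp->cq.bar2_pa).  The entries inserted
	 * with insert_mmap() are matched up again by the driver's mmap
	 * handler when the user library maps them.
	 */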
982	if (ucontext) {
983		ret = -ENOMEM;
984		mm = kmalloc(sizeof *mm, GFP_KERNEL);
985		if (!mm)
986			goto err3;
987		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
988		if (!mm2)
989			goto err4;
990
991		memset(&uresp, 0, sizeof(uresp));
992		uresp.qid_mask = rhp->rdev.cqmask;
993		uresp.cqid = chp->cq.cqid;
994		uresp.size = chp->cq.size;
995		uresp.memsize = chp->cq.memsize;
996		spin_lock(&ucontext->mmap_lock);
997		uresp.key = ucontext->key;
998		ucontext->key += PAGE_SIZE;
999		uresp.gts_key = ucontext->key;
1000		ucontext->key += PAGE_SIZE;
1001		spin_unlock(&ucontext->mmap_lock);
1002		ret = ib_copy_to_udata(udata, &uresp,
1003					sizeof(uresp) - sizeof(uresp.reserved));
1004		if (ret)
1005			goto err5;
1006
1007		mm->key = uresp.key;
1008		mm->addr = vtophys(chp->cq.queue);
1009		mm->len = chp->cq.memsize;
1010		insert_mmap(ucontext, mm);
1011
1012		mm2->key = uresp.gts_key;
1013		mm2->addr = chp->cq.bar2_pa;
1014		mm2->len = PAGE_SIZE;
1015		insert_mmap(ucontext, mm2);
1016	}
1017	CTR6(KTR_IW_CXGBE,
1018	    "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
1019	    __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
1020	    (unsigned long long) chp->cq.dma_addr);
1021	return &chp->ibcq;
1022err5:
1023	kfree(mm2);
1024err4:
1025	kfree(mm);
1026err3:
1027	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
1028err2:
1029	destroy_cq(&chp->rhp->rdev, &chp->cq,
1030		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
1031err1:
1032	kfree(chp);
1033	return ERR_PTR(ret);
1034}
1035
1036int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
1037{
1038	return -ENOSYS;
1039}
1040
1041int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
1042{
1043	struct c4iw_cq *chp;
1044	int ret = 0;
1045	unsigned long flag;
1046
1047	chp = to_c4iw_cq(ibcq);
1048	spin_lock_irqsave(&chp->lock, flag);
1049	t4_arm_cq(&chp->cq,
1050		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
1051	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
1052		ret = t4_cq_notempty(&chp->cq);
1053	spin_unlock_irqrestore(&chp->lock, flag);
1054	return ret;
1055}
#endif /* TCP_OFFLOAD */
1057