mlx5_ib_cq.c revision 331769
/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c 331769 2018-03-30 18:06:29Z hselasky $
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, int type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

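/*
 * Return the CQE at index n if it is currently owned by software, i.e. its
 * opcode is valid and its ownership bit matches the parity of the index for
 * this pass around the CQ ring; otherwise return NULL.
 */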
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

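/*
 * Translate a successful requester CQE into ib_wc fields: map the WQE opcode
 * carried in sop_drop_qpn to an ib_wc_opcode and fill in the byte count and
 * the IMM flag where applicable.
 */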
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE	   = 2,
};

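/*
 * Fill in an ib_wc for a receive completion: recover the wr_id from the RQ
 * or (X)SRQ, decode the opcode and immediate/invalidate data, the SLID, SL,
 * source QP and GRH flag, and, on RoCE ports, the network header type.
 */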
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq	  = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
			       (cqe->hds_ip_ext & CQE_L4_OK))))
			wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->slid	   = be16_to_cpu(cqe->slid);
	wc->sl		   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET)
		return;

	switch (wc->sl & 0x3) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_IB;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision
	*/
	return 0;
}

static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
	return addr;
}

static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  uint16_t idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}

	return;
}

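/*
 * Walk the send queue from the last polled WQE up to the WQE reported by the
 * CQE, byte-swapping any atomic response data in place, and then advance
 * sq.last_poll past the completed WQE.
 */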
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(dev->mdev, &buf->buf);
}

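/*
 * Decode a signature error CQE into an ib_sig_err item: classify the syndrome
 * as a guard, reftag or apptag error and record the expected and actual
 * values, the error offset and the memory key.
 */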
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

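/*
 * Generate flush-error completions in software for WQEs still outstanding on
 * the send (sw_send_comp) or receive (sw_recv_comp) queue; used when the
 * device is in internal error state and hardware can no longer return CQEs.
 */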
static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	unsigned int idx;
	int np;
	int i;

	wq = &qp->sq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0;  i < cur && np < num_entries; i++) {
		idx = wq->last_poll & (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
		wq->last_poll = wq->w_list[idx].next;
	}
	*npolled = np;
}

static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0;  i < cur && np < num_entries; i++) {
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Find uncompleted WQEs belonging to that cq and return flush-error completions for them */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_send_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}
}

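/*
 * Poll a single hardware CQE and translate it into *wc.  Returns 0 on success
 * and -EAGAIN when no software-owned CQE is available.  Resize and
 * signature-error CQEs are consumed internally and polling repeats.
 */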
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mr *mmkey;
	struct mlx5_ib_mr *mr;
	unsigned long flags;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		spin_lock_irqsave(&dev->mdev->priv.mr_table.lock, flags);
		mmkey = __mlx5_mr_lookup(dev->mdev,
					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		mr = to_mibmr(mmkey);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     (long long)mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		spin_unlock_irqrestore(&dev->mdev->priv.mr_table.lock, flags);
		goto repoll;
	}

	return 0;
}

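/*
 * Drain up to num_entries software-generated completions queued on
 * cq->wc_list by mlx5_ib_generate_wc() and copy them into wc[].
 */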
static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
			struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_ib_wc *soft_wc, *next;
	int npolled = 0;

	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
		if (npolled >= num_entries)
			break;

		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
			    cq->mcq.cqn);

		wc[npolled++] = soft_wc->wc;
		list_del(&soft_wc->list);
		kfree(soft_wc);
	}

	return npolled;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);
	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc);

	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);
out:
	spin_unlock_irqrestore(&cq->lock, flags);

	return soft_polled + npolled;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, irq_flags);
	if (cq->notify_flags != IB_CQ_NEXT_COMP)
		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
		ret = 1;
	spin_unlock_irqrestore(&cq->lock, irq_flags);

	mlx5_cq_arm(&cq->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page,
		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
		    to_mcq(ibcq)->mcq.cons_index);

	return ret;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
	    2 * PAGE_SIZE, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

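/*
 * Create a CQ backed by user memory: copy the create command from udata, pin
 * the CQ buffer and doorbell pages, and build the page list and CQ context
 * fields for the CREATE_CQ command.
 */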
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
			  int entries, u32 **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd;
	size_t ucmdlen;
	int page_shift;
	__be64 *pas;
	int npages;
	int ncont;
	void *cqc;
	int err;

	ucmdlen =
		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
		 sizeof(ucmd)) ? (sizeof(ucmd) -
				  sizeof(ucmd.reserved)) : sizeof(ucmd);

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if (ucmdlen == sizeof(ucmd) &&
	    ucmd.reserved != 0)
		return -EINVAL;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    (long long)ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	*index = to_mucontext(context)->uuari.uars[0].index;

	return 0;

err_db:
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
	ib_umem_release(cq->buf.umem);
}

static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

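/*
 * Create a kernel-owned CQ: allocate the doorbell record and CQE buffer,
 * initialize all CQEs to the invalid opcode, and build the page list and CQ
 * context fields for the CREATE_CQ command.
 */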
static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    u32 **cqb, int *index, int *inlen)
{
	__be64 *pas;
	void *cqc;
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_fill_page_array(&cq->buf.buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uuari.uars[0].index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

static void notify_soft_wc_handler(struct work_struct *work)
{
	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
					     notify_work);

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
				const struct ib_cq_init_attr *attr,
				struct ib_ucontext *context,
				struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq;
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	u32 *cqb = NULL;
	void *cqc;
	int cqe_size;
	unsigned int irqn;
	int eqn;
	int err;

	if (entries < 0 ||
	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
		return ERR_PTR(-EINVAL);

	if (check_cq_create_flags(attr->flags))
		return ERR_PTR(-EOPNOTSUPP);

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
				     &cqb, &cqe_size, &index, &inlen);
		if (err)
			goto err_create;
	} else {
		cqe_size = cache_line_size() == 128 ? 128 : 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			goto err_create;

		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
	}

	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cq->cqe_size = cqe_size;

	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
	MLX5_SET(cqc, cqc, uar_page, index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
		MLX5_SET(cqc, cqc, oi, 1);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq,
	    (struct mlx5_create_cq_mbox_in *)cqb, inlen);
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	INIT_LIST_HEAD(&cq->wc_list);

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}


	kvfree(cqb);
	return &cq->ibcq;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (context)
		destroy_cq_user(cq, context);
	else
		destroy_cq_kernel(dev, cq);

err_create:
	kfree(cq);

	return ERR_PTR(err);
}


int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	struct ib_ucontext *context = NULL;

	if (cq->uobject)
		context = cq->uobject->context;

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (context)
		destroy_cq_user(mcq, context);
	else
		destroy_cq_kernel(dev, mcq);

	kfree(mcq);

	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

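/*
 * Remove all CQEs belonging to the QP or SRQ identified by rsn from the CQ,
 * compacting the remaining entries and advancing the consumer index.
 */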
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -ENOSYS;

	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
					     cq_period, cq_count);
	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}

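/*
 * Copy the software-owned CQEs that arrived during a resize from the old
 * buffer into the resize buffer, fixing up the ownership bit of each copied
 * entry, until the RESIZE_CQ CQE is reached.
 */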
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void *cqc;
	u32 *in;
	int err;
	int npas;
	__be64 *pas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1 ||
	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
			     entries,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
		return -EINVAL;
	}

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;

	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, pas);

	MLX5_SET(modify_cq_in, in,
		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
		 MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
		 MLX5_MODIFY_CQ_MASK_PG_SIZE);

	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);

	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));

	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq,
	    (struct mlx5_modify_cq_mbox_in *)in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}

/* Called from atomic context */
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
{
	struct mlx5_ib_wc *soft_wc;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	unsigned long flags;

	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
	if (!soft_wc)
		return -ENOMEM;

	soft_wc->wc = *wc;
	spin_lock_irqsave(&cq->lock, flags);
	list_add_tail(&soft_wc->list, &cq->wc_list);
	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
	    wc->status != IB_WC_SUCCESS) {
		cq->notify_flags = 0;
		schedule_work(&cq->notify_work);
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	return 0;
}
1384