1/*
2 * Copyright (c) 2012 Mellanox Technologies, Inc.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <config.h>
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <pthread.h>
38#include <string.h>
39#include <errno.h>
40#include <unistd.h>
41
42#include <infiniband/opcode.h>
43
44#include "mlx5.h"
45#include "wqe.h"
46#include "doorbell.h"
47
48enum {
49	CQ_OK					=  0,
50	CQ_EMPTY				= -1,
51	CQ_POLL_ERR				= -2
52};
53
54enum {
	MLX5_CQ_MODIFY_RESIZE = 0,
56	MLX5_CQ_MODIFY_MODER = 1,
57	MLX5_CQ_MODIFY_MAPPING = 2,
58};
59
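/*
 * Tunables for the CQ stall/backoff mechanism used when stalling is
 * enabled on a CQ.  mlx5_stall_num_loop is the number of dummy
 * get_cycles() reads performed by the plain stall mode, while the
 * poll_min/poll_max/inc_step/dec_step values bound and adjust the
 * adaptive stall interval, measured in get_cycles() units (TSC ticks
 * on x86).  They are non-static, so they are presumably tuned from
 * elsewhere in the library (e.g. environment handling at context
 * creation).
 */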
60int mlx5_stall_num_loop = 60;
61int mlx5_stall_cq_poll_min = 60;
62int mlx5_stall_cq_poll_max = 100000;
63int mlx5_stall_cq_inc_step = 100;
64int mlx5_stall_cq_dec_step = 10;
65
66static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
67{
68	return (cqe->l4_hdr_type_etc >> 2) & 0x3;
69}
70
71static void *get_buf_cqe(struct mlx5_buf *buf, int n, int cqe_sz)
72{
73	return buf->buf + n * cqe_sz;
74}
75
76static void *get_cqe(struct mlx5_cq *cq, int n)
77{
78	return cq->active_buf->buf + n * cq->cqe_sz;
79}
80
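/*
 * Return the CQE at index n if it is valid and owned by software,
 * otherwise NULL.  A CQE belongs to software when its ownership bit
 * matches the wrap-around parity of the consumer index (the bit just
 * above the ring-size mask).  For 128-byte CQEs the reported fields
 * live in the second 64 bytes of the entry.
 */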
81static void *get_sw_cqe(struct mlx5_cq *cq, int n)
82{
83	void *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
84	struct mlx5_cqe64 *cqe64;
85
86	cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
87
88	if (likely(mlx5dv_get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
89	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibv_cq.cqe + 1)))) {
90		return cqe;
91	} else {
92		return NULL;
93	}
94}
95
96static void *next_cqe_sw(struct mlx5_cq *cq)
97{
98	return get_sw_cqe(cq, cq->cons_index);
99}
100
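/*
 * Publish the new consumer index (low 24 bits) in the CQ doorbell
 * record so hardware knows which CQEs have been consumed and may
 * overwrite them; any ordering against CQ buffer updates is left to
 * the callers.
 */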
101static void update_cons_index(struct mlx5_cq *cq)
102{
103	cq->dbrec[MLX5_CQ_SET_CI] = htobe32(cq->cons_index & 0xffffff);
104}
105
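/*
 * Fill in an ibv_wc for a successful requester (send-side) completion:
 * the WQE opcode is taken from the top byte of sop_drop_qpn and mapped
 * to the corresponding IBV_WC_* opcode.  RDMA reads and atomics also
 * report a byte count; for UMR WQEs the opcode recorded at post time
 * in wq->wr_data[] is reported instead.
 */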
106static inline void handle_good_req(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_wq *wq, int idx)
107{
108	switch (be32toh(cqe->sop_drop_qpn) >> 24) {
109	case MLX5_OPCODE_RDMA_WRITE_IMM:
110		wc->wc_flags |= IBV_WC_WITH_IMM;
111		SWITCH_FALLTHROUGH;
112	case MLX5_OPCODE_RDMA_WRITE:
113		wc->opcode    = IBV_WC_RDMA_WRITE;
114		break;
115	case MLX5_OPCODE_SEND_IMM:
116		wc->wc_flags |= IBV_WC_WITH_IMM;
117		SWITCH_FALLTHROUGH;
118	case MLX5_OPCODE_SEND:
119	case MLX5_OPCODE_SEND_INVAL:
120		wc->opcode    = IBV_WC_SEND;
121		break;
122	case MLX5_OPCODE_RDMA_READ:
123		wc->opcode    = IBV_WC_RDMA_READ;
124		wc->byte_len  = be32toh(cqe->byte_cnt);
125		break;
126	case MLX5_OPCODE_ATOMIC_CS:
127		wc->opcode    = IBV_WC_COMP_SWAP;
128		wc->byte_len  = 8;
129		break;
130	case MLX5_OPCODE_ATOMIC_FA:
131		wc->opcode    = IBV_WC_FETCH_ADD;
132		wc->byte_len  = 8;
133		break;
134	case MLX5_OPCODE_UMR:
135		wc->opcode = wq->wr_data[idx];
136		break;
137	case MLX5_OPCODE_TSO:
138		wc->opcode    = IBV_WC_TSO;
139		break;
140	}
141}
142
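/*
 * Responder (receive-side) completion handling.  The WRID comes either
 * from the SRQ (cqe->wqe_counter indexes the SRQ WQE directly, which is
 * then freed) or from the RQ ring at the current tail.  If hardware
 * scattered the payload inline into the CQE (MLX5_INLINE_SCATTER_32/64)
 * it is copied back into the posted receive buffer; for the 64-byte
 * flavour the data starts one cqe64 before the reported CQE.
 *
 * handle_responder_lazy() serves the extended poll API and only fills
 * wr_id/status on the ibv_cq_ex; the remaining fields are produced on
 * demand by the mlx5_cq_read_wc_*() callbacks.  handle_responder()
 * fills a complete struct ibv_wc for the legacy ibv_poll_cq() path.
 */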
143static inline int handle_responder_lazy(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe,
144					struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
145{
146	uint16_t	wqe_ctr;
147	struct mlx5_wq *wq;
148	struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
149	int err = IBV_WC_SUCCESS;
150
151	if (srq) {
152		wqe_ctr = be16toh(cqe->wqe_counter);
153		cq->ibv_cq.wr_id = srq->wrid[wqe_ctr];
154		mlx5_free_srq_wqe(srq, wqe_ctr);
155		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
156			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
157						    be32toh(cqe->byte_cnt));
158		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
159			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
160						    be32toh(cqe->byte_cnt));
161	} else {
162		if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
163			wq = &qp->rq;
164			if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
165				cq->flags |= MLX5_CQ_FLAGS_RX_CSUM_VALID;
166		} else {
167			wq = &(rsc_to_mrwq(cur_rsc)->rq);
168		}
169
170		wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
171		cq->ibv_cq.wr_id = wq->wrid[wqe_ctr];
172		++wq->tail;
173		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
174			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
175						    be32toh(cqe->byte_cnt));
176		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
177			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
178						    be32toh(cqe->byte_cnt));
179	}
180
181	return err;
182}
183
184static inline int handle_responder(struct ibv_wc *wc, struct mlx5_cqe64 *cqe,
185				   struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
186{
187	uint16_t	wqe_ctr;
188	struct mlx5_wq *wq;
189	struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
190	uint8_t g;
191	int err = 0;
192
193	wc->byte_len = be32toh(cqe->byte_cnt);
194	if (srq) {
195		wqe_ctr = be16toh(cqe->wqe_counter);
196		wc->wr_id = srq->wrid[wqe_ctr];
197		mlx5_free_srq_wqe(srq, wqe_ctr);
198		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
199			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
200						    wc->byte_len);
201		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
202			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
203						    wc->byte_len);
204	} else {
205		if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
206			wq = &qp->rq;
207			if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
208				wc->wc_flags |= (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) &
209						 !!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) &
210						(get_cqe_l3_hdr_type(cqe) ==
211						MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
212						IBV_WC_IP_CSUM_OK_SHIFT;
213		} else {
214			wq = &(rsc_to_mrwq(cur_rsc)->rq);
215		}
216
217		wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
218		wc->wr_id = wq->wrid[wqe_ctr];
219		++wq->tail;
220		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
221			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
222						    wc->byte_len);
223		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
224			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
225						    wc->byte_len);
226	}
227	if (err)
228		return err;
229
230	switch (cqe->op_own >> 4) {
231	case MLX5_CQE_RESP_WR_IMM:
232		wc->opcode	= IBV_WC_RECV_RDMA_WITH_IMM;
233		wc->wc_flags	|= IBV_WC_WITH_IMM;
234		wc->imm_data = cqe->imm_inval_pkey;
235		break;
236	case MLX5_CQE_RESP_SEND:
237		wc->opcode   = IBV_WC_RECV;
238		break;
239	case MLX5_CQE_RESP_SEND_IMM:
240		wc->opcode	= IBV_WC_RECV;
241		wc->wc_flags	|= IBV_WC_WITH_IMM;
242		wc->imm_data = cqe->imm_inval_pkey;
243		break;
244	case MLX5_CQE_RESP_SEND_INV:
245		wc->opcode = IBV_WC_RECV;
246		wc->wc_flags |= IBV_WC_WITH_INV;
247		wc->imm_data = be32toh(cqe->imm_inval_pkey);
248		break;
249	}
250	wc->slid	   = be16toh(cqe->slid);
251	wc->sl		   = (be32toh(cqe->flags_rqpn) >> 24) & 0xf;
252	wc->src_qp	   = be32toh(cqe->flags_rqpn) & 0xffffff;
253	wc->dlid_path_bits = cqe->ml_path & 0x7f;
254	g = (be32toh(cqe->flags_rqpn) >> 28) & 3;
255	wc->wc_flags |= g ? IBV_WC_GRH : 0;
256	wc->pkey_index     = be32toh(cqe->imm_inval_pkey) & 0xffff;
257
258	return IBV_WC_SUCCESS;
259}
260
261static void dump_cqe(FILE *fp, void *buf)
262{
263	uint32_t *p = buf;
264	int i;
265
266	for (i = 0; i < 16; i += 4)
267		fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[i]), be32toh(p[i + 1]),
268			be32toh(p[i + 2]), be32toh(p[i + 3]));
269}
270
271static enum ibv_wc_status mlx5_handle_error_cqe(struct mlx5_err_cqe *cqe)
272{
273	switch (cqe->syndrome) {
274	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
275		return IBV_WC_LOC_LEN_ERR;
276	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
277		return IBV_WC_LOC_QP_OP_ERR;
278	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
279		return IBV_WC_LOC_PROT_ERR;
280	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
281		return IBV_WC_WR_FLUSH_ERR;
282	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
283		return IBV_WC_MW_BIND_ERR;
284	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
285		return IBV_WC_BAD_RESP_ERR;
286	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
287		return IBV_WC_LOC_ACCESS_ERR;
288	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
289		return IBV_WC_REM_INV_REQ_ERR;
290	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
291		return IBV_WC_REM_ACCESS_ERR;
292	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
293		return IBV_WC_REM_OP_ERR;
294	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
295		return IBV_WC_RETRY_EXC_ERR;
296	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
297		return IBV_WC_RNR_RETRY_EXC_ERR;
298	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
299		return IBV_WC_REM_ABORT_ERR;
300	default:
301		return IBV_WC_GENERAL_ERR;
302	}
303}
304
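/*
 * Busy-wait helpers for CQ stalling.  On x86 they are based on the TSC
 * (rdtsc); on other architectures they compile to no-ops, which
 * effectively disables the stall optimization there.
 */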
305#if defined(__x86_64__) || defined (__i386__)
306static inline unsigned long get_cycles(void)
307{
308	uint32_t low, high;
309	uint64_t val;
310	asm volatile ("rdtsc" : "=a" (low), "=d" (high));
311	val = high;
312	val = (val << 32) | low;
313	return val;
314}
315
316static void mlx5_stall_poll_cq(void)
317{
318	int i;
319
320	for (i = 0; i < mlx5_stall_num_loop; i++)
321		(void)get_cycles();
322}
323static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
324{
	while (get_cycles() < cycles)
326		; /* Nothing */
327}
328static void mlx5_get_cycles(uint64_t *cycles)
329{
330	*cycles = get_cycles();
331}
332#else
333static void mlx5_stall_poll_cq(void)
334{
335}
336static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
337{
338}
339static void mlx5_get_cycles(uint64_t *cycles)
340{
341}
342#endif
343
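/*
 * Resource lookup helpers.  With CQE version 1 the CQE carries a user
 * index (srqn_uidx) that is resolved through the uidx table to a QP,
 * XSRQ or RWQ; with version 0 it carries the QPN/SRQN directly.  The
 * *cur_rsc / *cur_srq pointers cache the last resolved resource so that
 * a burst of CQEs for the same QP or SRQ needs only one table lookup.
 */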
344static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
345					      struct mlx5_resource **cur_rsc,
346					      uint32_t rsn, int cqe_ver)
347					      ALWAYS_INLINE;
348static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
349					      struct mlx5_resource **cur_rsc,
350					      uint32_t rsn, int cqe_ver)
351{
352	if (!*cur_rsc || (rsn != (*cur_rsc)->rsn))
353		*cur_rsc = cqe_ver ? mlx5_find_uidx(mctx, rsn) :
354				      (struct mlx5_resource *)mlx5_find_qp(mctx, rsn);
355
356	return rsc_to_mqp(*cur_rsc);
357}
358
359static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
360				  struct mlx5_resource **cur_rsc,
361				  struct mlx5_srq **cur_srq,
362				  uint32_t uidx, uint8_t *is_srq)
363				  ALWAYS_INLINE;
364static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
365				  struct mlx5_resource **cur_rsc,
366				  struct mlx5_srq **cur_srq,
367				  uint32_t uidx, uint8_t *is_srq)
368{
369	struct mlx5_qp *mqp;
370
371	if (!*cur_rsc || (uidx != (*cur_rsc)->rsn)) {
372		*cur_rsc = mlx5_find_uidx(mctx, uidx);
373		if (unlikely(!*cur_rsc))
374			return CQ_POLL_ERR;
375	}
376
377	switch ((*cur_rsc)->type) {
378	case MLX5_RSC_TYPE_QP:
379		mqp = rsc_to_mqp(*cur_rsc);
380		if (mqp->verbs_qp.qp.srq) {
381			*cur_srq = to_msrq(mqp->verbs_qp.qp.srq);
382			*is_srq = 1;
383		}
384		break;
385	case MLX5_RSC_TYPE_XSRQ:
386		*cur_srq = rsc_to_msrq(*cur_rsc);
387		*is_srq = 1;
388		break;
389	case MLX5_RSC_TYPE_RWQ:
390		break;
391	default:
392		return CQ_POLL_ERR;
393	}
394
395	return CQ_OK;
396}
397
398static inline int get_qp_ctx(struct mlx5_context *mctx,
399			     struct mlx5_resource **cur_rsc,
400			     uint32_t qpn)
401			     ALWAYS_INLINE;
402static inline int get_qp_ctx(struct mlx5_context *mctx,
403			     struct mlx5_resource **cur_rsc,
404			     uint32_t qpn)
405{
406	if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
407		/*
408		 * We do not have to take the QP table lock here,
409		 * because CQs will be locked while QPs are removed
410		 * from the table.
411		 */
412		*cur_rsc = (struct mlx5_resource *)mlx5_find_qp(mctx, qpn);
413		if (unlikely(!*cur_rsc))
414			return CQ_POLL_ERR;
415	}
416
417	return CQ_OK;
418}
419
420static inline int get_srq_ctx(struct mlx5_context *mctx,
421			      struct mlx5_srq **cur_srq,
			      uint32_t srqn)
423			      ALWAYS_INLINE;
424static inline int get_srq_ctx(struct mlx5_context *mctx,
425			      struct mlx5_srq **cur_srq,
426			      uint32_t srqn)
427{
428	if (!*cur_srq || (srqn != (*cur_srq)->srqn)) {
429		*cur_srq = mlx5_find_srq(mctx, srqn);
430		if (unlikely(!*cur_srq))
431			return CQ_POLL_ERR;
432	}
433
434	return CQ_OK;
435}
436
437static inline int get_cur_rsc(struct mlx5_context *mctx,
438			      int cqe_ver,
439			      uint32_t qpn,
440			      uint32_t srqn_uidx,
441			      struct mlx5_resource **cur_rsc,
442			      struct mlx5_srq **cur_srq,
443			      uint8_t *is_srq)
444{
445	int err;
446
447	if (cqe_ver) {
448		err = get_resp_ctx_v1(mctx, cur_rsc, cur_srq, srqn_uidx,
449				      is_srq);
450	} else {
451		if (srqn_uidx) {
452			*is_srq = 1;
453			err = get_srq_ctx(mctx, cur_srq, srqn_uidx);
454		} else {
455			err = get_qp_ctx(mctx, cur_rsc, qpn);
456		}
457	}
458
459	return err;
}
462
463static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
464				    struct mlx5_cqe64 **pcqe64,
465				    void **pcqe)
466				    ALWAYS_INLINE;
467static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
468				    struct mlx5_cqe64 **pcqe64,
469				    void **pcqe)
470{
471	void *cqe;
472	struct mlx5_cqe64 *cqe64;
473
474	cqe = next_cqe_sw(cq);
475	if (!cqe)
476		return CQ_EMPTY;
477
478	cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
479
480	++cq->cons_index;
481
482	VALGRIND_MAKE_MEM_DEFINED(cqe64, sizeof *cqe64);
483
484	/*
485	 * Make sure we read CQ entry contents after we've checked the
486	 * ownership bit.
487	 */
488	udma_from_device_barrier();
489
490#ifdef MLX5_DEBUG
491	{
492		struct mlx5_context *mctx = to_mctx(cq->ibv_cq.context);
493
494		if (mlx5_debug_mask & MLX5_DBG_CQ_CQE) {
495			FILE *fp = mctx->dbg_fp;
496
497			mlx5_dbg(fp, MLX5_DBG_CQ_CQE, "dump cqe for cqn 0x%x:\n", cq->cqn);
498			dump_cqe(fp, cqe64);
499		}
500	}
501#endif
502	*pcqe64 = cqe64;
503	*pcqe = cqe;
504
505	return CQ_OK;
506}
507
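/*
 * Parse one CQE that has already been claimed from the ring.  This is
 * shared by the legacy path (lazy == 0, fills the caller's struct
 * ibv_wc) and by the extended poll API (lazy == 1, stashes the CQE in
 * cq->cqe64 and fills only wr_id/status, leaving the other fields to
 * the read_* callbacks).  Error CQEs are translated to ibv_wc_status
 * values and, except for flush and retry-exceeded errors, dumped to the
 * debug file.
 */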
508static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
509				 struct mlx5_cqe64 *cqe64,
510				 void *cqe,
511				 struct mlx5_resource **cur_rsc,
512				 struct mlx5_srq **cur_srq,
513				 struct ibv_wc *wc,
514				 int cqe_ver, int lazy)
515				 ALWAYS_INLINE;
516static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
517				 struct mlx5_cqe64 *cqe64,
518				 void *cqe,
519				 struct mlx5_resource **cur_rsc,
520				 struct mlx5_srq **cur_srq,
521				 struct ibv_wc *wc,
522				 int cqe_ver, int lazy)
523{
524	struct mlx5_wq *wq;
525	uint16_t wqe_ctr;
526	uint32_t qpn;
527	uint32_t srqn_uidx;
528	int idx;
529	uint8_t opcode;
530	struct mlx5_err_cqe *ecqe;
531	int err = 0;
532	struct mlx5_qp *mqp;
533	struct mlx5_context *mctx;
534	uint8_t is_srq = 0;
535
536	mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
537	qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff;
538	if (lazy) {
539		cq->cqe64 = cqe64;
540		cq->flags &= (~MLX5_CQ_FLAGS_RX_CSUM_VALID);
541	} else {
542		wc->wc_flags = 0;
543		wc->qp_num = qpn;
544	}
545
546	opcode = mlx5dv_get_cqe_opcode(cqe64);
547	switch (opcode) {
548	case MLX5_CQE_REQ:
549	{
550		mqp = get_req_context(mctx, cur_rsc,
551				      (cqe_ver ? (be32toh(cqe64->srqn_uidx) & 0xffffff) : qpn),
552				      cqe_ver);
553		if (unlikely(!mqp))
554			return CQ_POLL_ERR;
555		wq = &mqp->sq;
556		wqe_ctr = be16toh(cqe64->wqe_counter);
557		idx = wqe_ctr & (wq->wqe_cnt - 1);
558		if (lazy) {
559			uint32_t wc_byte_len;
560
561			switch (be32toh(cqe64->sop_drop_qpn) >> 24) {
562			case MLX5_OPCODE_UMR:
563				cq->umr_opcode = wq->wr_data[idx];
564				break;
565
566			case MLX5_OPCODE_RDMA_READ:
567				wc_byte_len = be32toh(cqe64->byte_cnt);
568				goto scatter_out;
569			case MLX5_OPCODE_ATOMIC_CS:
570			case MLX5_OPCODE_ATOMIC_FA:
571				wc_byte_len = 8;
572
573			scatter_out:
574				if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
575					err = mlx5_copy_to_send_wqe(
576					    mqp, wqe_ctr, cqe, wc_byte_len);
577				else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
578					err = mlx5_copy_to_send_wqe(
579					    mqp, wqe_ctr, cqe - 1, wc_byte_len);
580				break;
581			}
582
583			cq->ibv_cq.wr_id = wq->wrid[idx];
584			cq->ibv_cq.status = err;
585		} else {
586			handle_good_req(wc, cqe64, wq, idx);
587
588			if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
589				err = mlx5_copy_to_send_wqe(mqp, wqe_ctr, cqe,
590							    wc->byte_len);
591			else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
592				err = mlx5_copy_to_send_wqe(
593				    mqp, wqe_ctr, cqe - 1, wc->byte_len);
594
595			wc->wr_id = wq->wrid[idx];
596			wc->status = err;
597		}
598
599		wq->tail = wq->wqe_head[idx] + 1;
600		break;
601	}
602	case MLX5_CQE_RESP_WR_IMM:
603	case MLX5_CQE_RESP_SEND:
604	case MLX5_CQE_RESP_SEND_IMM:
605	case MLX5_CQE_RESP_SEND_INV:
606		srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
607		err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc,
608				  cur_srq, &is_srq);
609		if (unlikely(err))
610			return CQ_POLL_ERR;
611
612		if (lazy)
613			cq->ibv_cq.status = handle_responder_lazy(cq, cqe64,
614							      *cur_rsc,
615							      is_srq ? *cur_srq : NULL);
616		else
617			wc->status = handle_responder(wc, cqe64, *cur_rsc,
618					      is_srq ? *cur_srq : NULL);
619		break;
620	case MLX5_CQE_RESIZE_CQ:
621		break;
622	case MLX5_CQE_REQ_ERR:
623	case MLX5_CQE_RESP_ERR:
624		srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
625		ecqe = (struct mlx5_err_cqe *)cqe64;
626		{
627			enum ibv_wc_status *pstatus = lazy ? &cq->ibv_cq.status : &wc->status;
628
629			*pstatus = mlx5_handle_error_cqe(ecqe);
630		}
631
632		if (!lazy)
633			wc->vendor_err = ecqe->vendor_err_synd;
634
635		if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR &&
636			     ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) {
637			FILE *fp = mctx->dbg_fp;
638			fprintf(fp, PFX "%s: got completion with error:\n",
639				mctx->hostname);
640			dump_cqe(fp, ecqe);
641			if (mlx5_freeze_on_error_cqe) {
642				fprintf(fp, PFX "freezing at poll cq...");
643				while (1)
644					sleep(10);
645			}
646		}
647
648		if (opcode == MLX5_CQE_REQ_ERR) {
649			mqp = get_req_context(mctx, cur_rsc,
650					      (cqe_ver ? srqn_uidx : qpn), cqe_ver);
651			if (unlikely(!mqp))
652				return CQ_POLL_ERR;
653			wq = &mqp->sq;
654			wqe_ctr = be16toh(cqe64->wqe_counter);
655			idx = wqe_ctr & (wq->wqe_cnt - 1);
656			if (lazy)
657				cq->ibv_cq.wr_id = wq->wrid[idx];
658			else
659				wc->wr_id = wq->wrid[idx];
660			wq->tail = wq->wqe_head[idx] + 1;
661		} else {
662			err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx,
663					  cur_rsc, cur_srq, &is_srq);
664			if (unlikely(err))
665				return CQ_POLL_ERR;
666
667			if (is_srq) {
668				wqe_ctr = be16toh(cqe64->wqe_counter);
669				if (lazy)
670					cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr];
671				else
672					wc->wr_id = (*cur_srq)->wrid[wqe_ctr];
673				mlx5_free_srq_wqe(*cur_srq, wqe_ctr);
674			} else {
675				switch ((*cur_rsc)->type) {
676				case MLX5_RSC_TYPE_RWQ:
677					wq = &(rsc_to_mrwq(*cur_rsc)->rq);
678					break;
679				default:
680					wq = &(rsc_to_mqp(*cur_rsc)->rq);
681					break;
682				}
683
684				if (lazy)
685					cq->ibv_cq.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
686				else
687					wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
688				++wq->tail;
689			}
690		}
691		break;
692	}
693
694	return CQ_OK;
695}
696
697static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
698				      struct mlx5_cqe64 *cqe64,
699				      void *cqe, int cqe_ver)
700				      ALWAYS_INLINE;
701static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
702				      struct mlx5_cqe64 *cqe64,
703				      void *cqe, int cqe_ver)
704{
705	return mlx5_parse_cqe(cq, cqe64, cqe, &cq->cur_rsc, &cq->cur_srq, NULL, cqe_ver, 1);
706}
707
708static inline int mlx5_poll_one(struct mlx5_cq *cq,
709				struct mlx5_resource **cur_rsc,
710				struct mlx5_srq **cur_srq,
711				struct ibv_wc *wc, int cqe_ver)
712				ALWAYS_INLINE;
713static inline int mlx5_poll_one(struct mlx5_cq *cq,
714				struct mlx5_resource **cur_rsc,
715				struct mlx5_srq **cur_srq,
716				struct ibv_wc *wc, int cqe_ver)
717{
718	struct mlx5_cqe64 *cqe64;
719	void *cqe;
720	int err;
721
722	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
723	if (err == CQ_EMPTY)
724		return err;
725
726	return mlx5_parse_cqe(cq, cqe64, cqe, cur_rsc, cur_srq, wc, cqe_ver, 0);
727}
728
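/*
 * Legacy ibv_poll_cq() implementation: optionally stall first, take the
 * CQ lock, poll up to ne CQEs, then publish the consumer index once.
 * In adaptive stall mode the stall interval grows when the CQ ran dry
 * before the caller's budget was used up, and shrinks when nothing was
 * found or the full budget was consumed.
 */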
729static inline int poll_cq(struct ibv_cq *ibcq, int ne,
730		      struct ibv_wc *wc, int cqe_ver)
731		      ALWAYS_INLINE;
732static inline int poll_cq(struct ibv_cq *ibcq, int ne,
733		      struct ibv_wc *wc, int cqe_ver)
734{
735	struct mlx5_cq *cq = to_mcq(ibcq);
736	struct mlx5_resource *rsc = NULL;
737	struct mlx5_srq *srq = NULL;
738	int npolled;
739	int err = CQ_OK;
740
741	if (cq->stall_enable) {
742		if (cq->stall_adaptive_enable) {
743			if (cq->stall_last_count)
744				mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
745		} else if (cq->stall_next_poll) {
746			cq->stall_next_poll = 0;
747			mlx5_stall_poll_cq();
748		}
749	}
750
751	mlx5_spin_lock(&cq->lock);
752
753	for (npolled = 0; npolled < ne; ++npolled) {
754		err = mlx5_poll_one(cq, &rsc, &srq, wc + npolled, cqe_ver);
755		if (err != CQ_OK)
756			break;
757	}
758
759	update_cons_index(cq);
760
761	mlx5_spin_unlock(&cq->lock);
762
763	if (cq->stall_enable) {
764		if (cq->stall_adaptive_enable) {
765			if (npolled == 0) {
766				cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
767						       mlx5_stall_cq_poll_min);
768				mlx5_get_cycles(&cq->stall_last_count);
769			} else if (npolled < ne) {
770				cq->stall_cycles = min(cq->stall_cycles+mlx5_stall_cq_inc_step,
771						       mlx5_stall_cq_poll_max);
772				mlx5_get_cycles(&cq->stall_last_count);
773			} else {
774				cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
775						       mlx5_stall_cq_poll_min);
776				cq->stall_last_count = 0;
777			}
778		} else if (err == CQ_EMPTY) {
779			cq->stall_next_poll = 1;
780		}
781	}
782
783	return err == CQ_POLL_ERR ? err : npolled;
784}
785
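/*
 * Everything below implements the extended poll API
 * (ibv_start_poll()/ibv_next_poll()/ibv_end_poll()).  The many small
 * wrappers further down specialize the three core routines for a given
 * locking, stall and CQE-version policy, and mlx5_cq_fill_pfns() wires
 * the matching set into the ibv_cq_ex at CQ creation time.
 *
 * Roughly, an application drives them like this (illustrative sketch
 * only; process_one() is a placeholder, not part of this file):
 *
 *	struct ibv_poll_cq_attr attr = {};
 *
 *	if (ibv_start_poll(cq_ex, &attr) == 0) {
 *		do {
 *			process_one(cq_ex->wr_id, cq_ex->status,
 *				    ibv_wc_read_opcode(cq_ex));
 *		} while (ibv_next_poll(cq_ex) == 0);
 *		ibv_end_poll(cq_ex);
 *	}
 */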
enum polling_mode {
787	POLLING_MODE_NO_STALL,
788	POLLING_MODE_STALL,
789	POLLING_MODE_STALL_ADAPTIVE
790};
791
792static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
793				  int lock, enum polling_mode stall)
794				  ALWAYS_INLINE;
795static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
796				  int lock, enum polling_mode stall)
797{
798	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
799
800	update_cons_index(cq);
801
802	if (lock)
803		mlx5_spin_unlock(&cq->lock);
804
805	if (stall) {
806		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
807			if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
808				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
809						       mlx5_stall_cq_poll_min);
810				mlx5_get_cycles(&cq->stall_last_count);
811			} else if (cq->flags & MLX5_CQ_FLAGS_EMPTY_DURING_POLL) {
812				cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step,
813						       mlx5_stall_cq_poll_max);
814				mlx5_get_cycles(&cq->stall_last_count);
815			} else {
816				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
817						       mlx5_stall_cq_poll_min);
818				cq->stall_last_count = 0;
819			}
820		} else if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
821			cq->stall_next_poll = 1;
822		}
823
824		cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES | MLX5_CQ_FLAGS_EMPTY_DURING_POLL);
825	}
826}
827
828static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
829				  int lock, enum polling_mode stall, int cqe_version)
830				  ALWAYS_INLINE;
831static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
832				  int lock, enum polling_mode stall, int cqe_version)
833{
834	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
835	struct mlx5_cqe64 *cqe64;
836	void *cqe;
837	int err;
838
839	if (unlikely(attr->comp_mask))
840		return EINVAL;
841
842	if (stall) {
843		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
844			if (cq->stall_last_count)
845				mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
846		} else if (cq->stall_next_poll) {
847			cq->stall_next_poll = 0;
848			mlx5_stall_poll_cq();
849		}
850	}
851
852	if (lock)
853		mlx5_spin_lock(&cq->lock);
854
855	cq->cur_rsc = NULL;
856	cq->cur_srq = NULL;
857
858	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
859	if (err == CQ_EMPTY) {
860		if (lock)
861			mlx5_spin_unlock(&cq->lock);
862
863		if (stall) {
864			if (stall == POLLING_MODE_STALL_ADAPTIVE) {
865				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
866						mlx5_stall_cq_poll_min);
867				mlx5_get_cycles(&cq->stall_last_count);
868			} else {
869				cq->stall_next_poll = 1;
870			}
871		}
872
873		return ENOENT;
874	}
875
876	if (stall)
877		cq->flags |= MLX5_CQ_FLAGS_FOUND_CQES;
878
879	err = mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
880	if (lock && err)
881		mlx5_spin_unlock(&cq->lock);
882
883	if (stall && err) {
884		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
885			cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
886						mlx5_stall_cq_poll_min);
887			cq->stall_last_count = 0;
888		}
889
890		cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES);
891	}
892
893	return err;
894}
895
896static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
897				 enum polling_mode stall, int cqe_version)
898				 ALWAYS_INLINE;
899static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
900				 enum polling_mode stall,
901				 int cqe_version)
902{
903	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
904	struct mlx5_cqe64 *cqe64;
905	void *cqe;
906	int err;
907
908	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
909	if (err == CQ_EMPTY) {
910		if (stall == POLLING_MODE_STALL_ADAPTIVE)
911			cq->flags |= MLX5_CQ_FLAGS_EMPTY_DURING_POLL;
912
913		return ENOENT;
914	}
915
916	return mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
917}
918
919static inline int mlx5_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq)
920{
921	return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0);
922}
923
924static inline int mlx5_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq)
925{
926	return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1);
927}
928
929static inline int mlx5_next_poll_v0(struct ibv_cq_ex *ibcq)
930{
931	return mlx5_next_poll(ibcq, 0, 0);
932}
933
934static inline int mlx5_next_poll_v1(struct ibv_cq_ex *ibcq)
935{
936	return mlx5_next_poll(ibcq, 0, 1);
937}
938
939static inline int mlx5_start_poll_v0(struct ibv_cq_ex *ibcq,
940				     struct ibv_poll_cq_attr *attr)
941{
942	return mlx5_start_poll(ibcq, attr, 0, 0, 0);
943}
944
945static inline int mlx5_start_poll_v1(struct ibv_cq_ex *ibcq,
946				     struct ibv_poll_cq_attr *attr)
947{
948	return mlx5_start_poll(ibcq, attr, 0, 0, 1);
949}
950
951static inline int mlx5_start_poll_v0_lock(struct ibv_cq_ex *ibcq,
952					  struct ibv_poll_cq_attr *attr)
953{
954	return mlx5_start_poll(ibcq, attr, 1, 0, 0);
955}
956
957static inline int mlx5_start_poll_v1_lock(struct ibv_cq_ex *ibcq,
958					  struct ibv_poll_cq_attr *attr)
959{
960	return mlx5_start_poll(ibcq, attr, 1, 0, 1);
961}
962
963static inline int mlx5_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq,
964							 struct ibv_poll_cq_attr *attr)
965{
966	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0);
967}
968
969static inline int mlx5_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq,
970						struct ibv_poll_cq_attr *attr)
971{
972	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0);
973}
974
975static inline int mlx5_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq,
976							 struct ibv_poll_cq_attr *attr)
977{
978	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1);
979}
980
981static inline int mlx5_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq,
982						struct ibv_poll_cq_attr *attr)
983{
984	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1);
985}
986
987static inline int mlx5_start_poll_stall_v0(struct ibv_cq_ex *ibcq,
988					   struct ibv_poll_cq_attr *attr)
989{
990	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0);
991}
992
993static inline int mlx5_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq,
994						    struct ibv_poll_cq_attr *attr)
995{
996	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0);
997}
998
999static inline int mlx5_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq,
1000						    struct ibv_poll_cq_attr *attr)
1001{
1002	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1);
1003}
1004
1005static inline int mlx5_start_poll_stall_v1(struct ibv_cq_ex *ibcq,
1006					   struct ibv_poll_cq_attr *attr)
1007{
1008	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1);
1009}
1010
1011static inline void mlx5_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq)
1012{
1013	_mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE);
1014}
1015
1016static inline void mlx5_end_poll_stall_lock(struct ibv_cq_ex *ibcq)
1017{
1018	_mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL);
1019}
1020
1021static inline void mlx5_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq)
1022{
1023	_mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE);
1024}
1025
1026static inline void mlx5_end_poll_stall(struct ibv_cq_ex *ibcq)
1027{
1028	_mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL);
1029}
1030
1031static inline void mlx5_end_poll(struct ibv_cq_ex *ibcq)
1032{
1033	_mlx5_end_poll(ibcq, 0, 0);
1034}
1035
1036static inline void mlx5_end_poll_lock(struct ibv_cq_ex *ibcq)
1037{
1038	_mlx5_end_poll(ibcq, 1, 0);
1039}
1040
1041int mlx5_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
1042{
1043	return poll_cq(ibcq, ne, wc, 0);
1044}
1045
1046int mlx5_poll_cq_v1(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
1047{
1048	return poll_cq(ibcq, ne, wc, 1);
1049}
1050
1051static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
1052{
1053	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1054
1055	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1056	case MLX5_CQE_RESP_WR_IMM:
1057		return IBV_WC_RECV_RDMA_WITH_IMM;
1058	case MLX5_CQE_RESP_SEND:
1059	case MLX5_CQE_RESP_SEND_IMM:
1060	case MLX5_CQE_RESP_SEND_INV:
1061		return IBV_WC_RECV;
1062	case MLX5_CQE_REQ:
1063		switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) {
1064		case MLX5_OPCODE_RDMA_WRITE_IMM:
1065		case MLX5_OPCODE_RDMA_WRITE:
1066			return IBV_WC_RDMA_WRITE;
1067		case MLX5_OPCODE_SEND_IMM:
1068		case MLX5_OPCODE_SEND:
1069		case MLX5_OPCODE_SEND_INVAL:
1070			return IBV_WC_SEND;
1071		case MLX5_OPCODE_RDMA_READ:
1072			return IBV_WC_RDMA_READ;
1073		case MLX5_OPCODE_ATOMIC_CS:
1074			return IBV_WC_COMP_SWAP;
1075		case MLX5_OPCODE_ATOMIC_FA:
1076			return IBV_WC_FETCH_ADD;
1077		case MLX5_OPCODE_UMR:
1078			return cq->umr_opcode;
1079		case MLX5_OPCODE_TSO:
1080			return IBV_WC_TSO;
1081		}
1082	}
1083
1084#ifdef MLX5_DEBUG
1085{
1086	struct mlx5_context *ctx = to_mctx(ibcq->context);
1087
	mlx5_dbg(ctx->dbg_fp, MLX5_DBG_CQ_CQE, "unexpected opcode in cqe\n");
1089}
1090#endif
1091	return 0;
1092}
1093
1094static inline uint32_t mlx5_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
1095{
1096	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1097
1098	return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff;
1099}
1100
1101static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
1102{
1103	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1104	int wc_flags = 0;
1105
1106	if (cq->flags & MLX5_CQ_FLAGS_RX_CSUM_VALID)
1107		wc_flags = (!!(cq->cqe64->hds_ip_ext & MLX5_CQE_L4_OK) &
1108				 !!(cq->cqe64->hds_ip_ext & MLX5_CQE_L3_OK) &
1109				 (get_cqe_l3_hdr_type(cq->cqe64) ==
1110				  MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
1111				IBV_WC_IP_CSUM_OK_SHIFT;
1112
1113	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1114	case MLX5_CQE_RESP_WR_IMM:
1115	case MLX5_CQE_RESP_SEND_IMM:
1116		wc_flags	|= IBV_WC_WITH_IMM;
1117		break;
1118	case MLX5_CQE_RESP_SEND_INV:
1119		wc_flags |= IBV_WC_WITH_INV;
1120		break;
1121	}
1122
1123	wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ? IBV_WC_GRH : 0;
1124	return wc_flags;
1125}
1126
1127static inline uint32_t mlx5_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
1128{
1129	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1130
1131	return be32toh(cq->cqe64->byte_cnt);
1132}
1133
1134static inline uint32_t mlx5_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
1135{
1136	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1137	struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cq->cqe64;
1138
1139	return ecqe->vendor_err_synd;
1140}
1141
1142static inline uint32_t mlx5_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
1143{
1144	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1145
1146	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
1147	case MLX5_CQE_RESP_SEND_INV:
1148		return be32toh(cq->cqe64->imm_inval_pkey);
1149	default:
1150		return cq->cqe64->imm_inval_pkey;
1151	}
1152}
1153
1154static inline uint32_t mlx5_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
1155{
1156	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1157
1158	return (uint32_t)be16toh(cq->cqe64->slid);
1159}
1160
1161static inline uint8_t mlx5_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
1162{
1163	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1164
1165	return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf;
1166}
1167
1168static inline uint32_t mlx5_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
1169{
1170	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1171
1172	return be32toh(cq->cqe64->flags_rqpn) & 0xffffff;
1173}
1174
1175static inline uint8_t mlx5_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
1176{
1177	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1178
1179	return cq->cqe64->ml_path & 0x7f;
1180}
1181
1182static inline uint64_t mlx5_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
1183{
1184	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1185
1186	return be64toh(cq->cqe64->timestamp);
1187}
1188
1189static inline uint16_t mlx5_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq)
1190{
1191	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1192
1193	return be16toh(cq->cqe64->vlan_info);
1194}
1195
1196static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
1197{
1198	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
1199
1200	return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK;
1201}
1202
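/*
 * The poll entry points are specialized at compile time (constant
 * arguments + ALWAYS_INLINE) for every combination of locking, stall
 * mode and CQE version.  The ops[] table below is indexed by OR-ing the
 * SINGLE_THREADED, STALL, V1 and ADAPTIVE bits, and mlx5_cq_fill_pfns()
 * selects the entry that matches the CQ's configuration.
 */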
1203#define BIT(i) (1UL << (i))
1204
1205#define SINGLE_THREADED BIT(0)
1206#define STALL BIT(1)
1207#define V1 BIT(2)
1208#define ADAPTIVE BIT(3)
1209
1210#define mlx5_start_poll_name(cqe_ver, lock, stall, adaptive) \
1211	mlx5_start_poll##adaptive##stall##cqe_ver##lock
1212#define mlx5_next_poll_name(cqe_ver, adaptive) \
1213	mlx5_next_poll##adaptive##cqe_ver
1214#define mlx5_end_poll_name(lock, stall, adaptive) \
1215	mlx5_end_poll##adaptive##stall##lock
1216
1217#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive) { \
1218		.start_poll = &mlx5_start_poll_name(cqe_ver, lock, stall, adaptive), \
1219		.next_poll = &mlx5_next_poll_name(cqe_ver, adaptive), \
1220		.end_poll = &mlx5_end_poll_name(lock, stall, adaptive), \
1221	}
1222
1223static const struct op
1224{
1225	int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr);
1226	int (*next_poll)(struct ibv_cq_ex *ibcq);
1227	void (*end_poll)(struct ibv_cq_ex *ibcq);
1228} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + 1] = {
1229	[V1] =  POLL_FN_ENTRY(_v1, _lock, , ),
1230	[0] =  POLL_FN_ENTRY(_v0, _lock, , ),
1231	[V1 | SINGLE_THREADED] =  POLL_FN_ENTRY(_v1, , , ),
1232	[SINGLE_THREADED] =  POLL_FN_ENTRY(_v0, , , ),
1233	[V1 | STALL] =  POLL_FN_ENTRY(_v1, _lock, _stall, ),
1234	[STALL] =  POLL_FN_ENTRY(_v0, _lock, _stall, ),
1235	[V1 | SINGLE_THREADED | STALL] =  POLL_FN_ENTRY(_v1, , _stall, ),
1236	[SINGLE_THREADED | STALL] =  POLL_FN_ENTRY(_v0, , _stall, ),
1237	[V1 | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive),
1238	[STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive),
1239	[V1 | SINGLE_THREADED | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v1, , _stall, _adaptive),
1240	[SINGLE_THREADED | STALL | ADAPTIVE] =  POLL_FN_ENTRY(_v0, , _stall, _adaptive),
1241};
1242
1243void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
1244{
1245	struct mlx5_context *mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
1246	const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) |
1247					 (mctx->cqe_version ? V1 : 0) |
1248					 (cq->flags & MLX5_CQ_FLAGS_SINGLE_THREADED ?
1249						      SINGLE_THREADED : 0) |
1250					 (cq->stall_enable ? STALL : 0)];
1251
1252	cq->ibv_cq.start_poll = poll_ops->start_poll;
1253	cq->ibv_cq.next_poll = poll_ops->next_poll;
1254	cq->ibv_cq.end_poll = poll_ops->end_poll;
1255
1256	cq->ibv_cq.read_opcode = mlx5_cq_read_wc_opcode;
1257	cq->ibv_cq.read_vendor_err = mlx5_cq_read_wc_vendor_err;
1258	cq->ibv_cq.read_wc_flags = mlx5_cq_read_wc_flags;
1259	if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
1260		cq->ibv_cq.read_byte_len = mlx5_cq_read_wc_byte_len;
1261	if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
1262		cq->ibv_cq.read_imm_data = mlx5_cq_read_wc_imm_data;
1263	if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
1264		cq->ibv_cq.read_qp_num = mlx5_cq_read_wc_qp_num;
1265	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
1266		cq->ibv_cq.read_src_qp = mlx5_cq_read_wc_src_qp;
1267	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
1268		cq->ibv_cq.read_slid = mlx5_cq_read_wc_slid;
1269	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
1270		cq->ibv_cq.read_sl = mlx5_cq_read_wc_sl;
1271	if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
1272		cq->ibv_cq.read_dlid_path_bits = mlx5_cq_read_wc_dlid_path_bits;
1273	if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
1274		cq->ibv_cq.read_completion_ts = mlx5_cq_read_wc_completion_ts;
1275	if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
1276		cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan;
1277	if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
1278		cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag;
1279}
1280
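/*
 * Arm the CQ for a completion event: encode the arm sequence number,
 * the command (solicited-only or any completion) and the current
 * consumer index into the arm doorbell record, then ring the CQ
 * doorbell in the UAR with the same value plus the CQN.
 * mmio_wc_start()/mmio_flush_writes() order the doorbell-record update
 * before the UAR write and make sure the latter is pushed out.
 */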
1281int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited)
1282{
1283	struct mlx5_cq *cq = to_mcq(ibvcq);
1284	struct mlx5_context *ctx = to_mctx(ibvcq->context);
1285	uint32_t doorbell[2];
1286	uint32_t sn;
1287	uint32_t ci;
1288	uint32_t cmd;
1289
1290	sn  = cq->arm_sn & 3;
1291	ci  = cq->cons_index & 0xffffff;
1292	cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;
1293
1294	cq->dbrec[MLX5_CQ_ARM_DB] = htobe32(sn << 28 | cmd | ci);
1295
1296	/*
1297	 * Make sure that the doorbell record in host memory is
1298	 * written before ringing the doorbell via PCI WC MMIO.
1299	 */
1300	mmio_wc_start();
1301
1302	doorbell[0] = htobe32(sn << 28 | cmd | ci);
1303	doorbell[1] = htobe32(cq->cqn);
1304
1305	mlx5_write64(doorbell, ctx->uar[0] + MLX5_CQ_DOORBELL, &ctx->lock32);
1306
1307	mmio_flush_writes();
1308
1309	return 0;
1310}
1311
1312void mlx5_cq_event(struct ibv_cq *cq)
1313{
1314	to_mcq(cq)->arm_sn++;
1315}
1316
1317static int is_equal_rsn(struct mlx5_cqe64 *cqe64, uint32_t rsn)
1318{
1319	return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff);
1320}
1321
1322static inline int is_equal_uidx(struct mlx5_cqe64 *cqe64, uint32_t uidx)
1323{
1324	return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff);
1325}
1326
1327static inline int is_responder(uint8_t opcode)
1328{
1329	switch (opcode) {
1330	case MLX5_CQE_RESP_WR_IMM:
1331	case MLX5_CQE_RESP_SEND:
1332	case MLX5_CQE_RESP_SEND_IMM:
1333	case MLX5_CQE_RESP_SEND_INV:
1334	case MLX5_CQE_RESP_ERR:
1335		return 1;
1336	}
1337
1338	return 0;
1339}
1340
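/*
 * Helper for __mlx5_cq_clean(): decide whether a CQE belongs to the
 * resource being cleaned (matching user index for CQE version 1,
 * matching QPN/SRQN otherwise) and, for responder CQEs that consumed an
 * SRQ entry, give the SRQ WQE back before the CQE is discarded.
 */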
1341static inline int free_res_cqe(struct mlx5_cqe64 *cqe64, uint32_t rsn,
1342			       struct mlx5_srq *srq, int cqe_version)
1343{
1344	if (cqe_version) {
1345		if (is_equal_uidx(cqe64, rsn)) {
1346			if (srq && is_responder(mlx5dv_get_cqe_opcode(cqe64)))
1347				mlx5_free_srq_wqe(srq,
1348						  be16toh(cqe64->wqe_counter));
1349			return 1;
1350		}
1351	} else {
1352		if (is_equal_rsn(cqe64, rsn)) {
1353			if (srq && (be32toh(cqe64->srqn_uidx) & 0xffffff))
1354				mlx5_free_srq_wqe(srq,
1355						  be16toh(cqe64->wqe_counter));
1356			return 1;
1357		}
1358	}
1359
1360	return 0;
1361}
1362
1363void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t rsn, struct mlx5_srq *srq)
1364{
1365	uint32_t prod_index;
1366	int nfreed = 0;
1367	struct mlx5_cqe64 *cqe64, *dest64;
1368	void *cqe, *dest;
1369	uint8_t owner_bit;
1370	int cqe_version;
1371
1372	if (!cq || cq->flags & MLX5_CQ_FLAGS_DV_OWNED)
1373		return;
1374
1375	/*
1376	 * First we need to find the current producer index, so we
1377	 * know where to start cleaning from.  It doesn't matter if HW
1378	 * adds new entries after this loop -- the QP we're worried
1379	 * about is already in RESET, so the new entries won't come
1380	 * from our QP and therefore don't need to be checked.
1381	 */
1382	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
1383		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
1384			break;
1385
1386	/*
1387	 * Now sweep backwards through the CQ, removing CQ entries
1388	 * that match our QP by copying older entries on top of them.
1389	 */
1390	cqe_version = (to_mctx(cq->ibv_cq.context))->cqe_version;
1391	while ((int) --prod_index - (int) cq->cons_index >= 0) {
1392		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
1393		cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
1394		if (free_res_cqe(cqe64, rsn, srq, cqe_version)) {
1395			++nfreed;
1396		} else if (nfreed) {
1397			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
1398			dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
1399			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
1400			memcpy(dest, cqe, cq->cqe_sz);
1401			dest64->op_own = owner_bit |
1402				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
1403		}
1404	}
1405
1406	if (nfreed) {
1407		cq->cons_index += nfreed;
1408		/*
1409		 * Make sure update of buffer contents is done before
1410		 * updating consumer index.
1411		 */
1412		udma_to_device_barrier();
1413		update_cons_index(cq);
1414	}
1415}
1416
1417void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq)
1418{
1419	mlx5_spin_lock(&cq->lock);
1420	__mlx5_cq_clean(cq, qpn, srq);
1421	mlx5_spin_unlock(&cq->lock);
1422}
1423
1424static uint8_t sw_ownership_bit(int n, int nent)
1425{
1426	return (n & nent) ? 1 : 0;
1427}
1428
1429static int is_hw(uint8_t own, int n, int mask)
1430{
1431	return (own & MLX5_CQE_OWNER_MASK) ^ !!(n & (mask + 1));
1432}
1433
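/*
 * Used during CQ resize: copy the software-owned CQEs that are still
 * pending in the old (active) buffer into the resize buffer, fixing up
 * each copy's ownership bit for the new ring size, until the special
 * MLX5_CQE_RESIZE_CQ entry is reached.  Finding a hardware-owned entry
 * or wrapping all the way around indicates a driver/firmware problem
 * and is only reported to stderr.
 */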
1434void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq)
1435{
1436	struct mlx5_cqe64 *scqe64;
1437	struct mlx5_cqe64 *dcqe64;
1438	void *start_cqe;
1439	void *scqe;
1440	void *dcqe;
1441	int ssize;
1442	int dsize;
1443	int i;
1444	uint8_t sw_own;
1445
1446	ssize = cq->cqe_sz;
1447	dsize = cq->resize_cqe_sz;
1448
1449	i = cq->cons_index;
1450	scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
1451	scqe64 = ssize == 64 ? scqe : scqe + 64;
1452	start_cqe = scqe;
1453	if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
1454		fprintf(stderr, "expected cqe in sw ownership\n");
1455		return;
1456	}
1457
1458	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
1459		dcqe = get_buf_cqe(cq->resize_buf, (i + 1) & (cq->resize_cqes - 1), dsize);
1460		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
1461		sw_own = sw_ownership_bit(i + 1, cq->resize_cqes);
1462		memcpy(dcqe, scqe, ssize);
1463		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
1464
1465		++i;
1466		scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
1467		scqe64 = ssize == 64 ? scqe : scqe + 64;
1468		if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
1469			fprintf(stderr, "expected cqe in sw ownership\n");
1470			return;
1471		}
1472
1473		if (scqe == start_cqe) {
1474			fprintf(stderr, "resize CQ failed to get resize CQE\n");
1475			return;
1476		}
1477	}
1478	++cq->cons_index;
1479}
1480
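/*
 * Allocate and initialize a CQ buffer.  The allocation type (anonymous
 * vs. huge pages) follows the generic mlx5 preference logic, apparently
 * controlled through the MLX5_CQ_PREFIX / "HUGE_CQ" settings.  Every
 * entry's opcode is set to MLX5_CQE_INVALID so that get_sw_cqe() sees
 * an empty ring until hardware writes real CQEs; for 128-byte CQEs the
 * mlx5_cqe64 sits in the second 64 bytes of each entry.
 */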
1481int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
1482		      struct mlx5_buf *buf, int nent, int cqe_sz)
1483{
1484	struct mlx5_cqe64 *cqe;
1485	int i;
1486	struct mlx5_device *dev = to_mdev(mctx->ibv_ctx.device);
1487	int ret;
1488	enum mlx5_alloc_type type;
1489	enum mlx5_alloc_type default_type = MLX5_ALLOC_TYPE_ANON;
1490
1491	if (mlx5_use_huge("HUGE_CQ"))
1492		default_type = MLX5_ALLOC_TYPE_HUGE;
1493
1494	mlx5_get_alloc_type(MLX5_CQ_PREFIX, &type, default_type);
1495
1496	ret = mlx5_alloc_prefered_buf(mctx, buf,
1497				      align(nent * cqe_sz, dev->page_size),
1498				      dev->page_size,
1499				      type,
1500				      MLX5_CQ_PREFIX);
1501
1502	if (ret)
1503		return -1;
1504
1505	memset(buf->buf, 0, nent * cqe_sz);
1506
1507	for (i = 0; i < nent; ++i) {
1508		cqe = buf->buf + i * cqe_sz;
1509		cqe += cqe_sz == 128 ? 1 : 0;
1510		cqe->op_own = MLX5_CQE_INVALID << 4;
1511	}
1512
1513	return 0;
1514}
1515
1516int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
1517{
1518	return mlx5_free_actual_buf(ctx, buf);
1519}
1520