1/*
2 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4 * Copyright (c) 2006 Cisco Systems.  All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#if HAVE_CONFIG_H
36#  include <config.h>
37#endif /* HAVE_CONFIG_H */
38
39#include <stdio.h>
40#include <stdlib.h>
41#include <pthread.h>
42#include <netinet/in.h>
43#include <string.h>
44
45#include <infiniband/opcode.h>
46
47#include "mthca.h"
48#include "doorbell.h"
49
/*
 * Value passed as the last argument of mthca_write64() whenever this
 * file rings a CQ doorbell (presumably the doorbell register offset).
 */
enum {
	MTHCA_CQ_DOORBELL = 0x20
};
53
/*
 * Result codes of mthca_poll_one():
 *   CQ_OK       - a completion was consumed and reported,
 *   CQ_EMPTY    - no software-owned CQE is available,
 *   CQ_POLL_ERR - the CQE names a QP that could not be looked up.
 */
enum {
	CQ_OK       =  0,
	CQ_EMPTY    = -1,
	CQ_POLL_ERR = -2
};
59
/*
 * Tavor-mode CQ doorbell commands.  The command occupies bits 24 and up
 * of the first doorbell word and is OR-ed with the CQ number.
 */
#define MTHCA_TAVOR_CQ_DB_INC_CI        (1 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT       (2 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL   (3 << 24)
#define MTHCA_TAVOR_CQ_DB_SET_CI        (4 << 24)
#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT  (5 << 24)

/*
 * Arbel (mem-free) mode CQ doorbell commands, encoded the same way.
 * Note that the numbering differs from the Tavor commands above.
 */
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL   (1 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT       (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT  (3 << 24)
69
/*
 * Bit patterns tested against CQE bytes:
 *   - OWNER_SW / OWNER_HW apply to the "owner" byte; the 0x80 bit set
 *     means the entry belongs to hardware.
 *   - A CQE is an error CQE when all bits of ERROR_CQE_OPCODE_MASK are
 *     set in its "opcode" byte.
 */
enum {
	MTHCA_CQ_ENTRY_OWNER_SW     = 0x00,
	MTHCA_CQ_ENTRY_OWNER_HW     = 0x80,
	MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe
};
75
/*
 * Values of the "syndrome" byte of an error CQE.  handle_error_cqe()
 * translates each of them into the matching IBV_WC_* status code.
 */
enum {
	SYNDROME_LOCAL_LENGTH_ERR        = 0x01,
	SYNDROME_LOCAL_QP_OP_ERR         = 0x02,
	SYNDROME_LOCAL_EEC_OP_ERR        = 0x03,
	SYNDROME_LOCAL_PROT_ERR          = 0x04,
	SYNDROME_WR_FLUSH_ERR            = 0x05,
	SYNDROME_MW_BIND_ERR             = 0x06,
	SYNDROME_BAD_RESP_ERR            = 0x10,
	SYNDROME_LOCAL_ACCESS_ERR        = 0x11,
	SYNDROME_REMOTE_INVAL_REQ_ERR    = 0x12,
	SYNDROME_REMOTE_ACCESS_ERR       = 0x13,
	SYNDROME_REMOTE_OP_ERR           = 0x14,
	SYNDROME_RETRY_EXC_ERR           = 0x15,
	SYNDROME_RNR_RETRY_EXC_ERR       = 0x16,
	SYNDROME_LOCAL_RDD_VIOL_ERR      = 0x20,
	SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21,
	SYNDROME_REMOTE_ABORTED_ERR      = 0x22,
	SYNDROME_INVAL_EECN_ERR          = 0x23,
	SYNDROME_INVAL_EEC_STATE_ERR     = 0x24
};
96
/*
 * Layout of a regular (non-error) completion queue entry as this file
 * reads it.  Multi-byte fields are converted with ntohl()/ntohs()
 * before use, i.e. they are stored in network byte order.
 */
struct mthca_cqe {
	uint32_t my_qpn;             /* number of the local QP the completion belongs to */
	uint32_t my_ee;
	uint32_t rqpn;               /* low 24 bits are reported as the source QP */
	uint16_t sl_g_mlpath;        /* SL in bits 15:12, 0x80 = GRH present, low 7 bits = path bits */
	uint16_t rlid;               /* reported as the source LID */
	uint32_t imm_etype_pkey_eec; /* immediate data; upper 16 bits double as the P_Key index */
	uint32_t byte_cnt;           /* reported as the completion byte length */
	uint32_t wqe;                /* WQE address/offset used to derive the WQE index */
	uint8_t  opcode;
	uint8_t  is_send;            /* 0x80 set for send completions */
	uint8_t  reserved;
	uint8_t  owner;              /* MTHCA_CQ_ENTRY_OWNER_HW set while hardware owns the entry */
};
111
/*
 * Alternate view of a CQE that completed in error.  mthca_poll_one()
 * casts a struct mthca_cqe pointer to this type, so my_qpn, wqe, opcode
 * and owner sit at the same offsets in both structures.
 */
struct mthca_err_cqe {
	uint32_t my_qpn;
	uint32_t reserved1[3];
	uint8_t  syndrome;      /* one of the SYNDROME_* codes */
	uint8_t  vendor_err;    /* copied verbatim into the work completion */
	uint16_t db_cnt;        /* doorbell count, network byte order */
	uint32_t reserved2;
	uint32_t wqe;
	uint8_t  opcode;        /* bit 0 set for send-queue errors */
	uint8_t  reserved3[2];
	uint8_t  owner;
};
124
125static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
126{
127	return cq->buf.buf + entry * MTHCA_CQ_ENTRY_SIZE;
128}
129
130static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
131{
132	struct mthca_cqe *cqe = get_cqe(cq, i);
133	return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
134}
135
136static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
137{
138	return cqe_sw(cq, cq->cons_index & cq->ibv_cq.cqe);
139}
140
141static inline void set_cqe_hw(struct mthca_cqe *cqe)
142{
143	VALGRIND_MAKE_MEM_UNDEFINED(cqe, sizeof *cqe);
144	cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW;
145}
146
147/*
148 * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index
149 * should be correct before calling update_cons_index().
150 */
151static inline void update_cons_index(struct mthca_cq *cq, int incr)
152{
153	uint32_t doorbell[2];
154
155	if (mthca_is_memfree(cq->ibv_cq.context)) {
156		*cq->set_ci_db = htonl(cq->cons_index);
157		wmb();
158	} else {
159		doorbell[0] = htonl(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn);
160		doorbell[1] = htonl(incr - 1);
161
162		mthca_write64(doorbell, to_mctx(cq->ibv_cq.context), MTHCA_CQ_DOORBELL);
163	}
164}
165
/*
 * Print the eight 32-bit words of a CQE to stdout, one per line, each
 * prefixed with its byte offset and converted to host byte order.
 */
static void dump_cqe(void *cqe_ptr)
{
	const uint32_t *word = cqe_ptr;
	int offset;

	for (offset = 0; offset < 32; offset += 4)
		printf("  [%2x] %08x\n", offset, ntohl(word[offset / 4]));
}
174
175static int handle_error_cqe(struct mthca_cq *cq,
176			    struct mthca_qp *qp, int wqe_index, int is_send,
177			    struct mthca_err_cqe *cqe,
178			    struct ibv_wc *wc, int *free_cqe)
179{
180	int err;
181	int dbd;
182	uint32_t new_wqe;
183
184	if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) {
185		printf("local QP operation err "
186		       "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n",
187		       ntohl(cqe->my_qpn), ntohl(cqe->wqe),
188		       cq->cqn, cq->cons_index);
189		dump_cqe(cqe);
190	}
191
192	/*
193	 * For completions in error, only work request ID, status, vendor error
194	 * (and freed resource count for RD) have to be set.
195	 */
196	switch (cqe->syndrome) {
197	case SYNDROME_LOCAL_LENGTH_ERR:
198		wc->status = IBV_WC_LOC_LEN_ERR;
199		break;
200	case SYNDROME_LOCAL_QP_OP_ERR:
201		wc->status = IBV_WC_LOC_QP_OP_ERR;
202		break;
203	case SYNDROME_LOCAL_EEC_OP_ERR:
204		wc->status = IBV_WC_LOC_EEC_OP_ERR;
205		break;
206	case SYNDROME_LOCAL_PROT_ERR:
207		wc->status = IBV_WC_LOC_PROT_ERR;
208		break;
209	case SYNDROME_WR_FLUSH_ERR:
210		wc->status = IBV_WC_WR_FLUSH_ERR;
211		break;
212	case SYNDROME_MW_BIND_ERR:
213		wc->status = IBV_WC_MW_BIND_ERR;
214		break;
215	case SYNDROME_BAD_RESP_ERR:
216		wc->status = IBV_WC_BAD_RESP_ERR;
217		break;
218	case SYNDROME_LOCAL_ACCESS_ERR:
219		wc->status = IBV_WC_LOC_ACCESS_ERR;
220		break;
221	case SYNDROME_REMOTE_INVAL_REQ_ERR:
222		wc->status = IBV_WC_REM_INV_REQ_ERR;
223		break;
224	case SYNDROME_REMOTE_ACCESS_ERR:
225		wc->status = IBV_WC_REM_ACCESS_ERR;
226		break;
227	case SYNDROME_REMOTE_OP_ERR:
228		wc->status = IBV_WC_REM_OP_ERR;
229		break;
230	case SYNDROME_RETRY_EXC_ERR:
231		wc->status = IBV_WC_RETRY_EXC_ERR;
232		break;
233	case SYNDROME_RNR_RETRY_EXC_ERR:
234		wc->status = IBV_WC_RNR_RETRY_EXC_ERR;
235		break;
236	case SYNDROME_LOCAL_RDD_VIOL_ERR:
237		wc->status = IBV_WC_LOC_RDD_VIOL_ERR;
238		break;
239	case SYNDROME_REMOTE_INVAL_RD_REQ_ERR:
240		wc->status = IBV_WC_REM_INV_RD_REQ_ERR;
241		break;
242	case SYNDROME_REMOTE_ABORTED_ERR:
243		wc->status = IBV_WC_REM_ABORT_ERR;
244		break;
245	case SYNDROME_INVAL_EECN_ERR:
246		wc->status = IBV_WC_INV_EECN_ERR;
247		break;
248	case SYNDROME_INVAL_EEC_STATE_ERR:
249		wc->status = IBV_WC_INV_EEC_STATE_ERR;
250		break;
251	default:
252		wc->status = IBV_WC_GENERAL_ERR;
253		break;
254	}
255
256	wc->vendor_err = cqe->vendor_err;
257
258	/*
259	 * Mem-free HCAs always generate one CQE per WQE, even in the
260	 * error case, so we don't have to check the doorbell count, etc.
261	 */
262	if (mthca_is_memfree(cq->ibv_cq.context))
263		return 0;
264
265	err = mthca_free_err_wqe(qp, is_send, wqe_index, &dbd, &new_wqe);
266	if (err)
267		return err;
268
269	/*
270	 * If we're at the end of the WQE chain, or we've used up our
271	 * doorbell count, free the CQE.  Otherwise just update it for
272	 * the next poll operation.
273	 *
274	 * This doesn't apply to mem-free HCAs, which never use the
275	 * doorbell count field.  In that case we always free the CQE.
276	 */
277	if (mthca_is_memfree(cq->ibv_cq.context) ||
278	    !(new_wqe & htonl(0x3f)) || (!cqe->db_cnt && dbd))
279		return 0;
280
281	cqe->db_cnt   = htons(ntohs(cqe->db_cnt) - dbd);
282	cqe->wqe      = new_wqe;
283	cqe->syndrome = SYNDROME_WR_FLUSH_ERR;
284
285	*free_cqe = 0;
286
287	return 0;
288}
289
290static inline int mthca_poll_one(struct mthca_cq *cq,
291				 struct mthca_qp **cur_qp,
292				 int *freed,
293				 struct ibv_wc *wc)
294{
295	struct mthca_wq *wq;
296	struct mthca_cqe *cqe;
297	struct mthca_srq *srq;
298	uint32_t qpn;
299	uint32_t wqe;
300	int wqe_index;
301	int is_error;
302	int is_send;
303	int free_cqe = 1;
304	int err = 0;
305
306	cqe = next_cqe_sw(cq);
307	if (!cqe)
308		return CQ_EMPTY;
309
310	VALGRIND_MAKE_MEM_DEFINED(cqe, sizeof *cqe);
311
312	/*
313	 * Make sure we read CQ entry contents after we've checked the
314	 * ownership bit.
315	 */
316	rmb();
317
318	qpn = ntohl(cqe->my_qpn);
319
320	is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
321		MTHCA_ERROR_CQE_OPCODE_MASK;
322	is_send  = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
323
324	if (!*cur_qp || ntohl(cqe->my_qpn) != (*cur_qp)->ibv_qp.qp_num) {
325		/*
326		 * We do not have to take the QP table lock here,
327		 * because CQs will be locked while QPs are removed
328		 * from the table.
329		 */
330		*cur_qp = mthca_find_qp(to_mctx(cq->ibv_cq.context), ntohl(cqe->my_qpn));
331		if (!*cur_qp) {
332			err = CQ_POLL_ERR;
333			goto out;
334		}
335	}
336
337	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
338
339	if (is_send) {
340		wq = &(*cur_qp)->sq;
341		wqe_index = ((ntohl(cqe->wqe) - (*cur_qp)->send_wqe_offset) >> wq->wqe_shift);
342		wc->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max];
343	} else if ((*cur_qp)->ibv_qp.srq) {
344		srq = to_msrq((*cur_qp)->ibv_qp.srq);
345		wqe = htonl(cqe->wqe);
346		wq = NULL;
347		wqe_index = wqe >> srq->wqe_shift;
348		wc->wr_id = srq->wrid[wqe_index];
349		mthca_free_srq_wqe(srq, wqe_index);
350	} else {
351		int32_t wqe;
352		wq = &(*cur_qp)->rq;
353		wqe = ntohl(cqe->wqe);
354		wqe_index = wqe >> wq->wqe_shift;
355		/*
356		 * WQE addr == base - 1 might be reported by Sinai FW
357		 * 1.0.800 and Arbel FW 5.1.400 in receive completion
358		 * with error instead of (rq size - 1).  This bug
359		 * should be fixed in later FW revisions.
360		 */
361		if (wqe_index < 0)
362			wqe_index = wq->max - 1;
363		wc->wr_id = (*cur_qp)->wrid[wqe_index];
364	}
365
366	if (wq) {
367		if (wq->last_comp < wqe_index)
368			wq->tail += wqe_index - wq->last_comp;
369		else
370			wq->tail += wqe_index + wq->max - wq->last_comp;
371
372		wq->last_comp = wqe_index;
373	}
374
375	if (is_error) {
376		err = handle_error_cqe(cq, *cur_qp, wqe_index, is_send,
377				       (struct mthca_err_cqe *) cqe,
378				       wc, &free_cqe);
379		goto out;
380	}
381
382	if (is_send) {
383		wc->wc_flags = 0;
384		switch (cqe->opcode) {
385		case MTHCA_OPCODE_RDMA_WRITE:
386			wc->opcode    = IBV_WC_RDMA_WRITE;
387			break;
388		case MTHCA_OPCODE_RDMA_WRITE_IMM:
389			wc->opcode    = IBV_WC_RDMA_WRITE;
390			wc->wc_flags |= IBV_WC_WITH_IMM;
391			break;
392		case MTHCA_OPCODE_SEND:
393			wc->opcode    = IBV_WC_SEND;
394			break;
395		case MTHCA_OPCODE_SEND_IMM:
396			wc->opcode    = IBV_WC_SEND;
397			wc->wc_flags |= IBV_WC_WITH_IMM;
398			break;
399		case MTHCA_OPCODE_RDMA_READ:
400			wc->opcode    = IBV_WC_RDMA_READ;
401			wc->byte_len  = ntohl(cqe->byte_cnt);
402			break;
403		case MTHCA_OPCODE_ATOMIC_CS:
404			wc->opcode    = IBV_WC_COMP_SWAP;
405			wc->byte_len  = ntohl(cqe->byte_cnt);
406			break;
407		case MTHCA_OPCODE_ATOMIC_FA:
408			wc->opcode    = IBV_WC_FETCH_ADD;
409			wc->byte_len  = ntohl(cqe->byte_cnt);
410			break;
411		case MTHCA_OPCODE_BIND_MW:
412			wc->opcode    = IBV_WC_BIND_MW;
413			break;
414		default:
415			/* assume it's a send completion */
416			wc->opcode    = IBV_WC_SEND;
417			break;
418		}
419	} else {
420		wc->byte_len = ntohl(cqe->byte_cnt);
421		switch (cqe->opcode & 0x1f) {
422		case IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE:
423		case IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
424			wc->wc_flags = IBV_WC_WITH_IMM;
425			wc->imm_data = cqe->imm_etype_pkey_eec;
426			wc->opcode = IBV_WC_RECV;
427			break;
428		case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
429		case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
430			wc->wc_flags = IBV_WC_WITH_IMM;
431			wc->imm_data = cqe->imm_etype_pkey_eec;
432			wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
433			break;
434		default:
435			wc->wc_flags = 0;
436			wc->opcode = IBV_WC_RECV;
437			break;
438		}
439		wc->slid 	   = ntohs(cqe->rlid);
440		wc->sl   	   = ntohs(cqe->sl_g_mlpath) >> 12;
441		wc->src_qp 	   = ntohl(cqe->rqpn) & 0xffffff;
442		wc->dlid_path_bits = ntohs(cqe->sl_g_mlpath) & 0x7f;
443		wc->pkey_index     = ntohl(cqe->imm_etype_pkey_eec) >> 16;
444		wc->wc_flags      |= ntohs(cqe->sl_g_mlpath) & 0x80 ?
445			IBV_WC_GRH : 0;
446	}
447
448	wc->status = IBV_WC_SUCCESS;
449
450out:
451	if (free_cqe) {
452		set_cqe_hw(cqe);
453		++(*freed);
454		++cq->cons_index;
455	}
456
457	return err;
458}
459
460int mthca_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
461{
462	struct mthca_cq *cq = to_mcq(ibcq);
463	struct mthca_qp *qp = NULL;
464	int npolled;
465	int err = CQ_OK;
466	int freed = 0;
467
468	pthread_spin_lock(&cq->lock);
469
470	for (npolled = 0; npolled < ne; ++npolled) {
471		err = mthca_poll_one(cq, &qp, &freed, wc + npolled);
472		if (err != CQ_OK)
473			break;
474	}
475
476	if (freed) {
477		wmb();
478		update_cons_index(cq, freed);
479	}
480
481	pthread_spin_unlock(&cq->lock);
482
483	return err == CQ_POLL_ERR ? err : npolled;
484}
485
486int mthca_tavor_arm_cq(struct ibv_cq *cq, int solicited)
487{
488	uint32_t doorbell[2];
489
490	doorbell[0] = htonl((solicited ?
491			     MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL :
492			     MTHCA_TAVOR_CQ_DB_REQ_NOT)      |
493			    to_mcq(cq)->cqn);
494	doorbell[1] = 0xffffffff;
495
496	mthca_write64(doorbell, to_mctx(cq->context), MTHCA_CQ_DOORBELL);
497
498	return 0;
499}
500
501int mthca_arbel_arm_cq(struct ibv_cq *ibvcq, int solicited)
502{
503	struct mthca_cq *cq = to_mcq(ibvcq);
504	uint32_t doorbell[2];
505	uint32_t sn;
506	uint32_t ci;
507
508	sn = cq->arm_sn & 3;
509	ci = htonl(cq->cons_index);
510
511	doorbell[0] = ci;
512	doorbell[1] = htonl((cq->cqn << 8) | (2 << 5) | (sn << 3) |
513			    (solicited ? 1 : 2));
514
515	mthca_write_db_rec(doorbell, cq->arm_db);
516
517	/*
518	 * Make sure that the doorbell record in host memory is
519	 * written before ringing the doorbell via PCI MMIO.
520	 */
521	wmb();
522
523	doorbell[0] = htonl((sn << 28)                       |
524			    (solicited ?
525			     MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL :
526			     MTHCA_ARBEL_CQ_DB_REQ_NOT)      |
527			    cq->cqn);
528	doorbell[1] = ci;
529
530	mthca_write64(doorbell, to_mctx(ibvcq->context), MTHCA_CQ_DOORBELL);
531
532	return 0;
533}
534
535void mthca_arbel_cq_event(struct ibv_cq *cq)
536{
537	to_mcq(cq)->arm_sn++;
538}
539
540static inline int is_recv_cqe(struct mthca_cqe *cqe)
541{
542	if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
543	    MTHCA_ERROR_CQE_OPCODE_MASK)
544		return !(cqe->opcode & 0x01);
545	else
546		return !(cqe->is_send & 0x80);
547}
548
549void __mthca_cq_clean(struct mthca_cq *cq, uint32_t qpn, struct mthca_srq *srq)
550{
551	struct mthca_cqe *cqe;
552	uint32_t prod_index;
553	int i, nfreed = 0;
554
555	/*
556	 * First we need to find the current producer index, so we
557	 * know where to start cleaning from.  It doesn't matter if HW
558	 * adds new entries after this loop -- the QP we're worried
559	 * about is already in RESET, so the new entries won't come
560	 * from our QP and therefore don't need to be checked.
561	 */
562	for (prod_index = cq->cons_index;
563	     cqe_sw(cq, prod_index & cq->ibv_cq.cqe);
564	     ++prod_index)
565		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
566			break;
567
568	/*
569	 * Now sweep backwards through the CQ, removing CQ entries
570	 * that match our QP by copying older entries on top of them.
571	 */
572	while ((int) --prod_index - (int) cq->cons_index >= 0) {
573		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
574		if (cqe->my_qpn == htonl(qpn)) {
575			if (srq && is_recv_cqe(cqe))
576				mthca_free_srq_wqe(srq,
577						   ntohl(cqe->wqe) >> srq->wqe_shift);
578			++nfreed;
579		} else if (nfreed)
580			memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe),
581			       cqe, MTHCA_CQ_ENTRY_SIZE);
582	}
583
584	if (nfreed) {
585		for (i = 0; i < nfreed; ++i)
586			set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibv_cq.cqe));
587		wmb();
588		cq->cons_index += nfreed;
589		update_cons_index(cq, nfreed);
590	}
591}
592
593void mthca_cq_clean(struct mthca_cq *cq, uint32_t qpn, struct mthca_srq *srq)
594{
595	pthread_spin_lock(&cq->lock);
596	__mthca_cq_clean(cq, qpn, srq);
597	pthread_spin_unlock(&cq->lock);
598}
599
600void mthca_cq_resize_copy_cqes(struct mthca_cq *cq, void *buf, int old_cqe)
601{
602	int i;
603
604	/*
605	 * In Tavor mode, the hardware keeps the consumer and producer
606	 * indices mod the CQ size.  Since we might be making the CQ
607	 * bigger, we need to deal with the case where the producer
608	 * index wrapped around before the CQ was resized.
609	 */
610	if (!mthca_is_memfree(cq->ibv_cq.context) && old_cqe < cq->ibv_cq.cqe) {
611		cq->cons_index &= old_cqe;
612		if (cqe_sw(cq, old_cqe))
613			cq->cons_index -= old_cqe + 1;
614	}
615
616	for (i = cq->cons_index; cqe_sw(cq, i & old_cqe); ++i)
617		memcpy(buf + (i & cq->ibv_cq.cqe) * MTHCA_CQ_ENTRY_SIZE,
618		       get_cqe(cq, i & old_cqe), MTHCA_CQ_ENTRY_SIZE);
619}
620
621int mthca_alloc_cq_buf(struct mthca_device *dev, struct mthca_buf *buf, int nent)
622{
623	int i;
624
625	if (mthca_alloc_buf(buf, align(nent * MTHCA_CQ_ENTRY_SIZE, dev->page_size),
626		    dev->page_size))
627		return -1;
628
629	for (i = 0; i < nent; ++i)
630		((struct mthca_cqe *) buf->buf)[i].owner = MTHCA_CQ_ENTRY_OWNER_HW;
631
632	return 0;
633}
634