/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdlib.h>
#include <netinet/in.h>
#include <pthread.h>
#include <string.h>

#include "mlx4.h"
#include "doorbell.h"
#include "wqe.h"

static const uint32_t mlx4_ib_opcode[] = {
	[IBV_WR_SEND]			= MLX4_OPCODE_SEND,
	[IBV_WR_SEND_WITH_IMM]		= MLX4_OPCODE_SEND_IMM,
	[IBV_WR_RDMA_WRITE]		= MLX4_OPCODE_RDMA_WRITE,
	[IBV_WR_RDMA_WRITE_WITH_IMM]	= MLX4_OPCODE_RDMA_WRITE_IMM,
	[IBV_WR_RDMA_READ]		= MLX4_OPCODE_RDMA_READ,
	[IBV_WR_ATOMIC_CMP_AND_SWP]	= MLX4_OPCODE_ATOMIC_CS,
	[IBV_WR_ATOMIC_FETCH_AND_ADD]	= MLX4_OPCODE_ATOMIC_FA,
};

static void *get_recv_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
}

static void *get_send_wqe(struct mlx4_qp *qp, int n)
{
	return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
}

/*
 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
 * first four bytes of every 64 byte chunk with 0xffffffff, except for
 * the very first chunk of the WQE.
 */
static void stamp_send_wqe(struct mlx4_qp *qp, int n)
{
	uint32_t *wqe = get_send_wqe(qp, n);
	int i;
	int ds = (((struct mlx4_wqe_ctrl_seg *)wqe)->fence_size & 0x3f) << 2;

	for (i = 16; i < ds; i += 16)
		wqe[i] = 0xffffffff;
}
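
/*
 * Example: with fence_size = 8 (8 * 16 = 128 bytes, so ds = 32 dwords),
 * the loop above stamps dword index 16 (byte offset 64) and stops,
 * leaving the first 64-byte chunk, which holds the control segment,
 * untouched.
 */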

void mlx4_init_qp_indices(struct mlx4_qp *qp)
{
	qp->sq.head	 = 0;
	qp->sq.tail	 = 0;
	qp->rq.head	 = 0;
	qp->rq.tail	 = 0;
}

void mlx4_qp_init_sq_ownership(struct mlx4_qp *qp)
{
	struct mlx4_wqe_ctrl_seg *ctrl;
	int i;

	for (i = 0; i < qp->sq.wqe_cnt; ++i) {
		ctrl = get_send_wqe(qp, i);
		ctrl->owner_opcode = htonl(1 << 31);
		ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);

		stamp_send_wqe(qp, i);
	}
}

static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
{
	unsigned cur;

	cur = wq->head - wq->tail;
	if (cur + nreq < wq->max_post)
		return 0;

	pthread_spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	pthread_spin_unlock(&cq->lock);

	return cur + nreq >= wq->max_post;
}
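
/*
 * The unlocked check above can race with completion processing, which
 * advances wq->tail from the CQ poll path under cq->lock.  Re-reading
 * head - tail while holding cq->lock picks up any WQEs completed
 * concurrently before we declare the queue full.
 */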

static inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
				 uint64_t remote_addr, uint32_t rkey)
{
	rseg->raddr    = htonll(remote_addr);
	rseg->rkey     = htonl(rkey);
	rseg->reserved = 0;
}

static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ibv_send_wr *wr)
{
	if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
		aseg->swap_add = htonll(wr->wr.atomic.swap);
		aseg->compare  = htonll(wr->wr.atomic.compare_add);
	} else {
		aseg->swap_add = htonll(wr->wr.atomic.compare_add);
		aseg->compare  = 0;
	}
}

static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
			     struct ibv_send_wr *wr)
{
	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
	dseg->dqpn = htonl(wr->wr.ud.remote_qpn);
	dseg->qkey = htonl(wr->wr.ud.remote_qkey);
	dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan);
	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6);
}
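
/*
 * The datagram segment carries the address vector from the AH plus the
 * remote QPN and Q_Key; the VLAN tag and destination MAC copied above
 * come from the AH as well and apply to Ethernet (RoCE) ports.
 */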

static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->byte_count = htonl(sg->length);
	dseg->lkey       = htonl(sg->lkey);
	dseg->addr       = htonll(sg->addr);
}

static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
{
	dseg->lkey       = htonl(sg->lkey);
	dseg->addr       = htonll(sg->addr);

	/*
	 * Need a barrier here before writing the byte_count field to
	 * make sure that all the data is visible before the
	 * byte_count field is set.  Otherwise, if the segment begins
	 * a new cacheline, the HCA prefetcher could grab the 64-byte
	 * chunk and get a valid (!= 0xffffffff) byte count but
	 * stale data, and end up sending the wrong data.
	 */
	wmb();

	dseg->byte_count = htonl(sg->length);
}

/*
 * Avoid using memcpy() to copy to BlueFlame page, since memcpy()
 * implementations may use move-string-buffer assembler instructions,
 * which do not guarantee order of copying.
 */
static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
{
	while (bytecnt > 0) {
		*dst++ = *src++;
		*dst++ = *src++;
		bytecnt -= 2 * sizeof (long);
	}
}
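
/*
 * The copy advances in 2 * sizeof (long) steps, so callers must pass a
 * byte count that is a multiple of that; mlx4_post_send() below rounds
 * the WQE size up to a 64-byte multiple before calling this.
 */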

int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
			  struct ibv_send_wr **bad_wr)
{
	struct mlx4_context *ctx;
	struct mlx4_qp *qp = to_mqp(ibqp);
	void *wqe;
	struct mlx4_wqe_ctrl_seg *ctrl;
	int ind;
	int nreq;
	int inl = 0;
	int ret = 0;
	int size;
	int i;

	pthread_spin_lock(&qp->sq.lock);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;

		ctrl->xrcrb_flags =
			(wr->send_flags & IBV_SEND_SIGNALED ?
			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
			(wr->send_flags & IBV_SEND_SOLICITED ?
			 htonl(MLX4_WQE_CTRL_SOLICIT) : 0)   |
			qp->sq_signal_bits;

		if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
			ctrl->imm = wr->imm_data;
		else
			ctrl->imm = 0;

		wqe += sizeof *ctrl;
		size = sizeof *ctrl / 16;

		switch (ibqp->qp_type) {
		case IBV_QPT_XRC:
			ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
			/* fall through */
		case IBV_QPT_RC:
		case IBV_QPT_UC:
			switch (wr->opcode) {
			case IBV_WR_ATOMIC_CMP_AND_SWP:
			case IBV_WR_ATOMIC_FETCH_AND_ADD:
				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
					      wr->wr.atomic.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);

				set_atomic_seg(wqe, wr);
				wqe  += sizeof (struct mlx4_wqe_atomic_seg);
				size += (sizeof (struct mlx4_wqe_raddr_seg) +
					 sizeof (struct mlx4_wqe_atomic_seg)) / 16;

				break;

			case IBV_WR_RDMA_READ:
				inl = 1;
				/* fall through */
			case IBV_WR_RDMA_WRITE:
			case IBV_WR_RDMA_WRITE_WITH_IMM:
				set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
					      wr->wr.rdma.rkey);
				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
				size += sizeof (struct mlx4_wqe_raddr_seg) / 16;

				break;

			default:
				/* No extra segments required for sends */
				break;
			}
			break;

		case IBV_QPT_UD:
			set_datagram_seg(wqe, wr);
			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
			if (to_mah(wr->wr.ud.ah)->tagged) {
				ctrl->ins_vlan = 1 << 6;
				ctrl->vlan_tag = htons(to_mah(wr->wr.ud.ah)->vlan);
			}

			break;

		default:
			break;
		}

		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
			struct mlx4_wqe_inline_seg *seg;
			void *addr;
			int len, seg_len;
			int num_seg;
			int off, to_copy;

			inl = 0;

			seg = wqe;
			wqe += sizeof *seg;
			off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
			num_seg = 0;
			seg_len = 0;

			for (i = 0; i < wr->num_sge; ++i) {
				addr = (void *) (uintptr_t) wr->sg_list[i].addr;
				len  = wr->sg_list[i].length;
				inl += len;

				if (inl > qp->max_inline_data) {
					inl = 0;
					ret = -1;
					*bad_wr = wr;
					goto out;
				}

				while (len >= MLX4_INLINE_ALIGN - off) {
					to_copy = MLX4_INLINE_ALIGN - off;
					memcpy(wqe, addr, to_copy);
					len -= to_copy;
					wqe += to_copy;
					addr += to_copy;
					seg_len += to_copy;
					wmb(); /* see comment below */
					seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
					seg_len = 0;
					seg = wqe;
					wqe += sizeof *seg;
					off = sizeof *seg;
					++num_seg;
				}

				memcpy(wqe, addr, len);
				wqe += len;
				seg_len += len;
				off += len;
			}

			if (seg_len) {
				++num_seg;
				/*
				 * Need a barrier here to make sure
				 * all the data is visible before the
				 * byte_count field is set.  Otherwise
				 * the HCA prefetcher could grab the
				 * 64-byte chunk with this inline
				 * segment and get a valid (!=
				 * 0xffffffff) byte count but stale
				 * data, and end up sending the wrong
				 * data.
				 */
				wmb();
				seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
			}

			size += (inl + num_seg * sizeof *seg + 15) / 16;
		} else {
			struct mlx4_wqe_data_seg *seg = wqe;

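			/*
			 * Write data segments in reverse order, so that the
			 * stamp dword at the start of each 64-byte chunk is
			 * overwritten last within that chunk; together with
			 * the barrier in set_data_seg(), the HCA prefetcher
			 * never sees a valid byte count in front of stale
			 * data.
			 */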
			for (i = wr->num_sge - 1; i >= 0; --i)
				set_data_seg(seg + i, wr->sg_list + i);

			size += wr->num_sge * (sizeof *seg / 16);
		}

		ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ?
				    MLX4_WQE_CTRL_FENCE : 0) | size;

		/*
		 * Make sure descriptor is fully written before
		 * setting ownership bit (because HW can start
		 * executing as soon as we do).
		 */
		wmb();

		ctrl->owner_opcode = htonl(mlx4_ib_opcode[wr->opcode]) |
			(ind & qp->sq.wqe_cnt ? htonl(1 << 31) : 0);

		/*
		 * We can improve latency by not stamping the last
		 * send queue WQE until after ringing the doorbell, so
		 * only stamp here if there are still more WQEs to post.
		 */
		if (wr->next)
			stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) &
				       (qp->sq.wqe_cnt - 1));

		++ind;
	}

out:
	ctx = to_mctx(ibqp->context);

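	/*
	 * BlueFlame fast path: when exactly one WQE was posted and it is
	 * small enough to fit in the BlueFlame buffer, copy the whole
	 * descriptor into the write-combining BlueFlame page instead of
	 * ringing the regular UAR doorbell below.
	 */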
	if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
		ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
		*(uint32_t *) (&ctrl->vlan_tag) |= qp->doorbell_qpn;
		/*
		 * Make sure that descriptor is written to memory
		 * before writing to BlueFlame page.
		 */
		wmb();

		++qp->sq.head;

		pthread_spin_lock(&ctx->bf_lock);

		mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
			     align(size * 16, 64));
		wc_wmb();

		ctx->bf_offset ^= ctx->bf_buf_size;

		pthread_spin_unlock(&ctx->bf_lock);
	} else if (nreq) {
		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
	}

	if (nreq)
		stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) &
			       (qp->sq.wqe_cnt - 1));

	pthread_spin_unlock(&qp->sq.lock);

	return ret;
}

int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		   struct ibv_recv_wr **bad_wr)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	struct mlx4_wqe_data_seg *scat;
	int ret = 0;
	int nreq;
	int ind;
	int i;

	pthread_spin_lock(&qp->rq.lock);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		if (wr->num_sge > qp->rq.max_gs) {
			ret = -1;
			*bad_wr = wr;
			goto out;
		}

		scat = get_recv_wqe(qp, ind);

		for (i = 0; i < wr->num_sge; ++i)
			__set_data_seg(scat + i, wr->sg_list + i);

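		/*
		 * If the WR uses fewer than max_gs entries, terminate the
		 * scatter list with a zero-length entry carrying
		 * MLX4_INVALID_LKEY so the HCA stops scattering there.
		 */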
		if (i < qp->rq.max_gs) {
			scat[i].byte_count = 0;
			scat[i].lkey       = htonl(MLX4_INVALID_LKEY);
			scat[i].addr       = 0;
		}

		qp->rq.wrid[ind] = wr->wr_id;

		ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
	}

out:
	if (nreq) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*qp->db = htonl(qp->rq.head & 0xffff);
	}

	pthread_spin_unlock(&qp->rq.lock);

	return ret;
}

int num_inline_segs(int data, enum ibv_qp_type type)
{
	/*
	 * Inline data segments are not allowed to cross 64 byte
	 * boundaries.  For UD QPs, the data segments always start
	 * aligned to 64 bytes (16 byte control segment + 48 byte
	 * datagram segment); for other QPs, there will be a 16 byte
	 * control segment and possibly a 16 byte remote address
	 * segment, so in the worst case there will be only 32 bytes
	 * available for the first data segment.
	 */
	if (type == IBV_QPT_UD)
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_datagram_seg)) %
			MLX4_INLINE_ALIGN;
	else
		data += (sizeof (struct mlx4_wqe_ctrl_seg) +
			 sizeof (struct mlx4_wqe_raddr_seg)) %
			MLX4_INLINE_ALIGN;

	return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
		(MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
}
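
/*
 * Worked example (taking MLX4_INLINE_ALIGN as 64 and a 4-byte inline
 * segment header): an RC QP posting 100 bytes of inline data first adds
 * the (16 + 16) % 64 = 32 byte worst-case offset, and each 64-byte
 * chunk holds at most 60 bytes of data, so (132 + 59) / 60 = 3 inline
 * segments are needed.
 */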

void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
			   struct mlx4_qp *qp)
{
	int size;
	int max_sq_sge;

	max_sq_sge	 = align(cap->max_inline_data +
				 num_inline_segs(cap->max_inline_data, type) *
				 sizeof (struct mlx4_wqe_inline_seg),
				 sizeof (struct mlx4_wqe_data_seg)) /
		sizeof (struct mlx4_wqe_data_seg);
	if (max_sq_sge < cap->max_send_sge)
		max_sq_sge = cap->max_send_sge;

	size = max_sq_sge * sizeof (struct mlx4_wqe_data_seg);
	switch (type) {
	case IBV_QPT_UD:
		size += sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_UC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		break;

	case IBV_QPT_XRC:
	case IBV_QPT_RC:
		size += sizeof (struct mlx4_wqe_raddr_seg);
		/*
		 * An atomic op will require an atomic segment, a
		 * remote address segment and one scatter entry.
		 */
		if (size < (sizeof (struct mlx4_wqe_atomic_seg) +
			    sizeof (struct mlx4_wqe_raddr_seg) +
			    sizeof (struct mlx4_wqe_data_seg)))
			size = (sizeof (struct mlx4_wqe_atomic_seg) +
				sizeof (struct mlx4_wqe_raddr_seg) +
				sizeof (struct mlx4_wqe_data_seg));
		break;

	default:
		break;
	}

	/* Make sure that we have enough space for a bind request */
	if (size < sizeof (struct mlx4_wqe_bind_seg))
		size = sizeof (struct mlx4_wqe_bind_seg);

	size += sizeof (struct mlx4_wqe_ctrl_seg);

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */
}

int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type, struct mlx4_qp *qp)
{
	qp->rq.max_gs	 = cap->max_recv_sge;

	qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
	if (!qp->sq.wrid)
		return -1;

	if (qp->rq.wqe_cnt) {
		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
		if (!qp->rq.wrid) {
			free(qp->sq.wrid);
			return -1;
		}
	}

	for (qp->rq.wqe_shift = 4;
	     1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
	     qp->rq.wqe_shift++)
		; /* nothing */

	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
		(qp->sq.wqe_cnt << qp->sq.wqe_shift);
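
	/*
	 * Place whichever ring has the larger stride first; since both
	 * strides are powers of two, each ring then starts at a multiple
	 * of its own WQE size within the page-aligned buffer.
	 */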
	if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
		qp->rq.offset = 0;
		qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
	} else {
		qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
		qp->sq.offset = 0;
	}

	if (mlx4_alloc_buf(&qp->buf,
			    align(qp->buf_size, to_mdev(pd->context->device)->page_size),
			    to_mdev(pd->context->device)->page_size)) {
		free(qp->sq.wrid);
		free(qp->rq.wrid);
		return -1;
	}

	memset(qp->buf.buf, 0, qp->buf_size);

	return 0;
}

void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
		       enum ibv_qp_type type)
{
	int wqe_size;
	struct mlx4_context *ctx = to_mctx(qp->ibv_qp.context);

	wqe_size = min((1 << qp->sq.wqe_shift), MLX4_MAX_WQE_SIZE) -
		sizeof (struct mlx4_wqe_ctrl_seg);
	switch (type) {
	case IBV_QPT_UD:
		wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
		break;

	case IBV_QPT_UC:
	case IBV_QPT_RC:
	case IBV_QPT_XRC:
		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
		break;

	default:
		break;
	}

	qp->sq.max_gs	     = wqe_size / sizeof (struct mlx4_wqe_data_seg);
	cap->max_send_sge    = min(ctx->max_sge, qp->sq.max_gs);
	qp->sq.max_post	     = min(ctx->max_qp_wr,
				   qp->sq.wqe_cnt - qp->sq_spare_wqes);
	cap->max_send_wr     = qp->sq.max_post;

	/*
	 * Inline data segments can't cross a 64 byte boundary.  So
	 * subtract off one segment header for each 64-byte chunk,
	 * taking into account the fact that wqe_size will be 32 mod
	 * 64 for non-UD QPs.
	 */
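	/*
	 * For example, with a 128-byte stride on an RC QP: wqe_size is
	 * 128 - 16 (ctrl) - 16 (raddr) = 96 bytes, which spans two
	 * 64-byte chunks, so two inline headers (4 bytes each, taking
	 * sizeof (struct mlx4_wqe_inline_seg) as 4) are subtracted and
	 * max_inline_data comes out to 88 bytes.
	 */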
	qp->max_inline_data  = wqe_size -
		sizeof (struct mlx4_wqe_inline_seg) *
		(align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
	cap->max_inline_data = qp->max_inline_data;
}

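/*
 * The QP lookup table is two level: the low bits of the QP number
 * (masked with num_qps - 1) are split into a top-level index, tind,
 * and a second-level index, qpn & qp_table_mask.  Second-level arrays
 * are allocated on first use and freed when their refcount drops to
 * zero.
 */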
struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (ctx->qp_table[tind].refcnt)
		return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
	else
		return NULL;
}

int mlx4_store_qp(struct mlx4_context *ctx, uint32_t qpn, struct mlx4_qp *qp)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!ctx->qp_table[tind].refcnt) {
		ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
						   sizeof (struct mlx4_qp *));
		if (!ctx->qp_table[tind].table)
			return -1;
	}

	++ctx->qp_table[tind].refcnt;
	ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
	return 0;
}

void mlx4_clear_qp(struct mlx4_context *ctx, uint32_t qpn)
{
	int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;

	if (!--ctx->qp_table[tind].refcnt)
		free(ctx->qp_table[tind].table);
	else
		ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
}