/*
 * Copyright (c) 2007 Cisco, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <netinet/in.h>

#include "mlx4.h"
#include "mlx4-abi.h"
#include "wqe.h"

int mlx4_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major     = (raw_fw_ver >> 32) & 0xffff;
	minor     = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%03d", major, minor, sub_minor);

	return 0;
}
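
/*
 * Illustrative sketch (editor's note): raw_fw_ver packs the firmware
 * version as major in bits 47:32, minor in bits 31:16 and sub_minor in
 * bits 15:0.  For a hypothetical raw value of 0x00000002000903e8 the
 * decode above yields major = 2, minor = 9, sub_minor = 1000, i.e. the
 * string "2.9.1000".
 */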

int mlx4_query_port(struct ibv_context *context, uint8_t port,
		     struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
}

struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd       cmd;
	struct mlx4_alloc_pd_resp resp;
	struct mlx4_pd		 *pd;

	pd = malloc(sizeof *pd);
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
			     &resp.ibv_resp, sizeof resp)) {
		free(pd);
		return NULL;
	}

	pd->pdn = resp.pdn;

	return &pd->ibv_pd;
}

int mlx4_free_pd(struct ibv_pd *pd)
{
	int ret;

	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(to_mpd(pd));
	return 0;
}

struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
			   enum ibv_access_flags access)
{
	struct ibv_mr *mr;
	struct ibv_reg_mr cmd;
	int ret;

	mr = malloc(sizeof *mr);
	if (!mr)
		return NULL;

#ifdef IBV_CMD_REG_MR_HAS_RESP_PARAMS
	{
		struct ibv_reg_mr_resp resp;

		ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
				     access, mr, &cmd, sizeof cmd,
				     &resp, sizeof resp);
	}
#else
	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr, access, mr,
			     &cmd, sizeof cmd);
#endif
	if (ret) {
		free(mr);
		return NULL;
	}

	return mr;
}

int mlx4_dereg_mr(struct ibv_mr *mr)
{
	int ret;

	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	free(mr);
	return 0;
}
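
/*
 * Illustrative usage sketch (editor's note, not part of the original
 * file): the two entry points above are reached through the standard
 * libibverbs calls, e.g.:
 *
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_WRITE);
 *	...
 *	ibv_dereg_mr(mr);
 */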

static int align_queue_size(int req)
{
	int nent;

	for (nent = 1; nent < req; nent <<= 1)
		; /* nothing */

	return nent;
}
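
/*
 * Editor's note: align_queue_size() rounds the requested entry count up
 * to the next power of two, e.g. 100 -> 128, 128 -> 128, and 0 or 1 -> 1.
 * Hardware queue sizes must be powers of two so that producer/consumer
 * indices can wrap with a simple mask.
 */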

struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
			       struct ibv_comp_channel *channel,
			       int comp_vector)
{
	struct mlx4_create_cq      cmd;
	struct mlx4_create_cq_resp resp;
	struct mlx4_cq		  *cq;
	int			   ret;

	/* Sanity check CQ size before proceeding */
	if (cqe > 0x3fffff)
		return NULL;

	cq = malloc(sizeof *cq);
	if (!cq)
		return NULL;

	cq->cons_index = 0;

	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	cqe = align_queue_size(cqe + 1);

	if (mlx4_alloc_cq_buf(to_mdev(context->device), &cq->buf, cqe))
		goto err;

	cq->set_ci_db  = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_CQ);
	if (!cq->set_ci_db)
		goto err_buf;

	cq->arm_db     = cq->set_ci_db + 1;
	*cq->arm_db    = 0;
	cq->arm_sn     = 1;
	*cq->set_ci_db = 0;

	cmd.buf_addr = (uintptr_t) cq->buf.buf;
	cmd.db_addr  = (uintptr_t) cq->set_ci_db;

	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
				&resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	cq->cqn = resp.cqn;

	return &cq->ibv_cq;

err_db:
	mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_CQ, cq->set_ci_db);

err_buf:
	mlx4_free_buf(&cq->buf);

err:
	free(cq);

	return NULL;
}
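
/*
 * Editor's note on the size arithmetic above: one CQE is reserved so a
 * full queue can be told apart from an empty one, so a request for,
 * say, 256 entries becomes align_queue_size(257) = 512 buffer slots, of
 * which cqe - 1 = 511 are reported to the kernel as usable.  The
 * doorbell record is a pair of consecutive words: the consumer index at
 * set_ci_db and the arm state at set_ci_db + 1.
 */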

int mlx4_resize_cq(struct ibv_cq *ibcq, int cqe)
{
	struct mlx4_cq *cq = to_mcq(ibcq);
	struct mlx4_resize_cq cmd;
	struct mlx4_buf buf;
	int old_cqe, outst_cqe, ret;

	/* Sanity check CQ size before proceeding */
	if (cqe > 0x3fffff)
		return EINVAL;

	pthread_spin_lock(&cq->lock);

	cqe = align_queue_size(cqe + 1);
	if (cqe == ibcq->cqe + 1) {
		ret = 0;
		goto out;
	}
	/* Can't be smaller than the number of outstanding CQEs */
	outst_cqe = mlx4_get_outstanding_cqes(cq);
	if (cqe < outst_cqe + 1) {
		ret = EINVAL;
		goto out;
	}

	ret = mlx4_alloc_cq_buf(to_mdev(ibcq->context->device), &buf, cqe);
	if (ret)
		goto out;

	old_cqe = ibcq->cqe;
	cmd.buf_addr = (uintptr_t) buf.buf;

#ifdef IBV_CMD_RESIZE_CQ_HAS_RESP_PARAMS
	{
		struct ibv_resize_cq_resp resp;
		ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd,
					&resp, sizeof resp);
	}
#else
	ret = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof cmd);
#endif
	if (ret) {
		mlx4_free_buf(&buf);
		goto out;
	}

	mlx4_cq_resize_copy_cqes(cq, buf.buf, old_cqe);

	mlx4_free_buf(&cq->buf);
	cq->buf = buf;

out:
	pthread_spin_unlock(&cq->lock);
	return ret;
}
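
/*
 * Editor's note: resizing allocates the new buffer first, asks the
 * kernel to switch the CQ over, and only then copies the CQEs that have
 * not been polled yet into the new buffer; if the kernel call fails,
 * the old buffer is untouched.  The whole sequence runs under cq->lock,
 * so no userspace poller can observe the intermediate state.
 */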

int mlx4_destroy_cq(struct ibv_cq *cq)
{
	int ret;

	ret = ibv_cmd_destroy_cq(cq);
	if (ret)
		return ret;

	mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
	mlx4_free_buf(&to_mcq(cq)->buf);
	free(to_mcq(cq));

	return 0;
}

struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
				 struct ibv_srq_init_attr *attr)
{
	struct mlx4_create_srq      cmd;
	struct mlx4_create_srq_resp resp;
	struct mlx4_srq		   *srq;
	int			    ret;

	/* Sanity check SRQ size before proceeding */
	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
		return NULL;

	srq = malloc(sizeof *srq);
	if (!srq)
		return NULL;

	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	srq->max     = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs  = attr->attr.max_sge;
	srq->counter = 0;

	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
		goto err;

	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
	if (!srq->db)
		goto err_free;

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr  = (uintptr_t) srq->db;

	ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
				 &cmd.ibv_cmd, sizeof cmd,
				 &resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	srq->srqn = resp.srqn;

	return &srq->ibv_srq;

err_db:
	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);

err_free:
	free(srq->wrid);
	mlx4_free_buf(&srq->buf);

err:
	free(srq);

	return NULL;
}
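
/*
 * Illustrative usage sketch (editor's note): an application reaches
 * mlx4_create_srq() through the standard verbs call, e.g.:
 *
 *	struct ibv_srq_init_attr init_attr = {
 *		.attr = {
 *			.max_wr  = 1024,
 *			.max_sge = 4,
 *		},
 *	};
 *	struct ibv_srq *srq = ibv_create_srq(pd, &init_attr);
 *
 * On return, init_attr.attr reflects the sizes actually allocated.
 */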

int mlx4_modify_srq(struct ibv_srq *srq,
		     struct ibv_srq_attr *attr,
		     enum ibv_srq_attr_mask attr_mask)
{
	struct ibv_modify_srq cmd;

	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
}

int mlx4_query_srq(struct ibv_srq *srq,
		    struct ibv_srq_attr *attr)
{
	struct ibv_query_srq cmd;

	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

int mlx4_destroy_srq(struct ibv_srq *ibsrq)
{
	struct mlx4_srq *srq = to_msrq(ibsrq);
	struct mlx4_cq *mcq = NULL;
	int ret;

	if (ibsrq->xrc_cq) {
		/* is an xrc_srq */
		mcq = to_mcq(ibsrq->xrc_cq);
		mlx4_cq_clean(mcq, 0, srq);
		pthread_spin_lock(&mcq->lock);
		mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn);
		pthread_spin_unlock(&mcq->lock);
	}

	ret = ibv_cmd_destroy_srq(ibsrq);
	if (ret) {
		if (ibsrq->xrc_cq) {
			pthread_spin_lock(&mcq->lock);
			mlx4_store_xrc_srq(to_mctx(ibsrq->context),
					   srq->srqn, srq);
			pthread_spin_unlock(&mcq->lock);
		}
		return ret;
	}

	mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db);
	mlx4_free_buf(&srq->buf);
	free(srq->wrid);
	free(srq);

	return 0;
}

static int verify_sizes(struct ibv_qp_init_attr *attr, struct mlx4_context *context)
{
	int size;
	int nsegs;

	if (attr->cap.max_send_wr     > context->max_qp_wr ||
	    attr->cap.max_recv_wr     > context->max_qp_wr ||
	    attr->cap.max_send_sge    > context->max_sge   ||
	    attr->cap.max_recv_sge    > context->max_sge)
		return -1;

	if (attr->cap.max_inline_data) {
		nsegs = num_inline_segs(attr->cap.max_inline_data, attr->qp_type);
		size = MLX4_MAX_WQE_SIZE - nsegs * sizeof (struct mlx4_wqe_inline_seg);
		switch (attr->qp_type) {
		case IBV_QPT_UD:
			size -= (sizeof (struct mlx4_wqe_ctrl_seg) +
				 sizeof (struct mlx4_wqe_datagram_seg));
			break;

		case IBV_QPT_RC:
		case IBV_QPT_UC:
		case IBV_QPT_XRC:
			size -= (sizeof (struct mlx4_wqe_ctrl_seg) +
				 sizeof (struct mlx4_wqe_raddr_seg));
			break;

		default:
			return 0;
		}

		if (attr->cap.max_inline_data > size)
			return -1;
	}

	return 0;
}
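
/*
 * Editor's note on verify_sizes(): inline data must fit in a single
 * send WQE together with the control segment, the transport-specific
 * segment (datagram for UD, remote address for RC/UC/XRC) and one
 * inline header per inline segment, so an oversized request is rejected
 * here rather than failing later at post time.
 */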

struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
{
	struct mlx4_create_qp     cmd;
	struct ibv_create_qp_resp resp;
	struct mlx4_qp		 *qp;
	int			  ret;
	struct mlx4_context	 *context = to_mctx(pd->context);

	/* Sanity check QP size before proceeding */
	if (verify_sizes(attr, context))
		return NULL;

	qp = malloc(sizeof *qp);
	if (!qp)
		return NULL;

	mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);

	/*
	 * We need to leave 2 KB + 1 WQE of headroom in the SQ to
	 * allow HW to prefetch.
	 */
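	/*
	 * Editor's example: with 64-byte send WQEs (wqe_shift == 6)
	 * this works out to 2048 / 64 + 1 = 33 spare WQEs.
	 */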
	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);

	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
	else {
		if (attr->cap.max_recv_sge < 1)
			attr->cap.max_recv_sge = 1;
		if (attr->cap.max_recv_wr < 1)
			attr->cap.max_recv_wr = 1;
	}

	if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
		goto err;

	mlx4_init_qp_indices(qp);

	if (pthread_spin_init(&qp->sq.lock, PTHREAD_PROCESS_PRIVATE) ||
	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
		goto err_free;

	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
		if (!qp->db)
			goto err_free;

		*qp->db = 0;
	}

	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
		cmd.db_addr = 0;
	else
		cmd.db_addr = (uintptr_t) qp->db;
	cmd.log_sq_stride   = qp->sq.wqe_shift;
	for (cmd.log_sq_bb_count = 0;
	     qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
	     ++cmd.log_sq_bb_count)
		; /* nothing */
	cmd.sq_no_prefetch = 0;	/* OK for ABI 2: just a reserved field */
	memset(cmd.reserved, 0, sizeof cmd.reserved);

	pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);

	ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
				&resp, sizeof resp);
	if (ret)
		goto err_rq_db;

	ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
	if (ret)
		goto err_destroy;
	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);

	qp->rq.wqe_cnt = attr->cap.max_recv_wr;
	qp->rq.max_gs  = attr->cap.max_recv_sge;

	/* adjust rq maxima to not exceed reported device maxima */
	attr->cap.max_recv_wr = min(context->max_qp_wr, attr->cap.max_recv_wr);
	attr->cap.max_recv_sge = min(context->max_sge, attr->cap.max_recv_sge);

	qp->rq.max_post = attr->cap.max_recv_wr;
	mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);

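	/*
	 * Editor's note: the doorbell word carries the QP number in
	 * bits 31:8 (qp_num is 24 bits wide); it is byte-swapped once
	 * here so the send path can write it out without further
	 * conversion.
	 */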
	qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
	if (attr->sq_sig_all)
		qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
	else
		qp->sq_signal_bits = 0;

	return &qp->ibv_qp;

err_destroy:
	ibv_cmd_destroy_qp(&qp->ibv_qp);

err_rq_db:
	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);

err_free:
	free(qp->sq.wrid);
	if (qp->rq.wqe_cnt)
		free(qp->rq.wrid);
	mlx4_free_buf(&qp->buf);

err:
	free(qp);

	return NULL;
}

int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
		   enum ibv_qp_attr_mask attr_mask,
		   struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof cmd);
	if (ret)
		return ret;

	init_attr->cap.max_send_wr     = qp->sq.max_post;
	init_attr->cap.max_send_sge    = qp->sq.max_gs;
	init_attr->cap.max_inline_data = qp->max_inline_data;

	attr->cap = init_attr->cap;

	return 0;
}

int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
		    enum ibv_qp_attr_mask attr_mask)
{
	struct ibv_modify_qp cmd;
	int ret;

	if (qp->state == IBV_QPS_RESET &&
	    attr_mask & IBV_QP_STATE   &&
	    attr->qp_state == IBV_QPS_INIT) {
		mlx4_qp_init_sq_ownership(to_mqp(qp));
	}

	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof cmd);

	if (!ret		       &&
	    (attr_mask & IBV_QP_STATE) &&
	    attr->qp_state == IBV_QPS_RESET) {
		mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
			       qp->srq ? to_msrq(qp->srq) : NULL);
		if (qp->send_cq != qp->recv_cq)
			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);

		mlx4_init_qp_indices(to_mqp(qp));
		if (!qp->srq && qp->qp_type != IBV_QPT_XRC)
			*to_mqp(qp)->db = 0;
	}

	return ret;
}
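
/*
 * Illustrative usage sketch (editor's note): a typical RESET -> INIT
 * transition that lands in mlx4_modify_qp() above:
 *
 *	struct ibv_qp_attr qp_attr = {
 *		.qp_state        = IBV_QPS_INIT,
 *		.pkey_index      = 0,
 *		.port_num        = 1,
 *		.qp_access_flags = 0,
 *	};
 *	ibv_modify_qp(qp, &qp_attr,
 *		      IBV_QP_STATE | IBV_QP_PKEY_INDEX |
 *		      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
 */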

static void mlx4_lock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (send_cq == recv_cq)
		pthread_spin_lock(&send_cq->lock);
	else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_lock(&send_cq->lock);
		pthread_spin_lock(&recv_cq->lock);
	} else {
		pthread_spin_lock(&recv_cq->lock);
		pthread_spin_lock(&send_cq->lock);
	}
}

static void mlx4_unlock_cqs(struct ibv_qp *qp)
{
	struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
	struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);

	if (send_cq == recv_cq)
		pthread_spin_unlock(&send_cq->lock);
	else if (send_cq->cqn < recv_cq->cqn) {
		pthread_spin_unlock(&recv_cq->lock);
		pthread_spin_unlock(&send_cq->lock);
	} else {
		pthread_spin_unlock(&send_cq->lock);
		pthread_spin_unlock(&recv_cq->lock);
	}
}
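
/*
 * Editor's note: the two helpers above always take the two CQ locks in
 * ascending cqn order (and release them in the reverse order), so two
 * threads destroying QPs that share the same pair of CQs in opposite
 * send/recv roles cannot deadlock against each other.
 */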

int mlx4_destroy_qp(struct ibv_qp *ibqp)
{
	struct mlx4_qp *qp = to_mqp(ibqp);
	int ret;

	pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
	ret = ibv_cmd_destroy_qp(ibqp);
	if (ret) {
		pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
		return ret;
	}

	mlx4_lock_cqs(ibqp);

	__mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
			ibqp->srq ? to_msrq(ibqp->srq) : NULL);
	if (ibqp->send_cq != ibqp->recv_cq)
		__mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);

	mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);

	mlx4_unlock_cqs(ibqp);
	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);

	if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC)
		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
	free(qp->sq.wrid);
	if (qp->rq.wqe_cnt)
		free(qp->rq.wrid);
	mlx4_free_buf(&qp->buf);
	free(qp);

	return 0;
}

struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
{
	struct mlx4_ah *ah;
	struct ibv_port_attr port_attr;
	uint8_t is_mcast;

	ah = malloc(sizeof *ah);
	if (!ah)
		return NULL;

	memset(ah, 0, sizeof *ah);

	ah->av.port_pd   = htonl(to_mpd(pd)->pdn | (attr->port_num << 24));
	ah->av.g_slid    = attr->src_path_bits;
	ah->av.dlid      = htons(attr->dlid);
	if (attr->static_rate) {
		ah->av.stat_rate = attr->static_rate + MLX4_STAT_RATE_OFFSET;
		/* XXX check rate cap? */
	}
	ah->av.sl_tclass_flowlabel = htonl(attr->sl << 28);
	if (attr->is_global) {
		ah->av.g_slid   |= 0x80;
		ah->av.gid_index = attr->grh.sgid_index;
		ah->av.hop_limit = attr->grh.hop_limit;
		ah->av.sl_tclass_flowlabel |=
			htonl((attr->grh.traffic_class << 20) |
				    attr->grh.flow_label);
		memcpy(ah->av.dgid, attr->grh.dgid.raw, 16);
	}

	if (ibv_query_port(pd->context, attr->port_num, &port_attr))
		goto err;

	if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
		if (ibv_resolve_eth_gid(pd, attr->port_num,
					(union ibv_gid *)ah->av.dgid,
					attr->grh.sgid_index,
					ah->mac, &ah->vlan,
					&ah->tagged, &is_mcast))
			goto err;

		if (is_mcast) {
			ah->av.dlid = htons(0xc000);
			ah->av.port_pd |= htonl(1 << 31);
		}
		if (ah->tagged) {
			ah->av.port_pd |= htonl(1 << 29);
			ah->vlan |= (attr->sl & 7) << 13;
		}
	}

	return &ah->ibv_ah;

err:
	free(ah);
	return NULL;
}
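
/*
 * Editor's example of the address-vector packing above: port_pd holds
 * the PD number in bits 23:0 and the port number in bits 31:24, so
 * pdn = 0x001234 on port 1 yields htonl(0x01001234); the service level
 * likewise lands in bits 31:28 of sl_tclass_flowlabel.
 */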

int mlx4_destroy_ah(struct ibv_ah *ah)
{
	free(to_mah(ah));

	return 0;
}

#ifdef HAVE_IBV_XRC_OPS
struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
				    struct ibv_xrc_domain *xrc_domain,
				    struct ibv_cq *xrc_cq,
				    struct ibv_srq_init_attr *attr)
{
	struct mlx4_create_xrc_srq  cmd;
	struct mlx4_create_srq_resp resp;
	struct mlx4_srq		   *srq;
	int			    ret;

	/* Sanity check SRQ size before proceeding */
	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
		return NULL;

	srq = malloc(sizeof *srq);
	if (!srq)
		return NULL;

	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
		goto err;

	srq->max     = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs  = attr->attr.max_sge;
	srq->counter = 0;

	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
		goto err;

	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
	if (!srq->db)
		goto err_free;

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr  = (uintptr_t) srq->db;

	ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr,
				     xrc_domain->handle,
				     xrc_cq->handle,
				     &cmd.ibv_cmd, sizeof cmd,
				     &resp.ibv_resp, sizeof resp);
	if (ret)
		goto err_db;

	srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn;

	ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq);
	if (ret)
		goto err_destroy;

	return &srq->ibv_srq;

err_destroy:
	ibv_cmd_destroy_srq(&srq->ibv_srq);

err_db:
	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);

err_free:
	free(srq->wrid);
	mlx4_free_buf(&srq->buf);

err:
	free(srq);

	return NULL;
}

struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
					    int fd, int oflag)
{
	int ret;
	struct mlx4_open_xrc_domain_resp resp;
	struct mlx4_xrc_domain *xrcd;

	xrcd = malloc(sizeof *xrcd);
	if (!xrcd)
		return NULL;

	ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd,
				      &resp.ibv_resp, sizeof resp);
	if (ret) {
		free(xrcd);
		return NULL;
	}

	xrcd->xrcdn = resp.xrcdn;
	return &xrcd->ibv_xrcd;
}

int mlx4_close_xrc_domain(struct ibv_xrc_domain *d)
{
	int ret;

	ret = ibv_cmd_close_xrc_domain(d);
	if (!ret)
		free(d);
	return ret;
}

int mlx4_create_xrc_rcv_qp(struct ibv_qp_init_attr *init_attr,
			   uint32_t *xrc_qp_num)
{
	return ibv_cmd_create_xrc_rcv_qp(init_attr, xrc_qp_num);
}

int mlx4_modify_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			   uint32_t xrc_qp_num,
			   struct ibv_qp_attr *attr,
			   int attr_mask)
{
	return ibv_cmd_modify_xrc_rcv_qp(xrc_domain, xrc_qp_num,
					 attr, attr_mask);
}

int mlx4_query_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			  uint32_t xrc_qp_num,
			  struct ibv_qp_attr *attr,
			  int attr_mask,
			  struct ibv_qp_init_attr *init_attr)
{
	int ret;

	ret = ibv_cmd_query_xrc_rcv_qp(xrc_domain, xrc_qp_num,
				       attr, attr_mask, init_attr);
	if (ret)
		return ret;

	init_attr->cap.max_send_wr = init_attr->cap.max_send_sge = 1;
	init_attr->cap.max_recv_sge = init_attr->cap.max_recv_wr = 0;
	init_attr->cap.max_inline_data = 0;
	init_attr->recv_cq = init_attr->send_cq = NULL;
	init_attr->srq = NULL;
	init_attr->xrc_domain = xrc_domain;
	init_attr->qp_type = IBV_QPT_XRC;
	init_attr->qp_context = NULL;
	attr->cap = init_attr->cap;

	return 0;
}

int mlx4_reg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			uint32_t xrc_qp_num)
{
	return ibv_cmd_reg_xrc_rcv_qp(xrc_domain, xrc_qp_num);
}

int mlx4_unreg_xrc_rcv_qp(struct ibv_xrc_domain *xrc_domain,
			  uint32_t xrc_qp_num)
{
	return ibv_cmd_unreg_xrc_rcv_qp(xrc_domain, xrc_qp_num);
}

#endif