1XRC implementation, consolidated (version 2):
2
3xrc ops were moved to their own structure at the end of
4struct ibv_context (to preserve binary compatibility).
5
6Check for ibv_context.more_ops member via AC_CHECK_MEMBER
7
8XRC QPs have MSB set in qp number, for identification in
9completion handling.
10
11Various bug fixes.
12(OFED 1.3 commit 39fe7f47e8fc07f356098df048d00740ba585fc5)
13
14Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
15---
16V2:
171. checkpatch.pl cleanup
182. Changed xrc_ops to more_ops
193. Check for xrc verbs in ibv_more_ops via AC_CHECK_MEMBER
20
21diff --git a/configure.in b/configure.in
22index 25f27f7..46a3a64 100644
23--- a/configure.in
24+++ b/configure.in
25@@ -42,6 +42,12 @@ AC_CHECK_HEADER(valgrind/memcheck.h,
26 dnl Checks for typedefs, structures, and compiler characteristics.
27 AC_C_CONST
28 AC_CHECK_SIZEOF(long)
29+AC_CHECK_MEMBER(struct ibv_context.more_ops,
30+    [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],,
31+    [#include <infiniband/verbs.h>])
32+AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq,
33+    [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],,
34+    [#include <infiniband/verbs.h>])
35 
36 dnl Checks for library functions
37 AC_CHECK_FUNC(ibv_read_sysfs_file, [],
38diff --git a/src/cq.c b/src/cq.c
39index 68e16e9..c598b87 100644
40--- a/src/cq.c
41+++ b/src/cq.c
42@@ -194,8 +194,9 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
43 {
44 	struct mlx4_wq *wq;
45 	struct mlx4_cqe *cqe;
46-	struct mlx4_srq *srq;
47+	struct mlx4_srq *srq = NULL;
48 	uint32_t qpn;
49+	uint32_t srqn;
50 	uint32_t g_mlpath_rqpn;
51 	uint16_t wqe_index;
52 	int is_error;
53@@ -221,20 +222,29 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
54 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
55 		MLX4_CQE_OPCODE_ERROR;
56 
57-	if (!*cur_qp ||
58-	    (ntohl(cqe->my_qpn) & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
59+	if (qpn & MLX4_XRC_QPN_BIT && !is_send) {
60+		srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff;
61+		/*
62+		 * We do not have to take the XRC SRQ table lock here,
63+		 * because CQs will be locked while XRC SRQs are removed
64+		 * from the table.
65+		 */
66+		srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn);
67+		if (!srq)
68+			return CQ_POLL_ERR;
69+	} else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) {
70 		/*
71 		 * We do not have to take the QP table lock here,
72 		 * because CQs will be locked while QPs are removed
73 		 * from the table.
74 		 */
75 		*cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context),
76-				       ntohl(cqe->my_qpn) & 0xffffff);
77+				       qpn & 0xffffff);
78 		if (!*cur_qp)
79 			return CQ_POLL_ERR;
80 	}
81 
82-	wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
83+	wc->qp_num = qpn & 0xffffff;
84 
85 	if (is_send) {
86 		wq = &(*cur_qp)->sq;
87@@ -242,6 +254,10 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
88 		wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
89 		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
90 		++wq->tail;
91+	} else if (srq) {
92+		wqe_index = htons(cqe->wqe_index);
93+		wc->wr_id = srq->wrid[wqe_index];
94+		mlx4_free_srq_wqe(srq, wqe_index);
95 	} else if ((*cur_qp)->ibv_qp.srq) {
96 		srq = to_msrq((*cur_qp)->ibv_qp.srq);
97 		wqe_index = htons(cqe->wqe_index);
98@@ -387,6 +403,10 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
99 	uint32_t prod_index;
100 	uint8_t owner_bit;
101 	int nfreed = 0;
102+	int is_xrc_srq = 0;
103+
104+	if (srq && srq->ibv_srq.xrc_cq)
105+		is_xrc_srq = 1;
106 
107 	/*
108 	 * First we need to find the current producer index, so we
109@@ -405,7 +425,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
110 	 */
111 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
112 		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
113-		if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
114+		if (is_xrc_srq &&
115+		    ((ntohl(cqe->g_mlpath_rqpn) & 0xffffff) == srq->srqn) &&
116+		    !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
117+			mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
118+			++nfreed;
119+		} else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
120 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
121 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
122 			++nfreed;
123diff --git a/src/mlx4-abi.h b/src/mlx4-abi.h
124index 20a40c9..1b1253c 100644
125--- a/src/mlx4-abi.h
126+++ b/src/mlx4-abi.h
127@@ -68,6 +68,14 @@ struct mlx4_resize_cq {
128 	__u64				buf_addr;
129 };
130 
131+#ifdef HAVE_IBV_XRC_OPS
132+struct mlx4_create_xrc_srq {
133+	struct ibv_create_xrc_srq	ibv_cmd;
134+	__u64				buf_addr;
135+	__u64				db_addr;
136+};
137+#endif
138+
139 struct mlx4_create_srq {
140 	struct ibv_create_srq		ibv_cmd;
141 	__u64				buf_addr;
142@@ -90,4 +98,12 @@ struct mlx4_create_qp {
143 	__u8				reserved[5];
144 };
145 
146+#ifdef HAVE_IBV_XRC_OPS
147+struct mlx4_open_xrc_domain_resp {
148+	struct ibv_open_xrc_domain_resp	ibv_resp;
149+	__u32				xrcdn;
150+	__u32				reserved;
151+};
152+#endif
153+
154 #endif /* MLX4_ABI_H */
155diff --git a/src/mlx4.c b/src/mlx4.c
156index 671e849..27ca75d 100644
157--- a/src/mlx4.c
158+++ b/src/mlx4.c
159@@ -68,6 +68,16 @@ struct {
160 	HCA(MELLANOX, 0x673c),	/* MT25408 "Hermon" QDR PCIe gen2 */
161 };
162 
163+#ifdef HAVE_IBV_MORE_OPS
164+static struct ibv_more_ops mlx4_more_ops = {
165+#ifdef HAVE_IBV_XRC_OPS
166+	.create_xrc_srq   = mlx4_create_xrc_srq,
167+	.open_xrc_domain  = mlx4_open_xrc_domain,
168+	.close_xrc_domain = mlx4_close_xrc_domain,
169+#endif
170+};
171+#endif
172+
173 static struct ibv_context_ops mlx4_ctx_ops = {
174 	.query_device  = mlx4_query_device,
175 	.query_port    = mlx4_query_port,
176@@ -124,6 +134,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
177 	for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i)
178 		context->qp_table[i].refcnt = 0;
179 
180+	context->num_xrc_srqs = resp.qp_tab_size;
181+	context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1
182+				       - MLX4_XRC_SRQ_TABLE_BITS;
183+	context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1;
184+
185+	pthread_mutex_init(&context->xrc_srq_table_mutex, NULL);
186+	for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i)
187+		context->xrc_srq_table[i].refcnt = 0;
188+
189 	for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
190 		context->db_list[i] = NULL;
191 
192@@ -156,6 +175,9 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
193 	pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
194 
195 	context->ibv_ctx.ops = mlx4_ctx_ops;
196+#ifdef HAVE_IBV_XRC_OPS
197+	context->ibv_ctx.more_ops = &mlx4_more_ops;
198+#endif
199 
200 	if (mlx4_query_device(&context->ibv_ctx, &dev_attrs))
201 		goto query_free;
202diff --git a/src/mlx4.h b/src/mlx4.h
203index 8643d8f..3eadb98 100644
204--- a/src/mlx4.h
205+++ b/src/mlx4.h
206@@ -79,6 +79,11 @@
207 
208 #endif
209 
210+#ifndef HAVE_IBV_MORE_OPS
211+#undef HAVE_IBV_XRC_OPS
212+#undef HAVE_IBV_CREATE_QP_EXP
213+#endif
214+
215 #define HIDDEN		__attribute__((visibility ("hidden")))
216 
217 #define PFX		"mlx4: "
218@@ -111,6 +116,16 @@ enum {
219 	MLX4_QP_TABLE_MASK		= MLX4_QP_TABLE_SIZE - 1
220 };
221 
222+enum {
223+	MLX4_XRC_SRQ_TABLE_BITS		= 8,
224+	MLX4_XRC_SRQ_TABLE_SIZE		= 1 << MLX4_XRC_SRQ_TABLE_BITS,
225+	MLX4_XRC_SRQ_TABLE_MASK		= MLX4_XRC_SRQ_TABLE_SIZE - 1
226+};
227+
228+enum {
229+	MLX4_XRC_QPN_BIT		= (1 << 23)
230+};
231+
232 enum mlx4_db_type {
233 	MLX4_DB_TYPE_CQ,
234 	MLX4_DB_TYPE_RQ,
235@@ -174,6 +189,15 @@ struct mlx4_context {
236 	int				max_sge;
237 	int				max_cqe;
238 
239+	struct {
240+		struct mlx4_srq       **table;
241+		int			refcnt;
242+	}				xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE];
243+	pthread_mutex_t			xrc_srq_table_mutex;
244+	int				num_xrc_srqs;
245+	int				xrc_srq_table_shift;
246+	int				xrc_srq_table_mask;
247+
248 	struct mlx4_db_page	       *db_list[MLX4_NUM_DB_TYPE];
249 	pthread_mutex_t			db_list_mutex;
250 };
251@@ -260,6 +284,11 @@ struct mlx4_ah {
252 	struct mlx4_av			av;
253 };
254 
255+struct mlx4_xrc_domain {
256+	struct ibv_xrc_domain		ibv_xrcd;
257+	uint32_t			xrcdn;
258+};
259+
260 static inline unsigned long align(unsigned long val, unsigned long align)
261 {
262 	return (val + align - 1) & ~(align - 1);
263@@ -304,6 +333,13 @@ static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
264 	return to_mxxx(ah, ah);
265 }
266 
267+#ifdef HAVE_IBV_XRC_OPS
268+static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd)
269+{
270+	return to_mxxx(xrcd, xrc_domain);
271+}
272+#endif
273+
274 int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
275 void mlx4_free_buf(struct mlx4_buf *buf);
276 
277@@ -350,6 +386,10 @@ void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
278 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
279 		       struct ibv_recv_wr *wr,
280 		       struct ibv_recv_wr **bad_wr);
281+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
282+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
283+		       struct mlx4_srq *srq);
284+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn);
285 
286 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
287 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
288@@ -380,5 +420,16 @@ int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
289 int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr,
290 		   struct mlx4_ah *ah);
291 void mlx4_free_av(struct mlx4_ah *ah);
292+#ifdef HAVE_IBV_XRC_OPS
293+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
294+				    struct ibv_xrc_domain *xrc_domain,
295+				    struct ibv_cq *xrc_cq,
296+				    struct ibv_srq_init_attr *attr);
297+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
298+					    int fd, int oflag);
299+
300+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d);
301+#endif
302+
303 
304 #endif /* MLX4_H */
305diff --git a/src/qp.c b/src/qp.c
306index 01e8580..2f02430 100644
307--- a/src/qp.c
308+++ b/src/qp.c
309@@ -226,7 +226,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
310 		ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
311 		qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id;
312 
313-		ctrl->srcrb_flags =
314+		ctrl->xrcrb_flags =
315 			(wr->send_flags & IBV_SEND_SIGNALED ?
316 			 htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
317 			(wr->send_flags & IBV_SEND_SOLICITED ?
318@@ -243,6 +243,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
319 		size = sizeof *ctrl / 16;
320 
321 		switch (ibqp->qp_type) {
322+		case IBV_QPT_XRC:
323+			ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8);
324+			/* fall thru */
325 		case IBV_QPT_RC:
326 		case IBV_QPT_UC:
327 			switch (wr->opcode) {
328@@ -543,6 +546,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
329 		size += sizeof (struct mlx4_wqe_raddr_seg);
330 		break;
331 
332+	case IBV_QPT_XRC:
333 	case IBV_QPT_RC:
334 		size += sizeof (struct mlx4_wqe_raddr_seg);
335 		/*
336@@ -631,6 +635,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
337 
338 	case IBV_QPT_UC:
339 	case IBV_QPT_RC:
340+	case IBV_QPT_XRC:
341 		wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
342 		break;
343 
344diff --git a/src/srq.c b/src/srq.c
345index ba2ceb9..1350792 100644
346--- a/src/srq.c
347+++ b/src/srq.c
348@@ -167,3 +167,53 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
349 
350 	return 0;
351 }
352+
353+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
354+{
355+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
356+
357+	if (ctx->xrc_srq_table[tind].refcnt)
358+		return ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask];
359+	else
360+		return NULL;
361+}
362+
363+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn,
364+		       struct mlx4_srq *srq)
365+{
366+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
367+	int ret = 0;
368+
369+	pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
370+
371+	if (!ctx->xrc_srq_table[tind].refcnt) {
372+		ctx->xrc_srq_table[tind].table = calloc(ctx->xrc_srq_table_mask + 1,
373+							sizeof(struct mlx4_srq *));
374+		if (!ctx->xrc_srq_table[tind].table) {
375+			ret = -1;
376+			goto out;
377+		}
378+	}
379+
380+	++ctx->xrc_srq_table[tind].refcnt;
381+	ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = srq;
382+
383+out:
384+	pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
385+	return ret;
386+}
387+
388+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn)
389+{
390+	int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift;
391+
392+	pthread_mutex_lock(&ctx->xrc_srq_table_mutex);
393+
394+	if (!--ctx->xrc_srq_table[tind].refcnt)
395+		free(ctx->xrc_srq_table[tind].table);
396+	else
397+		ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = NULL;
398+
399+	pthread_mutex_unlock(&ctx->xrc_srq_table_mutex);
400+}
401+
402diff --git a/src/verbs.c b/src/verbs.c
403index 400050c..b7c9c8e 100644
404--- a/src/verbs.c
405+++ b/src/verbs.c
406@@ -368,18 +368,36 @@ int mlx4_query_srq(struct ibv_srq *srq,
407 	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
408 }
409 
410-int mlx4_destroy_srq(struct ibv_srq *srq)
411+int mlx4_destroy_srq(struct ibv_srq *ibsrq)
412 {
413+	struct mlx4_srq *srq = to_msrq(ibsrq);
414+	struct mlx4_cq *mcq = NULL;
415 	int ret;
416 
417-	ret = ibv_cmd_destroy_srq(srq);
418-	if (ret)
419+	if (ibsrq->xrc_cq) {
420+		/* is an xrc_srq */
421+		mcq = to_mcq(ibsrq->xrc_cq);
422+		mlx4_cq_clean(mcq, 0, srq);
423+		pthread_spin_lock(&mcq->lock);
424+		mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn);
425+		pthread_spin_unlock(&mcq->lock);
426+	}
427+
428+	ret = ibv_cmd_destroy_srq(ibsrq);
429+	if (ret) {
430+		if (ibsrq->xrc_cq) {
431+			pthread_spin_lock(&mcq->lock);
432+			mlx4_store_xrc_srq(to_mctx(ibsrq->context),
433+					   srq->srqn, srq);
434+			pthread_spin_unlock(&mcq->lock);
435+		}
436 		return ret;
437+	}
438 
439-	mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
440-	mlx4_free_buf(&to_msrq(srq)->buf);
441-	free(to_msrq(srq)->wrid);
442-	free(to_msrq(srq));
443+	mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db);
444+	mlx4_free_buf(&srq->buf);
445+	free(srq->wrid);
446+	free(srq);
447 
448 	return 0;
449 }
450@@ -415,7 +433,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
451 	qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
452 	qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
453 
454-	if (attr->srq)
455+	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
456 		attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
457 	else {
458 		if (attr->cap.max_recv_sge < 1)
459@@ -433,7 +451,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
460 	    pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
461 		goto err_free;
462 
463-	if (!attr->srq) {
464+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC) {
465 		qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
466 		if (!qp->db)
467 			goto err_free;
468@@ -442,7 +460,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
469 	}
470 
471 	cmd.buf_addr	    = (uintptr_t) qp->buf.buf;
472-	if (attr->srq)
473+	if (attr->srq || attr->qp_type == IBV_QPT_XRC)
474 		cmd.db_addr = 0;
475 	else
476 		cmd.db_addr = (uintptr_t) qp->db;
477@@ -485,7 +503,7 @@ err_destroy:
478 
479 err_rq_db:
480 	pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
481-	if (!attr->srq)
482+	if (!attr->srq && attr->qp_type != IBV_QPT_XRC)
483 		mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
484 
485 err_free:
486@@ -544,7 +562,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
487 			mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
488 
489 		mlx4_init_qp_indices(to_mqp(qp));
490-		if (!qp->srq)
491+		if (!qp->srq && qp->qp_type != IBV_QPT_XRC)
492 			*to_mqp(qp)->db = 0;
493 	}
494 
495@@ -603,7 +621,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
496 	mlx4_unlock_cqs(ibqp);
497 	pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
498 
499-	if (!ibqp->srq)
500+	if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC)
501 		mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
502 	free(qp->sq.wrid);
503 	if (qp->rq.wqe_cnt)
504@@ -661,3 +679,103 @@ int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid)
505 
506 	return 0;
507 }
508+
509+#ifdef HAVE_IBV_XRC_OPS
510+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd,
511+				    struct ibv_xrc_domain *xrc_domain,
512+				    struct ibv_cq *xrc_cq,
513+				    struct ibv_srq_init_attr *attr)
514+{
515+	struct mlx4_create_xrc_srq  cmd;
516+	struct mlx4_create_srq_resp resp;
517+	struct mlx4_srq		   *srq;
518+	int			    ret;
519+
520+	/* Sanity check SRQ size before proceeding */
521+	if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64)
522+		return NULL;
523+
524+	srq = malloc(sizeof *srq);
525+	if (!srq)
526+		return NULL;
527+
528+	if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
529+		goto err;
530+
531+	srq->max     = align_queue_size(attr->attr.max_wr + 1);
532+	srq->max_gs  = attr->attr.max_sge;
533+	srq->counter = 0;
534+
535+	if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
536+		goto err;
537+
538+	srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
539+	if (!srq->db)
540+		goto err_free;
541+
542+	*srq->db = 0;
543+
544+	cmd.buf_addr = (uintptr_t) srq->buf.buf;
545+	cmd.db_addr  = (uintptr_t) srq->db;
546+
547+	ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr,
548+				     xrc_domain->handle,
549+				     xrc_cq->handle,
550+				     &cmd.ibv_cmd, sizeof cmd,
551+				     &resp.ibv_resp, sizeof resp);
552+	if (ret)
553+		goto err_db;
554+
555+	srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn;
556+
557+	ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq);
558+	if (ret)
559+		goto err_destroy;
560+
561+	return &srq->ibv_srq;
562+
563+err_destroy:
564+	ibv_cmd_destroy_srq(&srq->ibv_srq);
565+
566+err_db:
567+	mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
568+
569+err_free:
570+	free(srq->wrid);
571+	mlx4_free_buf(&srq->buf);
572+
573+err:
574+	free(srq);
575+
576+	return NULL;
577+}
578+
579+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context,
580+					    int fd, int oflag)
581+{
582+	int ret;
583+	struct mlx4_open_xrc_domain_resp resp;
584+	struct mlx4_xrc_domain *xrcd;
585+
586+	xrcd = malloc(sizeof *xrcd);
587+	if (!xrcd)
588+		return NULL;
589+
590+	ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd,
591+				      &resp.ibv_resp, sizeof resp);
592+	if (ret) {
593+		free(xrcd);
594+		return NULL;
595+	}
596+
597+	xrcd->xrcdn = resp.xrcdn;
598+	return &xrcd->ibv_xrcd;
599+}
600+
601+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d)
602+{
603+	int ret = ibv_cmd_close_xrc_domain(d);
604+	free(d);
605+	return ret;
606+}
607+#endif
608diff --git a/src/wqe.h b/src/wqe.h
609index 6f7f309..fa2f8ac 100644
610--- a/src/wqe.h
611+++ b/src/wqe.h
612@@ -65,7 +65,7 @@ struct mlx4_wqe_ctrl_seg {
613 	 * [1]   SE (solicited event)
614 	 * [0]   FL (force loopback)
615 	 */
616-	uint32_t		srcrb_flags;
617+	uint32_t		xrcrb_flags;
618 	/*
619 	 * imm is immediate data for send/RDMA write w/ immediate;
620 	 * also invalidation key for send with invalidate; input
621