1219820SjeffXRC implementation, consolidated (version 2): 2219820Sjeff 3219820Sjeffxrc ops were moved to their own structure at the end of 4219820Sjeffstruct ibv_context (to preserve binary compatibility). 5219820Sjeff 6219820SjeffCheck for ibv_context.xrc_ops member via AC_CHECK_MEMBER 7219820Sjeff 8219820SjeffXRC QPs have MSB set in qp number, for identification in 9219820Sjeffcompletion handling. 10219820Sjeff 11219820SjeffVarious bug fixes. 12219820Sjeff(OFED 1.3 commit 39fe7f47e8fc07f356098df048d00740ba585fc5) 13219820Sjeff 14219820SjeffSigned-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> 15219820Sjeff--- 16219820SjeffV2: 17219820Sjeff1. checkpatch.pl cleanup 18219820Sjeff2. Changed xrc_ops to more ops 19219820Sjeff3. Check for xrc verbs in ibv_more_ops via AC_CHECK_MEMBER 20219820Sjeff 21219820Sjeffdiff --git a/configure.in b/configure.in 22219820Sjeffindex 25f27f7..46a3a64 100644 23219820Sjeff--- a/configure.in 24219820Sjeff+++ b/configure.in 25219820Sjeff@@ -42,6 +42,12 @@ AC_CHECK_HEADER(valgrind/memcheck.h, 26219820Sjeff dnl Checks for typedefs, structures, and compiler characteristics. 27219820Sjeff AC_C_CONST 28219820Sjeff AC_CHECK_SIZEOF(long) 29219820Sjeff+AC_CHECK_MEMBER(struct ibv_context.more_ops, 30219820Sjeff+ [AC_DEFINE([HAVE_IBV_MORE_OPS], 1, [Define to 1 if more_ops is a member of ibv_context])],, 31219820Sjeff+ [#include <infiniband/verbs.h>]) 32219820Sjeff+AC_CHECK_MEMBER(struct ibv_more_ops.create_xrc_srq, 33219820Sjeff+ [AC_DEFINE([HAVE_IBV_XRC_OPS], 1, [Define to 1 if have xrc ops])],, 34219820Sjeff+ [#include <infiniband/verbs.h>]) 35219820Sjeff 36219820Sjeff dnl Checks for library functions 37219820Sjeff AC_CHECK_FUNC(ibv_read_sysfs_file, [], 38219820Sjeffdiff --git a/src/cq.c b/src/cq.c 39219820Sjeffindex 68e16e9..c598b87 100644 40219820Sjeff--- a/src/cq.c 41219820Sjeff+++ b/src/cq.c 42219820Sjeff@@ -194,8 +194,9 @@ static int mlx4_poll_one(struct mlx4_cq *cq, 43219820Sjeff { 44219820Sjeff struct mlx4_wq *wq; 45219820Sjeff struct mlx4_cqe *cqe; 46219820Sjeff- struct mlx4_srq *srq; 47219820Sjeff+ struct mlx4_srq *srq = NULL; 48219820Sjeff uint32_t qpn; 49219820Sjeff+ uint32_t srqn; 50219820Sjeff uint32_t g_mlpath_rqpn; 51219820Sjeff uint16_t wqe_index; 52219820Sjeff int is_error; 53219820Sjeff@@ -221,20 +223,29 @@ static int mlx4_poll_one(struct mlx4_cq *cq, 54219820Sjeff is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 55219820Sjeff MLX4_CQE_OPCODE_ERROR; 56219820Sjeff 57219820Sjeff- if (!*cur_qp || 58219820Sjeff- (ntohl(cqe->my_qpn) & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) { 59219820Sjeff+ if (qpn & MLX4_XRC_QPN_BIT && !is_send) { 60219820Sjeff+ srqn = ntohl(cqe->g_mlpath_rqpn) & 0xffffff; 61219820Sjeff+ /* 62219820Sjeff+ * We do not have to take the XRC SRQ table lock here, 63219820Sjeff+ * because CQs will be locked while XRC SRQs are removed 64219820Sjeff+ * from the table. 65219820Sjeff+ */ 66219820Sjeff+ srq = mlx4_find_xrc_srq(to_mctx(cq->ibv_cq.context), srqn); 67219820Sjeff+ if (!srq) 68219820Sjeff+ return CQ_POLL_ERR; 69219820Sjeff+ } else if (!*cur_qp || (qpn & 0xffffff) != (*cur_qp)->ibv_qp.qp_num) { 70219820Sjeff /* 71219820Sjeff * We do not have to take the QP table lock here, 72219820Sjeff * because CQs will be locked while QPs are removed 73219820Sjeff * from the table. 74219820Sjeff */ 75219820Sjeff *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), 76219820Sjeff- ntohl(cqe->my_qpn) & 0xffffff); 77219820Sjeff+ qpn & 0xffffff); 78219820Sjeff if (!*cur_qp) 79219820Sjeff return CQ_POLL_ERR; 80219820Sjeff } 81219820Sjeff 82219820Sjeff- wc->qp_num = (*cur_qp)->ibv_qp.qp_num; 83219820Sjeff+ wc->qp_num = qpn & 0xffffff; 84219820Sjeff 85219820Sjeff if (is_send) { 86219820Sjeff wq = &(*cur_qp)->sq; 87219820Sjeff@@ -242,6 +254,10 @@ static int mlx4_poll_one(struct mlx4_cq *cq, 88219820Sjeff wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail); 89219820Sjeff wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 90219820Sjeff ++wq->tail; 91219820Sjeff+ } else if (srq) { 92219820Sjeff+ wqe_index = htons(cqe->wqe_index); 93219820Sjeff+ wc->wr_id = srq->wrid[wqe_index]; 94219820Sjeff+ mlx4_free_srq_wqe(srq, wqe_index); 95219820Sjeff } else if ((*cur_qp)->ibv_qp.srq) { 96219820Sjeff srq = to_msrq((*cur_qp)->ibv_qp.srq); 97219820Sjeff wqe_index = htons(cqe->wqe_index); 98219820Sjeff@@ -387,6 +403,10 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) 99219820Sjeff uint32_t prod_index; 100219820Sjeff uint8_t owner_bit; 101219820Sjeff int nfreed = 0; 102219820Sjeff+ int is_xrc_srq = 0; 103219820Sjeff+ 104219820Sjeff+ if (srq && srq->ibv_srq.xrc_cq) 105219820Sjeff+ is_xrc_srq = 1; 106219820Sjeff 107219820Sjeff /* 108219820Sjeff * First we need to find the current producer index, so we 109219820Sjeff@@ -405,7 +425,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq) 110219820Sjeff */ 111219820Sjeff while ((int) --prod_index - (int) cq->cons_index >= 0) { 112219820Sjeff cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); 113219820Sjeff- if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) { 114219820Sjeff+ if (is_xrc_srq && 115219820Sjeff+ (ntohl(cqe->g_mlpath_rqpn & 0xffffff) == srq->srqn) && 116219820Sjeff+ !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) { 117219820Sjeff+ mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index)); 118219820Sjeff+ ++nfreed; 119219820Sjeff+ } else if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) { 120219820Sjeff if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) 121219820Sjeff mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index)); 122219820Sjeff ++nfreed; 123219820Sjeffdiff --git a/src/mlx4-abi.h b/src/mlx4-abi.h 124219820Sjeffindex 20a40c9..1b1253c 100644 125219820Sjeff--- a/src/mlx4-abi.h 126219820Sjeff+++ b/src/mlx4-abi.h 127219820Sjeff@@ -68,6 +68,14 @@ struct mlx4_resize_cq { 128219820Sjeff __u64 buf_addr; 129219820Sjeff }; 130219820Sjeff 131219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 132219820Sjeff+struct mlx4_create_xrc_srq { 133219820Sjeff+ struct ibv_create_xrc_srq ibv_cmd; 134219820Sjeff+ __u64 buf_addr; 135219820Sjeff+ __u64 db_addr; 136219820Sjeff+}; 137219820Sjeff+#endif 138219820Sjeff+ 139219820Sjeff struct mlx4_create_srq { 140219820Sjeff struct ibv_create_srq ibv_cmd; 141219820Sjeff __u64 buf_addr; 142219820Sjeff@@ -90,4 +98,12 @@ struct mlx4_create_qp { 143219820Sjeff __u8 reserved[5]; 144219820Sjeff }; 145219820Sjeff 146219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 147219820Sjeff+struct mlx4_open_xrc_domain_resp { 148219820Sjeff+ struct ibv_open_xrc_domain_resp ibv_resp; 149219820Sjeff+ __u32 xrcdn; 150219820Sjeff+ __u32 reserved; 151219820Sjeff+}; 152219820Sjeff+#endif 153219820Sjeff+ 154219820Sjeff #endif /* MLX4_ABI_H */ 155219820Sjeffdiff --git a/src/mlx4.c b/src/mlx4.c 156219820Sjeffindex 671e849..27ca75d 100644 157219820Sjeff--- a/src/mlx4.c 158219820Sjeff+++ b/src/mlx4.c 159219820Sjeff@@ -68,6 +68,16 @@ struct { 160219820Sjeff HCA(MELLANOX, 0x673c), /* MT25408 "Hermon" QDR PCIe gen2 */ 161219820Sjeff }; 162219820Sjeff 163219820Sjeff+#ifdef HAVE_IBV_MORE_OPS 164219820Sjeff+static struct ibv_more_ops mlx4_more_ops = { 165219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 166219820Sjeff+ .create_xrc_srq = mlx4_create_xrc_srq, 167219820Sjeff+ .open_xrc_domain = mlx4_open_xrc_domain, 168219820Sjeff+ .close_xrc_domain = mlx4_close_xrc_domain, 169219820Sjeff+#endif 170219820Sjeff+}; 171219820Sjeff+#endif 172219820Sjeff+ 173219820Sjeff static struct ibv_context_ops mlx4_ctx_ops = { 174219820Sjeff .query_device = mlx4_query_device, 175219820Sjeff .query_port = mlx4_query_port, 176219820Sjeff@@ -124,6 +134,15 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_ 177219820Sjeff for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) 178219820Sjeff context->qp_table[i].refcnt = 0; 179219820Sjeff 180219820Sjeff+ context->num_xrc_srqs = resp.qp_tab_size; 181219820Sjeff+ context->xrc_srq_table_shift = ffs(context->num_xrc_srqs) - 1 182219820Sjeff+ - MLX4_XRC_SRQ_TABLE_BITS; 183219820Sjeff+ context->xrc_srq_table_mask = (1 << context->xrc_srq_table_shift) - 1; 184219820Sjeff+ 185219820Sjeff+ pthread_mutex_init(&context->xrc_srq_table_mutex, NULL); 186219820Sjeff+ for (i = 0; i < MLX4_XRC_SRQ_TABLE_SIZE; ++i) 187219820Sjeff+ context->xrc_srq_table[i].refcnt = 0; 188219820Sjeff+ 189219820Sjeff for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) 190219820Sjeff context->db_list[i] = NULL; 191219820Sjeff 192219820Sjeff@@ -156,6 +175,9 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_ 193219820Sjeff pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); 194219820Sjeff 195219820Sjeff context->ibv_ctx.ops = mlx4_ctx_ops; 196219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 197219820Sjeff+ context->ibv_ctx.more_ops = &mlx4_more_ops; 198219820Sjeff+#endif 199219820Sjeff 200219820Sjeff if (mlx4_query_device(&context->ibv_ctx, &dev_attrs)) 201219820Sjeff goto query_free; 202219820Sjeffdiff --git a/src/mlx4.h b/src/mlx4.h 203219820Sjeffindex 8643d8f..3eadb98 100644 204219820Sjeff--- a/src/mlx4.h 205219820Sjeff+++ b/src/mlx4.h 206219820Sjeff@@ -79,6 +79,11 @@ 207219820Sjeff 208219820Sjeff #endif 209219820Sjeff 210219820Sjeff+#ifndef HAVE_IBV_MORE_OPS 211219820Sjeff+#undef HAVE_IBV_XRC_OPS 212219820Sjeff+#undef HAVE_IBV_CREATE_QP_EXP 213219820Sjeff+#endif 214219820Sjeff+ 215219820Sjeff #define HIDDEN __attribute__((visibility ("hidden"))) 216219820Sjeff 217219820Sjeff #define PFX "mlx4: " 218219820Sjeff@@ -111,6 +116,16 @@ enum { 219219820Sjeff MLX4_QP_TABLE_MASK = MLX4_QP_TABLE_SIZE - 1 220219820Sjeff }; 221219820Sjeff 222219820Sjeff+enum { 223219820Sjeff+ MLX4_XRC_SRQ_TABLE_BITS = 8, 224219820Sjeff+ MLX4_XRC_SRQ_TABLE_SIZE = 1 << MLX4_XRC_SRQ_TABLE_BITS, 225219820Sjeff+ MLX4_XRC_SRQ_TABLE_MASK = MLX4_XRC_SRQ_TABLE_SIZE - 1 226219820Sjeff+}; 227219820Sjeff+ 228219820Sjeff+enum { 229219820Sjeff+ MLX4_XRC_QPN_BIT = (1 << 23) 230219820Sjeff+}; 231219820Sjeff+ 232219820Sjeff enum mlx4_db_type { 233219820Sjeff MLX4_DB_TYPE_CQ, 234219820Sjeff MLX4_DB_TYPE_RQ, 235219820Sjeff@@ -174,6 +189,15 @@ struct mlx4_context { 236219820Sjeff int max_sge; 237219820Sjeff int max_cqe; 238219820Sjeff 239219820Sjeff+ struct { 240219820Sjeff+ struct mlx4_srq **table; 241219820Sjeff+ int refcnt; 242219820Sjeff+ } xrc_srq_table[MLX4_XRC_SRQ_TABLE_SIZE]; 243219820Sjeff+ pthread_mutex_t xrc_srq_table_mutex; 244219820Sjeff+ int num_xrc_srqs; 245219820Sjeff+ int xrc_srq_table_shift; 246219820Sjeff+ int xrc_srq_table_mask; 247219820Sjeff+ 248219820Sjeff struct mlx4_db_page *db_list[MLX4_NUM_DB_TYPE]; 249219820Sjeff pthread_mutex_t db_list_mutex; 250219820Sjeff }; 251219820Sjeff@@ -260,6 +284,11 @@ struct mlx4_ah { 252219820Sjeff struct mlx4_av av; 253219820Sjeff }; 254219820Sjeff 255219820Sjeff+struct mlx4_xrc_domain { 256219820Sjeff+ struct ibv_xrc_domain ibv_xrcd; 257219820Sjeff+ uint32_t xrcdn; 258219820Sjeff+}; 259219820Sjeff+ 260219820Sjeff static inline unsigned long align(unsigned long val, unsigned long align) 261219820Sjeff { 262219820Sjeff return (val + align - 1) & ~(align - 1); 263219820Sjeff@@ -304,6 +333,13 @@ static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah) 264219820Sjeff return to_mxxx(ah, ah); 265219820Sjeff } 266219820Sjeff 267219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 268219820Sjeff+static inline struct mlx4_xrc_domain *to_mxrcd(struct ibv_xrc_domain *ibxrcd) 269219820Sjeff+{ 270219820Sjeff+ return to_mxxx(xrcd, xrc_domain); 271219820Sjeff+} 272219820Sjeff+#endif 273219820Sjeff+ 274219820Sjeff int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size); 275219820Sjeff void mlx4_free_buf(struct mlx4_buf *buf); 276219820Sjeff 277219820Sjeff@@ -350,6 +386,10 @@ void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind); 278219820Sjeff int mlx4_post_srq_recv(struct ibv_srq *ibsrq, 279219820Sjeff struct ibv_recv_wr *wr, 280219820Sjeff struct ibv_recv_wr **bad_wr); 281219820Sjeff+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); 282219820Sjeff+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn, 283219820Sjeff+ struct mlx4_srq *srq); 284219820Sjeff+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn); 285219820Sjeff 286219820Sjeff struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr); 287219820Sjeff int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 288219820Sjeff@@ -380,5 +420,16 @@ int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr, 289219820Sjeff int mlx4_alloc_av(struct mlx4_pd *pd, struct ibv_ah_attr *attr, 290219820Sjeff struct mlx4_ah *ah); 291219820Sjeff void mlx4_free_av(struct mlx4_ah *ah); 292219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 293219820Sjeff+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd, 294219820Sjeff+ struct ibv_xrc_domain *xrc_domain, 295219820Sjeff+ struct ibv_cq *xrc_cq, 296219820Sjeff+ struct ibv_srq_init_attr *attr); 297219820Sjeff+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context, 298219820Sjeff+ int fd, int oflag); 299219820Sjeff+ 300219820Sjeff+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d); 301219820Sjeff+#endif 302219820Sjeff+ 303219820Sjeff 304219820Sjeff #endif /* MLX4_H */ 305219820Sjeffdiff --git a/src/qp.c b/src/qp.c 306219820Sjeffindex 01e8580..2f02430 100644 307219820Sjeff--- a/src/qp.c 308219820Sjeff+++ b/src/qp.c 309219820Sjeff@@ -226,7 +226,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, 310219820Sjeff ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); 311219820Sjeff qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; 312219820Sjeff 313219820Sjeff- ctrl->srcrb_flags = 314219820Sjeff+ ctrl->xrcrb_flags = 315219820Sjeff (wr->send_flags & IBV_SEND_SIGNALED ? 316219820Sjeff htonl(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | 317219820Sjeff (wr->send_flags & IBV_SEND_SOLICITED ? 318219820Sjeff@@ -243,6 +243,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr, 319219820Sjeff size = sizeof *ctrl / 16; 320219820Sjeff 321219820Sjeff switch (ibqp->qp_type) { 322219820Sjeff+ case IBV_QPT_XRC: 323219820Sjeff+ ctrl->xrcrb_flags |= htonl(wr->xrc_remote_srq_num << 8); 324219820Sjeff+ /* fall thru */ 325219820Sjeff case IBV_QPT_RC: 326219820Sjeff case IBV_QPT_UC: 327219820Sjeff switch (wr->opcode) { 328219820Sjeff@@ -543,6 +546,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type, 329219820Sjeff size += sizeof (struct mlx4_wqe_raddr_seg); 330219820Sjeff break; 331219820Sjeff 332219820Sjeff+ case IBV_QPT_XRC: 333219820Sjeff case IBV_QPT_RC: 334219820Sjeff size += sizeof (struct mlx4_wqe_raddr_seg); 335219820Sjeff /* 336219820Sjeff@@ -631,6 +635,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap, 337219820Sjeff 338219820Sjeff case IBV_QPT_UC: 339219820Sjeff case IBV_QPT_RC: 340219820Sjeff+ case IBV_QPT_XRC: 341219820Sjeff wqe_size -= sizeof (struct mlx4_wqe_raddr_seg); 342219820Sjeff break; 343219820Sjeff 344219820Sjeffdiff --git a/src/srq.c b/src/srq.c 345219820Sjeffindex ba2ceb9..1350792 100644 346219820Sjeff--- a/src/srq.c 347219820Sjeff+++ b/src/srq.c 348219820Sjeff@@ -167,3 +167,53 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, 349219820Sjeff 350219820Sjeff return 0; 351219820Sjeff } 352219820Sjeff+ 353219820Sjeff+struct mlx4_srq *mlx4_find_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn) 354219820Sjeff+{ 355219820Sjeff+ int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; 356219820Sjeff+ 357219820Sjeff+ if (ctx->xrc_srq_table[tind].refcnt) 358219820Sjeff+ return ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask]; 359219820Sjeff+ else 360219820Sjeff+ return NULL; 361219820Sjeff+} 362219820Sjeff+ 363219820Sjeff+int mlx4_store_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn, 364219820Sjeff+ struct mlx4_srq *srq) 365219820Sjeff+{ 366219820Sjeff+ int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; 367219820Sjeff+ int ret = 0; 368219820Sjeff+ 369219820Sjeff+ pthread_mutex_lock(&ctx->xrc_srq_table_mutex); 370219820Sjeff+ 371219820Sjeff+ if (!ctx->xrc_srq_table[tind].refcnt) { 372219820Sjeff+ ctx->xrc_srq_table[tind].table = calloc(ctx->xrc_srq_table_mask + 1, 373219820Sjeff+ sizeof(struct mlx4_srq *)); 374219820Sjeff+ if (!ctx->xrc_srq_table[tind].table) { 375219820Sjeff+ ret = -1; 376219820Sjeff+ goto out; 377219820Sjeff+ } 378219820Sjeff+ } 379219820Sjeff+ 380219820Sjeff+ ++ctx->xrc_srq_table[tind].refcnt; 381219820Sjeff+ ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = srq; 382219820Sjeff+ 383219820Sjeff+out: 384219820Sjeff+ pthread_mutex_unlock(&ctx->xrc_srq_table_mutex); 385219820Sjeff+ return ret; 386219820Sjeff+} 387219820Sjeff+ 388219820Sjeff+void mlx4_clear_xrc_srq(struct mlx4_context *ctx, uint32_t xrc_srqn) 389219820Sjeff+{ 390219820Sjeff+ int tind = (xrc_srqn & (ctx->num_xrc_srqs - 1)) >> ctx->xrc_srq_table_shift; 391219820Sjeff+ 392219820Sjeff+ pthread_mutex_lock(&ctx->xrc_srq_table_mutex); 393219820Sjeff+ 394219820Sjeff+ if (!--ctx->xrc_srq_table[tind].refcnt) 395219820Sjeff+ free(ctx->xrc_srq_table[tind].table); 396219820Sjeff+ else 397219820Sjeff+ ctx->xrc_srq_table[tind].table[xrc_srqn & ctx->xrc_srq_table_mask] = NULL; 398219820Sjeff+ 399219820Sjeff+ pthread_mutex_unlock(&ctx->xrc_srq_table_mutex); 400219820Sjeff+} 401219820Sjeff+ 402219820Sjeffdiff --git a/src/verbs.c b/src/verbs.c 403219820Sjeffindex 400050c..b7c9c8e 100644 404219820Sjeff--- a/src/verbs.c 405219820Sjeff+++ b/src/verbs.c 406219820Sjeff@@ -368,18 +368,36 @@ int mlx4_query_srq(struct ibv_srq *srq, 407219820Sjeff return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); 408219820Sjeff } 409219820Sjeff 410219820Sjeff-int mlx4_destroy_srq(struct ibv_srq *srq) 411219820Sjeff+int mlx4_destroy_srq(struct ibv_srq *ibsrq) 412219820Sjeff { 413219820Sjeff+ struct mlx4_srq *srq = to_msrq(ibsrq); 414219820Sjeff+ struct mlx4_cq *mcq = NULL; 415219820Sjeff int ret; 416219820Sjeff 417219820Sjeff- ret = ibv_cmd_destroy_srq(srq); 418219820Sjeff- if (ret) 419219820Sjeff+ if (ibsrq->xrc_cq) { 420219820Sjeff+ /* is an xrc_srq */ 421219820Sjeff+ mcq = to_mcq(ibsrq->xrc_cq); 422219820Sjeff+ mlx4_cq_clean(mcq, 0, srq); 423219820Sjeff+ pthread_spin_lock(&mcq->lock); 424219820Sjeff+ mlx4_clear_xrc_srq(to_mctx(ibsrq->context), srq->srqn); 425219820Sjeff+ pthread_spin_unlock(&mcq->lock); 426219820Sjeff+ } 427219820Sjeff+ 428219820Sjeff+ ret = ibv_cmd_destroy_srq(ibsrq); 429219820Sjeff+ if (ret) { 430219820Sjeff+ if (ibsrq->xrc_cq) { 431219820Sjeff+ pthread_spin_lock(&mcq->lock); 432219820Sjeff+ mlx4_store_xrc_srq(to_mctx(ibsrq->context), 433219820Sjeff+ srq->srqn, srq); 434219820Sjeff+ pthread_spin_unlock(&mcq->lock); 435219820Sjeff+ } 436219820Sjeff return ret; 437219820Sjeff+ } 438219820Sjeff 439219820Sjeff- mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db); 440219820Sjeff- mlx4_free_buf(&to_msrq(srq)->buf); 441219820Sjeff- free(to_msrq(srq)->wrid); 442219820Sjeff- free(to_msrq(srq)); 443219820Sjeff+ mlx4_free_db(to_mctx(ibsrq->context), MLX4_DB_TYPE_RQ, srq->db); 444219820Sjeff+ mlx4_free_buf(&srq->buf); 445219820Sjeff+ free(srq->wrid); 446219820Sjeff+ free(srq); 447219820Sjeff 448219820Sjeff return 0; 449219820Sjeff } 450219820Sjeff@@ -415,7 +433,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) 451219820Sjeff qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes); 452219820Sjeff qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr); 453219820Sjeff 454219820Sjeff- if (attr->srq) 455219820Sjeff+ if (attr->srq || attr->qp_type == IBV_QPT_XRC) 456219820Sjeff attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0; 457219820Sjeff else { 458219820Sjeff if (attr->cap.max_recv_sge < 1) 459219820Sjeff@@ -433,7 +451,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) 460219820Sjeff pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE)) 461219820Sjeff goto err_free; 462219820Sjeff 463219820Sjeff- if (!attr->srq) { 464219820Sjeff+ if (!attr->srq && attr->qp_type != IBV_QPT_XRC) { 465219820Sjeff qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); 466219820Sjeff if (!qp->db) 467219820Sjeff goto err_free; 468219820Sjeff@@ -442,7 +460,7 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) 469219820Sjeff } 470219820Sjeff 471219820Sjeff cmd.buf_addr = (uintptr_t) qp->buf.buf; 472219820Sjeff- if (attr->srq) 473219820Sjeff+ if (attr->srq || attr->qp_type == IBV_QPT_XRC) 474219820Sjeff cmd.db_addr = 0; 475219820Sjeff else 476219820Sjeff cmd.db_addr = (uintptr_t) qp->db; 477219820Sjeff@@ -485,7 +503,7 @@ err_destroy: 478219820Sjeff 479219820Sjeff err_rq_db: 480219820Sjeff pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex); 481219820Sjeff- if (!attr->srq) 482219820Sjeff+ if (!attr->srq && attr->qp_type != IBV_QPT_XRC) 483219820Sjeff mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db); 484219820Sjeff 485219820Sjeff err_free: 486219820Sjeff@@ -544,7 +562,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 487219820Sjeff mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); 488219820Sjeff 489219820Sjeff mlx4_init_qp_indices(to_mqp(qp)); 490219820Sjeff- if (!qp->srq) 491219820Sjeff+ if (!qp->srq && qp->qp_type != IBV_QPT_XRC) 492219820Sjeff *to_mqp(qp)->db = 0; 493219820Sjeff } 494219820Sjeff 495219820Sjeff@@ -603,7 +621,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp) 496219820Sjeff mlx4_unlock_cqs(ibqp); 497219820Sjeff pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex); 498219820Sjeff 499219820Sjeff- if (!ibqp->srq) 500219820Sjeff+ if (!ibqp->srq && ibqp->qp_type != IBV_QPT_XRC) 501219820Sjeff mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db); 502219820Sjeff free(qp->sq.wrid); 503219820Sjeff if (qp->rq.wqe_cnt) 504219820Sjeff@@ -661,3 +679,103 @@ int mlx4_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) 505219820Sjeff 506219820Sjeff return 0; 507219820Sjeff } 508219820Sjeff+ 509219820Sjeff+#ifdef HAVE_IBV_XRC_OPS 510219820Sjeff+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_pd *pd, 511219820Sjeff+ struct ibv_xrc_domain *xrc_domain, 512219820Sjeff+ struct ibv_cq *xrc_cq, 513219820Sjeff+ struct ibv_srq_init_attr *attr) 514219820Sjeff+{ 515219820Sjeff+ struct mlx4_create_xrc_srq cmd; 516219820Sjeff+ struct mlx4_create_srq_resp resp; 517219820Sjeff+ struct mlx4_srq *srq; 518219820Sjeff+ int ret; 519219820Sjeff+ 520219820Sjeff+ /* Sanity check SRQ size before proceeding */ 521219820Sjeff+ if (attr->attr.max_wr > 1 << 16 || attr->attr.max_sge > 64) 522219820Sjeff+ return NULL; 523219820Sjeff+ 524219820Sjeff+ srq = malloc(sizeof *srq); 525219820Sjeff+ if (!srq) 526219820Sjeff+ return NULL; 527219820Sjeff+ 528219820Sjeff+ if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE)) 529219820Sjeff+ goto err; 530219820Sjeff+ 531219820Sjeff+ srq->max = align_queue_size(attr->attr.max_wr + 1); 532219820Sjeff+ srq->max_gs = attr->attr.max_sge; 533219820Sjeff+ srq->counter = 0; 534219820Sjeff+ 535219820Sjeff+ if (mlx4_alloc_srq_buf(pd, &attr->attr, srq)) 536219820Sjeff+ goto err; 537219820Sjeff+ 538219820Sjeff+ srq->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ); 539219820Sjeff+ if (!srq->db) 540219820Sjeff+ goto err_free; 541219820Sjeff+ 542219820Sjeff+ *srq->db = 0; 543219820Sjeff+ 544219820Sjeff+ cmd.buf_addr = (uintptr_t) srq->buf.buf; 545219820Sjeff+ cmd.db_addr = (uintptr_t) srq->db; 546219820Sjeff+ 547219820Sjeff+ ret = ibv_cmd_create_xrc_srq(pd, &srq->ibv_srq, attr, 548219820Sjeff+ xrc_domain->handle, 549219820Sjeff+ xrc_cq->handle, 550219820Sjeff+ &cmd.ibv_cmd, sizeof cmd, 551219820Sjeff+ &resp.ibv_resp, sizeof resp); 552219820Sjeff+ if (ret) 553219820Sjeff+ goto err_db; 554219820Sjeff+ 555219820Sjeff+ srq->ibv_srq.xrc_srq_num = srq->srqn = resp.srqn; 556219820Sjeff+ 557219820Sjeff+ ret = mlx4_store_xrc_srq(to_mctx(pd->context), srq->ibv_srq.xrc_srq_num, srq); 558219820Sjeff+ if (ret) 559219820Sjeff+ goto err_destroy; 560219820Sjeff+ 561219820Sjeff+ return &srq->ibv_srq; 562219820Sjeff+ 563219820Sjeff+err_destroy: 564219820Sjeff+ ibv_cmd_destroy_srq(&srq->ibv_srq); 565219820Sjeff+ 566219820Sjeff+err_db: 567219820Sjeff+ mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db); 568219820Sjeff+ 569219820Sjeff+err_free: 570219820Sjeff+ free(srq->wrid); 571219820Sjeff+ mlx4_free_buf(&srq->buf); 572219820Sjeff+ 573219820Sjeff+err: 574219820Sjeff+ free(srq); 575219820Sjeff+ 576219820Sjeff+ return NULL; 577219820Sjeff+} 578219820Sjeff+ 579219820Sjeff+struct ibv_xrc_domain *mlx4_open_xrc_domain(struct ibv_context *context, 580219820Sjeff+ int fd, int oflag) 581219820Sjeff+{ 582219820Sjeff+ int ret; 583219820Sjeff+ struct mlx4_open_xrc_domain_resp resp; 584219820Sjeff+ struct mlx4_xrc_domain *xrcd; 585219820Sjeff+ 586219820Sjeff+ xrcd = malloc(sizeof *xrcd); 587219820Sjeff+ if (!xrcd) 588219820Sjeff+ return NULL; 589219820Sjeff+ 590219820Sjeff+ ret = ibv_cmd_open_xrc_domain(context, fd, oflag, &xrcd->ibv_xrcd, 591219820Sjeff+ &resp.ibv_resp, sizeof resp); 592219820Sjeff+ if (ret) { 593219820Sjeff+ free(xrcd); 594219820Sjeff+ return NULL; 595219820Sjeff+ } 596219820Sjeff+ 597219820Sjeff+ xrcd->xrcdn = resp.xrcdn; 598219820Sjeff+ return &xrcd->ibv_xrcd; 599219820Sjeff+} 600219820Sjeff+ 601219820Sjeff+int mlx4_close_xrc_domain(struct ibv_xrc_domain *d) 602219820Sjeff+{ 603219820Sjeff+ ibv_cmd_close_xrc_domain(d); 604219820Sjeff+ free(d); 605219820Sjeff+ return 0; 606219820Sjeff+} 607219820Sjeff+#endif 608219820Sjeffdiff --git a/src/wqe.h b/src/wqe.h 609219820Sjeffindex 6f7f309..fa2f8ac 100644 610219820Sjeff--- a/src/wqe.h 611219820Sjeff+++ b/src/wqe.h 612219820Sjeff@@ -65,7 +65,7 @@ struct mlx4_wqe_ctrl_seg { 613219820Sjeff * [1] SE (solicited event) 614219820Sjeff * [0] FL (force loopback) 615219820Sjeff */ 616219820Sjeff- uint32_t srcrb_flags; 617219820Sjeff+ uint32_t xrcrb_flags; 618219820Sjeff /* 619219820Sjeff * imm is immediate data for send/RDMA write w/ immediate; 620219820Sjeff * also invalidation key for send with invalidate; input 621