XRC_base_implementation.patch revision 219820
1This patch includes the following commits from OFED 1.3 libibverbs: 2Implement eXtended Reliable Connections (a7df4af8eb84738f36db4161a4272fa02fc6741e) 3Re-define IBV_DEVICE_XRC to conform to its new position (5042a9cab0ae2f7ad61bdf88dfed6fb10b700797) 4Set "is_srq" flag only when the QP has an SRQ (6f6d29e74ca0c19a8821990aad603e3c575b7f4d) 5for XRC QPs, return xrc_domain in ibv_query_qp (018c44a44ff0344dfe7cf5f6598f81d81769164e) 6 7V2: 81. checkpatch.pl cleanups 92. fixed u64 alignment problems in uverbs.h ABI structs 103. eliminated unnecessary default_symvers 114. modified xrc_ops to more_ops 12 13Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> 14 include/infiniband/driver.h | 11 +++++ 15 include/infiniband/kern-abi.h | 47 ++++++++++++++++++++++- 16 include/infiniband/verbs.h | 85 ++++++++++++++++++++++++++++++++++++++++- 17 src/cmd.c | 72 ++++++++++++++++++++++++++++++++++- 18 src/libibverbs.map | 6 +++ 19 src/verbs.c | 54 ++++++++++++++++++++++++++ 20 6 files changed, 271 insertions(+), 4 deletions(-) 21 22Index: libibverbs/include/infiniband/driver.h 23=================================================================== 24--- libibverbs.orig/include/infiniband/driver.h 2009-11-01 15:18:17.920111000 +0200 25+++ libibverbs/include/infiniband/driver.h 2009-11-01 15:18:20.624171000 +0200 26@@ -99,6 +99,11 @@ int ibv_cmd_create_srq(struct ibv_pd *pd 27 struct ibv_srq *srq, struct ibv_srq_init_attr *attr, 28 struct ibv_create_srq *cmd, size_t cmd_size, 29 struct ibv_create_srq_resp *resp, size_t resp_size); 30+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd, 31+ struct ibv_srq *srq, struct ibv_srq_init_attr *attr, 32+ uint32_t xrc_domain, uint32_t xrc_cq, 33+ struct ibv_create_xrc_srq *cmd, size_t cmd_size, 34+ struct ibv_create_srq_resp *resp, size_t resp_size); 35 int ibv_cmd_modify_srq(struct ibv_srq *srq, 36 struct ibv_srq_attr *srq_attr, 37 int srq_attr_mask, 38@@ -134,6 +139,12 @@ int ibv_cmd_detach_mcast(struct ibv_qp * 39 40 int 
ibv_dontfork_range(void *base, size_t size); 41 int ibv_dofork_range(void *base, size_t size); 42+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag, 43+ struct ibv_xrc_domain *d, 44+ struct ibv_open_xrc_domain_resp *resp, 45+ size_t resp_size); 46+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d); 47+ 48 49 /* 50 * sysfs helper functions 51Index: libibverbs/include/infiniband/kern-abi.h 52=================================================================== 53--- libibverbs.orig/include/infiniband/kern-abi.h 2009-11-01 15:18:17.921121000 +0200 54+++ libibverbs/include/infiniband/kern-abi.h 2009-11-01 15:18:20.629168000 +0200 55@@ -85,7 +85,10 @@ enum { 56 IB_USER_VERBS_CMD_MODIFY_SRQ, 57 IB_USER_VERBS_CMD_QUERY_SRQ, 58 IB_USER_VERBS_CMD_DESTROY_SRQ, 59- IB_USER_VERBS_CMD_POST_SRQ_RECV 60+ IB_USER_VERBS_CMD_POST_SRQ_RECV, 61+ IB_USER_VERBS_CMD_CREATE_XRC_SRQ, 62+ IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, 63+ IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN 64 }; 65 66 /* 67@@ -706,6 +709,21 @@ struct ibv_create_srq { 68 __u64 driver_data[0]; 69 }; 70 71+struct ibv_create_xrc_srq { 72+ __u32 command; 73+ __u16 in_words; 74+ __u16 out_words; 75+ __u64 response; 76+ __u64 user_handle; 77+ __u32 pd_handle; 78+ __u32 max_wr; 79+ __u32 max_sge; 80+ __u32 srq_limit; 81+ __u32 xrcd_handle; 82+ __u32 xrc_cq; 83+ __u64 driver_data[0]; 84+}; 85+ 86 struct ibv_create_srq_resp { 87 __u32 srq_handle; 88 __u32 max_wr; 89@@ -754,6 +772,30 @@ struct ibv_destroy_srq_resp { 90 __u32 events_reported; 91 }; 92 93+struct ibv_open_xrc_domain { 94+ __u32 command; 95+ __u16 in_words; 96+ __u16 out_words; 97+ __u64 response; 98+ __u32 fd; 99+ __u32 oflags; 100+ __u64 driver_data[0]; 101+}; 102+ 103+struct ibv_open_xrc_domain_resp { 104+ __u32 xrcd_handle; 105+}; 106+ 107+struct ibv_close_xrc_domain { 108+ __u32 command; 109+ __u16 in_words; 110+ __u16 out_words; 111+ __u64 response; 112+ __u32 xrcd_handle; 113+ __u32 reserved; 114+ __u64 driver_data[0]; 115+}; 116+ 117 /* 118 * 
Compatibility with older ABI versions 119 */ 120@@ -803,6 +845,9 @@ enum { 121 * trick opcodes in IBV_INIT_CMD() doesn't break. 122 */ 123 IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL_V2 = -1, 124+ IB_USER_VERBS_CMD_CREATE_XRC_SRQ_V2 = -1, 125+ IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN_V2 = -1, 126+ IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN_V2 = -1, 127 }; 128 129 struct ibv_destroy_cq_v1 { 130Index: libibverbs/include/infiniband/verbs.h 131=================================================================== 132--- libibverbs.orig/include/infiniband/verbs.h 2009-11-01 15:18:17.924118000 +0200 133+++ libibverbs/include/infiniband/verbs.h 2009-11-01 15:18:20.635171000 +0200 134@@ -92,7 +92,8 @@ enum ibv_device_cap_flags { 135 IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11, 136 IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12, 137 IBV_DEVICE_SRQ_RESIZE = 1 << 13, 138- IBV_DEVICE_N_NOTIFY_CQ = 1 << 14 139+ IBV_DEVICE_N_NOTIFY_CQ = 1 << 14, 140+ IBV_DEVICE_XRC = 1 << 20 141 }; 142 143 enum ibv_atomic_cap { 144@@ -371,6 +372,11 @@ struct ibv_ah_attr { 145 uint8_t port_num; 146 }; 147 148+struct ibv_xrc_domain { 149+ struct ibv_context *context; 150+ uint32_t handle; 151+}; 152+ 153 enum ibv_srq_attr_mask { 154 IBV_SRQ_MAX_WR = 1 << 0, 155 IBV_SRQ_LIMIT = 1 << 1 156@@ -390,7 +396,8 @@ struct ibv_srq_init_attr { 157 enum ibv_qp_type { 158 IBV_QPT_RC = 2, 159 IBV_QPT_UC, 160- IBV_QPT_UD 161+ IBV_QPT_UD, 162+ IBV_QPT_XRC 163 }; 164 165 struct ibv_qp_cap { 166@@ -409,6 +416,7 @@ struct ibv_qp_init_attr { 167 struct ibv_qp_cap cap; 168 enum ibv_qp_type qp_type; 169 int sq_sig_all; 170+ struct ibv_xrc_domain *xrc_domain; 171 }; 172 173 enum ibv_qp_attr_mask { 174@@ -527,6 +535,7 @@ struct ibv_send_wr { 175 uint32_t remote_qkey; 176 } ud; 177 } wr; 178+ uint32_t xrc_remote_srq_num; 179 }; 180 181 struct ibv_recv_wr { 182@@ -554,6 +563,10 @@ struct ibv_srq { 183 pthread_mutex_t mutex; 184 pthread_cond_t cond; 185 uint32_t events_completed; 186+ 187+ uint32_t xrc_srq_num; 188+ struct ibv_xrc_domain *xrc_domain; 189+ struct 
ibv_cq *xrc_cq; 190 }; 191 192 struct ibv_qp { 193@@ -571,6 +584,8 @@ struct ibv_qp { 194 pthread_mutex_t mutex; 195 pthread_cond_t cond; 196 uint32_t events_completed; 197+ 198+ struct ibv_xrc_domain *xrc_domain; 199 }; 200 201 struct ibv_comp_channel { 202@@ -625,6 +640,16 @@ struct ibv_device { 203 char ibdev_path[IBV_SYSFS_PATH_MAX]; 204 }; 205 206+struct ibv_more_ops { 207+ struct ibv_srq * (*create_xrc_srq)(struct ibv_pd *pd, 208+ struct ibv_xrc_domain *xrc_domain, 209+ struct ibv_cq *xrc_cq, 210+ struct ibv_srq_init_attr *srq_init_attr); 211+ struct ibv_xrc_domain * (*open_xrc_domain)(struct ibv_context *context, 212+ int fd, int oflag); 213+ int (*close_xrc_domain)(struct ibv_xrc_domain *d); 214+}; 215+ 216 struct ibv_context_ops { 217 int (*query_device)(struct ibv_context *context, 218 struct ibv_device_attr *device_attr); 219@@ -691,6 +716,7 @@ struct ibv_context { 220 int num_comp_vectors; 221 pthread_mutex_t mutex; 222 void *abi_compat; 223+ struct ibv_more_ops *more_ops; 224 }; 225 226 /** 227@@ -913,6 +939,25 @@ struct ibv_srq *ibv_create_srq(struct ib 228 struct ibv_srq_init_attr *srq_init_attr); 229 230 /** 231+ * ibv_create_xrc_srq - Creates a SRQ associated with the specified protection 232+ * domain and xrc domain. 233+ * @pd: The protection domain associated with the SRQ. 234+ * @xrc_domain: The XRC domain associated with the SRQ. 235+ * @xrc_cq: CQ to report completions for XRC packets on. 236+ * 237+ * @srq_init_attr: A list of initial attributes required to create the SRQ. 238+ * 239+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the 240+ * requested size of the SRQ, and set to the actual values allocated 241+ * on return. If ibv_create_xrc_srq() succeeds, then max_wr and max_sge 242+ * will always be at least as large as the requested values.
243+ */ 244+struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, 245+ struct ibv_xrc_domain *xrc_domain, 246+ struct ibv_cq *xrc_cq, 247+ struct ibv_srq_init_attr *srq_init_attr); 248+ 249+/** 250 * ibv_modify_srq - Modifies the attributes for the specified SRQ. 251 * @srq: The SRQ to modify. 252 * @srq_attr: On input, specifies the SRQ attributes to modify. On output, 253@@ -1093,6 +1138,42 @@ const char *ibv_port_state_str(enum ibv_ 254 */ 255 const char *ibv_event_type_str(enum ibv_event_type event); 256 257+/** 258+ * ibv_open_xrc_domain - open an XRC domain 259+ * Returns a reference to an XRC domain. 260+ * 261+ * @context: Device context 262+ * @fd: descriptor for inode associated with the domain 263+ * If fd == -1, no inode is associated with the domain; in this case, 264+ * the only legal value for oflag is O_CREAT 265+ * 266+ * @oflag: oflag values are constructed by OR-ing flags from the following list 267+ * 268+ * O_CREAT 269+ * If a domain belonging to device named by context is already associated 270+ * with the inode, this flag has no effect, except as noted under O_EXCL 271+ * below. Otherwise, a new XRC domain is created and is associated with 272+ * inode specified by fd. 273+ * 274+ * O_EXCL 275+ * If O_EXCL and O_CREAT are set, open will fail if a domain associated with 276+ * the inode exists. The check for the existence of the domain and creation 277+ * of the domain if it does not exist is atomic with respect to other 278+ * processes executing open with fd naming the same inode. 279+ */ 280+struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, 281+ int fd, int oflag); 282+ 283+/** 284+ * ibv_close_xrc_domain - close an XRC domain 285+ * If this is the last reference, destroys the domain. 286+ * 287+ * @d: reference to XRC domain to close 288+ * 289+ * close is implicitly performed at process exit.
290+ */ 291+int ibv_close_xrc_domain(struct ibv_xrc_domain *d); 292+ 293 END_C_DECLS 294 295 # undef __attribute_const 296Index: libibverbs/src/cmd.c 297=================================================================== 298--- libibverbs.orig/src/cmd.c 2009-11-01 15:18:17.927111000 +0200 299+++ libibverbs/src/cmd.c 2009-11-01 15:18:20.643167000 +0200 300@@ -483,6 +483,34 @@ int ibv_cmd_create_srq(struct ibv_pd *pd 301 return 0; 302 } 303 304+int ibv_cmd_create_xrc_srq(struct ibv_pd *pd, 305+ struct ibv_srq *srq, struct ibv_srq_init_attr *attr, 306+ uint32_t xrcd_handle, uint32_t xrc_cq, 307+ struct ibv_create_xrc_srq *cmd, size_t cmd_size, 308+ struct ibv_create_srq_resp *resp, size_t resp_size) 309+{ 310+ IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XRC_SRQ, resp, resp_size); 311+ cmd->user_handle = (uintptr_t) srq; 312+ cmd->pd_handle = pd->handle; 313+ cmd->max_wr = attr->attr.max_wr; 314+ cmd->max_sge = attr->attr.max_sge; 315+ cmd->srq_limit = attr->attr.srq_limit; 316+ cmd->xrcd_handle = xrcd_handle; 317+ cmd->xrc_cq = xrc_cq; 318+ 319+ if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size) 320+ return errno; 321+ 322+ VALGRIND_MAKE_MEM_DEFINED(resp, resp_size); 323+ 324+ srq->handle = resp->srq_handle; 325+ srq->context = pd->context; 326+ attr->attr.max_wr = resp->max_wr; 327+ attr->attr.max_sge = resp->max_sge; 328+ 329+ return 0; 330+} 331+ 332 static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq, 333 struct ibv_srq_attr *srq_attr, 334 int srq_attr_mask, 335@@ -603,7 +631,6 @@ int ibv_cmd_create_qp(struct ibv_pd *pd, 336 cmd->pd_handle = pd->handle; 337 cmd->send_cq_handle = attr->send_cq->handle; 338 cmd->recv_cq_handle = attr->recv_cq->handle; 339- cmd->srq_handle = attr->srq ? 
attr->srq->handle : 0; 340 cmd->max_send_wr = attr->cap.max_send_wr; 341 cmd->max_recv_wr = attr->cap.max_recv_wr; 342 cmd->max_send_sge = attr->cap.max_send_sge; 343@@ -612,6 +639,9 @@ int ibv_cmd_create_qp(struct ibv_pd *pd, 344 cmd->sq_sig_all = attr->sq_sig_all; 345 cmd->qp_type = attr->qp_type; 346 cmd->is_srq = !!attr->srq; 347+ cmd->srq_handle = attr->qp_type == IBV_QPT_XRC ? 348+ (attr->xrc_domain ? attr->xrc_domain->handle : 0) : 349+ (attr->srq ? attr->srq->handle : 0); 350 cmd->reserved = 0; 351 352 if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size) 353@@ -722,6 +752,8 @@ int ibv_cmd_query_qp(struct ibv_qp *qp, 354 init_attr->recv_cq = qp->recv_cq; 355 init_attr->srq = qp->srq; 356 init_attr->qp_type = qp->qp_type; 357+ if (qp->qp_type == IBV_QPT_XRC) 358+ init_attr->xrc_domain = qp->xrc_domain; 359 init_attr->cap.max_send_wr = resp.max_send_wr; 360 init_attr->cap.max_recv_wr = resp.max_recv_wr; 361 init_attr->cap.max_send_sge = resp.max_send_sge; 362@@ -1122,3 +1154,41 @@ int ibv_cmd_detach_mcast(struct ibv_qp * 363 364 return 0; 365 } 366+ 367+int ibv_cmd_open_xrc_domain(struct ibv_context *context, int fd, int oflag, 368+ struct ibv_xrc_domain *d, 369+ struct ibv_open_xrc_domain_resp *resp, 370+ size_t resp_size) 371+{ 372+ struct ibv_open_xrc_domain cmd; 373+ 374+ if (abi_ver < 6) 375+ return ENOSYS; 376+ 377+ IBV_INIT_CMD_RESP(&cmd, sizeof cmd, OPEN_XRC_DOMAIN, resp, resp_size); 378+ cmd.fd = fd; 379+ cmd.oflags = oflag; 380+ 381+ if (write(context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) 382+ return errno; 383+ 384+ d->handle = resp->xrcd_handle; 385+ 386+ return 0; 387+} 388+ 389+int ibv_cmd_close_xrc_domain(struct ibv_xrc_domain *d) 390+{ 391+ struct ibv_close_xrc_domain cmd; 392+ 393+ if (abi_ver < 6) 394+ return ENOSYS; 395+ 396+ IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRC_DOMAIN); 397+ cmd.xrcd_handle = d->handle; 398+ 399+ if (write(d->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) 400+ return errno; 401+ return 0; 402+} 403+ 
404Index: libibverbs/src/libibverbs.map 405=================================================================== 406--- libibverbs.orig/src/libibverbs.map 2009-11-01 15:18:17.928115000 +0200 407+++ libibverbs/src/libibverbs.map 2009-11-01 15:18:20.646169000 +0200 408@@ -91,6 +91,12 @@ IBVERBS_1.1 { 409 ibv_dontfork_range; 410 ibv_dofork_range; 411 ibv_register_driver; 412+ ibv_create_xrc_srq; 413+ ibv_cmd_create_xrc_srq; 414+ ibv_open_xrc_domain; 415+ ibv_cmd_open_xrc_domain; 416+ ibv_close_xrc_domain; 417+ ibv_cmd_close_xrc_domain; 418 419 ibv_node_type_str; 420 ibv_port_state_str; 421Index: libibverbs/src/verbs.c 422=================================================================== 423--- libibverbs.orig/src/verbs.c 2009-11-01 15:18:17.931119000 +0200 424+++ libibverbs/src/verbs.c 2009-11-01 15:18:20.650169000 +0200 425@@ -366,6 +366,9 @@ struct ibv_srq *__ibv_create_srq(struct 426 srq->context = pd->context; 427 srq->srq_context = srq_init_attr->srq_context; 428 srq->pd = pd; 429+ srq->xrc_domain = NULL; 430+ srq->xrc_cq = NULL; 431+ srq->xrc_srq_num = 0; 432 srq->events_completed = 0; 433 pthread_mutex_init(&srq->mutex, NULL); 434 pthread_cond_init(&srq->cond, NULL); 435@@ -375,6 +378,32 @@ struct ibv_srq *__ibv_create_srq(struct 436 } 437 default_symver(__ibv_create_srq, ibv_create_srq); 438 439+struct ibv_srq *ibv_create_xrc_srq(struct ibv_pd *pd, 440+ struct ibv_xrc_domain *xrc_domain, 441+ struct ibv_cq *xrc_cq, 442+ struct ibv_srq_init_attr *srq_init_attr) 443+{ 444+ struct ibv_srq *srq; 445+ 446+ if (!pd->context->more_ops) 447+ return NULL; 448+ 449+ srq = pd->context->more_ops->create_xrc_srq(pd, xrc_domain, 450+ xrc_cq, srq_init_attr); 451+ if (srq) { 452+ srq->context = pd->context; 453+ srq->srq_context = srq_init_attr->srq_context; 454+ srq->pd = pd; 455+ srq->xrc_domain = xrc_domain; 456+ srq->xrc_cq = xrc_cq; 457+ srq->events_completed = 0; 458+ pthread_mutex_init(&srq->mutex, NULL); 459+ pthread_cond_init(&srq->cond, NULL); 460+ } 461+ 462+ return 
srq; 463+} 464+ 465 int __ibv_modify_srq(struct ibv_srq *srq, 466 struct ibv_srq_attr *srq_attr, 467 int srq_attr_mask) 468@@ -410,6 +439,8 @@ struct ibv_qp *__ibv_create_qp(struct ib 469 qp->qp_type = qp_init_attr->qp_type; 470 qp->state = IBV_QPS_RESET; 471 qp->events_completed = 0; 472+ qp->xrc_domain = qp_init_attr->qp_type == IBV_QPT_XRC ? 473+ qp_init_attr->xrc_domain : NULL; 474 pthread_mutex_init(&qp->mutex, NULL); 475 pthread_cond_init(&qp->cond, NULL); 476 } 477@@ -543,3 +574,26 @@ int __ibv_detach_mcast(struct ibv_qp *qp 478 return qp->context->ops.detach_mcast(qp, gid, lid); 479 } 480 default_symver(__ibv_detach_mcast, ibv_detach_mcast); 481+ 482+struct ibv_xrc_domain *ibv_open_xrc_domain(struct ibv_context *context, 483+ int fd, int oflag) 484+{ 485+ struct ibv_xrc_domain *d; 486+ 487+ if (!context->more_ops) 488+ return NULL; 489+ 490+ d = context->more_ops->open_xrc_domain(context, fd, oflag); 491+ if (d) 492+ d->context = context; 493+ 494+ return d; 495+} 496+ 497+int ibv_close_xrc_domain(struct ibv_xrc_domain *d) 498+{ 499+ if (!d->context->more_ops) 500+ return 0; 501+ 502+ return d->context->more_ops->close_xrc_domain(d); 503+} 504