1321936Shselasky/* 2321936Shselasky * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3321936Shselasky * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 4321936Shselasky * 5321936Shselasky * This software is available to you under a choice of one of two 6321936Shselasky * licenses. You may choose to be licensed under the terms of the GNU 7321936Shselasky * General Public License (GPL) Version 2, available from the file 8321936Shselasky * COPYING in the main directory of this source tree, or the 9321936Shselasky * OpenIB.org BSD license below: 10321936Shselasky * 11321936Shselasky * Redistribution and use in source and binary forms, with or 12321936Shselasky * without modification, are permitted provided that the following 13321936Shselasky * conditions are met: 14321936Shselasky * 15321936Shselasky * - Redistributions of source code must retain the above 16321936Shselasky * copyright notice, this list of conditions and the following 17321936Shselasky * disclaimer. 18321936Shselasky * 19321936Shselasky * - Redistributions in binary form must reproduce the above 20321936Shselasky * copyright notice, this list of conditions and the following 21321936Shselasky * disclaimer in the documentation and/or other materials 22321936Shselasky * provided with the distribution. 23321936Shselasky * 24321936Shselasky * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25321936Shselasky * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26321936Shselasky * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27321936Shselasky * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28321936Shselasky * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29321936Shselasky * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30321936Shselasky * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31321936Shselasky * SOFTWARE. 32321936Shselasky */ 33321936Shselasky#define _GNU_SOURCE 34321936Shselasky#include <config.h> 35321936Shselasky 36321936Shselasky#include <infiniband/endian.h> 37321936Shselasky#include <stdio.h> 38321936Shselasky#include <sys/types.h> 39321936Shselasky#include <sys/stat.h> 40321936Shselasky#include <fcntl.h> 41321936Shselasky#include <unistd.h> 42321936Shselasky#include <stdlib.h> 43321936Shselasky#include <alloca.h> 44321936Shselasky#include <errno.h> 45321936Shselasky 46321936Shselasky#include "ibverbs.h" 47321936Shselasky 48321936Shselasky/* Hack to avoid GCC's -Wmissing-prototypes and the similar error from sparse 49321936Shselasky with these prototypes. Symbol versionining requires the goofy names, the 50321936Shselasky prototype must match the version in verbs.h. 51321936Shselasky */ 52321936Shselaskystruct ibv_device **__ibv_get_device_list(int *num_devices); 53321936Shselaskyvoid __ibv_free_device_list(struct ibv_device **list); 54321936Shselaskyconst char *__ibv_get_device_name(struct ibv_device *device); 55321936Shselasky__be64 __ibv_get_device_guid(struct ibv_device *device); 56321936Shselaskystruct ibv_context *__ibv_open_device(struct ibv_device *device); 57321936Shselaskyint __ibv_close_device(struct ibv_context *context); 58321936Shselaskyint __ibv_get_async_event(struct ibv_context *context, 59321936Shselasky struct ibv_async_event *event); 60321936Shselaskyvoid __ibv_ack_async_event(struct ibv_async_event *event); 61321936Shselasky 62321936Shselaskystatic pthread_once_t device_list_once = PTHREAD_ONCE_INIT; 63321936Shselaskystatic int num_devices; 64321936Shselaskystatic struct ibv_device **device_list; 65321936Shselasky 66321936Shselaskystatic void count_devices(void) 67321936Shselasky{ 68321936Shselasky num_devices = ibverbs_init(&device_list); 69321936Shselasky} 70321936Shselasky 71321936Shselaskystruct ibv_device **__ibv_get_device_list(int *num) 72321936Shselasky{ 73321936Shselasky struct ibv_device **l; 74321936Shselasky int i; 75321936Shselasky 76321936Shselasky if (num) 77321936Shselasky *num = 0; 78321936Shselasky 79321936Shselasky pthread_once(&device_list_once, count_devices); 80321936Shselasky 81321936Shselasky if (num_devices < 0) { 82321936Shselasky errno = -num_devices; 83321936Shselasky return NULL; 84321936Shselasky } 85321936Shselasky 86321936Shselasky l = calloc(num_devices + 1, sizeof (struct ibv_device *)); 87321936Shselasky if (!l) { 88321936Shselasky errno = ENOMEM; 89321936Shselasky return NULL; 90321936Shselasky } 91321936Shselasky 92321936Shselasky for (i = 0; i < num_devices; ++i) 93321936Shselasky l[i] = device_list[i]; 94321936Shselasky if (num) 95321936Shselasky *num = num_devices; 96321936Shselasky 97321936Shselasky return l; 98321936Shselasky} 99321936Shselaskydefault_symver(__ibv_get_device_list, ibv_get_device_list); 100321936Shselasky 101321936Shselaskyvoid __ibv_free_device_list(struct ibv_device **list) 102321936Shselasky{ 103321936Shselasky free(list); 104321936Shselasky} 105321936Shselaskydefault_symver(__ibv_free_device_list, ibv_free_device_list); 106321936Shselasky 107321936Shselaskyconst char *__ibv_get_device_name(struct ibv_device *device) 108321936Shselasky{ 109321936Shselasky return device->name; 110321936Shselasky} 111321936Shselaskydefault_symver(__ibv_get_device_name, ibv_get_device_name); 112321936Shselasky 113321936Shselasky__be64 __ibv_get_device_guid(struct ibv_device *device) 114321936Shselasky{ 115321936Shselasky char attr[24]; 116321936Shselasky uint64_t guid = 0; 117321936Shselasky uint16_t parts[4]; 118321936Shselasky int i; 119321936Shselasky 120321936Shselasky if (ibv_read_sysfs_file(device->ibdev_path, "node_guid", 121321936Shselasky attr, sizeof attr) < 0) 122321936Shselasky return 0; 123321936Shselasky 124321936Shselasky if (sscanf(attr, "%hx:%hx:%hx:%hx", 125321936Shselasky parts, parts + 1, parts + 2, parts + 3) != 4) 126321936Shselasky return 0; 127321936Shselasky 128321936Shselasky for (i = 0; i < 4; ++i) 129321936Shselasky guid = (guid << 16) | parts[i]; 130321936Shselasky 131321936Shselasky return htobe64(guid); 132321936Shselasky} 133321936Shselaskydefault_symver(__ibv_get_device_guid, ibv_get_device_guid); 134321936Shselasky 135321936Shselaskyvoid verbs_init_cq(struct ibv_cq *cq, struct ibv_context *context, 136321936Shselasky struct ibv_comp_channel *channel, 137321936Shselasky void *cq_context) 138321936Shselasky{ 139321936Shselasky cq->context = context; 140321936Shselasky cq->channel = channel; 141321936Shselasky 142321936Shselasky if (cq->channel) { 143321936Shselasky pthread_mutex_lock(&context->mutex); 144321936Shselasky ++cq->channel->refcnt; 145321936Shselasky pthread_mutex_unlock(&context->mutex); 146321936Shselasky } 147321936Shselasky 148321936Shselasky cq->cq_context = cq_context; 149321936Shselasky cq->comp_events_completed = 0; 150321936Shselasky cq->async_events_completed = 0; 151321936Shselasky pthread_mutex_init(&cq->mutex, NULL); 152321936Shselasky pthread_cond_init(&cq->cond, NULL); 153321936Shselasky} 154321936Shselasky 155321936Shselaskystatic struct ibv_cq_ex * 156321936Shselasky__lib_ibv_create_cq_ex(struct ibv_context *context, 157321936Shselasky struct ibv_cq_init_attr_ex *cq_attr) 158321936Shselasky{ 159321936Shselasky struct verbs_context *vctx = verbs_get_ctx(context); 160321936Shselasky struct ibv_cq_ex *cq; 161321936Shselasky 162321936Shselasky if (cq_attr->wc_flags & ~IBV_CREATE_CQ_SUP_WC_FLAGS) { 163321936Shselasky errno = EOPNOTSUPP; 164321936Shselasky return NULL; 165321936Shselasky } 166321936Shselasky 167321936Shselasky cq = vctx->priv->create_cq_ex(context, cq_attr); 168321936Shselasky 169321936Shselasky if (cq) 170321936Shselasky verbs_init_cq(ibv_cq_ex_to_cq(cq), context, 171321936Shselasky cq_attr->channel, cq_attr->cq_context); 172321936Shselasky 173321936Shselasky return cq; 174321936Shselasky} 175321936Shselasky 176321936Shselaskystruct ibv_context *__ibv_open_device(struct ibv_device *device) 177321936Shselasky{ 178321936Shselasky struct verbs_device *verbs_device = verbs_get_device(device); 179321936Shselasky char *devpath; 180321936Shselasky int cmd_fd, ret; 181321936Shselasky struct ibv_context *context; 182321936Shselasky struct verbs_context *context_ex; 183321936Shselasky 184321936Shselasky if (asprintf(&devpath, "/dev/%s", device->dev_name) < 0) 185321936Shselasky return NULL; 186321936Shselasky 187321936Shselasky /* 188321936Shselasky * We'll only be doing writes, but we need O_RDWR in case the 189321936Shselasky * provider needs to mmap() the file. 190321936Shselasky */ 191321936Shselasky cmd_fd = open(devpath, O_RDWR | O_CLOEXEC); 192321936Shselasky free(devpath); 193321936Shselasky 194321936Shselasky if (cmd_fd < 0) 195321936Shselasky return NULL; 196321936Shselasky 197321936Shselasky if (!verbs_device->ops->init_context) { 198321936Shselasky context = verbs_device->ops->alloc_context(device, cmd_fd); 199321936Shselasky if (!context) 200321936Shselasky goto err; 201321936Shselasky } else { 202321936Shselasky struct verbs_ex_private *priv; 203321936Shselasky 204321936Shselasky /* Library now allocates the context */ 205321936Shselasky context_ex = calloc(1, sizeof(*context_ex) + 206321936Shselasky verbs_device->size_of_context); 207321936Shselasky if (!context_ex) { 208321936Shselasky errno = ENOMEM; 209321936Shselasky goto err; 210321936Shselasky } 211321936Shselasky 212321936Shselasky priv = calloc(1, sizeof(*priv)); 213321936Shselasky if (!priv) { 214321936Shselasky errno = ENOMEM; 215321936Shselasky free(context_ex); 216321936Shselasky goto err; 217321936Shselasky } 218321936Shselasky 219321936Shselasky context_ex->priv = priv; 220321936Shselasky context_ex->context.abi_compat = __VERBS_ABI_IS_EXTENDED; 221321936Shselasky context_ex->sz = sizeof(*context_ex); 222321936Shselasky 223321936Shselasky context = &context_ex->context; 224321936Shselasky ret = verbs_device->ops->init_context(verbs_device, context, cmd_fd); 225321936Shselasky if (ret) 226321936Shselasky goto verbs_err; 227321936Shselasky /* 228321936Shselasky * In order to maintain backward/forward binary compatibility 229321936Shselasky * with apps compiled against libibverbs-1.1.8 that use the 230321936Shselasky * flow steering addition, we need to set the two 231321936Shselasky * ABI_placeholder entries to match the driver set flow 232321936Shselasky * entries. This is because apps compiled against 233321936Shselasky * libibverbs-1.1.8 use an inline ibv_create_flow and 234321936Shselasky * ibv_destroy_flow function that looks in the placeholder 235321936Shselasky * spots for the proper entry points. For apps compiled 236321936Shselasky * against libibverbs-1.1.9 and later, the inline functions 237321936Shselasky * will be looking in the right place. 238321936Shselasky */ 239321936Shselasky context_ex->ABI_placeholder1 = (void (*)(void)) context_ex->ibv_create_flow; 240321936Shselasky context_ex->ABI_placeholder2 = (void (*)(void)) context_ex->ibv_destroy_flow; 241321936Shselasky 242321936Shselasky if (context_ex->create_cq_ex) { 243321936Shselasky priv->create_cq_ex = context_ex->create_cq_ex; 244321936Shselasky context_ex->create_cq_ex = __lib_ibv_create_cq_ex; 245321936Shselasky } 246321936Shselasky } 247321936Shselasky 248321936Shselasky context->device = device; 249321936Shselasky context->cmd_fd = cmd_fd; 250321936Shselasky pthread_mutex_init(&context->mutex, NULL); 251321936Shselasky 252321936Shselasky return context; 253321936Shselasky 254321936Shselaskyverbs_err: 255321936Shselasky free(context_ex->priv); 256321936Shselasky free(context_ex); 257321936Shselaskyerr: 258321936Shselasky close(cmd_fd); 259321936Shselasky return NULL; 260321936Shselasky} 261321936Shselaskydefault_symver(__ibv_open_device, ibv_open_device); 262321936Shselasky 263321936Shselaskyint __ibv_close_device(struct ibv_context *context) 264321936Shselasky{ 265321936Shselasky int async_fd = context->async_fd; 266321936Shselasky int cmd_fd = context->cmd_fd; 267321936Shselasky int cq_fd = -1; 268321936Shselasky struct verbs_context *context_ex; 269321936Shselasky struct verbs_device *verbs_device = verbs_get_device(context->device); 270321936Shselasky 271321936Shselasky context_ex = verbs_get_ctx(context); 272321936Shselasky if (context_ex) { 273321936Shselasky verbs_device->ops->uninit_context(verbs_device, context); 274321936Shselasky free(context_ex->priv); 275321936Shselasky free(context_ex); 276321936Shselasky } else { 277321936Shselasky verbs_device->ops->free_context(context); 278321936Shselasky } 279321936Shselasky 280321936Shselasky close(async_fd); 281321936Shselasky close(cmd_fd); 282321936Shselasky if (abi_ver <= 2) 283321936Shselasky close(cq_fd); 284321936Shselasky 285321936Shselasky return 0; 286321936Shselasky} 287321936Shselaskydefault_symver(__ibv_close_device, ibv_close_device); 288321936Shselasky 289321936Shselaskyint __ibv_get_async_event(struct ibv_context *context, 290321936Shselasky struct ibv_async_event *event) 291321936Shselasky{ 292321936Shselasky struct ibv_kern_async_event ev; 293321936Shselasky 294321936Shselasky if (read(context->async_fd, &ev, sizeof ev) != sizeof ev) 295321936Shselasky return -1; 296321936Shselasky 297321936Shselasky event->event_type = ev.event_type; 298321936Shselasky 299321936Shselasky switch (event->event_type) { 300321936Shselasky case IBV_EVENT_CQ_ERR: 301321936Shselasky event->element.cq = (void *) (uintptr_t) ev.element; 302321936Shselasky break; 303321936Shselasky 304321936Shselasky case IBV_EVENT_QP_FATAL: 305321936Shselasky case IBV_EVENT_QP_REQ_ERR: 306321936Shselasky case IBV_EVENT_QP_ACCESS_ERR: 307321936Shselasky case IBV_EVENT_COMM_EST: 308321936Shselasky case IBV_EVENT_SQ_DRAINED: 309321936Shselasky case IBV_EVENT_PATH_MIG: 310321936Shselasky case IBV_EVENT_PATH_MIG_ERR: 311321936Shselasky case IBV_EVENT_QP_LAST_WQE_REACHED: 312321936Shselasky event->element.qp = (void *) (uintptr_t) ev.element; 313321936Shselasky break; 314321936Shselasky 315321936Shselasky case IBV_EVENT_SRQ_ERR: 316321936Shselasky case IBV_EVENT_SRQ_LIMIT_REACHED: 317321936Shselasky event->element.srq = (void *) (uintptr_t) ev.element; 318321936Shselasky break; 319321936Shselasky 320321936Shselasky case IBV_EVENT_WQ_FATAL: 321321936Shselasky event->element.wq = (void *) (uintptr_t) ev.element; 322321936Shselasky break; 323321936Shselasky default: 324321936Shselasky event->element.port_num = ev.element; 325321936Shselasky break; 326321936Shselasky } 327321936Shselasky 328321936Shselasky if (context->ops.async_event) 329321936Shselasky context->ops.async_event(event); 330321936Shselasky 331321936Shselasky return 0; 332321936Shselasky} 333321936Shselaskydefault_symver(__ibv_get_async_event, ibv_get_async_event); 334321936Shselasky 335321936Shselaskyvoid __ibv_ack_async_event(struct ibv_async_event *event) 336321936Shselasky{ 337321936Shselasky switch (event->event_type) { 338321936Shselasky case IBV_EVENT_CQ_ERR: 339321936Shselasky { 340321936Shselasky struct ibv_cq *cq = event->element.cq; 341321936Shselasky 342321936Shselasky pthread_mutex_lock(&cq->mutex); 343321936Shselasky ++cq->async_events_completed; 344321936Shselasky pthread_cond_signal(&cq->cond); 345321936Shselasky pthread_mutex_unlock(&cq->mutex); 346321936Shselasky 347321936Shselasky return; 348321936Shselasky } 349321936Shselasky 350321936Shselasky case IBV_EVENT_QP_FATAL: 351321936Shselasky case IBV_EVENT_QP_REQ_ERR: 352321936Shselasky case IBV_EVENT_QP_ACCESS_ERR: 353321936Shselasky case IBV_EVENT_COMM_EST: 354321936Shselasky case IBV_EVENT_SQ_DRAINED: 355321936Shselasky case IBV_EVENT_PATH_MIG: 356321936Shselasky case IBV_EVENT_PATH_MIG_ERR: 357321936Shselasky case IBV_EVENT_QP_LAST_WQE_REACHED: 358321936Shselasky { 359321936Shselasky struct ibv_qp *qp = event->element.qp; 360321936Shselasky 361321936Shselasky pthread_mutex_lock(&qp->mutex); 362321936Shselasky ++qp->events_completed; 363321936Shselasky pthread_cond_signal(&qp->cond); 364321936Shselasky pthread_mutex_unlock(&qp->mutex); 365321936Shselasky 366321936Shselasky return; 367321936Shselasky } 368321936Shselasky 369321936Shselasky case IBV_EVENT_SRQ_ERR: 370321936Shselasky case IBV_EVENT_SRQ_LIMIT_REACHED: 371321936Shselasky { 372321936Shselasky struct ibv_srq *srq = event->element.srq; 373321936Shselasky 374321936Shselasky pthread_mutex_lock(&srq->mutex); 375321936Shselasky ++srq->events_completed; 376321936Shselasky pthread_cond_signal(&srq->cond); 377321936Shselasky pthread_mutex_unlock(&srq->mutex); 378321936Shselasky 379321936Shselasky return; 380321936Shselasky } 381321936Shselasky 382321936Shselasky case IBV_EVENT_WQ_FATAL: 383321936Shselasky { 384321936Shselasky struct ibv_wq *wq = event->element.wq; 385321936Shselasky 386321936Shselasky pthread_mutex_lock(&wq->mutex); 387321936Shselasky ++wq->events_completed; 388321936Shselasky pthread_cond_signal(&wq->cond); 389321936Shselasky pthread_mutex_unlock(&wq->mutex); 390321936Shselasky 391321936Shselasky return; 392321936Shselasky } 393321936Shselasky 394321936Shselasky default: 395321936Shselasky return; 396321936Shselasky } 397321936Shselasky} 398321936Shselaskydefault_symver(__ibv_ack_async_event, ibv_ack_async_event); 399