1321936Shselasky/* 2321936Shselasky * Copyright (c) 2007 Cisco, Inc. All rights reserved. 3321936Shselasky * 4321936Shselasky * This software is available to you under a choice of one of two 5321936Shselasky * licenses. You may choose to be licensed under the terms of the GNU 6321936Shselasky * General Public License (GPL) Version 2, available from the file 7321936Shselasky * COPYING in the main directory of this source tree, or the 8321936Shselasky * OpenIB.org BSD license below: 9321936Shselasky * 10321936Shselasky * Redistribution and use in source and binary forms, with or 11321936Shselasky * without modification, are permitted provided that the following 12321936Shselasky * conditions are met: 13321936Shselasky * 14321936Shselasky * - Redistributions of source code must retain the above 15321936Shselasky * copyright notice, this list of conditions and the following 16321936Shselasky * disclaimer. 17321936Shselasky * 18321936Shselasky * - Redistributions in binary form must reproduce the above 19321936Shselasky * copyright notice, this list of conditions and the following 20321936Shselasky * disclaimer in the documentation and/or other materials 21321936Shselasky * provided with the distribution. 22321936Shselasky * 23321936Shselasky * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24321936Shselasky * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25321936Shselasky * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26321936Shselasky * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27321936Shselasky * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28321936Shselasky * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29321936Shselasky * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30321936Shselasky * SOFTWARE. 31321936Shselasky */ 32321936Shselasky 33321936Shselasky#include <config.h> 34321936Shselasky 35321936Shselasky#include <stdio.h> 36321936Shselasky#include <stdlib.h> 37321936Shselasky#include <unistd.h> 38321936Shselasky#include <errno.h> 39321936Shselasky#include <sys/mman.h> 40321936Shselasky#include <pthread.h> 41321936Shselasky#include <string.h> 42321936Shselasky 43321936Shselasky#include "mlx4.h" 44321936Shselasky#include "mlx4-abi.h" 45321936Shselasky 46321936Shselasky#ifndef PCI_VENDOR_ID_MELLANOX 47321936Shselasky#define PCI_VENDOR_ID_MELLANOX 0x15b3 48321936Shselasky#endif 49321936Shselasky 50321936Shselasky#define HCA(v, d) \ 51321936Shselasky { .vendor = PCI_VENDOR_ID_##v, \ 52321936Shselasky .device = d } 53321936Shselasky 54321936Shselaskystatic struct { 55321936Shselasky unsigned vendor; 56321936Shselasky unsigned device; 57321936Shselasky} hca_table[] = { 58321936Shselasky HCA(MELLANOX, 0x6340), /* MT25408 "Hermon" SDR */ 59321936Shselasky HCA(MELLANOX, 0x634a), /* MT25408 "Hermon" DDR */ 60321936Shselasky HCA(MELLANOX, 0x6354), /* MT25408 "Hermon" QDR */ 61321936Shselasky HCA(MELLANOX, 0x6732), /* MT25408 "Hermon" DDR PCIe gen2 */ 62321936Shselasky HCA(MELLANOX, 0x673c), /* MT25408 "Hermon" QDR PCIe gen2 */ 63321936Shselasky HCA(MELLANOX, 0x6368), /* MT25408 "Hermon" EN 10GigE */ 64321936Shselasky HCA(MELLANOX, 0x6750), /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 65321936Shselasky HCA(MELLANOX, 0x6372), /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 66321936Shselasky HCA(MELLANOX, 0x675a), /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 67321936Shselasky HCA(MELLANOX, 0x6764), /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 68321936Shselasky HCA(MELLANOX, 0x6746), /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 69321936Shselasky HCA(MELLANOX, 0x676e), /* MT26478 ConnectX2 40GigE PCIe gen2 */ 70321936Shselasky HCA(MELLANOX, 0x1002), /* MT25400 Family [ConnectX-2 Virtual Function] */ 71321936Shselasky HCA(MELLANOX, 0x1003), /* MT27500 Family [ConnectX-3] */ 72321936Shselasky HCA(MELLANOX, 0x1004), /* MT27500 Family [ConnectX-3 Virtual Function] */ 73321936Shselasky HCA(MELLANOX, 0x1005), /* MT27510 Family */ 74321936Shselasky HCA(MELLANOX, 0x1006), /* MT27511 Family */ 75321936Shselasky HCA(MELLANOX, 0x1007), /* MT27520 Family */ 76321936Shselasky HCA(MELLANOX, 0x1008), /* MT27521 Family */ 77321936Shselasky HCA(MELLANOX, 0x1009), /* MT27530 Family */ 78321936Shselasky HCA(MELLANOX, 0x100a), /* MT27531 Family */ 79321936Shselasky HCA(MELLANOX, 0x100b), /* MT27540 Family */ 80321936Shselasky HCA(MELLANOX, 0x100c), /* MT27541 Family */ 81321936Shselasky HCA(MELLANOX, 0x100d), /* MT27550 Family */ 82321936Shselasky HCA(MELLANOX, 0x100e), /* MT27551 Family */ 83321936Shselasky HCA(MELLANOX, 0x100f), /* MT27560 Family */ 84321936Shselasky HCA(MELLANOX, 0x1010), /* MT27561 Family */ 85321936Shselasky}; 86321936Shselasky 87321936Shselaskystatic struct ibv_context_ops mlx4_ctx_ops = { 88321936Shselasky .query_device = mlx4_query_device, 89321936Shselasky .query_port = mlx4_query_port, 90321936Shselasky .alloc_pd = mlx4_alloc_pd, 91321936Shselasky .dealloc_pd = mlx4_free_pd, 92321936Shselasky .reg_mr = mlx4_reg_mr, 93321936Shselasky .rereg_mr = mlx4_rereg_mr, 94321936Shselasky .dereg_mr = mlx4_dereg_mr, 95321936Shselasky .alloc_mw = mlx4_alloc_mw, 96321936Shselasky .dealloc_mw = mlx4_dealloc_mw, 97321936Shselasky .bind_mw = mlx4_bind_mw, 98321936Shselasky .create_cq = mlx4_create_cq, 99321936Shselasky .poll_cq = mlx4_poll_cq, 100321936Shselasky .req_notify_cq = mlx4_arm_cq, 101321936Shselasky .cq_event = mlx4_cq_event, 102321936Shselasky .resize_cq = mlx4_resize_cq, 103321936Shselasky .destroy_cq = mlx4_destroy_cq, 104321936Shselasky .create_srq = mlx4_create_srq, 105321936Shselasky .modify_srq = mlx4_modify_srq, 106321936Shselasky .query_srq = mlx4_query_srq, 107321936Shselasky .destroy_srq = mlx4_destroy_srq, 108321936Shselasky .post_srq_recv = mlx4_post_srq_recv, 109321936Shselasky .create_qp = mlx4_create_qp, 110321936Shselasky .query_qp = mlx4_query_qp, 111321936Shselasky .modify_qp = mlx4_modify_qp, 112321936Shselasky .destroy_qp = mlx4_destroy_qp, 113321936Shselasky .post_send = mlx4_post_send, 114321936Shselasky .post_recv = mlx4_post_recv, 115321936Shselasky .create_ah = mlx4_create_ah, 116321936Shselasky .destroy_ah = mlx4_destroy_ah, 117321936Shselasky .attach_mcast = ibv_cmd_attach_mcast, 118321936Shselasky .detach_mcast = ibv_cmd_detach_mcast 119321936Shselasky}; 120321936Shselasky 121321936Shselaskystatic int mlx4_map_internal_clock(struct mlx4_device *mdev, 122321936Shselasky struct ibv_context *ibv_ctx) 123321936Shselasky{ 124321936Shselasky struct mlx4_context *context = to_mctx(ibv_ctx); 125321936Shselasky void *hca_clock_page; 126321936Shselasky 127321936Shselasky hca_clock_page = mmap(NULL, mdev->page_size, 128321936Shselasky PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd, 129321936Shselasky mdev->page_size * 3); 130321936Shselasky 131321936Shselasky if (hca_clock_page == MAP_FAILED) { 132321936Shselasky fprintf(stderr, PFX 133321936Shselasky "Warning: Timestamp available,\n" 134321936Shselasky "but failed to mmap() hca core clock page.\n"); 135321936Shselasky return -1; 136321936Shselasky } 137321936Shselasky 138321936Shselasky context->hca_core_clock = hca_clock_page + 139321936Shselasky (context->core_clock.offset & (mdev->page_size - 1)); 140321936Shselasky return 0; 141321936Shselasky} 142321936Shselasky 143321936Shselaskystatic int mlx4_init_context(struct verbs_device *v_device, 144321936Shselasky struct ibv_context *ibv_ctx, int cmd_fd) 145321936Shselasky{ 146321936Shselasky struct mlx4_context *context; 147321936Shselasky struct ibv_get_context cmd; 148321936Shselasky struct mlx4_alloc_ucontext_resp resp; 149321936Shselasky int i; 150321936Shselasky struct mlx4_alloc_ucontext_resp_v3 resp_v3; 151321936Shselasky __u16 bf_reg_size; 152321936Shselasky struct mlx4_device *dev = to_mdev(&v_device->device); 153321936Shselasky struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx); 154321936Shselasky struct ibv_device_attr_ex dev_attrs; 155321936Shselasky 156321936Shselasky /* memory footprint of mlx4_context and verbs_context share 157321936Shselasky * struct ibv_context. 158321936Shselasky */ 159321936Shselasky context = to_mctx(ibv_ctx); 160321936Shselasky ibv_ctx->cmd_fd = cmd_fd; 161321936Shselasky 162321936Shselasky if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) { 163321936Shselasky if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd, 164321936Shselasky &resp_v3.ibv_resp, sizeof resp_v3)) 165321936Shselasky return errno; 166321936Shselasky 167321936Shselasky context->num_qps = resp_v3.qp_tab_size; 168321936Shselasky bf_reg_size = resp_v3.bf_reg_size; 169321936Shselasky context->cqe_size = sizeof (struct mlx4_cqe); 170321936Shselasky } else { 171321936Shselasky if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd, 172321936Shselasky &resp.ibv_resp, sizeof resp)) 173321936Shselasky return errno; 174321936Shselasky 175321936Shselasky context->num_qps = resp.qp_tab_size; 176321936Shselasky bf_reg_size = resp.bf_reg_size; 177321936Shselasky if (resp.dev_caps & MLX4_USER_DEV_CAP_64B_CQE) 178321936Shselasky context->cqe_size = resp.cqe_size; 179321936Shselasky else 180321936Shselasky context->cqe_size = sizeof (struct mlx4_cqe); 181321936Shselasky } 182321936Shselasky 183321936Shselasky context->qp_table_shift = ffs(context->num_qps) - 1 - MLX4_QP_TABLE_BITS; 184321936Shselasky context->qp_table_mask = (1 << context->qp_table_shift) - 1; 185321936Shselasky for (i = 0; i < MLX4_PORTS_NUM; ++i) 186321936Shselasky context->port_query_cache[i].valid = 0; 187321936Shselasky 188321936Shselasky pthread_mutex_init(&context->qp_table_mutex, NULL); 189321936Shselasky for (i = 0; i < MLX4_QP_TABLE_SIZE; ++i) 190321936Shselasky context->qp_table[i].refcnt = 0; 191321936Shselasky 192321936Shselasky for (i = 0; i < MLX4_NUM_DB_TYPE; ++i) 193321936Shselasky context->db_list[i] = NULL; 194321936Shselasky 195321936Shselasky mlx4_init_xsrq_table(&context->xsrq_table, context->num_qps); 196321936Shselasky pthread_mutex_init(&context->db_list_mutex, NULL); 197321936Shselasky 198321936Shselasky context->uar = mmap(NULL, dev->page_size, PROT_WRITE, 199321936Shselasky MAP_SHARED, cmd_fd, 0); 200321936Shselasky if (context->uar == MAP_FAILED) 201321936Shselasky return errno; 202321936Shselasky 203321936Shselasky if (bf_reg_size) { 204321936Shselasky context->bf_page = mmap(NULL, dev->page_size, 205321936Shselasky PROT_WRITE, MAP_SHARED, cmd_fd, 206321936Shselasky dev->page_size); 207321936Shselasky if (context->bf_page == MAP_FAILED) { 208321936Shselasky fprintf(stderr, PFX "Warning: BlueFlame available, " 209321936Shselasky "but failed to mmap() BlueFlame page.\n"); 210321936Shselasky context->bf_page = NULL; 211321936Shselasky context->bf_buf_size = 0; 212321936Shselasky } else { 213321936Shselasky context->bf_buf_size = bf_reg_size / 2; 214321936Shselasky context->bf_offset = 0; 215321936Shselasky pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE); 216321936Shselasky } 217321936Shselasky } else { 218321936Shselasky context->bf_page = NULL; 219321936Shselasky context->bf_buf_size = 0; 220321936Shselasky } 221321936Shselasky 222321936Shselasky pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); 223321936Shselasky ibv_ctx->ops = mlx4_ctx_ops; 224321936Shselasky 225321936Shselasky context->hca_core_clock = NULL; 226321936Shselasky memset(&dev_attrs, 0, sizeof(dev_attrs)); 227321936Shselasky if (!mlx4_query_device_ex(ibv_ctx, NULL, &dev_attrs, 228321936Shselasky sizeof(struct ibv_device_attr_ex))) { 229321936Shselasky context->max_qp_wr = dev_attrs.orig_attr.max_qp_wr; 230321936Shselasky context->max_sge = dev_attrs.orig_attr.max_sge; 231321936Shselasky if (context->core_clock.offset_valid) 232321936Shselasky mlx4_map_internal_clock(dev, ibv_ctx); 233321936Shselasky } 234321936Shselasky 235321936Shselasky verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ | 236321936Shselasky VERBS_CONTEXT_QP; 237321936Shselasky verbs_set_ctx_op(verbs_ctx, close_xrcd, mlx4_close_xrcd); 238321936Shselasky verbs_set_ctx_op(verbs_ctx, open_xrcd, mlx4_open_xrcd); 239321936Shselasky verbs_set_ctx_op(verbs_ctx, create_srq_ex, mlx4_create_srq_ex); 240321936Shselasky verbs_set_ctx_op(verbs_ctx, get_srq_num, verbs_get_srq_num); 241321936Shselasky verbs_set_ctx_op(verbs_ctx, create_qp_ex, mlx4_create_qp_ex); 242321936Shselasky verbs_set_ctx_op(verbs_ctx, open_qp, mlx4_open_qp); 243321936Shselasky verbs_set_ctx_op(verbs_ctx, ibv_create_flow, ibv_cmd_create_flow); 244321936Shselasky verbs_set_ctx_op(verbs_ctx, ibv_destroy_flow, ibv_cmd_destroy_flow); 245321936Shselasky verbs_set_ctx_op(verbs_ctx, create_cq_ex, mlx4_create_cq_ex); 246321936Shselasky verbs_set_ctx_op(verbs_ctx, query_device_ex, mlx4_query_device_ex); 247321936Shselasky verbs_set_ctx_op(verbs_ctx, query_rt_values, mlx4_query_rt_values); 248321936Shselasky 249321936Shselasky return 0; 250321936Shselasky 251321936Shselasky} 252321936Shselasky 253321936Shselaskystatic void mlx4_uninit_context(struct verbs_device *v_device, 254321936Shselasky struct ibv_context *ibv_ctx) 255321936Shselasky{ 256321936Shselasky struct mlx4_context *context = to_mctx(ibv_ctx); 257321936Shselasky 258321936Shselasky munmap(context->uar, to_mdev(&v_device->device)->page_size); 259321936Shselasky if (context->bf_page) 260321936Shselasky munmap(context->bf_page, to_mdev(&v_device->device)->page_size); 261321936Shselasky if (context->hca_core_clock) 262321936Shselasky munmap(context->hca_core_clock - context->core_clock.offset, 263321936Shselasky to_mdev(&v_device->device)->page_size); 264321936Shselasky} 265321936Shselasky 266321936Shselaskystatic struct verbs_device_ops mlx4_dev_ops = { 267321936Shselasky .init_context = mlx4_init_context, 268321936Shselasky .uninit_context = mlx4_uninit_context, 269321936Shselasky}; 270321936Shselasky 271321936Shselaskystatic struct verbs_device *mlx4_driver_init(const char *uverbs_sys_path, int abi_version) 272321936Shselasky{ 273321936Shselasky char value[8]; 274321936Shselasky struct mlx4_device *dev; 275321936Shselasky unsigned vendor, device; 276321936Shselasky int i; 277321936Shselasky 278321936Shselasky if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor", 279321936Shselasky value, sizeof value) < 0) 280321936Shselasky return NULL; 281321936Shselasky vendor = strtol(value, NULL, 16); 282321936Shselasky 283321936Shselasky if (ibv_read_sysfs_file(uverbs_sys_path, "device/device", 284321936Shselasky value, sizeof value) < 0) 285321936Shselasky return NULL; 286321936Shselasky device = strtol(value, NULL, 16); 287321936Shselasky 288321936Shselasky for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i) 289321936Shselasky if (vendor == hca_table[i].vendor && 290321936Shselasky device == hca_table[i].device) 291321936Shselasky goto found; 292321936Shselasky 293321936Shselasky return NULL; 294321936Shselasky 295321936Shselaskyfound: 296321936Shselasky if (abi_version < MLX4_UVERBS_MIN_ABI_VERSION || 297321936Shselasky abi_version > MLX4_UVERBS_MAX_ABI_VERSION) { 298321936Shselasky fprintf(stderr, PFX "Fatal: ABI version %d of %s is not supported " 299321936Shselasky "(min supported %d, max supported %d)\n", 300321936Shselasky abi_version, uverbs_sys_path, 301321936Shselasky MLX4_UVERBS_MIN_ABI_VERSION, 302321936Shselasky MLX4_UVERBS_MAX_ABI_VERSION); 303321936Shselasky return NULL; 304321936Shselasky } 305321936Shselasky 306321936Shselasky dev = calloc(1, sizeof *dev); 307321936Shselasky if (!dev) { 308321936Shselasky fprintf(stderr, PFX "Fatal: couldn't allocate device for %s\n", 309321936Shselasky uverbs_sys_path); 310321936Shselasky return NULL; 311321936Shselasky } 312321936Shselasky 313321936Shselasky dev->page_size = sysconf(_SC_PAGESIZE); 314321936Shselasky dev->abi_version = abi_version; 315321936Shselasky 316321936Shselasky dev->verbs_dev.ops = &mlx4_dev_ops; 317321936Shselasky dev->verbs_dev.sz = sizeof(*dev); 318321936Shselasky dev->verbs_dev.size_of_context = 319321936Shselasky sizeof(struct mlx4_context) - sizeof(struct ibv_context); 320321936Shselasky 321321936Shselasky return &dev->verbs_dev; 322321936Shselasky} 323321936Shselasky 324321936Shselaskystatic __attribute__((constructor)) void mlx4_register_driver(void) 325321936Shselasky{ 326321936Shselasky verbs_register_driver("mlx4", mlx4_driver_init); 327321936Shselasky} 328