/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 337748 2018-08-14 11:52:05Z hselasky $
 */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <net/ipv6.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <linux/fs.h>
#undef inode

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"

#include <sys/unistd.h>

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2.1"
#define DRIVER_RELDATE "August 2018"

#undef MODULE_VERSION
#include <sys/module.h>

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
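
/*
 * NOTE: prof_sel appears to be accepted here only for backward
 * compatibility; as the parameter description says, the active profile
 * selector now lives in the mlx5_core module.
 */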
enum {
	MLX5_STANDARD_ATOMIC_SIZE = 0x8,
};

struct workqueue_struct *mlx5_ib_wq;

static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

static void get_atomic_caps(struct mlx5_ib_dev *dev,
			    struct ib_device_attr *props)
{
	int tmp;
	u8 atomic_operations;
	u8 atomic_size_qp;
	u8 atomic_req_endianess;

	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
					       atomic_req_8B_endianess_mode) ||
			       !mlx5_host_is_le();

	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & MLX5_STANDARD_ATOMIC_SIZE)) {
		if (atomic_req_endianess)
			props->atomic_cap = IB_ATOMIC_HCA;
		else
			props->atomic_cap = IB_ATOMIC_NONE;
	} else {
		props->atomic_cap = IB_ATOMIC_NONE;
	}

	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & MLX5_STANDARD_ATOMIC_SIZE)) {
		if (atomic_req_endianess)
			props->masked_atomic_cap = IB_ATOMIC_HCA;
		else
			props->masked_atomic_cap = IB_ATOMIC_NONE;
	} else {
		props->masked_atomic_cap = IB_ATOMIC_NONE;
	}
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(device);

	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
	case MLX5_CAP_PORT_TYPE_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case MLX5_CAP_PORT_TYPE_ETH:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}

static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
	return !dev->mdev->issi;
}

enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,
	MLX5_VPORT_ACCESS_METHOD_HCA,
	MLX5_VPORT_ACCESS_METHOD_NIC,
};

static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
		return MLX5_VPORT_ACCESS_METHOD_MAD;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		return MLX5_VPORT_ACCESS_METHOD_NIC;

	return MLX5_VPORT_ACCESS_METHOD_HCA;
}
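
/*
 * The query helpers below share one dispatch scheme: MAD commands when
 * the firmware runs the legacy interface (ISSI == 0), the NIC vport
 * context when port 1 is Ethernet/RoCE, and the HCA vport context
 * otherwise.
 */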
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
					__be64 *sys_image_guid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_system_image_guid_mad_ifc(ibdev,
							    sys_image_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
		if (!err)
			*sys_image_guid = cpu_to_be64(tmp);
		return err;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
		if (!err)
			*sys_image_guid = cpu_to_be64(tmp);
		return err;

	default:
		return -EINVAL;
	}
}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
				u16 *max_pkeys)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
							     pkey_table_size));
		return 0;

	default:
		return -EINVAL;
	}
}

static int mlx5_query_vendor_id(struct ib_device *ibdev,
				u32 *vendor_id)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

	default:
		return -EINVAL;
	}
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
				__be64 *node_guid)
{
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_node_guid_mad_ifc(dev, node_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
		if (!err)
			*node_guid = cpu_to_be64(tmp);
		return err;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
		if (!err)
			*node_guid = cpu_to_be64(tmp);
		return err;

	default:
		return -EINVAL;
	}
}

struct mlx5_reg_node_desc {
	u8 desc[64];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
	struct mlx5_reg_node_desc in;

	if (mlx5_use_mad_ifc(dev))
		return mlx5_query_node_desc_mad_ifc(dev, node_desc);

	memset(&in, 0, sizeof(in));

	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
				    sizeof(struct mlx5_reg_node_desc),
				    MLX5_REG_NODE_DESC, 0, 0);
}
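
/*
 * Note on max_sge in mlx5_ib_query_device(): the usable SGE count is
 * bounded by both WQE types.  A send WQE also carries a control segment
 * and a remote-address segment and is clamped to 512 bytes, so max_sge
 * is the minimum of what fits in a receive and in a send WQE.
 */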
static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	int err;

	memset(props, 0, sizeof(*props));

	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		((u64)fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT |
		IB_DEVICE_SYS_IMAGE_GUID |
		IB_DEVICE_RC_RNR_NAK_GEN;

	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	props->vendor_part_id = mdev->pdev->device;
	props->hw_ver = mdev->pdev->revision;

	props->max_mr_size = ~0ull;
	props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) - 1);
	props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
	max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		sizeof(struct mlx5_wqe_data_seg);
	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc -
		     sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge = max_rq_sg - 1;
	props->max_fast_reg_page_list_len = (unsigned int)-1;
	get_atomic_caps(dev, props);
	props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah = INT_MAX;

	return 0;
}

enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X = 1 << 0,
	MLX5_IB_WIDTH_2X = 1 << 1,
	MLX5_IB_WIDTH_4X = 1 << 2,
	MLX5_IB_WIDTH_8X = 1 << 3,
	MLX5_IB_WIDTH_12X = 1 << 4
};

static int translate_active_width(struct ib_device *ibdev, u8 active_width,
				  u8 *ib_width)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int err = 0;

	if (active_width & MLX5_IB_WIDTH_1X) {
		*ib_width = IB_WIDTH_1X;
	} else if (active_width & MLX5_IB_WIDTH_2X) {
		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
			     (int)active_width);
		err = -EINVAL;
	} else if (active_width & MLX5_IB_WIDTH_4X) {
		*ib_width = IB_WIDTH_4X;
	} else if (active_width & MLX5_IB_WIDTH_8X) {
		*ib_width = IB_WIDTH_8X;
	} else if (active_width & MLX5_IB_WIDTH_12X) {
		*ib_width = IB_WIDTH_12X;
	} else {
		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
			    (int)active_width);
		err = -EINVAL;
	}

	return err;
}

/*
 * TODO: Move to IB core
 */
enum ib_max_vl_num {
	__IB_MAX_VL_0 = 1,
	__IB_MAX_VL_0_1 = 2,
	__IB_MAX_VL_0_3 = 3,
	__IB_MAX_VL_0_7 = 4,
	__IB_MAX_VL_0_14 = 5,
};

enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0 = 1,
	MLX5_VL_HW_0_1 = 2,
	MLX5_VL_HW_0_2 = 3,
	MLX5_VL_HW_0_3 = 4,
	MLX5_VL_HW_0_4 = 5,
	MLX5_VL_HW_0_5 = 6,
	MLX5_VL_HW_0_6 = 7,
	MLX5_VL_HW_0_7 = 8,
	MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
				u8 *max_vl_num)
{
	switch (vl_hw_cap) {
	case MLX5_VL_HW_0:
		*max_vl_num = __IB_MAX_VL_0;
		break;
	case MLX5_VL_HW_0_1:
		*max_vl_num = __IB_MAX_VL_0_1;
		break;
	case MLX5_VL_HW_0_3:
		*max_vl_num = __IB_MAX_VL_0_3;
		break;
	case MLX5_VL_HW_0_7:
		*max_vl_num = __IB_MAX_VL_0_7;
		break;
	case MLX5_VL_HW_0_14:
		*max_vl_num = __IB_MAX_VL_0_14;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}
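
/*
 * Port query for the native IB access method: generic attributes come
 * from the HCA vport context, while link width/speed, MTU and VL
 * capabilities are read through the PTYS, PMTU and PVLC access
 * registers, respectively.
 */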
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u32 *rep;
	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
	struct mlx5_ptys_reg *ptys;
	struct mlx5_pmtu_reg *pmtu;
	struct mlx5_pvlc_reg pvlc;
	void *ctx;
	int err;

	rep = mlx5_vzalloc(outlen);
	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
	if (!rep || !ptys || !pmtu) {
		err = -ENOMEM;
		goto out;
	}

	memset(props, 0, sizeof(*props));

	/* what if I am pf with dual port */
	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
	if (err)
		goto out;

	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

	props->lid = MLX5_GET(hca_vport_context, ctx, lid);
	props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
	props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
	props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
	props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
	props->phys_state = MLX5_GET(hca_vport_context, ctx,
				     port_physical_state);
	props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
	props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
					pkey_violation_counter);
	props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
					 qkey_violation_counter);
	props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
					 subnet_timeout);
	props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
					  init_type_reply);

	ptys->proto_mask |= MLX5_PTYS_IB;
	ptys->local_port = port;
	err = mlx5_core_access_ptys(mdev, ptys, 0);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;

	props->active_speed = (u8)ptys->ib_proto_oper;

	pmtu->local_port = port;
	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
	if (err)
		goto out;

	props->max_mtu = pmtu->max_mtu;
	props->active_mtu = pmtu->oper_mtu;

	memset(&pvlc, 0, sizeof(pvlc));
	pvlc.local_port = port;
	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
				   &props->max_vl_num);
out:
	kvfree(rep);
	kfree(ptys);
	kfree(pmtu);
	return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
		       struct ib_port_attr *props)
{
	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_port_mad_ifc(ibdev, port, props);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_port_ib(ibdev, port, props);

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_port_roce(ibdev, port, props);

	default:
		return -EINVAL;
	}
}

static void
mlx5_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
{
	if (dev->if_addrlen != ETH_ALEN)
		return;

	memcpy(eui, IF_LLADDR(dev), 3);
	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);

	if (vlan_id < 0x1000) {
		eui[3] = vlan_id >> 8;
		eui[4] = vlan_id & 0xff;
	} else {
		eui[3] = 0xFF;
		eui[4] = 0xFE;
	}
	eui[0] ^= 2;
}
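
/*
 * The interface ID built above follows the modified EUI-64 scheme: the
 * 6-byte MAC is split around two filler bytes (0xFF, 0xFE for untagged
 * traffic, or the VLAN ID for VLAN interfaces) and the universal/local
 * bit is inverted.  The default GID is that interface ID behind the
 * fe80::/64 link-local prefix.
 */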
565{ 566 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 567 mlx5_addrconf_ifid_eui48(&gid->raw[8], 0xFFFF, dev); 568} 569 570static void 571mlx5_ib_roce_port_update(void *arg) 572{ 573 struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg; 574 struct mlx5_ib_dev *dev = port->dev; 575 struct mlx5_core_dev *mdev = dev->mdev; 576 struct net_device *xdev[MLX5_IB_GID_MAX]; 577 struct net_device *idev; 578 struct net_device *ndev; 579 union ib_gid gid_temp; 580 581 while (port->port_gone == 0) { 582 int update = 0; 583 int gid_index = 0; 584 int j; 585 int error; 586 587 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH); 588 if (ndev == NULL) { 589 pause("W", hz); 590 continue; 591 } 592 593 CURVNET_SET_QUIET(ndev->if_vnet); 594 595 memset(&gid_temp, 0, sizeof(gid_temp)); 596 mlx5_make_default_gid(ndev, &gid_temp); 597 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) { 598 port->gid_table[gid_index] = gid_temp; 599 update = 1; 600 } 601 xdev[gid_index] = ndev; 602 gid_index++; 603 604 IFNET_RLOCK(); 605 TAILQ_FOREACH(idev, &V_ifnet, if_link) { 606 if (idev == ndev) 607 break; 608 } 609 if (idev != NULL) { 610 TAILQ_FOREACH(idev, &V_ifnet, if_link) { 611 u16 vid; 612 613 if (idev != ndev) { 614 if (idev->if_type != IFT_L2VLAN) 615 continue; 616 if (ndev != rdma_vlan_dev_real_dev(idev)) 617 continue; 618 } 619 620 /* setup valid MAC-based GID */ 621 memset(&gid_temp, 0, sizeof(gid_temp)); 622 gid_temp.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 623 vid = rdma_vlan_dev_vlan_id(idev); 624 mlx5_addrconf_ifid_eui48(&gid_temp.raw[8], vid, idev); 625 626 /* check for existing entry */ 627 for (j = 0; j != gid_index; j++) { 628 if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0) 629 break; 630 } 631 632 /* check if new entry should be added */ 633 if (j == gid_index && gid_index < MLX5_IB_GID_MAX) { 634 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) { 635 port->gid_table[gid_index] = gid_temp; 636 update = 1; 637 } 638 xdev[gid_index] = idev; 639 gid_index++; 640 } 641 } 642 } 643 IFNET_RUNLOCK(); 644 CURVNET_RESTORE(); 645 646 if (update != 0 && 647 mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { 648 struct ib_event event = { 649 .device = &dev->ib_dev, 650 .element.port_num = port->port_num + 1, 651 .event = IB_EVENT_GID_CHANGE, 652 }; 653 654 /* add new entries, if any */ 655 for (j = 0; j != gid_index; j++) { 656 error = modify_gid_roce(&dev->ib_dev, port->port_num, j, 657 port->gid_table + j, xdev[j]); 658 if (error != 0) 659 printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error); 660 } 661 memset(&gid_temp, 0, sizeof(gid_temp)); 662 663 /* clear old entries, if any */ 664 for (; j != MLX5_IB_GID_MAX; j++) { 665 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0) 666 continue; 667 port->gid_table[j] = gid_temp; 668 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j, 669 port->gid_table + j, ndev); 670 } 671 672 /* make sure ibcore gets updated */ 673 ib_dispatch_event(&event); 674 } 675 pause("W", hz); 676 } 677 do { 678 struct ib_event event = { 679 .device = &dev->ib_dev, 680 .element.port_num = port->port_num + 1, 681 .event = IB_EVENT_GID_CHANGE, 682 }; 683 /* make sure ibcore gets updated */ 684 ib_dispatch_event(&event); 685 686 /* wait a bit */ 687 pause("W", hz); 688 } while (0); 689 port->port_gone = 2; 690 kthread_exit(); 691} 692 693static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 694 union ib_gid *gid) 695{ 696 struct 
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
		    index < 0 || index >= MLX5_IB_GID_MAX ||
		    dev->port[port - 1].port_gone != 0)
			memset(gid, 0, sizeof(*gid));
		else
			*gid = dev->port[port - 1].gid_table[index];
		return 0;

	default:
		return -EINVAL;
	}
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
						 pkey);

	default:
		return -EINVAL;
	}
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_reg_node_desc in;
	struct mlx5_reg_node_desc out;
	int err;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
		return 0;

	/*
	 * If possible, pass node desc to FW, so it can generate
	 * a 144 trap.  If cmd fails, just ignore.
	 */
	memcpy(&in, props->node_desc, 64);
	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
	if (err)
		return err;

	memcpy(ibdev->node_desc, props->node_desc, 64);

	return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
		     IB_LINK_LAYER_ETHERNET);
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 tmp;
	int err;

	/*
	 * Return OK if this is RoCE.  The CM calls ib_modify_port()
	 * regardless of whether the port link layer is ETH or IB.  For
	 * ETH ports, qkey violations and port capabilities are not valid.
	 */
	if (is_eth)
		return 0;

	mutex_lock(&dev->cap_mask_mutex);

	err = mlx5_ib_query_port(ibdev, port, &attr);
	if (err)
		goto out;

	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
		~props->clr_port_cap_mask;

	err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
	mutex_unlock(&dev->cap_mask_mutex);
	return err;
}
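
/*
 * Device-specific capability flags reported back to user space through
 * the flags field of the alloc_ucontext response below.
 */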
enum mlx5_cap_flags {
	MLX5_CAP_COMPACT_AV = 1 << 0,
};

static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
{
	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
		  MLX5_CAP_COMPACT_AV : 0;
}

static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_ucontext_req_v2 req;
	struct mlx5_ib_alloc_ucontext_resp resp;
	struct mlx5_ib_ucontext *context;
	struct mlx5_uuar_info *uuari;
	struct mlx5_uar *uars;
	int gross_uuars;
	int num_uars;
	int ver;
	int uuarn;
	int err;
	int i;
	size_t reqlen;

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	memset(&req, 0, sizeof(req));
	memset(&resp, 0, sizeof(resp));

	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
		ver = 0;
	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
		ver = 2;
	else {
		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
		return ERR_PTR(-EINVAL);
	}

	err = ib_copy_from_udata(&req, udata, reqlen);
	if (err) {
		mlx5_ib_err(dev, "copy failed\n");
		return ERR_PTR(err);
	}

	if (req.reserved) {
		mlx5_ib_err(dev, "request corrupted\n");
		return ERR_PTR(-EINVAL);
	}

	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
		return ERR_PTR(-ENOMEM);
	}

	req.total_num_uuars = ALIGN(req.total_num_uuars,
				    MLX5_NON_FP_BF_REGS_PER_PAGE);
	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
			     req.num_low_latency_uuars, req.total_num_uuars - 1);
		return ERR_PTR(-EINVAL);
	}

	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
	resp.cache_line_size = L1_CACHE_BYTES;
	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
	set_mlx5_flags(&resp.flags, dev->mdev);

	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);

	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	uuari = &context->uuari;
	mutex_init(&uuari->lock);
	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
	if (!uars) {
		err = -ENOMEM;
		goto out_ctx;
	}

	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
				sizeof(*uuari->bitmap),
				GFP_KERNEL);
	if (!uuari->bitmap) {
		err = -ENOMEM;
		goto out_uar_ctx;
	}
	/*
	 * Reserve the fast path uuars: slots 2 and 3 in every group of
	 * four, so that they are not handed out by the allocator.
	 */
	for (i = 0; i < gross_uuars; i++) {
		uuarn = i & 3;
		if (uuarn == 2 || uuarn == 3)
			set_bit(i, uuari->bitmap);
	}

	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
	if (!uuari->count) {
		err = -ENOMEM;
		goto out_bitmap;
	}

	for (i = 0; i < num_uars; i++) {
		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
		if (err) {
			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
			goto out_uars;
		}
	}
	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;

	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	resp.tot_uuars = req.total_num_uuars;
	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
	err = ib_copy_to_udata(udata, &resp,
			       min_t(size_t, udata->outlen, sizeof(resp)));
	if (err)
		goto out_uars;

	uuari->ver = ver;
	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
	uuari->uars = uars;
	uuari->num_uars = num_uars;

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
		if (err)
			goto out_uars;
	}

	return &context->ibucontext;

out_uars:
	for (i--; i >= 0; i--)
		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
	kfree(uuari->count);

out_bitmap:
	kfree(uuari->bitmap);

out_uar_ctx:
	kfree(uars);

out_ctx:
	kfree(context);
	return ERR_PTR(err);
}

static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_uuar_info *uuari = &context->uuari;
	int i;

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

	for (i = 0; i < uuari->num_uars; i++) {
		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
	}
	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
	}

	kfree(uuari->count);
	kfree(uuari->bitmap);
	kfree(uuari->uars);
	kfree(context);

	return 0;
}

static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}

static int get_command(unsigned long offset)
{
	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
	return get_arg(offset);
}
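
/*
 * The mmap offset encodes a command in the bits at and above
 * MLX5_IB_MMAP_CMD_SHIFT and a command argument (for UAR mappings, the
 * UAR index) in the bits below it:
 *
 *	vm_pgoff = (command << MLX5_IB_MMAP_CMD_SHIFT) | index
 */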
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
		    struct mlx5_ib_ucontext *context)
{
	unsigned long idx;
	phys_addr_t pfn;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
		return -EINVAL;
	}

	idx = get_index(vma->vm_pgoff);
	if (idx >= uuari->num_uars) {
		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
			     idx, uuari->num_uars);
		return -EINVAL;
	}

	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
		    (unsigned long long)pfn);

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
			       PAGE_SIZE, vma->vm_page_prot)) {
		mlx5_ib_err(dev, "io remap failed\n");
		return -EAGAIN;
	}

	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);

	return 0;
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_uuar_info *uuari = &context->uuari;
	unsigned long command;

	command = get_command(vma->vm_pgoff);
	switch (command) {
	case MLX5_IB_MMAP_REGULAR_PAGE:
	case MLX5_IB_MMAP_WC_PAGE:
		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
				true, uuari, dev, context);

	case MLX5_IB_MMAP_NC_PAGE:
		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
				false, uuari, dev, context);

	default:
		return -EINVAL;
	}
}

static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_core_mr mr;
	int err;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	seg = &in->seg;
	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
		goto err_in;
	}

	kfree(in);
	*key = mr.key;

	return 0;

err_in:
	kfree(in);

	return err;
}

static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
	struct mlx5_core_mr mr;
	int err;

	memset(&mr, 0, sizeof(mr));
	mr.key = key;
	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
	if (err)
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}
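
/*
 * PD allocation: user contexts get the PD number copied back through
 * udata, while kernel PDs additionally get a physical-address mkey
 * (pa_lkey) for local DMA access.
 */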
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
				      struct ib_ucontext *context,
				      struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_pd_resp resp;
	struct mlx5_ib_pd *pd;
	int err;

	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
	if (err) {
		mlx5_ib_warn(dev, "pd alloc failed\n");
		kfree(pd);
		return ERR_PTR(err);
	}

	if (context) {
		resp.pdn = pd->pdn;
		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
			mlx5_ib_err(dev, "copy failed\n");
			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
			kfree(pd);
			return ERR_PTR(-EFAULT);
		}
	} else {
		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
		if (err) {
			mlx5_ib_err(dev, "alloc mkey failed\n");
			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
			kfree(pd);
			return ERR_PTR(err);
		}
	}

	return &pd->ibpd;
}

static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
	struct mlx5_ib_pd *mpd = to_mpd(pd);

	if (!pd->uobject)
		free_pa_mkey(mdev, mpd->pa_lkey);

	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
	kfree(mpd);

	return 0;
}

static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err;

	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
		err = -EOPNOTSUPP;
	else
		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
	if (err)
		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
			     ibqp->qp_num, gid->raw);

	return err;
}

static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err;

	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
		err = -EOPNOTSUPP;
	else
		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
	if (err)
		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
			     ibqp->qp_num, gid->raw);

	return err;
}

static int init_node_data(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
	if (err)
		return err;

	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}

static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
			     char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
}

static ssize_t show_reg_pages(struct device *device,
			      struct device_attribute *attr, char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
			   char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
		       dev->mdev->board_id);
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

static struct device_attribute *mlx5_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_fw_ver,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_fw_pages,
	&dev_attr_reg_pages,
};
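
/*
 * Fatal-error recovery: walk every QP on this device and, for each CQ
 * that may still hold unpolled work, invoke its completion handler once
 * so that consumers poll out the flushed-in-error completions.
 */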
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_qp *mqp;
	struct mlx5_ib_cq *send_mcq, *recv_mcq;
	struct mlx5_core_cq *mcq;
	struct list_head cq_armed_list;
	unsigned long flags_qp;
	unsigned long flags_cq;
	unsigned long flags;

	mlx5_ib_warn(ibdev, " started\n");
	INIT_LIST_HEAD(&cq_armed_list);

	/* Go over the QP list residing on this ibdev; sync with QP create/destroy. */
	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
		if (mqp->sq.tail != mqp->sq.head) {
			send_mcq = to_mcq(mqp->ibqp.send_cq);
			spin_lock_irqsave(&send_mcq->lock, flags_cq);
			if (send_mcq->mcq.comp &&
			    mqp->ibqp.send_cq->comp_handler) {
				if (!send_mcq->mcq.reset_notify_added) {
					send_mcq->mcq.reset_notify_added = 1;
					list_add_tail(&send_mcq->mcq.reset_notify,
						      &cq_armed_list);
				}
			}
			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
		}
		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
		/* no handling is needed for SRQ */
		if (!mqp->ibqp.srq) {
			if (mqp->rq.tail != mqp->rq.head) {
				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
				if (recv_mcq->mcq.comp &&
				    mqp->ibqp.recv_cq->comp_handler) {
					if (!recv_mcq->mcq.reset_notify_added) {
						recv_mcq->mcq.reset_notify_added = 1;
						list_add_tail(&recv_mcq->mcq.reset_notify,
							      &cq_armed_list);
					}
				}
				spin_unlock_irqrestore(&recv_mcq->lock,
						       flags_cq);
			}
		}
		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
	}
	/*
	 * At this point all in-flight post-send operations have been made
	 * visible by taking and releasing the locks above.  Now invoke the
	 * completion handlers of all involved CQs.
	 */
	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
		mcq->comp(mcq);
	}
	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
	mlx5_ib_warn(ibdev, " ended\n");
	return;
}
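
/*
 * Translate mlx5 core events into ib_events and dispatch them to
 * ibcore.  A fatal system error additionally marks the device inactive
 * and runs the reset-flow handler above.
 */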
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
			  enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
	struct ib_event ibev;
	u8 port = 0;

	switch (event) {
	case MLX5_DEV_EVENT_SYS_ERROR:
		ibdev->ib_active = false;
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		break;

	case MLX5_DEV_EVENT_PORT_UP:
		ibev.event = IB_EVENT_PORT_ACTIVE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PORT_DOWN:
	case MLX5_DEV_EVENT_PORT_INITIALIZED:
		ibev.event = IB_EVENT_PORT_ERR;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_LID_CHANGE:
		ibev.event = IB_EVENT_LID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PKEY_CHANGE:
		ibev.event = IB_EVENT_PKEY_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_GUID_CHANGE:
		ibev.event = IB_EVENT_GID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_CLIENT_REREG:
		ibev.event = IB_EVENT_CLIENT_REREGISTER;
		port = (u8)param;
		break;

	default:
		break;
	}

	ibev.device = &ibdev->ib_dev;
	ibev.element.port_num = port;

	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
	    (port < 1 || port > ibdev->num_ports)) {
		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
		return;
	}

	if (ibdev->ib_active)
		ib_dispatch_event(&ibev);
}

static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
	int port;

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
		mlx5_query_ext_port_caps(dev, port);
}

static void config_atomic_responder(struct mlx5_ib_dev *dev,
				    struct ib_device_attr *props)
{
	enum ib_atomic_cap cap = props->atomic_cap;

#if 0
	if (cap == IB_ATOMIC_HCA ||
	    cap == IB_ATOMIC_GLOB)
#endif
		dev->enable_atomic_resp = 1;

	dev->atomic_cap = cap;
}

enum mlx5_addr_align {
	MLX5_ADDR_ALIGN_0 = 0,
	MLX5_ADDR_ALIGN_64 = 64,
	MLX5_ADDR_ALIGN_128 = 128,
};
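
/*
 * Cache the per-port pkey/gid table sizes in the core device so the
 * rest of the stack can size its tables without re-querying firmware.
 */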
static int get_port_caps(struct mlx5_ib_dev *dev)
{
	struct ib_device_attr *dprops = NULL;
	struct ib_port_attr *pprops = NULL;
	int err = -ENOMEM;
	int port;

	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
	if (!pprops)
		goto out;

	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
	if (!dprops)
		goto out;

	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
	if (err) {
		mlx5_ib_warn(dev, "query_device failed %d\n", err);
		goto out;
	}
	config_atomic_responder(dev, dprops);

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
		if (err) {
			mlx5_ib_warn(dev, "query_port %d failed %d\n",
				     port, err);
			break;
		}
		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
			    dprops->max_pkeys, pprops->gid_tbl_len);
	}

out:
	kfree(pprops);
	kfree(dprops);

	return err;
}

static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_mr_cache_cleanup(dev);
	if (err)
		mlx5_ib_warn(dev, "mr cache cleanup failed\n");

	ib_dereg_mr(dev->umrc.mr);
	ib_dealloc_pd(dev->umrc.pd);
}

enum {
	MAX_UMR_WR = 128,
};

static int create_umr_res(struct mlx5_ib_dev *dev)
{
	struct ib_pd *pd;
	struct ib_mr *mr;
	int ret;

	pd = ib_alloc_pd(&dev->ib_dev);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		ret = PTR_ERR(pd);
		goto error_0;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(mr)) {
		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
		ret = PTR_ERR(mr);
		goto error_1;
	}

	dev->umrc.mr = mr;
	dev->umrc.pd = pd;

	ret = mlx5_mr_cache_init(dev);
	if (ret) {
		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
		goto error_4;
	}

	return 0;

error_4:
	ib_dereg_mr(mr);
error_1:
	ib_dealloc_pd(pd);
error_0:
	return ret;
}
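
/*
 * Create the device-global verbs resources (PD p0, CQ c0, XRC domains
 * x0/x1, SRQs s0/s1) used internally, for example by XRC QP creation.
 * They are built directly rather than through uverbs, so the reference
 * counting normally done by ibcore is filled in by hand here.
 */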
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device = &dev->ib_dev;
	devr->c0->uobject = NULL;
	devr->c0->comp_handler = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device = &dev->ib_dev;
	devr->s0->pd = devr->p0;
	devr->s0->uobject = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context = NULL;
	devr->s0->srq_type = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd = devr->x0;
	devr->s0->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device = &dev->ib_dev;
	devr->s1->pd = devr->p0;
	devr->s1->uobject = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context = NULL;
	devr->s1->srq_type = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);
}

static u32 get_core_cap_flags(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
	u32 ret = 0;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return RDMA_CORE_PORT_IBA_IB;

	ret = RDMA_CORE_PORT_RAW_PACKET;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
		return ret;

	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
		return ret;

	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE;

	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return ret;
}

static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = get_core_cap_flags(ibdev);
	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
		immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
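
/*
 * The DC tracer is a DMA buffer handed to firmware for tracing DC CNAK
 * packets.  It is enabled only on PFs that advertise the dc_cnak_trace
 * capability; see init_dc_improvements() below.
 */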
static void enable_dc_tracer(struct mlx5_ib_dev *dev)
{
	struct device *device = dev->ib_dev.dma_device;
	struct mlx5_dc_tracer *dct = &dev->dctr;
	int order;
	void *tmp;
	int size;
	int err;

	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
	if (size <= PAGE_SIZE)
		order = 0;
	else
		order = 1;

	dct->pg = alloc_pages(GFP_KERNEL, order);
	if (!dct->pg) {
		mlx5_ib_err(dev, "failed to allocate pages (order %d)\n", order);
		return;
	}

	tmp = page_address(dct->pg);
	memset(tmp, 0xff, size);

	dct->size = size;
	dct->order = order;
	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(device, dct->dma)) {
		mlx5_ib_err(dev, "dma mapping error\n");
		goto map_err;
	}

	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
	if (err) {
		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
		goto cmd_err;
	}

	return;

cmd_err:
	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
map_err:
	__free_pages(dct->pg, dct->order);
	dct->pg = NULL;
}

static void disable_dc_tracer(struct mlx5_ib_dev *dev)
{
	struct device *device = dev->ib_dev.dma_device;
	struct mlx5_dc_tracer *dct = &dev->dctr;
	int err;

	if (!dct->pg)
		return;

	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
	if (err) {
		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
		return;
	}

	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
	__free_pages(dct->pg, dct->order);
	dct->pg = NULL;
}

enum {
	MLX5_DC_CNAK_SIZE = 128,
	MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
	MLX5_DC_CNAK_SL = 0,
	MLX5_DC_CNAK_VL = 0,
};

static int init_dc_improvements(struct mlx5_ib_dev *dev)
{
	if (!mlx5_core_is_pf(dev->mdev))
		return 0;

	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
		return 0;

	enable_dc_tracer(dev);

	return 0;
}

static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{

	disable_dc_tracer(dev);
}

static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
{
	mlx5_vport_dealloc_q_counter(dev->mdev,
				     MLX5_INTERFACE_PROTOCOL_IB,
				     dev->port[port_num].q_cnt_id);
	dev->port[port_num].q_cnt_id = 0;
}

static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_ports; i++)
		mlx5_ib_dealloc_q_port_counter(dev, i);
}

static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
	int i;
	int ret;

	for (i = 0; i < dev->num_ports; i++) {
		ret = mlx5_vport_alloc_q_counter(dev->mdev,
						 MLX5_INTERFACE_PROTOCOL_IB,
						 &dev->port[i].q_cnt_id);
		if (ret) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d\n",
				     i + 1);
			goto dealloc_counters;
		}
	}

	return 0;

dealloc_counters:
	while (--i >= 0)
		mlx5_ib_dealloc_q_port_counter(dev, i);

	return ret;
}

struct port_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_port *,
			struct port_attribute *, char *buf);
	ssize_t (*store)(struct mlx5_ib_port *,
			 struct port_attribute *,
			 const char *buf, size_t count);
};

struct port_counter_attribute {
	struct port_attribute attr;
	size_t offset;
};

static ssize_t port_attr_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct port_attribute *port_attr =
		container_of(attr, struct port_attribute, attr);
	struct mlx5_ib_port_sysfs_group *p =
		container_of(kobj, struct mlx5_ib_port_sysfs_group,
			     kobj);
	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
						    group);

	if (!port_attr->show)
		return -EIO;

	return port_attr->show(mibport, port_attr, buf);
}
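
/*
 * Each counter attribute records the byte offset of its field inside
 * the QUERY_Q_COUNTER output; the show handler issues the query and
 * extracts the 32-bit big-endian value at that offset.
 */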
static ssize_t show_port_counter(struct mlx5_ib_port *p,
				 struct port_attribute *port_attr,
				 char *buf)
{
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	struct port_counter_attribute *counter_attr =
		container_of(port_attr, struct port_counter_attribute, attr);
	void *out;
	int ret;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_query_q_counter(p->dev->mdev,
					 p->q_cnt_id, 0,
					 out, outlen);
	if (ret)
		goto free;

	ret = sprintf(buf, "%d\n",
		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));

free:
	kvfree(out);
	return ret;
}

#define PORT_COUNTER_ATTR(_name)					\
struct port_counter_attribute port_counter_attr_##_name = {		\
	.attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
}

static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);

static struct attribute *counter_attrs[] = {
	&port_counter_attr_rx_write_requests.attr.attr,
	&port_counter_attr_rx_read_requests.attr.attr,
	&port_counter_attr_rx_atomic_requests.attr.attr,
	&port_counter_attr_rx_dct_connect.attr.attr,
	&port_counter_attr_out_of_buffer.attr.attr,
	&port_counter_attr_out_of_sequence.attr.attr,
	&port_counter_attr_duplicate_request.attr.attr,
	&port_counter_attr_rnr_nak_retry_err.attr.attr,
	&port_counter_attr_packet_seq_err.attr.attr,
	&port_counter_attr_implied_nak_seq_err.attr.attr,
	&port_counter_attr_local_ack_timeout_err.attr.attr,
	NULL
};

static struct attribute_group port_counters_group = {
	.name = "counters",
	.attrs = counter_attrs
};

static const struct sysfs_ops port_sysfs_ops = {
	.show = port_attr_show
};

static struct kobj_type port_type = {
	.sysfs_ops = &port_sysfs_ops,
};

static int add_port_attrs(struct mlx5_ib_dev *dev,
			  struct kobject *parent,
			  struct mlx5_ib_port_sysfs_group *port,
			  u8 port_num)
{
	int ret;

	ret = kobject_init_and_add(&port->kobj, &port_type,
				   parent,
				   "%d", port_num);
	if (ret)
		return ret;

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		ret = sysfs_create_group(&port->kobj, &port_counters_group);
		if (ret)
			goto put_kobj;
	}

	port->enabled = true;
	return ret;

put_kobj:
	kobject_put(&port->kobj);
	return ret;
}

static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
				unsigned int num_ports)
{
	unsigned int i;

	for (i = 0; i < num_ports; i++) {
		struct mlx5_ib_port_sysfs_group *port =
			&dev->port[i].group;

		if (!port->enabled)
			continue;

		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
			sysfs_remove_group(&port->kobj,
					   &port_counters_group);
		kobject_put(&port->kobj);
		port->enabled = false;
	}

	if (dev->ports_parent) {
		kobject_put(dev->ports_parent);
		dev->ports_parent = NULL;
	}
}
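
/*
 * Device add/probe entry point.  Ordering matters here: port caps are
 * read (and RoCE enabled on Ethernet ports) before the ib_device is
 * populated, and the device is registered with ibcore only after its
 * internal resources and Q counters exist.
 */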
static int create_port_attrs(struct mlx5_ib_dev *dev)
{
	int ret = 0;
	unsigned int i = 0;
	struct device *device = &dev->ib_dev.dev;

	dev->ports_parent = kobject_create_and_add("mlx5_ports",
						   &device->kobj);
	if (!dev->ports_parent)
		return -ENOMEM;

	for (i = 0; i < dev->num_ports; i++) {
		ret = add_port_attrs(dev,
				     dev->ports_parent,
				     &dev->port[i].group,
				     i + 1);
		if (ret)
			goto _destroy_ports_attrs;
	}

	return 0;

_destroy_ports_attrs:
	destroy_ports_attrs(dev, i);
	return ret;
}

static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	int err;
	int i;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
			    GFP_KERNEL);
	if (!dev->port)
		goto err_dealloc;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].dev = dev;
		dev->port[i].port_num = i;
		dev->port[i].port_gone = 0;
		memset(dev->port[i].gid_table, 0,
		       sizeof(dev->port[i].gid_table));
	}

	err = get_port_caps(dev);
	if (err)
		goto err_free_port;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (MLX5_CAP_GEN(mdev, roce)) {
			err = mlx5_nic_vport_enable_roce(mdev);
			if (err)
				goto err_free_port;
		} else {
			goto err_free_port;
		}
	}

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
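	/*
	 * Wire up the verbs dispatch table.  Every command enabled in
	 * uverbs_cmd_mask above needs a handler here before
	 * ib_register_device(); the optional XRC verbs are added
	 * further below only when the xrc capability is present.
	 */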
	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.get_port_immutable = mlx5_port_immutable;
	dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
	dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	err = init_node_data(dev);
	if (err)
		goto err_disable_roce;

	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_alloc_q_counters(dev);
	if (err)
		goto err_odp;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_q_cnt;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
		if (init_dc_improvements(dev))
			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
	}

	err = create_port_attrs(dev);
	if (err)
		goto err_dc;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_port_attrs;
	}
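	/*
	 * One helper kthread per port keeps the RoCE GID table in sync
	 * with the network stack (mlx5_ib_roce_port_update()).  The
	 * teardown handshake in mlx5_ib_remove() relies on each thread
	 * observing port_gone == 1 and acknowledging with
	 * port_gone = 2 before it exits.
	 */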
	{
		struct thread *rl_thread = NULL;
		struct proc *rl_proc = NULL;

		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
			(void) kproc_kthread_add(mlx5_ib_roce_port_update,
			    dev->port + i, &rl_proc, &rl_thread, RFHIGHPID, 0,
			    "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
		}
	}

	dev->ib_active = true;

	return dev;

err_port_attrs:
	destroy_ports_attrs(dev, dev->num_ports);

err_dc:
	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_q_cnt:
	mlx5_ib_dealloc_q_counters(dev);

err_odp:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

err_free_port:
	kfree(dev->port);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}

static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
				   mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add      = mlx5_ib_add,
	.remove   = mlx5_ib_remove,
	.event    = mlx5_ib_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		printf("mlx5_ib: WARN: prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		goto clean_odp;

	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
	if (!mlx5_ib_wq) {
		printf("mlx5_ib: ERR: %s: failed to create mlx5_ib_wq\n",
		    __func__);
		/* was missing: without this the function returned 0 */
		err = -ENOMEM;
		goto err_unreg;
	}

	return 0;

err_unreg:
	mlx5_unregister_interface(&mlx5_ib_interface);

clean_odp:
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
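/*
 * SI_ORDER_THIRD sequences mlx5_ib_init()/mlx5_ib_cleanup() after
 * earlier-ordered SYSINITs in the same subsystem, so the mlx5 core
 * driver is expected to be initialized before the interface above is
 * registered.
 */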