mlx5_ib_main.c revision 323223
/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 323223 2017-09-06 15:33:23Z hselasky $
 */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <net/ipv6.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <linux/fs.h>
#undef inode

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"

#include <sys/unistd.h>

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2-rc1"
#define DRIVER_RELDATE "May 2016"

#undef MODULE_VERSION
#include <sys/module.h>

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
Moved to module mlx5_core"); 69 70enum { 71 MLX5_STANDARD_ATOMIC_SIZE = 0x8, 72}; 73 74struct workqueue_struct *mlx5_ib_wq; 75 76static char mlx5_version[] = 77 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" 78 DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; 79 80static void get_atomic_caps(struct mlx5_ib_dev *dev, 81 struct ib_device_attr *props) 82{ 83 int tmp; 84 u8 atomic_operations; 85 u8 atomic_size_qp; 86 u8 atomic_req_endianess; 87 88 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 89 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); 90 atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev, 91 atomic_req_8B_endianess_mode) || 92 !mlx5_host_is_le(); 93 94 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; 95 if (((atomic_operations & tmp) == tmp) 96 && (atomic_size_qp & 8)) { 97 if (atomic_req_endianess) { 98 props->atomic_cap = IB_ATOMIC_HCA; 99 } else { 100 props->atomic_cap = IB_ATOMIC_NONE; 101 } 102 } else { 103 props->atomic_cap = IB_ATOMIC_NONE; 104 } 105 106 tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD; 107 if (((atomic_operations & tmp) == tmp) 108 &&(atomic_size_qp & 8)) { 109 if (atomic_req_endianess) 110 props->masked_atomic_cap = IB_ATOMIC_HCA; 111 else { 112 props->masked_atomic_cap = IB_ATOMIC_NONE; 113 } 114 } else { 115 props->masked_atomic_cap = IB_ATOMIC_NONE; 116 } 117} 118 119static enum rdma_link_layer 120mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num) 121{ 122 struct mlx5_ib_dev *dev = to_mdev(device); 123 124 switch (MLX5_CAP_GEN(dev->mdev, port_type)) { 125 case MLX5_CAP_PORT_TYPE_IB: 126 return IB_LINK_LAYER_INFINIBAND; 127 case MLX5_CAP_PORT_TYPE_ETH: 128 return IB_LINK_LAYER_ETHERNET; 129 default: 130 return IB_LINK_LAYER_UNSPECIFIED; 131 } 132} 133 134static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 135{ 136 return !dev->mdev->issi; 137} 138 139enum { 140 MLX5_VPORT_ACCESS_METHOD_MAD, 141 MLX5_VPORT_ACCESS_METHOD_HCA, 142 MLX5_VPORT_ACCESS_METHOD_NIC, 143}; 144 145static int mlx5_get_vport_access_method(struct ib_device *ibdev) 146{ 147 if (mlx5_use_mad_ifc(to_mdev(ibdev))) 148 return MLX5_VPORT_ACCESS_METHOD_MAD; 149 150 if (mlx5_ib_port_link_layer(ibdev, 1) == 151 IB_LINK_LAYER_ETHERNET) 152 return MLX5_VPORT_ACCESS_METHOD_NIC; 153 154 return MLX5_VPORT_ACCESS_METHOD_HCA; 155} 156 157static int mlx5_query_system_image_guid(struct ib_device *ibdev, 158 __be64 *sys_image_guid) 159{ 160 struct mlx5_ib_dev *dev = to_mdev(ibdev); 161 struct mlx5_core_dev *mdev = dev->mdev; 162 u64 tmp; 163 int err; 164 165 switch (mlx5_get_vport_access_method(ibdev)) { 166 case MLX5_VPORT_ACCESS_METHOD_MAD: 167 return mlx5_query_system_image_guid_mad_ifc(ibdev, 168 sys_image_guid); 169 170 case MLX5_VPORT_ACCESS_METHOD_HCA: 171 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); 172 if (!err) 173 *sys_image_guid = cpu_to_be64(tmp); 174 return err; 175 176 case MLX5_VPORT_ACCESS_METHOD_NIC: 177 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); 178 if (!err) 179 *sys_image_guid = cpu_to_be64(tmp); 180 return err; 181 182 default: 183 return -EINVAL; 184 } 185} 186 187static int mlx5_query_max_pkeys(struct ib_device *ibdev, 188 u16 *max_pkeys) 189{ 190 struct mlx5_ib_dev *dev = to_mdev(ibdev); 191 struct mlx5_core_dev *mdev = dev->mdev; 192 193 switch (mlx5_get_vport_access_method(ibdev)) { 194 case MLX5_VPORT_ACCESS_METHOD_MAD: 195 return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys); 196 197 case MLX5_VPORT_ACCESS_METHOD_HCA: 198 case MLX5_VPORT_ACCESS_METHOD_NIC: 199 

static int mlx5_query_vendor_id(struct ib_device *ibdev,
				u32 *vendor_id)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

	default:
		return -EINVAL;
	}
}

static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
				__be64 *node_guid)
{
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_node_guid_mad_ifc(dev, node_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
		if (!err)
			*node_guid = cpu_to_be64(tmp);
		return err;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
		if (!err)
			*node_guid = cpu_to_be64(tmp);
		return err;

	default:
		return -EINVAL;
	}
}

struct mlx5_reg_node_desc {
	u8 desc[64];
};

static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
	struct mlx5_reg_node_desc in;

	if (mlx5_use_mad_ifc(dev))
		return mlx5_query_node_desc_mad_ifc(dev, node_desc);

	memset(&in, 0, sizeof(in));

	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
				    sizeof(struct mlx5_reg_node_desc),
				    MLX5_REG_NODE_DESC, 0, 0);
}

static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	int err;

	memset(props, 0, sizeof(*props));

	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		((u64)fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT |
		IB_DEVICE_SYS_IMAGE_GUID |
		IB_DEVICE_RC_RNR_NAK_GEN;

	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	props->vendor_part_id = mdev->pdev->device;
	props->hw_ver = mdev->pdev->revision;

	props->max_mr_size = ~0ull;
	props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) - 1);
	props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
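	/*
	 * Editorial note: the SGE math below bounds each send WQE to at
	 * most 512 bytes of descriptor. A worked example, assuming the
	 * usual 16-byte sizes of mlx5_wqe_ctrl_seg, mlx5_wqe_raddr_seg and
	 * mlx5_wqe_data_seg (stated here for illustration only; the
	 * sizeof() expressions below are authoritative):
	 *
	 *	max_sq_desc = min(max_wqe_sz_sq, 512) = 512
	 *	max_sq_sg   = (512 - 16 - 16) / 16 = 30 scatter/gather entries
	 *
	 * The limit reported to ULPs is the smaller of the SQ and RQ
	 * values, since ib_device_attr carries a single max_sge field.
	 */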
	max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		sizeof(struct mlx5_wqe_data_seg);
	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc -
		     sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) /
		sizeof(struct mlx5_wqe_data_seg);
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge = max_rq_sg - 1;
	props->max_fast_reg_page_list_len = (unsigned int)-1;
	get_atomic_caps(dev, props);
	props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah = INT_MAX;

	return 0;
}

enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X = 1 << 0,
	MLX5_IB_WIDTH_2X = 1 << 1,
	MLX5_IB_WIDTH_4X = 1 << 2,
	MLX5_IB_WIDTH_8X = 1 << 3,
	MLX5_IB_WIDTH_12X = 1 << 4
};

static int translate_active_width(struct ib_device *ibdev, u8 active_width,
				  u8 *ib_width)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int err = 0;

	if (active_width & MLX5_IB_WIDTH_1X) {
		*ib_width = IB_WIDTH_1X;
	} else if (active_width & MLX5_IB_WIDTH_2X) {
		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
			     (int)active_width);
		err = -EINVAL;
	} else if (active_width & MLX5_IB_WIDTH_4X) {
		*ib_width = IB_WIDTH_4X;
	} else if (active_width & MLX5_IB_WIDTH_8X) {
		*ib_width = IB_WIDTH_8X;
	} else if (active_width & MLX5_IB_WIDTH_12X) {
		*ib_width = IB_WIDTH_12X;
	} else {
		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
			    (int)active_width);
		err = -EINVAL;
	}

	return err;
}

/*
 * TODO: Move to IB core
 */
enum ib_max_vl_num {
	__IB_MAX_VL_0 = 1,
	__IB_MAX_VL_0_1 = 2,
	__IB_MAX_VL_0_3 = 3,
	__IB_MAX_VL_0_7 = 4,
	__IB_MAX_VL_0_14 = 5,
};

enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0 = 1,
	MLX5_VL_HW_0_1 = 2,
	MLX5_VL_HW_0_2 = 3,
	MLX5_VL_HW_0_3 = 4,
	MLX5_VL_HW_0_4 = 5,
	MLX5_VL_HW_0_5 = 6,
	MLX5_VL_HW_0_6 = 7,
	MLX5_VL_HW_0_7 = 8,
	MLX5_VL_HW_0_14 = 15
};

static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
				u8 *max_vl_num)
{
	switch (vl_hw_cap) {
	case MLX5_VL_HW_0:
		*max_vl_num = __IB_MAX_VL_0;
		break;
	case MLX5_VL_HW_0_1:
		*max_vl_num = __IB_MAX_VL_0_1;
		break;
	case MLX5_VL_HW_0_3:
		*max_vl_num = __IB_MAX_VL_0_3;
		break;
	case MLX5_VL_HW_0_7:
		*max_vl_num = __IB_MAX_VL_0_7;
		break;
	case MLX5_VL_HW_0_14:
		*max_vl_num = __IB_MAX_VL_0_14;
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u32 *rep;
	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
	struct mlx5_ptys_reg *ptys;
	struct mlx5_pmtu_reg *pmtu;
	struct mlx5_pvlc_reg pvlc;
	void *ctx;
	int err;

	rep = mlx5_vzalloc(outlen);
	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
	if (!rep || !ptys || !pmtu) {
		err = -ENOMEM;
		goto out;
	}

	memset(props, 0, sizeof(*props));

	/* TODO: what about a PF with dual ports? */
	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
	if (err)
		goto out;

	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

	props->lid = MLX5_GET(hca_vport_context, ctx, lid);
	props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
	props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
	props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
	props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
	props->phys_state = MLX5_GET(hca_vport_context, ctx,
				     port_physical_state);
	props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
	props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
					pkey_violation_counter);
	props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
					 qkey_violation_counter);
	props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
					 subnet_timeout);
	props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
					  init_type_reply);

	ptys->proto_mask |= MLX5_PTYS_IB;
	ptys->local_port = port;
	err = mlx5_core_access_ptys(mdev, ptys, 0);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;

	props->active_speed = (u8)ptys->ib_proto_oper;

	pmtu->local_port = port;
	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
	if (err)
		goto out;

	props->max_mtu = pmtu->max_mtu;
	props->active_mtu = pmtu->oper_mtu;

	memset(&pvlc, 0, sizeof(pvlc));
	pvlc.local_port = port;
	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
				   &props->max_vl_num);
out:
	kvfree(rep);
	kfree(ptys);
	kfree(pmtu);
	return err;
}

int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
		       struct ib_port_attr *props)
{
	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_port_mad_ifc(ibdev, port, props);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_port_ib(ibdev, port, props);

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_port_roce(ibdev, port, props);

	default:
		return -EINVAL;
	}
}

static inline int
mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
{
	if (dev->if_addrlen != ETH_ALEN)
		return -1;
	memcpy(eui, IF_LLADDR(dev), 3);
	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);

	/* NOTE: The scope ID is added by the GID to IP conversion */

	eui[3] = 0xFF;
	eui[4] = 0xFE;
	eui[0] ^= 2;
	return 0;
}
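/*
 * Editorial note (not from the original source): the helper above forms a
 * modified EUI-64 interface ID, as used for IPv6 link-local addresses. For
 * example, the MAC address 00:25:8b:01:02:03 maps to the interface ID
 * 02:25:8b:ff:fe:01:02:03 (ff:fe inserted in the middle, universal/local
 * bit of the first octet inverted), so the default GID built below becomes
 * fe80::225:8bff:fe01:203.
 */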
static void
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
{
	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
}

static inline int
mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
{
	switch (addr->sa_family) {
	case AF_INET:
		ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
				       (struct in6_addr *)gid->raw);
		break;
	case AF_INET6:
		memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
		/* clear SCOPE ID */
		gid->raw[2] = 0;
		gid->raw[3] = 0;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

static void
mlx5_ib_roce_port_update(void *arg)
{
	struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
	struct mlx5_ib_dev *dev = port->dev;
	struct mlx5_core_dev *mdev = dev->mdev;
	struct net_device *xdev[MLX5_IB_GID_MAX];
	struct net_device *idev;
	struct net_device *ndev;
	struct ifaddr *ifa;
	union ib_gid gid_temp;

	while (port->port_gone == 0) {
		int update = 0;
		int gid_index = 0;
		int j;
		int error;

		ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
		if (ndev == NULL) {
			pause("W", hz);
			continue;
		}

		CURVNET_SET_QUIET(ndev->if_vnet);

		memset(&gid_temp, 0, sizeof(gid_temp));
		mlx5_make_default_gid(ndev, &gid_temp);
		if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
			port->gid_table[gid_index] = gid_temp;
			update = 1;
		}
		xdev[gid_index] = ndev;
		gid_index++;

		IFNET_RLOCK();
		TAILQ_FOREACH(idev, &V_ifnet, if_link) {
			if (idev == ndev)
				break;
		}
		if (idev != NULL) {
			TAILQ_FOREACH(idev, &V_ifnet, if_link) {
				if (idev != ndev) {
					if (idev->if_type != IFT_L2VLAN)
						continue;
					if (ndev != rdma_vlan_dev_real_dev(idev))
						continue;
				}
				/* clone address information for IPv4 and IPv6 */
				IF_ADDR_RLOCK(idev);
				TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
					if (ifa->ifa_addr == NULL ||
					    (ifa->ifa_addr->sa_family != AF_INET &&
					     ifa->ifa_addr->sa_family != AF_INET6) ||
					    gid_index >= MLX5_IB_GID_MAX)
						continue;
					memset(&gid_temp, 0, sizeof(gid_temp));
					mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
					/* check for existing entry */
					for (j = 0; j != gid_index; j++) {
						if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
							break;
					}
					/* check if new entry must be added */
					if (j == gid_index) {
						if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
							port->gid_table[gid_index] = gid_temp;
							update = 1;
						}
						xdev[gid_index] = idev;
						gid_index++;
					}
				}
				IF_ADDR_RUNLOCK(idev);
			}
		}
		IFNET_RUNLOCK();
		CURVNET_RESTORE();

		if (update != 0 &&
		    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
			struct ib_event event = {
				.device = &dev->ib_dev,
				.element.port_num = port->port_num + 1,
				.event = IB_EVENT_GID_CHANGE,
			};

			/* add new entries, if any */
			for (j = 0; j != gid_index; j++) {
				error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, xdev[j]);
				if (error != 0)
					printf("mlx5_ib: Failed to update RoCE GID table: %d\n", error);
			}
			memset(&gid_temp, 0, sizeof(gid_temp));

			/* clear old entries, if any */
			for (; j != MLX5_IB_GID_MAX; j++) {
				if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
					continue;
				port->gid_table[j] = gid_temp;
				(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, ndev);
			}

			/* make sure ibcore gets updated */
			ib_dispatch_event(&event);
		}
		pause("W", hz);
	}
	do {
		struct ib_event event = {
			.device = &dev->ib_dev,
			.element.port_num = port->port_num + 1,
			.event = IB_EVENT_GID_CHANGE,
		};

		/* make sure ibcore gets updated */
		ib_dispatch_event(&event);

		/* wait a bit */
		pause("W", hz);
	} while (0);
	port->port_gone = 2;
	kthread_exit();
}

static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
		    index < 0 || index >= MLX5_IB_GID_MAX ||
		    dev->port[port - 1].port_gone != 0)
			memset(gid, 0, sizeof(*gid));
		else
			*gid = dev->port[port - 1].gid_table[index];
		return 0;

	default:
		return -EINVAL;
	}
}

static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case MLX5_VPORT_ACCESS_METHOD_NIC:
		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
						 pkey);

	default:
		return -EINVAL;
	}
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_reg_node_desc in;
	struct mlx5_reg_node_desc out;
	int err;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
		return 0;

	/*
	 * If possible, pass node desc to FW, so it can generate
	 * a 144 trap. If cmd fails, just ignore.
	 */
	memcpy(&in, props->node_desc, 64);
	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
	if (err)
		return err;

	memcpy(ibdev->node_desc, props->node_desc, 64);

	return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
		     IB_LINK_LAYER_ETHERNET);
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 tmp;
	int err;
	/*
	 * Return OK if this is RoCE. The CM calls ib_modify_port()
	 * regardless of whether the port link layer is ETH or IB. For ETH
	 * ports, qkey violations and port capabilities are not meaningful.
	 */
	if (is_eth)
		return 0;

	mutex_lock(&dev->cap_mask_mutex);

	err = mlx5_ib_query_port(ibdev, port, &attr);
	if (err)
		goto out;

	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
	    ~props->clr_port_cap_mask;

	err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
	mutex_unlock(&dev->cap_mask_mutex);
	return err;
}

enum mlx5_cap_flags {
	MLX5_CAP_COMPACT_AV = 1 << 0,
};

static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
{
	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
	    MLX5_CAP_COMPACT_AV : 0;
}

static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_ucontext_req_v2 req;
	struct mlx5_ib_alloc_ucontext_resp resp;
	struct mlx5_ib_ucontext *context;
	struct mlx5_uuar_info *uuari;
	struct mlx5_uar *uars;
	int gross_uuars;
	int num_uars;
	int ver;
	int uuarn;
	int err;
	int i;
	size_t reqlen;

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	memset(&req, 0, sizeof(req));
	memset(&resp, 0, sizeof(resp));

	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
		ver = 0;
	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
		ver = 2;
	else {
		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
		return ERR_PTR(-EINVAL);
	}

	err = ib_copy_from_udata(&req, udata, reqlen);
	if (err) {
		mlx5_ib_err(dev, "copy failed\n");
		return ERR_PTR(err);
	}

	if (req.reserved) {
		mlx5_ib_err(dev, "request corrupted\n");
		return ERR_PTR(-EINVAL);
	}

	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
		return ERR_PTR(-ENOMEM);
	}

	req.total_num_uuars = ALIGN(req.total_num_uuars,
				    MLX5_NON_FP_BF_REGS_PER_PAGE);
	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
			     req.num_low_latency_uuars,
			     req.total_num_uuars - 1);
		return ERR_PTR(-EINVAL);
	}

	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
	resp.cache_line_size = L1_CACHE_BYTES;
	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
	set_mlx5_flags(&resp.flags, dev->mdev);

	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);

	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	uuari = &context->uuari;
	mutex_init(&uuari->lock);
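	/*
	 * Editorial note on the UUAR bookkeeping below, assuming the
	 * driver's usual constants (MLX5_BF_REGS_PER_PAGE = 4 and
	 * MLX5_NON_FP_BF_REGS_PER_PAGE = 2; mlx5_ib.h is authoritative):
	 * each UAR page carries four BlueFlame registers, two of which are
	 * reserved for fast-path use. That is why total_num_uuars is
	 * aligned to the non-fast-path count, gross_uuars is four per
	 * page, and the loop below pre-sets bitmap bits wherever
	 * (i & 3) is 2 or 3 so ordinary allocations skip those slots.
	 */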
	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
	if (!uars) {
		err = -ENOMEM;
		goto out_ctx;
	}

	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
				sizeof(*uuari->bitmap),
				GFP_KERNEL);
	if (!uuari->bitmap) {
		err = -ENOMEM;
		goto out_uar_ctx;
	}
	/*
	 * clear all fast path uuars
	 */
	for (i = 0; i < gross_uuars; i++) {
		uuarn = i & 3;
		if (uuarn == 2 || uuarn == 3)
			set_bit(i, uuari->bitmap);
	}

	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
	if (!uuari->count) {
		err = -ENOMEM;
		goto out_bitmap;
	}

	for (i = 0; i < num_uars; i++) {
		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
		if (err) {
			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
			goto out_uars;
		}
	}
	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;

	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	resp.tot_uuars = req.total_num_uuars;
	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
	err = ib_copy_to_udata(udata, &resp,
			       min_t(size_t, udata->outlen, sizeof(resp)));
	if (err)
		goto out_uars;

	uuari->ver = ver;
	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
	uuari->uars = uars;
	uuari->num_uars = num_uars;

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
		if (err)
			goto out_uars;
	}

	return &context->ibucontext;

out_uars:
	for (i--; i >= 0; i--)
		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
	kfree(uuari->count);

out_bitmap:
	kfree(uuari->bitmap);

out_uar_ctx:
	kfree(uars);

out_ctx:
	kfree(context);
	return ERR_PTR(err);
}

static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_uuar_info *uuari = &context->uuari;
	int i;

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

	for (i = 0; i < uuari->num_uars; i++) {
		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
	}
	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
	}

	kfree(uuari->count);
	kfree(uuari->bitmap);
	kfree(uuari->uars);
	kfree(context);

	return 0;
}

static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
}

static int get_command(unsigned long offset)
{
	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
}

static int get_arg(unsigned long offset)
{
	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
}

static int get_index(unsigned long offset)
{
	return get_arg(offset);
}
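/*
 * Editorial note: userspace encodes both a command and an argument into
 * the mmap(2) page offset, and the helpers above split them apart. A
 * sketch of the layout, assuming MLX5_IB_MMAP_CMD_SHIFT is 8 (see
 * mlx5_ib.h for the real value):
 *
 *	vm_pgoff = (command << MLX5_IB_MMAP_CMD_SHIFT) | index
 *
 * so mapping UAR #3 with MLX5_IB_MMAP_REGULAR_PAGE (command 0) would mean
 * calling mmap() with a byte offset of 3 * PAGE_SIZE.
 */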
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
		    struct mlx5_ib_ucontext *context)
{
	unsigned long idx;
	phys_addr_t pfn;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
		return -EINVAL;
	}

	idx = get_index(vma->vm_pgoff);
	if (idx >= uuari->num_uars) {
		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
			     idx, uuari->num_uars);
		return -EINVAL;
	}

	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
		    (unsigned long long)pfn);

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
			       PAGE_SIZE, vma->vm_page_prot)) {
		mlx5_ib_err(dev, "io remap failed\n");
		return -EAGAIN;
	}

	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);

	return 0;
}

static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_uuar_info *uuari = &context->uuari;
	unsigned long command;

	command = get_command(vma->vm_pgoff);
	switch (command) {
	case MLX5_IB_MMAP_REGULAR_PAGE:
	case MLX5_IB_MMAP_WC_PAGE:
		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
				true, uuari, dev, context);

	case MLX5_IB_MMAP_NC_PAGE:
		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
				false, uuari, dev, context);

	default:
		return -EINVAL;
	}
}

static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_core_mr mr;
	int err;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	seg = &in->seg;
	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
		goto err_in;
	}

	kfree(in);
	*key = mr.key;

	return 0;

err_in:
	kfree(in);

	return err;
}

static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
{
	struct mlx5_core_mr mr;
	int err;

	memset(&mr, 0, sizeof(mr));
	mr.key = key;
	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
	if (err)
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
}

static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
				      struct ib_ucontext *context,
				      struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_pd_resp resp;
	struct mlx5_ib_pd *pd;
	int err;

	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
	if (err) {
		mlx5_ib_warn(dev, "pd alloc failed\n");
		kfree(pd);
		return ERR_PTR(err);
	}
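	/*
	 * Editorial note: the branch below distinguishes user PDs from
	 * in-kernel PDs. For a user context the new pdn is simply returned
	 * through udata; for kernel PDs the driver additionally creates a
	 * physical-address mkey via alloc_pa_mkey() above
	 * (MLX5_ACCESS_MODE_PA with MLX5_MKEY_LEN64), giving kernel
	 * consumers an lkey that addresses physical memory directly.
	 */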
	if (context) {
		resp.pdn = pd->pdn;
		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
			mlx5_ib_err(dev, "copy failed\n");
			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
			kfree(pd);
			return ERR_PTR(-EFAULT);
		}
	} else {
		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
		if (err) {
			mlx5_ib_err(dev, "alloc mkey failed\n");
			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
			kfree(pd);
			return ERR_PTR(err);
		}
	}

	return &pd->ibpd;
}

static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
	struct mlx5_ib_pd *mpd = to_mpd(pd);

	if (!pd->uobject)
		free_pa_mkey(mdev, mpd->pa_lkey);

	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
	kfree(mpd);

	return 0;
}

static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err;

	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
		err = -EOPNOTSUPP;
	else
		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
	if (err)
		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
			     ibqp->qp_num, gid->raw);

	return err;
}

static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err;

	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
		err = -EOPNOTSUPP;
	else
		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
	if (err)
		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
			     ibqp->qp_num, gid->raw);

	return err;
}

static int init_node_data(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
	if (err)
		return err;

	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}

static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
			     char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
}

static ssize_t show_reg_pages(struct device *device,
			      struct device_attribute *attr, char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
			   char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
}

static ssize_t show_board(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);

	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
		       dev->mdev->board_id);
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

static struct device_attribute *mlx5_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_fw_ver,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_fw_pages,
	&dev_attr_reg_pages,
};

static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_qp *mqp;
	struct mlx5_ib_cq *send_mcq, *recv_mcq;
	struct mlx5_core_cq *mcq;
	struct list_head cq_armed_list;
	unsigned long flags_qp;
	unsigned long flags_cq;
	unsigned long flags;

	mlx5_ib_warn(ibdev, " started\n");
	INIT_LIST_HEAD(&cq_armed_list);

	/*
	 * Walk the QP list of this ibdev, synchronized against QP
	 * create/destroy.
	 */
	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
		if (mqp->sq.tail != mqp->sq.head) {
			send_mcq = to_mcq(mqp->ibqp.send_cq);
			spin_lock_irqsave(&send_mcq->lock, flags_cq);
			if (send_mcq->mcq.comp &&
			    mqp->ibqp.send_cq->comp_handler) {
				if (!send_mcq->mcq.reset_notify_added) {
					send_mcq->mcq.reset_notify_added = 1;
					list_add_tail(&send_mcq->mcq.reset_notify,
						      &cq_armed_list);
				}
			}
			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
		}
		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
		/* no handling is needed for SRQ */
		if (!mqp->ibqp.srq) {
			if (mqp->rq.tail != mqp->rq.head) {
				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
				if (recv_mcq->mcq.comp &&
				    mqp->ibqp.recv_cq->comp_handler) {
					if (!recv_mcq->mcq.reset_notify_added) {
						recv_mcq->mcq.reset_notify_added = 1;
						list_add_tail(&recv_mcq->mcq.reset_notify,
							      &cq_armed_list);
					}
				}
				spin_unlock_irqrestore(&recv_mcq->lock,
						       flags_cq);
			}
		}
		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
	}
	/*
	 * At this point all in-flight post-send operations have been
	 * observed under the locks taken above; now arm all involved CQs
	 * so their completion handlers run.
	 */
	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
		mcq->comp(mcq);
	}
	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
	mlx5_ib_warn(ibdev, " ended\n");
}

static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
			  enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
	struct ib_event ibev;
	u8 port = 0;

	switch (event) {
	case MLX5_DEV_EVENT_SYS_ERROR:
		ibdev->ib_active = false;
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		break;

	case MLX5_DEV_EVENT_PORT_UP:
		ibev.event = IB_EVENT_PORT_ACTIVE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PORT_DOWN:
	case MLX5_DEV_EVENT_PORT_INITIALIZED:
		ibev.event = IB_EVENT_PORT_ERR;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_LID_CHANGE:
		ibev.event = IB_EVENT_LID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PKEY_CHANGE:
		ibev.event = IB_EVENT_PKEY_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_GUID_CHANGE:
		ibev.event = IB_EVENT_GID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_CLIENT_REREG:
		ibev.event = IB_EVENT_CLIENT_REREGISTER;
		port = (u8)param;
		break;

	default:
		break;
	}

	ibev.device = &ibdev->ib_dev;
	ibev.element.port_num = port;

	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
	    (port < 1 || port > ibdev->num_ports)) {
		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
		return;
	}

	if (ibdev->ib_active)
		ib_dispatch_event(&ibev);
}

static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
	int port;

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
		mlx5_query_ext_port_caps(dev, port);
}

static void config_atomic_responder(struct mlx5_ib_dev *dev,
				    struct ib_device_attr *props)
{
	enum ib_atomic_cap cap = props->atomic_cap;

#if 0
	if (cap == IB_ATOMIC_HCA ||
	    cap == IB_ATOMIC_GLOB)
#endif
		dev->enable_atomic_resp = 1;

	dev->atomic_cap = cap;
}

enum mlx5_addr_align {
	MLX5_ADDR_ALIGN_0 = 0,
	MLX5_ADDR_ALIGN_64 = 64,
	MLX5_ADDR_ALIGN_128 = 128,
};

static int get_port_caps(struct mlx5_ib_dev *dev)
{
	struct ib_device_attr *dprops = NULL;
	struct ib_port_attr *pprops = NULL;
	int err = -ENOMEM;
	int port;

	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
	if (!pprops)
		goto out;

	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
	if (!dprops)
		goto out;

	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
	if (err) {
		mlx5_ib_warn(dev, "query_device failed %d\n", err);
		goto out;
	}
	config_atomic_responder(dev, dprops);

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
		if (err) {
			mlx5_ib_warn(dev, "query_port %d failed %d\n",
				     port, err);
			break;
		}
		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
			    dprops->max_pkeys, pprops->gid_tbl_len);
	}

out:
	kfree(pprops);
	kfree(dprops);

	return err;
}

static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_mr_cache_cleanup(dev);
	if (err)
		mlx5_ib_warn(dev, "mr cache cleanup failed\n");

	ib_dereg_mr(dev->umrc.mr);
	ib_dealloc_pd(dev->umrc.pd);
}

enum {
	MAX_UMR_WR = 128,
};

static int create_umr_res(struct mlx5_ib_dev *dev)
{
	struct ib_pd *pd;
	struct ib_mr *mr;
	int ret;

	pd = ib_alloc_pd(&dev->ib_dev);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		ret = PTR_ERR(pd);
		goto error_0;
	}

	mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(mr)) {
		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
		ret = PTR_ERR(mr);
		goto error_1;
	}

	dev->umrc.mr = mr;
	dev->umrc.pd = pd;

	ret = mlx5_mr_cache_init(dev);
	if (ret) {
		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
		goto error_4;
	}

	return 0;

error_4:
	ib_dereg_mr(mr);
error_1:
	ib_dealloc_pd(pd);
error_0:
	return ret;
}

static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	int ret = 0;

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	devr->p0->device = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device = &dev->ib_dev;
	devr->c0->uobject = NULL;
	devr->c0->comp_handler = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device = &dev->ib_dev;
	devr->s0->pd = devr->p0;
	devr->s0->uobject = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context = NULL;
	devr->s0->srq_type = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd = devr->x0;
	devr->s0->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);
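	/*
	 * Editorial note: a second, plain SRQ (s1) is created next on the
	 * same PD and CQ, this time with IB_SRQT_BASIC. Together with
	 * p0/c0/x0/x1/s0, these device-owned objects appear to serve as
	 * default resources for QP creation paths that need an SRQ or
	 * XRCD handle even when the consumer supplies none (an inference
	 * from how devr is used elsewhere, not a statement from the
	 * original source).
	 */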
	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device = &dev->ib_dev;
	devr->s1->pd = devr->p0;
	devr->s1->uobject = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context = NULL;
	devr->s1->srq_type = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq = devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}

static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);
}

static void enable_dc_tracer(struct mlx5_ib_dev *dev)
{
	struct device *device = dev->ib_dev.dma_device;
	struct mlx5_dc_tracer *dct = &dev->dctr;
	int order;
	void *tmp;
	int size;
	int err;

	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
	if (size <= PAGE_SIZE)
		order = 0;
	else
		order = 1;

	dct->pg = alloc_pages(GFP_KERNEL, order);
	if (!dct->pg) {
		mlx5_ib_err(dev, "failed to allocate order-%d pages\n", order);
		return;
	}

	tmp = page_address(dct->pg);
	memset(tmp, 0xff, size);

	dct->size = size;
	dct->order = order;
	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(device, dct->dma)) {
		mlx5_ib_err(dev, "dma mapping error\n");
		goto map_err;
	}

	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
	if (err) {
		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
		goto cmd_err;
	}

	return;

cmd_err:
	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
map_err:
	__free_pages(dct->pg, dct->order);
	dct->pg = NULL;
}

static void disable_dc_tracer(struct mlx5_ib_dev *dev)
{
	struct device *device = dev->ib_dev.dma_device;
	struct mlx5_dc_tracer *dct = &dev->dctr;
	int err;

	if (!dct->pg)
		return;

	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
	if (err) {
		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
		return;
	}

	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
	__free_pages(dct->pg, dct->order);
	dct->pg = NULL;
}

enum {
	MLX5_DC_CNAK_SIZE = 128,
	MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE,
	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128,
	MLX5_DC_CNAK_SL = 0,
	MLX5_DC_CNAK_VL = 0,
};

static int init_dc_improvements(struct mlx5_ib_dev *dev)
{
	if (!mlx5_core_is_pf(dev->mdev))
		return 0;

	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
		return 0;

	enable_dc_tracer(dev);

	return 0;
}

static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{
	disable_dc_tracer(dev);
}
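/*
 * Editorial note: the helpers below manage one firmware queue counter per
 * port, allocated through the vport interface
 * (mlx5_vport_alloc_q_counter). The q_cnt_id stored in each mlx5_ib_port
 * is what the sysfs "counters" group further down reads back with
 * QUERY_Q_COUNTER.
 */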
static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
{
	mlx5_vport_dealloc_q_counter(dev->mdev,
				     MLX5_INTERFACE_PROTOCOL_IB,
				     dev->port[port_num].q_cnt_id);
	dev->port[port_num].q_cnt_id = 0;
}

static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_ports; i++)
		mlx5_ib_dealloc_q_port_counter(dev, i);
}

static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
	int i;
	int ret;

	for (i = 0; i < dev->num_ports; i++) {
		ret = mlx5_vport_alloc_q_counter(dev->mdev,
						 MLX5_INTERFACE_PROTOCOL_IB,
						 &dev->port[i].q_cnt_id);
		if (ret) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d\n",
				     i + 1);
			goto dealloc_counters;
		}
	}

	return 0;

dealloc_counters:
	while (--i >= 0)
		mlx5_ib_dealloc_q_port_counter(dev, i);

	return ret;
}

struct port_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_port *,
			struct port_attribute *, char *buf);
	ssize_t (*store)(struct mlx5_ib_port *,
			 struct port_attribute *,
			 const char *buf, size_t count);
};

struct port_counter_attribute {
	struct port_attribute attr;
	size_t offset;
};

static ssize_t port_attr_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct port_attribute *port_attr =
		container_of(attr, struct port_attribute, attr);
	struct mlx5_ib_port_sysfs_group *p =
		container_of(kobj, struct mlx5_ib_port_sysfs_group,
			     kobj);
	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
						    group);

	if (!port_attr->show)
		return -EIO;

	return port_attr->show(mibport, port_attr, buf);
}

static ssize_t show_port_counter(struct mlx5_ib_port *p,
				 struct port_attribute *port_attr,
				 char *buf)
{
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	struct port_counter_attribute *counter_attr =
		container_of(port_attr, struct port_counter_attribute, attr);
	void *out;
	int ret;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_query_q_counter(p->dev->mdev,
					 p->q_cnt_id, 0,
					 out, outlen);
	if (ret)
		goto free;

	ret = sprintf(buf, "%d\n",
		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));

free:
	kvfree(out);
	return ret;
}

#define PORT_COUNTER_ATTR(_name)					\
struct port_counter_attribute port_counter_attr_##_name = {		\
	.attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
}

static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);
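/*
 * Editorial note: each attribute above exposes one 32-bit field of the
 * QUERY_Q_COUNTER output, located via MLX5_BYTE_OFF. With the per-port
 * kobjects created below, the counters surface under the device's sysfs
 * tree, e.g. (path shown for illustration only, assuming a Linux-style
 * sysfs layout under linuxkpi):
 * /sys/class/infiniband/mlx5_0/mlx5_ports/1/counters/out_of_buffer
 */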
static struct attribute *counter_attrs[] = {
	&port_counter_attr_rx_write_requests.attr.attr,
	&port_counter_attr_rx_read_requests.attr.attr,
	&port_counter_attr_rx_atomic_requests.attr.attr,
	&port_counter_attr_rx_dct_connect.attr.attr,
	&port_counter_attr_out_of_buffer.attr.attr,
	&port_counter_attr_out_of_sequence.attr.attr,
	&port_counter_attr_duplicate_request.attr.attr,
	&port_counter_attr_rnr_nak_retry_err.attr.attr,
	&port_counter_attr_packet_seq_err.attr.attr,
	&port_counter_attr_implied_nak_seq_err.attr.attr,
	&port_counter_attr_local_ack_timeout_err.attr.attr,
	NULL
};

static struct attribute_group port_counters_group = {
	.name = "counters",
	.attrs = counter_attrs
};

static const struct sysfs_ops port_sysfs_ops = {
	.show = port_attr_show
};

static struct kobj_type port_type = {
	.sysfs_ops = &port_sysfs_ops,
};

static int add_port_attrs(struct mlx5_ib_dev *dev,
			  struct kobject *parent,
			  struct mlx5_ib_port_sysfs_group *port,
			  u8 port_num)
{
	int ret;

	ret = kobject_init_and_add(&port->kobj, &port_type,
				   parent,
				   "%d", port_num);
	if (ret)
		return ret;

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		ret = sysfs_create_group(&port->kobj, &port_counters_group);
		if (ret)
			goto put_kobj;
	}

	port->enabled = true;
	return ret;

put_kobj:
	kobject_put(&port->kobj);
	return ret;
}

static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
				unsigned int num_ports)
{
	unsigned int i;

	for (i = 0; i < num_ports; i++) {
		struct mlx5_ib_port_sysfs_group *port =
			&dev->port[i].group;

		if (!port->enabled)
			continue;

		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
			sysfs_remove_group(&port->kobj,
					   &port_counters_group);
		kobject_put(&port->kobj);
		port->enabled = false;
	}

	if (dev->ports_parent) {
		kobject_put(dev->ports_parent);
		dev->ports_parent = NULL;
	}
}

static int create_port_attrs(struct mlx5_ib_dev *dev)
{
	int ret = 0;
	unsigned int i = 0;
	struct device *device = &dev->ib_dev.dev;

	dev->ports_parent = kobject_create_and_add("mlx5_ports",
						   &device->kobj);
	if (!dev->ports_parent)
		return -ENOMEM;

	for (i = 0; i < dev->num_ports; i++) {
		ret = add_port_attrs(dev,
				     dev->ports_parent,
				     &dev->port[i].group,
				     i + 1);
		if (ret)
			goto _destroy_ports_attrs;
	}

	return 0;

_destroy_ports_attrs:
	destroy_ports_attrs(dev, i);
	return ret;
}

static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	int err;
	int i;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
			    GFP_KERNEL);
	if (!dev->port)
		goto err_dealloc;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].dev = dev;
		dev->port[i].port_num = i;
		dev->port[i].port_gone = 0;
		memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
	}

	err = get_port_caps(dev);
	if (err)
		goto err_free_port;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (MLX5_CAP_GEN(mdev, roce)) {
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	int err;
	int i;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
			    GFP_KERNEL);
	if (!dev->port)
		goto err_dealloc;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].dev = dev;
		dev->port[i].port_num = i;
		dev->port[i].port_gone = 0;
		memset(dev->port[i].gid_table, 0,
		       sizeof(dev->port[i].gid_table));
	}

	err = get_port_caps(dev);
	if (err)
		goto err_free_port;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (MLX5_CAP_GEN(mdev, roce)) {
			err = mlx5_nic_vport_enable_roce(mdev);
			if (err)
				goto err_free_port;
		} else {
			goto err_free_port;
		}
	}

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
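
	/*
	 * Describe the device to ibcore: identity and limits taken from the
	 * core device, the set of user verbs commands this driver accepts,
	 * and the kernel verbs entry points.  Commands absent from
	 * uverbs_cmd_mask are rejected for userspace consumers.
	 */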
	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);

	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
	dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	err = init_node_data(dev);
	if (err)
		goto err_disable_roce;

	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_alloc_q_counters(dev);
	if (err)
		goto err_odp;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_q_cnt;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB) {
		if (init_dc_improvements(dev))
			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
	}

	err = create_port_attrs(dev);
	if (err)
		goto err_dc;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_port_attrs;
	}

	{
		struct thread *rl_thread = NULL;
		struct proc *rl_proc = NULL;

		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
			(void) kproc_kthread_add(mlx5_ib_roce_port_update,
			    dev->port + i, &rl_proc, &rl_thread,
			    RFHIGHPID, 0, "mlx5-ib-roce-port",
			    "mlx5-ib-roce_port-%d", i);
		}
	}

	dev->ib_active = true;

	return dev;

err_port_attrs:
	destroy_ports_attrs(dev, dev->num_ports);

err_dc:
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_q_cnt:
	mlx5_ib_dealloc_q_counters(dev);

err_odp:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

err_free_port:
	kfree(dev->port);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}
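
/*
 * Detach callback.  The port_gone handshake below assumes each
 * mlx5_ib_roce_port_update kthread started in mlx5_ib_add() acknowledges
 * shutdown by setting port_gone to 2 before exiting; only then is state
 * torn down, roughly in the reverse order of mlx5_ib_add().
 */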
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
				   mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add = mlx5_ib_add,
	.remove = mlx5_ib_remove,
	.event = mlx5_ib_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		printf("mlx5_ib: WARN: prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		goto clean_odp;

	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
	if (!mlx5_ib_wq) {
		printf("mlx5_ib: ERR: %s: failed to create mlx5_ib_wq\n",
		    __func__);
		/* Propagate the allocation failure to the caller. */
		err = -ENOMEM;
		goto err_unreg;
	}

	return err;

err_unreg:
	mlx5_unregister_interface(&mlx5_ib_interface);

clean_odp:
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
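
/*
 * SI_ORDER_THIRD only sequences these handlers within their SYSINIT
 * subsystem; it is the MODULE_DEPEND declarations on mlx5 and ibcore at
 * the top of this file that guarantee the provider modules are loaded
 * before this one initializes.
 */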