main.c revision 318533
/*
 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kmod.h>
/*
 * kmod.h must be included before module.h, since it (indirectly) includes
 * sys/module.h. For the FreeBSD MODULE_VERSION macro to be usable,
 * sys/module.h must define MODULE_VERSION before linux/module.h does.
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/fs.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"
#include "mlx4_stats.h"

/* Mellanox ConnectX HCA low-level driver */

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static int enable_sys_tune = 0;
module_param(enable_sys_tune, int, 0444);
MODULE_PARM_DESC(enable_sys_tune, "Tune the CPUs for better performance (default 0)");

int mlx4_blck_lb = 1;
module_param_named(block_loopback, mlx4_blck_lb, int, 0644);
MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 "
		 "(default: 1)");
enum {
	DEFAULT_DOMAIN = 0,
	BDF_STR_SIZE = 8,	/* bb:dd.f- */
	DBDF_STR_SIZE = 13	/* mmmm:bb:dd.f- */
};

enum {
	NUM_VFS,
	PROBE_VF,
	PORT_TYPE_ARRAY
};

enum {
	VALID_DATA,
	INVALID_DATA,
	INVALID_STR
};

struct param_data {
	int id;
	struct mlx4_dbdf2val_lst dbdf2val;
};

static struct param_data num_vfs = {
	.id = NUM_VFS,
	.dbdf2val = {
		.name = "num_vfs param",
		.num_vals = 1,
		.def_val = {0},
		.range = {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(num_vfs, num_vfs.dbdf2val.str,
		    sizeof(num_vfs.dbdf2val.str), 0444);
MODULE_PARM_DESC(num_vfs,
		 "Either a single value (e.g. '5') to define a uniform num_vfs value for all device functions\n"
		 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for the num_vfs value (e.g. 15).");

static struct param_data probe_vf = {
	.id = PROBE_VF,
	.dbdf2val = {
		.name = "probe_vf param",
		.num_vals = 1,
		.def_val = {0},
		.range = {0, MLX4_MAX_NUM_VF}
	}
};
module_param_string(probe_vf, probe_vf.dbdf2val.str,
		    sizeof(probe_vf.dbdf2val.str), 0444);
MODULE_PARM_DESC(probe_vf,
		 "Either a single value (e.g. '3') to define a uniform number of VFs to probe by the PF driver for all device functions\n"
		 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n"
		 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for the probe_vf value (e.g. 13).");

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;

module_param_named(log_num_mgm_entry_size,
		   mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
		 " of qp per mcg, for example:"
		 " 10 gives 248. range: 7 <="
		 " log_num_mgm_entry_size <= 12."
		 " To activate device managed"
		 " flow steering when available, set to -1");

static int high_rate_steer;
module_param(high_rate_steer, int, 0444);
MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate"
		 " (default off)");

static int fast_drop;
module_param_named(fast_drop, fast_drop, int, 0444);
MODULE_PARM_DESC(fast_drop,
		 "Enable fast packet drop when no receive WQEs are posted");

int mlx4_enable_64b_cqe_eqe = 1;
module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this if non-zero (default: 1)");

#define HCA_GLOBAL_CAP_MASK		0

#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE

static char mlx4_version[] __devinitdata =
	DRV_NAME ": Mellanox ConnectX VPI driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan,
	"(Obsolete) Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

int log_mtts_per_seg = ilog2(1);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment "
		 "(0-7) (default: 0)");

static struct param_data port_type_array = {
	.id = PORT_TYPE_ARRAY,
	.dbdf2val = {
		.name = "port_type_array param",
		.num_vals = 2,
		.def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH},
		.range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA}
	}
};
module_param_string(port_type_array, port_type_array.dbdf2val.str,
		    sizeof(port_type_array.dbdf2val.str), 0444);
MODULE_PARM_DESC(port_type_array,
		 "Either a pair of values (e.g. '1,2') to define a uniform port1/port2 types configuration for all device functions\n"
		 "\t\tor a string to map device function numbers to their pair of port type values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n"
		 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n"
		 "\t\tIf only one port is available, use the N/A port type for port2 (e.g. '1,4').");


struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

#define MLX4_LOG_NUM_MTT 20
/* We limit this to 30 because of a bitmap issue which uses int and not uint
 * (see mlx4_buddy_init -> bitmap_zero, which takes an int).
224*/ 225#define MLX4_MAX_LOG_NUM_MTT 30 226static struct mlx4_profile mod_param_profile = { 227 .num_qp = 19, 228 .num_srq = 16, 229 .rdmarc_per_qp = 4, 230 .num_cq = 16, 231 .num_mcg = 13, 232 .num_mpt = 19, 233 .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ 234}; 235 236module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); 237MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); 238 239module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); 240MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " 241 "(default: 16)"); 242 243module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 244 0444); 245MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " 246 "(default: 4)"); 247 248module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); 249MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); 250 251module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); 252MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " 253 "(default: 13)"); 254 255module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); 256MODULE_PARM_DESC(log_num_mpt, 257 "log maximum number of memory protection table entries per " 258 "HCA (default: 19)"); 259 260module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); 261MODULE_PARM_DESC(log_num_mtt, 262 "log maximum number of memory translation table segments per " 263 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); 264 265enum { 266 MLX4_IF_STATE_BASIC, 267 MLX4_IF_STATE_EXTENDED 268}; 269 270static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) 271{ 272 return (domain << 20) | (bus << 12) | (dev << 4) | fn; 273} 274 275static inline void pr_bdf_err(const char *dbdf, const char *pname) 276{ 277 pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); 278} 279 280static inline void pr_val_err(const char *dbdf, const char *pname, 281 const char *val) 282{ 283 pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" 284 , val, dbdf, pname); 285} 286 287static inline void pr_out_of_range_bdf(const char *dbdf, int val, 288 struct mlx4_dbdf2val_lst *dbdf2val) 289{ 290 pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" 291 , val, dbdf, dbdf2val->name , dbdf2val->range.min, 292 dbdf2val->range.max); 293} 294 295static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) 296{ 297 pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" 298 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); 299} 300 301static inline int is_in_range(int val, struct mlx4_range *r) 302{ 303 return (val >= r->min && val <= r->max); 304} 305 306static int update_defaults(struct param_data *pdata) 307{ 308 long int val[MLX4_MAX_BDF_VALS]; 309 int ret; 310 char *t, *p = pdata->dbdf2val.str; 311 char sval[32]; 312 int val_len; 313 314 if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) 315 return INVALID_STR; 316 317 switch (pdata->id) { 318 case PORT_TYPE_ARRAY: 319 t = strchr(p, ','); 320 if (!t || t == p || (t - p) > sizeof(sval)) 321 return INVALID_STR; 322 323 val_len = t - p; 324 strncpy(sval, p, val_len); 325 sval[val_len] = 0; 326 327 ret = kstrtol(sval, 0, &val[0]); 328 if (ret == -EINVAL) 329 return INVALID_STR; 330 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 331 pr_out_of_range(&pdata->dbdf2val); 332 
return INVALID_DATA; 333 } 334 335 ret = kstrtol(t + 1, 0, &val[1]); 336 if (ret == -EINVAL) 337 return INVALID_STR; 338 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { 339 pr_out_of_range(&pdata->dbdf2val); 340 return INVALID_DATA; 341 } 342 343 pdata->dbdf2val.tbl[0].val[0] = val[0]; 344 pdata->dbdf2val.tbl[0].val[1] = val[1]; 345 break; 346 347 case NUM_VFS: 348 case PROBE_VF: 349 ret = kstrtol(p, 0, &val[0]); 350 if (ret == -EINVAL) 351 return INVALID_STR; 352 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 353 pr_out_of_range(&pdata->dbdf2val); 354 return INVALID_DATA; 355 } 356 pdata->dbdf2val.tbl[0].val[0] = val[0]; 357 break; 358 } 359 pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; 360 361 return VALID_DATA; 362} 363 364int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) 365{ 366 int domain, bus, dev, fn; 367 u64 dbdf; 368 char *p, *t, *v; 369 char tmp[32]; 370 char sbdf[32]; 371 char sep = ','; 372 int j, k, str_size, i = 1; 373 int prfx_size; 374 375 p = dbdf2val_lst->str; 376 377 for (j = 0; j < dbdf2val_lst->num_vals; j++) 378 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; 379 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 380 381 str_size = strlen(dbdf2val_lst->str); 382 383 if (str_size == 0) 384 return 0; 385 386 while (strlen(p)) { 387 prfx_size = BDF_STR_SIZE; 388 sbdf[prfx_size] = 0; 389 strncpy(sbdf, p, prfx_size); 390 domain = DEFAULT_DOMAIN; 391 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { 392 prfx_size = DBDF_STR_SIZE; 393 sbdf[prfx_size] = 0; 394 strncpy(sbdf, p, prfx_size); 395 if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, 396 &dev, &fn) != 4) { 397 pr_bdf_err(sbdf, dbdf2val_lst->name); 398 goto err; 399 } 400 sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, 401 fn); 402 } else { 403 sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); 404 } 405 406 if (strnicmp(sbdf, tmp, sizeof(tmp))) { 407 pr_bdf_err(sbdf, dbdf2val_lst->name); 408 goto err; 409 } 410 411 dbdf = dbdf_to_u64(domain, bus, dev, fn); 412 413 for (j = 1; j < i; j++) 414 if (dbdf2val_lst->tbl[j].dbdf == dbdf) { 415 pr_warn("mlx4_core: in '%s', %s appears multiple times\n" 416 , dbdf2val_lst->name, sbdf); 417 goto err; 418 } 419 420 if (i >= MLX4_DEVS_TBL_SIZE) { 421 pr_warn("mlx4_core: Too many devices in '%s'\n" 422 , dbdf2val_lst->name); 423 goto err; 424 } 425 426 p += prfx_size; 427 t = strchr(p, sep); 428 t = t ? t : p + strlen(p); 429 if (p >= t) { 430 pr_val_err(sbdf, dbdf2val_lst->name, ""); 431 goto err; 432 } 433 434 for (k = 0; k < dbdf2val_lst->num_vals; k++) { 435 char sval[32]; 436 long int val; 437 int ret, val_len; 438 char vsep = ';'; 439 440 v = (k == dbdf2val_lst->num_vals - 1) ? 
t : strchr(p, vsep); 441 if (!v || v > t || v == p || (v - p) > sizeof(sval)) { 442 pr_val_err(sbdf, dbdf2val_lst->name, p); 443 goto err; 444 } 445 val_len = v - p; 446 strncpy(sval, p, val_len); 447 sval[val_len] = 0; 448 449 ret = kstrtol(sval, 0, &val); 450 if (ret) { 451 if (strchr(p, vsep)) 452 pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" 453 , sbdf, dbdf2val_lst->name); 454 else 455 pr_val_err(sbdf, dbdf2val_lst->name, 456 sval); 457 goto err; 458 } 459 if (!is_in_range(val, &dbdf2val_lst->range)) { 460 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); 461 goto err; 462 } 463 464 dbdf2val_lst->tbl[i].val[k] = val; 465 p = v; 466 if (p[0] == vsep) 467 p++; 468 } 469 470 dbdf2val_lst->tbl[i].dbdf = dbdf; 471 if (strlen(p)) { 472 if (p[0] != sep) { 473 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" 474 , sep, p, dbdf2val_lst->name); 475 goto err; 476 } 477 p++; 478 } 479 i++; 480 if (i < MLX4_DEVS_TBL_SIZE) 481 dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; 482 } 483 484 return 0; 485 486err: 487 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 488 pr_warn("mlx4_core: The value of '%s' is incorrect. The value is discarded!\n" 489 , dbdf2val_lst->name); 490 491 return -EINVAL; 492} 493EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); 494 495int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, 496 int *val) 497{ 498 u64 dbdf; 499 int i = 1; 500 501 *val = tbl[0].val[idx]; 502 if (!pdev) 503 return -EINVAL; 504 505 dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), 506 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 507 508 while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { 509 if (tbl[i].dbdf == dbdf) { 510 *val = tbl[i].val[idx]; 511 return 0; 512 } 513 i++; 514 } 515 516 return 0; 517} 518EXPORT_SYMBOL(mlx4_get_val); 519 520static void process_mod_param_profile(struct mlx4_profile *profile) 521{ 522 vm_size_t hwphyssz; 523 hwphyssz = 0; 524 TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); 525 526 profile->num_qp = 1 << mod_param_profile.num_qp; 527 profile->num_srq = 1 << mod_param_profile.num_srq; 528 profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; 529 profile->num_cq = 1 << mod_param_profile.num_cq; 530 profile->num_mcg = 1 << mod_param_profile.num_mcg; 531 profile->num_mpt = 1 << mod_param_profile.num_mpt; 532 /* 533 * We want to scale the number of MTTs with the size of the 534 * system memory, since it makes sense to register a lot of 535 * memory on a system with a lot of memory. As a heuristic, 536 * make sure we have enough MTTs to register twice the system 537 * memory (with PAGE_SIZE entries). 538 * 539 * This number has to be a power of two and fit into 32 bits 540 * due to device limitations. We cap this at 2^30 as of bit map 541 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) 542 * That limits us to 4TB of memory registration per HCA with 543 * 4KB pages, which is probably OK for the next few months. 
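	 *
	 * For example (illustrative numbers only): with 64 GB of RAM and
	 * 4 KB pages, covering twice the memory takes 2 * 64 GB / 4 KB =
	 * 2^25 MTT entries, well below the 2^30 cap; the cap itself
	 * corresponds to 2^30 * 4 KB = 4 TB of registered memory.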
544 */ 545 if (mod_param_profile.num_mtt_segs) 546 profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; 547 else { 548 profile->num_mtt_segs = 549 roundup_pow_of_two(max_t(unsigned, 550 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), 551 min(1UL << 552 (MLX4_MAX_LOG_NUM_MTT - 553 log_mtts_per_seg), 554 (hwphyssz << 1) 555 >> log_mtts_per_seg))); 556 /* set the actual value, so it will be reflected to the user 557 using the sysfs */ 558 mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); 559 } 560} 561 562int mlx4_check_port_params(struct mlx4_dev *dev, 563 enum mlx4_port_type *port_type) 564{ 565 int i; 566 567 for (i = 0; i < dev->caps.num_ports - 1; i++) { 568 if (port_type[i] != port_type[i + 1]) { 569 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 570 mlx4_err(dev, "Only same port types supported " 571 "on this HCA, aborting.\n"); 572 return -EINVAL; 573 } 574 } 575 } 576 577 for (i = 0; i < dev->caps.num_ports; i++) { 578 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 579 mlx4_err(dev, "Requested port type for port %d is not " 580 "supported on this HCA\n", i + 1); 581 return -EINVAL; 582 } 583 } 584 return 0; 585} 586 587static void mlx4_set_port_mask(struct mlx4_dev *dev) 588{ 589 int i; 590 591 for (i = 1; i <= dev->caps.num_ports; ++i) 592 dev->caps.port_mask[i] = dev->caps.port_type[i]; 593} 594 595static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 596{ 597 int err; 598 int i; 599 600 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 601 if (err) { 602 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 603 return err; 604 } 605 606 if (dev_cap->min_page_sz > PAGE_SIZE) { 607 mlx4_err(dev, "HCA minimum page size of %d bigger than " 608 "kernel PAGE_SIZE of %d, aborting.\n", 609 dev_cap->min_page_sz, (int)PAGE_SIZE); 610 return -ENODEV; 611 } 612 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 613 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 614 "aborting.\n", 615 dev_cap->num_ports, MLX4_MAX_PORTS); 616 return -ENODEV; 617 } 618 619 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { 620 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " 621 "PCI resource 2 size of 0x%llx, aborting.\n", 622 dev_cap->uar_size, 623 (unsigned long long) pci_resource_len(dev->pdev, 2)); 624 return -ENODEV; 625 } 626 627 dev->caps.num_ports = dev_cap->num_ports; 628 dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; 629 for (i = 1; i <= dev->caps.num_ports; ++i) { 630 dev->caps.vl_cap[i] = dev_cap->max_vl[i]; 631 dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; 632 dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; 633 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; 634 /* set gid and pkey table operating lengths by default 635 * to non-sriov values */ 636 dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; 637 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; 638 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; 639 dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; 640 dev->caps.def_mac[i] = dev_cap->def_mac[i]; 641 dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; 642 dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; 643 dev->caps.default_sense[i] = dev_cap->default_sense[i]; 644 dev->caps.trans_type[i] = dev_cap->trans_type[i]; 645 dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; 646 dev->caps.wavelength[i] = dev_cap->wavelength[i]; 647 dev->caps.trans_code[i] = dev_cap->trans_code[i]; 648 } 649 650 dev->caps.uar_page_size = PAGE_SIZE; 651 dev->caps.num_uars = dev_cap->uar_size 
/ PAGE_SIZE; 652 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 653 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 654 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 655 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 656 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 657 dev->caps.max_wqes = dev_cap->max_qp_sz; 658 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 659 dev->caps.max_srq_wqes = dev_cap->max_srq_sz; 660 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 661 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 662 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 663 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 664 /* 665 * Subtract 1 from the limit because we need to allocate a 666 * spare CQE to enable resizing the CQ 667 */ 668 dev->caps.max_cqes = dev_cap->max_cq_sz - 1; 669 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 670 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 671 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 672 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 673 674 /* The first 128 UARs are used for EQ doorbells */ 675 dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); 676 dev->caps.reserved_pds = dev_cap->reserved_pds; 677 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 678 dev_cap->reserved_xrcds : 0; 679 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 680 dev_cap->max_xrcds : 0; 681 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 682 683 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 684 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 685 dev->caps.flags = dev_cap->flags; 686 dev->caps.flags2 = dev_cap->flags2; 687 dev->caps.bmme_flags = dev_cap->bmme_flags; 688 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 689 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 690 dev->caps.cq_timestamp = dev_cap->timestamp_support; 691 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 692 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 693 694 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 695 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 696 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 697 /* Don't do sense port on multifunction devices (for now at least) */ 698 if (mlx4_is_mfunc(dev)) 699 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 700 701 dev->caps.log_num_macs = log_num_mac; 702 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 703 704 dev->caps.fast_drop = fast_drop ? 705 !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 706 0; 707 708 for (i = 1; i <= dev->caps.num_ports; ++i) { 709 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 710 if (dev->caps.supported_type[i]) { 711 /* if only ETH is supported - assign ETH */ 712 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 713 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 714 /* if only IB is supported, assign IB */ 715 else if (dev->caps.supported_type[i] == 716 MLX4_PORT_TYPE_IB) 717 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 718 else { 719 /* 720 * if IB and ETH are supported, we set the port 721 * type according to user selection of port type; 722 * if there is no user selection, take the FW hint 723 */ 724 int pta; 725 mlx4_get_val(port_type_array.dbdf2val.tbl, 726 pci_physfn(dev->pdev), i - 1, 727 &pta); 728 if (pta == MLX4_PORT_TYPE_NONE) { 729 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 730 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 731 } else if (pta == MLX4_PORT_TYPE_NA) { 732 mlx4_err(dev, "Port %d is valid port. 
" 733 "It is not allowed to configure its type to N/A(%d)\n", 734 i, MLX4_PORT_TYPE_NA); 735 return -EINVAL; 736 } else { 737 dev->caps.port_type[i] = pta; 738 } 739 } 740 } 741 /* 742 * Link sensing is allowed on the port if 3 conditions are true: 743 * 1. Both protocols are supported on the port. 744 * 2. Different types are supported on the port 745 * 3. FW declared that it supports link sensing 746 */ 747 mlx4_priv(dev)->sense.sense_allowed[i] = 748 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 749 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 750 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 751 752 /* Disablling auto sense for default Eth ports support */ 753 mlx4_priv(dev)->sense.sense_allowed[i] = 0; 754 755 /* 756 * If "default_sense" bit is set, we move the port to "AUTO" mode 757 * and perform sense_port FW command to try and set the correct 758 * port type from beginning 759 */ 760 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 761 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 762 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 763 mlx4_SENSE_PORT(dev, i, &sensed_port); 764 if (sensed_port != MLX4_PORT_TYPE_NONE) 765 dev->caps.port_type[i] = sensed_port; 766 } else { 767 dev->caps.possible_type[i] = dev->caps.port_type[i]; 768 } 769 770 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { 771 dev->caps.log_num_macs = dev_cap->log_max_macs[i]; 772 mlx4_warn(dev, "Requested number of MACs is too much " 773 "for port %d, reducing to %d.\n", 774 i, 1 << dev->caps.log_num_macs); 775 } 776 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { 777 dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; 778 mlx4_warn(dev, "Requested number of VLANs is too much " 779 "for port %d, reducing to %d.\n", 780 i, 1 << dev->caps.log_num_vlans); 781 } 782 } 783 784 dev->caps.max_basic_counters = dev_cap->max_basic_counters; 785 dev->caps.max_extended_counters = dev_cap->max_extended_counters; 786 /* support extended counters if available */ 787 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) 788 dev->caps.max_counters = dev->caps.max_extended_counters; 789 else 790 dev->caps.max_counters = dev->caps.max_basic_counters; 791 792 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 793 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 794 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 795 (1 << dev->caps.log_num_macs) * 796 (1 << dev->caps.log_num_vlans) * 797 dev->caps.num_ports; 798 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 799 800 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 801 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 802 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 803 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 804 805 dev->caps.sync_qp = dev_cap->sync_qp; 806 if (dev->pdev->device == 0x1003) 807 dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; 808 809 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? 
MLX4_MAX_NUM_SLAVES : 0; 810 811 if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 812 if (dev_cap->flags & 813 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 814 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 815 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 816 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 817 } 818 } 819 820 if ((dev->caps.flags & 821 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 822 mlx4_is_master(dev)) 823 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 824 825 if (!mlx4_is_slave(dev)) { 826 for (i = 0; i < dev->caps.num_ports; ++i) 827 dev->caps.def_counter_index[i] = i << 1; 828 829 dev->caps.alloc_res_qp_mask = 830 (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0); 831 } else { 832 dev->caps.alloc_res_qp_mask = 0; 833 } 834 835 return 0; 836} 837/*The function checks if there are live vf, return the num of them*/ 838static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 839{ 840 struct mlx4_priv *priv = mlx4_priv(dev); 841 struct mlx4_slave_state *s_state; 842 int i; 843 int ret = 0; 844 845 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 846 s_state = &priv->mfunc.master.slave_state[i]; 847 if (s_state->active && s_state->last_cmd != 848 MLX4_COMM_CMD_RESET) { 849 mlx4_warn(dev, "%s: slave: %d is still active\n", 850 __func__, i); 851 ret++; 852 } 853 } 854 return ret; 855} 856 857int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 858{ 859 u32 qk = MLX4_RESERVED_QKEY_BASE; 860 861 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 862 qpn < dev->phys_caps.base_proxy_sqpn) 863 return -EINVAL; 864 865 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 866 /* tunnel qp */ 867 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 868 else 869 qk += qpn - dev->phys_caps.base_proxy_sqpn; 870 *qkey = qk; 871 return 0; 872} 873EXPORT_SYMBOL(mlx4_get_parav_qkey); 874 875void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 876{ 877 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 878 879 if (!mlx4_is_master(dev)) 880 return; 881 882 priv->virt2phys_pkey[slave][port - 1][i] = val; 883} 884EXPORT_SYMBOL(mlx4_sync_pkey_table); 885 886void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 887{ 888 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 889 890 if (!mlx4_is_master(dev)) 891 return; 892 893 priv->slave_node_guids[slave] = guid; 894} 895EXPORT_SYMBOL(mlx4_put_slave_node_guid); 896 897__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 898{ 899 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 900 901 if (!mlx4_is_master(dev)) 902 return 0; 903 904 return priv->slave_node_guids[slave]; 905} 906EXPORT_SYMBOL(mlx4_get_slave_node_guid); 907 908int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 909{ 910 struct mlx4_priv *priv = mlx4_priv(dev); 911 struct mlx4_slave_state *s_slave; 912 913 if (!mlx4_is_master(dev)) 914 return 0; 915 916 s_slave = &priv->mfunc.master.slave_state[slave]; 917 return !!s_slave->active; 918} 919EXPORT_SYMBOL(mlx4_is_slave_active); 920 921static void slave_adjust_steering_mode(struct mlx4_dev *dev, 922 struct mlx4_dev_cap *dev_cap, 923 struct mlx4_init_hca_param *hca_param) 924{ 925 dev->caps.steering_mode = hca_param->steering_mode; 926 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) 927 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 928 else 929 dev->caps.num_qp_per_mgm = 930 4 * ((1 << 
hca_param->log_mc_entry_sz)/16 - 2); 931 932 mlx4_dbg(dev, "Steering mode is: %s\n", 933 mlx4_steering_mode_str(dev->caps.steering_mode)); 934} 935 936static int mlx4_slave_cap(struct mlx4_dev *dev) 937{ 938 int err; 939 u32 page_size; 940 struct mlx4_dev_cap dev_cap; 941 struct mlx4_func_cap func_cap; 942 struct mlx4_init_hca_param hca_param; 943 int i; 944 945 memset(&hca_param, 0, sizeof(hca_param)); 946 err = mlx4_QUERY_HCA(dev, &hca_param); 947 if (err) { 948 mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); 949 return err; 950 } 951 952 /*fail if the hca has an unknown capability */ 953 if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != 954 HCA_GLOBAL_CAP_MASK) { 955 mlx4_err(dev, "Unknown hca global capabilities\n"); 956 return -ENOSYS; 957 } 958 959 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 960 961 dev->caps.hca_core_clock = hca_param.hca_core_clock; 962 963 memset(&dev_cap, 0, sizeof(dev_cap)); 964 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 965 err = mlx4_dev_cap(dev, &dev_cap); 966 if (err) { 967 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 968 return err; 969 } 970 971 err = mlx4_QUERY_FW(dev); 972 if (err) 973 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 974 975 if (!hca_param.mw_enable) { 976 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; 977 dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; 978 } 979 980 page_size = ~dev->caps.page_size_cap + 1; 981 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 982 if (page_size > PAGE_SIZE) { 983 mlx4_err(dev, "HCA minimum page size of %d bigger than " 984 "kernel PAGE_SIZE of %d, aborting.\n", 985 page_size, (int)PAGE_SIZE); 986 return -ENODEV; 987 } 988 989 /* slave gets uar page size from QUERY_HCA fw command */ 990 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 991 992 /* TODO: relax this assumption */ 993 if (dev->caps.uar_page_size != PAGE_SIZE) { 994 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", 995 dev->caps.uar_page_size, (int)PAGE_SIZE); 996 return -ENODEV; 997 } 998 999 memset(&func_cap, 0, sizeof(func_cap)); 1000 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 1001 if (err) { 1002 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 1003 err); 1004 return err; 1005 } 1006 1007 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 1008 PF_CONTEXT_BEHAVIOUR_MASK) { 1009 mlx4_err(dev, "Unknown pf context behaviour\n"); 1010 return -ENOSYS; 1011 } 1012 1013 dev->caps.num_ports = func_cap.num_ports; 1014 dev->quotas.qp = func_cap.qp_quota; 1015 dev->quotas.srq = func_cap.srq_quota; 1016 dev->quotas.cq = func_cap.cq_quota; 1017 dev->quotas.mpt = func_cap.mpt_quota; 1018 dev->quotas.mtt = func_cap.mtt_quota; 1019 dev->caps.num_qps = 1 << hca_param.log_num_qps; 1020 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 1021 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 1022 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 1023 dev->caps.num_eqs = func_cap.max_eq; 1024 dev->caps.reserved_eqs = func_cap.reserved_eq; 1025 dev->caps.num_pds = MLX4_NUM_PDS; 1026 dev->caps.num_mgms = 0; 1027 dev->caps.num_amgms = 0; 1028 1029 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 1030 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 1031 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 1032 return -ENODEV; 1033 } 1034 1035 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1036 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1037 
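	/* Note: the four per-port QPN arrays (qp0/qp1 tunnel and proxy)
	 * are allocated as a group; all four are freed and reset to NULL
	 * on the err_mem path below. */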
dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1038 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1039 1040 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 1041 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 1042 err = -ENOMEM; 1043 goto err_mem; 1044 } 1045 1046 for (i = 1; i <= dev->caps.num_ports; ++i) { 1047 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 1048 if (err) { 1049 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 1050 " port %d, aborting (%d).\n", i, err); 1051 goto err_mem; 1052 } 1053 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 1054 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 1055 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 1056 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 1057 dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; 1058 1059 dev->caps.port_mask[i] = dev->caps.port_type[i]; 1060 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 1061 &dev->caps.gid_table_len[i], 1062 &dev->caps.pkey_table_len[i]); 1063 if (err) 1064 goto err_mem; 1065 } 1066 1067 if (dev->caps.uar_page_size * (dev->caps.num_uars - 1068 dev->caps.reserved_uars) > 1069 pci_resource_len(dev->pdev, 2)) { 1070 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 1071 "PCI resource 2 size of 0x%llx, aborting.\n", 1072 dev->caps.uar_page_size * dev->caps.num_uars, 1073 (unsigned long long) pci_resource_len(dev->pdev, 2)); 1074 err = -ENOMEM; 1075 goto err_mem; 1076 } 1077 1078 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 1079 dev->caps.eqe_size = 64; 1080 dev->caps.eqe_factor = 1; 1081 } else { 1082 dev->caps.eqe_size = 32; 1083 dev->caps.eqe_factor = 0; 1084 } 1085 1086 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 1087 dev->caps.cqe_size = 64; 1088 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 1089 } else { 1090 dev->caps.cqe_size = 32; 1091 } 1092 1093 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1094 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 1095 1096 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 1097 1098 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && 1099 dev->caps.bf_reg_size) 1100 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; 1101 1102 return 0; 1103 1104err_mem: 1105 kfree(dev->caps.qp0_tunnel); 1106 kfree(dev->caps.qp0_proxy); 1107 kfree(dev->caps.qp1_tunnel); 1108 kfree(dev->caps.qp1_proxy); 1109 dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 1110 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 1111 1112 return err; 1113} 1114 1115static void mlx4_request_modules(struct mlx4_dev *dev) 1116{ 1117 int port; 1118 int has_ib_port = false; 1119 int has_eth_port = false; 1120#define EN_DRV_NAME "mlx4_en" 1121#define IB_DRV_NAME "mlx4_ib" 1122 1123 for (port = 1; port <= dev->caps.num_ports; port++) { 1124 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1125 has_ib_port = true; 1126 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1127 has_eth_port = true; 1128 } 1129 1130 if (has_ib_port) 1131 request_module_nowait(IB_DRV_NAME); 1132 if (has_eth_port) 1133 request_module_nowait(EN_DRV_NAME); 1134} 1135 1136/* 1137 * Change the port configuration of the device. 1138 * Every user of this function must hold the port mutex. 
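 * (For example, set_port_type() below takes priv->port_mutex before
 * calling this function.)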
1139 */ 1140int mlx4_change_port_types(struct mlx4_dev *dev, 1141 enum mlx4_port_type *port_types) 1142{ 1143 int err = 0; 1144 int change = 0; 1145 int port; 1146 1147 for (port = 0; port < dev->caps.num_ports; port++) { 1148 /* Change the port type only if the new type is different 1149 * from the current, and not set to Auto */ 1150 if (port_types[port] != dev->caps.port_type[port + 1]) 1151 change = 1; 1152 } 1153 if (change) { 1154 mlx4_unregister_device(dev); 1155 for (port = 1; port <= dev->caps.num_ports; port++) { 1156 mlx4_CLOSE_PORT(dev, port); 1157 dev->caps.port_type[port] = port_types[port - 1]; 1158 err = mlx4_SET_PORT(dev, port, -1); 1159 if (err) { 1160 mlx4_err(dev, "Failed to set port %d, " 1161 "aborting\n", port); 1162 goto out; 1163 } 1164 } 1165 mlx4_set_port_mask(dev); 1166 err = mlx4_register_device(dev); 1167 if (err) { 1168 mlx4_err(dev, "Failed to register device\n"); 1169 goto out; 1170 } 1171 mlx4_request_modules(dev); 1172 } 1173 1174out: 1175 return err; 1176} 1177 1178static ssize_t show_port_type(struct device *dev, 1179 struct device_attribute *attr, 1180 char *buf) 1181{ 1182 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1183 port_attr); 1184 struct mlx4_dev *mdev = info->dev; 1185 char type[8]; 1186 1187 sprintf(type, "%s", 1188 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 1189 "ib" : "eth"); 1190 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1191 sprintf(buf, "auto (%s)\n", type); 1192 else 1193 sprintf(buf, "%s\n", type); 1194 1195 return strlen(buf); 1196} 1197 1198static ssize_t set_port_type(struct device *dev, 1199 struct device_attribute *attr, 1200 const char *buf, size_t count) 1201{ 1202 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1203 port_attr); 1204 struct mlx4_dev *mdev = info->dev; 1205 struct mlx4_priv *priv = mlx4_priv(mdev); 1206 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1207 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1208 int i; 1209 int err = 0; 1210 1211 if (!strcmp(buf, "ib\n")) 1212 info->tmp_type = MLX4_PORT_TYPE_IB; 1213 else if (!strcmp(buf, "eth\n")) 1214 info->tmp_type = MLX4_PORT_TYPE_ETH; 1215 else if (!strcmp(buf, "auto\n")) 1216 info->tmp_type = MLX4_PORT_TYPE_AUTO; 1217 else { 1218 mlx4_err(mdev, "%s is not supported port type\n", buf); 1219 return -EINVAL; 1220 } 1221 1222 if ((info->tmp_type & mdev->caps.supported_type[info->port]) != 1223 info->tmp_type) { 1224 mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", 1225 info->port); 1226 return -EINVAL; 1227 } 1228 1229 mlx4_stop_sense(mdev); 1230 mutex_lock(&priv->port_mutex); 1231 /* Possible type is always the one that was delivered */ 1232 mdev->caps.possible_type[info->port] = info->tmp_type; 1233 1234 for (i = 0; i < mdev->caps.num_ports; i++) { 1235 types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type : 1236 mdev->caps.possible_type[i+1]; 1237 if (types[i] == MLX4_PORT_TYPE_AUTO) 1238 types[i] = mdev->caps.port_type[i+1]; 1239 } 1240 1241 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1242 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1243 for (i = 1; i <= mdev->caps.num_ports; i++) { 1244 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1245 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1246 err = -EINVAL; 1247 } 1248 } 1249 } 1250 if (err) { 1251 mlx4_err(mdev, "Auto sensing is not supported on this HCA. 
" 1252 "Set only 'eth' or 'ib' for both ports " 1253 "(should be the same)\n"); 1254 goto out; 1255 } 1256 1257 mlx4_do_sense_ports(mdev, new_types, types); 1258 1259 err = mlx4_check_port_params(mdev, new_types); 1260 if (err) 1261 goto out; 1262 1263 /* We are about to apply the changes after the configuration 1264 * was verified, no need to remember the temporary types 1265 * any more */ 1266 for (i = 0; i < mdev->caps.num_ports; i++) 1267 priv->port[i + 1].tmp_type = 0; 1268 1269 err = mlx4_change_port_types(mdev, new_types); 1270 1271out: 1272 mlx4_start_sense(mdev); 1273 mutex_unlock(&priv->port_mutex); 1274 return err ? err : count; 1275} 1276 1277enum ibta_mtu { 1278 IB_MTU_256 = 1, 1279 IB_MTU_512 = 2, 1280 IB_MTU_1024 = 3, 1281 IB_MTU_2048 = 4, 1282 IB_MTU_4096 = 5 1283}; 1284 1285static inline int int_to_ibta_mtu(int mtu) 1286{ 1287 switch (mtu) { 1288 case 256: return IB_MTU_256; 1289 case 512: return IB_MTU_512; 1290 case 1024: return IB_MTU_1024; 1291 case 2048: return IB_MTU_2048; 1292 case 4096: return IB_MTU_4096; 1293 default: return -1; 1294 } 1295} 1296 1297static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1298{ 1299 switch (mtu) { 1300 case IB_MTU_256: return 256; 1301 case IB_MTU_512: return 512; 1302 case IB_MTU_1024: return 1024; 1303 case IB_MTU_2048: return 2048; 1304 case IB_MTU_4096: return 4096; 1305 default: return -1; 1306 } 1307} 1308 1309static ssize_t 1310show_board(struct device *device, struct device_attribute *attr, 1311 char *buf) 1312{ 1313 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1314 board_attr); 1315 struct mlx4_dev *mdev = info->dev; 1316 1317 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1318 mdev->board_id); 1319} 1320 1321static ssize_t 1322show_hca(struct device *device, struct device_attribute *attr, 1323 char *buf) 1324{ 1325 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1326 hca_attr); 1327 struct mlx4_dev *mdev = info->dev; 1328 1329 return sprintf(buf, "MT%d\n", mdev->pdev->device); 1330} 1331 1332static ssize_t 1333show_firmware_version(struct device *dev, 1334 struct device_attribute *attr, 1335 char *buf) 1336{ 1337 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1338 firmware_attr); 1339 struct mlx4_dev *mdev = info->dev; 1340 1341 return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), 1342 (int)(mdev->caps.fw_ver >> 16) & 0xffff, 1343 (int)mdev->caps.fw_ver & 0xffff); 1344} 1345 1346static ssize_t show_port_ib_mtu(struct device *dev, 1347 struct device_attribute *attr, 1348 char *buf) 1349{ 1350 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1351 port_mtu_attr); 1352 struct mlx4_dev *mdev = info->dev; 1353 1354 /* When port type is eth, port mtu value isn't used. 
*/ 1355 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1356 return -EINVAL; 1357 1358 sprintf(buf, "%d\n", 1359 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1360 return strlen(buf); 1361} 1362 1363static ssize_t set_port_ib_mtu(struct device *dev, 1364 struct device_attribute *attr, 1365 const char *buf, size_t count) 1366{ 1367 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1368 port_mtu_attr); 1369 struct mlx4_dev *mdev = info->dev; 1370 struct mlx4_priv *priv = mlx4_priv(mdev); 1371 int err, port, mtu, ibta_mtu = -1; 1372 1373 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1374 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1375 return -EINVAL; 1376 } 1377 1378 mtu = (int) simple_strtol(buf, NULL, 0); 1379 ibta_mtu = int_to_ibta_mtu(mtu); 1380 1381 if (ibta_mtu < 0) { 1382 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 1383 return -EINVAL; 1384 } 1385 1386 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1387 1388 mlx4_stop_sense(mdev); 1389 mutex_lock(&priv->port_mutex); 1390 mlx4_unregister_device(mdev); 1391 for (port = 1; port <= mdev->caps.num_ports; port++) { 1392 mlx4_CLOSE_PORT(mdev, port); 1393 err = mlx4_SET_PORT(mdev, port, -1); 1394 if (err) { 1395 mlx4_err(mdev, "Failed to set port %d, " 1396 "aborting\n", port); 1397 goto err_set_port; 1398 } 1399 } 1400 err = mlx4_register_device(mdev); 1401err_set_port: 1402 mutex_unlock(&priv->port_mutex); 1403 mlx4_start_sense(mdev); 1404 return err ? err : count; 1405} 1406 1407static int mlx4_load_fw(struct mlx4_dev *dev) 1408{ 1409 struct mlx4_priv *priv = mlx4_priv(dev); 1410 int err, unmap_flag = 0; 1411 1412 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1413 GFP_HIGHUSER | __GFP_NOWARN, 0); 1414 if (!priv->fw.fw_icm) { 1415 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 1416 return -ENOMEM; 1417 } 1418 1419 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1420 if (err) { 1421 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1422 goto err_free; 1423 } 1424 1425 err = mlx4_RUN_FW(dev); 1426 if (err) { 1427 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1428 goto err_unmap_fa; 1429 } 1430 1431 return 0; 1432 1433err_unmap_fa: 1434 unmap_flag = mlx4_UNMAP_FA(dev); 1435 if (unmap_flag) 1436 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1437 1438err_free: 1439 if (!unmap_flag) 1440 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1441 return err; 1442} 1443 1444static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1445 int cmpt_entry_sz) 1446{ 1447 struct mlx4_priv *priv = mlx4_priv(dev); 1448 int err; 1449 int num_eqs; 1450 1451 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1452 cmpt_base + 1453 ((u64) (MLX4_CMPT_TYPE_QP * 1454 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1455 cmpt_entry_sz, dev->caps.num_qps, 1456 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1457 0, 0); 1458 if (err) 1459 goto err; 1460 1461 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1462 cmpt_base + 1463 ((u64) (MLX4_CMPT_TYPE_SRQ * 1464 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1465 cmpt_entry_sz, dev->caps.num_srqs, 1466 dev->caps.reserved_srqs, 0, 0); 1467 if (err) 1468 goto err_qp; 1469 1470 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1471 cmpt_base + 1472 ((u64) (MLX4_CMPT_TYPE_CQ * 1473 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1474 cmpt_entry_sz, dev->caps.num_cqs, 1475 dev->caps.reserved_cqs, 0, 0); 1476 if (err) 1477 goto err_srq; 1478 1479 num_eqs = (mlx4_is_master(dev)) ? 
dev->phys_caps.num_phys_eqs : 1480 dev->caps.num_eqs; 1481 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1482 cmpt_base + 1483 ((u64) (MLX4_CMPT_TYPE_EQ * 1484 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1485 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1486 if (err) 1487 goto err_cq; 1488 1489 return 0; 1490 1491err_cq: 1492 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1493 1494err_srq: 1495 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1496 1497err_qp: 1498 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1499 1500err: 1501 return err; 1502} 1503 1504static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1505 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1506{ 1507 struct mlx4_priv *priv = mlx4_priv(dev); 1508 u64 aux_pages; 1509 int num_eqs; 1510 int err, unmap_flag = 0; 1511 1512 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1513 if (err) { 1514 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1515 return err; 1516 } 1517 1518 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1519 (unsigned long long) icm_size >> 10, 1520 (unsigned long long) aux_pages << 2); 1521 1522 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1523 GFP_HIGHUSER | __GFP_NOWARN, 0); 1524 if (!priv->fw.aux_icm) { 1525 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1526 return -ENOMEM; 1527 } 1528 1529 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1530 if (err) { 1531 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1532 goto err_free_aux; 1533 } 1534 1535 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1536 if (err) { 1537 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1538 goto err_unmap_aux; 1539 } 1540 1541 1542 num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : 1543 dev->caps.num_eqs; 1544 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1545 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1546 num_eqs, num_eqs, 0, 0); 1547 if (err) { 1548 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 1549 goto err_unmap_cmpt; 1550 } 1551 1552 /* 1553 * Reserved MTT entries must be aligned up to a cacheline 1554 * boundary, since the FW will write to them, while the driver 1555 * writes to all other MTT entries. 
(The variable 1556 * dev->caps.mtt_entry_sz below is really the MTT segment 1557 * size, not the raw entry size) 1558 */ 1559 dev->caps.reserved_mtts = 1560 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1561 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1562 1563 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1564 init_hca->mtt_base, 1565 dev->caps.mtt_entry_sz, 1566 dev->caps.num_mtts, 1567 dev->caps.reserved_mtts, 1, 0); 1568 if (err) { 1569 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1570 goto err_unmap_eq; 1571 } 1572 1573 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1574 init_hca->dmpt_base, 1575 dev_cap->dmpt_entry_sz, 1576 dev->caps.num_mpts, 1577 dev->caps.reserved_mrws, 1, 1); 1578 if (err) { 1579 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1580 goto err_unmap_mtt; 1581 } 1582 1583 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1584 init_hca->qpc_base, 1585 dev_cap->qpc_entry_sz, 1586 dev->caps.num_qps, 1587 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1588 0, 0); 1589 if (err) { 1590 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1591 goto err_unmap_dmpt; 1592 } 1593 1594 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1595 init_hca->auxc_base, 1596 dev_cap->aux_entry_sz, 1597 dev->caps.num_qps, 1598 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1599 0, 0); 1600 if (err) { 1601 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1602 goto err_unmap_qp; 1603 } 1604 1605 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1606 init_hca->altc_base, 1607 dev_cap->altc_entry_sz, 1608 dev->caps.num_qps, 1609 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1610 0, 0); 1611 if (err) { 1612 mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n"); 1613 goto err_unmap_auxc; 1614 } 1615 1616 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1617 init_hca->rdmarc_base, 1618 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1619 dev->caps.num_qps, 1620 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1621 0, 0); 1622 if (err) { 1623 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1624 goto err_unmap_altc; 1625 } 1626 1627 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1628 init_hca->cqc_base, 1629 dev_cap->cqc_entry_sz, 1630 dev->caps.num_cqs, 1631 dev->caps.reserved_cqs, 0, 0); 1632 if (err) { 1633 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1634 goto err_unmap_rdmarc; 1635 } 1636 1637 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1638 init_hca->srqc_base, 1639 dev_cap->srq_entry_sz, 1640 dev->caps.num_srqs, 1641 dev->caps.reserved_srqs, 0, 0); 1642 if (err) { 1643 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1644 goto err_unmap_cq; 1645 } 1646 1647 /* 1648 * For flow steering device managed mode it is required to use 1649 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1650 * required, but for simplicity just map the whole multicast 1651 * group table now. The table isn't very big and it's a lot 1652 * easier than trying to track ref counts. 
1653 */ 1654 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1655 init_hca->mc_base, 1656 mlx4_get_mgm_entry_size(dev), 1657 dev->caps.num_mgms + dev->caps.num_amgms, 1658 dev->caps.num_mgms + dev->caps.num_amgms, 1659 0, 0); 1660 if (err) { 1661 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1662 goto err_unmap_srq; 1663 } 1664 1665 return 0; 1666 1667err_unmap_srq: 1668 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1669 1670err_unmap_cq: 1671 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1672 1673err_unmap_rdmarc: 1674 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1675 1676err_unmap_altc: 1677 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1678 1679err_unmap_auxc: 1680 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1681 1682err_unmap_qp: 1683 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1684 1685err_unmap_dmpt: 1686 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1687 1688err_unmap_mtt: 1689 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1690 1691err_unmap_eq: 1692 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1693 1694err_unmap_cmpt: 1695 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1696 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1697 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1698 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1699 1700err_unmap_aux: 1701 unmap_flag = mlx4_UNMAP_ICM_AUX(dev); 1702 if (unmap_flag) 1703 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1704 1705err_free_aux: 1706 if (!unmap_flag) 1707 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1708 1709 return err; 1710} 1711 1712static void mlx4_free_icms(struct mlx4_dev *dev) 1713{ 1714 struct mlx4_priv *priv = mlx4_priv(dev); 1715 1716 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1717 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1718 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1719 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1720 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1721 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1722 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1723 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1724 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1725 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1726 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1727 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1728 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1729 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1730 1731 if (!mlx4_UNMAP_ICM_AUX(dev)) 1732 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1733 else 1734 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1735} 1736 1737static void mlx4_slave_exit(struct mlx4_dev *dev) 1738{ 1739 struct mlx4_priv *priv = mlx4_priv(dev); 1740 1741 mutex_lock(&priv->cmd.slave_cmd_mutex); 1742 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1743 mlx4_warn(dev, "Failed to close slave function.\n"); 1744 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1745} 1746 1747static int map_bf_area(struct mlx4_dev *dev) 1748{ 1749 struct mlx4_priv *priv = mlx4_priv(dev); 1750 resource_size_t bf_start; 1751 resource_size_t bf_len; 1752 int err = 0; 1753 1754 if (!dev->caps.bf_reg_size) 1755 return -ENXIO; 1756 1757 bf_start = pci_resource_start(dev->pdev, 2) + 1758 (dev->caps.num_uars << PAGE_SHIFT); 1759 bf_len = pci_resource_len(dev->pdev, 2) - 1760 (dev->caps.num_uars << 
PAGE_SHIFT); 1761 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1762 if (!priv->bf_mapping) 1763 err = -ENOMEM; 1764 1765 return err; 1766} 1767 1768static void unmap_bf_area(struct mlx4_dev *dev) 1769{ 1770 if (mlx4_priv(dev)->bf_mapping) 1771 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1772} 1773 1774int mlx4_read_clock(struct mlx4_dev *dev) 1775{ 1776 u32 clockhi, clocklo, clockhi1; 1777 cycle_t cycles; 1778 int i; 1779 struct mlx4_priv *priv = mlx4_priv(dev); 1780 1781 if (!priv->clock_mapping) 1782 return -ENOTSUPP; 1783 1784 for (i = 0; i < 10; i++) { 1785 clockhi = swab32(readl(priv->clock_mapping)); 1786 clocklo = swab32(readl(priv->clock_mapping + 4)); 1787 clockhi1 = swab32(readl(priv->clock_mapping)); 1788 if (clockhi == clockhi1) 1789 break; 1790 } 1791 1792 cycles = (u64) clockhi << 32 | (u64) clocklo; 1793 1794 return cycles; 1795} 1796EXPORT_SYMBOL_GPL(mlx4_read_clock); 1797 1798 1799static int map_internal_clock(struct mlx4_dev *dev) 1800{ 1801 struct mlx4_priv *priv = mlx4_priv(dev); 1802 1803 priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, 1804 priv->fw.clock_bar) + 1805 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1806 1807 if (!priv->clock_mapping) 1808 return -ENOMEM; 1809 1810 return 0; 1811} 1812 1813 1814int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1815 struct mlx4_clock_params *params) 1816{ 1817 struct mlx4_priv *priv = mlx4_priv(dev); 1818 1819 if (mlx4_is_slave(dev)) 1820 return -ENOTSUPP; 1821 if (!params) 1822 return -EINVAL; 1823 1824 params->bar = priv->fw.clock_bar; 1825 params->offset = priv->fw.clock_offset; 1826 params->size = MLX4_CLOCK_SIZE; 1827 1828 return 0; 1829} 1830EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1831 1832static void unmap_internal_clock(struct mlx4_dev *dev) 1833{ 1834 struct mlx4_priv *priv = mlx4_priv(dev); 1835 1836 if (priv->clock_mapping) 1837 iounmap(priv->clock_mapping); 1838} 1839 1840static void mlx4_close_hca(struct mlx4_dev *dev) 1841{ 1842 unmap_internal_clock(dev); 1843 unmap_bf_area(dev); 1844 if (mlx4_is_slave(dev)) { 1845 mlx4_slave_exit(dev); 1846 } else { 1847 mlx4_CLOSE_HCA(dev, 0); 1848 mlx4_free_icms(dev); 1849 1850 if (!mlx4_UNMAP_FA(dev)) 1851 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1852 else 1853 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1854 } 1855} 1856 1857static int mlx4_init_slave(struct mlx4_dev *dev) 1858{ 1859 struct mlx4_priv *priv = mlx4_priv(dev); 1860 u64 dma = (u64) priv->mfunc.vhcr_dma; 1861 int num_of_reset_retries = NUM_OF_RESET_RETRIES; 1862 int ret_from_reset = 0; 1863 u32 slave_read; 1864 u32 cmd_channel_ver; 1865 1866 mutex_lock(&priv->cmd.slave_cmd_mutex); 1867 priv->cmd.max_cmds = 1; 1868 mlx4_warn(dev, "Sending reset\n"); 1869 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1870 MLX4_COMM_TIME); 1871 /* if we are in the middle of flr the slave will try 1872 * NUM_OF_RESET_RETRIES times before leaving.*/ 1873 if (ret_from_reset) { 1874 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1875 msleep(SLEEP_TIME_IN_RESET); 1876 while (ret_from_reset && num_of_reset_retries) { 1877 mlx4_warn(dev, "slave is currently in the" 1878 "middle of FLR. retrying..." 
1879 "(try num:%d)\n", 1880 (NUM_OF_RESET_RETRIES - 1881 num_of_reset_retries + 1)); 1882 ret_from_reset = 1883 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 1884 0, MLX4_COMM_TIME); 1885 num_of_reset_retries = num_of_reset_retries - 1; 1886 } 1887 } else 1888 goto err; 1889 } 1890 1891 /* check the driver version - the slave I/F revision 1892 * must match the master's */ 1893 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1894 cmd_channel_ver = mlx4_comm_get_version(); 1895 1896 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1897 MLX4_COMM_GET_IF_REV(slave_read)) { 1898 mlx4_err(dev, "slave driver version is not supported" 1899 " by the master\n"); 1900 goto err; 1901 } 1902 1903 mlx4_warn(dev, "Sending vhcr0\n"); 1904 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1905 MLX4_COMM_TIME)) 1906 goto err; 1907 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1908 MLX4_COMM_TIME)) 1909 goto err; 1910 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1911 MLX4_COMM_TIME)) 1912 goto err; 1913 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1914 goto err; 1915 1916 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1917 return 0; 1918 1919err: 1920 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1921 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1922 return -EIO; 1923} 1924 1925static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1926{ 1927 int i; 1928 1929 for (i = 1; i <= dev->caps.num_ports; i++) { 1930 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 1931 dev->caps.gid_table_len[i] = 1932 mlx4_get_slave_num_gids(dev, 0); 1933 else 1934 dev->caps.gid_table_len[i] = 1; 1935 dev->caps.pkey_table_len[i] = 1936 dev->phys_caps.pkey_phys_table_len[i] - 1; 1937 } 1938} 1939 1940static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1941{ 1942 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1943 1944 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1945 i++) { 1946 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1947 break; 1948 } 1949 1950 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1951} 1952 1953static void choose_steering_mode(struct mlx4_dev *dev, 1954 struct mlx4_dev_cap *dev_cap) 1955{ 1956 int nvfs; 1957 1958 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); 1959 if (high_rate_steer && !mlx4_is_mfunc(dev)) { 1960 dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | 1961 MLX4_DEV_CAP_FLAG_VEP_UC_STEER); 1962 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; 1963 } 1964 1965 if (mlx4_log_num_mgm_entry_size == -1 && 1966 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1967 (!mlx4_is_mfunc(dev) || 1968 (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && 1969 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1970 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1971 dev->oper_log_mgm_entry_size = 1972 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1973 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1974 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1975 } else { 1976 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 1977 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1978 dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 1979 else { 1980 dev->caps.steering_mode = MLX4_STEERING_MODE_A0; 1981 1982 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER || 1983 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 1984 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags " 1985 "set to use B0 steering. 
Falling back to A0 steering mode.\n"); 1986 } 1987 dev->oper_log_mgm_entry_size = 1988 mlx4_log_num_mgm_entry_size > 0 ? 1989 mlx4_log_num_mgm_entry_size : 1990 MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 1991 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev); 1992 } 1993 mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, " 1994 "log_num_mgm_entry_size = %d\n", 1995 mlx4_steering_mode_str(dev->caps.steering_mode), 1996 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size); 1997} 1998 1999static int mlx4_init_hca(struct mlx4_dev *dev) 2000{ 2001 struct mlx4_priv *priv = mlx4_priv(dev); 2002 struct mlx4_dev_cap *dev_cap = NULL; 2003 struct mlx4_adapter adapter; 2004 struct mlx4_mod_stat_cfg mlx4_cfg; 2005 struct mlx4_profile profile; 2006 struct mlx4_init_hca_param init_hca; 2007 u64 icm_size; 2008 int err; 2009 2010 if (!mlx4_is_slave(dev)) { 2011 err = mlx4_QUERY_FW(dev); 2012 if (err) { 2013 if (err == -EACCES) 2014 mlx4_info(dev, "non-primary physical function, skipping.\n"); 2015 else 2016 mlx4_err(dev, "QUERY_FW command failed, aborting.\n"); 2017 return err; 2018 } 2019 2020 err = mlx4_load_fw(dev); 2021 if (err) { 2022 mlx4_err(dev, "Failed to start FW, aborting.\n"); 2023 return err; 2024 } 2025 2026 mlx4_cfg.log_pg_sz_m = 1; 2027 mlx4_cfg.log_pg_sz = 0; 2028 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); 2029 if (err) 2030 mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); 2031 2032 dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL); 2033 if (!dev_cap) { 2034 mlx4_err(dev, "Failed to allocate memory for dev_cap\n"); 2035 err = -ENOMEM; 2036 goto err_stop_fw; 2037 } 2038 2039 err = mlx4_dev_cap(dev, dev_cap); 2040 if (err) { 2041 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 2042 goto err_stop_fw; 2043 } 2044 2045 choose_steering_mode(dev, dev_cap); 2046 2047 if (mlx4_is_master(dev)) 2048 mlx4_parav_master_pf_caps(dev); 2049 2050 process_mod_param_profile(&profile); 2051 if (dev->caps.steering_mode == 2052 MLX4_STEERING_MODE_DEVICE_MANAGED) 2053 profile.num_mcg = MLX4_FS_NUM_MCG; 2054 2055 icm_size = mlx4_make_profile(dev, &profile, dev_cap, 2056 &init_hca); 2057 if ((long long) icm_size < 0) { 2058 err = icm_size; 2059 goto err_stop_fw; 2060 } 2061 2062 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; 2063 2064 init_hca.log_uar_sz = ilog2(dev->caps.num_uars); 2065 init_hca.uar_page_sz = PAGE_SHIFT - 12; 2066 2067 err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size); 2068 if (err) 2069 goto err_stop_fw; 2070 2071 init_hca.mw_enable = 1; 2072 2073 err = mlx4_INIT_HCA(dev, &init_hca); 2074 if (err) { 2075 mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); 2076 goto err_free_icm; 2077 } 2078 2079 /* 2080 * Read HCA frequency by QUERY_HCA command 2081 */ 2082 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { 2083 memset(&init_hca, 0, sizeof(init_hca)); 2084 err = mlx4_QUERY_HCA(dev, &init_hca); 2085 if (err) { 2086 mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); 2087 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2088 } else { 2089 dev->caps.hca_core_clock = 2090 init_hca.hca_core_clock; 2091 } 2092 2093 /* In case we got HCA frequency 0 - disable timestamping 2094 * to avoid dividing by zero 2095 */ 2096 if (!dev->caps.hca_core_clock) { 2097 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2098 mlx4_err(dev, "HCA frequency is 0. 
Timestamping is not supported."); 2099 } else if (map_internal_clock(dev)) { 2100 /* Map internal clock, 2101 * in case of failure disable timestamping 2102 */ 2103 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 2104 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); 2105 } 2106 } 2107 } else { 2108 err = mlx4_init_slave(dev); 2109 if (err) { 2110 mlx4_err(dev, "Failed to initialize slave\n"); 2111 return err; 2112 } 2113 2114 err = mlx4_slave_cap(dev); 2115 if (err) { 2116 mlx4_err(dev, "Failed to obtain slave caps\n"); 2117 goto err_close; 2118 } 2119 } 2120 2121 if (map_bf_area(dev)) 2122 mlx4_dbg(dev, "Failed to map blue flame area\n"); 2123 2124 /* Only the master set the ports, all the rest got it from it.*/ 2125 if (!mlx4_is_slave(dev)) 2126 mlx4_set_port_mask(dev); 2127 2128 err = mlx4_QUERY_ADAPTER(dev, &adapter); 2129 if (err) { 2130 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); 2131 goto unmap_bf; 2132 } 2133 2134 priv->eq_table.inta_pin = adapter.inta_pin; 2135 memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); 2136 memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); 2137 dev->vsd_vendor_id = adapter.vsd_vendor_id; 2138 2139 if (!mlx4_is_slave(dev)) 2140 kfree(dev_cap); 2141 2142 return 0; 2143 2144unmap_bf: 2145 if (!mlx4_is_slave(dev)) 2146 unmap_internal_clock(dev); 2147 unmap_bf_area(dev); 2148 2149 if (mlx4_is_slave(dev)) { 2150 kfree(dev->caps.qp0_tunnel); 2151 kfree(dev->caps.qp0_proxy); 2152 kfree(dev->caps.qp1_tunnel); 2153 kfree(dev->caps.qp1_proxy); 2154 } 2155 2156err_close: 2157 if (mlx4_is_slave(dev)) 2158 mlx4_slave_exit(dev); 2159 else 2160 mlx4_CLOSE_HCA(dev, 0); 2161 2162err_free_icm: 2163 if (!mlx4_is_slave(dev)) 2164 mlx4_free_icms(dev); 2165 2166err_stop_fw: 2167 if (!mlx4_is_slave(dev)) { 2168 if (!mlx4_UNMAP_FA(dev)) 2169 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 2170 else 2171 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 2172 kfree(dev_cap); 2173 } 2174 return err; 2175} 2176 2177static int mlx4_init_counters_table(struct mlx4_dev *dev) 2178{ 2179 struct mlx4_priv *priv = mlx4_priv(dev); 2180 int nent_pow2, port_indx, vf_index, num_counters; 2181 int res, index = 0; 2182 struct counter_index *new_counter_index; 2183 2184 2185 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2186 return -ENOENT; 2187 2188 if (!mlx4_is_slave(dev) && 2189 dev->caps.max_counters == dev->caps.max_extended_counters) { 2190 res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0, 2191 MLX4_CMD_SET_IF_STAT, 2192 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 2193 if (res) { 2194 mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res); 2195 return res; 2196 } 2197 } 2198 2199 mutex_init(&priv->counters_table.mutex); 2200 2201 if (mlx4_is_slave(dev)) { 2202 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { 2203 INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); 2204 if (dev->caps.def_counter_index[port_indx] != 0xFF) { 2205 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2206 if (!new_counter_index) 2207 return -ENOMEM; 2208 new_counter_index->index = dev->caps.def_counter_index[port_indx]; 2209 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); 2210 } 2211 } 2212 mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n", 2213 __func__, dev->caps.num_ports, dev->caps.num_ports); 2214 return 0; 2215 } 2216 2217 nent_pow2 = roundup_pow_of_two(dev->caps.max_counters); 2218 2219 for (port_indx = 0; port_indx < dev->caps.num_ports; 
port_indx++) { 2220 INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]); 2221 /* allocating 2 counters per port for PFs */ 2222 /* For the PF, the ETH default counters are 0,2; */ 2223 /* and the RoCE default counters are 1,3 */ 2224 for (num_counters = 0; num_counters < 2; num_counters++, index++) { 2225 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2226 if (!new_counter_index) 2227 return -ENOMEM; 2228 new_counter_index->index = index; 2229 list_add_tail(&new_counter_index->list, 2230 &priv->counters_table.global_port_list[port_indx]); 2231 } 2232 } 2233 2234 if (mlx4_is_master(dev)) { 2235 for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { 2236 for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { 2237 INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); 2238 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2239 if (!new_counter_index) 2240 return -ENOMEM; 2241 if (index < nent_pow2 - 2) { 2242 new_counter_index->index = index; 2243 index++; 2244 } else { 2245 new_counter_index->index = MLX4_SINK_COUNTER_INDEX; 2246 } 2247 2248 list_add_tail(&new_counter_index->list, 2249 &priv->counters_table.vf_list[vf_index][port_indx]); 2250 } 2251 } 2252 2253 res = mlx4_bitmap_init(&priv->counters_table.bitmap, 2254 nent_pow2, nent_pow2 - 1, 2255 index, 1); 2256 mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", 2257 __func__, index, dev->num_vfs); 2258 } else { 2259 res = mlx4_bitmap_init(&priv->counters_table.bitmap, 2260 nent_pow2, nent_pow2 - 1, 2261 index, 1); 2262 mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", 2263 __func__, index, dev->caps.num_ports); 2264 } 2265 2266 return 0; 2267 2268} 2269 2270static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) 2271{ 2272 struct mlx4_priv *priv = mlx4_priv(dev); 2273 int i, j; 2274 struct counter_index *port, *tmp_port; 2275 struct counter_index *vf, *tmp_vf; 2276 2277 mutex_lock(&priv->counters_table.mutex); 2278 2279 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { 2280 for (i = 0; i < dev->caps.num_ports; i++) { 2281 list_for_each_entry_safe(port, tmp_port, 2282 &priv->counters_table.global_port_list[i], 2283 list) { 2284 list_del(&port->list); 2285 kfree(port); 2286 } 2287 } 2288 if (!mlx4_is_slave(dev)) { 2289 for (i = 0; i < dev->num_vfs; i++) { 2290 for (j = 0; j < dev->caps.num_ports; j++) { 2291 list_for_each_entry_safe(vf, tmp_vf, 2292 &priv->counters_table.vf_list[i][j], 2293 list) { 2294 /* clear the counter statistic */ 2295 if (__mlx4_clear_if_stat(dev, vf->index)) 2296 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2297 __func__, vf->index); 2298 list_del(&vf->list); 2299 kfree(vf); 2300 } 2301 } 2302 } 2303 mlx4_bitmap_cleanup(&priv->counters_table.bitmap); 2304 } 2305 } 2306 mutex_unlock(&priv->counters_table.mutex); 2307} 2308 2309int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave) 2310{ 2311 struct mlx4_priv *priv = mlx4_priv(dev); 2312 int i, first; 2313 struct counter_index *vf, *tmp_vf; 2314 2315 /* clean VF's counters for the next usage */ 2316 if (slave > 0 && slave <= dev->num_vfs) { 2317 mlx4_dbg(dev, "%s: free counters of slave(%d)\n" 2318 , __func__, slave); 2319 2320 mutex_lock(&priv->counters_table.mutex); 2321 for (i = 0; i < dev->caps.num_ports; i++) { 2322 first = 0; 2323 list_for_each_entry_safe(vf, tmp_vf, 2324 &priv->counters_table.vf_list[slave - 1][i], 2325 list) { 2326 /* clear the counter statistic */ 2327 if (__mlx4_clear_if_stat(dev, vf->index)) 2328 mlx4_dbg(dev, 
"%s: reset counter %d failed\n", 2329 __func__, vf->index); 2330 if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) { 2331 mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n" 2332 , __func__, vf->index, slave, i + 1); 2333 mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR); 2334 list_del(&vf->list); 2335 kfree(vf); 2336 } else { 2337 mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n" 2338 , __func__, vf->index, slave, i + 1); 2339 } 2340 } 2341 } 2342 mutex_unlock(&priv->counters_table.mutex); 2343 } 2344 2345 return 0; 2346} 2347 2348int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx) 2349{ 2350 struct mlx4_priv *priv = mlx4_priv(dev); 2351 struct counter_index *new_counter_index; 2352 2353 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)) 2354 return -ENOENT; 2355 2356 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2357 (port < 0) || (port > MLX4_MAX_PORTS)) { 2358 mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n", 2359 __func__, slave, port); 2360 return -EINVAL; 2361 } 2362 2363 /* handle old guest request does not support request by port index */ 2364 if (port == 0) { 2365 *idx = MLX4_SINK_COUNTER_INDEX; 2366 mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n" 2367 , __func__, *idx, slave, port); 2368 return 0; 2369 } 2370 2371 mutex_lock(&priv->counters_table.mutex); 2372 2373 *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap); 2374 /* if no resources return the default counter of the slave and port */ 2375 if (*idx == -1) { 2376 if (slave == 0) { /* its the ethernet counter ?????? */ 2377 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2378 struct counter_index, 2379 list); 2380 } else { 2381 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2382 struct counter_index, 2383 list); 2384 } 2385 2386 *idx = new_counter_index->index; 2387 mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n" 2388 , __func__, *idx, slave, port); 2389 goto out; 2390 } 2391 2392 if (slave == 0) { /* native or master */ 2393 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2394 if (!new_counter_index) 2395 goto no_mem; 2396 new_counter_index->index = *idx; 2397 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2398 } else { 2399 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2400 if (!new_counter_index) 2401 goto no_mem; 2402 new_counter_index->index = *idx; 2403 list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]); 2404 } 2405 2406 mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n" 2407 , __func__, *idx, slave, port); 2408out: 2409 mutex_unlock(&priv->counters_table.mutex); 2410 return 0; 2411 2412no_mem: 2413 mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR); 2414 mutex_unlock(&priv->counters_table.mutex); 2415 *idx = MLX4_SINK_COUNTER_INDEX; 2416 mlx4_dbg(dev, "%s: failed err (%d)\n" 2417 , __func__, -ENOMEM); 2418 return -ENOMEM; 2419} 2420 2421int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx) 2422{ 2423 u64 out_param; 2424 int err; 2425 struct mlx4_priv *priv = mlx4_priv(dev); 2426 struct counter_index *new_counter_index, *c_index; 2427 2428 if (mlx4_is_mfunc(dev)) { 2429 err = mlx4_cmd_imm(dev, 0, &out_param, 2430 ((u32) port) << 8 | (u32) RES_COUNTER, 2431 RES_OP_RESERVE, MLX4_CMD_ALLOC_RES, 
2432 MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 2433 if (!err) { 2434 *idx = get_param_l(&out_param); 2435 if (*idx == MLX4_SINK_COUNTER_INDEX) 2436 return -ENOSPC; 2437 2438 mutex_lock(&priv->counters_table.mutex); 2439 c_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2440 struct counter_index, 2441 list); 2442 mutex_unlock(&priv->counters_table.mutex); 2443 if (c_index->index == *idx) 2444 return -EEXIST; 2445 2446 if (mlx4_is_slave(dev)) { 2447 new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); 2448 if (!new_counter_index) { 2449 mlx4_counter_free(dev, port, *idx); 2450 return -ENOMEM; 2451 } 2452 new_counter_index->index = *idx; 2453 mutex_lock(&priv->counters_table.mutex); 2454 list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); 2455 mutex_unlock(&priv->counters_table.mutex); 2456 mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" 2457 , __func__, *idx, port); 2458 } 2459 } 2460 return err; 2461 } 2462 return __mlx4_counter_alloc(dev, 0, port, idx); 2463} 2464EXPORT_SYMBOL_GPL(mlx4_counter_alloc); 2465 2466void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) 2467{ 2468 /* check if native or slave and delete accordingly */ 2469 struct mlx4_priv *priv = mlx4_priv(dev); 2470 struct counter_index *pf, *tmp_pf; 2471 struct counter_index *vf, *tmp_vf; 2472 int first; 2473 2474 2475 if (idx == MLX4_SINK_COUNTER_INDEX) { 2476 mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" 2477 , __func__, idx, port); 2478 return; 2479 } 2480 2481 if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || 2482 (port < 0) || (port > MLX4_MAX_PORTS)) { 2483 mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" 2484 , __func__, slave, port); 2485 return; 2486 } 2487 2488 mutex_lock(&priv->counters_table.mutex); 2489 if (slave == 0) { 2490 first = 0; 2491 list_for_each_entry_safe(pf, tmp_pf, 2492 &priv->counters_table.global_port_list[port - 1], 2493 list) { 2494 /* the first 2 counters are reserved */ 2495 if (pf->index == idx) { 2496 /* clear the counter statistic */ 2497 if (__mlx4_clear_if_stat(dev, pf->index)) 2498 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2499 __func__, pf->index); 2500 if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { 2501 list_del(&pf->list); 2502 kfree(pf); 2503 mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" 2504 , __func__, idx, slave, port); 2505 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2506 goto out; 2507 } else { 2508 mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n" 2509 , __func__, idx, slave, port); 2510 goto out; 2511 } 2512 } 2513 first++; 2514 } 2515 mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" 2516 , __func__, idx, slave, port); 2517 } else { 2518 first = 0; 2519 list_for_each_entry_safe(vf, tmp_vf, 2520 &priv->counters_table.vf_list[slave - 1][port - 1], 2521 list) { 2522 /* the first element is reserved */ 2523 if (vf->index == idx) { 2524 /* clear the counter statistic */ 2525 if (__mlx4_clear_if_stat(dev, vf->index)) 2526 mlx4_dbg(dev, "%s: reset counter %d failed\n", 2527 __func__, vf->index); 2528 if (first) { 2529 list_del(&vf->list); 2530 kfree(vf); 2531 mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", 2532 __func__, idx, slave, port); 2533 mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); 2534 goto out; 2535 } else { 2536 mlx4_dbg(dev, "%s: 
can't delete default slave (%d) counter index %d for port %d\n" 2537 , __func__, slave, idx, port); 2538 goto out; 2539 } 2540 } 2541 first++; 2542 } 2543 mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" 2544 , __func__, slave, idx, port); 2545 } 2546 2547out: 2548 mutex_unlock(&priv->counters_table.mutex); 2549} 2550 2551void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) 2552{ 2553 u64 in_param = 0; 2554 struct mlx4_priv *priv = mlx4_priv(dev); 2555 struct counter_index *counter, *tmp_counter; 2556 int first = 0; 2557 2558 if (mlx4_is_mfunc(dev)) { 2559 set_param_l(&in_param, idx); 2560 mlx4_cmd(dev, in_param, 2561 ((u32) port) << 8 | (u32) RES_COUNTER, 2562 RES_OP_RESERVE, 2563 MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, 2564 MLX4_CMD_WRAPPED); 2565 2566 if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { 2567 mutex_lock(&priv->counters_table.mutex); 2568 list_for_each_entry_safe(counter, tmp_counter, 2569 &priv->counters_table.global_port_list[port - 1], 2570 list) { 2571 if (counter->index == idx && first++) { 2572 list_del(&counter->list); 2573 kfree(counter); 2574 mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" 2575 , __func__, idx, port); 2576 mutex_unlock(&priv->counters_table.mutex); 2577 return; 2578 } 2579 } 2580 mutex_unlock(&priv->counters_table.mutex); 2581 } 2582 2583 return; 2584 } 2585 __mlx4_counter_free(dev, 0, port, idx); 2586} 2587EXPORT_SYMBOL_GPL(mlx4_counter_free); 2588 2589int __mlx4_clear_if_stat(struct mlx4_dev *dev, 2590 u8 counter_index) 2591{ 2592 struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; 2593 int err = 0; 2594 u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31); 2595 2596 if (counter_index == MLX4_SINK_COUNTER_INDEX) 2597 return -EINVAL; 2598 2599 if (mlx4_is_slave(dev)) 2600 return 0; 2601 2602 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2603 if (IS_ERR(if_stat_mailbox)) { 2604 err = PTR_ERR(if_stat_mailbox); 2605 return err; 2606 } 2607 2608 err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, 2609 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, 2610 MLX4_CMD_NATIVE); 2611 2612 mlx4_free_cmd_mailbox(dev, if_stat_mailbox); 2613 return err; 2614} 2615 2616u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port) 2617{ 2618 struct mlx4_priv *priv = mlx4_priv(dev); 2619 struct counter_index *new_counter_index; 2620 2621 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) { 2622 mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n", 2623 __func__, MLX4_SINK_COUNTER_INDEX, slave, port); 2624 return (u8)MLX4_SINK_COUNTER_INDEX; 2625 } 2626 2627 mutex_lock(&priv->counters_table.mutex); 2628 if (slave == 0) { 2629 new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next, 2630 struct counter_index, 2631 list); 2632 } else { 2633 new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next, 2634 struct counter_index, 2635 list); 2636 } 2637 mutex_unlock(&priv->counters_table.mutex); 2638 2639 mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n", 2640 __func__, new_counter_index->index, slave, port); 2641 2642 2643 return (u8)new_counter_index->index; 2644} 2645 2646int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port, 2647 struct mlx4_en_vport_stats *vport_stats, 2648 int reset) 2649{ 2650 struct mlx4_priv *priv = mlx4_priv(dev); 2651 struct mlx4_cmd_mailbox *if_stat_mailbox = NULL; 2652 union mlx4_counter *counter; 2653 int err = 0; 2654 u32 if_stat_in_mod; 
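/* The loop below walks this port's global counter list, queries each non-sink counter via MLX4_CMD_QUERY_IF_STAT, and accumulates the extended (cnt_mode 1) RX/TX frame, octet, error and drop counts into vport_stats. */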
2655 struct counter_index *vport, *tmp_vport; 2656 2657 if (!vport_stats) 2658 return -EINVAL; 2659 2660 if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev); 2661 if (IS_ERR(if_stat_mailbox)) { 2662 err = PTR_ERR(if_stat_mailbox); 2663 return err; 2664 } 2665 2666 mutex_lock(&priv->counters_table.mutex); 2667 list_for_each_entry_safe(vport, tmp_vport, 2668 &priv->counters_table.global_port_list[port - 1], 2669 list) { 2670 if (vport->index == MLX4_SINK_COUNTER_INDEX) 2671 continue; 2672 2673 memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); 2674 if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); 2675 err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, 2676 if_stat_in_mod, 0, 2677 MLX4_CMD_QUERY_IF_STAT, 2678 MLX4_CMD_TIME_CLASS_C, 2679 MLX4_CMD_NATIVE); 2680 if (err) { 2681 mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", 2682 __func__, vport->index); 2683 goto if_stat_out; 2684 } 2685 counter = (union mlx4_counter *)if_stat_mailbox->buf; 2686 if ((counter->control.cnt_mode & 0xf) == 1) { 2687 vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); 2688 vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); 2689 vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); 2690 vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); 2691 vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); 2692 vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); 2693 vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); 2694 vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); 2695 vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); 2696 vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); 2697 vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); 2698 vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); 2699 vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); 2700 vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); 2701 vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); 2702 } 2703 } 2704 2705if_stat_out: 2706 mutex_unlock(&priv->counters_table.mutex); 2707 mlx4_free_cmd_mailbox(dev, if_stat_mailbox); 2708 2709 return err; 2710} 2711EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); 2712 2713static int mlx4_setup_hca(struct mlx4_dev *dev) 2714{ 2715 struct mlx4_priv *priv = mlx4_priv(dev); 2716 int err; 2717 int port; 2718 __be32 ib_port_default_caps; 2719 2720 err = mlx4_init_uar_table(dev); 2721 if (err) { 2722 mlx4_err(dev, "Failed to initialize " 2723 "user access region table (err=%d), aborting.\n", 2724 err); 2725 return err; 2726 } 2727 2728 err = mlx4_uar_alloc(dev, &priv->driver_uar); 2729 if (err) { 2730 mlx4_err(dev, "Failed to allocate driver access region " 2731 "(err=%d), aborting.\n", err); 2732 goto err_uar_table_free; 2733 } 2734 2735 priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); 2736 if (!priv->kar) { 2737 mlx4_err(dev, "Couldn't map kernel access region, " 2738 "aborting.\n"); 2739 err = -ENOMEM; 2740 goto err_uar_free; 2741 } 2742 2743 err = 
mlx4_init_pd_table(dev); 2744 if (err) { 2745 mlx4_err(dev, "Failed to initialize " 2746 "protection domain table (err=%d), aborting.\n", err); 2747 goto err_kar_unmap; 2748 } 2749 2750 err = mlx4_init_xrcd_table(dev); 2751 if (err) { 2752 mlx4_err(dev, "Failed to initialize " 2753 "reliable connection domain table (err=%d), " 2754 "aborting.\n", err); 2755 goto err_pd_table_free; 2756 } 2757 2758 err = mlx4_init_mr_table(dev); 2759 if (err) { 2760 mlx4_err(dev, "Failed to initialize " 2761 "memory region table (err=%d), aborting.\n", err); 2762 goto err_xrcd_table_free; 2763 } 2764 2765 if (!mlx4_is_slave(dev)) { 2766 err = mlx4_init_mcg_table(dev); 2767 if (err) { 2768 mlx4_err(dev, "Failed to initialize " 2769 "multicast group table (err=%d), aborting.\n", 2770 err); 2771 goto err_mr_table_free; 2772 } 2773 } 2774 2775 err = mlx4_init_eq_table(dev); 2776 if (err) { 2777 mlx4_err(dev, "Failed to initialize " 2778 "event queue table (err=%d), aborting.\n", err); 2779 goto err_mcg_table_free; 2780 } 2781 2782 err = mlx4_cmd_use_events(dev); 2783 if (err) { 2784 mlx4_err(dev, "Failed to switch to event-driven " 2785 "firmware commands (err=%d), aborting.\n", err); 2786 goto err_eq_table_free; 2787 } 2788 2789 err = mlx4_NOP(dev); 2790 if (err) { 2791 if (dev->flags & MLX4_FLAG_MSI_X) { 2792 mlx4_warn(dev, "NOP command failed to generate MSI-X " 2793 "interrupt IRQ %d).\n", 2794 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 2795 mlx4_warn(dev, "Trying again without MSI-X.\n"); 2796 } else { 2797 mlx4_err(dev, "NOP command failed to generate interrupt " 2798 "(IRQ %d), aborting.\n", 2799 priv->eq_table.eq[dev->caps.num_comp_vectors].irq); 2800 mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); 2801 } 2802 2803 goto err_cmd_poll; 2804 } 2805 2806 mlx4_dbg(dev, "NOP command IRQ test passed\n"); 2807 2808 err = mlx4_init_cq_table(dev); 2809 if (err) { 2810 mlx4_err(dev, "Failed to initialize " 2811 "completion queue table (err=%d), aborting.\n", err); 2812 goto err_cmd_poll; 2813 } 2814 2815 err = mlx4_init_srq_table(dev); 2816 if (err) { 2817 mlx4_err(dev, "Failed to initialize " 2818 "shared receive queue table (err=%d), aborting.\n", 2819 err); 2820 goto err_cq_table_free; 2821 } 2822 2823 err = mlx4_init_qp_table(dev); 2824 if (err) { 2825 mlx4_err(dev, "Failed to initialize " 2826 "queue pair table (err=%d), aborting.\n", err); 2827 goto err_srq_table_free; 2828 } 2829 2830 err = mlx4_init_counters_table(dev); 2831 if (err && err != -ENOENT) { 2832 mlx4_err(dev, "Failed to initialize counters table (err=%d), " 2833 "aborting.\n", err); 2834 goto err_qp_table_free; 2835 } 2836 2837 if (!mlx4_is_slave(dev)) { 2838 for (port = 1; port <= dev->caps.num_ports; port++) { 2839 ib_port_default_caps = 0; 2840 err = mlx4_get_port_ib_caps(dev, port, 2841 &ib_port_default_caps); 2842 if (err) 2843 mlx4_warn(dev, "failed to get port %d default " 2844 "ib capabilities (%d). Continuing " 2845 "with caps = 0\n", port, err); 2846 dev->caps.ib_port_def_cap[port] = ib_port_default_caps; 2847 2848 /* initialize per-slave default ib port capabilities */ 2849 if (mlx4_is_master(dev)) { 2850 int i; 2851 for (i = 0; i < dev->num_slaves; i++) { 2852 if (i == mlx4_master_func_num(dev)) 2853 continue; 2854 priv->mfunc.master.slave_state[i].ib_cap_mask[port] = 2855 ib_port_default_caps; 2856 } 2857 } 2858 2859 dev->caps.port_ib_mtu[port] = IB_MTU_4096; 2860 2861 err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? 
2862 dev->caps.pkey_table_len[port] : -1); 2863 if (err) { 2864 mlx4_err(dev, "Failed to set port %d (err=%d), " 2865 "aborting\n", port, err); 2866 goto err_counters_table_free; 2867 } 2868 } 2869 } 2870 2871 return 0; 2872 2873err_counters_table_free: 2874 mlx4_cleanup_counters_table(dev); 2875 2876err_qp_table_free: 2877 mlx4_cleanup_qp_table(dev); 2878 2879err_srq_table_free: 2880 mlx4_cleanup_srq_table(dev); 2881 2882err_cq_table_free: 2883 mlx4_cleanup_cq_table(dev); 2884 2885err_cmd_poll: 2886 mlx4_cmd_use_polling(dev); 2887 2888err_eq_table_free: 2889 mlx4_cleanup_eq_table(dev); 2890 2891err_mcg_table_free: 2892 if (!mlx4_is_slave(dev)) 2893 mlx4_cleanup_mcg_table(dev); 2894 2895err_mr_table_free: 2896 mlx4_cleanup_mr_table(dev); 2897 2898err_xrcd_table_free: 2899 mlx4_cleanup_xrcd_table(dev); 2900 2901err_pd_table_free: 2902 mlx4_cleanup_pd_table(dev); 2903 2904err_kar_unmap: 2905 iounmap(priv->kar); 2906 2907err_uar_free: 2908 mlx4_uar_free(dev, &priv->driver_uar); 2909 2910err_uar_table_free: 2911 mlx4_cleanup_uar_table(dev); 2912 return err; 2913} 2914 2915static void mlx4_enable_msi_x(struct mlx4_dev *dev) 2916{ 2917 struct mlx4_priv *priv = mlx4_priv(dev); 2918 struct msix_entry *entries; 2919 int nreq = min_t(int, dev->caps.num_ports * 2920 min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) 2921 + MSIX_LEGACY_SZ, MAX_MSIX); 2922 int err; 2923 int i; 2924 2925 if (msi_x) { 2926 nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, 2927 nreq); 2928 2929 if (msi_x > 1 && !mlx4_is_mfunc(dev)) 2930 nreq = min_t(int, nreq, msi_x); 2931 2932 entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); 2933 if (!entries) 2934 goto no_msi; 2935 2936 for (i = 0; i < nreq; ++i) 2937 entries[i].entry = i; 2938 2939 retry: 2940 err = pci_enable_msix(dev->pdev, entries, nreq); 2941 if (err) { 2942 /* Try again if at least 2 vectors are available */ 2943 if (err > 1) { 2944 mlx4_info(dev, "Requested %d vectors, " 2945 "but only %d MSI-X vectors available, " 2946 "trying again\n", nreq, err); 2947 nreq = err; 2948 goto retry; 2949 } 2950 kfree(entries); 2951 /* if error, or can't alloc even 1 IRQ */ 2952 if (err < 0) { 2953 mlx4_err(dev, "No IRQs left, device can't " 2954 "be started.\n"); 2955 goto no_irq; 2956 } 2957 goto no_msi; 2958 } 2959 2960 if (nreq < 2961 MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { 2962 /*Working in legacy mode , all EQ's shared*/ 2963 dev->caps.comp_pool = 0; 2964 dev->caps.num_comp_vectors = nreq - 1; 2965 } else { 2966 dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; 2967 dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; 2968 } 2969 for (i = 0; i < nreq; ++i) 2970 priv->eq_table.eq[i].irq = entries[i].vector; 2971 2972 dev->flags |= MLX4_FLAG_MSI_X; 2973 2974 kfree(entries); 2975 return; 2976 } 2977 2978no_msi: 2979 dev->caps.num_comp_vectors = 1; 2980 dev->caps.comp_pool = 0; 2981 2982 for (i = 0; i < 2; ++i) 2983 priv->eq_table.eq[i].irq = dev->pdev->irq; 2984 return; 2985no_irq: 2986 dev->caps.num_comp_vectors = 0; 2987 dev->caps.comp_pool = 0; 2988 return; 2989} 2990 2991static void 2992mlx4_init_hca_info(struct mlx4_dev *dev) 2993{ 2994 struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info; 2995 2996 info->dev = dev; 2997 2998 info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO, 2999 show_firmware_version, NULL); 3000 if (device_create_file(&dev->pdev->dev, &info->firmware_attr)) 3001 mlx4_err(dev, "Failed to add file firmware version"); 3002 3003 info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca, 3004 
NULL); 3005 if (device_create_file(&dev->pdev->dev, &info->hca_attr)) 3006 mlx4_err(dev, "Failed to add file hca type"); 3007 3008 info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO, 3009 show_board, NULL); 3010 if (device_create_file(&dev->pdev->dev, &info->board_attr)) 3011 mlx4_err(dev, "Failed to add file board id type"); 3012} 3013 3014static int mlx4_init_port_info(struct mlx4_dev *dev, int port) 3015{ 3016 struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; 3017 int err = 0; 3018 3019 info->dev = dev; 3020 info->port = port; 3021 if (!mlx4_is_slave(dev)) { 3022 mlx4_init_mac_table(dev, &info->mac_table); 3023 mlx4_init_vlan_table(dev, &info->vlan_table); 3024 info->base_qpn = mlx4_get_base_qpn(dev, port); 3025 } 3026 3027 sprintf(info->dev_name, "mlx4_port%d", port); 3028 info->port_attr.attr.name = info->dev_name; 3029 if (mlx4_is_mfunc(dev)) 3030 info->port_attr.attr.mode = S_IRUGO; 3031 else { 3032 info->port_attr.attr.mode = S_IRUGO | S_IWUSR; 3033 info->port_attr.store = set_port_type; 3034 } 3035 info->port_attr.show = show_port_type; 3036 sysfs_attr_init(&info->port_attr.attr); 3037 3038 err = device_create_file(&dev->pdev->dev, &info->port_attr); 3039 if (err) { 3040 mlx4_err(dev, "Failed to create file for port %d\n", port); 3041 info->port = -1; 3042 } 3043 3044 sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); 3045 info->port_mtu_attr.attr.name = info->dev_mtu_name; 3046 if (mlx4_is_mfunc(dev)) 3047 info->port_mtu_attr.attr.mode = S_IRUGO; 3048 else { 3049 info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; 3050 info->port_mtu_attr.store = set_port_ib_mtu; 3051 } 3052 info->port_mtu_attr.show = show_port_ib_mtu; 3053 sysfs_attr_init(&info->port_mtu_attr.attr); 3054 3055 err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); 3056 if (err) { 3057 mlx4_err(dev, "Failed to create mtu file for port %d\n", port); 3058 device_remove_file(&info->dev->pdev->dev, &info->port_attr); 3059 info->port = -1; 3060 } 3061 3062 return err; 3063} 3064 3065static void 3066mlx4_cleanup_hca_info(struct mlx4_hca_info *info) 3067{ 3068 device_remove_file(&info->dev->pdev->dev, &info->firmware_attr); 3069 device_remove_file(&info->dev->pdev->dev, &info->board_attr); 3070 device_remove_file(&info->dev->pdev->dev, &info->hca_attr); 3071} 3072 3073static void mlx4_cleanup_port_info(struct mlx4_port_info *info) 3074{ 3075 if (info->port < 0) 3076 return; 3077 3078 device_remove_file(&info->dev->pdev->dev, &info->port_attr); 3079 device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); 3080} 3081 3082static int mlx4_init_steering(struct mlx4_dev *dev) 3083{ 3084 struct mlx4_priv *priv = mlx4_priv(dev); 3085 int num_entries = dev->caps.num_ports; 3086 int i, j; 3087 3088 priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); 3089 if (!priv->steer) 3090 return -ENOMEM; 3091 3092 for (i = 0; i < num_entries; i++) 3093 for (j = 0; j < MLX4_NUM_STEERS; j++) { 3094 INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); 3095 INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); 3096 } 3097 return 0; 3098} 3099 3100static void mlx4_clear_steering(struct mlx4_dev *dev) 3101{ 3102 struct mlx4_priv *priv = mlx4_priv(dev); 3103 struct mlx4_steer_index *entry, *tmp_entry; 3104 struct mlx4_promisc_qp *pqp, *tmp_pqp; 3105 int num_entries = dev->caps.num_ports; 3106 int i, j; 3107 3108 for (i = 0; i < num_entries; i++) { 3109 for (j = 0; j < MLX4_NUM_STEERS; j++) { 3110 list_for_each_entry_safe(pqp, tmp_pqp, 3111 &priv->steer[i].promisc_qps[j], 3112 list) { 3113 
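/* Release every promiscuous QP entry still queued on this steering list before freeing the steering entries themselves below. */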
list_del(&pqp->list); 3114 kfree(pqp); 3115 } 3116 list_for_each_entry_safe(entry, tmp_entry, 3117 &priv->steer[i].steer_entries[j], 3118 list) { 3119 list_del(&entry->list); 3120 list_for_each_entry_safe(pqp, tmp_pqp, 3121 &entry->duplicates, 3122 list) { 3123 list_del(&pqp->list); 3124 kfree(pqp); 3125 } 3126 kfree(entry); 3127 } 3128 } 3129 } 3130 kfree(priv->steer); 3131} 3132 3133static int extended_func_num(struct pci_dev *pdev) 3134{ 3135 return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); 3136} 3137 3138#define MLX4_OWNER_BASE 0x8069c 3139#define MLX4_OWNER_SIZE 4 3140 3141static int mlx4_get_ownership(struct mlx4_dev *dev) 3142{ 3143 void __iomem *owner; 3144 u32 ret; 3145 3146 if (pci_channel_offline(dev->pdev)) 3147 return -EIO; 3148 3149 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 3150 MLX4_OWNER_SIZE); 3151 if (!owner) { 3152 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3153 return -ENOMEM; 3154 } 3155 3156 ret = readl(owner); 3157 iounmap(owner); 3158 return (int) !!ret; 3159} 3160 3161static void mlx4_free_ownership(struct mlx4_dev *dev) 3162{ 3163 void __iomem *owner; 3164 3165 if (pci_channel_offline(dev->pdev)) 3166 return; 3167 3168 owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, 3169 MLX4_OWNER_SIZE); 3170 if (!owner) { 3171 mlx4_err(dev, "Failed to obtain ownership bit\n"); 3172 return; 3173 } 3174 writel(0, owner); 3175 msleep(1000); 3176 iounmap(owner); 3177} 3178 3179static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) 3180{ 3181 struct mlx4_priv *priv; 3182 struct mlx4_dev *dev; 3183 int err; 3184 int port; 3185 int nvfs, prb_vf; 3186 3187 pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); 3188 3189 err = pci_enable_device(pdev); 3190 if (err) { 3191 dev_err(&pdev->dev, "Cannot enable PCI device, " 3192 "aborting.\n"); 3193 return err; 3194 } 3195 3196 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); 3197 mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); 3198 if (nvfs > MLX4_MAX_NUM_VF) { 3199 dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", 3200 nvfs, MLX4_MAX_NUM_VF); 3201 return -EINVAL; 3202 } 3203 3204 if (nvfs < 0) { 3205 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); 3206 return -EINVAL; 3207 } 3208 /* 3209 * Check for BARs. 3210 */ 3211 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && 3212 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 3213 dev_err(&pdev->dev, "Missing DCS, aborting." 
3214 "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", 3215 pci_dev_data, pci_resource_flags(pdev, 0)); 3216 err = -ENODEV; 3217 goto err_disable_pdev; 3218 } 3219 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { 3220 dev_err(&pdev->dev, "Missing UAR, aborting.\n"); 3221 err = -ENODEV; 3222 goto err_disable_pdev; 3223 } 3224 3225 err = pci_request_regions(pdev, DRV_NAME); 3226 if (err) { 3227 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 3228 goto err_disable_pdev; 3229 } 3230 3231 pci_set_master(pdev); 3232 3233 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 3234 if (err) { 3235 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); 3236 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 3237 if (err) { 3238 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); 3239 goto err_release_regions; 3240 } 3241 } 3242 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 3243 if (err) { 3244 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " 3245 "consistent PCI DMA mask.\n"); 3246 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 3247 if (err) { 3248 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " 3249 "aborting.\n"); 3250 goto err_release_regions; 3251 } 3252 } 3253 3254 /* Allow large DMA segments, up to the firmware limit of 1 GB */ 3255 dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); 3256 3257 priv = kzalloc(sizeof *priv, GFP_KERNEL); 3258 if (!priv) { 3259 dev_err(&pdev->dev, "Device struct alloc failed, " 3260 "aborting.\n"); 3261 err = -ENOMEM; 3262 goto err_release_regions; 3263 } 3264 3265 dev = &priv->dev; 3266 dev->pdev = pdev; 3267 INIT_LIST_HEAD(&priv->dev_list); 3268 INIT_LIST_HEAD(&priv->ctx_list); 3269 spin_lock_init(&priv->ctx_lock); 3270 3271 mutex_init(&priv->port_mutex); 3272 3273 INIT_LIST_HEAD(&priv->pgdir_list); 3274 mutex_init(&priv->pgdir_mutex); 3275 3276 INIT_LIST_HEAD(&priv->bf_list); 3277 mutex_init(&priv->bf_mutex); 3278 3279 dev->rev_id = pdev->revision; 3280 dev->numa_node = dev_to_node(&pdev->dev); 3281 /* Detect if this device is a virtual function */ 3282 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { 3283 /* When acting as pf, we normally skip vfs unless explicitly 3284 * requested to probe them. */ 3285 if (nvfs && extended_func_num(pdev) > prb_vf) { 3286 mlx4_warn(dev, "Skipping virtual function:%d\n", 3287 extended_func_num(pdev)); 3288 err = -ENODEV; 3289 goto err_free_dev; 3290 } 3291 mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); 3292 dev->flags |= MLX4_FLAG_SLAVE; 3293 } else { 3294 /* We reset the device and enable SRIOV only for physical 3295 * devices. Try to claim ownership on the device; 3296 * if already taken, skip -- do not allow multiple PFs */ 3297 err = mlx4_get_ownership(dev); 3298 if (err) { 3299 if (err < 0) 3300 goto err_free_dev; 3301 else { 3302 mlx4_warn(dev, "Multiple PFs not yet supported." 
3303 " Skipping PF.\n"); 3304 err = -EINVAL; 3305 goto err_free_dev; 3306 } 3307 } 3308 3309 if (nvfs) { 3310 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); 3311 err = pci_enable_sriov(pdev, nvfs); 3312 if (err) { 3313 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", 3314 err); 3315 err = 0; 3316 } else { 3317 mlx4_warn(dev, "Running in master mode\n"); 3318 dev->flags |= MLX4_FLAG_SRIOV | 3319 MLX4_FLAG_MASTER; 3320 dev->num_vfs = nvfs; 3321 } 3322 } 3323 3324 atomic_set(&priv->opreq_count, 0); 3325 INIT_WORK(&priv->opreq_task, mlx4_opreq_action); 3326 3327 /* 3328 * Now reset the HCA before we touch the PCI capabilities or 3329 * attempt a firmware command, since a boot ROM may have left 3330 * the HCA in an undefined state. 3331 */ 3332 err = mlx4_reset(dev); 3333 if (err) { 3334 mlx4_err(dev, "Failed to reset HCA, aborting.\n"); 3335 goto err_sriov; 3336 } 3337 } 3338 3339slave_start: 3340 err = mlx4_cmd_init(dev); 3341 if (err) { 3342 mlx4_err(dev, "Failed to init command interface, aborting.\n"); 3343 goto err_sriov; 3344 } 3345 3346 /* In slave functions, the communication channel must be initialized 3347 * before posting commands. Also, init num_slaves before calling 3348 * mlx4_init_hca */ 3349 if (mlx4_is_mfunc(dev)) { 3350 if (mlx4_is_master(dev)) 3351 dev->num_slaves = MLX4_MAX_NUM_SLAVES; 3352 else { 3353 dev->num_slaves = 0; 3354 err = mlx4_multi_func_init(dev); 3355 if (err) { 3356 mlx4_err(dev, "Failed to init slave mfunc" 3357 " interface, aborting.\n"); 3358 goto err_cmd; 3359 } 3360 } 3361 } 3362 3363 err = mlx4_init_hca(dev); 3364 if (err) { 3365 if (err == -EACCES) { 3366 /* Not primary Physical function 3367 * Running in slave mode */ 3368 mlx4_cmd_cleanup(dev); 3369 dev->flags |= MLX4_FLAG_SLAVE; 3370 dev->flags &= ~MLX4_FLAG_MASTER; 3371 goto slave_start; 3372 } else 3373 goto err_mfunc; 3374 } 3375 3376 /* In master functions, the communication channel must be initialized 3377 * after obtaining its address from fw */ 3378 if (mlx4_is_master(dev)) { 3379 err = mlx4_multi_func_init(dev); 3380 if (err) { 3381 mlx4_err(dev, "Failed to init master mfunc" 3382 "interface, aborting.\n"); 3383 goto err_close; 3384 } 3385 } 3386 3387 err = mlx4_alloc_eq_table(dev); 3388 if (err) 3389 goto err_master_mfunc; 3390 3391 priv->msix_ctl.pool_bm = 0; 3392 mutex_init(&priv->msix_ctl.pool_lock); 3393 3394 mlx4_enable_msi_x(dev); 3395 3396 /* no MSIX and no shared IRQ */ 3397 if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) { 3398 err = -ENOSPC; 3399 goto err_free_eq; 3400 } 3401 3402 if ((mlx4_is_mfunc(dev)) && 3403 !(dev->flags & MLX4_FLAG_MSI_X)) { 3404 err = -ENOSYS; 3405 mlx4_err(dev, "INTx is not supported in multi-function mode." 
3406 " aborting.\n"); 3407 goto err_free_eq; 3408 } 3409 3410 if (!mlx4_is_slave(dev)) { 3411 err = mlx4_init_steering(dev); 3412 if (err) 3413 goto err_free_eq; 3414 } 3415 3416 err = mlx4_setup_hca(dev); 3417 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && 3418 !mlx4_is_mfunc(dev)) { 3419 dev->flags &= ~MLX4_FLAG_MSI_X; 3420 dev->caps.num_comp_vectors = 1; 3421 dev->caps.comp_pool = 0; 3422 pci_disable_msix(pdev); 3423 err = mlx4_setup_hca(dev); 3424 } 3425 3426 if (err) 3427 goto err_steer; 3428 3429 mlx4_init_quotas(dev); 3430 mlx4_init_hca_info(dev); 3431 3432 for (port = 1; port <= dev->caps.num_ports; port++) { 3433 err = mlx4_init_port_info(dev, port); 3434 if (err) 3435 goto err_port; 3436 } 3437 3438 err = mlx4_register_device(dev); 3439 if (err) 3440 goto err_port; 3441 3442 mlx4_request_modules(dev); 3443 3444 mlx4_sense_init(dev); 3445 mlx4_start_sense(dev); 3446 3447 priv->pci_dev_data = pci_dev_data; 3448 pci_set_drvdata(pdev, dev); 3449 3450 return 0; 3451 3452err_port: 3453 for (--port; port >= 1; --port) 3454 mlx4_cleanup_port_info(&priv->port[port]); 3455 3456 mlx4_cleanup_counters_table(dev); 3457 mlx4_cleanup_qp_table(dev); 3458 mlx4_cleanup_srq_table(dev); 3459 mlx4_cleanup_cq_table(dev); 3460 mlx4_cmd_use_polling(dev); 3461 mlx4_cleanup_eq_table(dev); 3462 mlx4_cleanup_mcg_table(dev); 3463 mlx4_cleanup_mr_table(dev); 3464 mlx4_cleanup_xrcd_table(dev); 3465 mlx4_cleanup_pd_table(dev); 3466 mlx4_cleanup_uar_table(dev); 3467 3468err_steer: 3469 if (!mlx4_is_slave(dev)) 3470 mlx4_clear_steering(dev); 3471 3472err_free_eq: 3473 mlx4_free_eq_table(dev); 3474 3475err_master_mfunc: 3476 if (mlx4_is_master(dev)) { 3477 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); 3478 mlx4_multi_func_cleanup(dev); 3479 } 3480 3481 if (mlx4_is_slave(dev)) { 3482 kfree(dev->caps.qp0_tunnel); 3483 kfree(dev->caps.qp0_proxy); 3484 kfree(dev->caps.qp1_tunnel); 3485 kfree(dev->caps.qp1_proxy); 3486 } 3487 3488err_close: 3489 if (dev->flags & MLX4_FLAG_MSI_X) 3490 pci_disable_msix(pdev); 3491 3492 mlx4_close_hca(dev); 3493 3494err_mfunc: 3495 if (mlx4_is_slave(dev)) 3496 mlx4_multi_func_cleanup(dev); 3497 3498err_cmd: 3499 mlx4_cmd_cleanup(dev); 3500 3501err_sriov: 3502 if (dev->flags & MLX4_FLAG_SRIOV) 3503 pci_disable_sriov(pdev); 3504 3505 if (!mlx4_is_slave(dev)) 3506 mlx4_free_ownership(dev); 3507 3508err_free_dev: 3509 kfree(priv); 3510 3511err_release_regions: 3512 pci_release_regions(pdev); 3513 3514err_disable_pdev: 3515 pci_disable_device(pdev); 3516 pci_set_drvdata(pdev, NULL); 3517 return err; 3518} 3519 3520static int __devinit mlx4_init_one(struct pci_dev *pdev, 3521 const struct pci_device_id *id) 3522{ 3523 device_set_desc(pdev->dev.bsddev, mlx4_version); 3524 return __mlx4_init_one(pdev, id->driver_data); 3525} 3526 3527static void mlx4_remove_one(struct pci_dev *pdev) 3528{ 3529 struct mlx4_dev *dev = pci_get_drvdata(pdev); 3530 struct mlx4_priv *priv = mlx4_priv(dev); 3531 int p; 3532 3533 if (dev) { 3534 /* in SRIOV it is not allowed to unload the pf's 3535 * driver while there are alive vf's */ 3536 if (mlx4_is_master(dev)) { 3537 if (mlx4_how_many_lives_vf(dev)) 3538 mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); 3539 } 3540 mlx4_stop_sense(dev); 3541 mlx4_unregister_device(dev); 3542 3543 mlx4_cleanup_hca_info(&priv->hca_info); 3544 for (p = 1; p <= dev->caps.num_ports; p++) { 3545 mlx4_cleanup_port_info(&priv->port[p]); 3546 mlx4_CLOSE_PORT(dev, p); 3547 } 3548 3549 if (mlx4_is_master(dev)) 3550 mlx4_free_resource_tracker(dev, 3551 
RES_TR_FREE_SLAVES_ONLY); 3552 3553 mlx4_cleanup_counters_table(dev); 3554 mlx4_cleanup_qp_table(dev); 3555 mlx4_cleanup_srq_table(dev); 3556 mlx4_cleanup_cq_table(dev); 3557 mlx4_cmd_use_polling(dev); 3558 mlx4_cleanup_eq_table(dev); 3559 mlx4_cleanup_mcg_table(dev); 3560 mlx4_cleanup_mr_table(dev); 3561 mlx4_cleanup_xrcd_table(dev); 3562 mlx4_cleanup_pd_table(dev); 3563 3564 if (mlx4_is_master(dev)) 3565 mlx4_free_resource_tracker(dev, 3566 RES_TR_FREE_STRUCTS_ONLY); 3567 3568 iounmap(priv->kar); 3569 mlx4_uar_free(dev, &priv->driver_uar); 3570 mlx4_cleanup_uar_table(dev); 3571 if (!mlx4_is_slave(dev)) 3572 mlx4_clear_steering(dev); 3573 mlx4_free_eq_table(dev); 3574 if (mlx4_is_master(dev)) 3575 mlx4_multi_func_cleanup(dev); 3576 mlx4_close_hca(dev); 3577 if (mlx4_is_slave(dev)) 3578 mlx4_multi_func_cleanup(dev); 3579 mlx4_cmd_cleanup(dev); 3580 3581 if (dev->flags & MLX4_FLAG_MSI_X) 3582 pci_disable_msix(pdev); 3583 if (dev->flags & MLX4_FLAG_SRIOV) { 3584 mlx4_warn(dev, "Disabling SR-IOV\n"); 3585 pci_disable_sriov(pdev); 3586 } 3587 3588 if (!mlx4_is_slave(dev)) 3589 mlx4_free_ownership(dev); 3590 3591 kfree(dev->caps.qp0_tunnel); 3592 kfree(dev->caps.qp0_proxy); 3593 kfree(dev->caps.qp1_tunnel); 3594 kfree(dev->caps.qp1_proxy); 3595 3596 kfree(priv); 3597 pci_release_regions(pdev); 3598 pci_disable_device(pdev); 3599 pci_set_drvdata(pdev, NULL); 3600 } 3601} 3602 3603static int restore_current_port_types(struct mlx4_dev *dev, 3604 enum mlx4_port_type *types, 3605 enum mlx4_port_type *poss_types) 3606{ 3607 struct mlx4_priv *priv = mlx4_priv(dev); 3608 int err, i; 3609 3610 mlx4_stop_sense(dev); 3611 mutex_lock(&priv->port_mutex); 3612 for (i = 0; i < dev->caps.num_ports; i++) 3613 dev->caps.possible_type[i + 1] = poss_types[i]; 3614 err = mlx4_change_port_types(dev, types); 3615 mlx4_start_sense(dev); 3616 mutex_unlock(&priv->port_mutex); 3617 return err; 3618} 3619 3620int mlx4_restart_one(struct pci_dev *pdev) 3621{ 3622 struct mlx4_dev *dev = pci_get_drvdata(pdev); 3623 struct mlx4_priv *priv = mlx4_priv(dev); 3624 enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; 3625 enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; 3626 int pci_dev_data, err, i; 3627 3628 pci_dev_data = priv->pci_dev_data; 3629 for (i = 0; i < dev->caps.num_ports; i++) { 3630 curr_type[i] = dev->caps.port_type[i + 1]; 3631 poss_type[i] = dev->caps.possible_type[i + 1]; 3632 } 3633 3634 mlx4_remove_one(pdev); 3635 err = __mlx4_init_one(pdev, pci_dev_data); 3636 if (err) 3637 return err; 3638 3639 dev = pci_get_drvdata(pdev); 3640 err = restore_current_port_types(dev, curr_type, poss_type); 3641 if (err) 3642 mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", 3643 err); 3644 return 0; 3645} 3646 3647static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { 3648 /* MT25408 "Hermon" SDR */ 3649 { PCI_VDEVICE(MELLANOX, 0x6340), 3650 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3651 /* MT25408 "Hermon" DDR */ 3652 { PCI_VDEVICE(MELLANOX, 0x634a), 3653 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3654 /* MT25408 "Hermon" QDR */ 3655 { PCI_VDEVICE(MELLANOX, 0x6354), 3656 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3657 /* MT25408 "Hermon" DDR PCIe gen2 */ 3658 { PCI_VDEVICE(MELLANOX, 0x6732), 3659 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3660 /* MT25408 "Hermon" QDR PCIe gen2 */ 3661 { PCI_VDEVICE(MELLANOX, 0x673c), 3662 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3663 /* MT25408 "Hermon" EN 10GigE */ 3664 { PCI_VDEVICE(MELLANOX, 0x6368), 3665 .driver_data = 
MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3666 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 3667 { PCI_VDEVICE(MELLANOX, 0x6750), 3668 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3669 /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 3670 { PCI_VDEVICE(MELLANOX, 0x6372), 3671 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3672 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 3673 { PCI_VDEVICE(MELLANOX, 0x675a), 3674 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3675 /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 3676 { PCI_VDEVICE(MELLANOX, 0x6764), 3677 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3678 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 3679 { PCI_VDEVICE(MELLANOX, 0x6746), 3680 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3681 /* MT26478 ConnectX2 40GigE PCIe gen2 */ 3682 { PCI_VDEVICE(MELLANOX, 0x676e), 3683 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3684 /* MT25400 Family [ConnectX-2 Virtual Function] */ 3685 { PCI_VDEVICE(MELLANOX, 0x1002), 3686 .driver_data = MLX4_PCI_DEV_IS_VF }, 3687 /* MT27500 Family [ConnectX-3] */ 3688 { PCI_VDEVICE(MELLANOX, 0x1003) }, 3689 /* MT27500 Family [ConnectX-3 Virtual Function] */ 3690 { PCI_VDEVICE(MELLANOX, 0x1004), 3691 .driver_data = MLX4_PCI_DEV_IS_VF }, 3692 { PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */ 3693 { PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */ 3694 { PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */ 3695 { PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */ 3696 { PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */ 3697 { PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */ 3698 { PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */ 3699 { PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */ 3700 { PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */ 3701 { PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */ 3702 { PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */ 3703 { PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */ 3704 { 0, } 3705}; 3706 3707MODULE_DEVICE_TABLE(pci, mlx4_pci_table); 3708 3709static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, 3710 pci_channel_state_t state) 3711{ 3712 mlx4_remove_one(pdev); 3713 3714 return state == pci_channel_io_perm_failure ? 3715 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 3716} 3717 3718static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) 3719{ 3720 int ret = __mlx4_init_one(pdev, 0); 3721 3722 return ret ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 3723} 3724 3725static const struct pci_error_handlers mlx4_err_handler = { 3726 .error_detected = mlx4_pci_err_detected, 3727 .slot_reset = mlx4_pci_slot_reset, 3728}; 3729 3730static int suspend(struct pci_dev *pdev, pm_message_t state) 3731{ 3732 mlx4_remove_one(pdev); 3733 3734 return 0; 3735} 3736 3737static int resume(struct pci_dev *pdev) 3738{ 3739 return __mlx4_init_one(pdev, 0); 3740} 3741 3742static struct pci_driver mlx4_driver = { 3743 .name = DRV_NAME, 3744 .id_table = mlx4_pci_table, 3745 .probe = mlx4_init_one, 3746 .remove = __devexit_p(mlx4_remove_one), 3747 .suspend = suspend, 3748 .resume = resume, 3749 .err_handler = &mlx4_err_handler, 3750}; 3751 3752static int __init mlx4_verify_params(void) 3753{ 3754 int status; 3755 3756 status = update_defaults(&port_type_array); 3757 if (status == INVALID_STR) { 3758 if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) 3759 return -1; 3760 } else if (status == INVALID_DATA) { 3761 return -1; 3762 } 3763 3764 status = update_defaults(&num_vfs); 3765 if (status == INVALID_STR) { 3766 if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) 3767 return -1; 3768 } else if (status == INVALID_DATA) { 3769 return -1; 3770 } 3771 3772 status = update_defaults(&probe_vf); 3773 if (status == INVALID_STR) { 3774 if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) 3775 return -1; 3776 } else if (status == INVALID_DATA) { 3777 return -1; 3778 } 3779 3780 if (msi_x < 0) { 3781 pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); 3782 return -1; 3783 } 3784 3785 if ((log_num_mac < 0) || (log_num_mac > 7)) { 3786 pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); 3787 return -1; 3788 } 3789 3790 if (log_num_vlan != 0) 3791 pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", 3792 MLX4_LOG_NUM_VLANS); 3793 3794 if (mlx4_set_4k_mtu != -1) 3795 pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); 3796 3797 if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { 3798 pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); 3799 return -1; 3800 } 3801 3802 if (mlx4_log_num_mgm_entry_size != -1 && 3803 (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || 3804 mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { 3805 pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " 3806 "in legal range (-1 or %d..%d)\n", 3807 mlx4_log_num_mgm_entry_size, 3808 MLX4_MIN_MGM_LOG_ENTRY_SIZE, 3809 MLX4_MAX_MGM_LOG_ENTRY_SIZE); 3810 return -1; 3811 } 3812 3813 if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { 3814 pr_warning("mlx4_core: bad log_num_qp: %d\n", 3815 mod_param_profile.num_qp); 3816 return -1; 3817 } 3818 3819 if (mod_param_profile.num_srq < 10) { 3820 pr_warning("mlx4_core: too low log_num_srq: %d\n", 3821 mod_param_profile.num_srq); 3822 return -1; 3823 } 3824 3825 if (mod_param_profile.num_cq < 10) { 3826 pr_warning("mlx4_core: too low log_num_cq: %d\n", 3827 mod_param_profile.num_cq); 3828 return -1; 3829 } 3830 3831 if (mod_param_profile.num_mpt < 10) { 3832 pr_warning("mlx4_core: too low log_num_mpt: %d\n", 3833 mod_param_profile.num_mpt); 3834 return -1; 3835 } 3836 3837 if (mod_param_profile.num_mtt_segs && 3838 mod_param_profile.num_mtt_segs < 15) { 3839 pr_warning("mlx4_core: too low log_num_mtt: %d\n", 3840 mod_param_profile.num_mtt_segs); 3841 return -1; 3842 } 3843 3844 if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { 3845 pr_warning("mlx4_core: too high log_num_mtt: %d\n", 3846 
mod_param_profile.num_mtt_segs); 3847 return -1; 3848 } 3849 return 0; 3850} 3851 3852static int __init mlx4_init(void) 3853{ 3854 int ret; 3855 3856 if (mlx4_verify_params()) 3857 return -EINVAL; 3858 3859 mlx4_catas_init(); 3860 3861 mlx4_wq = create_singlethread_workqueue("mlx4"); 3862 if (!mlx4_wq) 3863 return -ENOMEM; 3864 3865 if (enable_sys_tune) 3866 sys_tune_init(); 3867 3868 ret = pci_register_driver(&mlx4_driver); 3869 if (ret < 0) 3870 goto err; 3871 3872 return 0; 3873 3874err: 3875 if (enable_sys_tune) 3876 sys_tune_fini(); 3877 3878 destroy_workqueue(mlx4_wq); 3879 3880 return ret; 3881} 3882 3883static void __exit mlx4_cleanup(void) 3884{ 3885 if (enable_sys_tune) 3886 sys_tune_fini(); 3887 3888 pci_unregister_driver(&mlx4_driver); 3889 destroy_workqueue(mlx4_wq); 3890} 3891 3892module_init_order(mlx4_init, SI_ORDER_MIDDLE); 3893module_exit(mlx4_cleanup); 3894 3895#include <sys/module.h> 3896static int 3897mlx4_evhand(module_t mod, int event, void *arg) 3898{ 3899 return (0); 3900} 3901 3902static moduledata_t mlx4_mod = { 3903 .name = "mlx4", 3904 .evhand = mlx4_evhand, 3905}; 3906MODULE_VERSION(mlx4, 1); 3907DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); 3908