/* main.c revision 318536 */
1/* 2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36#include <linux/kmod.h> 37/* 38 * kmod.h must be included before module.h since it includes (indirectly) sys/module.h 39 * To use the FBSD macro sys/module.h should define MODULE_VERSION before linux/module does. 
40*/ 41#include <linux/module.h> 42#include <linux/errno.h> 43#include <linux/pci.h> 44#include <linux/dma-mapping.h> 45#include <linux/slab.h> 46#include <linux/io-mapping.h> 47#include <linux/delay.h> 48#include <linux/netdevice.h> 49#include <linux/string.h> 50#include <linux/fs.h> 51 52#include <linux/mlx4/device.h> 53#include <linux/mlx4/doorbell.h> 54 55#include "mlx4.h" 56#include "fw.h" 57#include "icm.h" 58#include "mlx4_stats.h" 59 60/* Mellanox ConnectX HCA low-level driver */ 61 62struct workqueue_struct *mlx4_wq; 63 64#ifdef CONFIG_MLX4_DEBUG 65 66int mlx4_debug_level = 0; 67module_param_named(debug_level, mlx4_debug_level, int, 0644); 68MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); 69 70#endif /* CONFIG_MLX4_DEBUG */ 71 72#ifdef CONFIG_PCI_MSI 73 74static int msi_x = 1; 75module_param(msi_x, int, 0444); 76MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); 77 78#else /* CONFIG_PCI_MSI */ 79 80#define msi_x (0) 81 82#endif /* CONFIG_PCI_MSI */ 83 84static int enable_sys_tune = 0; 85module_param(enable_sys_tune, int, 0444); 86MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); 87 88int mlx4_blck_lb = 1; 89module_param_named(block_loopback, mlx4_blck_lb, int, 0644); 90MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " 91 "(default: 1)"); 92enum { 93 DEFAULT_DOMAIN = 0, 94 BDF_STR_SIZE = 8, /* bb:dd.f- */ 95 DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ 96}; 97 98enum { 99 NUM_VFS, 100 PROBE_VF, 101 PORT_TYPE_ARRAY 102}; 103 104enum { 105 VALID_DATA, 106 INVALID_DATA, 107 INVALID_STR 108}; 109 110struct param_data { 111 int id; 112 struct mlx4_dbdf2val_lst dbdf2val; 113}; 114 115static struct param_data num_vfs = { 116 .id = NUM_VFS, 117 .dbdf2val = { 118 .name = "num_vfs param", 119 .num_vals = 1, 120 .def_val = {0}, 121 .range = {0, MLX4_MAX_NUM_VF} 122 } 123}; 124module_param_string(num_vfs, num_vfs.dbdf2val.str, 
125 sizeof(num_vfs.dbdf2val.str), 0444); 126MODULE_PARM_DESC(num_vfs, 127 "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" 128 "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" 129 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); 130 131static struct param_data probe_vf = { 132 .id = PROBE_VF, 133 .dbdf2val = { 134 .name = "probe_vf param", 135 .num_vals = 1, 136 .def_val = {0}, 137 .range = {0, MLX4_MAX_NUM_VF} 138 } 139}; 140module_param_string(probe_vf, probe_vf.dbdf2val.str, 141 sizeof(probe_vf.dbdf2val.str), 0444); 142MODULE_PARM_DESC(probe_vf, 143 "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" 144 "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" 145 "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); 146 147int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; 148 149module_param_named(log_num_mgm_entry_size, 150 mlx4_log_num_mgm_entry_size, int, 0444); 151MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" 152 " of qp per mcg, for example:" 153 " 10 gives 248.range: 7 <=" 154 " log_num_mgm_entry_size <= 12." 
155 " To activate device managed" 156 " flow steering when available, set to -1"); 157 158static int high_rate_steer; 159module_param(high_rate_steer, int, 0444); 160MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" 161 " (default off)"); 162 163static int fast_drop; 164module_param_named(fast_drop, fast_drop, int, 0444); 165MODULE_PARM_DESC(fast_drop, 166 "Enable fast packet drop when no recieve WQEs are posted"); 167 168int mlx4_enable_64b_cqe_eqe = 1; 169module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); 170MODULE_PARM_DESC(enable_64b_cqe_eqe, 171 "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); 172 173#define HCA_GLOBAL_CAP_MASK 0 174 175#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE 176 177static char mlx4_version[] __devinitdata = 178 DRV_NAME ": Mellanox ConnectX VPI driver v" 179 DRV_VERSION " (" DRV_RELDATE ")\n"; 180 181static int log_num_mac = 7; 182module_param_named(log_num_mac, log_num_mac, int, 0444); 183MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); 184 185static int log_num_vlan; 186module_param_named(log_num_vlan, log_num_vlan, int, 0444); 187MODULE_PARM_DESC(log_num_vlan, 188 "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); 189/* Log2 max number of VLANs per ETH port (0-7) */ 190#define MLX4_LOG_NUM_VLANS 7 191 192int log_mtts_per_seg = ilog2(1); 193module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 194MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " 195 "(0-7) (default: 0)"); 196 197static struct param_data port_type_array = { 198 .id = PORT_TYPE_ARRAY, 199 .dbdf2val = { 200 .name = "port_type_array param", 201 .num_vals = 2, 202 .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, 203 .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} 204 } 205}; 206module_param_string(port_type_array, port_type_array.dbdf2val.str, 207 sizeof(port_type_array.dbdf2val.str), 0444); 
208MODULE_PARM_DESC(port_type_array, 209 "Either pair of values (e.g. '1,2') to define uniform port1/port2 types configuration for all devices functions\n" 210 "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" 211 "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" 212 "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); 213 214 215struct mlx4_port_config { 216 struct list_head list; 217 enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; 218 struct pci_dev *pdev; 219}; 220 221#define MLX4_LOG_NUM_MTT 20 222/* We limit to 30 as of a bit map issue which uses int and not uint. 223 see mlx4_buddy_init -> bitmap_zero which gets int. 224*/ 225#define MLX4_MAX_LOG_NUM_MTT 30 226static struct mlx4_profile mod_param_profile = { 227 .num_qp = 19, 228 .num_srq = 16, 229 .rdmarc_per_qp = 4, 230 .num_cq = 16, 231 .num_mcg = 13, 232 .num_mpt = 19, 233 .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ 234}; 235 236module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); 237MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); 238 239module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); 240MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " 241 "(default: 16)"); 242 243module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 244 0444); 245MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " 246 "(default: 4)"); 247 248module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); 249MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); 250 251module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); 252MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " 253 "(default: 13)"); 254 255module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); 
256MODULE_PARM_DESC(log_num_mpt, 257 "log maximum number of memory protection table entries per " 258 "HCA (default: 19)"); 259 260module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); 261MODULE_PARM_DESC(log_num_mtt, 262 "log maximum number of memory translation table segments per " 263 "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); 264 265enum { 266 MLX4_IF_STATE_BASIC, 267 MLX4_IF_STATE_EXTENDED 268}; 269 270static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) 271{ 272 return (domain << 20) | (bus << 12) | (dev << 4) | fn; 273} 274 275static inline void pr_bdf_err(const char *dbdf, const char *pname) 276{ 277 pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); 278} 279 280static inline void pr_val_err(const char *dbdf, const char *pname, 281 const char *val) 282{ 283 pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" 284 , val, dbdf, pname); 285} 286 287static inline void pr_out_of_range_bdf(const char *dbdf, int val, 288 struct mlx4_dbdf2val_lst *dbdf2val) 289{ 290 pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" 291 , val, dbdf, dbdf2val->name , dbdf2val->range.min, 292 dbdf2val->range.max); 293} 294 295static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) 296{ 297 pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" 298 , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); 299} 300 301static inline int is_in_range(int val, struct mlx4_range *r) 302{ 303 return (val >= r->min && val <= r->max); 304} 305 306static int update_defaults(struct param_data *pdata) 307{ 308 long int val[MLX4_MAX_BDF_VALS]; 309 int ret; 310 char *t, *p = pdata->dbdf2val.str; 311 char sval[32]; 312 int val_len; 313 314 if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) 315 return INVALID_STR; 316 317 switch (pdata->id) { 318 case PORT_TYPE_ARRAY: 319 t = strchr(p, ','); 320 if (!t || 
t == p || (t - p) > sizeof(sval)) 321 return INVALID_STR; 322 323 val_len = t - p; 324 strncpy(sval, p, val_len); 325 sval[val_len] = 0; 326 327 ret = kstrtol(sval, 0, &val[0]); 328 if (ret == -EINVAL) 329 return INVALID_STR; 330 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 331 pr_out_of_range(&pdata->dbdf2val); 332 return INVALID_DATA; 333 } 334 335 ret = kstrtol(t + 1, 0, &val[1]); 336 if (ret == -EINVAL) 337 return INVALID_STR; 338 if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { 339 pr_out_of_range(&pdata->dbdf2val); 340 return INVALID_DATA; 341 } 342 343 pdata->dbdf2val.tbl[0].val[0] = val[0]; 344 pdata->dbdf2val.tbl[0].val[1] = val[1]; 345 break; 346 347 case NUM_VFS: 348 case PROBE_VF: 349 ret = kstrtol(p, 0, &val[0]); 350 if (ret == -EINVAL) 351 return INVALID_STR; 352 if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { 353 pr_out_of_range(&pdata->dbdf2val); 354 return INVALID_DATA; 355 } 356 pdata->dbdf2val.tbl[0].val[0] = val[0]; 357 break; 358 } 359 pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; 360 361 return VALID_DATA; 362} 363 364int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) 365{ 366 int domain, bus, dev, fn; 367 u64 dbdf; 368 char *p, *t, *v; 369 char tmp[32]; 370 char sbdf[32]; 371 char sep = ','; 372 int j, k, str_size, i = 1; 373 int prfx_size; 374 375 p = dbdf2val_lst->str; 376 377 for (j = 0; j < dbdf2val_lst->num_vals; j++) 378 dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; 379 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 380 381 str_size = strlen(dbdf2val_lst->str); 382 383 if (str_size == 0) 384 return 0; 385 386 while (strlen(p)) { 387 prfx_size = BDF_STR_SIZE; 388 sbdf[prfx_size] = 0; 389 strncpy(sbdf, p, prfx_size); 390 domain = DEFAULT_DOMAIN; 391 if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { 392 prfx_size = DBDF_STR_SIZE; 393 sbdf[prfx_size] = 0; 394 strncpy(sbdf, p, prfx_size); 395 if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, 396 &dev, &fn) != 4) { 397 
pr_bdf_err(sbdf, dbdf2val_lst->name); 398 goto err; 399 } 400 sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, 401 fn); 402 } else { 403 sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); 404 } 405 406 if (strnicmp(sbdf, tmp, sizeof(tmp))) { 407 pr_bdf_err(sbdf, dbdf2val_lst->name); 408 goto err; 409 } 410 411 dbdf = dbdf_to_u64(domain, bus, dev, fn); 412 413 for (j = 1; j < i; j++) 414 if (dbdf2val_lst->tbl[j].dbdf == dbdf) { 415 pr_warn("mlx4_core: in '%s', %s appears multiple times\n" 416 , dbdf2val_lst->name, sbdf); 417 goto err; 418 } 419 420 if (i >= MLX4_DEVS_TBL_SIZE) { 421 pr_warn("mlx4_core: Too many devices in '%s'\n" 422 , dbdf2val_lst->name); 423 goto err; 424 } 425 426 p += prfx_size; 427 t = strchr(p, sep); 428 t = t ? t : p + strlen(p); 429 if (p >= t) { 430 pr_val_err(sbdf, dbdf2val_lst->name, ""); 431 goto err; 432 } 433 434 for (k = 0; k < dbdf2val_lst->num_vals; k++) { 435 char sval[32]; 436 long int val; 437 int ret, val_len; 438 char vsep = ';'; 439 440 v = (k == dbdf2val_lst->num_vals - 1) ? 
t : strchr(p, vsep); 441 if (!v || v > t || v == p || (v - p) > sizeof(sval)) { 442 pr_val_err(sbdf, dbdf2val_lst->name, p); 443 goto err; 444 } 445 val_len = v - p; 446 strncpy(sval, p, val_len); 447 sval[val_len] = 0; 448 449 ret = kstrtol(sval, 0, &val); 450 if (ret) { 451 if (strchr(p, vsep)) 452 pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" 453 , sbdf, dbdf2val_lst->name); 454 else 455 pr_val_err(sbdf, dbdf2val_lst->name, 456 sval); 457 goto err; 458 } 459 if (!is_in_range(val, &dbdf2val_lst->range)) { 460 pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); 461 goto err; 462 } 463 464 dbdf2val_lst->tbl[i].val[k] = val; 465 p = v; 466 if (p[0] == vsep) 467 p++; 468 } 469 470 dbdf2val_lst->tbl[i].dbdf = dbdf; 471 if (strlen(p)) { 472 if (p[0] != sep) { 473 pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" 474 , sep, p, dbdf2val_lst->name); 475 goto err; 476 } 477 p++; 478 } 479 i++; 480 if (i < MLX4_DEVS_TBL_SIZE) 481 dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; 482 } 483 484 return 0; 485 486err: 487 dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; 488 pr_warn("mlx4_core: The value of '%s' is incorrect. 
The value is discarded!\n" 489 , dbdf2val_lst->name); 490 491 return -EINVAL; 492} 493EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); 494 495int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, 496 int *val) 497{ 498 u64 dbdf; 499 int i = 1; 500 501 *val = tbl[0].val[idx]; 502 if (!pdev) 503 return -EINVAL; 504 505 dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), 506 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 507 508 while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { 509 if (tbl[i].dbdf == dbdf) { 510 *val = tbl[i].val[idx]; 511 return 0; 512 } 513 i++; 514 } 515 516 return 0; 517} 518EXPORT_SYMBOL(mlx4_get_val); 519 520static void process_mod_param_profile(struct mlx4_profile *profile) 521{ 522 vm_size_t hwphyssz; 523 hwphyssz = 0; 524 TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); 525 526 profile->num_qp = 1 << mod_param_profile.num_qp; 527 profile->num_srq = 1 << mod_param_profile.num_srq; 528 profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; 529 profile->num_cq = 1 << mod_param_profile.num_cq; 530 profile->num_mcg = 1 << mod_param_profile.num_mcg; 531 profile->num_mpt = 1 << mod_param_profile.num_mpt; 532 /* 533 * We want to scale the number of MTTs with the size of the 534 * system memory, since it makes sense to register a lot of 535 * memory on a system with a lot of memory. As a heuristic, 536 * make sure we have enough MTTs to register twice the system 537 * memory (with PAGE_SIZE entries). 538 * 539 * This number has to be a power of two and fit into 32 bits 540 * due to device limitations. We cap this at 2^30 as of bit map 541 * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) 542 * That limits us to 4TB of memory registration per HCA with 543 * 4KB pages, which is probably OK for the next few months. 
544 */ 545 if (mod_param_profile.num_mtt_segs) 546 profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; 547 else { 548 profile->num_mtt_segs = 549 roundup_pow_of_two(max_t(unsigned, 550 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), 551 min(1UL << 552 (MLX4_MAX_LOG_NUM_MTT - 553 log_mtts_per_seg), 554 (hwphyssz << 1) 555 >> log_mtts_per_seg))); 556 /* set the actual value, so it will be reflected to the user 557 using the sysfs */ 558 mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); 559 } 560} 561 562int mlx4_check_port_params(struct mlx4_dev *dev, 563 enum mlx4_port_type *port_type) 564{ 565 int i; 566 567 for (i = 0; i < dev->caps.num_ports - 1; i++) { 568 if (port_type[i] != port_type[i + 1]) { 569 if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { 570 mlx4_err(dev, "Only same port types supported " 571 "on this HCA, aborting.\n"); 572 return -EINVAL; 573 } 574 } 575 } 576 577 for (i = 0; i < dev->caps.num_ports; i++) { 578 if (!(port_type[i] & dev->caps.supported_type[i+1])) { 579 mlx4_err(dev, "Requested port type for port %d is not " 580 "supported on this HCA\n", i + 1); 581 return -EINVAL; 582 } 583 } 584 return 0; 585} 586 587static void mlx4_set_port_mask(struct mlx4_dev *dev) 588{ 589 int i; 590 591 for (i = 1; i <= dev->caps.num_ports; ++i) 592 dev->caps.port_mask[i] = dev->caps.port_type[i]; 593} 594 595enum { 596 MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, 597}; 598 599static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) 600{ 601 int err = 0; 602 struct mlx4_func func; 603 604 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { 605 err = mlx4_QUERY_FUNC(dev, &func, 0); 606 if (err) { 607 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 608 return err; 609 } 610 dev_cap->max_eqs = func.max_eq; 611 dev_cap->reserved_eqs = func.rsvd_eqs; 612 dev_cap->reserved_uars = func.rsvd_uars; 613 err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; 614 } 615 return err; 616} 617 618static int mlx4_dev_cap(struct mlx4_dev *dev, 
struct mlx4_dev_cap *dev_cap) 619{ 620 int err; 621 int i; 622 623 err = mlx4_QUERY_DEV_CAP(dev, dev_cap); 624 if (err) { 625 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 626 return err; 627 } 628 629 if (dev_cap->min_page_sz > PAGE_SIZE) { 630 mlx4_err(dev, "HCA minimum page size of %d bigger than " 631 "kernel PAGE_SIZE of %d, aborting.\n", 632 dev_cap->min_page_sz, (int)PAGE_SIZE); 633 return -ENODEV; 634 } 635 if (dev_cap->num_ports > MLX4_MAX_PORTS) { 636 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 637 "aborting.\n", 638 dev_cap->num_ports, MLX4_MAX_PORTS); 639 return -ENODEV; 640 } 641 642 if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { 643 mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " 644 "PCI resource 2 size of 0x%llx, aborting.\n", 645 dev_cap->uar_size, 646 (unsigned long long) pci_resource_len(dev->pdev, 2)); 647 return -ENODEV; 648 } 649 650 dev->caps.num_ports = dev_cap->num_ports; 651 dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; 652 dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? 
653 dev->caps.num_sys_eqs : 654 MLX4_MAX_EQ_NUM; 655 for (i = 1; i <= dev->caps.num_ports; ++i) { 656 dev->caps.vl_cap[i] = dev_cap->max_vl[i]; 657 dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; 658 dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; 659 dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; 660 /* set gid and pkey table operating lengths by default 661 * to non-sriov values */ 662 dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; 663 dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; 664 dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; 665 dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; 666 dev->caps.def_mac[i] = dev_cap->def_mac[i]; 667 dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; 668 dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; 669 dev->caps.default_sense[i] = dev_cap->default_sense[i]; 670 dev->caps.trans_type[i] = dev_cap->trans_type[i]; 671 dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; 672 dev->caps.wavelength[i] = dev_cap->wavelength[i]; 673 dev->caps.trans_code[i] = dev_cap->trans_code[i]; 674 } 675 676 dev->caps.uar_page_size = PAGE_SIZE; 677 dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; 678 dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; 679 dev->caps.bf_reg_size = dev_cap->bf_reg_size; 680 dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; 681 dev->caps.max_sq_sg = dev_cap->max_sq_sg; 682 dev->caps.max_rq_sg = dev_cap->max_rq_sg; 683 dev->caps.max_wqes = dev_cap->max_qp_sz; 684 dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; 685 dev->caps.max_srq_wqes = dev_cap->max_srq_sz; 686 dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; 687 dev->caps.reserved_srqs = dev_cap->reserved_srqs; 688 dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; 689 dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; 690 /* 691 * Subtract 1 from the limit because we need to allocate a 692 * spare CQE to enable resizing the CQ 693 */ 694 dev->caps.max_cqes = 
dev_cap->max_cq_sz - 1; 695 dev->caps.reserved_cqs = dev_cap->reserved_cqs; 696 dev->caps.reserved_eqs = dev_cap->reserved_eqs; 697 dev->caps.reserved_mtts = dev_cap->reserved_mtts; 698 dev->caps.reserved_mrws = dev_cap->reserved_mrws; 699 700 /* The first 128 UARs are used for EQ doorbells */ 701 dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); 702 dev->caps.reserved_pds = dev_cap->reserved_pds; 703 dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 704 dev_cap->reserved_xrcds : 0; 705 dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? 706 dev_cap->max_xrcds : 0; 707 dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; 708 709 dev->caps.max_msg_sz = dev_cap->max_msg_sz; 710 dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); 711 dev->caps.flags = dev_cap->flags; 712 dev->caps.flags2 = dev_cap->flags2; 713 dev->caps.bmme_flags = dev_cap->bmme_flags; 714 dev->caps.reserved_lkey = dev_cap->reserved_lkey; 715 dev->caps.stat_rate_support = dev_cap->stat_rate_support; 716 dev->caps.cq_timestamp = dev_cap->timestamp_support; 717 dev->caps.max_gso_sz = dev_cap->max_gso_sz; 718 dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; 719 720 /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ 721 if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) 722 dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 723 /* Don't do sense port on multifunction devices (for now at least) */ 724 if (mlx4_is_mfunc(dev)) 725 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; 726 727 dev->caps.log_num_macs = log_num_mac; 728 dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; 729 730 dev->caps.fast_drop = fast_drop ? 
731 !!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 732 0; 733 734 for (i = 1; i <= dev->caps.num_ports; ++i) { 735 dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; 736 if (dev->caps.supported_type[i]) { 737 /* if only ETH is supported - assign ETH */ 738 if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) 739 dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; 740 /* if only IB is supported, assign IB */ 741 else if (dev->caps.supported_type[i] == 742 MLX4_PORT_TYPE_IB) 743 dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; 744 else { 745 /* 746 * if IB and ETH are supported, we set the port 747 * type according to user selection of port type; 748 * if there is no user selection, take the FW hint 749 */ 750 int pta; 751 mlx4_get_val(port_type_array.dbdf2val.tbl, 752 pci_physfn(dev->pdev), i - 1, 753 &pta); 754 if (pta == MLX4_PORT_TYPE_NONE) { 755 dev->caps.port_type[i] = dev->caps.suggested_type[i] ? 756 MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; 757 } else if (pta == MLX4_PORT_TYPE_NA) { 758 mlx4_err(dev, "Port %d is valid port. " 759 "It is not allowed to configure its type to N/A(%d)\n", 760 i, MLX4_PORT_TYPE_NA); 761 return -EINVAL; 762 } else { 763 dev->caps.port_type[i] = pta; 764 } 765 } 766 } 767 /* 768 * Link sensing is allowed on the port if 3 conditions are true: 769 * 1. Both protocols are supported on the port. 770 * 2. Different types are supported on the port 771 * 3. 
FW declared that it supports link sensing 772 */ 773 mlx4_priv(dev)->sense.sense_allowed[i] = 774 ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && 775 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 776 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); 777 778 /* Disablling auto sense for default Eth ports support */ 779 mlx4_priv(dev)->sense.sense_allowed[i] = 0; 780 781 /* 782 * If "default_sense" bit is set, we move the port to "AUTO" mode 783 * and perform sense_port FW command to try and set the correct 784 * port type from beginning 785 */ 786 if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { 787 enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; 788 dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; 789 mlx4_SENSE_PORT(dev, i, &sensed_port); 790 if (sensed_port != MLX4_PORT_TYPE_NONE) 791 dev->caps.port_type[i] = sensed_port; 792 } else { 793 dev->caps.possible_type[i] = dev->caps.port_type[i]; 794 } 795 796 if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { 797 dev->caps.log_num_macs = dev_cap->log_max_macs[i]; 798 mlx4_warn(dev, "Requested number of MACs is too much " 799 "for port %d, reducing to %d.\n", 800 i, 1 << dev->caps.log_num_macs); 801 } 802 if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { 803 dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; 804 mlx4_warn(dev, "Requested number of VLANs is too much " 805 "for port %d, reducing to %d.\n", 806 i, 1 << dev->caps.log_num_vlans); 807 } 808 } 809 810 dev->caps.max_basic_counters = dev_cap->max_basic_counters; 811 dev->caps.max_extended_counters = dev_cap->max_extended_counters; 812 /* support extended counters if available */ 813 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) 814 dev->caps.max_counters = dev->caps.max_extended_counters; 815 else 816 dev->caps.max_counters = dev->caps.max_basic_counters; 817 818 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; 819 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = 
820 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = 821 (1 << dev->caps.log_num_macs) * 822 (1 << dev->caps.log_num_vlans) * 823 dev->caps.num_ports; 824 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; 825 826 dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + 827 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + 828 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + 829 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; 830 831 dev->caps.sync_qp = dev_cap->sync_qp; 832 if (dev->pdev->device == 0x1003) 833 dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; 834 835 dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; 836 837 if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { 838 if (dev_cap->flags & 839 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { 840 mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); 841 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; 842 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; 843 } 844 } 845 846 if ((dev->caps.flags & 847 (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && 848 mlx4_is_master(dev)) 849 dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; 850 851 if (!mlx4_is_slave(dev)) { 852 for (i = 0; i < dev->caps.num_ports; ++i) 853 dev->caps.def_counter_index[i] = i << 1; 854 855 dev->caps.alloc_res_qp_mask = 856 (dev->caps.bf_reg_size ? 
MLX4_RESERVE_ETH_BF_QP : 0); 857 } else { 858 dev->caps.alloc_res_qp_mask = 0; 859 } 860 861 return 0; 862} 863/*The function checks if there are live vf, return the num of them*/ 864static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) 865{ 866 struct mlx4_priv *priv = mlx4_priv(dev); 867 struct mlx4_slave_state *s_state; 868 int i; 869 int ret = 0; 870 871 for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { 872 s_state = &priv->mfunc.master.slave_state[i]; 873 if (s_state->active && s_state->last_cmd != 874 MLX4_COMM_CMD_RESET) { 875 mlx4_warn(dev, "%s: slave: %d is still active\n", 876 __func__, i); 877 ret++; 878 } 879 } 880 return ret; 881} 882 883int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) 884{ 885 u32 qk = MLX4_RESERVED_QKEY_BASE; 886 887 if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || 888 qpn < dev->phys_caps.base_proxy_sqpn) 889 return -EINVAL; 890 891 if (qpn >= dev->phys_caps.base_tunnel_sqpn) 892 /* tunnel qp */ 893 qk += qpn - dev->phys_caps.base_tunnel_sqpn; 894 else 895 qk += qpn - dev->phys_caps.base_proxy_sqpn; 896 *qkey = qk; 897 return 0; 898} 899EXPORT_SYMBOL(mlx4_get_parav_qkey); 900 901void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) 902{ 903 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 904 905 if (!mlx4_is_master(dev)) 906 return; 907 908 priv->virt2phys_pkey[slave][port - 1][i] = val; 909} 910EXPORT_SYMBOL(mlx4_sync_pkey_table); 911 912void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) 913{ 914 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 915 916 if (!mlx4_is_master(dev)) 917 return; 918 919 priv->slave_node_guids[slave] = guid; 920} 921EXPORT_SYMBOL(mlx4_put_slave_node_guid); 922 923__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) 924{ 925 struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); 926 927 if (!mlx4_is_master(dev)) 928 return 0; 929 930 return 
priv->slave_node_guids[slave]; 931} 932EXPORT_SYMBOL(mlx4_get_slave_node_guid); 933 934int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) 935{ 936 struct mlx4_priv *priv = mlx4_priv(dev); 937 struct mlx4_slave_state *s_slave; 938 939 if (!mlx4_is_master(dev)) 940 return 0; 941 942 s_slave = &priv->mfunc.master.slave_state[slave]; 943 return !!s_slave->active; 944} 945EXPORT_SYMBOL(mlx4_is_slave_active); 946 947static void slave_adjust_steering_mode(struct mlx4_dev *dev, 948 struct mlx4_dev_cap *dev_cap, 949 struct mlx4_init_hca_param *hca_param) 950{ 951 dev->caps.steering_mode = hca_param->steering_mode; 952 if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) 953 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 954 else 955 dev->caps.num_qp_per_mgm = 956 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); 957 958 mlx4_dbg(dev, "Steering mode is: %s\n", 959 mlx4_steering_mode_str(dev->caps.steering_mode)); 960} 961 962static int mlx4_slave_cap(struct mlx4_dev *dev) 963{ 964 int err; 965 u32 page_size; 966 struct mlx4_dev_cap dev_cap; 967 struct mlx4_func_cap func_cap; 968 struct mlx4_init_hca_param hca_param; 969 int i; 970 971 memset(&hca_param, 0, sizeof(hca_param)); 972 err = mlx4_QUERY_HCA(dev, &hca_param); 973 if (err) { 974 mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); 975 return err; 976 } 977 978 /*fail if the hca has an unknown capability */ 979 if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != 980 HCA_GLOBAL_CAP_MASK) { 981 mlx4_err(dev, "Unknown hca global capabilities\n"); 982 return -ENOSYS; 983 } 984 985 mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; 986 987 dev->caps.hca_core_clock = hca_param.hca_core_clock; 988 989 memset(&dev_cap, 0, sizeof(dev_cap)); 990 dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; 991 err = mlx4_dev_cap(dev, &dev_cap); 992 if (err) { 993 mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); 994 return err; 995 } 996 997 err = mlx4_QUERY_FW(dev); 998 if (err) 
999 mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); 1000 1001 if (!hca_param.mw_enable) { 1002 dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; 1003 dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; 1004 } 1005 1006 page_size = ~dev->caps.page_size_cap + 1; 1007 mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); 1008 if (page_size > PAGE_SIZE) { 1009 mlx4_err(dev, "HCA minimum page size of %d bigger than " 1010 "kernel PAGE_SIZE of %d, aborting.\n", 1011 page_size, (int)PAGE_SIZE); 1012 return -ENODEV; 1013 } 1014 1015 /* slave gets uar page size from QUERY_HCA fw command */ 1016 dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); 1017 1018 /* TODO: relax this assumption */ 1019 if (dev->caps.uar_page_size != PAGE_SIZE) { 1020 mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", 1021 dev->caps.uar_page_size, (int)PAGE_SIZE); 1022 return -ENODEV; 1023 } 1024 1025 memset(&func_cap, 0, sizeof(func_cap)); 1026 err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); 1027 if (err) { 1028 mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", 1029 err); 1030 return err; 1031 } 1032 1033 if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != 1034 PF_CONTEXT_BEHAVIOUR_MASK) { 1035 mlx4_err(dev, "Unknown pf context behaviour\n"); 1036 return -ENOSYS; 1037 } 1038 1039 dev->caps.num_ports = func_cap.num_ports; 1040 dev->quotas.qp = func_cap.qp_quota; 1041 dev->quotas.srq = func_cap.srq_quota; 1042 dev->quotas.cq = func_cap.cq_quota; 1043 dev->quotas.mpt = func_cap.mpt_quota; 1044 dev->quotas.mtt = func_cap.mtt_quota; 1045 dev->caps.num_qps = 1 << hca_param.log_num_qps; 1046 dev->caps.num_srqs = 1 << hca_param.log_num_srqs; 1047 dev->caps.num_cqs = 1 << hca_param.log_num_cqs; 1048 dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; 1049 dev->caps.num_eqs = func_cap.max_eq; 1050 dev->caps.reserved_eqs = func_cap.reserved_eq; 1051 dev->caps.num_pds = MLX4_NUM_PDS; 1052 dev->caps.num_mgms = 0; 1053 dev->caps.num_amgms = 
0; 1054 1055 if (dev->caps.num_ports > MLX4_MAX_PORTS) { 1056 mlx4_err(dev, "HCA has %d ports, but we only support %d, " 1057 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); 1058 return -ENODEV; 1059 } 1060 1061 dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1062 dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1063 dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1064 dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); 1065 1066 if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || 1067 !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { 1068 err = -ENOMEM; 1069 goto err_mem; 1070 } 1071 1072 for (i = 1; i <= dev->caps.num_ports; ++i) { 1073 err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); 1074 if (err) { 1075 mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" 1076 " port %d, aborting (%d).\n", i, err); 1077 goto err_mem; 1078 } 1079 dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; 1080 dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; 1081 dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; 1082 dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; 1083 dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; 1084 1085 dev->caps.port_mask[i] = dev->caps.port_type[i]; 1086 err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, 1087 &dev->caps.gid_table_len[i], 1088 &dev->caps.pkey_table_len[i]); 1089 if (err) 1090 goto err_mem; 1091 } 1092 1093 if (dev->caps.uar_page_size * (dev->caps.num_uars - 1094 dev->caps.reserved_uars) > 1095 pci_resource_len(dev->pdev, 2)) { 1096 mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " 1097 "PCI resource 2 size of 0x%llx, aborting.\n", 1098 dev->caps.uar_page_size * dev->caps.num_uars, 1099 (unsigned long long) pci_resource_len(dev->pdev, 2)); 1100 err = -ENOMEM; 1101 goto err_mem; 1102 } 1103 1104 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { 1105 
dev->caps.eqe_size = 64; 1106 dev->caps.eqe_factor = 1; 1107 } else { 1108 dev->caps.eqe_size = 32; 1109 dev->caps.eqe_factor = 0; 1110 } 1111 1112 if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { 1113 dev->caps.cqe_size = 64; 1114 dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; 1115 } else { 1116 dev->caps.cqe_size = 32; 1117 } 1118 1119 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; 1120 mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); 1121 1122 slave_adjust_steering_mode(dev, &dev_cap, &hca_param); 1123 1124 if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && 1125 dev->caps.bf_reg_size) 1126 dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; 1127 1128 return 0; 1129 1130err_mem: 1131 kfree(dev->caps.qp0_tunnel); 1132 kfree(dev->caps.qp0_proxy); 1133 kfree(dev->caps.qp1_tunnel); 1134 kfree(dev->caps.qp1_proxy); 1135 dev->caps.qp0_tunnel = dev->caps.qp0_proxy = 1136 dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; 1137 1138 return err; 1139} 1140 1141static void mlx4_request_modules(struct mlx4_dev *dev) 1142{ 1143 int port; 1144 int has_ib_port = false; 1145 int has_eth_port = false; 1146#define EN_DRV_NAME "mlx4_en" 1147#define IB_DRV_NAME "mlx4_ib" 1148 1149 for (port = 1; port <= dev->caps.num_ports; port++) { 1150 if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) 1151 has_ib_port = true; 1152 else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) 1153 has_eth_port = true; 1154 } 1155 1156 if (has_ib_port) 1157 request_module_nowait(IB_DRV_NAME); 1158 if (has_eth_port) 1159 request_module_nowait(EN_DRV_NAME); 1160} 1161 1162/* 1163 * Change the port configuration of the device. 1164 * Every user of this function must hold the port mutex. 
1165 */ 1166int mlx4_change_port_types(struct mlx4_dev *dev, 1167 enum mlx4_port_type *port_types) 1168{ 1169 int err = 0; 1170 int change = 0; 1171 int port; 1172 1173 for (port = 0; port < dev->caps.num_ports; port++) { 1174 /* Change the port type only if the new type is different 1175 * from the current, and not set to Auto */ 1176 if (port_types[port] != dev->caps.port_type[port + 1]) 1177 change = 1; 1178 } 1179 if (change) { 1180 mlx4_unregister_device(dev); 1181 for (port = 1; port <= dev->caps.num_ports; port++) { 1182 mlx4_CLOSE_PORT(dev, port); 1183 dev->caps.port_type[port] = port_types[port - 1]; 1184 err = mlx4_SET_PORT(dev, port, -1); 1185 if (err) { 1186 mlx4_err(dev, "Failed to set port %d, " 1187 "aborting\n", port); 1188 goto out; 1189 } 1190 } 1191 mlx4_set_port_mask(dev); 1192 err = mlx4_register_device(dev); 1193 if (err) { 1194 mlx4_err(dev, "Failed to register device\n"); 1195 goto out; 1196 } 1197 mlx4_request_modules(dev); 1198 } 1199 1200out: 1201 return err; 1202} 1203 1204static ssize_t show_port_type(struct device *dev, 1205 struct device_attribute *attr, 1206 char *buf) 1207{ 1208 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1209 port_attr); 1210 struct mlx4_dev *mdev = info->dev; 1211 char type[8]; 1212 1213 sprintf(type, "%s", 1214 (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ? 
1215 "ib" : "eth"); 1216 if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO) 1217 sprintf(buf, "auto (%s)\n", type); 1218 else 1219 sprintf(buf, "%s\n", type); 1220 1221 return strlen(buf); 1222} 1223 1224static ssize_t set_port_type(struct device *dev, 1225 struct device_attribute *attr, 1226 const char *buf, size_t count) 1227{ 1228 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1229 port_attr); 1230 struct mlx4_dev *mdev = info->dev; 1231 struct mlx4_priv *priv = mlx4_priv(mdev); 1232 enum mlx4_port_type types[MLX4_MAX_PORTS]; 1233 enum mlx4_port_type new_types[MLX4_MAX_PORTS]; 1234 int i; 1235 int err = 0; 1236 1237 if (!strcmp(buf, "ib\n")) 1238 info->tmp_type = MLX4_PORT_TYPE_IB; 1239 else if (!strcmp(buf, "eth\n")) 1240 info->tmp_type = MLX4_PORT_TYPE_ETH; 1241 else if (!strcmp(buf, "auto\n")) 1242 info->tmp_type = MLX4_PORT_TYPE_AUTO; 1243 else { 1244 mlx4_err(mdev, "%s is not supported port type\n", buf); 1245 return -EINVAL; 1246 } 1247 1248 if ((info->tmp_type & mdev->caps.supported_type[info->port]) != 1249 info->tmp_type) { 1250 mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", 1251 info->port); 1252 return -EINVAL; 1253 } 1254 1255 mlx4_stop_sense(mdev); 1256 mutex_lock(&priv->port_mutex); 1257 /* Possible type is always the one that was delivered */ 1258 mdev->caps.possible_type[info->port] = info->tmp_type; 1259 1260 for (i = 0; i < mdev->caps.num_ports; i++) { 1261 types[i] = priv->port[i+1].tmp_type ? 
priv->port[i+1].tmp_type : 1262 mdev->caps.possible_type[i+1]; 1263 if (types[i] == MLX4_PORT_TYPE_AUTO) 1264 types[i] = mdev->caps.port_type[i+1]; 1265 } 1266 1267 if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && 1268 !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) { 1269 for (i = 1; i <= mdev->caps.num_ports; i++) { 1270 if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { 1271 mdev->caps.possible_type[i] = mdev->caps.port_type[i]; 1272 err = -EINVAL; 1273 } 1274 } 1275 } 1276 if (err) { 1277 mlx4_err(mdev, "Auto sensing is not supported on this HCA. " 1278 "Set only 'eth' or 'ib' for both ports " 1279 "(should be the same)\n"); 1280 goto out; 1281 } 1282 1283 mlx4_do_sense_ports(mdev, new_types, types); 1284 1285 err = mlx4_check_port_params(mdev, new_types); 1286 if (err) 1287 goto out; 1288 1289 /* We are about to apply the changes after the configuration 1290 * was verified, no need to remember the temporary types 1291 * any more */ 1292 for (i = 0; i < mdev->caps.num_ports; i++) 1293 priv->port[i + 1].tmp_type = 0; 1294 1295 err = mlx4_change_port_types(mdev, new_types); 1296 1297out: 1298 mlx4_start_sense(mdev); 1299 mutex_unlock(&priv->port_mutex); 1300 return err ? 
err : count; 1301} 1302 1303enum ibta_mtu { 1304 IB_MTU_256 = 1, 1305 IB_MTU_512 = 2, 1306 IB_MTU_1024 = 3, 1307 IB_MTU_2048 = 4, 1308 IB_MTU_4096 = 5 1309}; 1310 1311static inline int int_to_ibta_mtu(int mtu) 1312{ 1313 switch (mtu) { 1314 case 256: return IB_MTU_256; 1315 case 512: return IB_MTU_512; 1316 case 1024: return IB_MTU_1024; 1317 case 2048: return IB_MTU_2048; 1318 case 4096: return IB_MTU_4096; 1319 default: return -1; 1320 } 1321} 1322 1323static inline int ibta_mtu_to_int(enum ibta_mtu mtu) 1324{ 1325 switch (mtu) { 1326 case IB_MTU_256: return 256; 1327 case IB_MTU_512: return 512; 1328 case IB_MTU_1024: return 1024; 1329 case IB_MTU_2048: return 2048; 1330 case IB_MTU_4096: return 4096; 1331 default: return -1; 1332 } 1333} 1334 1335static ssize_t 1336show_board(struct device *device, struct device_attribute *attr, 1337 char *buf) 1338{ 1339 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1340 board_attr); 1341 struct mlx4_dev *mdev = info->dev; 1342 1343 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1344 mdev->board_id); 1345} 1346 1347static ssize_t 1348show_hca(struct device *device, struct device_attribute *attr, 1349 char *buf) 1350{ 1351 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1352 hca_attr); 1353 struct mlx4_dev *mdev = info->dev; 1354 1355 return sprintf(buf, "MT%d\n", mdev->pdev->device); 1356} 1357 1358static ssize_t 1359show_firmware_version(struct device *dev, 1360 struct device_attribute *attr, 1361 char *buf) 1362{ 1363 struct mlx4_hca_info *info = container_of(attr, struct mlx4_hca_info, 1364 firmware_attr); 1365 struct mlx4_dev *mdev = info->dev; 1366 1367 return sprintf(buf, "%d.%d.%d\n", (int)(mdev->caps.fw_ver >> 32), 1368 (int)(mdev->caps.fw_ver >> 16) & 0xffff, 1369 (int)mdev->caps.fw_ver & 0xffff); 1370} 1371 1372static ssize_t show_port_ib_mtu(struct device *dev, 1373 struct device_attribute *attr, 1374 char *buf) 1375{ 1376 struct mlx4_port_info *info = 
container_of(attr, struct mlx4_port_info, 1377 port_mtu_attr); 1378 struct mlx4_dev *mdev = info->dev; 1379 1380 /* When port type is eth, port mtu value isn't used. */ 1381 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) 1382 return -EINVAL; 1383 1384 sprintf(buf, "%d\n", 1385 ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port])); 1386 return strlen(buf); 1387} 1388 1389static ssize_t set_port_ib_mtu(struct device *dev, 1390 struct device_attribute *attr, 1391 const char *buf, size_t count) 1392{ 1393 struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info, 1394 port_mtu_attr); 1395 struct mlx4_dev *mdev = info->dev; 1396 struct mlx4_priv *priv = mlx4_priv(mdev); 1397 int err, port, mtu, ibta_mtu = -1; 1398 1399 if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) { 1400 mlx4_warn(mdev, "port level mtu is only used for IB ports\n"); 1401 return -EINVAL; 1402 } 1403 1404 mtu = (int) simple_strtol(buf, NULL, 0); 1405 ibta_mtu = int_to_ibta_mtu(mtu); 1406 1407 if (ibta_mtu < 0) { 1408 mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); 1409 return -EINVAL; 1410 } 1411 1412 mdev->caps.port_ib_mtu[info->port] = ibta_mtu; 1413 1414 mlx4_stop_sense(mdev); 1415 mutex_lock(&priv->port_mutex); 1416 mlx4_unregister_device(mdev); 1417 for (port = 1; port <= mdev->caps.num_ports; port++) { 1418 mlx4_CLOSE_PORT(mdev, port); 1419 err = mlx4_SET_PORT(mdev, port, -1); 1420 if (err) { 1421 mlx4_err(mdev, "Failed to set port %d, " 1422 "aborting\n", port); 1423 goto err_set_port; 1424 } 1425 } 1426 err = mlx4_register_device(mdev); 1427err_set_port: 1428 mutex_unlock(&priv->port_mutex); 1429 mlx4_start_sense(mdev); 1430 return err ? 
err : count; 1431} 1432 1433static int mlx4_load_fw(struct mlx4_dev *dev) 1434{ 1435 struct mlx4_priv *priv = mlx4_priv(dev); 1436 int err, unmap_flag = 0; 1437 1438 priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages, 1439 GFP_HIGHUSER | __GFP_NOWARN, 0); 1440 if (!priv->fw.fw_icm) { 1441 mlx4_err(dev, "Couldn't allocate FW area, aborting.\n"); 1442 return -ENOMEM; 1443 } 1444 1445 err = mlx4_MAP_FA(dev, priv->fw.fw_icm); 1446 if (err) { 1447 mlx4_err(dev, "MAP_FA command failed, aborting.\n"); 1448 goto err_free; 1449 } 1450 1451 err = mlx4_RUN_FW(dev); 1452 if (err) { 1453 mlx4_err(dev, "RUN_FW command failed, aborting.\n"); 1454 goto err_unmap_fa; 1455 } 1456 1457 return 0; 1458 1459err_unmap_fa: 1460 unmap_flag = mlx4_UNMAP_FA(dev); 1461 if (unmap_flag) 1462 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1463 1464err_free: 1465 if (!unmap_flag) 1466 mlx4_free_icm(dev, priv->fw.fw_icm, 0); 1467 return err; 1468} 1469 1470static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, 1471 int cmpt_entry_sz) 1472{ 1473 struct mlx4_priv *priv = mlx4_priv(dev); 1474 int err; 1475 int num_eqs; 1476 1477 err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table, 1478 cmpt_base + 1479 ((u64) (MLX4_CMPT_TYPE_QP * 1480 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1481 cmpt_entry_sz, dev->caps.num_qps, 1482 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1483 0, 0); 1484 if (err) 1485 goto err; 1486 1487 err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table, 1488 cmpt_base + 1489 ((u64) (MLX4_CMPT_TYPE_SRQ * 1490 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1491 cmpt_entry_sz, dev->caps.num_srqs, 1492 dev->caps.reserved_srqs, 0, 0); 1493 if (err) 1494 goto err_qp; 1495 1496 err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table, 1497 cmpt_base + 1498 ((u64) (MLX4_CMPT_TYPE_CQ * 1499 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1500 cmpt_entry_sz, dev->caps.num_cqs, 1501 dev->caps.reserved_cqs, 0, 0); 1502 if (err) 1503 goto err_srq; 1504 1505 num_eqs = dev->phys_caps.num_phys_eqs; 
1506 err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, 1507 cmpt_base + 1508 ((u64) (MLX4_CMPT_TYPE_EQ * 1509 cmpt_entry_sz) << MLX4_CMPT_SHIFT), 1510 cmpt_entry_sz, num_eqs, num_eqs, 0, 0); 1511 if (err) 1512 goto err_cq; 1513 1514 return 0; 1515 1516err_cq: 1517 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1518 1519err_srq: 1520 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1521 1522err_qp: 1523 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1524 1525err: 1526 return err; 1527} 1528 1529static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, 1530 struct mlx4_init_hca_param *init_hca, u64 icm_size) 1531{ 1532 struct mlx4_priv *priv = mlx4_priv(dev); 1533 u64 aux_pages; 1534 int num_eqs; 1535 int err, unmap_flag = 0; 1536 1537 err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages); 1538 if (err) { 1539 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n"); 1540 return err; 1541 } 1542 1543 mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n", 1544 (unsigned long long) icm_size >> 10, 1545 (unsigned long long) aux_pages << 2); 1546 1547 priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages, 1548 GFP_HIGHUSER | __GFP_NOWARN, 0); 1549 if (!priv->fw.aux_icm) { 1550 mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n"); 1551 return -ENOMEM; 1552 } 1553 1554 err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm); 1555 if (err) { 1556 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n"); 1557 goto err_free_aux; 1558 } 1559 1560 err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz); 1561 if (err) { 1562 mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n"); 1563 goto err_unmap_aux; 1564 } 1565 1566 1567 num_eqs = dev->phys_caps.num_phys_eqs; 1568 err = mlx4_init_icm_table(dev, &priv->eq_table.table, 1569 init_hca->eqc_base, dev_cap->eqc_entry_sz, 1570 num_eqs, num_eqs, 0, 0); 1571 if (err) { 1572 mlx4_err(dev, "Failed to map EQ context memory, aborting.\n"); 1573 
goto err_unmap_cmpt; 1574 } 1575 1576 /* 1577 * Reserved MTT entries must be aligned up to a cacheline 1578 * boundary, since the FW will write to them, while the driver 1579 * writes to all other MTT entries. (The variable 1580 * dev->caps.mtt_entry_sz below is really the MTT segment 1581 * size, not the raw entry size) 1582 */ 1583 dev->caps.reserved_mtts = 1584 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz, 1585 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz; 1586 1587 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 1588 init_hca->mtt_base, 1589 dev->caps.mtt_entry_sz, 1590 dev->caps.num_mtts, 1591 dev->caps.reserved_mtts, 1, 0); 1592 if (err) { 1593 mlx4_err(dev, "Failed to map MTT context memory, aborting.\n"); 1594 goto err_unmap_eq; 1595 } 1596 1597 err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table, 1598 init_hca->dmpt_base, 1599 dev_cap->dmpt_entry_sz, 1600 dev->caps.num_mpts, 1601 dev->caps.reserved_mrws, 1, 1); 1602 if (err) { 1603 mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n"); 1604 goto err_unmap_mtt; 1605 } 1606 1607 err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table, 1608 init_hca->qpc_base, 1609 dev_cap->qpc_entry_sz, 1610 dev->caps.num_qps, 1611 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1612 0, 0); 1613 if (err) { 1614 mlx4_err(dev, "Failed to map QP context memory, aborting.\n"); 1615 goto err_unmap_dmpt; 1616 } 1617 1618 err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table, 1619 init_hca->auxc_base, 1620 dev_cap->aux_entry_sz, 1621 dev->caps.num_qps, 1622 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1623 0, 0); 1624 if (err) { 1625 mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n"); 1626 goto err_unmap_qp; 1627 } 1628 1629 err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table, 1630 init_hca->altc_base, 1631 dev_cap->altc_entry_sz, 1632 dev->caps.num_qps, 1633 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1634 0, 0); 1635 if (err) { 1636 mlx4_err(dev, "Failed to map 
ALTC context memory, aborting.\n"); 1637 goto err_unmap_auxc; 1638 } 1639 1640 err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table, 1641 init_hca->rdmarc_base, 1642 dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift, 1643 dev->caps.num_qps, 1644 dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 1645 0, 0); 1646 if (err) { 1647 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n"); 1648 goto err_unmap_altc; 1649 } 1650 1651 err = mlx4_init_icm_table(dev, &priv->cq_table.table, 1652 init_hca->cqc_base, 1653 dev_cap->cqc_entry_sz, 1654 dev->caps.num_cqs, 1655 dev->caps.reserved_cqs, 0, 0); 1656 if (err) { 1657 mlx4_err(dev, "Failed to map CQ context memory, aborting.\n"); 1658 goto err_unmap_rdmarc; 1659 } 1660 1661 err = mlx4_init_icm_table(dev, &priv->srq_table.table, 1662 init_hca->srqc_base, 1663 dev_cap->srq_entry_sz, 1664 dev->caps.num_srqs, 1665 dev->caps.reserved_srqs, 0, 0); 1666 if (err) { 1667 mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n"); 1668 goto err_unmap_cq; 1669 } 1670 1671 /* 1672 * For flow steering device managed mode it is required to use 1673 * mlx4_init_icm_table. For B0 steering mode it's not strictly 1674 * required, but for simplicity just map the whole multicast 1675 * group table now. The table isn't very big and it's a lot 1676 * easier than trying to track ref counts. 
1677 */ 1678 err = mlx4_init_icm_table(dev, &priv->mcg_table.table, 1679 init_hca->mc_base, 1680 mlx4_get_mgm_entry_size(dev), 1681 dev->caps.num_mgms + dev->caps.num_amgms, 1682 dev->caps.num_mgms + dev->caps.num_amgms, 1683 0, 0); 1684 if (err) { 1685 mlx4_err(dev, "Failed to map MCG context memory, aborting.\n"); 1686 goto err_unmap_srq; 1687 } 1688 1689 return 0; 1690 1691err_unmap_srq: 1692 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1693 1694err_unmap_cq: 1695 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1696 1697err_unmap_rdmarc: 1698 mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table); 1699 1700err_unmap_altc: 1701 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1702 1703err_unmap_auxc: 1704 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1705 1706err_unmap_qp: 1707 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1708 1709err_unmap_dmpt: 1710 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1711 1712err_unmap_mtt: 1713 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1714 1715err_unmap_eq: 1716 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1717 1718err_unmap_cmpt: 1719 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1720 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1721 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1722 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1723 1724err_unmap_aux: 1725 unmap_flag = mlx4_UNMAP_ICM_AUX(dev); 1726 if (unmap_flag) 1727 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1728 1729err_free_aux: 1730 if (!unmap_flag) 1731 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1732 1733 return err; 1734} 1735 1736static void mlx4_free_icms(struct mlx4_dev *dev) 1737{ 1738 struct mlx4_priv *priv = mlx4_priv(dev); 1739 1740 mlx4_cleanup_icm_table(dev, &priv->mcg_table.table); 1741 mlx4_cleanup_icm_table(dev, &priv->srq_table.table); 1742 mlx4_cleanup_icm_table(dev, &priv->cq_table.table); 1743 mlx4_cleanup_icm_table(dev, 
&priv->qp_table.rdmarc_table); 1744 mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table); 1745 mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table); 1746 mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table); 1747 mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table); 1748 mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table); 1749 mlx4_cleanup_icm_table(dev, &priv->eq_table.table); 1750 mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table); 1751 mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table); 1752 mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table); 1753 mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table); 1754 1755 if (!mlx4_UNMAP_ICM_AUX(dev)) 1756 mlx4_free_icm(dev, priv->fw.aux_icm, 0); 1757 else 1758 pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n"); 1759} 1760 1761static void mlx4_slave_exit(struct mlx4_dev *dev) 1762{ 1763 struct mlx4_priv *priv = mlx4_priv(dev); 1764 1765 mutex_lock(&priv->cmd.slave_cmd_mutex); 1766 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) 1767 mlx4_warn(dev, "Failed to close slave function.\n"); 1768 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1769} 1770 1771static int map_bf_area(struct mlx4_dev *dev) 1772{ 1773 struct mlx4_priv *priv = mlx4_priv(dev); 1774 resource_size_t bf_start; 1775 resource_size_t bf_len; 1776 int err = 0; 1777 1778 if (!dev->caps.bf_reg_size) 1779 return -ENXIO; 1780 1781 bf_start = pci_resource_start(dev->pdev, 2) + 1782 (dev->caps.num_uars << PAGE_SHIFT); 1783 bf_len = pci_resource_len(dev->pdev, 2) - 1784 (dev->caps.num_uars << PAGE_SHIFT); 1785 priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); 1786 if (!priv->bf_mapping) 1787 err = -ENOMEM; 1788 1789 return err; 1790} 1791 1792static void unmap_bf_area(struct mlx4_dev *dev) 1793{ 1794 if (mlx4_priv(dev)->bf_mapping) 1795 io_mapping_free(mlx4_priv(dev)->bf_mapping); 1796} 1797 1798int mlx4_read_clock(struct mlx4_dev *dev) 1799{ 1800 u32 clockhi, clocklo, clockhi1; 1801 cycle_t cycles; 1802 int i; 1803 
struct mlx4_priv *priv = mlx4_priv(dev); 1804 1805 if (!priv->clock_mapping) 1806 return -ENOTSUPP; 1807 1808 for (i = 0; i < 10; i++) { 1809 clockhi = swab32(readl(priv->clock_mapping)); 1810 clocklo = swab32(readl(priv->clock_mapping + 4)); 1811 clockhi1 = swab32(readl(priv->clock_mapping)); 1812 if (clockhi == clockhi1) 1813 break; 1814 } 1815 1816 cycles = (u64) clockhi << 32 | (u64) clocklo; 1817 1818 return cycles; 1819} 1820EXPORT_SYMBOL_GPL(mlx4_read_clock); 1821 1822 1823static int map_internal_clock(struct mlx4_dev *dev) 1824{ 1825 struct mlx4_priv *priv = mlx4_priv(dev); 1826 1827 priv->clock_mapping = ioremap(pci_resource_start(dev->pdev, 1828 priv->fw.clock_bar) + 1829 priv->fw.clock_offset, MLX4_CLOCK_SIZE); 1830 1831 if (!priv->clock_mapping) 1832 return -ENOMEM; 1833 1834 return 0; 1835} 1836 1837 1838int mlx4_get_internal_clock_params(struct mlx4_dev *dev, 1839 struct mlx4_clock_params *params) 1840{ 1841 struct mlx4_priv *priv = mlx4_priv(dev); 1842 1843 if (mlx4_is_slave(dev)) 1844 return -ENOTSUPP; 1845 if (!params) 1846 return -EINVAL; 1847 1848 params->bar = priv->fw.clock_bar; 1849 params->offset = priv->fw.clock_offset; 1850 params->size = MLX4_CLOCK_SIZE; 1851 1852 return 0; 1853} 1854EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params); 1855 1856static void unmap_internal_clock(struct mlx4_dev *dev) 1857{ 1858 struct mlx4_priv *priv = mlx4_priv(dev); 1859 1860 if (priv->clock_mapping) 1861 iounmap(priv->clock_mapping); 1862} 1863 1864static void mlx4_close_hca(struct mlx4_dev *dev) 1865{ 1866 unmap_internal_clock(dev); 1867 unmap_bf_area(dev); 1868 if (mlx4_is_slave(dev)) { 1869 mlx4_slave_exit(dev); 1870 } else { 1871 mlx4_CLOSE_HCA(dev, 0); 1872 mlx4_free_icms(dev); 1873 1874 if (!mlx4_UNMAP_FA(dev)) 1875 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); 1876 else 1877 pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n"); 1878 } 1879} 1880 1881static int mlx4_init_slave(struct mlx4_dev *dev) 1882{ 1883 struct mlx4_priv *priv = mlx4_priv(dev); 1884 
u64 dma = (u64) priv->mfunc.vhcr_dma; 1885 int num_of_reset_retries = NUM_OF_RESET_RETRIES; 1886 int ret_from_reset = 0; 1887 u32 slave_read; 1888 u32 cmd_channel_ver; 1889 1890 mutex_lock(&priv->cmd.slave_cmd_mutex); 1891 priv->cmd.max_cmds = 1; 1892 mlx4_warn(dev, "Sending reset\n"); 1893 ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 1894 MLX4_COMM_TIME); 1895 /* if we are in the middle of flr the slave will try 1896 * NUM_OF_RESET_RETRIES times before leaving.*/ 1897 if (ret_from_reset) { 1898 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { 1899 msleep(SLEEP_TIME_IN_RESET); 1900 while (ret_from_reset && num_of_reset_retries) { 1901 mlx4_warn(dev, "slave is currently in the" 1902 "middle of FLR. retrying..." 1903 "(try num:%d)\n", 1904 (NUM_OF_RESET_RETRIES - 1905 num_of_reset_retries + 1)); 1906 ret_from_reset = 1907 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 1908 0, MLX4_COMM_TIME); 1909 num_of_reset_retries = num_of_reset_retries - 1; 1910 } 1911 } else 1912 goto err; 1913 } 1914 1915 /* check the driver version - the slave I/F revision 1916 * must match the master's */ 1917 slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); 1918 cmd_channel_ver = mlx4_comm_get_version(); 1919 1920 if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != 1921 MLX4_COMM_GET_IF_REV(slave_read)) { 1922 mlx4_err(dev, "slave driver version is not supported" 1923 " by the master\n"); 1924 goto err; 1925 } 1926 1927 mlx4_warn(dev, "Sending vhcr0\n"); 1928 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, 1929 MLX4_COMM_TIME)) 1930 goto err; 1931 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, 1932 MLX4_COMM_TIME)) 1933 goto err; 1934 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, 1935 MLX4_COMM_TIME)) 1936 goto err; 1937 if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) 1938 goto err; 1939 1940 mutex_unlock(&priv->cmd.slave_cmd_mutex); 1941 return 0; 1942 1943err: 1944 mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); 1945 
mutex_unlock(&priv->cmd.slave_cmd_mutex); 1946 return -EIO; 1947} 1948 1949static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) 1950{ 1951 int i; 1952 1953 for (i = 1; i <= dev->caps.num_ports; i++) { 1954 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) 1955 dev->caps.gid_table_len[i] = 1956 mlx4_get_slave_num_gids(dev, 0); 1957 else 1958 dev->caps.gid_table_len[i] = 1; 1959 dev->caps.pkey_table_len[i] = 1960 dev->phys_caps.pkey_phys_table_len[i] - 1; 1961 } 1962} 1963 1964static int choose_log_fs_mgm_entry_size(int qp_per_entry) 1965{ 1966 int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; 1967 1968 for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; 1969 i++) { 1970 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) 1971 break; 1972 } 1973 1974 return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; 1975} 1976 1977static void choose_steering_mode(struct mlx4_dev *dev, 1978 struct mlx4_dev_cap *dev_cap) 1979{ 1980 int nvfs; 1981 1982 mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs); 1983 if (high_rate_steer && !mlx4_is_mfunc(dev)) { 1984 dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER | 1985 MLX4_DEV_CAP_FLAG_VEP_UC_STEER); 1986 dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN; 1987 } 1988 1989 if (mlx4_log_num_mgm_entry_size == -1 && 1990 dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && 1991 (!mlx4_is_mfunc(dev) || 1992 (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) && 1993 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= 1994 MLX4_MIN_MGM_LOG_ENTRY_SIZE) { 1995 dev->oper_log_mgm_entry_size = 1996 choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry); 1997 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED; 1998 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; 1999 } else { 2000 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && 2001 dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) 2002 dev->caps.steering_mode = MLX4_STEERING_MODE_B0; 2003 else { 2004 dev->caps.steering_mode = 
MLX4_STEERING_MODE_A0;

        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
            dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
            mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
                  "set to use B0 steering. Falling back to A0 steering mode.\n");
    }
    /*
     * Operational MGM (multicast group) entry size: the module
     * parameter wins when set (> 0), otherwise use the driver default.
     */
    dev->oper_log_mgm_entry_size =
        mlx4_log_num_mgm_entry_size > 0 ?
        mlx4_log_num_mgm_entry_size :
        MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
    dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
    }
    mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
         "log_num_mgm_entry_size = %d\n",
         mlx4_steering_mode_str(dev->caps.steering_mode),
         dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}

/*
 * Bring the HCA to an operational state.
 *
 * PF/native path: query and load firmware, size and map the ICM from the
 * device capabilities, issue INIT_HCA, and (when supported) read the HCA
 * core clock and map the internal clock page for timestamping.
 * Slave (VF) path: initialize the slave channel and fetch the capabilities
 * granted by the master instead.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * here is unwound via the goto-cleanup chain at the bottom.
 */
static int mlx4_init_hca(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct mlx4_dev_cap *dev_cap = NULL;
    struct mlx4_adapter adapter;
    struct mlx4_mod_stat_cfg mlx4_cfg;
    struct mlx4_profile profile;
    struct mlx4_init_hca_param init_hca;
    u64 icm_size;
    int err;

    if (!mlx4_is_slave(dev)) {
        err = mlx4_QUERY_FW(dev);
        if (err) {
            if (err == -EACCES)
                /* Not an error: this function simply doesn't own the device. */
                mlx4_info(dev, "non-primary physical function, skipping.\n");
            else
                mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
            return err;
        }

        err = mlx4_load_fw(dev);
        if (err) {
            mlx4_err(dev, "Failed to start FW, aborting.\n");
            return err;
        }

        /* Force firmware page size to 4KB (log_pg_sz = 0); failure is non-fatal. */
        mlx4_cfg.log_pg_sz_m = 1;
        mlx4_cfg.log_pg_sz = 0;
        err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
        if (err)
            mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

        dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
        if (!dev_cap) {
            mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
            err = -ENOMEM;
            goto err_stop_fw;
        }

        err = mlx4_dev_cap(dev, dev_cap);
        if (err) {
            mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
            goto err_stop_fw;
        }

        choose_steering_mode(dev, dev_cap);

        if (mlx4_is_master(dev))
            mlx4_parav_master_pf_caps(dev);

        process_mod_param_profile(&profile);
        if (dev->caps.steering_mode ==
            MLX4_STEERING_MODE_DEVICE_MANAGED)
            profile.num_mcg = MLX4_FS_NUM_MCG;

        /* Returns total ICM size on success, negative errno cast to u64 on failure. */
        icm_size = mlx4_make_profile(dev, &profile, dev_cap,
                         &init_hca);
        if ((long long) icm_size < 0) {
            err = icm_size;
            goto err_stop_fw;
        }

        dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

        init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
        /* UAR page size is expressed to FW as log2(page size) - 12 (4KB base). */
        init_hca.uar_page_sz = PAGE_SHIFT - 12;

        err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
        if (err)
            goto err_stop_fw;

        init_hca.mw_enable = 1;

        err = mlx4_INIT_HCA(dev, &init_hca);
        if (err) {
            mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
            goto err_free_icm;
        }

        if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
            /* QUERY_FUNC returns flag bits (>= 0) or a negative errno. */
            err = mlx4_query_func(dev, dev_cap);
            if (err < 0) {
                mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
                /*
                 * NOTE(review): INIT_HCA already succeeded here, yet this
                 * jumps to err_stop_fw, skipping CLOSE_HCA and
                 * mlx4_free_icms (err_close/err_free_icm). Looks like it
                 * should be err_close — confirm before changing.
                 */
                goto err_stop_fw;
            } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
                dev->caps.num_eqs = dev_cap->max_eqs;
                dev->caps.reserved_eqs = dev_cap->reserved_eqs;
                dev->caps.reserved_uars = dev_cap->reserved_uars;
            }
        }

        /*
         * Read HCA frequency by QUERY_HCA command
         */
        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
            /* init_hca is reused as the QUERY_HCA output buffer from here on. */
            memset(&init_hca, 0, sizeof(init_hca));
            err = mlx4_QUERY_HCA(dev, &init_hca);
            if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
            } else {
                dev->caps.hca_core_clock =
                    init_hca.hca_core_clock;
            }

            /* In case we got HCA frequency 0 - disable timestamping
             * to avoid dividing by zero
             */
            if (!dev->caps.hca_core_clock) {
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
                mlx4_err(dev,
                     "HCA frequency is 0. Timestamping is not supported.");
            } else if (map_internal_clock(dev)) {
                /* Map internal clock,
                 * in case of failure disable timestamping
                 */
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
                mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n");
            }
        }
    } else {
        /* Slave (VF): talk to the master instead of the firmware directly. */
        err = mlx4_init_slave(dev);
        if (err) {
            mlx4_err(dev, "Failed to initialize slave\n");
            return err;
        }

        err = mlx4_slave_cap(dev);
        if (err) {
            mlx4_err(dev, "Failed to obtain slave caps\n");
            goto err_close;
        }
    }

    /* Blue-flame is an optimization only; failing to map it is not fatal. */
    if (map_bf_area(dev))
        mlx4_dbg(dev, "Failed to map blue flame area\n");

    /* Only the master set the ports, all the rest got it from it.*/
    if (!mlx4_is_slave(dev))
        mlx4_set_port_mask(dev);

    err = mlx4_QUERY_ADAPTER(dev, &adapter);
    if (err) {
        mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
        goto unmap_bf;
    }

    priv->eq_table.inta_pin = adapter.inta_pin;
    memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
    memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd));
    dev->vsd_vendor_id = adapter.vsd_vendor_id;

    if (!mlx4_is_slave(dev))
        kfree(dev_cap);

    return 0;

unmap_bf:
    if (!mlx4_is_slave(dev))
        unmap_internal_clock(dev);
    unmap_bf_area(dev);

    if (mlx4_is_slave(dev)) {
        /* Proxy/tunnel QP arrays were allocated by mlx4_slave_cap(). */
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
        kfree(dev->caps.qp1_proxy);
    }

err_close:
    if (mlx4_is_slave(dev))
        mlx4_slave_exit(dev);
    else
        mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
    if (!mlx4_is_slave(dev))
        mlx4_free_icms(dev);

err_stop_fw:
    if (!mlx4_is_slave(dev)) {
        if (!mlx4_UNMAP_FA(dev))
            mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        else
            pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
        /* kfree(NULL) is a no-op, so this is safe on all paths. */
        kfree(dev_cap);
    }
    return err;
}

/*
 * Build the per-port (and, on the master, per-VF) default counter lists
 * and the allocation bitmap for flow/interface counters.
 *
 * Slaves only record the default counter index handed to them by the
 * master; the PF pre-populates two counters per port (ETH and RoCE
 * defaults) and the master additionally seeds one counter per VF per port.
 * Returns 0, -ENOENT when the device has no counter support, or -ENOMEM.
 */
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    int nent_pow2, port_indx, vf_index, num_counters;
    int res, index = 0;
    struct counter_index *new_counter_index;


    if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
        return -ENOENT;

    /* Switch the device to extended counter mode when all counters support it. */
    if (!mlx4_is_slave(dev) &&
        dev->caps.max_counters == dev->caps.max_extended_counters) {
        res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
                   MLX4_CMD_SET_IF_STAT,
                   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
        if (res) {
            mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
            return res;
        }
    }

    mutex_init(&priv->counters_table.mutex);

    if (mlx4_is_slave(dev)) {
        for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
            INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
            /* 0xFF means the master assigned no default counter for this port. */
            if (dev->caps.def_counter_index[port_indx] != 0xFF) {
                new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
                if (!new_counter_index)
                    return -ENOMEM;
                new_counter_index->index = dev->caps.def_counter_index[port_indx];
                list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
            }
        }
        mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
             __func__, dev->caps.num_ports, dev->caps.num_ports);
        return 0;
    }

    nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);

    for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
        INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
        /* allocating 2 counters per port for PFs */
        /* For the PF, the ETH default counters are 0,2; */
        /* and the RoCE default counters are 1,3 */
        for (num_counters = 0; num_counters < 2; num_counters++, index++) {
            new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
            if (!new_counter_index)
return -ENOMEM;
            new_counter_index->index = index;
            list_add_tail(&new_counter_index->list,
                      &priv->counters_table.global_port_list[port_indx]);
        }
    }

    if (mlx4_is_master(dev)) {
        /* Seed one default counter per VF per port; once the pool is nearly
         * exhausted (top 2 indices kept back) hand out the sink counter. */
        for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) {
            for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
                INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]);
                new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
                if (!new_counter_index)
                    return -ENOMEM;
                if (index < nent_pow2 - 2) {
                    new_counter_index->index = index;
                    index++;
                } else {
                    new_counter_index->index = MLX4_SINK_COUNTER_INDEX;
                }

                list_add_tail(&new_counter_index->list,
                          &priv->counters_table.vf_list[vf_index][port_indx]);
            }
        }

        /* Bitmap manages the remaining dynamic range; 'index' entries are
         * already reserved for the defaults seeded above. */
        res = mlx4_bitmap_init(&priv->counters_table.bitmap,
                       nent_pow2, nent_pow2 - 1,
                       index, 1);
        mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n",
             __func__, index, dev->num_vfs);
    } else {
        res = mlx4_bitmap_init(&priv->counters_table.bitmap,
                       nent_pow2, nent_pow2 - 1,
                       index, 1);
        mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n",
             __func__, index, dev->caps.num_ports);
    }

    return 0;

}

/*
 * Free every counter_index node on the per-port (and, on the PF, per-VF)
 * lists and tear down the allocation bitmap. VF counters are also reset in
 * hardware via QUERY_IF_STAT-with-clear before being dropped.
 */
static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    int i, j;
    struct counter_index *port, *tmp_port;
    struct counter_index *vf, *tmp_vf;

    mutex_lock(&priv->counters_table.mutex);

    if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) {
        for (i = 0; i < dev->caps.num_ports; i++) {
            list_for_each_entry_safe(port, tmp_port,
                         &priv->counters_table.global_port_list[i],
                         list) {
                list_del(&port->list);
                kfree(port);
            }
        }
        if (!mlx4_is_slave(dev)) {
            for (i = 0; i < dev->num_vfs; i++) {
                for (j = 0; j < dev->caps.num_ports; j++) {
                    list_for_each_entry_safe(vf, tmp_vf,
                                 &priv->counters_table.vf_list[i][j],
                                 list) {
                        /* clear the counter statistic */
                        if (__mlx4_clear_if_stat(dev, vf->index))
                            mlx4_dbg(dev, "%s: reset counter %d failed\n",
                                 __func__, vf->index);
                        list_del(&vf->list);
                        kfree(vf);
                    }
                }
            }
            mlx4_bitmap_cleanup(&priv->counters_table.bitmap);
        }
    }
    mutex_unlock(&priv->counters_table.mutex);
}

/*
 * Release the counters a VF allocated during its lifetime so the next user
 * of that slave slot starts clean. The first entry on each per-port list is
 * the default counter seeded by the master and is kept; everything after it
 * is cleared in hardware, returned to the bitmap, and freed.
 *
 * 'slave' is 1-based (slave 0 is the PF itself and is skipped).
 */
int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    int i, first;
    struct counter_index *vf, *tmp_vf;

    /* clean VF's counters for the next usage */
    if (slave > 0 && slave <= dev->num_vfs) {
        mlx4_dbg(dev, "%s: free counters of slave(%d)\n"
             , __func__, slave);

        mutex_lock(&priv->counters_table.mutex);
        for (i = 0; i < dev->caps.num_ports; i++) {
            first = 0;
            list_for_each_entry_safe(vf, tmp_vf,
                         &priv->counters_table.vf_list[slave - 1][i],
                         list) {
                /* clear the counter statistic */
                if (__mlx4_clear_if_stat(dev, vf->index))
                    mlx4_dbg(dev, "%s: reset counter %d failed\n",
                         __func__, vf->index);
                /* first++ is 0 on the head entry, so the default counter
                 * survives; sink counters are never bitmap-backed. */
                if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) {
                    mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n"
                         , __func__, vf->index, slave, i + 1);
                    mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR);
                    list_del(&vf->list);
                    kfree(vf);
                } else {
                    mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n"
                         , __func__, vf->index, slave, i + 1);
                }
            }
        }
        mutex_unlock(&priv->counters_table.mutex);
    }

    return 0;
}

/*
 * Allocate a counter for (slave, port) and return its index through *idx.
 *
 * slave == 0 means the PF/native device; port == 0 is a legacy-guest request
 * that gets the sink counter. When the bitmap is exhausted the caller is
 * silently given the default (head-of-list) counter for that slave/port —
 * still reported as success.
 *
 * Returns 0, -ENOENT (no counter support), -EINVAL (bad slave/port) or
 * -ENOMEM.
 */
int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct counter_index *new_counter_index;

    if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
        return -ENOENT;

    if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
        (port < 0) || (port > MLX4_MAX_PORTS)) {
        mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n",
             __func__, slave, port);
        return -EINVAL;
    }

    /* handle old guest request does not support request by port index */
    if (port == 0) {
        *idx = MLX4_SINK_COUNTER_INDEX;
        mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
             , __func__, *idx, slave, port);
        return 0;
    }

    mutex_lock(&priv->counters_table.mutex);

    /* mlx4_bitmap_alloc() returns (u32)-1 on exhaustion; the comparison
     * below relies on the implicit conversion of -1 to u32. */
    *idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap);
    /* if no resources return the default counter of the slave and port */
    if (*idx == -1) {
        if (slave == 0) { /* PF/native: default lives on the global port list */
            new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
                               struct counter_index,
                               list);
        } else {
            new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
                               struct counter_index,
                               list);
        }

        *idx = new_counter_index->index;
        mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n"
             , __func__, *idx, slave, port);
        goto out;
    }

    if (slave == 0) { /* native or master */
        new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
        if (!new_counter_index)
            goto no_mem;
        new_counter_index->index = *idx;
        list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
    } else {
        new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
        if (!new_counter_index)
            goto no_mem;
        new_counter_index->index = *idx;
        list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]);
    }

    mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n"
         , __func__, *idx, slave, port);
out:
    mutex_unlock(&priv->counters_table.mutex);
    return 0;

no_mem:
    /* Return the freshly reserved bitmap slot before reporting failure. */
    mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR);
    mutex_unlock(&priv->counters_table.mutex);
    *idx = MLX4_SINK_COUNTER_INDEX;
    mlx4_dbg(dev, "%s: failed err (%d)\n"
         , __func__, -ENOMEM);
    return -ENOMEM;
}

/*
 * Public counter allocation entry point.
 *
 * On a multi-function device the request is forwarded to the resource
 * wrapper (ALLOC_RES) so the master arbitrates; a slave then mirrors the
 * returned index onto its local per-port list. Natively it falls through to
 * __mlx4_counter_alloc() with slave 0.
 *
 * Returns 0, -ENOSPC when only the sink counter was available, -EEXIST when
 * the returned index is the port's default counter, or a negative errno.
 */
int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx)
{
    u64 out_param;
    int err;
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct counter_index *new_counter_index, *c_index;

    if (mlx4_is_mfunc(dev)) {
        err = mlx4_cmd_imm(dev, 0, &out_param,
                   ((u32) port) << 8 | (u32) RES_COUNTER,
                   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
                   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
        if (!err) {
            *idx = get_param_l(&out_param);
            if (*idx == MLX4_SINK_COUNTER_INDEX)
                return -ENOSPC;

            mutex_lock(&priv->counters_table.mutex);
            c_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
                         struct counter_index,
                         list);
            mutex_unlock(&priv->counters_table.mutex);
            if (c_index->index == *idx)
                return -EEXIST;

            if (mlx4_is_slave(dev)) {
                /* Track the master-granted index locally so the slave
                 * can find and free it later. */
                new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
                if (!new_counter_index) {
                    mlx4_counter_free(dev, port, *idx);
                    return -ENOMEM;
                }
                new_counter_index->index = *idx;
                mutex_lock(&priv->counters_table.mutex);
                list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
                mutex_unlock(&priv->counters_table.mutex);
                mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n"
                     , __func__, *idx, port);
            }
        }
        return err;
    }
    return __mlx4_counter_alloc(dev, 0, port, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_alloc);

/*
 * Free counter 'idx' owned by (slave, port): reset it in hardware, unlink
 * it from the owner's list (native/PF or per-VF) and return it to the
 * bitmap. Default counters (the reserved head entries) and the sink
 * counter are never freed.
 */
void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx)
{
    /* Deletes from the native/PF list or the per-slave list accordingly. */
    struct mlx4_priv *priv
= mlx4_priv(dev);
    struct counter_index *pf, *tmp_pf;
    struct counter_index *vf, *tmp_vf;
    int first;


    if (idx == MLX4_SINK_COUNTER_INDEX) {
        mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n"
             , __func__, idx, port);
        return;
    }

    if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
        (port < 0) || (port > MLX4_MAX_PORTS)) {
        /* NOTE(review): the format says "port(%d)" but 'idx' is passed as
         * the second value instead of 'port' — looks like a copy/paste
         * slip in the log arguments; confirm and fix. */
        mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n"
             , __func__, slave, idx);
        return;
    }

    mutex_lock(&priv->counters_table.mutex);
    if (slave == 0) {
        first = 0;
        list_for_each_entry_safe(pf, tmp_pf,
                     &priv->counters_table.global_port_list[port - 1],
                     list) {
            /* the first 2 counters are reserved */
            if (pf->index == idx) {
                /* clear the counter statistic */
                if (__mlx4_clear_if_stat(dev, pf->index))
                    mlx4_dbg(dev, "%s: reset counter %d failed\n",
                         __func__, pf->index);
                /* Only entries at list position >= 2 may be freed: the two
                 * per-port defaults (positions 0 and 1) stay. The sink
                 * check is redundant here — idx was rejected above. */
                if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) {
                    list_del(&pf->list);
                    kfree(pf);
                    mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n"
                         , __func__, idx, slave, port);
                    mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
                    goto out;
                } else {
                    mlx4_dbg(dev, "%s: can't delete default counter index %d for native device (%d) port %d\n"
                         , __func__, idx, slave, port);
                    goto out;
                }
            }
            first++;
        }
        mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n"
             , __func__, idx, slave, port);
    } else {
        first = 0;
        list_for_each_entry_safe(vf, tmp_vf,
                     &priv->counters_table.vf_list[slave - 1][port - 1],
                     list) {
            /* the first element is reserved */
            if (vf->index == idx) {
                /* clear the counter statistic */
                if (__mlx4_clear_if_stat(dev, vf->index))
                    mlx4_dbg(dev, "%s: reset counter %d failed\n",
                         __func__, vf->index);
                /* first == 0 only for the head entry (the VF's default). */
                if (first) {
                    list_del(&vf->list);
                    kfree(vf);
                    mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n",
                         __func__, idx, slave, port);
                    mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR);
                    goto out;
                } else {
                    mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n"
                         , __func__, slave, idx, port);
                    goto out;
                }
            }
            first++;
        }
        mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n"
             , __func__, slave, idx, port);
    }

out:
    mutex_unlock(&priv->counters_table.mutex);
}

/*
 * Public counter free entry point. On a multi-function device the free is
 * forwarded to the resource wrapper (FREE_RES); a slave additionally drops
 * its local tracking node for the index. Natively this delegates to
 * __mlx4_counter_free() with slave 0.
 */
void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx)
{
    u64 in_param = 0;
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct counter_index *counter, *tmp_counter;
    int first = 0;

    if (mlx4_is_mfunc(dev)) {
        set_param_l(&in_param, idx);
        mlx4_cmd(dev, in_param,
             ((u32) port) << 8 | (u32) RES_COUNTER,
             RES_OP_RESERVE,
             MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
             MLX4_CMD_WRAPPED);

        if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) {
            mutex_lock(&priv->counters_table.mutex);
            list_for_each_entry_safe(counter, tmp_counter,
                         &priv->counters_table.global_port_list[port - 1],
                         list) {
                /* first++ evaluates to 0 on the first match, so the head
                 * (default) entry is deliberately skipped. */
                if (counter->index == idx && first++) {
                    list_del(&counter->list);
                    kfree(counter);
                    mlx4_dbg(dev, "%s: delete counter index %d for port %d\n"
                         , __func__, idx, port);
                    mutex_unlock(&priv->counters_table.mutex);
                    return;
                }
            }
            mutex_unlock(&priv->counters_table.mutex);
        }

        return;
    }
    __mlx4_counter_free(dev, 0, port, idx);
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);

/*
 * Reset a hardware counter by issuing QUERY_IF_STAT with the clear bit
 * (bit 31 of the input modifier) set; the queried values are discarded.
 * Slaves cannot issue the native command and silently succeed.
 * Returns 0 or a negative errno.
 */
int __mlx4_clear_if_stat(struct mlx4_dev *dev,
             u8 counter_index)
{
    struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
    int err = 0;
    /* NOTE(review): (1 << 31) shifts into the sign bit of int; (1U << 31)
     * would be the well-defined spelling. */
    u32 if_stat_in_mod = (counter_index & 0xff) | (1 << 31);

    if (counter_index == MLX4_SINK_COUNTER_INDEX)
        return -EINVAL;

    if (mlx4_is_slave(dev))
        return 0;

    if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
    if (IS_ERR(if_stat_mailbox)) {
        err = PTR_ERR(if_stat_mailbox);
        return err;
    }

    err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
               MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
               MLX4_CMD_NATIVE);

    mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
    return err;
}

/*
 * Return the default counter index for (slave, port): the head entry of the
 * owner's per-port list, or the sink counter for IB ports (which have no
 * ETH/RoCE default). slave == 0 selects the PF/native list.
 */
u8 mlx4_get_default_counter_index(struct mlx4_dev *dev, int slave, int port)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct counter_index *new_counter_index;

    if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) {
        mlx4_dbg(dev, "%s: return counter index %d for slave %d port (MLX4_PORT_TYPE_IB) %d\n",
             __func__, MLX4_SINK_COUNTER_INDEX, slave, port);
        return (u8)MLX4_SINK_COUNTER_INDEX;
    }

    mutex_lock(&priv->counters_table.mutex);
    if (slave == 0) {
        new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
                           struct counter_index,
                           list);
    } else {
        new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
                           struct counter_index,
                           list);
    }
    mutex_unlock(&priv->counters_table.mutex);

    mlx4_dbg(dev, "%s: return counter index %d for slave %d port %d\n",
         __func__, new_counter_index->index, slave, port);


    return (u8)new_counter_index->index;
}

/*
 * Accumulate extended (cnt_mode 1) interface statistics for every counter
 * attached to 'port' into *vport_stats via QUERY_IF_STAT. 'reset' != 0 also
 * clears each counter in hardware as it is read (bit 31 of the input
 * modifier). Sink counters are skipped. Returns 0 or a negative errno.
 */
int mlx4_get_vport_ethtool_stats(struct mlx4_dev *dev, int port,
                 struct mlx4_en_vport_stats *vport_stats,
                 int reset)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
    union mlx4_counter *counter;
    int err = 0;
    u32 if_stat_in_mod;
    struct counter_index *vport, *tmp_vport;

    if (!vport_stats)
        return -EINVAL;

    if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
    if (IS_ERR(if_stat_mailbox)) {
        err = PTR_ERR(if_stat_mailbox);
        return err;
    }

    mutex_lock(&priv->counters_table.mutex);
    list_for_each_entry_safe(vport, tmp_vport,
                 &priv->counters_table.global_port_list[port - 1],
                 list) {
        if (vport->index == MLX4_SINK_COUNTER_INDEX)
            continue;

        memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter));
        if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31);
        err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma,
                   if_stat_in_mod, 0,
                   MLX4_CMD_QUERY_IF_STAT,
                   MLX4_CMD_TIME_CLASS_C,
                   MLX4_CMD_NATIVE);
        if (err) {
            mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n",
                 __func__, vport->index);
            goto if_stat_out;
        }
        counter = (union mlx4_counter *)if_stat_mailbox->buf;
        /* cnt_mode 1 == extended counters; other modes are ignored. */
        if ((counter->control.cnt_mode & 0xf) == 1) {
            vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames);
            vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames);
            vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames);
            vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames);
            vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames);
            vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames);
            vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets);
            vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets);
            vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets);
            vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets);
            vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets);
            vport_stats->tx_multicast_bytes
+= be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets);
            vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames);
            vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames);
            vport_stats->tx_errors += be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames);
        }
    }

if_stat_out:
    mutex_unlock(&priv->counters_table.mutex);
    mlx4_free_cmd_mailbox(dev, if_stat_mailbox);

    return err;
}
EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats);

/*
 * Initialize every software resource table in dependency order (UAR, PD,
 * XRCD, MR, MCG, EQ, CQ, SRQ, QP, counters), switch the command interface
 * to event-driven mode, verify interrupt delivery with a NOP command, and
 * configure each physical port. On any failure everything already set up
 * is torn down in reverse order via the goto-cleanup chain.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    int err;
    int port;
    __be32 ib_port_default_caps;

    err = mlx4_init_uar_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "user access region table (err=%d), aborting.\n",
             err);
        return err;
    }

    err = mlx4_uar_alloc(dev, &priv->driver_uar);
    if (err) {
        mlx4_err(dev, "Failed to allocate driver access region "
             "(err=%d), aborting.\n", err);
        goto err_uar_table_free;
    }

    /* Map the kernel access region (doorbell page) of the driver's UAR. */
    priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
    if (!priv->kar) {
        mlx4_err(dev, "Couldn't map kernel access region, "
             "aborting.\n");
        err = -ENOMEM;
        goto err_uar_free;
    }

    err = mlx4_init_pd_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "protection domain table (err=%d), aborting.\n", err);
        goto err_kar_unmap;
    }

    err = mlx4_init_xrcd_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "reliable connection domain table (err=%d), "
             "aborting.\n", err);
        goto err_pd_table_free;
    }

    err = mlx4_init_mr_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "memory region table (err=%d), aborting.\n", err);
        goto err_xrcd_table_free;
    }

    /* Slaves don't manage steering; only the PF owns the MCG table. */
    if (!mlx4_is_slave(dev)) {
        err = mlx4_init_mcg_table(dev);
        if (err) {
            mlx4_err(dev, "Failed to initialize "
                 "multicast group table (err=%d), aborting.\n",
                 err);
            goto err_mr_table_free;
        }
    }

    err = mlx4_init_eq_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "event queue table (err=%d), aborting.\n", err);
        goto err_mcg_table_free;
    }

    err = mlx4_cmd_use_events(dev);
    if (err) {
        mlx4_err(dev, "Failed to switch to event-driven "
             "firmware commands (err=%d), aborting.\n", err);
        goto err_eq_table_free;
    }

    /* NOP generates an async event — proves the IRQ path actually works. */
    err = mlx4_NOP(dev);
    if (err) {
        if (dev->flags & MLX4_FLAG_MSI_X) {
            mlx4_warn(dev, "NOP command failed to generate MSI-X "
                  "interrupt IRQ %d).\n",
                  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
            mlx4_warn(dev, "Trying again without MSI-X.\n");
        } else {
            mlx4_err(dev, "NOP command failed to generate interrupt "
                 "(IRQ %d), aborting.\n",
                 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
            mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
        }

        goto err_cmd_poll;
    }

    mlx4_dbg(dev, "NOP command IRQ test passed\n");

    err = mlx4_init_cq_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "completion queue table (err=%d), aborting.\n", err);
        goto err_cmd_poll;
    }

    err = mlx4_init_srq_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "shared receive queue table (err=%d), aborting.\n",
             err);
        goto err_cq_table_free;
    }

    err = mlx4_init_qp_table(dev);
    if (err) {
        mlx4_err(dev, "Failed to initialize "
             "queue pair table (err=%d), aborting.\n", err);
        goto err_srq_table_free;
    }

    /* -ENOENT just means the device has no counter support — tolerated. */
    err = mlx4_init_counters_table(dev);
    if (err && err != -ENOENT) {
        mlx4_err(dev, "Failed to initialize counters table (err=%d), "
             "aborting.\n", err);
        goto err_qp_table_free;
    }

    if (!mlx4_is_slave(dev)) {
        for (port = 1; port <= dev->caps.num_ports; port++) {
            ib_port_default_caps = 0;
            err = mlx4_get_port_ib_caps(dev, port,
                            &ib_port_default_caps);
            if (err)
                mlx4_warn(dev, "failed to get port %d default "
                      "ib capabilities (%d). Continuing "
                      "with caps = 0\n", port, err);
            dev->caps.ib_port_def_cap[port] = ib_port_default_caps;

            /* initialize per-slave default ib port capabilities */
            if (mlx4_is_master(dev)) {
                int i;
                for (i = 0; i < dev->num_slaves; i++) {
                    if (i == mlx4_master_func_num(dev))
                        continue;
                    priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
                        ib_port_default_caps;
                }
            }

            dev->caps.port_ib_mtu[port] = IB_MTU_4096;

            /* -1 pkey table length means "leave unchanged" (non-master). */
            err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
                        dev->caps.pkey_table_len[port] : -1);
            if (err) {
                mlx4_err(dev, "Failed to set port %d (err=%d), "
                     "aborting\n", port, err);
                goto err_counters_table_free;
            }
        }
    }

    return 0;

err_counters_table_free:
    mlx4_cleanup_counters_table(dev);

err_qp_table_free:
    mlx4_cleanup_qp_table(dev);

err_srq_table_free:
    mlx4_cleanup_srq_table(dev);

err_cq_table_free:
    mlx4_cleanup_cq_table(dev);

err_cmd_poll:
    mlx4_cmd_use_polling(dev);

err_eq_table_free:
    mlx4_cleanup_eq_table(dev);

err_mcg_table_free:
    if (!mlx4_is_slave(dev))
        mlx4_cleanup_mcg_table(dev);

err_mr_table_free:
    mlx4_cleanup_mr_table(dev);

err_xrcd_table_free:
    mlx4_cleanup_xrcd_table(dev);

err_pd_table_free:
    mlx4_cleanup_pd_table(dev);

err_kar_unmap:
    iounmap(priv->kar);

err_uar_free:
    mlx4_uar_free(dev, &priv->driver_uar);

err_uar_table_free:
    mlx4_cleanup_uar_table(dev);
    return err;
}

/*
 * Try to enable MSI-X with one vector per port per online CPU plus the
 * legacy set, clamped by available EQs and the msi_x module parameter.
 * Falls back to a single shared legacy IRQ when MSI-X is unavailable, or
 * to zero vectors when no IRQ can be allocated at all.
 */
static void mlx4_enable_msi_x(struct mlx4_dev *dev)
{
    struct
mlx4_priv *priv = mlx4_priv(dev);
    struct msix_entry *entries;
    int err;
    int i;

    if (msi_x) {
        int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ;

        /* Never request more vectors than non-reserved EQs exist. */
        nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
                 nreq);

        /* msi_x > 1 acts as an explicit vector-count cap (non-SRIOV only). */
        if (msi_x > 1 && !mlx4_is_mfunc(dev))
            nreq = min_t(int, nreq, msi_x);

        entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
        if (!entries)
            goto no_msi;

        for (i = 0; i < nreq; ++i)
            entries[i].entry = i;

    retry:
        /* pci_enable_msix(): 0 = success, >0 = number of vectors actually
         * available, <0 = hard error. */
        err = pci_enable_msix(dev->pdev, entries, nreq);
        if (err) {
            /* Try again if at least 2 vectors are available */
            if (err > 1) {
                mlx4_info(dev, "Requested %d vectors, "
                      "but only %d MSI-X vectors available, "
                      "trying again\n", nreq, err);
                nreq = err;
                goto retry;
            }
            kfree(entries);
            /* if error, or can't alloc even 1 IRQ */
            if (err < 0) {
                mlx4_err(dev, "No IRQs left, device can't "
                     "be started.\n");
                goto no_irq;
            }
            goto no_msi;
        }

        if (nreq <
            MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
            /*Working in legacy mode , all EQ's shared*/
            dev->caps.comp_pool = 0;
            dev->caps.num_comp_vectors = nreq - 1;
        } else {
            dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ;
            dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
        }
        for (i = 0; i < nreq; ++i)
            priv->eq_table.eq[i].irq = entries[i].vector;

        dev->flags |= MLX4_FLAG_MSI_X;

        kfree(entries);
        return;
    }

no_msi:
    /* Legacy INTx fallback: one completion vector, shared device IRQ. */
    dev->caps.num_comp_vectors = 1;
    dev->caps.comp_pool = 0;

    for (i = 0; i < 2; ++i)
        priv->eq_table.eq[i].irq = dev->pdev->irq;
    return;
no_irq:
    dev->caps.num_comp_vectors = 0;
    dev->caps.comp_pool = 0;
    return;
}

/*
 * Create the per-HCA sysfs attributes (fw_ver, hca, board_id) on the PCI
 * device. Creation failures are logged but not fatal.
 */
static void
mlx4_init_hca_info(struct mlx4_dev *dev)
{
    struct mlx4_hca_info *info = &mlx4_priv(dev)->hca_info;

    info->dev = dev;

    info->firmware_attr = (struct device_attribute)__ATTR(fw_ver, S_IRUGO,
                               show_firmware_version, NULL);
    if (device_create_file(&dev->pdev->dev, &info->firmware_attr))
        mlx4_err(dev, "Failed to add file firmware version");

    info->hca_attr = (struct device_attribute)__ATTR(hca, S_IRUGO, show_hca,
                               NULL);
    if (device_create_file(&dev->pdev->dev, &info->hca_attr))
        mlx4_err(dev, "Failed to add file hca type");

    info->board_attr = (struct device_attribute)__ATTR(board_id, S_IRUGO,
                               show_board, NULL);
    if (device_create_file(&dev->pdev->dev, &info->board_attr))
        mlx4_err(dev, "Failed to add file board id type");
}

/*
 * Create the per-port sysfs attributes (port type and IB MTU). Attributes
 * are writable only on non-multi-function devices. On failure, info->port
 * is set to -1 so mlx4_cleanup_port_info() skips removal.
 */
static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
{
    struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
    int err = 0;

    info->dev = dev;
    info->port = port;
    if (!mlx4_is_slave(dev)) {
        mlx4_init_mac_table(dev, &info->mac_table);
        mlx4_init_vlan_table(dev, &info->vlan_table);
        info->base_qpn = mlx4_get_base_qpn(dev, port);
    }

    sprintf(info->dev_name, "mlx4_port%d", port);
    info->port_attr.attr.name = info->dev_name;
    if (mlx4_is_mfunc(dev))
        info->port_attr.attr.mode = S_IRUGO;
    else {
        info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
        info->port_attr.store = set_port_type;
    }
    info->port_attr.show = show_port_type;
    sysfs_attr_init(&info->port_attr.attr);

    err = device_create_file(&dev->pdev->dev, &info->port_attr);
    if (err) {
        /* NOTE(review): execution continues below and 'err' is overwritten
         * by the second device_create_file(); a first-file failure followed
         * by a second-file success returns 0 with info->port reset to -1.
         * Confirm whether an early return was intended. */
        mlx4_err(dev, "Failed to create file for port %d\n", port);
        info->port = -1;
    }

    sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
    info->port_mtu_attr.attr.name = info->dev_mtu_name;
    if (mlx4_is_mfunc(dev))
        info->port_mtu_attr.attr.mode = S_IRUGO;
    else {
        info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
        info->port_mtu_attr.store = set_port_ib_mtu;
    }
    info->port_mtu_attr.show = show_port_ib_mtu;
    sysfs_attr_init(&info->port_mtu_attr.attr);

    err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
    if (err) {
        mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
        device_remove_file(&info->dev->pdev->dev, &info->port_attr);
        info->port = -1;
    }

    return err;
}

/* Remove the per-HCA sysfs attributes created by mlx4_init_hca_info(). */
static void
mlx4_cleanup_hca_info(struct mlx4_hca_info *info)
{
    device_remove_file(&info->dev->pdev->dev, &info->firmware_attr);
    device_remove_file(&info->dev->pdev->dev, &info->board_attr);
    device_remove_file(&info->dev->pdev->dev, &info->hca_attr);
}

/* Remove the per-port sysfs attributes; a port of -1 marks a failed init. */
static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
{
    if (info->port < 0)
        return;

    device_remove_file(&info->dev->pdev->dev, &info->port_attr);
    device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
}

/*
 * Allocate the per-port steering state and initialize the promiscuous-QP
 * and steering-entry lists for every steer type. Returns 0 or -ENOMEM.
 */
static int mlx4_init_steering(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    int num_entries = dev->caps.num_ports;
    int i, j;

    priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL);
    if (!priv->steer)
        return -ENOMEM;

    for (i = 0; i < num_entries; i++)
        for (j = 0; j < MLX4_NUM_STEERS; j++) {
            INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
            INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
        }
    return 0;
}

/*
 * Free all steering state: every promiscuous QP node, every steering entry
 * (including each entry's duplicates list), then the steer array itself.
 */
static void mlx4_clear_steering(struct mlx4_dev *dev)
{
    struct mlx4_priv *priv = mlx4_priv(dev);
    struct mlx4_steer_index *entry, *tmp_entry;
    struct mlx4_promisc_qp *pqp, *tmp_pqp;
    int num_entries = dev->caps.num_ports;
    int i, j;

    for (i = 0; i < num_entries; i++) {
        for (j = 0; j < MLX4_NUM_STEERS; j++) {
            list_for_each_entry_safe(pqp, tmp_pqp,
                         &priv->steer[i].promisc_qps[j],
                         list) {
                list_del(&pqp->list);
                kfree(pqp);
            }
            list_for_each_entry_safe(entry, tmp_entry,
                         &priv->steer[i].steer_entries[j],
                         list) {
                list_del(&entry->list);
                list_for_each_entry_safe(pqp, tmp_pqp,
                             &entry->duplicates,
                             list) {
                    list_del(&pqp->list);
                    kfree(pqp);
                }
                kfree(entry);
            }
        }
    }
    kfree(priv->steer);
}

/* Flatten PCI slot/function into a single function number (8 fns per slot). */
static int extended_func_num(struct pci_dev *pdev)
{
    return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
}

/* Device-ownership semaphore register in BAR 0. */
#define MLX4_OWNER_BASE 0x8069c
#define MLX4_OWNER_SIZE 4

/*
 * Read the ownership register: returns 0 when this function owns the
 * device, 1 when another function does, or a negative errno.
 */
static int mlx4_get_ownership(struct mlx4_dev *dev)
{
    void __iomem *owner;
    u32 ret;

    if (pci_channel_offline(dev->pdev))
        return -EIO;

    owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
            MLX4_OWNER_SIZE);
    if (!owner) {
        mlx4_err(dev, "Failed to obtain ownership bit\n");
        return -ENOMEM;
    }

    ret = readl(owner);
    iounmap(owner);
    return (int) !!ret;
}

/*
 * Release device ownership by clearing the semaphore register, then give
 * other contenders a second to observe it before unmapping.
 */
static void mlx4_free_ownership(struct mlx4_dev *dev)
{
    void __iomem *owner;

    if (pci_channel_offline(dev->pdev))
        return;

    owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
            MLX4_OWNER_SIZE);
    if (!owner) {
        mlx4_err(dev, "Failed to obtain ownership bit\n");
        return;
    }
    writel(0, owner);
    msleep(1000);
    iounmap(owner);
}

static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
{
    struct mlx4_priv *priv;
    struct mlx4_dev *dev;
    int err;
    int port;
    int nvfs, prb_vf;

    pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));

    err = pci_enable_device(pdev);
    if (err) {
        dev_err(&pdev->dev, "Cannot enable PCI device, "
            "aborting.\n");
        return err;
    }

    /* Resolve per-device num_vfs / probe_vf module-parameter values. */
    mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs);
    mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf);
    if (nvfs > MLX4_MAX_NUM_VF) {
        dev_err(&pdev->dev,
"There are more VF's (%d) than allowed(%d)\n", 3235 nvfs, MLX4_MAX_NUM_VF); 3236 return -EINVAL; 3237 } 3238 3239 if (nvfs < 0) { 3240 dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); 3241 return -EINVAL; 3242 } 3243 /* 3244 * Check for BARs. 3245 */ 3246 if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && 3247 !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 3248 dev_err(&pdev->dev, "Missing DCS, aborting." 3249 "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", 3250 pci_dev_data, pci_resource_flags(pdev, 0)); 3251 err = -ENODEV; 3252 goto err_disable_pdev; 3253 } 3254 if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { 3255 dev_err(&pdev->dev, "Missing UAR, aborting.\n"); 3256 err = -ENODEV; 3257 goto err_disable_pdev; 3258 } 3259 3260 err = pci_request_regions(pdev, DRV_NAME); 3261 if (err) { 3262 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 3263 goto err_disable_pdev; 3264 } 3265 3266 pci_set_master(pdev); 3267 3268 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 3269 if (err) { 3270 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); 3271 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 3272 if (err) { 3273 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); 3274 goto err_release_regions; 3275 } 3276 } 3277 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 3278 if (err) { 3279 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " 3280 "consistent PCI DMA mask.\n"); 3281 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 3282 if (err) { 3283 dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " 3284 "aborting.\n"); 3285 goto err_release_regions; 3286 } 3287 } 3288 3289 /* Allow large DMA segments, up to the firmware limit of 1 GB */ 3290 dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); 3291 3292 priv = kzalloc(sizeof *priv, GFP_KERNEL); 3293 if (!priv) { 3294 dev_err(&pdev->dev, "Device struct alloc failed, " 3295 "aborting.\n"); 3296 err = -ENOMEM; 3297 goto 
err_release_regions; 3298 } 3299 3300 dev = &priv->dev; 3301 dev->pdev = pdev; 3302 INIT_LIST_HEAD(&priv->dev_list); 3303 INIT_LIST_HEAD(&priv->ctx_list); 3304 spin_lock_init(&priv->ctx_lock); 3305 3306 mutex_init(&priv->port_mutex); 3307 3308 INIT_LIST_HEAD(&priv->pgdir_list); 3309 mutex_init(&priv->pgdir_mutex); 3310 3311 INIT_LIST_HEAD(&priv->bf_list); 3312 mutex_init(&priv->bf_mutex); 3313 3314 dev->rev_id = pdev->revision; 3315 dev->numa_node = dev_to_node(&pdev->dev); 3316 /* Detect if this device is a virtual function */ 3317 if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { 3318 /* When acting as pf, we normally skip vfs unless explicitly 3319 * requested to probe them. */ 3320 if (nvfs && extended_func_num(pdev) > prb_vf) { 3321 mlx4_warn(dev, "Skipping virtual function:%d\n", 3322 extended_func_num(pdev)); 3323 err = -ENODEV; 3324 goto err_free_dev; 3325 } 3326 mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); 3327 dev->flags |= MLX4_FLAG_SLAVE; 3328 } else { 3329 /* We reset the device and enable SRIOV only for physical 3330 * devices. Try to claim ownership on the device; 3331 * if already taken, skip -- do not allow multiple PFs */ 3332 err = mlx4_get_ownership(dev); 3333 if (err) { 3334 if (err < 0) 3335 goto err_free_dev; 3336 else { 3337 mlx4_warn(dev, "Multiple PFs not yet supported." 
3338 " Skipping PF.\n"); 3339 err = -EINVAL; 3340 goto err_free_dev; 3341 } 3342 } 3343 3344 if (nvfs) { 3345 mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); 3346 err = pci_enable_sriov(pdev, nvfs); 3347 if (err) { 3348 mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", 3349 err); 3350 err = 0; 3351 } else { 3352 mlx4_warn(dev, "Running in master mode\n"); 3353 dev->flags |= MLX4_FLAG_SRIOV | 3354 MLX4_FLAG_MASTER; 3355 dev->num_vfs = nvfs; 3356 } 3357 } 3358 3359 atomic_set(&priv->opreq_count, 0); 3360 INIT_WORK(&priv->opreq_task, mlx4_opreq_action); 3361 3362 /* 3363 * Now reset the HCA before we touch the PCI capabilities or 3364 * attempt a firmware command, since a boot ROM may have left 3365 * the HCA in an undefined state. 3366 */ 3367 err = mlx4_reset(dev); 3368 if (err) { 3369 mlx4_err(dev, "Failed to reset HCA, aborting.\n"); 3370 goto err_sriov; 3371 } 3372 } 3373 3374slave_start: 3375 err = mlx4_cmd_init(dev); 3376 if (err) { 3377 mlx4_err(dev, "Failed to init command interface, aborting.\n"); 3378 goto err_sriov; 3379 } 3380 3381 /* In slave functions, the communication channel must be initialized 3382 * before posting commands. 
Also, init num_slaves before calling 3383 * mlx4_init_hca */ 3384 if (mlx4_is_mfunc(dev)) { 3385 if (mlx4_is_master(dev)) 3386 dev->num_slaves = MLX4_MAX_NUM_SLAVES; 3387 else { 3388 dev->num_slaves = 0; 3389 err = mlx4_multi_func_init(dev); 3390 if (err) { 3391 mlx4_err(dev, "Failed to init slave mfunc" 3392 " interface, aborting.\n"); 3393 goto err_cmd; 3394 } 3395 } 3396 } 3397 3398 err = mlx4_init_hca(dev); 3399 if (err) { 3400 if (err == -EACCES) { 3401 /* Not primary Physical function 3402 * Running in slave mode */ 3403 mlx4_cmd_cleanup(dev); 3404 dev->flags |= MLX4_FLAG_SLAVE; 3405 dev->flags &= ~MLX4_FLAG_MASTER; 3406 goto slave_start; 3407 } else 3408 goto err_mfunc; 3409 } 3410 3411 /* In master functions, the communication channel must be initialized 3412 * after obtaining its address from fw */ 3413 if (mlx4_is_master(dev)) { 3414 err = mlx4_multi_func_init(dev); 3415 if (err) { 3416 mlx4_err(dev, "Failed to init master mfunc" 3417 "interface, aborting.\n"); 3418 goto err_close; 3419 } 3420 } 3421 3422 err = mlx4_alloc_eq_table(dev); 3423 if (err) 3424 goto err_master_mfunc; 3425 3426 priv->msix_ctl.pool_bm = 0; 3427 mutex_init(&priv->msix_ctl.pool_lock); 3428 3429 mlx4_enable_msi_x(dev); 3430 3431 /* no MSIX and no shared IRQ */ 3432 if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) { 3433 err = -ENOSPC; 3434 goto err_free_eq; 3435 } 3436 3437 if ((mlx4_is_mfunc(dev)) && 3438 !(dev->flags & MLX4_FLAG_MSI_X)) { 3439 err = -ENOSYS; 3440 mlx4_err(dev, "INTx is not supported in multi-function mode." 
3441 " aborting.\n"); 3442 goto err_free_eq; 3443 } 3444 3445 if (!mlx4_is_slave(dev)) { 3446 err = mlx4_init_steering(dev); 3447 if (err) 3448 goto err_free_eq; 3449 } 3450 3451 err = mlx4_setup_hca(dev); 3452 if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && 3453 !mlx4_is_mfunc(dev)) { 3454 dev->flags &= ~MLX4_FLAG_MSI_X; 3455 dev->caps.num_comp_vectors = 1; 3456 dev->caps.comp_pool = 0; 3457 pci_disable_msix(pdev); 3458 err = mlx4_setup_hca(dev); 3459 } 3460 3461 if (err) 3462 goto err_steer; 3463 3464 mlx4_init_quotas(dev); 3465 mlx4_init_hca_info(dev); 3466 3467 for (port = 1; port <= dev->caps.num_ports; port++) { 3468 err = mlx4_init_port_info(dev, port); 3469 if (err) 3470 goto err_port; 3471 } 3472 3473 err = mlx4_register_device(dev); 3474 if (err) 3475 goto err_port; 3476 3477 mlx4_request_modules(dev); 3478 3479 mlx4_sense_init(dev); 3480 mlx4_start_sense(dev); 3481 3482 priv->pci_dev_data = pci_dev_data; 3483 pci_set_drvdata(pdev, dev); 3484 3485 return 0; 3486 3487err_port: 3488 for (--port; port >= 1; --port) 3489 mlx4_cleanup_port_info(&priv->port[port]); 3490 3491 mlx4_cleanup_counters_table(dev); 3492 mlx4_cleanup_qp_table(dev); 3493 mlx4_cleanup_srq_table(dev); 3494 mlx4_cleanup_cq_table(dev); 3495 mlx4_cmd_use_polling(dev); 3496 mlx4_cleanup_eq_table(dev); 3497 mlx4_cleanup_mcg_table(dev); 3498 mlx4_cleanup_mr_table(dev); 3499 mlx4_cleanup_xrcd_table(dev); 3500 mlx4_cleanup_pd_table(dev); 3501 mlx4_cleanup_uar_table(dev); 3502 3503err_steer: 3504 if (!mlx4_is_slave(dev)) 3505 mlx4_clear_steering(dev); 3506 3507err_free_eq: 3508 mlx4_free_eq_table(dev); 3509 3510err_master_mfunc: 3511 if (mlx4_is_master(dev)) { 3512 mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); 3513 mlx4_multi_func_cleanup(dev); 3514 } 3515 3516 if (mlx4_is_slave(dev)) { 3517 kfree(dev->caps.qp0_tunnel); 3518 kfree(dev->caps.qp0_proxy); 3519 kfree(dev->caps.qp1_tunnel); 3520 kfree(dev->caps.qp1_proxy); 3521 } 3522 3523err_close: 3524 if (dev->flags & 
MLX4_FLAG_MSI_X) 3525 pci_disable_msix(pdev); 3526 3527 mlx4_close_hca(dev); 3528 3529err_mfunc: 3530 if (mlx4_is_slave(dev)) 3531 mlx4_multi_func_cleanup(dev); 3532 3533err_cmd: 3534 mlx4_cmd_cleanup(dev); 3535 3536err_sriov: 3537 if (dev->flags & MLX4_FLAG_SRIOV) 3538 pci_disable_sriov(pdev); 3539 3540 if (!mlx4_is_slave(dev)) 3541 mlx4_free_ownership(dev); 3542 3543err_free_dev: 3544 kfree(priv); 3545 3546err_release_regions: 3547 pci_release_regions(pdev); 3548 3549err_disable_pdev: 3550 pci_disable_device(pdev); 3551 pci_set_drvdata(pdev, NULL); 3552 return err; 3553} 3554 3555static int __devinit mlx4_init_one(struct pci_dev *pdev, 3556 const struct pci_device_id *id) 3557{ 3558 device_set_desc(pdev->dev.bsddev, mlx4_version); 3559 return __mlx4_init_one(pdev, id->driver_data); 3560} 3561 3562static void mlx4_remove_one(struct pci_dev *pdev) 3563{ 3564 struct mlx4_dev *dev = pci_get_drvdata(pdev); 3565 struct mlx4_priv *priv = mlx4_priv(dev); 3566 int p; 3567 3568 if (dev) { 3569 /* in SRIOV it is not allowed to unload the pf's 3570 * driver while there are alive vf's */ 3571 if (mlx4_is_master(dev)) { 3572 if (mlx4_how_many_lives_vf(dev)) 3573 mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); 3574 } 3575 mlx4_stop_sense(dev); 3576 mlx4_unregister_device(dev); 3577 3578 mlx4_cleanup_hca_info(&priv->hca_info); 3579 for (p = 1; p <= dev->caps.num_ports; p++) { 3580 mlx4_cleanup_port_info(&priv->port[p]); 3581 mlx4_CLOSE_PORT(dev, p); 3582 } 3583 3584 if (mlx4_is_master(dev)) 3585 mlx4_free_resource_tracker(dev, 3586 RES_TR_FREE_SLAVES_ONLY); 3587 3588 mlx4_cleanup_counters_table(dev); 3589 mlx4_cleanup_qp_table(dev); 3590 mlx4_cleanup_srq_table(dev); 3591 mlx4_cleanup_cq_table(dev); 3592 mlx4_cmd_use_polling(dev); 3593 mlx4_cleanup_eq_table(dev); 3594 mlx4_cleanup_mcg_table(dev); 3595 mlx4_cleanup_mr_table(dev); 3596 mlx4_cleanup_xrcd_table(dev); 3597 mlx4_cleanup_pd_table(dev); 3598 3599 if (mlx4_is_master(dev)) 3600 
mlx4_free_resource_tracker(dev, 3601 RES_TR_FREE_STRUCTS_ONLY); 3602 3603 iounmap(priv->kar); 3604 mlx4_uar_free(dev, &priv->driver_uar); 3605 mlx4_cleanup_uar_table(dev); 3606 if (!mlx4_is_slave(dev)) 3607 mlx4_clear_steering(dev); 3608 mlx4_free_eq_table(dev); 3609 if (mlx4_is_master(dev)) 3610 mlx4_multi_func_cleanup(dev); 3611 mlx4_close_hca(dev); 3612 if (mlx4_is_slave(dev)) 3613 mlx4_multi_func_cleanup(dev); 3614 mlx4_cmd_cleanup(dev); 3615 3616 if (dev->flags & MLX4_FLAG_MSI_X) 3617 pci_disable_msix(pdev); 3618 if (dev->flags & MLX4_FLAG_SRIOV) { 3619 mlx4_warn(dev, "Disabling SR-IOV\n"); 3620 pci_disable_sriov(pdev); 3621 } 3622 3623 if (!mlx4_is_slave(dev)) 3624 mlx4_free_ownership(dev); 3625 3626 kfree(dev->caps.qp0_tunnel); 3627 kfree(dev->caps.qp0_proxy); 3628 kfree(dev->caps.qp1_tunnel); 3629 kfree(dev->caps.qp1_proxy); 3630 3631 kfree(priv); 3632 pci_release_regions(pdev); 3633 pci_disable_device(pdev); 3634 pci_set_drvdata(pdev, NULL); 3635 } 3636} 3637 3638static int restore_current_port_types(struct mlx4_dev *dev, 3639 enum mlx4_port_type *types, 3640 enum mlx4_port_type *poss_types) 3641{ 3642 struct mlx4_priv *priv = mlx4_priv(dev); 3643 int err, i; 3644 3645 mlx4_stop_sense(dev); 3646 mutex_lock(&priv->port_mutex); 3647 for (i = 0; i < dev->caps.num_ports; i++) 3648 dev->caps.possible_type[i + 1] = poss_types[i]; 3649 err = mlx4_change_port_types(dev, types); 3650 mlx4_start_sense(dev); 3651 mutex_unlock(&priv->port_mutex); 3652 return err; 3653} 3654 3655int mlx4_restart_one(struct pci_dev *pdev) 3656{ 3657 struct mlx4_dev *dev = pci_get_drvdata(pdev); 3658 struct mlx4_priv *priv = mlx4_priv(dev); 3659 enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; 3660 enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; 3661 int pci_dev_data, err, i; 3662 3663 pci_dev_data = priv->pci_dev_data; 3664 for (i = 0; i < dev->caps.num_ports; i++) { 3665 curr_type[i] = dev->caps.port_type[i + 1]; 3666 poss_type[i] = dev->caps.possible_type[i + 1]; 3667 } 3668 3669 
mlx4_remove_one(pdev); 3670 err = __mlx4_init_one(pdev, pci_dev_data); 3671 if (err) 3672 return err; 3673 3674 dev = pci_get_drvdata(pdev); 3675 err = restore_current_port_types(dev, curr_type, poss_type); 3676 if (err) 3677 mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", 3678 err); 3679 return 0; 3680} 3681 3682static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { 3683 /* MT25408 "Hermon" SDR */ 3684 { PCI_VDEVICE(MELLANOX, 0x6340), 3685 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3686 /* MT25408 "Hermon" DDR */ 3687 { PCI_VDEVICE(MELLANOX, 0x634a), 3688 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3689 /* MT25408 "Hermon" QDR */ 3690 { PCI_VDEVICE(MELLANOX, 0x6354), 3691 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3692 /* MT25408 "Hermon" DDR PCIe gen2 */ 3693 { PCI_VDEVICE(MELLANOX, 0x6732), 3694 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3695 /* MT25408 "Hermon" QDR PCIe gen2 */ 3696 { PCI_VDEVICE(MELLANOX, 0x673c), 3697 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3698 /* MT25408 "Hermon" EN 10GigE */ 3699 { PCI_VDEVICE(MELLANOX, 0x6368), 3700 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3701 /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ 3702 { PCI_VDEVICE(MELLANOX, 0x6750), 3703 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3704 /* MT25458 ConnectX EN 10GBASE-T 10GigE */ 3705 { PCI_VDEVICE(MELLANOX, 0x6372), 3706 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3707 /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ 3708 { PCI_VDEVICE(MELLANOX, 0x675a), 3709 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3710 /* MT26468 ConnectX EN 10GigE PCIe gen2*/ 3711 { PCI_VDEVICE(MELLANOX, 0x6764), 3712 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3713 /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ 3714 { PCI_VDEVICE(MELLANOX, 0x6746), 3715 .driver_data = MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3716 /* MT26478 ConnectX2 40GigE PCIe gen2 */ 3717 { PCI_VDEVICE(MELLANOX, 0x676e), 3718 .driver_data = 
MLX4_PCI_DEV_FORCE_SENSE_PORT }, 3719 /* MT25400 Family [ConnectX-2 Virtual Function] */ 3720 { PCI_VDEVICE(MELLANOX, 0x1002), 3721 .driver_data = MLX4_PCI_DEV_IS_VF }, 3722 /* MT27500 Family [ConnectX-3] */ 3723 { PCI_VDEVICE(MELLANOX, 0x1003) }, 3724 /* MT27500 Family [ConnectX-3 Virtual Function] */ 3725 { PCI_VDEVICE(MELLANOX, 0x1004), 3726 .driver_data = MLX4_PCI_DEV_IS_VF }, 3727 { PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */ 3728 { PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */ 3729 { PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */ 3730 { PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */ 3731 { PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */ 3732 { PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */ 3733 { PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */ 3734 { PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */ 3735 { PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */ 3736 { PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */ 3737 { PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */ 3738 { PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */ 3739 { 0, } 3740}; 3741 3742MODULE_DEVICE_TABLE(pci, mlx4_pci_table); 3743 3744static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, 3745 pci_channel_state_t state) 3746{ 3747 mlx4_remove_one(pdev); 3748 3749 return state == pci_channel_io_perm_failure ? 3750 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 3751} 3752 3753static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) 3754{ 3755 int ret = __mlx4_init_one(pdev, 0); 3756 3757 return ret ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; 3758} 3759 3760static const struct pci_error_handlers mlx4_err_handler = { 3761 .error_detected = mlx4_pci_err_detected, 3762 .slot_reset = mlx4_pci_slot_reset, 3763}; 3764 3765static int suspend(struct pci_dev *pdev, pm_message_t state) 3766{ 3767 mlx4_remove_one(pdev); 3768 3769 return 0; 3770} 3771 3772static int resume(struct pci_dev *pdev) 3773{ 3774 return __mlx4_init_one(pdev, 0); 3775} 3776 3777static struct pci_driver mlx4_driver = { 3778 .name = DRV_NAME, 3779 .id_table = mlx4_pci_table, 3780 .probe = mlx4_init_one, 3781 .remove = __devexit_p(mlx4_remove_one), 3782 .suspend = suspend, 3783 .resume = resume, 3784 .err_handler = &mlx4_err_handler, 3785}; 3786 3787static int __init mlx4_verify_params(void) 3788{ 3789 int status; 3790 3791 status = update_defaults(&port_type_array); 3792 if (status == INVALID_STR) { 3793 if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) 3794 return -1; 3795 } else if (status == INVALID_DATA) { 3796 return -1; 3797 } 3798 3799 status = update_defaults(&num_vfs); 3800 if (status == INVALID_STR) { 3801 if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) 3802 return -1; 3803 } else if (status == INVALID_DATA) { 3804 return -1; 3805 } 3806 3807 status = update_defaults(&probe_vf); 3808 if (status == INVALID_STR) { 3809 if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) 3810 return -1; 3811 } else if (status == INVALID_DATA) { 3812 return -1; 3813 } 3814 3815 if (msi_x < 0) { 3816 pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); 3817 return -1; 3818 } 3819 3820 if ((log_num_mac < 0) || (log_num_mac > 7)) { 3821 pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); 3822 return -1; 3823 } 3824 3825 if (log_num_vlan != 0) 3826 pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", 3827 MLX4_LOG_NUM_VLANS); 3828 3829 if (mlx4_set_4k_mtu != -1) 3830 pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); 3831 3832 if ((log_mtts_per_seg < 0) || (log_mtts_per_seg 
> 7)) { 3833 pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); 3834 return -1; 3835 } 3836 3837 if (mlx4_log_num_mgm_entry_size != -1 && 3838 (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || 3839 mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { 3840 pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " 3841 "in legal range (-1 or %d..%d)\n", 3842 mlx4_log_num_mgm_entry_size, 3843 MLX4_MIN_MGM_LOG_ENTRY_SIZE, 3844 MLX4_MAX_MGM_LOG_ENTRY_SIZE); 3845 return -1; 3846 } 3847 3848 if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { 3849 pr_warning("mlx4_core: bad log_num_qp: %d\n", 3850 mod_param_profile.num_qp); 3851 return -1; 3852 } 3853 3854 if (mod_param_profile.num_srq < 10) { 3855 pr_warning("mlx4_core: too low log_num_srq: %d\n", 3856 mod_param_profile.num_srq); 3857 return -1; 3858 } 3859 3860 if (mod_param_profile.num_cq < 10) { 3861 pr_warning("mlx4_core: too low log_num_cq: %d\n", 3862 mod_param_profile.num_cq); 3863 return -1; 3864 } 3865 3866 if (mod_param_profile.num_mpt < 10) { 3867 pr_warning("mlx4_core: too low log_num_mpt: %d\n", 3868 mod_param_profile.num_mpt); 3869 return -1; 3870 } 3871 3872 if (mod_param_profile.num_mtt_segs && 3873 mod_param_profile.num_mtt_segs < 15) { 3874 pr_warning("mlx4_core: too low log_num_mtt: %d\n", 3875 mod_param_profile.num_mtt_segs); 3876 return -1; 3877 } 3878 3879 if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { 3880 pr_warning("mlx4_core: too high log_num_mtt: %d\n", 3881 mod_param_profile.num_mtt_segs); 3882 return -1; 3883 } 3884 return 0; 3885} 3886 3887static int __init mlx4_init(void) 3888{ 3889 int ret; 3890 3891 if (mlx4_verify_params()) 3892 return -EINVAL; 3893 3894 mlx4_catas_init(); 3895 3896 mlx4_wq = create_singlethread_workqueue("mlx4"); 3897 if (!mlx4_wq) 3898 return -ENOMEM; 3899 3900 if (enable_sys_tune) 3901 sys_tune_init(); 3902 3903 ret = pci_register_driver(&mlx4_driver); 3904 if (ret < 0) 3905 goto err; 3906 
3907 return 0; 3908 3909err: 3910 if (enable_sys_tune) 3911 sys_tune_fini(); 3912 3913 destroy_workqueue(mlx4_wq); 3914 3915 return ret; 3916} 3917 3918static void __exit mlx4_cleanup(void) 3919{ 3920 if (enable_sys_tune) 3921 sys_tune_fini(); 3922 3923 pci_unregister_driver(&mlx4_driver); 3924 destroy_workqueue(mlx4_wq); 3925} 3926 3927module_init_order(mlx4_init, SI_ORDER_MIDDLE); 3928module_exit(mlx4_cleanup); 3929 3930#include <sys/module.h> 3931static int 3932mlx4_evhand(module_t mod, int event, void *arg) 3933{ 3934 return (0); 3935} 3936 3937static moduledata_t mlx4_mod = { 3938 .name = "mlx4", 3939 .evhand = mlx4_evhand, 3940}; 3941MODULE_VERSION(mlx4, 1); 3942DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); 3943