// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

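/* Map a function ID to its vport number. For EC VF functions the vport is
 * derived from the EC VF vport base (function 1 maps to the base vport).
 */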
static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 * to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}

/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and other resources such as rmap from the system.
 * BUT doesn't free or remove reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq requires that affinity_hint and rmap be cleared before
	 * calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

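/* Drop a reference on @irq. When the last reference is dropped, the IRQ is
 * released back to the system and removed from its pool.
 * Return: 1 if the IRQ was released, 0 otherwise.
 */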
int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

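/* Allocate an IRQ at index @i from the pool: map the statically allocated
 * vector (or dynamically allocate one when supported), request the kernel IRQ
 * with the notifier-chain handler, apply the affinity hint and store the IRQ
 * in the pool xarray. Returns the new IRQ (refcount 1) or ERR_PTR().
 */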
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

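/* Attach a notifier (e.g. an EQ) to the IRQ's notifier chain, taking a
 * reference on the IRQ. The reference is dropped again if registration fails.
 */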
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existing IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* request an irq from a given pool according to a given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

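/* Return the completion IRQ pool for @dev: the SF completion pool when @dev
 * is an SF and the pool exists, otherwise the PCI function (PF/VF) pool.
 */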
struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	_mlx5_irq_release(ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* In this case we are allocating a control IRQ from a PCI
		 * device's pool. This can also happen for an SF if the SF
		 * pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}

/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of an error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate msix interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 * mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated msix interrupt
 * @dev: mlx5 device associated with interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);

/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = MLX5_IRQ_VEC_COMP_BASE;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

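/* Allocate and initialize an IRQ pool covering vector indices
 * [start, start + size - 1]. The min/max thresholds are stored in units of
 * EQ references per IRQ.
 */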
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
	 * which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

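/* Create the PCI function (PF/VF) pool and, when SFs are supported and enough
 * vectors remain, the SF control and SF completion pools.
 */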
static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

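/* Size the PCI function and SF vector requirements, allocate MSI-X vectors
 * from the PCI core (only one up front when dynamic MSI-X allocation is
 * supported), and initialize the IRQ pools.
 */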
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the IRQs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}