// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}
/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}

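/* Worked example of the sizing policy above (hypothetical capability
 * values, not taken from any specific device): with num_vf_msix = 1024,
 * min_msix = 2, max_msix = 128 and 16 enabled VFs, the result is
 * max(min(1024 / 16, 128 / 2), 2) = max(min(64, 64), 2) = 64 vectors
 * per VF, leaving headroom in the pool for later per-VF increases.
 */
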
/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}

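/* Illustrative call sequence (a sketch, not code from this driver): a
 * typical caller derives the default and then programs each VF with it.
 * The function_id and num_vfs values here are hypothetical.
 *
 *	int vf, vecs = mlx5_get_default_msix_vec_count(dev, num_vfs);
 *
 *	if (vecs)
 *		for (vf = 1; vf <= num_vfs; vf++)
 *			mlx5_set_msix_vec_count(dev, vf, vecs);
 */
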
/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and related resources such as rmap from the system.
 * BUT doesn't free or remove the reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq requires that affinity_hint and rmap will be cleared before
	 * calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

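/* Reference-counting protocol used above, summarized as a sketch (the
 * real call sites live elsewhere in the driver): every consumer that
 * succeeds in irq_get()/mlx5_irq_get_locked() must balance it with a
 * later mlx5_irq_put(); the IRQ is torn down via irq_release() only
 * when the last reference drops, under pool->lock.
 *
 *	if (irq_get(irq)) {
 *		... use irq ...
 *		mlx5_irq_put(irq);
 *	}
 */
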
static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}

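/* Example of the resulting names (illustrative only): for a PF pool,
 * vector 0 becomes "mlx5_async0" and vector 1 becomes "mlx5_comp0"
 * (after subtracting MLX5_IRQ_VEC_COMP_BASE); for an SF pool named
 * "mlx5_sf_comp", vector 3 becomes "mlx5_sf_comp3". mlx5_irq_alloc()
 * below then combines this name with the PCI device name via
 * MLX5_IRQ_NAME_FORMAT_STR.
 */
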
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something has gone very wrong here: we are enabling an EQ
		 * on a non-existent IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}

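/* Usage sketch for the notifier API above (illustrative, not taken from
 * a real call site; my_eq_int is a hypothetical handler): an EQ embeds a
 * notifier_block, attaches it to share the IRQ and detaches it on
 * teardown. Attach takes an IRQ reference that detach releases.
 *
 *	struct notifier_block nb = { .notifier_call = my_eq_int };
 *
 *	err = mlx5_irq_attach_nb(irq, &nb);
 *	...
 *	mlx5_irq_detach_nb(irq, &nb);
 */
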
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there won't be a pool of SF IRQs. Hence, return
	 * the PF IRQ pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	_mlx5_irq_release(ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* In case we are allocating a control IRQ from a pci device's pool.
		 * This can happen also for a SF if the SFs pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ in index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}

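/* Usage sketch (illustrative): control IRQs are requested and released
 * in matched pairs, typically around async EQ setup and teardown.
 *
 *	struct mlx5_irq *ctrl_irq = mlx5_ctrl_irq_request(dev);
 *
 *	if (IS_ERR(ctrl_irq))
 *		return PTR_ERR(ctrl_irq);
 *	...
 *	mlx5_ctrl_irq_release(ctrl_irq);
 */
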
/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate msix interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 *       mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated msix interrupt
 * @dev: mlx5 device associated with interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);

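/* Usage sketch for the exported pair above (illustrative; my_handler is
 * a hypothetical ISR): success is detected by a non-zero map.virq, and
 * on failure the error code is carried in map.index.
 *
 *	struct irq_affinity_desc af = { .is_managed = false };
 *	struct msi_map map;
 *
 *	cpumask_set_cpu(0, &af.mask);
 *	map = mlx5_msix_alloc(dev, my_handler, &af, "my_irq");
 *	if (!map.virq)
 *		return map.index;
 *	...
 *	mlx5_msix_free(dev, map);
 */
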
/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ, for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = MLX5_IRQ_VEC_COMP_BASE;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

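/* Example (illustrative): requesting completion vector 0 bound to CPU 2
 * on a multi-vector device lands on pool index 0 + MLX5_IRQ_VEC_COMP_BASE
 * = 1, since index 0 is reserved for the async vector; on a single-vector
 * device the offset collapses to 0 and the one IRQ is shared.
 *
 *	irq = mlx5_irq_request_vector(dev, 2, 0, &rmap);
 */
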
static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
	 * which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SFs may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}

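/* Worked example of the vector split above (hypothetical numbers): with
 * pcif_vec = 9 and sf_vec = 48, and assuming MLX5_COMP_EQS_PER_SF = 8
 * and 100 max SFs, num_sf_ctrl = min(DIV_ROUND_UP(48, 8) = 6,
 * DIV_ROUND_UP(100, 64) = 2, MLX5_IRQ_CTRL_SF_MAX = 8) = 2. The pools
 * then cover vectors 0..8 (pcif), 9..10 (sf_ctrl) and 11..56 (sf_comp).
 */
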
static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}

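/* Worked example of the sizing above (hypothetical system): one port,
 * 8 online CPUs and no SF support gives pcif_vec = 1 * 8 + 1 = 9,
 * clamped by num_eqs and by pci_msix_vec_count(). With dynamic MSI-X
 * support only vector 0 is allocated up front (req_vec = 1); otherwise
 * all total_vec vectors are allocated statically.
 */
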
void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the irqs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}