1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4#include "eswitch.h"
5#include "lib/mlx5.h"
6#include "esw/qos.h"
7#include "en/port.h"
8#define CREATE_TRACE_POINTS
9#include "diag/qos_tracepoint.h"
10
11/* Minimum supported BW share value by the HW is 1 Mbit/sec */
12#define MLX5_MIN_BW_SHARE 1
13
/* Convert @rate into a bw_share value: divide @rate by @divider rounding up,
 * raise the result to at least MLX5_MIN_BW_SHARE, then cap it at @limit.
 * All comparisons are done as u32.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
16
/* Software state of one E-Switch rate group. */
struct mlx5_esw_rate_group {
	/* Scheduling element index, filled in when the group's TSAR is created. */
	u32 tsar_ix;
	/* Max rate last applied by esw_qos_set_group_max_rate(). */
	u32 max_rate;
	/* Min rate last accepted by esw_qos_set_group_min_rate(). */
	u32 min_rate;
	/* bw_share last programmed by esw_qos_normalize_groups_min_rate(). */
	u32 bw_share;
	/* Entry in the esw->qos.groups list. */
	struct list_head list;
};
24
25static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
26			       u32 tsar_ix, u32 max_rate, u32 bw_share)
27{
28	u32 bitmask = 0;
29
30	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
31		return -EOPNOTSUPP;
32
33	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
34	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
35	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
36	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
37
38	return mlx5_modify_scheduling_element_cmd(dev,
39						  SCHEDULING_HIERARCHY_E_SWITCH,
40						  sched_ctx,
41						  tsar_ix,
42						  bitmask);
43}
44
45static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
46				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
47{
48	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
49	struct mlx5_core_dev *dev = esw->dev;
50	int err;
51
52	err = esw_qos_tsar_config(dev, sched_ctx,
53				  group->tsar_ix,
54				  max_rate, bw_share);
55	if (err)
56		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
57
58	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
59
60	return err;
61}
62
63static int esw_qos_vport_config(struct mlx5_eswitch *esw,
64				struct mlx5_vport *vport,
65				u32 max_rate, u32 bw_share,
66				struct netlink_ext_ack *extack)
67{
68	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
69	struct mlx5_core_dev *dev = esw->dev;
70	int err;
71
72	if (!vport->qos.enabled)
73		return -EIO;
74
75	err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
76				  max_rate, bw_share);
77	if (err) {
78		esw_warn(esw->dev,
79			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
80			 vport->vport, err);
81		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
82		return err;
83	}
84
85	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
86
87	return 0;
88}
89
90static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
91					      struct mlx5_esw_rate_group *group,
92					      bool group_level)
93{
94	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
95	struct mlx5_vport *evport;
96	u32 max_guarantee = 0;
97	unsigned long i;
98
99	if (group_level) {
100		struct mlx5_esw_rate_group *group;
101
102		list_for_each_entry(group, &esw->qos.groups, list) {
103			if (group->min_rate < max_guarantee)
104				continue;
105			max_guarantee = group->min_rate;
106		}
107	} else {
108		mlx5_esw_for_each_vport(esw, i, evport) {
109			if (!evport->enabled || !evport->qos.enabled ||
110			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
111				continue;
112			max_guarantee = evport->qos.min_rate;
113		}
114	}
115
116	if (max_guarantee)
117		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
118
119	/* If vports min rate divider is 0 but their group has bw_share configured, then
120	 * need to set bw_share for vports to minimal value.
121	 */
122	if (!group_level && !max_guarantee && group && group->bw_share)
123		return 1;
124	return 0;
125}
126
127static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
128{
129	if (divider)
130		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
131
132	return 0;
133}
134
135static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
136					     struct mlx5_esw_rate_group *group,
137					     struct netlink_ext_ack *extack)
138{
139	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
140	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
141	struct mlx5_vport *evport;
142	unsigned long i;
143	u32 bw_share;
144	int err;
145
146	mlx5_esw_for_each_vport(esw, i, evport) {
147		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
148			continue;
149		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
150
151		if (bw_share == evport->qos.bw_share)
152			continue;
153
154		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
155		if (err)
156			return err;
157
158		evport->qos.bw_share = bw_share;
159	}
160
161	return 0;
162}
163
164static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
165					     struct netlink_ext_ack *extack)
166{
167	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
168	struct mlx5_esw_rate_group *group;
169	u32 bw_share;
170	int err;
171
172	list_for_each_entry(group, &esw->qos.groups, list) {
173		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
174
175		if (bw_share == group->bw_share)
176			continue;
177
178		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
179		if (err)
180			return err;
181
182		group->bw_share = bw_share;
183
184		/* All the group's vports need to be set with default bw_share
185		 * to enable them with QOS
186		 */
187		err = esw_qos_normalize_vports_min_rate(esw, group, extack);
188
189		if (err)
190			return err;
191	}
192
193	return 0;
194}
195
196static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
197				      u32 min_rate, struct netlink_ext_ack *extack)
198{
199	u32 fw_max_bw_share, previous_min_rate;
200	bool min_rate_supported;
201	int err;
202
203	lockdep_assert_held(&esw->state_lock);
204	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
205	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
206				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
207	if (min_rate && !min_rate_supported)
208		return -EOPNOTSUPP;
209	if (min_rate == evport->qos.min_rate)
210		return 0;
211
212	previous_min_rate = evport->qos.min_rate;
213	evport->qos.min_rate = min_rate;
214	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
215	if (err)
216		evport->qos.min_rate = previous_min_rate;
217
218	return err;
219}
220
221static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
222				      u32 max_rate, struct netlink_ext_ack *extack)
223{
224	u32 act_max_rate = max_rate;
225	bool max_rate_supported;
226	int err;
227
228	lockdep_assert_held(&esw->state_lock);
229	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
230
231	if (max_rate && !max_rate_supported)
232		return -EOPNOTSUPP;
233	if (max_rate == evport->qos.max_rate)
234		return 0;
235
236	/* If parent group has rate limit need to set to group
237	 * value when new max rate is 0.
238	 */
239	if (evport->qos.group && !max_rate)
240		act_max_rate = evport->qos.group->max_rate;
241
242	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
243
244	if (!err)
245		evport->qos.max_rate = max_rate;
246
247	return err;
248}
249
250static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
251				      u32 min_rate, struct netlink_ext_ack *extack)
252{
253	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
254	struct mlx5_core_dev *dev = esw->dev;
255	u32 previous_min_rate, divider;
256	int err;
257
258	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
259		return -EOPNOTSUPP;
260
261	if (min_rate == group->min_rate)
262		return 0;
263
264	previous_min_rate = group->min_rate;
265	group->min_rate = min_rate;
266	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
267	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
268	if (err) {
269		group->min_rate = previous_min_rate;
270		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
271
272		/* Attempt restoring previous configuration */
273		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
274		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
275			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
276	}
277
278	return err;
279}
280
281static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
282				      struct mlx5_esw_rate_group *group,
283				      u32 max_rate, struct netlink_ext_ack *extack)
284{
285	struct mlx5_vport *vport;
286	unsigned long i;
287	int err;
288
289	if (group->max_rate == max_rate)
290		return 0;
291
292	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
293	if (err)
294		return err;
295
296	group->max_rate = max_rate;
297
298	/* Any unlimited vports in the group should be set
299	 * with the value of the group.
300	 */
301	mlx5_esw_for_each_vport(esw, i, vport) {
302		if (!vport->enabled || !vport->qos.enabled ||
303		    vport->qos.group != group || vport->qos.max_rate)
304			continue;
305
306		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
307		if (err)
308			NL_SET_ERR_MSG_MOD(extack,
309					   "E-Switch vport implicit rate limit setting failed");
310	}
311
312	return err;
313}
314
315static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
316					      struct mlx5_vport *vport,
317					      u32 max_rate, u32 bw_share)
318{
319	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
320	struct mlx5_esw_rate_group *group = vport->qos.group;
321	struct mlx5_core_dev *dev = esw->dev;
322	u32 parent_tsar_ix;
323	void *vport_elem;
324	int err;
325
326	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
327	MLX5_SET(scheduling_context, sched_ctx, element_type,
328		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
329	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
330	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
331	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
332	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
333	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
334
335	err = mlx5_create_scheduling_element_cmd(dev,
336						 SCHEDULING_HIERARCHY_E_SWITCH,
337						 sched_ctx,
338						 &vport->qos.esw_tsar_ix);
339	if (err) {
340		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
341			 vport->vport, err);
342		return err;
343	}
344
345	return 0;
346}
347
348static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
349						   struct mlx5_vport *vport,
350						   struct mlx5_esw_rate_group *curr_group,
351						   struct mlx5_esw_rate_group *new_group,
352						   struct netlink_ext_ack *extack)
353{
354	u32 max_rate;
355	int err;
356
357	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
358						  SCHEDULING_HIERARCHY_E_SWITCH,
359						  vport->qos.esw_tsar_ix);
360	if (err) {
361		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
362		return err;
363	}
364
365	vport->qos.group = new_group;
366	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
367
368	/* If vport is unlimited, we set the group's value.
369	 * Therefore, if the group is limited it will apply to
370	 * the vport as well and if not, vport will remain unlimited.
371	 */
372	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
373	if (err) {
374		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
375		goto err_sched;
376	}
377
378	return 0;
379
380err_sched:
381	vport->qos.group = curr_group;
382	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
383	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
384		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
385			 vport->vport);
386
387	return err;
388}
389
390static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
391				      struct mlx5_vport *vport,
392				      struct mlx5_esw_rate_group *group,
393				      struct netlink_ext_ack *extack)
394{
395	struct mlx5_esw_rate_group *new_group, *curr_group;
396	int err;
397
398	if (!vport->enabled)
399		return -EINVAL;
400
401	curr_group = vport->qos.group;
402	new_group = group ?: esw->qos.group0;
403	if (curr_group == new_group)
404		return 0;
405
406	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
407	if (err)
408		return err;
409
410	/* Recalculate bw share weights of old and new groups */
411	if (vport->qos.bw_share || new_group->bw_share) {
412		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
413		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
414	}
415
416	return 0;
417}
418
419static struct mlx5_esw_rate_group *
420__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
421{
422	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
423	struct mlx5_esw_rate_group *group;
424	u32 divider;
425	int err;
426
427	group = kzalloc(sizeof(*group), GFP_KERNEL);
428	if (!group)
429		return ERR_PTR(-ENOMEM);
430
431	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
432		 esw->qos.root_tsar_ix);
433	err = mlx5_create_scheduling_element_cmd(esw->dev,
434						 SCHEDULING_HIERARCHY_E_SWITCH,
435						 tsar_ctx,
436						 &group->tsar_ix);
437	if (err) {
438		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
439		goto err_sched_elem;
440	}
441
442	list_add_tail(&group->list, &esw->qos.groups);
443
444	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
445	if (divider) {
446		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
447		if (err) {
448			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
449			goto err_min_rate;
450		}
451	}
452	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
453
454	return group;
455
456err_min_rate:
457	list_del(&group->list);
458	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
459						SCHEDULING_HIERARCHY_E_SWITCH,
460						group->tsar_ix))
461		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
462err_sched_elem:
463	kfree(group);
464	return ERR_PTR(err);
465}
466
467static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
468static void esw_qos_put(struct mlx5_eswitch *esw);
469
470static struct mlx5_esw_rate_group *
471esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
472{
473	struct mlx5_esw_rate_group *group;
474	int err;
475
476	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
477		return ERR_PTR(-EOPNOTSUPP);
478
479	err = esw_qos_get(esw, extack);
480	if (err)
481		return ERR_PTR(err);
482
483	group = __esw_qos_create_rate_group(esw, extack);
484	if (IS_ERR(group))
485		esw_qos_put(esw);
486
487	return group;
488}
489
490static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
491					struct mlx5_esw_rate_group *group,
492					struct netlink_ext_ack *extack)
493{
494	u32 divider;
495	int err;
496
497	list_del(&group->list);
498
499	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
500	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
501	if (err)
502		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
503
504	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
505						  SCHEDULING_HIERARCHY_E_SWITCH,
506						  group->tsar_ix);
507	if (err)
508		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
509
510	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
511
512	kfree(group);
513
514	return err;
515}
516
/* Destroy @group and drop one E-Switch QoS reference.
 * The reference is dropped even if destroying the group reports an error.
 */
static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

	esw_qos_put(esw);

	return ret;
}
528
529static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
530{
531	switch (type) {
532	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
533		return MLX5_CAP_QOS(dev, esw_element_type) &
534		       ELEMENT_TYPE_CAP_MASK_TASR;
535	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
536		return MLX5_CAP_QOS(dev, esw_element_type) &
537		       ELEMENT_TYPE_CAP_MASK_VPORT;
538	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
539		return MLX5_CAP_QOS(dev, esw_element_type) &
540		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
541	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
542		return MLX5_CAP_QOS(dev, esw_element_type) &
543		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
544	}
545	return false;
546}
547
548static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
549{
550	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
551	struct mlx5_core_dev *dev = esw->dev;
552	__be32 *attr;
553	int err;
554
555	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
556		return -EOPNOTSUPP;
557
558	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
559		return -EOPNOTSUPP;
560
561	MLX5_SET(scheduling_context, tsar_ctx, element_type,
562		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
563
564	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
565	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
566
567	err = mlx5_create_scheduling_element_cmd(dev,
568						 SCHEDULING_HIERARCHY_E_SWITCH,
569						 tsar_ctx,
570						 &esw->qos.root_tsar_ix);
571	if (err) {
572		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
573		return err;
574	}
575
576	INIT_LIST_HEAD(&esw->qos.groups);
577	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
578		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
579		if (IS_ERR(esw->qos.group0)) {
580			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
581				 PTR_ERR(esw->qos.group0));
582			err = PTR_ERR(esw->qos.group0);
583			goto err_group0;
584		}
585	}
586	refcount_set(&esw->qos.refcnt, 1);
587
588	return 0;
589
590err_group0:
591	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
592						esw->qos.root_tsar_ix))
593		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
594
595	return err;
596}
597
598static void esw_qos_destroy(struct mlx5_eswitch *esw)
599{
600	int err;
601
602	if (esw->qos.group0)
603		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
604
605	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
606						  SCHEDULING_HIERARCHY_E_SWITCH,
607						  esw->qos.root_tsar_ix);
608	if (err)
609		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
610}
611
612static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
613{
614	int err = 0;
615
616	lockdep_assert_held(&esw->state_lock);
617
618	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
619		/* esw_qos_create() set refcount to 1 only on success.
620		 * No need to decrement on failure.
621		 */
622		err = esw_qos_create(esw, extack);
623	}
624
625	return err;
626}
627
628static void esw_qos_put(struct mlx5_eswitch *esw)
629{
630	lockdep_assert_held(&esw->state_lock);
631	if (refcount_dec_and_test(&esw->qos.refcnt))
632		esw_qos_destroy(esw);
633}
634
635static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
636				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
637{
638	int err;
639
640	lockdep_assert_held(&esw->state_lock);
641	if (vport->qos.enabled)
642		return 0;
643
644	err = esw_qos_get(esw, extack);
645	if (err)
646		return err;
647
648	vport->qos.group = esw->qos.group0;
649
650	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
651	if (err)
652		goto err_out;
653
654	vport->qos.enabled = true;
655	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
656
657	return 0;
658
659err_out:
660	esw_qos_put(esw);
661
662	return err;
663}
664
665void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
666{
667	int err;
668
669	lockdep_assert_held(&esw->state_lock);
670	if (!vport->qos.enabled)
671		return;
672	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
673	     "Disabling QoS on port before detaching it from group");
674
675	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
676						  SCHEDULING_HIERARCHY_E_SWITCH,
677						  vport->qos.esw_tsar_ix);
678	if (err)
679		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
680			 vport->vport, err);
681
682	memset(&vport->qos, 0, sizeof(vport->qos));
683	trace_mlx5_esw_vport_qos_destroy(vport);
684
685	esw_qos_put(esw);
686}
687
688int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
689				u32 max_rate, u32 min_rate)
690{
691	int err;
692
693	lockdep_assert_held(&esw->state_lock);
694	err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
695	if (err)
696		return err;
697
698	err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
699	if (!err)
700		err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
701
702	return err;
703}
704
705static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
706{
707	struct ethtool_link_ksettings lksettings;
708	struct net_device *slave, *master;
709	u32 speed = SPEED_UNKNOWN;
710
711	/* Lock ensures a stable reference to master and slave netdevice
712	 * while port speed of master is queried.
713	 */
714	ASSERT_RTNL();
715
716	slave = mlx5_uplink_netdev_get(mdev);
717	if (!slave)
718		goto out;
719
720	master = netdev_master_upper_dev_get(slave);
721	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
722		speed = lksettings.base.speed;
723
724out:
725	return speed;
726}
727
728static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
729					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
730{
731	int err;
732
733	if (!mlx5_lag_is_active(mdev))
734		goto skip_lag;
735
736	if (hold_rtnl_lock)
737		rtnl_lock();
738
739	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);
740
741	if (hold_rtnl_lock)
742		rtnl_unlock();
743
744	if (*link_speed_max != (u32)SPEED_UNKNOWN)
745		return 0;
746
747skip_lag:
748	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
749	if (err)
750		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
751
752	return err;
753}
754
755static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
756					  const char *name, u32 link_speed_max,
757					  u64 value, struct netlink_ext_ack *extack)
758{
759	if (value > link_speed_max) {
760		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
761		       name, value, link_speed_max);
762		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
763		return -EINVAL;
764	}
765
766	return 0;
767}
768
769int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
770{
771	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
772	struct mlx5_vport *vport;
773	u32 link_speed_max;
774	u32 bitmask;
775	int err;
776
777	vport = mlx5_eswitch_get_vport(esw, vport_num);
778	if (IS_ERR(vport))
779		return PTR_ERR(vport);
780
781	if (rate_mbps) {
782		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
783		if (err)
784			return err;
785
786		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
787						     link_speed_max, rate_mbps, NULL);
788		if (err)
789			return err;
790	}
791
792	mutex_lock(&esw->state_lock);
793	if (!vport->qos.enabled) {
794		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
795		err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
796	} else {
797		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
798
799		bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
800		err = mlx5_modify_scheduling_element_cmd(esw->dev,
801							 SCHEDULING_HIERARCHY_E_SWITCH,
802							 ctx,
803							 vport->qos.esw_tsar_ix,
804							 bitmask);
805	}
806	mutex_unlock(&esw->state_lock);
807
808	return err;
809}
810
811#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
812
813/* Converts bytes per second value passed in a pointer into megabits per
814 * second, rewriting last. If converted rate exceed link speed or is not a
815 * fraction of Mbps - returns error.
816 */
817static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
818					u64 *rate, struct netlink_ext_ack *extack)
819{
820	u32 link_speed_max, remainder;
821	u64 value;
822	int err;
823
824	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
825	if (remainder) {
826		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
827		       name, *rate);
828		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
829		return -EINVAL;
830	}
831
832	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
833	if (err)
834		return err;
835
836	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
837	if (err)
838		return err;
839
840	*rate = value;
841	return 0;
842}
843
844/* Eswitch devlink rate API */
845
846int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
847					    u64 tx_share, struct netlink_ext_ack *extack)
848{
849	struct mlx5_vport *vport = priv;
850	struct mlx5_eswitch *esw;
851	int err;
852
853	esw = vport->dev->priv.eswitch;
854	if (!mlx5_esw_allowed(esw))
855		return -EPERM;
856
857	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
858	if (err)
859		return err;
860
861	mutex_lock(&esw->state_lock);
862	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
863	if (err)
864		goto unlock;
865
866	err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
867unlock:
868	mutex_unlock(&esw->state_lock);
869	return err;
870}
871
872int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
873					  u64 tx_max, struct netlink_ext_ack *extack)
874{
875	struct mlx5_vport *vport = priv;
876	struct mlx5_eswitch *esw;
877	int err;
878
879	esw = vport->dev->priv.eswitch;
880	if (!mlx5_esw_allowed(esw))
881		return -EPERM;
882
883	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
884	if (err)
885		return err;
886
887	mutex_lock(&esw->state_lock);
888	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
889	if (err)
890		goto unlock;
891
892	err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
893unlock:
894	mutex_unlock(&esw->state_lock);
895	return err;
896}
897
898int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
899					    u64 tx_share, struct netlink_ext_ack *extack)
900{
901	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
902	struct mlx5_eswitch *esw = dev->priv.eswitch;
903	struct mlx5_esw_rate_group *group = priv;
904	int err;
905
906	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
907	if (err)
908		return err;
909
910	mutex_lock(&esw->state_lock);
911	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
912	mutex_unlock(&esw->state_lock);
913	return err;
914}
915
916int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
917					  u64 tx_max, struct netlink_ext_ack *extack)
918{
919	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
920	struct mlx5_eswitch *esw = dev->priv.eswitch;
921	struct mlx5_esw_rate_group *group = priv;
922	int err;
923
924	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
925	if (err)
926		return err;
927
928	mutex_lock(&esw->state_lock);
929	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
930	mutex_unlock(&esw->state_lock);
931	return err;
932}
933
934int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
935				   struct netlink_ext_ack *extack)
936{
937	struct mlx5_esw_rate_group *group;
938	struct mlx5_eswitch *esw;
939	int err = 0;
940
941	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
942	if (IS_ERR(esw))
943		return PTR_ERR(esw);
944
945	mutex_lock(&esw->state_lock);
946	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
947		NL_SET_ERR_MSG_MOD(extack,
948				   "Rate node creation supported only in switchdev mode");
949		err = -EOPNOTSUPP;
950		goto unlock;
951	}
952
953	group = esw_qos_create_rate_group(esw, extack);
954	if (IS_ERR(group)) {
955		err = PTR_ERR(group);
956		goto unlock;
957	}
958
959	*priv = group;
960unlock:
961	mutex_unlock(&esw->state_lock);
962	return err;
963}
964
965int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
966				   struct netlink_ext_ack *extack)
967{
968	struct mlx5_esw_rate_group *group = priv;
969	struct mlx5_eswitch *esw;
970	int err;
971
972	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
973	if (IS_ERR(esw))
974		return PTR_ERR(esw);
975
976	mutex_lock(&esw->state_lock);
977	err = esw_qos_destroy_rate_group(esw, group, extack);
978	mutex_unlock(&esw->state_lock);
979	return err;
980}
981
982int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
983				    struct mlx5_vport *vport,
984				    struct mlx5_esw_rate_group *group,
985				    struct netlink_ext_ack *extack)
986{
987	int err = 0;
988
989	mutex_lock(&esw->state_lock);
990	if (!vport->qos.enabled && !group)
991		goto unlock;
992
993	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
994	if (!err)
995		err = esw_qos_vport_update_group(esw, vport, group, extack);
996unlock:
997	mutex_unlock(&esw->state_lock);
998	return err;
999}
1000
1001int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
1002				     struct devlink_rate *parent,
1003				     void *priv, void *parent_priv,
1004				     struct netlink_ext_ack *extack)
1005{
1006	struct mlx5_esw_rate_group *group;
1007	struct mlx5_vport *vport = priv;
1008
1009	if (!parent)
1010		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
1011						       vport, NULL, extack);
1012
1013	group = parent_priv;
1014	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
1015}
1016