// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/events.h"

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}

#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2
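/* Multipath offload is attempted only when both ports are ready, the lag is
 * either inactive or already in multipath mode, at most two ports are
 * bonded, and mlx5_esw_multipath_prereq() accepts both PF devices.
 */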
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
		return false;

	if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS)
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(dev);

	return ldev && __mlx5_lag_is_multipath(ldev);
}

/**
 * mlx5_lag_set_port_affinity - set which port(s) carry traffic for the lag
 *
 * @ldev: lag device
 * @port: requested affinity:
 *     0 - set normal affinity (both ports).
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker = {};

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}
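	/* Notify each port that remains TX-enabled; listeners registered for
	 * MLX5_DEV_EVENT_PORT_AFFINITY (e.g. the TC tunnel-encap offload
	 * code) can then re-evaluate which uplink their routes should use.
	 */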
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
	mp->fib.mfi = fi;
	mp->fib.priority = fi->fib_priority;
	mp->fib.dst = dst;
	mp->fib.dst_len = dst_len;
}

struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

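/* Walk @fi's nexthops, optionally skipping past @current_dev, and return the
 * netdev of the first remaining nexthop that belongs to this lag (looked up
 * via mlx5_lag_dev_get_netdev_idx()), or NULL if none is found.
 */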
static struct net_device*
mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev,
			  struct fib_info *fi,
			  struct net_device *current_dev)
{
	struct net_device *fib_dev;
	int i, ldev_idx, nhs;

	nhs = fib_info_num_path(fi);
	i = 0;
	if (current_dev) {
		for (; i < nhs; i++) {
			fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
			if (fib_dev == current_dev) {
				i++;
				break;
			}
		}
	}
	for (; i < nhs; i++) {
		fib_dev = fib_info_nh(fi, i)->fib_nh_dev;
		ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev);
		if (ldev_idx >= 0)
			return ldev->pf[ldev_idx].netdev;
	}

	return NULL;
}

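/* Illustrative example (placeholder names and addresses, not taken from this
 * file): a route such as
 *
 *   ip route add 10.0.0.0/24 \
 *       nexthop via 192.168.1.1 dev enp8s0f0 \
 *       nexthop via 192.168.2.1 dev enp8s0f1
 *
 * where the two nexthop devices are the two PF netdevs of this lag, is the
 * kind of multipath entry that activates multipath LAG below.
 */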
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
				     struct fib_entry_notifier_info *fen_info)
{
	struct net_device *nh_dev0, *nh_dev1;
	struct fib_info *fi = fen_info->fi;
	struct lag_mp *mp = &ldev->lag_mp;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route */
		if (mp->fib.mfi == fi)
			mp->fib.mfi = NULL;
		return;
	}

	/* Ignore a different route unless its priority (metric) is strictly
	 * better (lower value) than the route we already track.
	 */
	if (mp->fib.mfi && mp->fib.mfi != fi &&
	    (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
	    fi->fib_priority >= mp->fib.priority)
		return;

	nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL);
	nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0);

	/* Handle add/replace event */
	if (!nh_dev0) {
		if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len)
			mp->fib.mfi = NULL;
		return;
	}

	if (nh_dev0 == nh_dev1) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload doesn't support routes with multiple nexthops of the same device");
		return;
	}

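	/* Only one nexthop maps to a lag port: pin affinity to that port.
	 * The netdev index is zero-based, while port affinity values start
	 * at 1, hence the increment below.
	 */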
	if (!nh_dev1) {
		if (__mlx5_lag_is_active(ldev)) {
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0);

			i++;
			mlx5_lag_set_port_affinity(ldev, i);
			mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
		}

		return;
	}

	/* First time we see a multipath route */
	if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}

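/* Nexthop events arrive when an individual nexthop of the tracked route
 * becomes dead or usable again, e.g. after one of the port netdevs goes down
 * and comes back up; affinity follows the surviving port and returns to
 * normal once both nexthops are usable (example scenario, not exhaustive).
 */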
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the tracked route */
	if (!mp->fib.mfi || mp->fib.mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 &fib_work->fen_info);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

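/* Runs in the FIB notifier's atomic context: no sleeping is allowed here, so
 * the allocation uses GFP_ATOMIC and the actual handling is deferred to the
 * workqueue. A reference on the fib_info is taken so it stays valid until
 * the work item releases it.
 */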
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;

		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
	/* Clear mfi, as it might become stale when a route delete event
	 * has been missed, see mlx5_lag_fib_route_event().
	 */
	ldev->lag_mp.fib.mfi = NULL;
}

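/* Registers the FIB notifier on init_net (multipath offload only follows the
 * initial network namespace); mlx5_lag_fib_event_flush() is passed as the
 * flush callback so queued FIB work can be drained by the notifier
 * infrastructure when needed.
 */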
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->fib.mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->fib.mfi = NULL;
}