// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */

#include <linux/netdevice.h>
#include <linux/list.h>
#include <net/lag.h>

#include "mlx5_core.h"
#include "eswitch.h"
#include "esw/acl/ofld.h"
#include "en_rep.h"

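/* Per uplink representor bond state: the netdev event notifier and the list of
 * mlx5e_rep_bond_metadata entries, one per bond device with enslaved reps.
 */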
struct mlx5e_rep_bond {
	struct notifier_block nb;
	struct netdev_net_notifier nn;
	struct list_head metadata_list;
};

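/* One entry per VF representor netdev enslaved to a bond device */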
struct mlx5e_rep_bond_slave_entry {
	struct list_head list;
	struct net_device *netdev;
};

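/* Per bond device state: the shared reg_c_0 metadata value and the list of
 * enslaved representor vports using it.
 */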
struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to the uplink's global rep_bond_metadata list */
	struct mlx5_eswitch *esw;
	/* bond netdev; the uplink rep private data holds the rep_bond_metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0;

	struct list_head slaves_list; /* list of mlx5e_rep_bond_slave_entry */
	int slaves;
};

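/* Find the bond metadata entry tracking @lag_dev, or NULL if none exists */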
static struct mlx5e_rep_bond_metadata *
mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
			       const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_metadata *found = NULL;
	struct mlx5e_rep_bond_metadata *cur;

	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
		if (cur->lag_dev == lag_dev) {
			found = cur;
			break;
		}
	}

	return found;
}

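/* Find @netdev's entry on @mdata's slaves list, or NULL if it is not enslaved */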
static struct mlx5e_rep_bond_slave_entry *
mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
				  const struct net_device *netdev)
{
	struct mlx5e_rep_bond_slave_entry *found = NULL;
	struct mlx5e_rep_bond_slave_entry *cur;

	list_for_each_entry(cur, &mdata->slaves_list, list) {
		if (cur->netdev == netdev) {
			found = cur;
			break;
		}
	}

	return found;
}

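/* Unlink the metadata entry, return its reg_c_0 value to the eswitch allocator
 * and free it. The slaves list must already be empty at this point.
 */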
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}

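/* Find or create the bond metadata for @lag_dev, switch the rep vport's
 * ingress ACL metadata to the shared reg_c_0 value and add @netdev to the
 * slaves list.
 */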
/* This must be called under rtnl_lock */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev becomes a slave and no metadata exists for this
		 * lag_dev yet. Create one.
		 */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	err = mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport,
							 mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}

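/* Undo mlx5e_rep_bond_enslave(): reset the vport's ingress ACL metadata,
 * unbond its egress ACL, drop the slave entry and release the bond metadata
 * once the last slave leaves.
 */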
/* This must be called under rtnl_lock */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset bond_metadata to zero first, then reset all ingress/egress
	 * ACLs and rx rules of the unslaved representor's vport.
	 */
	mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}

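/* Handle only mlx5e VF representor netdevs that are slave ports of a LAG device */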
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
	return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
}

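/* A slave representor just became tx-enabled (the active slave). Point the
 * egress ACL of every other slave vport at the active vport and move the
 * shared rx rule for the bond metadata to the active vport's root_ft.
 */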
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	info = ptr;
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming the active slave */
	if (!lag_info->tx_enabled)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;
	lag_dev = netdev_master_upper_dev_get(netdev);
	if (!lag_dev)
		return;

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress ACL at the vport of the active representor */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only a single rx_rule may exist for the shared
			 * bond_metadata; delete it if it is saved as this
			 * passive vport's rx_rule with the passive vport's
			 * root_ft as destination.
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert a new rx_rule for the shared bond_metadata and save it as the
	 * active vport's rx_rule, with the active vport's root_ft as the new
	 * destination.
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}

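/* A representor was linked to or unlinked from a bond device; enslave or
 * unslave it accordingly.
 */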
static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	lag_dev = info->upper_dev;

	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);

	if (info->linking)
		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
	else
		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
}

/* Bond devices of representors and netdev events are used here in a specific
 * way to support eswitch vport bonding and to perform failover of an eswitch
 * vport by modifying the egress ACLs of the lower dev representors' vports.
 * Thus this also changes the traditional behavior of a lower dev under a bond
 * device. Non-representor netdevs and representors of other vendors are not
 * supported as lower devs of the bond device.
 */
static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
				       unsigned long event, void *ptr)
{
	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_rep_bond *bond;
	struct mlx5e_priv *priv;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return NOTIFY_DONE;

	bond = container_of(nb, struct mlx5e_rep_bond, nb);
	priv = netdev_priv(netdev);
	rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
	/* Verify the VF representor is on the same device as the bond handling
	 * the netevent.
	 */
	if (rpriv->uplink_priv.bond != bond)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_CHANGELOWERSTATE:
		mlx5e_rep_changelowerstate_event(netdev, ptr);
		break;
	case NETDEV_CHANGEUPPER:
		mlx5e_rep_changeupper_event(netdev, ptr);
		break;
	}
	return NOTIFY_DONE;
}

/* If the HW supports eswitch vport bonding, register a dedicated netdev event
 * notifier to handle the case where two or more representors are bonded.
 */
int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv;
	int ret = 0;

	priv = netdev_priv(netdev);
	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
		goto out;

	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
	if (!uplink_priv->bond) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
	ret = register_netdevice_notifier_dev_net(netdev,
						  &uplink_priv->bond->nb,
						  &uplink_priv->bond->nn);
	if (ret) {
		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
		kvfree(uplink_priv->bond);
		uplink_priv->bond = NULL;
	}

out:
	return ret;
}

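/* Unregister the bond netdev notifier and free the per-uplink bond state
 * allocated by mlx5e_rep_bond_init().
 */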
void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
	    !rpriv->uplink_priv.bond)
		return;

	unregister_netdevice_notifier_dev_net(rpriv->netdev,
					      &rpriv->uplink_priv.bond->nb,
					      &rpriv->uplink_priv.bond->nn);
	kvfree(rpriv->uplink_priv.bond);
}