// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include <net/arp.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

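/* Return the IPv6 neighbour table's DELAY_PROBE_TIME, or ~0UL when IPv6 is
 * not available so that the value never wins the min() taken in
 * mlx5e_rep_neigh_update_init_interval() below.
 */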
static unsigned long mlx5e_rep_ipv6_interval(void)
{
	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

	return ~0UL;
}

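/* Cache the minimum of the IPv4 and IPv6 DELAY_PROBE_TIME values in
 * min_interval and propagate it to the flow counter subsystem as its
 * sampling interval.
 */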
static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);

	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

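/* (Re)arm the delayed work that refreshes neigh 'used' state from the HW
 * flow counters, using the cached minimum DELAY_PROBE_TIME as the delay.
 */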
void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

	mlx5_fc_queue_stats_work(priv->mdev,
				 &neigh_update->neigh_stats_work,
				 neigh_update->min_interval);
}

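/* Take a reference on @nhe unless its refcount has already dropped to zero
 * (i.e. the entry is being destroyed); returns false in that case.
 */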
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

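/* Drop a reference on @nhe; the last reference unlinks the entry from the
 * hash table and list, then frees it after an RCU grace period.
 */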
void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
	if (refcount_dec_and_test(&nhe->refcnt)) {
		mlx5e_rep_neigh_entry_remove(nhe);
		kfree_rcu(nhe, rcu);
	}
}

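/* RCU-safe iterator over the neigh list: returns the entry following @nhe
 * (or the first entry when @nhe is NULL) on which a reference could be
 * taken, and releases the reference held on @nhe. Entries whose refcount
 * already hit zero are skipped. See the loop in mlx5e_rep_neigh_stats_work()
 * for the intended usage.
 */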
static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
		   struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh_hash_entry *next = NULL;

	rcu_read_lock();

	for (next = nhe ?
		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					   &nhe->neigh_list,
					   struct mlx5e_neigh_hash_entry,
					   neigh_list) :
		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
					    struct mlx5e_neigh_hash_entry,
					    neigh_list);
	     next;
	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
					  &next->neigh_list,
					  struct mlx5e_neigh_hash_entry,
					  neigh_list))
		if (mlx5e_rep_neigh_entry_hold(next))
			break;

	rcu_read_unlock();

	if (nhe)
		mlx5e_rep_neigh_entry_release(nhe);

	return next;
}

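/* Periodic work, running under RTNL, that walks all neigh hash entries and
 * updates their 'used' value from the HW flow counters, re-arming itself as
 * long as entries exist.
 */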
static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
						    neigh_update.neigh_stats_work.work);
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;

	rtnl_lock();
	if (!list_empty(&rpriv->neigh_update.neigh_list))
		mlx5e_rep_queue_neigh_stats_work(priv);

	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
		mlx5e_tc_update_neigh_used_value(nhe);

	rtnl_unlock();
}

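/* A neigh update event deferred to process context; holds references on both
 * the neighbour and the matching hash entry until the work completes.
 */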
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;
	struct mlx5e_neigh_hash_entry *nhe;
};

static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
{
	neigh_release(update_work->n);
	mlx5e_rep_neigh_entry_release(update_work->nhe);
	kfree(update_work);
}

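/* Process-context handler for a neighbour state change: snapshots the neigh
 * state under its lock and, if the hash entry still tracks the neighbour's
 * device, propagates it to all of the entry's encap flows.
 */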
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
							     work);
	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
	struct neighbour *n = update_work->n;
	struct mlx5e_encap_entry *e = NULL;
	bool neigh_connected, same_dev;
	unsigned char ha[ETH_ALEN];
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and its HW address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	if (!same_dev)
		goto out;

	/* mlx5e_get_next_init_encap() releases previous encap before returning
	 * the next one.
	 */
	while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
		mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);

out:
	rtnl_unlock();
	mlx5e_release_neigh_update_work(update_work);
}

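/* Allocate and initialize a deferred neigh update work item. Runs in atomic
 * (netevent notifier) context, hence GFP_ATOMIC; returns NULL if no hash
 * entry tracks this neighbour.
 */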
static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
							       struct neighbour *n)
{
	struct neigh_update_work *update_work;
	struct mlx5e_neigh_hash_entry *nhe;
	struct mlx5e_neigh m_neigh = {};

	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
	if (WARN_ON(!update_work))
		return NULL;

	m_neigh.family = n->ops->family;
	memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

	/* Obtain the reference to nhe as the last step so that failure paths
	 * never need to release it in atomic context.
	 */
	rcu_read_lock();
	nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
	rcu_read_unlock();
	if (!nhe) {
		kfree(update_work);
		return NULL;
	}

	INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
	neigh_hold(n);
	update_work->n = n;
	update_work->nhe = nhe;

	return update_work;
}

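/* Netevent notifier: defers NETEVENT_NEIGH_UPDATE events for tracked ARP/ND
 * neighbours to process context, and folds per-device DELAY_PROBE_TIME
 * changes into the flow counter sampling interval.
 */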
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct neigh_update_work *update_work;
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		update_work = mlx5e_alloc_neigh_update_work(priv, n);
		if (!update_work)
			return NOTIFY_DONE;

		queue_work(priv->wq, &update_work->work);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check that the device is present since we don't care
		 * about changes in the default table; we only care about
		 * changes to the per-device DELAY_PROBE_TIME parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == READ_ONCE(nhe->neigh_dev)) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}

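/* Hash table of neigh entries, keyed by the full struct mlx5e_neigh
 * (address family + destination IP).
 */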
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};

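/* Set up the per-representor neigh update machinery: hash table, entry list,
 * stats work, and the netevent notifier that feeds it.
 */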
int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	int err;

	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
	if (err)
		goto out_err;

	INIT_LIST_HEAD(&neigh_update->neigh_list);
	mutex_init(&neigh_update->encap_lock);
	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
			  mlx5e_rep_neigh_stats_work);
	mlx5e_rep_neigh_update_init_interval(rpriv);

	neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
	err = register_netevent_notifier(&neigh_update->netevent_nb);
	if (err)
		goto out_notifier;
	return 0;

out_notifier:
	neigh_update->netevent_nb.notifier_call = NULL;
	rhashtable_destroy(&neigh_update->neigh_ht);
out_err:
	netdev_warn(rpriv->netdev,
		    "Failed to initialize neighbours handling for vport %d\n",
		    rpriv->rep->vport);
	return err;
}

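/* Tear down in reverse order: unregister the notifier first so no new update
 * work can be queued, then flush the workqueue and cancel the stats work
 * before destroying the lock and hash table. A NULL notifier_call means init
 * failed and there is nothing to clean up.
 */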
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	if (!rpriv->neigh_update.netevent_nb.notifier_call)
		return;

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush pending neigh update work */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}

static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
					struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	int err;

	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
				     &nhe->rhash_node,
				     mlx5e_neigh_ht_params);
	if (err)
		return err;

	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

	return err;
}

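/* Unlink @nhe from the list and hash table under encap_lock, so removal
 * cannot race with mlx5e_rep_neigh_entry_lookup() callers holding that lock;
 * RCU readers may still see the entry until the grace period ends.
 */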
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
 * inside an RCU read-side critical section.
 */
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
			     struct mlx5e_neigh *m_neigh)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_neigh_hash_entry *nhe;

	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
				     mlx5e_neigh_ht_params);
	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

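/* Allocate a new neigh hash entry with an initial reference and publish it
 * in the hash table and RCU list.
 */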
int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
				 struct mlx5e_neigh *m_neigh,
				 struct net_device *neigh_dev,
				 struct mlx5e_neigh_hash_entry **nhe)
{
	int err;

	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
	if (!*nhe)
		return -ENOMEM;

	(*nhe)->priv = priv;
	memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
	spin_lock_init(&(*nhe)->encap_list_lock);
	INIT_LIST_HEAD(&(*nhe)->encap_list);
	refcount_set(&(*nhe)->refcnt, 1);
	WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);

	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
	if (err)
		goto out_free;
	return 0;

out_free:
	kfree(*nhe);
	return err;
}