1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/* Copyright (c) 2021 Mellanox Technologies. */
3
4#include <net/fib_notifier.h>
5#include <net/nexthop.h>
6#include <net/ip_tunnels.h>
7#include "tc_tun_encap.h"
8#include "en_tc.h"
9#include "tc_tun.h"
10#include "rep/tc.h"
11#include "diag/en_tc_tracepoint.h"
12
13enum {
14	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
15};
16
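/* When the route device of the encap entry is an OVS master device and the
 * flow's filter device differs from the tunnel out_dev, rewrite the forward
 * action to egress through an internal port; otherwise leave the actions
 * untouched and return 0.
 */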
17static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
18				     struct mlx5_flow_attr *attr,
19				     struct mlx5e_encap_entry *e,
20				     int out_index)
21{
22	struct net_device *route_dev;
23	int err = 0;
24
25	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
26
27	if (!route_dev || !netif_is_ovs_master(route_dev) ||
28	    attr->parse_attr->filter_dev == e->out_dev)
29		goto out;
30
31	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
32						MLX5E_TC_INT_PORT_EGRESS,
33						&attr->action, out_index);
34
35out:
36	if (route_dev)
37		dev_put(route_dev);
38
39	return err;
40}
41
42struct mlx5e_route_key {
43	int ip_version;
44	union {
45		__be32 v4;
46		struct in6_addr v6;
47	} endpoint_ip;
48};
49
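/* Per tunnel endpoint IP routing state. Encap entries and decap flows that
 * depend on the route are linked here so that a FIB replace/del event can
 * find and revalidate them; MLX5E_ROUTE_ENTRY_VALID tracks whether the
 * cached route is still usable.
 */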
50struct mlx5e_route_entry {
51	struct mlx5e_route_key key;
52	struct list_head encap_entries;
53	struct list_head decap_flows;
54	u32 flags;
55	struct hlist_node hlist;
56	refcount_t refcnt;
57	int tunnel_dev_index;
58	struct rcu_head rcu;
59};
60
61struct mlx5e_tc_tun_encap {
62	struct mlx5e_priv *priv;
63	struct notifier_block fib_nb;
64	spinlock_t route_lock; /* protects route_tbl */
65	unsigned long route_tbl_last_update;
66	DECLARE_HASHTABLE(route_tbl, 8);
67};
68
69static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
70{
71	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
72}
73
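/* Copy the outer destination/source IP addresses from the match spec into
 * rx_tun_attr. The TUN_RX flag and tunnel IP version are only set when both
 * addresses are fully specified, since they are needed for route lookup.
 */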
74int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
75			     struct mlx5_flow_spec *spec)
76{
77	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
78	struct mlx5_rx_tun_attr *tun_attr;
79	void *daddr, *saddr;
80	u8 ip_version;
81
82	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
83	if (!tun_attr)
84		return -ENOMEM;
85
86	esw_attr->rx_tun_attr = tun_attr;
87	ip_version = mlx5e_tc_get_ip_version(spec, true);
88
89	if (ip_version == 4) {
90		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
91				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
92		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
93				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
94		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
95		tun_attr->src_ip.v4 = *(__be32 *)saddr;
96		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
97			return 0;
98	}
99#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
100	else if (ip_version == 6) {
101		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
102
103		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
104				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
105		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
106				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
107		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
108		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
109		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
110		    ipv6_addr_any(&tun_attr->src_ip.v6))
111			return 0;
112	}
113#endif
114	/* Only set the flag if both src and dst ip addresses exist. They are
115	 * required to establish routing.
116	 */
117	flow_flag_set(flow, TUN_RX);
118	flow->attr->tun_ip_version = ip_version;
119	return 0;
120}
121
122static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
123{
124	bool all_flow_encaps_valid = true;
125	int i;
126
127	/* Flow can be associated with multiple encap entries.
128	 * Before offloading the flow verify that all of them have
129	 * a valid neighbour.
130	 */
131	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
132		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
133			continue;
134		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
135			all_flow_encaps_valid = false;
136			break;
137		}
138	}
139
140	return all_flow_encaps_valid;
141}
142
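/* Offload the cached encapsulation header of 'e' and switch every flow in
 * flow_list that is currently on the slow path over to its encap rule.
 * Flows whose other encap destinations are still unresolved stay on the
 * slow path.
 */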
143void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
144			      struct mlx5e_encap_entry *e,
145			      struct list_head *flow_list)
146{
147	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
148	struct mlx5_pkt_reformat_params reformat_params;
149	struct mlx5_esw_flow_attr *esw_attr;
150	struct mlx5_flow_handle *rule;
151	struct mlx5_flow_attr *attr;
152	struct mlx5_flow_spec *spec;
153	struct mlx5e_tc_flow *flow;
154	int err;
155
156	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
157		return;
158
159	memset(&reformat_params, 0, sizeof(reformat_params));
160	reformat_params.type = e->reformat_type;
161	reformat_params.size = e->encap_size;
162	reformat_params.data = e->encap_header;
163	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
164						     &reformat_params,
165						     MLX5_FLOW_NAMESPACE_FDB);
166	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
168			       PTR_ERR(e->pkt_reformat));
169		return;
170	}
171	e->flags |= MLX5_ENCAP_ENTRY_VALID;
172	mlx5e_rep_queue_neigh_stats_work(priv);
173
174	list_for_each_entry(flow, flow_list, tmp_list) {
175		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
176			continue;
177
178		spec = &flow->attr->parse_attr->spec;
179
180		attr = mlx5e_tc_get_encap_attr(flow);
181		esw_attr = attr->esw_attr;
182		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
183		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
184
185		/* Do not offload flows with unresolved neighbors */
186		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
187			continue;
188
189		err = mlx5e_tc_offload_flow_post_acts(flow);
190		if (err) {
191			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
192				       err);
193			continue;
194		}
195
196		/* update from slow path rule to encap rule */
197		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
198		if (IS_ERR(rule)) {
199			mlx5e_tc_unoffload_flow_post_acts(flow);
200			err = PTR_ERR(rule);
201			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
202				       err);
203			continue;
204		}
205
206		mlx5e_tc_unoffload_from_slow_path(esw, flow);
207		flow->rule[0] = rule;
208		/* was unset when slow path rule removed */
209		flow_flag_set(flow, OFFLOADED);
210	}
211}
212
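/* The encap entry can no longer be used for offload: move every offloaded
 * flow in flow_list back to the slow path rule, clear the encap destination
 * state and release the packet reformat context of 'e'.
 */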
213void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
214			      struct mlx5e_encap_entry *e,
215			      struct list_head *flow_list)
216{
217	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
218	struct mlx5_esw_flow_attr *esw_attr;
219	struct mlx5_flow_handle *rule;
220	struct mlx5_flow_attr *attr;
221	struct mlx5_flow_spec *spec;
222	struct mlx5e_tc_flow *flow;
223	int err;
224
225	list_for_each_entry(flow, flow_list, tmp_list) {
226		if (!mlx5e_is_offloaded_flow(flow))
227			continue;
228
229		attr = mlx5e_tc_get_encap_attr(flow);
230		esw_attr = attr->esw_attr;
231		/* mark the flow's encap dest as non-valid */
232		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
233		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
234
		/* Clear pkt_reformat before checking the slow path flag. The
		 * same flow can be processed again when another of its encap
		 * entries is invalidated; it will already be on the slow path
		 * then, but the pkt_reformat of this destination still has to
		 * be cleared.
		 */
239		if (flow_flag_test(flow, SLOW))
240			continue;
241
242		/* update from encap rule to slow path rule */
243		spec = &flow->attr->parse_attr->spec;
244		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
245
246		if (IS_ERR(rule)) {
247			err = PTR_ERR(rule);
248			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
249				       err);
250			continue;
251		}
252
253		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
254		mlx5e_tc_unoffload_flow_post_acts(flow);
255		flow->rule[0] = rule;
256		/* was unset when fast path rule removed */
257		flow_flag_set(flow, OFFLOADED);
258	}
259
	/* the encap entry is known to be valid here, so its packet reformat
	 * context must be released
	 */
261	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
262	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
263	e->pkt_reformat = NULL;
264}
265
266static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
267				struct list_head *flow_list,
268				int index)
269{
270	if (IS_ERR(mlx5e_flow_get(flow))) {
271		/* Flow is being deleted concurrently. Wait for it to be
272		 * unoffloaded from hardware, otherwise deleting encap will
273		 * fail.
274		 */
275		wait_for_completion(&flow->del_hw_done);
276		return;
277	}
278	wait_for_completion(&flow->init_done);
279
280	flow->tmp_entry_index = index;
281	list_add(&flow->tmp_list, flow_list);
282}
283
284/* Takes reference to all flows attached to encap and adds the flows to
285 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
286 */
287void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
288{
289	struct encap_flow_item *efi;
290	struct mlx5e_tc_flow *flow;
291
292	list_for_each_entry(efi, &e->flows, list) {
293		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
294		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
295	}
296}
297
298/* Takes reference to all flows attached to route and adds the flows to
299 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
300 */
301static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
302					     struct list_head *flow_list)
303{
304	struct mlx5e_tc_flow *flow;
305
306	list_for_each_entry(flow, &r->decap_flows, decap_routes)
307		mlx5e_take_tmp_flow(flow, flow_list, 0);
308}
309
310typedef bool (match_cb)(struct mlx5e_encap_entry *);
311
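/* Iterate the encap list of a neigh hash entry under RCU, starting after 'e'
 * (or from the head when 'e' is NULL). Take a reference on the next entry
 * that is still alive, wait for its initialization to complete and return it
 * if the match callback accepts it; the entry passed in as 'e' is released.
 * Callers loop until NULL is returned, e.g.:
 *
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) { ... }
 */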
312static struct mlx5e_encap_entry *
313mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
314			      struct mlx5e_encap_entry *e,
315			      match_cb match)
316{
317	struct mlx5e_encap_entry *next = NULL;
318
319retry:
320	rcu_read_lock();
321
322	/* find encap with non-zero reference counter value */
323	for (next = e ?
324		     list_next_or_null_rcu(&nhe->encap_list,
325					   &e->encap_list,
326					   struct mlx5e_encap_entry,
327					   encap_list) :
328		     list_first_or_null_rcu(&nhe->encap_list,
329					    struct mlx5e_encap_entry,
330					    encap_list);
331	     next;
332	     next = list_next_or_null_rcu(&nhe->encap_list,
333					  &next->encap_list,
334					  struct mlx5e_encap_entry,
335					  encap_list))
336		if (mlx5e_encap_take(next))
337			break;
338
339	rcu_read_unlock();
340
341	/* release starting encap */
342	if (e)
343		mlx5e_encap_put(netdev_priv(e->out_dev), e);
344	if (!next)
345		return next;
346
347	/* wait for encap to be fully initialized */
348	wait_for_completion(&next->res_ready);
	/* keep searching if the entry does not satisfy the match callback
	 * after initialization completed
	 */
350	if (!match(next)) {
351		e = next;
352		goto retry;
353	}
354
355	return next;
356}
357
358static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
359{
360	return e->flags & MLX5_ENCAP_ENTRY_VALID;
361}
362
363static struct mlx5e_encap_entry *
364mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
365			   struct mlx5e_encap_entry *e)
366{
367	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
368}
369
370static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
371{
372	return e->compl_result >= 0;
373}
374
375struct mlx5e_encap_entry *
376mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
377			  struct mlx5e_encap_entry *e)
378{
379	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
380}
381
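/* Check the flow counters of all flows attached to valid encap entries on
 * this neigh hash entry. If any flow saw traffic since reported_lastuse,
 * refresh the timestamp and send a neighbour event so the entry is kept
 * alive while hardware offloaded traffic still uses it.
 */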
382void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
383{
384	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
385	struct mlx5e_encap_entry *e = NULL;
386	struct mlx5e_tc_flow *flow;
387	struct mlx5_fc *counter;
388	struct neigh_table *tbl;
389	bool neigh_used = false;
390	struct neighbour *n;
391	u64 lastuse;
392
393	if (m_neigh->family == AF_INET)
394		tbl = &arp_tbl;
395#if IS_ENABLED(CONFIG_IPV6)
396	else if (m_neigh->family == AF_INET6)
397		tbl = ipv6_stub->nd_tbl;
398#endif
399	else
400		return;
401
402	/* mlx5e_get_next_valid_encap() releases previous encap before returning
403	 * next one.
404	 */
405	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
406		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
407		struct encap_flow_item *efi, *tmp;
408		struct mlx5_eswitch *esw;
409		LIST_HEAD(flow_list);
410
411		esw = priv->mdev->priv.eswitch;
412		mutex_lock(&esw->offloads.encap_tbl_lock);
413		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
414			flow = container_of(efi, struct mlx5e_tc_flow,
415					    encaps[efi->index]);
416			if (IS_ERR(mlx5e_flow_get(flow)))
417				continue;
418			list_add(&flow->tmp_list, &flow_list);
419
420			if (mlx5e_is_offloaded_flow(flow)) {
421				counter = mlx5e_tc_get_counter(flow);
422				lastuse = mlx5_fc_query_lastuse(counter);
423				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
424					neigh_used = true;
425					break;
426				}
427			}
428		}
429		mutex_unlock(&esw->offloads.encap_tbl_lock);
430
431		mlx5e_put_flow_list(priv, &flow_list);
432		if (neigh_used) {
433			/* release current encap before breaking the loop */
434			mlx5e_encap_put(priv, e);
435			break;
436		}
437	}
438
439	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
440
441	if (neigh_used) {
442		nhe->reported_lastuse = jiffies;
443
444		/* find the relevant neigh according to the cached device and
445		 * dst ip pair
446		 */
447		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
448		if (!n)
449			return;
450
451		neigh_event_send(n, NULL);
452		neigh_release(n);
453	}
454}
455
456static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
457{
458	WARN_ON(!list_empty(&e->flows));
459
460	if (e->compl_result > 0) {
461		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
462
463		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
464			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
465	}
466
467	kfree(e->tun_info);
468	kfree(e->encap_header);
469	kfree_rcu(e, rcu);
470}
471
472static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
473				struct mlx5e_decap_entry *d)
474{
475	WARN_ON(!list_empty(&d->flows));
476
477	if (!d->compl_result)
478		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
479
480	kfree_rcu(d, rcu);
481}
482
483void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
484{
485	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
486
487	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
488		return;
489	list_del(&e->route_list);
490	hash_del_rcu(&e->encap_hlist);
491	mutex_unlock(&esw->offloads.encap_tbl_lock);
492
493	mlx5e_encap_dealloc(priv, e);
494}
495
496static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
497{
498	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
499
500	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
501
502	if (!refcount_dec_and_test(&e->refcnt))
503		return;
504	list_del(&e->route_list);
505	hash_del_rcu(&e->encap_hlist);
506	mlx5e_encap_dealloc(priv, e);
507}
508
509static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
510{
511	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
512
513	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
514		return;
515	hash_del_rcu(&d->hlist);
516	mutex_unlock(&esw->offloads.decap_tbl_lock);
517
518	mlx5e_decap_dealloc(priv, d);
519}
520
521static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
522				     struct mlx5e_tc_flow *flow,
523				     int out_index);
524
525void mlx5e_detach_encap(struct mlx5e_priv *priv,
526			struct mlx5e_tc_flow *flow,
527			struct mlx5_flow_attr *attr,
528			int out_index)
529{
530	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
531	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
532
533	if (!mlx5e_is_eswitch_flow(flow))
534		return;
535
536	if (attr->esw_attr->dests[out_index].flags &
537	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
538		mlx5e_detach_encap_route(priv, flow, out_index);
539
540	/* flow wasn't fully initialized */
541	if (!e)
542		return;
543
544	mutex_lock(&esw->offloads.encap_tbl_lock);
545	list_del(&flow->encaps[out_index].list);
546	flow->encaps[out_index].e = NULL;
547	if (!refcount_dec_and_test(&e->refcnt)) {
548		mutex_unlock(&esw->offloads.encap_tbl_lock);
549		return;
550	}
551	list_del(&e->route_list);
552	hash_del_rcu(&e->encap_hlist);
553	mutex_unlock(&esw->offloads.encap_tbl_lock);
554
555	mlx5e_encap_dealloc(priv, e);
556}
557
558void mlx5e_detach_decap(struct mlx5e_priv *priv,
559			struct mlx5e_tc_flow *flow)
560{
561	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
562	struct mlx5e_decap_entry *d = flow->decap_reformat;
563
564	if (!d)
565		return;
566
567	mutex_lock(&esw->offloads.decap_tbl_lock);
568	list_del(&flow->l3_to_l2_reformat);
569	flow->decap_reformat = NULL;
570
571	if (!refcount_dec_and_test(&d->refcnt)) {
572		mutex_unlock(&esw->offloads.decap_tbl_lock);
573		return;
574	}
575	hash_del_rcu(&d->hlist);
576	mutex_unlock(&esw->offloads.decap_tbl_lock);
577
578	mlx5e_decap_dealloc(priv, d);
579}
580
581bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
582					   struct mlx5e_encap_key *b)
583{
584	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
585		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
586}
587
588bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
589					   struct mlx5e_encap_key *b,
590					   __be16 tun_flags)
591{
592	struct ip_tunnel_info *a_info;
593	struct ip_tunnel_info *b_info;
594	bool a_has_opts, b_has_opts;
595
596	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
597		return false;
598
599	a_has_opts = !!(a->ip_tun_key->tun_flags & tun_flags);
600	b_has_opts = !!(b->ip_tun_key->tun_flags & tun_flags);
601
602	/* keys are equal when both don't have any options attached */
603	if (!a_has_opts && !b_has_opts)
604		return true;
605
606	if (a_has_opts != b_has_opts)
607		return false;
608
	/* options are stored in memory right after the ip_tunnel_info struct */
610	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
611	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
612
613	return a_info->options_len == b_info->options_len &&
614	       !memcmp(ip_tunnel_info_opts(a_info),
615		       ip_tunnel_info_opts(b_info),
616		       a_info->options_len);
617}
618
619static int cmp_decap_info(struct mlx5e_decap_key *a,
620			  struct mlx5e_decap_key *b)
621{
622	return memcmp(&a->key, &b->key, sizeof(b->key));
623}
624
625static int hash_encap_info(struct mlx5e_encap_key *key)
626{
627	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
628		     key->tc_tunnel->tunnel_type);
629}
630
631static int hash_decap_info(struct mlx5e_decap_key *key)
632{
633	return jhash(&key->key, sizeof(key->key), 0);
634}
635
636bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
637{
638	return refcount_inc_not_zero(&e->refcnt);
639}
640
641static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
642{
643	return refcount_inc_not_zero(&e->refcnt);
644}
645
646static struct mlx5e_encap_entry *
647mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
648		uintptr_t hash_key)
649{
650	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
651	struct mlx5e_encap_key e_key;
652	struct mlx5e_encap_entry *e;
653
654	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
655				   encap_hlist, hash_key) {
656		e_key.ip_tun_key = &e->tun_info->key;
657		e_key.tc_tunnel = e->tunnel;
658		if (e->tunnel->encap_info_equal(&e_key, key) &&
659		    mlx5e_encap_take(e))
660			return e;
661	}
662
663	return NULL;
664}
665
666static struct mlx5e_decap_entry *
667mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
668		uintptr_t hash_key)
669{
670	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
671	struct mlx5e_decap_key r_key;
672	struct mlx5e_decap_entry *e;
673
674	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
675				   hlist, hash_key) {
676		r_key = e->key;
677		if (!cmp_decap_info(&r_key, key) &&
678		    mlx5e_decap_take(e))
679			return e;
680	}
681	return NULL;
682}
683
684struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
685{
686	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
687
688	return kmemdup(tun_info, tun_size, GFP_KERNEL);
689}
690
691static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
692				      struct mlx5e_tc_flow *flow,
693				      int out_index,
694				      struct mlx5e_encap_entry *e,
695				      struct netlink_ext_ack *extack)
696{
697	int i;
698
699	for (i = 0; i < out_index; i++) {
700		if (flow->encaps[i].e != e)
701			continue;
702		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
703		netdev_err(priv->netdev, "can't duplicate encap action\n");
704		return true;
705	}
706
707	return false;
708}
709
710static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
711			       struct mlx5_flow_attr *attr,
712			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
713			       struct net_device *out_dev,
714			       int route_dev_ifindex,
715			       int out_index)
716{
717	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
718	struct net_device *route_dev;
719	u16 vport_num;
720	int err = 0;
721	u32 data;
722
723	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
724
725	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
726	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
727		goto out;
728
729	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
730	if (err)
731		goto out;
732
733	attr->dest_chain = 0;
734	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
735	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
736	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
737						       vport_num);
738	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
739						   MLX5_FLOW_NAMESPACE_FDB,
740						   VPORT_TO_REG, data);
741	if (err >= 0) {
742		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
743		err = 0;
744	}
745
746out:
747	if (route_dev)
748		dev_put(route_dev);
749	return err;
750}
751
752static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
753				  struct mlx5_esw_flow_attr *attr,
754				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
755				  struct net_device *out_dev,
756				  int route_dev_ifindex,
757				  int out_index)
758{
759	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
760	struct net_device *route_dev;
761	u16 vport_num;
762	int err = 0;
763	u32 data;
764
765	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
766
767	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
768	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
769		err = -ENODEV;
770		goto out;
771	}
772
773	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
774	if (err)
775		goto out;
776
777	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
778						       vport_num);
779	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
780
781out:
782	if (route_dev)
783		dev_put(route_dev);
784	return err;
785}
786
787static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
788{
789	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
790	struct mlx5_rep_uplink_priv *uplink_priv;
791	struct mlx5e_rep_priv *uplink_rpriv;
792	struct mlx5e_tc_tun_encap *encap;
793	unsigned int ret;
794
795	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
796	uplink_priv = &uplink_rpriv->uplink_priv;
797	encap = uplink_priv->encap;
798
799	spin_lock_bh(&encap->route_lock);
800	ret = encap->route_tbl_last_update;
801	spin_unlock_bh(&encap->route_lock);
802	return ret;
803}
804
805static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
806				    struct mlx5e_tc_flow *flow,
807				    struct mlx5_flow_attr *attr,
808				    struct mlx5e_encap_entry *e,
809				    bool new_encap_entry,
810				    unsigned long tbl_time_before,
811				    int out_index);
812
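/* Find an encap entry matching the flow's tunnel info or create and
 * initialize a new one, then attach the flow to it. A valid entry provides
 * the packet reformat for the destination immediately; otherwise the flow is
 * marked SLOW until the encap becomes valid. Called with
 * esw->offloads.encap_tbl_lock held.
 */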
813int mlx5e_attach_encap(struct mlx5e_priv *priv,
814		       struct mlx5e_tc_flow *flow,
815		       struct mlx5_flow_attr *attr,
816		       struct net_device *mirred_dev,
817		       int out_index,
818		       struct netlink_ext_ack *extack,
819		       struct net_device **encap_dev)
820{
821	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
822	struct mlx5e_tc_flow_parse_attr *parse_attr;
823	const struct ip_tunnel_info *tun_info;
824	const struct mlx5e_mpls_info *mpls_info;
825	unsigned long tbl_time_before = 0;
826	struct mlx5e_encap_entry *e;
827	struct mlx5e_encap_key key;
828	bool entry_created = false;
829	unsigned short family;
830	uintptr_t hash_key;
831	int err = 0;
832
833	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
834
835	parse_attr = attr->parse_attr;
836	tun_info = parse_attr->tun_info[out_index];
837	mpls_info = &parse_attr->mpls_info[out_index];
838	family = ip_tunnel_info_af(tun_info);
839	key.ip_tun_key = &tun_info->key;
840	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
841	if (!key.tc_tunnel) {
842		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
843		return -EOPNOTSUPP;
844	}
845
846	hash_key = hash_encap_info(&key);
847
848	e = mlx5e_encap_get(priv, &key, hash_key);
849
	/* the entry may exist but not be valid yet (e.g. its neighbour is not
	 * resolved); the MLX5_ENCAP_ENTRY_VALID check below picks between the
	 * encap rule and the slow path
	 */
851	if (e) {
852		/* Check that entry was not already attached to this flow */
853		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
854			err = -EOPNOTSUPP;
855			goto out_err;
856		}
857
858		goto attach_flow;
859	}
860
861	e = kzalloc(sizeof(*e), GFP_KERNEL);
862	if (!e) {
863		err = -ENOMEM;
864		goto out_err;
865	}
866
867	refcount_set(&e->refcnt, 1);
868	init_completion(&e->res_ready);
869	entry_created = true;
870	INIT_LIST_HEAD(&e->route_list);
871
872	tun_info = mlx5e_dup_tun_info(tun_info);
873	if (!tun_info) {
874		err = -ENOMEM;
875		goto out_err_init;
876	}
877	e->tun_info = tun_info;
878	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
879	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
880	if (err)
881		goto out_err_init;
882
883	INIT_LIST_HEAD(&e->flows);
884	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
885	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
886
887	if (family == AF_INET)
888		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
889	else if (family == AF_INET6)
890		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
891
892	complete_all(&e->res_ready);
893	if (err) {
894		e->compl_result = err;
895		goto out_err;
896	}
897	e->compl_result = 1;
898
899attach_flow:
900	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
901				       tbl_time_before, out_index);
902	if (err)
903		goto out_err;
904
905	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
906	if (err == -EOPNOTSUPP) {
907		/* If device doesn't support int port offload,
908		 * redirect to uplink vport.
909		 */
910		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
911		err = 0;
912	} else if (err) {
913		goto out_err;
914	}
915
916	flow->encaps[out_index].e = e;
917	list_add(&flow->encaps[out_index].list, &e->flows);
918	flow->encaps[out_index].index = out_index;
919	*encap_dev = e->out_dev;
920	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
921		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
922		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
923	} else {
924		flow_flag_set(flow, SLOW);
925	}
926
927	return err;
928
929out_err:
930	if (e)
931		mlx5e_encap_put_locked(priv, e);
932	return err;
933
934out_err_init:
935	kfree(tun_info);
936	kfree(e);
937	return err;
938}
939
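/* Decap (L3-to-L2) reformat contexts are shared between flows that rebuild
 * the same Ethernet header: look the header up in decap_tbl, allocate a new
 * reformat context if it is not there yet, and link the flow to the entry.
 */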
940int mlx5e_attach_decap(struct mlx5e_priv *priv,
941		       struct mlx5e_tc_flow *flow,
942		       struct netlink_ext_ack *extack)
943{
944	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
945	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
946	struct mlx5_pkt_reformat_params reformat_params;
947	struct mlx5e_decap_entry *d;
948	struct mlx5e_decap_key key;
949	uintptr_t hash_key;
950	int err = 0;
951
952	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
953		NL_SET_ERR_MSG_MOD(extack,
954				   "encap header larger than max supported");
955		return -EOPNOTSUPP;
956	}
957
958	key.key = attr->eth;
959	hash_key = hash_decap_info(&key);
960	mutex_lock(&esw->offloads.decap_tbl_lock);
961	d = mlx5e_decap_get(priv, &key, hash_key);
962	if (d) {
963		mutex_unlock(&esw->offloads.decap_tbl_lock);
964		wait_for_completion(&d->res_ready);
965		mutex_lock(&esw->offloads.decap_tbl_lock);
966		if (d->compl_result) {
967			err = -EREMOTEIO;
968			goto out_free;
969		}
970		goto found;
971	}
972
973	d = kzalloc(sizeof(*d), GFP_KERNEL);
974	if (!d) {
975		err = -ENOMEM;
976		goto out_err;
977	}
978
979	d->key = key;
980	refcount_set(&d->refcnt, 1);
981	init_completion(&d->res_ready);
982	INIT_LIST_HEAD(&d->flows);
983	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
984	mutex_unlock(&esw->offloads.decap_tbl_lock);
985
986	memset(&reformat_params, 0, sizeof(reformat_params));
987	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
988	reformat_params.size = sizeof(attr->eth);
989	reformat_params.data = &attr->eth;
990	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
991						     &reformat_params,
992						     MLX5_FLOW_NAMESPACE_FDB);
993	if (IS_ERR(d->pkt_reformat)) {
994		err = PTR_ERR(d->pkt_reformat);
995		d->compl_result = err;
996	}
997	mutex_lock(&esw->offloads.decap_tbl_lock);
998	complete_all(&d->res_ready);
999	if (err)
1000		goto out_free;
1001
1002found:
1003	flow->decap_reformat = d;
1004	attr->decap_pkt_reformat = d->pkt_reformat;
1005	list_add(&flow->l3_to_l2_reformat, &d->flows);
1006	mutex_unlock(&esw->offloads.decap_tbl_lock);
1007	return 0;
1008
1009out_free:
1010	mutex_unlock(&esw->offloads.decap_tbl_lock);
1011	mlx5e_decap_put(priv, d);
1012	return err;
1013
1014out_err:
1015	mutex_unlock(&esw->offloads.decap_tbl_lock);
1016	return err;
1017}
1018
1019int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1020				 struct mlx5e_tc_flow *flow,
1021				 struct mlx5_flow_attr *attr,
1022				 struct netlink_ext_ack *extack,
1023				 bool *vf_tun)
1024{
1025	struct mlx5e_tc_flow_parse_attr *parse_attr;
1026	struct mlx5_esw_flow_attr *esw_attr;
1027	struct net_device *encap_dev = NULL;
1028	struct mlx5e_rep_priv *rpriv;
1029	struct mlx5e_priv *out_priv;
1030	struct mlx5_eswitch *esw;
1031	int out_index;
1032	int err = 0;
1033
1034	parse_attr = attr->parse_attr;
1035	esw_attr = attr->esw_attr;
1036	*vf_tun = false;
1037
1038	esw = priv->mdev->priv.eswitch;
1039	mutex_lock(&esw->offloads.encap_tbl_lock);
1040	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1041		struct net_device *out_dev;
1042		int mirred_ifindex;
1043
1044		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1045			continue;
1046
1047		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1048		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1049		if (!out_dev) {
1050			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1051			err = -ENODEV;
1052			goto out;
1053		}
1054		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1055					 extack, &encap_dev);
1056		dev_put(out_dev);
1057		if (err)
1058			goto out;
1059
1060		if (esw_attr->dests[out_index].flags &
1061		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1062		    !esw_attr->dest_int_port)
1063			*vf_tun = true;
1064
1065		out_priv = netdev_priv(encap_dev);
1066		rpriv = out_priv->ppriv;
1067		esw_attr->dests[out_index].vport_valid = true;
1068		esw_attr->dests[out_index].vport = rpriv->rep->vport;
1069		esw_attr->dests[out_index].mdev = out_priv->mdev;
1070	}
1071
1072	if (*vf_tun && esw_attr->out_count > 1) {
1073		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1074		err = -EOPNOTSUPP;
1075		goto out;
1076	}
1077
1078out:
1079	mutex_unlock(&esw->offloads.encap_tbl_lock);
1080	return err;
1081}
1082
1083void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1084				    struct mlx5e_tc_flow *flow,
1085				    struct mlx5_flow_attr *attr)
1086{
1087	struct mlx5_esw_flow_attr *esw_attr;
1088	int out_index;
1089
1090	if (!mlx5e_is_eswitch_flow(flow))
1091		return;
1092
1093	esw_attr = attr->esw_attr;
1094
1095	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1096		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1097			continue;
1098
1099		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1100		kfree(attr->parse_attr->tun_info[out_index]);
1101	}
1102}
1103
1104static int cmp_route_info(struct mlx5e_route_key *a,
1105			  struct mlx5e_route_key *b)
1106{
1107	if (a->ip_version == 4 && b->ip_version == 4)
1108		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1109			      sizeof(a->endpoint_ip.v4));
1110	else if (a->ip_version == 6 && b->ip_version == 6)
1111		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1112			      sizeof(a->endpoint_ip.v6));
1113	return 1;
1114}
1115
1116static u32 hash_route_info(struct mlx5e_route_key *key)
1117{
1118	if (key->ip_version == 4)
1119		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1120	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1121}
1122
1123static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1124				struct mlx5e_route_entry *r)
1125{
1126	WARN_ON(!list_empty(&r->decap_flows));
1127	WARN_ON(!list_empty(&r->encap_entries));
1128
1129	kfree_rcu(r, rcu);
1130}
1131
1132static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1133{
1134	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1135
1136	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1137		return;
1138
1139	hash_del_rcu(&r->hlist);
1140	mutex_unlock(&esw->offloads.encap_tbl_lock);
1141
1142	mlx5e_route_dealloc(priv, r);
1143}
1144
1145static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1146{
1147	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1148
1149	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1150
1151	if (!refcount_dec_and_test(&r->refcnt))
1152		return;
1153	hash_del_rcu(&r->hlist);
1154	mlx5e_route_dealloc(priv, r);
1155}
1156
1157static struct mlx5e_route_entry *
1158mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1159		u32 hash_key)
1160{
1161	struct mlx5e_route_key r_key;
1162	struct mlx5e_route_entry *r;
1163
1164	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1165		r_key = r->key;
1166		if (!cmp_route_info(&r_key, key) &&
1167		    refcount_inc_not_zero(&r->refcnt))
1168			return r;
1169	}
1170	return NULL;
1171}
1172
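/* Return an existing valid route entry for the given endpoint IP or create a
 * new one. Only when a new entry is inserted is *route_tbl_change_time
 * updated (under route_lock), which lets the caller detect FIB updates that
 * raced with the insertion.
 */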
1173static struct mlx5e_route_entry *
1174mlx5e_route_get_create(struct mlx5e_priv *priv,
1175		       struct mlx5e_route_key *key,
1176		       int tunnel_dev_index,
1177		       unsigned long *route_tbl_change_time)
1178{
1179	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1180	struct mlx5_rep_uplink_priv *uplink_priv;
1181	struct mlx5e_rep_priv *uplink_rpriv;
1182	struct mlx5e_tc_tun_encap *encap;
1183	struct mlx5e_route_entry *r;
1184	u32 hash_key;
1185
1186	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1187	uplink_priv = &uplink_rpriv->uplink_priv;
1188	encap = uplink_priv->encap;
1189
1190	hash_key = hash_route_info(key);
1191	spin_lock_bh(&encap->route_lock);
1192	r = mlx5e_route_get(encap, key, hash_key);
1193	spin_unlock_bh(&encap->route_lock);
1194	if (r) {
1195		if (!mlx5e_route_entry_valid(r)) {
1196			mlx5e_route_put_locked(priv, r);
1197			return ERR_PTR(-EINVAL);
1198		}
1199		return r;
1200	}
1201
1202	r = kzalloc(sizeof(*r), GFP_KERNEL);
1203	if (!r)
1204		return ERR_PTR(-ENOMEM);
1205
1206	r->key = *key;
1207	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1208	r->tunnel_dev_index = tunnel_dev_index;
1209	refcount_set(&r->refcnt, 1);
1210	INIT_LIST_HEAD(&r->decap_flows);
1211	INIT_LIST_HEAD(&r->encap_entries);
1212
1213	spin_lock_bh(&encap->route_lock);
1214	*route_tbl_change_time = encap->route_tbl_last_update;
1215	hash_add(encap->route_tbl, &r->hlist, hash_key);
1216	spin_unlock_bh(&encap->route_lock);
1217
1218	return r;
1219}
1220
1221static struct mlx5e_route_entry *
1222mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1223{
1224	u32 hash_key = hash_route_info(key);
1225	struct mlx5e_route_entry *r;
1226
1227	spin_lock_bh(&encap->route_lock);
1228	encap->route_tbl_last_update = jiffies;
1229	r = mlx5e_route_get(encap, key, hash_key);
1230	spin_unlock_bh(&encap->route_lock);
1231
1232	return r;
1233}
1234
1235struct mlx5e_tc_fib_event_data {
1236	struct work_struct work;
1237	unsigned long event;
1238	struct mlx5e_route_entry *r;
1239	struct net_device *ul_dev;
1240};
1241
1242static void mlx5e_tc_fib_event_work(struct work_struct *work);
1243static struct mlx5e_tc_fib_event_data *
1244mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1245{
1246	struct mlx5e_tc_fib_event_data *fib_work;
1247
1248	fib_work = kzalloc(sizeof(*fib_work), flags);
1249	if (WARN_ON(!fib_work))
1250		return NULL;
1251
1252	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1253	fib_work->event = event;
1254	fib_work->ul_dev = ul_dev;
1255
1256	return fib_work;
1257}
1258
1259static int
1260mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1261			   struct mlx5e_route_entry *r,
1262			   unsigned long event)
1263{
1264	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1265	struct mlx5e_tc_fib_event_data *fib_work;
1266	struct mlx5e_rep_priv *uplink_rpriv;
1267	struct net_device *ul_dev;
1268
1269	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1270	ul_dev = uplink_rpriv->netdev;
1271
1272	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1273	if (!fib_work)
1274		return -ENOMEM;
1275
1276	dev_hold(ul_dev);
1277	refcount_inc(&r->refcnt);
1278	fib_work->r = r;
1279	queue_work(priv->wq, &fib_work->work);
1280
1281	return 0;
1282}
1283
1284int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1285			     struct mlx5e_tc_flow *flow)
1286{
1287	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1288	unsigned long tbl_time_before, tbl_time_after;
1289	struct mlx5e_tc_flow_parse_attr *parse_attr;
1290	struct mlx5_flow_attr *attr = flow->attr;
1291	struct mlx5_esw_flow_attr *esw_attr;
1292	struct mlx5e_route_entry *r;
1293	struct mlx5e_route_key key;
1294	int err = 0;
1295
1296	esw_attr = attr->esw_attr;
1297	parse_attr = attr->parse_attr;
1298	mutex_lock(&esw->offloads.encap_tbl_lock);
1299	if (!esw_attr->rx_tun_attr)
1300		goto out;
1301
1302	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1303	tbl_time_after = tbl_time_before;
1304	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1305	if (err || !esw_attr->rx_tun_attr->decap_vport)
1306		goto out;
1307
1308	key.ip_version = attr->tun_ip_version;
1309	if (key.ip_version == 4)
1310		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1311	else
1312		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1313
1314	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1315				   &tbl_time_after);
1316	if (IS_ERR(r)) {
1317		err = PTR_ERR(r);
1318		goto out;
1319	}
	/* Routing changed concurrently. The FIB event handler might have missed
	 * the new entry, so schedule an update.
	 */
1323	if (tbl_time_before != tbl_time_after) {
1324		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1325		if (err) {
1326			mlx5e_route_put_locked(priv, r);
1327			goto out;
1328		}
1329	}
1330
1331	flow->decap_route = r;
1332	list_add(&flow->decap_routes, &r->decap_flows);
1333	mutex_unlock(&esw->offloads.encap_tbl_lock);
1334	return 0;
1335
1336out:
1337	mutex_unlock(&esw->offloads.encap_tbl_lock);
1338	return err;
1339}
1340
1341static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1342				    struct mlx5e_tc_flow *flow,
1343				    struct mlx5_flow_attr *attr,
1344				    struct mlx5e_encap_entry *e,
1345				    bool new_encap_entry,
1346				    unsigned long tbl_time_before,
1347				    int out_index)
1348{
1349	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1350	unsigned long tbl_time_after = tbl_time_before;
1351	struct mlx5e_tc_flow_parse_attr *parse_attr;
1352	const struct ip_tunnel_info *tun_info;
1353	struct mlx5_esw_flow_attr *esw_attr;
1354	struct mlx5e_route_entry *r;
1355	struct mlx5e_route_key key;
1356	unsigned short family;
1357	int err = 0;
1358
1359	esw_attr = attr->esw_attr;
1360	parse_attr = attr->parse_attr;
1361	tun_info = parse_attr->tun_info[out_index];
1362	family = ip_tunnel_info_af(tun_info);
1363
1364	if (family == AF_INET) {
1365		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1366		key.ip_version = 4;
1367	} else if (family == AF_INET6) {
1368		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1369		key.ip_version = 6;
1370	}
1371
1372	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1373				  e->route_dev_ifindex, out_index);
1374	if (err || !(esw_attr->dests[out_index].flags &
1375		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1376		return err;
1377
1378	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1379				   &tbl_time_after);
1380	if (IS_ERR(r))
1381		return PTR_ERR(r);
	/* Routing changed concurrently. The FIB event handler might have missed
	 * the new entry, so schedule an update.
	 */
1385	if (tbl_time_before != tbl_time_after) {
1386		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1387		if (err) {
1388			mlx5e_route_put_locked(priv, r);
1389			return err;
1390		}
1391	}
1392
1393	flow->encap_routes[out_index].r = r;
1394	if (new_encap_entry)
1395		list_add(&e->route_list, &r->encap_entries);
1396	flow->encap_routes[out_index].index = out_index;
1397	return 0;
1398}
1399
1400void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1401			      struct mlx5e_tc_flow *flow)
1402{
1403	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1404	struct mlx5e_route_entry *r = flow->decap_route;
1405
1406	if (!r)
1407		return;
1408
1409	mutex_lock(&esw->offloads.encap_tbl_lock);
1410	list_del(&flow->decap_routes);
1411	flow->decap_route = NULL;
1412
1413	if (!refcount_dec_and_test(&r->refcnt)) {
1414		mutex_unlock(&esw->offloads.encap_tbl_lock);
1415		return;
1416	}
1417	hash_del_rcu(&r->hlist);
1418	mutex_unlock(&esw->offloads.encap_tbl_lock);
1419
1420	mlx5e_route_dealloc(priv, r);
1421}
1422
1423static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1424				     struct mlx5e_tc_flow *flow,
1425				     int out_index)
1426{
1427	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1428	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1429	struct mlx5e_encap_entry *e, *tmp;
1430
1431	if (!r)
1432		return;
1433
1434	mutex_lock(&esw->offloads.encap_tbl_lock);
1435	flow->encap_routes[out_index].r = NULL;
1436
1437	if (!refcount_dec_and_test(&r->refcnt)) {
1438		mutex_unlock(&esw->offloads.encap_tbl_lock);
1439		return;
1440	}
1441	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1442		list_del_init(&e->route_list);
1443	hash_del_rcu(&r->hlist);
1444	mutex_unlock(&esw->offloads.encap_tbl_lock);
1445
1446	mlx5e_route_dealloc(priv, r);
1447}
1448
1449static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1450				   struct mlx5e_encap_entry *e,
1451				   struct list_head *encap_flows)
1452{
1453	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1454	struct mlx5e_tc_flow *flow;
1455
1456	list_for_each_entry(flow, encap_flows, tmp_list) {
1457		struct mlx5_esw_flow_attr *esw_attr;
1458		struct mlx5_flow_attr *attr;
1459
1460		if (!mlx5e_is_offloaded_flow(flow))
1461			continue;
1462
1463		attr = mlx5e_tc_get_encap_attr(flow);
1464		esw_attr = attr->esw_attr;
1465
1466		if (flow_flag_test(flow, SLOW)) {
1467			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1468		} else {
1469			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1470			mlx5e_tc_unoffload_flow_post_acts(flow);
1471		}
1472
1473		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1474		attr->modify_hdr = NULL;
1475
1476		esw_attr->dests[flow->tmp_entry_index].flags &=
1477			~MLX5_ESW_DEST_ENCAP_VALID;
1478		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1479	}
1480
1481	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1482	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1483		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1484		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1485		e->pkt_reformat = NULL;
1486	}
1487}
1488
1489static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1490				  struct net_device *tunnel_dev,
1491				  struct mlx5e_encap_entry *e,
1492				  struct list_head *encap_flows)
1493{
1494	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1495	struct mlx5e_tc_flow *flow;
1496	int err;
1497
1498	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1499		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1500		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1501	if (err)
1502		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1503	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1504
1505	list_for_each_entry(flow, encap_flows, tmp_list) {
1506		struct mlx5e_tc_flow_parse_attr *parse_attr;
1507		struct mlx5_esw_flow_attr *esw_attr;
1508		struct mlx5_flow_handle *rule;
1509		struct mlx5_flow_attr *attr;
1510		struct mlx5_flow_spec *spec;
1511
1512		if (flow_flag_test(flow, FAILED))
1513			continue;
1514
1515		spec = &flow->attr->parse_attr->spec;
1516
1517		attr = mlx5e_tc_get_encap_attr(flow);
1518		esw_attr = attr->esw_attr;
1519		parse_attr = attr->parse_attr;
1520
1521		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1522					     e->out_dev, e->route_dev_ifindex,
1523					     flow->tmp_entry_index);
1524		if (err) {
1525			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1526			continue;
1527		}
1528
1529		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1530		if (err) {
1531			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1532				       err);
1533			continue;
1534		}
1535
1536		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1537			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1538			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1539			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1540				goto offload_to_slow_path;
1541
1542			err = mlx5e_tc_offload_flow_post_acts(flow);
1543			if (err) {
1544				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1545					       err);
1546				goto offload_to_slow_path;
1547			}
1548
1549			/* update from slow path rule to encap rule */
1550			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1551			if (IS_ERR(rule)) {
1552				mlx5e_tc_unoffload_flow_post_acts(flow);
1553				err = PTR_ERR(rule);
1554				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1555					       err);
1556			} else {
1557				flow->rule[0] = rule;
1558			}
1559		} else {
1560offload_to_slow_path:
1561			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1562			/* mark the flow's encap dest as non-valid */
1563			esw_attr->dests[flow->tmp_entry_index].flags &=
1564				~MLX5_ESW_DEST_ENCAP_VALID;
1565
1566			if (IS_ERR(rule)) {
1567				err = PTR_ERR(rule);
1568				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1569					       err);
1570			} else {
1571				flow->rule[0] = rule;
1572			}
1573		}
1574		flow_flag_set(flow, OFFLOADED);
1575	}
1576}
1577
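/* For each encap entry attached to the route: take its flows, tear down the
 * stale offload state and, if the route was replaced rather than deleted,
 * rebuild the encap header and re-offload the flows. The flows are collected
 * in flow_list so the caller can release the temporary references.
 */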
1578static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1579				     struct mlx5e_route_entry *r,
1580				     struct list_head *flow_list,
1581				     bool replace)
1582{
1583	struct net_device *tunnel_dev;
1584	struct mlx5e_encap_entry *e;
1585
1586	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1587	if (!tunnel_dev)
1588		return -ENODEV;
1589
1590	list_for_each_entry(e, &r->encap_entries, route_list) {
1591		LIST_HEAD(encap_flows);
1592
1593		mlx5e_take_all_encap_flows(e, &encap_flows);
1594		if (list_empty(&encap_flows))
1595			continue;
1596
1597		if (mlx5e_route_entry_valid(r))
1598			mlx5e_invalidate_encap(priv, e, &encap_flows);
1599
1600		if (!replace) {
1601			list_splice(&encap_flows, flow_list);
1602			continue;
1603		}
1604
1605		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1606		list_splice(&encap_flows, flow_list);
1607	}
1608
1609	return 0;
1610}
1611
1612static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1613				      struct list_head *flow_list)
1614{
1615	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1616	struct mlx5e_tc_flow *flow;
1617
1618	list_for_each_entry(flow, flow_list, tmp_list)
1619		if (mlx5e_is_offloaded_flow(flow))
1620			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1621}
1622
1623static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1624				  struct list_head *decap_flows)
1625{
1626	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1627	struct mlx5e_tc_flow *flow;
1628
1629	list_for_each_entry(flow, decap_flows, tmp_list) {
1630		struct mlx5e_tc_flow_parse_attr *parse_attr;
1631		struct mlx5_flow_attr *attr = flow->attr;
1632		struct mlx5_flow_handle *rule;
1633		struct mlx5_flow_spec *spec;
1634		int err;
1635
1636		if (flow_flag_test(flow, FAILED))
1637			continue;
1638
1639		parse_attr = attr->parse_attr;
1640		spec = &parse_attr->spec;
1641		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1642		if (err) {
1643			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1644				       err);
1645			continue;
1646		}
1647
1648		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1649		if (IS_ERR(rule)) {
1650			err = PTR_ERR(rule);
1651			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1652				       err);
1653		} else {
1654			flow->rule[0] = rule;
1655			flow_flag_set(flow, OFFLOADED);
1656		}
1657	}
1658}
1659
1660static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1661					  struct mlx5e_route_entry *r,
1662					  struct list_head *flow_list,
1663					  bool replace)
1664{
1665	struct net_device *tunnel_dev;
1666	LIST_HEAD(decap_flows);
1667
1668	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1669	if (!tunnel_dev)
1670		return -ENODEV;
1671
1672	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1673	if (mlx5e_route_entry_valid(r))
1674		mlx5e_unoffload_flow_list(priv, &decap_flows);
1675	if (replace)
1676		mlx5e_reoffload_decap(priv, &decap_flows);
1677
1678	list_splice(&decap_flows, flow_list);
1679
1680	return 0;
1681}
1682
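/* Deferred handler for FIB events (and for route entries created while an
 * event was in flight): under RTNL and encap_tbl_lock, refresh all encap
 * entries and decap flows that depend on the route, re-offloading them on
 * FIB_EVENT_ENTRY_REPLACE and leaving them unoffloaded on deletion.
 */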
1683static void mlx5e_tc_fib_event_work(struct work_struct *work)
1684{
1685	struct mlx5e_tc_fib_event_data *event_data =
1686		container_of(work, struct mlx5e_tc_fib_event_data, work);
1687	struct net_device *ul_dev = event_data->ul_dev;
1688	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1689	struct mlx5e_route_entry *r = event_data->r;
1690	struct mlx5_eswitch *esw;
1691	LIST_HEAD(flow_list);
1692	bool replace;
1693	int err;
1694
1695	/* sync with concurrent neigh updates */
1696	rtnl_lock();
1697	esw = priv->mdev->priv.eswitch;
1698	mutex_lock(&esw->offloads.encap_tbl_lock);
1699	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1700
1701	if (!mlx5e_route_entry_valid(r) && !replace)
1702		goto out;
1703
1704	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1705	if (err)
1706		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1707			       err);
1708
1709	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1710	if (err)
1711		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1712			       err);
1713
1714	if (replace)
1715		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1716out:
1717	mutex_unlock(&esw->offloads.encap_tbl_lock);
1718	rtnl_unlock();
1719
1720	mlx5e_put_flow_list(priv, &flow_list);
1721	mlx5e_route_put(priv, event_data->r);
1722	dev_put(event_data->ul_dev);
1723	kfree(event_data);
1724}
1725
1726static struct mlx5e_tc_fib_event_data *
1727mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1728			 struct net_device *ul_dev,
1729			 struct mlx5e_tc_tun_encap *encap,
1730			 unsigned long event,
1731			 struct fib_notifier_info *info)
1732{
1733	struct fib_entry_notifier_info *fen_info;
1734	struct mlx5e_tc_fib_event_data *fib_work;
1735	struct mlx5e_route_entry *r;
1736	struct mlx5e_route_key key;
1737	struct net_device *fib_dev;
1738
1739	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1740	if (fen_info->fi->nh)
1741		return NULL;
1742	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1743	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1744	    fen_info->dst_len != 32)
1745		return NULL;
1746
1747	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1748	if (!fib_work)
1749		return ERR_PTR(-ENOMEM);
1750
1751	key.endpoint_ip.v4 = htonl(fen_info->dst);
1752	key.ip_version = 4;
1753
	/* Can't fail after this point: releasing the reference to r would
	 * require taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
1758	r = mlx5e_route_lookup_for_update(encap, &key);
1759	if (!r)
1760		goto out;
1761	fib_work->r = r;
1762	dev_hold(ul_dev);
1763
1764	return fib_work;
1765
1766out:
1767	kfree(fib_work);
1768	return NULL;
1769}
1770
1771static struct mlx5e_tc_fib_event_data *
1772mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1773			 struct net_device *ul_dev,
1774			 struct mlx5e_tc_tun_encap *encap,
1775			 unsigned long event,
1776			 struct fib_notifier_info *info)
1777{
1778	struct fib6_entry_notifier_info *fen_info;
1779	struct mlx5e_tc_fib_event_data *fib_work;
1780	struct mlx5e_route_entry *r;
1781	struct mlx5e_route_key key;
1782	struct net_device *fib_dev;
1783
1784	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1785	fib_dev = fib6_info_nh_dev(fen_info->rt);
1786	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1787	    fen_info->rt->fib6_dst.plen != 128)
1788		return NULL;
1789
1790	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1791	if (!fib_work)
1792		return ERR_PTR(-ENOMEM);
1793
1794	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1795	       sizeof(fen_info->rt->fib6_dst.addr));
1796	key.ip_version = 6;
1797
	/* Can't fail after this point: releasing the reference to r would
	 * require taking a sleeping mutex, which we can't do in atomic
	 * context.
	 */
1802	r = mlx5e_route_lookup_for_update(encap, &key);
1803	if (!r)
1804		goto out;
1805	fib_work->r = r;
1806	dev_hold(ul_dev);
1807
1808	return fib_work;
1809
1810out:
1811	kfree(fib_work);
1812	return NULL;
1813}
1814
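/* FIB notifier callback. It runs in atomic context, so it only filters the
 * event, looks up the affected route entry and hands the actual flow updates
 * off to a work item on the driver workqueue.
 */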
1815static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1816{
1817	struct mlx5e_tc_fib_event_data *fib_work;
1818	struct fib_notifier_info *info = ptr;
1819	struct mlx5e_tc_tun_encap *encap;
1820	struct net_device *ul_dev;
1821	struct mlx5e_priv *priv;
1822
1823	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1824	priv = encap->priv;
1825	ul_dev = priv->netdev;
1826	priv = netdev_priv(ul_dev);
1827
1828	switch (event) {
1829	case FIB_EVENT_ENTRY_REPLACE:
1830	case FIB_EVENT_ENTRY_DEL:
1831		if (info->family == AF_INET)
1832			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1833		else if (info->family == AF_INET6)
1834			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1835		else
1836			return NOTIFY_DONE;
1837
1838		if (!IS_ERR_OR_NULL(fib_work)) {
1839			queue_work(priv->wq, &fib_work->work);
1840		} else if (IS_ERR(fib_work)) {
1841			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1842			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1843				       PTR_ERR(fib_work));
1844		}
1845
1846		break;
1847	default:
1848		return NOTIFY_DONE;
1849	}
1850
1851	return NOTIFY_DONE;
1852}
1853
1854struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1855{
1856	struct mlx5e_tc_tun_encap *encap;
1857	int err;
1858
1859	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1860	if (!encap)
1861		return ERR_PTR(-ENOMEM);
1862
1863	encap->priv = priv;
1864	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1865	spin_lock_init(&encap->route_lock);
1866	hash_init(encap->route_tbl);
1867	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1868				    NULL, NULL);
1869	if (err) {
1870		kvfree(encap);
1871		return ERR_PTR(err);
1872	}
1873
1874	return encap;
1875}
1876
1877void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1878{
1879	if (!encap)
1880		return;
1881
1882	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush pending fib event work */
1884	kvfree(encap);
1885}
1886