1// SPDX-License-Identifier: GPL-2.0-only
2/****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2023, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11#include "tc_encap_actions.h"
12#include "tc.h"
13#include "mae.h"
14#include <net/vxlan.h>
15#include <net/geneve.h>
16#include <net/netevent.h>
17#include <net/arp.h>
18
19static const struct rhashtable_params efx_neigh_ht_params = {
20	.key_len	= offsetof(struct efx_neigh_binder, ha),
21	.key_offset	= 0,
22	.head_offset	= offsetof(struct efx_neigh_binder, linkage),
23};
24
25static const struct rhashtable_params efx_tc_encap_ht_params = {
26	.key_len	= offsetofend(struct efx_tc_encap_action, key),
27	.key_offset	= 0,
28	.head_offset	= offsetof(struct efx_tc_encap_action, linkage),
29};
30
31static void efx_tc_encap_free(void *ptr, void *__unused)
32{
33	struct efx_tc_encap_action *enc = ptr;
34
35	WARN_ON(refcount_read(&enc->ref));
36	kfree(enc);
37}
38
39static void efx_neigh_free(void *ptr, void *__unused)
40{
41	struct efx_neigh_binder *neigh = ptr;
42
43	WARN_ON(refcount_read(&neigh->ref));
44	WARN_ON(!list_empty(&neigh->users));
45	put_net_track(neigh->net, &neigh->ns_tracker);
46	netdev_put(neigh->egdev, &neigh->dev_tracker);
47	kfree(neigh);
48}
49
50int efx_tc_init_encap_actions(struct efx_nic *efx)
51{
52	int rc;
53
54	rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params);
55	if (rc < 0)
56		goto fail_neigh_ht;
57	rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params);
58	if (rc < 0)
59		goto fail_encap_ht;
60	return 0;
61fail_encap_ht:
62	rhashtable_destroy(&efx->tc->neigh_ht);
63fail_neigh_ht:
64	return rc;
65}
66
67/* Only call this in init failure teardown.
68 * Normal exit should fini instead as there may be entries in the table.
69 */
70void efx_tc_destroy_encap_actions(struct efx_nic *efx)
71{
72	rhashtable_destroy(&efx->tc->encap_ht);
73	rhashtable_destroy(&efx->tc->neigh_ht);
74}
75
76void efx_tc_fini_encap_actions(struct efx_nic *efx)
77{
78	rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL);
79	rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL);
80}
81
82static void efx_neigh_update(struct work_struct *work);
83
84static int efx_bind_neigh(struct efx_nic *efx,
85			  struct efx_tc_encap_action *encap, struct net *net,
86			  struct netlink_ext_ack *extack)
87{
88	struct efx_neigh_binder *neigh, *old;
89	struct flowi6 flow6 = {};
90	struct flowi4 flow4 = {};
91	int rc;
92
93	/* GCC stupidly thinks that only values explicitly listed in the enum
94	 * definition can _possibly_ be sensible case values, so without this
95	 * cast it complains about the IPv6 versions.
96	 */
97	switch ((int)encap->type) {
98	case EFX_ENCAP_TYPE_VXLAN:
99	case EFX_ENCAP_TYPE_GENEVE:
100		flow4.flowi4_proto = IPPROTO_UDP;
101		flow4.fl4_dport = encap->key.tp_dst;
102		flow4.flowi4_tos = encap->key.tos;
103		flow4.daddr = encap->key.u.ipv4.dst;
104		flow4.saddr = encap->key.u.ipv4.src;
105		break;
106	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
107	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
108		flow6.flowi6_proto = IPPROTO_UDP;
109		flow6.fl6_dport = encap->key.tp_dst;
110		flow6.flowlabel = ip6_make_flowinfo(encap->key.tos,
111						    encap->key.label);
112		flow6.daddr = encap->key.u.ipv6.dst;
113		flow6.saddr = encap->key.u.ipv6.src;
114		break;
115	default:
116		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d",
117				       (int)encap->type);
118		return -EOPNOTSUPP;
119	}
120
121	neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT);
122	if (!neigh)
123		return -ENOMEM;
124	neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT);
125	neigh->dst_ip = flow4.daddr;
126	neigh->dst_ip6 = flow6.daddr;
127
128	old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht,
129						&neigh->linkage,
130						efx_neigh_ht_params);
131	if (old) {
132		/* don't need our new entry */
133		put_net_track(neigh->net, &neigh->ns_tracker);
134		kfree(neigh);
135		if (IS_ERR(old)) /* oh dear, it's actually an error */
136			return PTR_ERR(old);
137		if (!refcount_inc_not_zero(&old->ref))
138			return -EAGAIN;
139		/* existing entry found, ref taken */
140		neigh = old;
141	} else {
142		/* New entry.  We need to initiate a lookup */
143		struct neighbour *n;
144		struct rtable *rt;
145
146		if (encap->type & EFX_ENCAP_FLAG_IPV6) {
147#if IS_ENABLED(CONFIG_IPV6)
148			struct dst_entry *dst;
149
150			dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6,
151							      NULL);
152			rc = PTR_ERR_OR_ZERO(dst);
153			if (rc) {
154				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap");
155				goto out_free;
156			}
157			neigh->egdev = dst->dev;
158			netdev_hold(neigh->egdev, &neigh->dev_tracker,
159				    GFP_KERNEL_ACCOUNT);
160			neigh->ttl = ip6_dst_hoplimit(dst);
161			n = dst_neigh_lookup(dst, &flow6.daddr);
162			dst_release(dst);
163#else
164			/* We shouldn't ever get here, because if IPv6 isn't
165			 * enabled how did someone create an IPv6 tunnel_key?
166			 */
167			rc = -EOPNOTSUPP;
168			NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)");
169			goto out_free;
170#endif
171		} else {
172			rt = ip_route_output_key(net, &flow4);
173			if (IS_ERR_OR_NULL(rt)) {
174				rc = PTR_ERR_OR_ZERO(rt);
175				if (!rc)
176					rc = -EIO;
177				NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap");
178				goto out_free;
179			}
180			neigh->egdev = rt->dst.dev;
181			netdev_hold(neigh->egdev, &neigh->dev_tracker,
182				    GFP_KERNEL_ACCOUNT);
183			neigh->ttl = ip4_dst_hoplimit(&rt->dst);
184			n = dst_neigh_lookup(&rt->dst, &flow4.daddr);
185			ip_rt_put(rt);
186		}
187		if (!n) {
188			rc = -ENETUNREACH;
189			NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap");
190			netdev_put(neigh->egdev, &neigh->dev_tracker);
191			goto out_free;
192		}
193		refcount_set(&neigh->ref, 1);
194		INIT_LIST_HEAD(&neigh->users);
195		read_lock_bh(&n->lock);
196		ether_addr_copy(neigh->ha, n->ha);
197		neigh->n_valid = n->nud_state & NUD_VALID;
198		read_unlock_bh(&n->lock);
199		rwlock_init(&neigh->lock);
200		INIT_WORK(&neigh->work, efx_neigh_update);
201		neigh->efx = efx;
202		neigh->used = jiffies;
203		if (!neigh->n_valid)
204			/* Prod ARP to find us a neighbour */
205			neigh_event_send(n, NULL);
206		neigh_release(n);
207	}
208	/* Add us to this neigh */
209	encap->neigh = neigh;
210	list_add_tail(&encap->list, &neigh->users);
211	return 0;
212
213out_free:
214	/* cleanup common to several error paths */
215	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
216			       efx_neigh_ht_params);
217	synchronize_rcu();
218	put_net_track(net, &neigh->ns_tracker);
219	kfree(neigh);
220	return rc;
221}
222
223static void efx_free_neigh(struct efx_neigh_binder *neigh)
224{
225	struct efx_nic *efx = neigh->efx;
226
227	rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage,
228			       efx_neigh_ht_params);
229	synchronize_rcu();
230	netdev_put(neigh->egdev, &neigh->dev_tracker);
231	put_net_track(neigh->net, &neigh->ns_tracker);
232	kfree(neigh);
233}
234
235static void efx_release_neigh(struct efx_nic *efx,
236			      struct efx_tc_encap_action *encap)
237{
238	struct efx_neigh_binder *neigh = encap->neigh;
239
240	if (!neigh)
241		return;
242	list_del(&encap->list);
243	encap->neigh = NULL;
244	if (!refcount_dec_and_test(&neigh->ref))
245		return; /* still in use */
246	efx_free_neigh(neigh);
247}
248
249static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto)
250{
251	struct efx_neigh_binder *neigh = encap->neigh;
252	struct ethhdr *eth;
253
254	encap->encap_hdr_len = sizeof(*eth);
255	eth = (struct ethhdr *)encap->encap_hdr;
256
257	if (encap->neigh->n_valid)
258		ether_addr_copy(eth->h_dest, neigh->ha);
259	else
260		eth_zero_addr(eth->h_dest);
261	ether_addr_copy(eth->h_source, neigh->egdev->dev_addr);
262	eth->h_proto = htons(proto);
263}
264
265static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
266{
267	struct efx_neigh_binder *neigh = encap->neigh;
268	struct ip_tunnel_key *key = &encap->key;
269	struct iphdr *ip;
270
271	ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len);
272	encap->encap_hdr_len += sizeof(*ip);
273
274	ip->daddr = key->u.ipv4.dst;
275	ip->saddr = key->u.ipv4.src;
276	ip->ttl = neigh->ttl;
277	ip->protocol = ipproto;
278	ip->version = 0x4;
279	ip->ihl = 0x5;
280	ip->tot_len = cpu_to_be16(ip->ihl * 4 + len);
281	ip_send_check(ip);
282}
283
284#ifdef CONFIG_IPV6
285static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len)
286{
287	struct efx_neigh_binder *neigh = encap->neigh;
288	struct ip_tunnel_key *key = &encap->key;
289	struct ipv6hdr *ip;
290
291	ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len);
292	encap->encap_hdr_len += sizeof(*ip);
293
294	ip6_flow_hdr(ip, key->tos, key->label);
295	ip->daddr = key->u.ipv6.dst;
296	ip->saddr = key->u.ipv6.src;
297	ip->hop_limit = neigh->ttl;
298	ip->nexthdr = ipproto;
299	ip->version = 0x6;
300	ip->payload_len = cpu_to_be16(len);
301}
302#endif
303
304static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len)
305{
306	struct ip_tunnel_key *key = &encap->key;
307	struct udphdr *udp;
308
309	udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len);
310	encap->encap_hdr_len += sizeof(*udp);
311
312	udp->dest = key->tp_dst;
313	udp->len = cpu_to_be16(sizeof(*udp) + len);
314}
315
316static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap)
317{
318	struct ip_tunnel_key *key = &encap->key;
319	struct vxlanhdr *vxlan;
320
321	vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len);
322	encap->encap_hdr_len += sizeof(*vxlan);
323
324	vxlan->vx_flags = VXLAN_HF_VNI;
325	vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id));
326}
327
328static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap)
329{
330	struct ip_tunnel_key *key = &encap->key;
331	struct genevehdr *geneve;
332	u32 vni;
333
334	geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len);
335	encap->encap_hdr_len += sizeof(*geneve);
336
337	geneve->proto_type = htons(ETH_P_TEB);
338	/* convert tun_id to host-endian so we can use host arithmetic to
339	 * extract individual bytes.
340	 */
341	vni = ntohl(tunnel_id_to_key32(key->tun_id));
342	geneve->vni[0] = vni >> 16;
343	geneve->vni[1] = vni >> 8;
344	geneve->vni[2] = vni;
345}
346
347#define vxlan_header_l4_len	(sizeof(struct udphdr) + sizeof(struct vxlanhdr))
348#define vxlan4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len)
349static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap)
350{
351	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len);
352	efx_gen_tun_header_eth(encap, ETH_P_IP);
353	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len);
354	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
355	efx_gen_tun_header_vxlan(encap);
356}
357
358#define geneve_header_l4_len	(sizeof(struct udphdr) + sizeof(struct genevehdr))
359#define geneve4_header_len	(sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len)
360static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap)
361{
362	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len);
363	efx_gen_tun_header_eth(encap, ETH_P_IP);
364	efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len);
365	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
366	efx_gen_tun_header_geneve(encap);
367}
368
369#ifdef CONFIG_IPV6
370#define vxlan6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len)
371static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap)
372{
373	BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len);
374	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
375	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len);
376	efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr));
377	efx_gen_tun_header_vxlan(encap);
378}
379
380#define geneve6_header_len	(sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len)
381static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap)
382{
383	BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len);
384	efx_gen_tun_header_eth(encap, ETH_P_IPV6);
385	efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len);
386	efx_gen_tun_header_udp(encap, sizeof(struct genevehdr));
387	efx_gen_tun_header_geneve(encap);
388}
389#endif
390
391static void efx_gen_encap_header(struct efx_nic *efx,
392				 struct efx_tc_encap_action *encap)
393{
394	encap->n_valid = encap->neigh->n_valid;
395
396	/* GCC stupidly thinks that only values explicitly listed in the enum
397	 * definition can _possibly_ be sensible case values, so without this
398	 * cast it complains about the IPv6 versions.
399	 */
400	switch ((int)encap->type) {
401	case EFX_ENCAP_TYPE_VXLAN:
402		efx_gen_vxlan_header_ipv4(encap);
403		break;
404	case EFX_ENCAP_TYPE_GENEVE:
405		efx_gen_geneve_header_ipv4(encap);
406		break;
407#ifdef CONFIG_IPV6
408	case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6:
409		efx_gen_vxlan_header_ipv6(encap);
410		break;
411	case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6:
412		efx_gen_geneve_header_ipv6(encap);
413		break;
414#endif
415	default:
416		/* unhandled encap type, can't happen */
417		if (net_ratelimit())
418			netif_err(efx, drv, efx->net_dev,
419				  "Bogus encap type %d, can't generate\n",
420				  encap->type);
421
422		/* Use fallback action. */
423		encap->n_valid = false;
424		break;
425	}
426}
427
428static void efx_tc_update_encap(struct efx_nic *efx,
429				struct efx_tc_encap_action *encap)
430{
431	struct efx_tc_action_set_list *acts, *fallback;
432	struct efx_tc_flow_rule *rule;
433	struct efx_tc_action_set *act;
434	int rc;
435
436	if (encap->n_valid) {
437		/* Make sure no rules are using this encap while we change it */
438		list_for_each_entry(act, &encap->users, encap_user) {
439			acts = act->user;
440			if (WARN_ON(!acts)) /* can't happen */
441				continue;
442			rule = container_of(acts, struct efx_tc_flow_rule, acts);
443			if (rule->fallback)
444				fallback = rule->fallback;
445			else /* fallback fallback: deliver to PF */
446				fallback = &efx->tc->facts.pf;
447			rc = efx_mae_update_rule(efx, fallback->fw_id,
448						 rule->fw_id);
449			if (rc)
450				netif_err(efx, drv, efx->net_dev,
451					  "Failed to update (f) rule %08x rc %d\n",
452					  rule->fw_id, rc);
453			else
454				netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n",
455					  rule->fw_id);
456		}
457	}
458
459	/* Make sure we don't leak arbitrary bytes on the wire;
460	 * set an all-0s ethernet header.  A successful call to
461	 * efx_gen_encap_header() will overwrite this.
462	 */
463	memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr));
464	encap->encap_hdr_len = ETH_HLEN;
465
466	if (encap->neigh) {
467		read_lock_bh(&encap->neigh->lock);
468		efx_gen_encap_header(efx, encap);
469		read_unlock_bh(&encap->neigh->lock);
470	} else {
471		encap->n_valid = false;
472	}
473
474	rc = efx_mae_update_encap_md(efx, encap);
475	if (rc) {
476		netif_err(efx, drv, efx->net_dev,
477			  "Failed to update encap hdr %08x rc %d\n",
478			  encap->fw_id, rc);
479		return;
480	}
481	netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n",
482		  encap->fw_id);
483	if (!encap->n_valid)
484		return;
485	/* Update rule users: use the action if they are now ready */
486	list_for_each_entry(act, &encap->users, encap_user) {
487		acts = act->user;
488		if (WARN_ON(!acts)) /* can't happen */
489			continue;
490		rule = container_of(acts, struct efx_tc_flow_rule, acts);
491		if (!efx_tc_check_ready(efx, rule))
492			continue;
493		rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id);
494		if (rc)
495			netif_err(efx, drv, efx->net_dev,
496				  "Failed to update rule %08x rc %d\n",
497				  rule->fw_id, rc);
498		else
499			netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n",
500				  rule->fw_id);
501	}
502}
503
504static void efx_neigh_update(struct work_struct *work)
505{
506	struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work);
507	struct efx_tc_encap_action *encap;
508	struct efx_nic *efx = neigh->efx;
509
510	mutex_lock(&efx->tc->mutex);
511	list_for_each_entry(encap, &neigh->users, list)
512		efx_tc_update_encap(neigh->efx, encap);
513	/* release ref taken in efx_neigh_event() */
514	if (refcount_dec_and_test(&neigh->ref))
515		efx_free_neigh(neigh);
516	mutex_unlock(&efx->tc->mutex);
517}
518
519static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n)
520{
521	struct efx_neigh_binder keys = {NULL}, *neigh;
522	bool n_valid, ipv6 = false;
523	char ha[ETH_ALEN];
524	size_t keysize;
525
526	if (WARN_ON(!efx->tc))
527		return NOTIFY_DONE;
528
529	if (n->tbl == &arp_tbl) {
530		keysize = sizeof(keys.dst_ip);
531#if IS_ENABLED(CONFIG_IPV6)
532	} else if (n->tbl == ipv6_stub->nd_tbl) {
533		ipv6 = true;
534		keysize = sizeof(keys.dst_ip6);
535#endif
536	} else {
537		return NOTIFY_DONE;
538	}
539	if (!n->parms) {
540		netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n");
541		return NOTIFY_DONE;
542	}
543	keys.net = read_pnet(&n->parms->net);
544	if (n->tbl->key_len != keysize) {
545		netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n",
546			   n->tbl->key_len);
547		return NOTIFY_DONE;
548	}
549	read_lock_bh(&n->lock); /* Get a consistent view */
550	memcpy(ha, n->ha, ETH_ALEN);
551	n_valid = (n->nud_state & NUD_VALID) && !n->dead;
552	read_unlock_bh(&n->lock);
553	if (ipv6)
554		memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len);
555	else
556		memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len);
557	rcu_read_lock();
558	neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys,
559				       efx_neigh_ht_params);
560	if (!neigh || neigh->dying)
561		/* We're not interested in this neighbour */
562		goto done;
563	write_lock_bh(&neigh->lock);
564	if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) {
565		write_unlock_bh(&neigh->lock);
566		/* Nothing has changed; no work to do */
567		goto done;
568	}
569	neigh->n_valid = n_valid;
570	memcpy(neigh->ha, ha, ETH_ALEN);
571	write_unlock_bh(&neigh->lock);
572	if (refcount_inc_not_zero(&neigh->ref)) {
573		rcu_read_unlock();
574		if (!schedule_work(&neigh->work))
575			/* failed to schedule, release the ref we just took */
576			if (refcount_dec_and_test(&neigh->ref))
577				efx_free_neigh(neigh);
578	} else {
579done:
580		rcu_read_unlock();
581	}
582	return NOTIFY_DONE;
583}
584
585bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
586{
587	struct efx_tc_action_set *act;
588
589	/* Encap actions can only be offloaded if they have valid
590	 * neighbour info for the outer Ethernet header.
591	 */
592	list_for_each_entry(act, &rule->acts.list, list)
593		if (act->encap_md && !act->encap_md->n_valid)
594			return false;
595	return true;
596}
597
598struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
599			struct efx_nic *efx, const struct ip_tunnel_info *info,
600			struct net_device *egdev, struct netlink_ext_ack *extack)
601{
602	enum efx_encap_type type = efx_tc_indr_netdev_type(egdev);
603	struct efx_tc_encap_action *encap, *old;
604	struct efx_rep *to_efv;
605	s64 rc;
606
607	if (type == EFX_ENCAP_TYPE_NONE) {
608		/* dest is not an encap device */
609		NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set");
610		return ERR_PTR(-EOPNOTSUPP);
611	}
612	rc = efx_mae_check_encap_type_supported(efx, type);
613	if (rc < 0) {
614		NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type");
615		return ERR_PTR(rc);
616	}
617	/* No support yet for Geneve options */
618	if (info->options_len) {
619		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options");
620		return ERR_PTR(-EOPNOTSUPP);
621	}
622	switch (info->mode) {
623	case IP_TUNNEL_INFO_TX:
624		break;
625	case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6:
626		type |= EFX_ENCAP_FLAG_IPV6;
627		break;
628	default:
629		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u",
630				       info->mode);
631		return ERR_PTR(-EOPNOTSUPP);
632	}
633	encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT);
634	if (!encap)
635		return ERR_PTR(-ENOMEM);
636	encap->type = type;
637	encap->key = info->key;
638	INIT_LIST_HEAD(&encap->users);
639	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht,
640						&encap->linkage,
641						efx_tc_encap_ht_params);
642	if (old) {
643		/* don't need our new entry */
644		kfree(encap);
645		if (IS_ERR(old)) /* oh dear, it's actually an error */
646			return ERR_CAST(old);
647		if (!refcount_inc_not_zero(&old->ref))
648			return ERR_PTR(-EAGAIN);
649		/* existing entry found, ref taken */
650		return old;
651	}
652
653	rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack);
654	if (rc < 0)
655		goto out_remove;
656	to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev);
657	if (IS_ERR(to_efv)) {
658		/* neigh->egdev isn't ours */
659		NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch");
660		rc = PTR_ERR(to_efv);
661		goto out_release;
662	}
663	rc = efx_tc_flower_external_mport(efx, to_efv);
664	if (rc < 0) {
665		NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port");
666		goto out_release;
667	}
668	encap->dest_mport = rc;
669	read_lock_bh(&encap->neigh->lock);
670	efx_gen_encap_header(efx, encap);
671	read_unlock_bh(&encap->neigh->lock);
672
673	rc = efx_mae_allocate_encap_md(efx, encap);
674	if (rc < 0) {
675		NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw");
676		goto out_release;
677	}
678
679	/* ref and return */
680	refcount_set(&encap->ref, 1);
681	return encap;
682out_release:
683	efx_release_neigh(efx, encap);
684out_remove:
685	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
686			       efx_tc_encap_ht_params);
687	kfree(encap);
688	return ERR_PTR(rc);
689}
690
691void efx_tc_flower_release_encap_md(struct efx_nic *efx,
692				    struct efx_tc_encap_action *encap)
693{
694	if (!refcount_dec_and_test(&encap->ref))
695		return; /* still in use */
696	efx_release_neigh(efx, encap);
697	rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage,
698			       efx_tc_encap_ht_params);
699	efx_mae_free_encap_md(efx, encap);
700	kfree(encap);
701}
702
703static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh)
704{
705	struct efx_tc_encap_action *encap, *next;
706
707	list_for_each_entry_safe(encap, next, &neigh->users, list) {
708		/* Should cause neigh usage count to fall to zero, freeing it */
709		efx_release_neigh(efx, encap);
710		/* The encap has lost its neigh, so it's now unready */
711		efx_tc_update_encap(efx, encap);
712	}
713}
714
715void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev)
716{
717	struct efx_neigh_binder *neigh;
718	struct rhashtable_iter walk;
719
720	mutex_lock(&efx->tc->mutex);
721	rhashtable_walk_enter(&efx->tc->neigh_ht, &walk);
722	rhashtable_walk_start(&walk);
723	while ((neigh = rhashtable_walk_next(&walk)) != NULL) {
724		if (IS_ERR(neigh))
725			continue;
726		if (neigh->egdev != net_dev)
727			continue;
728		neigh->dying = true;
729		rhashtable_walk_stop(&walk);
730		synchronize_rcu(); /* Make sure any updates see dying flag */
731		efx_tc_remove_neigh_users(efx, neigh); /* might sleep */
732		rhashtable_walk_start(&walk);
733	}
734	rhashtable_walk_stop(&walk);
735	rhashtable_walk_exit(&walk);
736	mutex_unlock(&efx->tc->mutex);
737}
738
739int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event,
740			  void *ptr)
741{
742	if (efx->type->is_vf)
743		return NOTIFY_DONE;
744
745	switch (event) {
746	case NETEVENT_NEIGH_UPDATE:
747		return efx_neigh_event(efx, ptr);
748	default:
749		return NOTIFY_DONE;
750	}
751}
752