/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */

#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
#include <net/bareudp.h>
#include "en/tc_tun.h"
#include "en/tc_priv.h"
#include "en_tc.h"
#include "rep/tc.h"
#include "rep/neigh.h"
#include "lag/lag.h"
#include "lag/mp.h"

struct mlx5e_tc_tun_route_attr {
	struct net_device *out_dev;
	struct net_device *route_dev;
	union {
		struct flowi4 fl4;
		struct flowi6 fl6;
	} fl;
	struct neighbour *n;
	u8 ttl;
};

#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}

static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
{
	if (attr->n)
		neigh_release(attr->n);
	if (attr->route_dev)
		dev_put(attr->route_dev);
}

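/* Map a tunnel netdevice to the matching mlx5e tunnel offload ops,
 * or NULL if the device type cannot be offloaded.
 */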
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
	if (netif_is_vxlan(tunnel_dev))
		return &vxlan_tunnel;
	else if (netif_is_geneve(tunnel_dev))
		return &geneve_tunnel;
	else if (netif_is_gretap(tunnel_dev) ||
		 netif_is_ip6gretap(tunnel_dev))
		return &gre_tunnel;
	else if (netif_is_bareudp(tunnel_dev))
		return &mplsoudp_tunnel;
	else
		return NULL;
}

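/* Given the netdevice a route resolves to, pick the device used for the encap
 * route (route_dev) and the e-switch egress device (out_dev). Destinations on
 * a different e-switch, a LAG master, a VLAN device or an OVS master are
 * reached through the uplink representor.
 */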
static int get_route_and_out_devs(struct mlx5e_priv *priv,
				  struct net_device *dev,
				  struct net_device **route_dev,
				  struct net_device **out_dev)
{
	struct net_device *uplink_dev, *uplink_upper, *real_dev;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool dst_is_lag_dev;

	real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
	uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);

	rcu_read_lock();
	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
	/* mlx5_lag_is_sriov() is a blocking function which can't be called
	 * while holding the RCU read lock, so hold a reference on the
	 * net_device instead.
	 */
	if (uplink_upper)
		dev_hold(uplink_upper);
	rcu_read_unlock();

	dst_is_lag_dev = (uplink_upper &&
			  netif_is_lag_master(uplink_upper) &&
			  real_dev == uplink_upper &&
			  mlx5_lag_is_sriov(priv->mdev));
	if (uplink_upper)
		dev_put(uplink_upper);

	/* if the egress device isn't on the same HW e-switch or
	 * it's a LAG device, use the uplink
	 */
	*route_dev = dev;
	if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
	    dst_is_lag_dev || is_vlan_dev(*route_dev) ||
	    netif_is_ovs_master(*route_dev))
		*out_dev = uplink_dev;
	else if (mlx5e_eswitch_rep(dev) &&
		 mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
		*out_dev = *route_dev;
	else
		return -EOPNOTSUPP;

	if (!mlx5e_eswitch_uplink_rep(*out_dev))
		return -EOPNOTSUPP;

	if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev &&
	    !mlx5_lag_is_mpesw(priv->mdev))
		return -EOPNOTSUPP;

	return 0;
}

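/* Resolve the IPv4 route for the tunnel flow and take references on the
 * resulting route_dev and next-hop neighbour. On success the caller must
 * release them with mlx5e_route_lookup_ipv4_put().
 */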
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
				       struct net_device *dev,
				       struct mlx5e_tc_tun_route_attr *attr)
{
	struct net_device *route_dev;
	struct net_device *out_dev;
	struct neighbour *n;
	struct rtable *rt;

#if IS_ENABLED(CONFIG_INET)
	struct mlx5_core_dev *mdev = priv->mdev;
	struct net_device *uplink_dev;
	int ret;

	if (mlx5_lag_is_multipath(mdev)) {
		struct mlx5_eswitch *esw = mdev->priv.eswitch;

		uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
		attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
	} else {
		struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);

		if (tunnel && tunnel->get_remote_ifindex)
			attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
	}

	rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	if (rt->rt_type != RTN_UNICAST) {
		ret = -ENETUNREACH;
		goto err_rt_release;
	}

	if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
		ret = -ENETUNREACH;
		goto err_rt_release;
	}
#else
	return -EOPNOTSUPP;
#endif

	ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
	if (ret < 0)
		goto err_rt_release;
	dev_hold(route_dev);

	if (!attr->ttl)
		attr->ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
	if (!n) {
		ret = -ENOMEM;
		goto err_dev_release;
	}

	ip_rt_put(rt);
	attr->route_dev = route_dev;
	attr->out_dev = out_dev;
	attr->n = n;
	return 0;

err_dev_release:
	dev_put(route_dev);
err_rt_release:
	ip_rt_put(rt);
	return ret;
}

static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}

static const char *mlx5e_netdev_kind(struct net_device *dev)
{
	if (dev->rtnl_link_ops)
		return dev->rtnl_link_ops->kind;
	else
		return "unknown";
}

static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
				      struct mlx5e_encap_entry *e)
{
	if (!e->tunnel) {
		pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
		return -EOPNOTSUPP;
	}

	return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
}

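/* Write the outer Ethernet (and VLAN, if route_dev is a VLAN device) header
 * into the encap buffer and return a pointer to where the IP header starts.
 */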
static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
			     struct mlx5e_encap_entry *e,
			     u16 proto)
{
	struct ethhdr *eth = (struct ethhdr *)buf;
	char *ip;

	ether_addr_copy(eth->h_dest, e->h_dest);
	ether_addr_copy(eth->h_source, dev->dev_addr);
	if (is_vlan_dev(dev)) {
		struct vlan_hdr *vlan = (struct vlan_hdr *)
					((char *)eth + ETH_HLEN);
		ip = (char *)vlan + VLAN_HLEN;
		eth->h_proto = vlan_dev_vlan_proto(dev);
		vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
		vlan->h_vlan_encapsulated_proto = htons(proto);
	} else {
		eth->h_proto = htons(proto);
		ip = (char *)eth + ETH_HLEN;
	}

	return ip;
}

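/* Build the full IPv4 encapsulation header for a new encap entry: resolve the
 * route and neighbour, attach the entry to the representor neigh update
 * machinery, and offload the reformat header once the neighbour is valid.
 */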
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_neigh m_neigh = {};
	TC_TUN_ROUTE_ATTR_INIT(attr);
	int ipv4_encap_size;
	char *encap_header;
	struct iphdr *ip;
	u8 nud_state;
	int err;

	/* add the IP fields */
	attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
	attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
	attr.fl.fl4.saddr = tun_key->u.ipv4.src;
	attr.ttl = tun_key->ttl;

	err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv4_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct iphdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv4_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv4_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	m_neigh.family = attr.n->ops->family;
	memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
	e->out_dev = attr.out_dev;
	e->route_dev_ifindex = attr.route_dev->ifindex;

	/* It's important to add the neigh to the hash table before checking
	 * its validity state, so that if a notification arrives when the
	 * neigh changes its validity state, the relevant neigh is found in
	 * the hash.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
	if (err)
		goto free_encap;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
					     ETH_P_IP);

	/* add ip header */
	ip->tos = tun_key->tos;
	ip->version = 0x4;
	ip->ihl = 0x5;
	ip->ttl = attr.ttl;
	ip->daddr = attr.fl.fl4.daddr;
	ip->saddr = attr.fl.fl4.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
					 &ip->protocol, e);
	if (err)
		goto destroy_neigh_entry;

	e->encap_size = ipv4_encap_size;
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto destroy_neigh_entry;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;
}

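/* Rebuild the IPv4 encapsulation header for an existing encap entry after a
 * route or neighbour change; the reformat is re-allocated only once the
 * neighbour is valid.
 */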
int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	int ipv4_encap_size;
	char *encap_header;
	struct iphdr *ip;
	u8 nud_state;
	int err;

	/* add the IP fields */
	attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
	attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
	attr.fl.fl4.saddr = tun_key->u.ipv4.src;
	attr.ttl = tun_key->ttl;

	err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv4_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct iphdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv4_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv4_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	e->route_dev_ifindex = attr.route_dev->ifindex;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
					     ETH_P_IP);

	/* add ip header */
	ip->tos = tun_key->tos;
	ip->version = 0x4;
	ip->ihl = 0x5;
	ip->ttl = attr.ttl;
	ip->daddr = attr.fl.fl4.daddr;
	ip->saddr = attr.fl.fl4.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
					 &ip->protocol, e);
	if (err)
		goto free_encap;

	e->encap_size = ipv4_encap_size;
	kfree(e->encap_header);
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto free_encap;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;

free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;
}

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
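/* IPv6 counterpart of mlx5e_route_lookup_ipv4_get(): resolve the route and
 * neighbour and take references that mlx5e_route_lookup_ipv6_put() releases.
 */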
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
				       struct net_device *dev,
				       struct mlx5e_tc_tun_route_attr *attr)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
	struct net_device *route_dev;
	struct net_device *out_dev;
	struct dst_entry *dst;
	struct neighbour *n;
	int ret;

	if (tunnel && tunnel->get_remote_ifindex)
		attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
	dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
					      NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	if (!attr->ttl)
		attr->ttl = ip6_dst_hoplimit(dst);

	ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
	if (ret < 0)
		goto err_dst_release;

	dev_hold(route_dev);
	n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
	if (!n) {
		ret = -ENOMEM;
		goto err_dev_release;
	}

	dst_release(dst);
	attr->out_dev = out_dev;
	attr->route_dev = route_dev;
	attr->n = n;
	return 0;

err_dev_release:
	dev_put(route_dev);
err_dst_release:
	dst_release(dst);
	return ret;
}

static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}

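/* IPv6 counterpart of mlx5e_tc_tun_create_header_ipv4(). */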
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_neigh m_neigh = {};
	TC_TUN_ROUTE_ATTR_INIT(attr);
	struct ipv6hdr *ip6h;
	int ipv6_encap_size;
	char *encap_header;
	u8 nud_state;
	int err;

	attr.ttl = tun_key->ttl;
	attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
	attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
	attr.fl.fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv6_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct ipv6hdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv6_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv6_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	m_neigh.family = attr.n->ops->family;
	memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
	e->out_dev = attr.out_dev;
	e->route_dev_ifindex = attr.route_dev->ifindex;

	/* It's important to add the neigh to the hash table before checking
	 * its validity state, so that if a notification arrives when the
	 * neigh changes its validity state, the relevant neigh is found in
	 * the hash.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
	if (err)
		goto free_encap;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
						 ETH_P_IPV6);

	/* add ip header */
	ip6_flow_hdr(ip6h, tun_key->tos, 0);
	/* the HW fills up ipv6 payload len */
	ip6h->hop_limit   = attr.ttl;
	ip6h->daddr	  = attr.fl.fl6.daddr;
	ip6h->saddr	  = attr.fl.fl6.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
					 &ip6h->nexthdr, e);
	if (err)
		goto destroy_neigh_entry;

	e->encap_size = ipv6_encap_size;
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto destroy_neigh_entry;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;
}

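/* IPv6 counterpart of mlx5e_tc_tun_update_header_ipv4(). */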
int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	struct ipv6hdr *ip6h;
	int ipv6_encap_size;
	char *encap_header;
	u8 nud_state;
	int err;

	attr.ttl = tun_key->ttl;

	attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
	attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
	attr.fl.fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv6_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct ipv6hdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv6_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv6_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	e->route_dev_ifindex = attr.route_dev->ifindex;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
						 ETH_P_IPV6);

	/* add ip header */
	ip6_flow_hdr(ip6h, tun_key->tos, 0);
	/* the HW fills up ipv6 payload len */
	ip6h->hop_limit   = attr.ttl;
	ip6h->daddr	  = attr.fl.fl6.daddr;
	ip6h->saddr	  = attr.fl.fl6.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
					 &ip6h->nexthdr, e);
	if (err)
		goto free_encap;

	e->encap_size = ipv6_encap_size;
	kfree(e->encap_header);
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto free_encap;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;

free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;
}
#endif

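/* Route lookup for the decap direction: the flow's tunnel source and
 * destination addresses are swapped, and the result is used to resolve either
 * the decap vport or the OVS internal port the tunnel terminates on.
 */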
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct mlx5_flow_attr *flow_attr,
			      struct net_device *filter_dev)
{
	struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_int_port *int_port;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	u16 vport_num;
	int err = 0;

	if (flow_attr->tun_ip_version == 4) {
		/* Addresses are swapped for decap */
		attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
		attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
		err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (flow_attr->tun_ip_version == 6) {
		/* Addresses are swapped for decap */
		attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
		attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
		err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
	}
#endif
	else
		return 0;

	if (err)
		return err;

	if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
	    mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
		err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
		if (err)
			goto out;

		esw_attr->rx_tun_attr->decap_vport = vport_num;
	} else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
						 attr.route_dev->ifindex,
						 MLX5E_TC_INT_PORT_INGRESS);
		if (IS_ERR(int_port)) {
			err = PTR_ERR(int_port);
			goto out;
		}
		esw_attr->int_port = int_port;
	}

out:
	if (flow_attr->tun_ip_version == 4)
		mlx5e_route_lookup_ipv4_put(&attr);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (flow_attr->tun_ip_version == 6)
		mlx5e_route_lookup_ipv6_put(&attr);
#endif
	return err;
}

bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
				    struct net_device *netdev)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);

	if (tunnel && tunnel->can_offload(priv))
		return true;
	else
		return false;
}

int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
				 struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);

	if (!tunnel) {
		e->reformat_type = -1;
		return -EOPNOTSUPP;
	}

	return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
}

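/* Translate the tunnel (outer header) part of a TC flower match into the flow
 * spec: UDP ports, tunnel-specific fields, outer IP addresses, TOS/TTL, and a
 * match on non-fragmented packets.
 */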
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
		       struct mlx5e_priv *priv,
		       struct mlx5_flow_spec *spec,
		       struct flow_cls_offload *f,
		       u8 *match_level)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	struct netlink_ext_ack *extack = f->common.extack;
	int err = 0;

	if (!tunnel) {
		netdev_warn(priv->netdev,
			    "decapsulation offload is not supported for %s net device\n",
			    mlx5e_netdev_kind(filter_dev));
		err = -EOPNOTSUPP;
		goto out;
	}

	*match_level = tunnel->match_level;

	if (tunnel->parse_udp_ports) {
		err = tunnel->parse_udp_ports(priv, spec, f,
					      headers_c, headers_v);
		if (err)
			goto out;
	}

	if (tunnel->parse_tunnel) {
		err = tunnel->parse_tunnel(priv, spec, f,
					   headers_c, headers_v);
		if (err)
			goto out;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_basic key_basic = {};
		struct flow_dissector_key_basic mask_basic = {
			.n_proto = htons(0xFFFF),
		};
		struct flow_match_basic match_basic = {
			.key = &key_basic, .mask = &mask_basic,
		};
		struct flow_match_control match;
		u16 addr_type;

		flow_rule_match_enc_control(rule, &match);
		addr_type = match.key->addr_type;

		/* The tunnel addr_type uses the same key IDs as the non-tunnel one */
		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
			struct flow_match_ipv4_addrs match;

			flow_rule_match_enc_ipv4_addrs(rule, &match);
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->dst));

			key_basic.n_proto = htons(ETH_P_IP);
			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
					       headers_c, headers_v);
		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
			struct flow_match_ipv6_addrs match;

			flow_rule_match_enc_ipv6_addrs(rule, &match);
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			key_basic.n_proto = htons(ETH_P_IPV6);
			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
					       headers_c, headers_v);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		flow_rule_match_enc_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos  >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB
			(priv->mdev,
			 ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			err = -EOPNOTSUPP;
			goto out;
		}
	}

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;

out:
	return err;
}

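/* Common helper for UDP-based tunnels: require a fully-masked outer UDP
 * destination port and add the UDP protocol and port matches to the flow spec.
 */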
int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
				 struct mlx5_flow_spec *spec,
				 struct flow_cls_offload *f,
				 void *headers_c,
				 void *headers_v)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct flow_match_ports enc_ports;

	/* Full udp dst port must be given */

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "UDP tunnel decap filter must include enc_dst_port condition");
		netdev_warn(priv->netdev,
			    "UDP tunnel decap filter must include enc_dst_port condition\n");
		return -EOPNOTSUPP;
	}

	flow_rule_match_enc_ports(rule, &enc_ports);

	if (memchr_inv(&enc_ports.mask->dst, 0xff,
		       sizeof(enc_ports.mask->dst))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "UDP tunnel decap filter must match enc_dst_port fully");
		netdev_warn(priv->netdev,
			    "UDP tunnel decap filter must match enc_dst_port fully\n");
		return -EOPNOTSUPP;
	}

	/* match on UDP protocol and dst port number */

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
		 ntohs(enc_ports.mask->dst));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
		 ntohs(enc_ports.key->dst));

	/* UDP src port on outer header is generated by HW,
	 * so it is probably a bad idea to request matching it.
	 * Nonetheless, it is allowed.
	 */

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
		 ntohs(enc_ports.mask->src));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
		 ntohs(enc_ports.key->src));

	return 0;
}