// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include <linux/mlx5/fs.h>
#include "en/mapping.h"
#include "en/tc/int_port.h"
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"

struct mlx5e_tc_int_port {
	enum mlx5e_tc_int_port_type type;
	int ifindex;
	u32 match_metadata;
	u32 mapping;
	struct list_head list;
	struct mlx5_flow_handle *rx_rule;
	refcount_t refcnt;
	struct rcu_head rcu_head;
};

struct mlx5e_tc_int_port_priv {
	struct mlx5_core_dev *dev;
	struct mutex int_ports_lock; /* Protects int ports list */
	struct list_head int_ports; /* Uses int_ports_lock */
	u16 num_ports;
	bool ul_rep_rx_ready; /* Set when the uplink rep rx path is ready */
	struct mapping_ctx *metadata_mapping; /* Metadata for source port rewrite and matching */
};

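/* Internal port offload requires matching on the source vport metadata
 * in reg_c_0 and the reg_c_preserve capability, so the metadata is not
 * lost on loopback.
 */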
bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
{
	return mlx5_eswitch_vport_match_metadata_enabled(esw) &&
	       MLX5_CAP_GEN(esw->dev, reg_c_preserve);
}

u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port)
{
	return int_port->match_metadata;
}

int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
{
	/* For egress forwarding, the packet may have come from a vport
	 * and been redirected to the int port, or it may have come from
	 * the uplink, gone via the internal port and been hairpinned
	 * back to the uplink, so set the source to any port in this case.
	 */
	return int_port->type == MLX5E_TC_INT_PORT_EGRESS ?
		MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT :
		MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
}

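/* The source port metadata occupies the most significant
 * ESW_SOURCE_PORT_METADATA_BITS of reg_c_0; shift the allocated value
 * into place so it can be used directly as a match value.
 */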
u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
{
	return int_port->match_metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
}

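/* Add a rule to the eswitch ft offloads table that matches this
 * internal port's source metadata in reg_c_0 and forwards to @dest,
 * using the int port mapping as flow tag for miss handling.
 */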
static struct mlx5_flow_handle *
mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw,
			      struct mlx5e_tc_int_port *int_port,
			      struct mlx5_flow_destination *dest)
{
	struct mlx5_flow_context *flow_context;
	struct mlx5_flow_act flow_act = {};
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_spec *spec;
	void *misc;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return ERR_PTR(-ENOMEM);

	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
		 mlx5e_tc_int_port_get_metadata_for_match(int_port));

	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
		 mlx5_eswitch_get_vport_metadata_mask());

	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;

	/* Overwrite flow tag with the int port metadata mapping
	 * instead of the chain mapping.
	 */
	flow_context = &spec->flow_context;
	flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
	flow_context->flow_tag = int_port->mapping;
	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
					&flow_act, dest, 1);
	if (IS_ERR(flow_rule))
		mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n",
			       PTR_ERR(flow_rule));

	kvfree(spec);

	return flow_rule;
}

static struct mlx5e_tc_int_port *
mlx5e_int_port_lookup(struct mlx5e_tc_int_port_priv *priv,
		      int ifindex,
		      enum mlx5e_tc_int_port_type type)
{
	struct mlx5e_tc_int_port *int_port;

	if (!priv->ul_rep_rx_ready)
		goto not_found;

	list_for_each_entry(int_port, &priv->int_ports, list)
		if (int_port->ifindex == ifindex && int_port->type == type) {
			refcount_inc(&int_port->refcnt);
			return int_port;
		}

not_found:
	return NULL;
}

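/* Allocate a unique metadata value for the (type, ifindex) pair and
 * mark it as internal port metadata via the reserved PFNUM bits above
 * ESW_VPORT_BITS.
 */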
static int mlx5e_int_port_metadata_alloc(struct mlx5e_tc_int_port_priv *priv,
					 int ifindex, enum mlx5e_tc_int_port_type type,
					 u32 *id)
{
	u32 mapped_key[2] = {type, ifindex};
	int err;

	err = mapping_add(priv->metadata_mapping, mapped_key, id);
	if (err)
		return err;

	/* Fill upper 4 bits of PFNUM with reserved value */
	*id |= 0xf << ESW_VPORT_BITS;

	return 0;
}

static void mlx5e_int_port_metadata_free(struct mlx5e_tc_int_port_priv *priv,
					 u32 id)
{
	id &= (1 << ESW_VPORT_BITS) - 1;
	mapping_remove(priv->metadata_mapping, id);
}

/* Must be called with priv->int_ports_lock held */
static struct mlx5e_tc_int_port *
mlx5e_int_port_add(struct mlx5e_tc_int_port_priv *priv,
		   int ifindex,
		   enum mlx5e_tc_int_port_type type)
{
	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
	struct mlx5_mapped_obj mapped_obj = {};
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_int_port *int_port;
	struct mlx5_flow_destination dest;
	struct mapping_ctx *ctx;
	u32 match_metadata;
	u32 mapping;
	int err;

	if (priv->num_ports == MLX5E_TC_MAX_INT_PORT_NUM) {
		mlx5_core_dbg(priv->dev, "Cannot add a new int port, max supported %d",
			      MLX5E_TC_MAX_INT_PORT_NUM);
		return ERR_PTR(-ENOSPC);
	}

	int_port = kzalloc(sizeof(*int_port), GFP_KERNEL);
	if (!int_port)
		return ERR_PTR(-ENOMEM);

	err = mlx5e_int_port_metadata_alloc(priv, ifindex, type, &match_metadata);
	if (err) {
		mlx5_core_warn(esw->dev, "Cannot add a new internal port, metadata allocation failed for ifindex %d",
			       ifindex);
		goto err_metadata;
	}

	/* map metadata to reg_c0 object for miss handling */
	ctx = esw->offloads.reg_c0_obj_pool;
	mapped_obj.type = MLX5_MAPPED_OBJ_INT_PORT_METADATA;
	mapped_obj.int_port_metadata = match_metadata;
	err = mapping_add(ctx, &mapped_obj, &mapping);
	if (err)
		goto err_map;

	int_port->type = type;
	int_port->ifindex = ifindex;
	int_port->match_metadata = match_metadata;
	int_port->mapping = mapping;

	/* Create a match on internal vport metadata in vport table */
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);

	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
	dest.ft = uplink_rpriv->root_ft;

	int_port->rx_rule = mlx5e_int_port_create_rx_rule(esw, int_port, &dest);
	if (IS_ERR(int_port->rx_rule)) {
		err = PTR_ERR(int_port->rx_rule);
		mlx5_core_warn(esw->dev, "Can't add internal port rx rule, err %d", err);
		goto err_rx_rule;
	}

	refcount_set(&int_port->refcnt, 1);
	list_add_rcu(&int_port->list, &priv->int_ports);
	priv->num_ports++;

	return int_port;

err_rx_rule:
	mapping_remove(ctx, int_port->mapping);

err_map:
	mlx5e_int_port_metadata_free(priv, match_metadata);

err_metadata:
	kfree(int_port);

	return ERR_PTR(err);
}

/* Must be called with priv->int_ports_lock held */
static void
mlx5e_int_port_remove(struct mlx5e_tc_int_port_priv *priv,
		      struct mlx5e_tc_int_port *int_port)
{
	struct mlx5_eswitch *esw = priv->dev->priv.eswitch;
	struct mapping_ctx *ctx;

	ctx = esw->offloads.reg_c0_obj_pool;

	list_del_rcu(&int_port->list);

	/* The following resources are not used by the rcu readers
	 * of this int_port object, so it is safe to release them.
	 */
	if (int_port->rx_rule)
		mlx5_del_flow_rules(int_port->rx_rule);
	mapping_remove(ctx, int_port->mapping);
	mlx5e_int_port_metadata_free(priv, int_port->match_metadata);
	kfree_rcu_mightsleep(int_port);
	priv->num_ports--;
}

/* Must be called with rcu_read_lock held */
static struct mlx5e_tc_int_port *
mlx5e_int_port_get_from_metadata(struct mlx5e_tc_int_port_priv *priv,
				 u32 metadata)
{
	struct mlx5e_tc_int_port *int_port;

	list_for_each_entry_rcu(int_port, &priv->int_ports, list)
		if (int_port->match_metadata == metadata)
			return int_port;

	return NULL;
}

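/* Look up the int port matching (ifindex, type) and take a reference,
 * or allocate a new one. The reference must be released with
 * mlx5e_tc_int_port_put().
 */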
struct mlx5e_tc_int_port *
mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
		      int ifindex,
		      enum mlx5e_tc_int_port_type type)
{
	struct mlx5e_tc_int_port *int_port;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->int_ports_lock);

	/* Reject request if ul rep not ready */
	if (!priv->ul_rep_rx_ready) {
		int_port = ERR_PTR(-EOPNOTSUPP);
		goto done;
	}

	int_port = mlx5e_int_port_lookup(priv, ifindex, type);
	if (int_port)
		goto done;

	/* Alloc and add new int port to list */
	int_port = mlx5e_int_port_add(priv, ifindex, type);

done:
	mutex_unlock(&priv->int_ports_lock);

	return int_port;
}

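/* Release a reference; the last put tears down the int port's rx rule
 * and mappings under int_ports_lock.
 */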
void
mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
		      struct mlx5e_tc_int_port *int_port)
{
	if (!refcount_dec_and_mutex_lock(&int_port->refcnt, &priv->int_ports_lock))
		return;

	mlx5e_int_port_remove(priv, int_port);
	mutex_unlock(&priv->int_ports_lock);
}

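/* Allocate the int port context and the mapping used for per-port
 * match metadata. Returns NULL if internal ports are not supported
 * or on allocation failure.
 */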
struct mlx5e_tc_int_port_priv *
mlx5e_tc_int_port_init(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_int_port_priv *int_port_priv;
	u64 mapping_id;

	if (!mlx5e_tc_int_port_supported(esw))
		return NULL;

	int_port_priv = kzalloc(sizeof(*int_port_priv), GFP_KERNEL);
	if (!int_port_priv)
		return NULL;

	mapping_id = mlx5_query_nic_system_image_guid(priv->mdev);

	int_port_priv->metadata_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_INT_PORT,
								sizeof(u32) * 2,
								(1 << ESW_VPORT_BITS) - 1, true);
	if (IS_ERR(int_port_priv->metadata_mapping)) {
		mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping for int port offload, err=%ld\n",
			       PTR_ERR(int_port_priv->metadata_mapping));
		goto err_mapping;
	}

	int_port_priv->dev = priv->mdev;
	mutex_init(&int_port_priv->int_ports_lock);
	INIT_LIST_HEAD(&int_port_priv->int_ports);

	return int_port_priv;

err_mapping:
	kfree(int_port_priv);

	return NULL;
}

void
mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv)
{
	if (!priv)
		return;

	mutex_destroy(&priv->int_ports_lock);
	mapping_destroy(priv->metadata_mapping);
	kfree(priv);
}

/* Int port rx rules reside in ul rep rx tables.
 * It is possible the ul rep will go down while there are
 * still int port rules in its rx table, so proper cleanup
 * is required to free resources.
 */
void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_tc_int_port_priv *ppriv;
	struct mlx5e_rep_priv *uplink_rpriv;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	ppriv = uplink_priv->int_port_priv;

	if (!ppriv)
		return;

	mutex_lock(&ppriv->int_ports_lock);
	ppriv->ul_rep_rx_ready = true;
	mutex_unlock(&ppriv->int_ports_lock);
}

void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_tc_int_port_priv *ppriv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_int_port *int_port;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	ppriv = uplink_priv->int_port_priv;

	if (!ppriv)
		return;

	mutex_lock(&ppriv->int_ports_lock);

	ppriv->ul_rep_rx_ready = false;

	list_for_each_entry(int_port, &ppriv->int_ports, list) {
		if (!IS_ERR_OR_NULL(int_port->rx_rule))
			mlx5_del_flow_rules(int_port->rx_rule);

		int_port->rx_rule = NULL;
	}

	mutex_unlock(&ppriv->int_ports_lock);
}

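/* Steer a missed packet to the internal port device matching
 * @int_vport_metadata. On success, sets skb->dev and returns true;
 * *forward_tx tells the caller whether to transmit the skb (egress)
 * or pass it up the stack (ingress).
 */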
bool
mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
			  struct sk_buff *skb, u32 int_vport_metadata,
			  bool *forward_tx)
{
	enum mlx5e_tc_int_port_type fwd_type;
	struct mlx5e_tc_int_port *int_port;
	struct net_device *dev;
	int ifindex;

	if (!priv)
		return false;

	rcu_read_lock();
	int_port = mlx5e_int_port_get_from_metadata(priv, int_vport_metadata);
	if (!int_port) {
		rcu_read_unlock();
		mlx5_core_dbg(priv->dev, "Unable to find int port with metadata 0x%.8x\n",
			      int_vport_metadata);
		return false;
	}

	ifindex = int_port->ifindex;
	fwd_type = int_port->type;
	rcu_read_unlock();

	dev = dev_get_by_index(&init_net, ifindex);
	if (!dev) {
		mlx5_core_dbg(priv->dev,
			      "Couldn't find internal port device with ifindex: %d\n",
			      ifindex);
		return false;
	}

	skb->skb_iif = dev->ifindex;
	skb->dev = dev;

	if (fwd_type == MLX5E_TC_INT_PORT_INGRESS) {
		skb->pkt_type = PACKET_HOST;
		skb_set_redirected(skb, true);
		*forward_tx = false;
	} else {
		skb_reset_network_header(skb);
		skb_push_rcsum(skb, skb->mac_len);
		skb_set_redirected(skb, false);
		*forward_tx = true;
	}

	return true;
}