1// SPDX-License-Identifier: GPL-2.0-or-later
2/* drivers/net/ifb.c:
3
4	The purpose of this driver is to provide a device that allows
5	for sharing of resources:
6
	1) qdiscs/policies that are per device as opposed to system wide.
	ifb provides a device that traffic can be redirected to, thus giving
	an impression of sharing.
10
11	2) Allows for queueing incoming traffic for shaping instead of
12	dropping.
13
14	The original concept is based on what is known as the IMQ
15	driver initially written by Martin Devera, later rewritten
16	by Patrick McHardy and then maintained by Andre Correa.
17
	You need the tc mirror or redirect action to feed packets to this
	device.
20
21
22	Authors:	Jamal Hadi Salim (2005)
23
24*/
25
26
27#include <linux/module.h>
28#include <linux/kernel.h>
29#include <linux/netdevice.h>
30#include <linux/ethtool.h>
31#include <linux/etherdevice.h>
32#include <linux/init.h>
33#include <linux/interrupt.h>
34#include <linux/moduleparam.h>
35#include <linux/netfilter_netdev.h>
36#include <net/pkt_sched.h>
37#include <net/net_namespace.h>
38
/* Value ifb_setup() stores in dev->tx_queue_len; ifb_xmit() stops a TX queue
 * once that queue's rq backlog has reached dev->tx_queue_len.
 */
#define TX_Q_LIMIT	32
40
41struct ifb_q_stats {
42	u64 packets;
43	u64 bytes;
44	struct u64_stats_sync	sync;
45};
46
47struct ifb_q_private {
48	struct net_device	*dev;
49	struct tasklet_struct   ifb_tasklet;
50	int			tasklet_pending;
51	int			txqnum;
52	struct sk_buff_head     rq;
53	struct sk_buff_head     tq;
54	struct ifb_q_stats	rx_stats;
55	struct ifb_q_stats	tx_stats;
56} ____cacheline_aligned_in_smp;
57
/* Device private area (netdev_priv()). */
struct ifb_dev_private {
	/* Array of dev->num_tx_queues per-queue states; allocated in
	 * ifb_dev_init() and released in ifb_dev_free().
	 */
	struct ifb_q_private *tx_private;
};
61
62/* For ethtools stats. */
63struct ifb_q_stats_desc {
64	char	desc[ETH_GSTRING_LEN];
65	size_t	offset;
66};
67
68#define IFB_Q_STAT(m)	offsetof(struct ifb_q_stats, m)
69
70static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
71	{ "packets",	IFB_Q_STAT(packets) },
72	{ "bytes",	IFB_Q_STAT(bytes) },
73};
74
75#define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)
76
77static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
78static int ifb_open(struct net_device *dev);
79static int ifb_close(struct net_device *dev);
80
81static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
82{
83	u64_stats_update_begin(&stats->sync);
84	stats->packets++;
85	stats->bytes += len;
86	u64_stats_update_end(&stats->sync);
87}
88
89static void ifb_ri_tasklet(struct tasklet_struct *t)
90{
91	struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
92	struct netdev_queue *txq;
93	struct sk_buff *skb;
94
95	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
96	skb = skb_peek(&txp->tq);
97	if (!skb) {
98		if (!__netif_tx_trylock(txq))
99			goto resched;
100		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
101		__netif_tx_unlock(txq);
102	}
103
104	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
105		/* Skip tc and netfilter to prevent redirection loop. */
106		skb->redirected = 0;
107#ifdef CONFIG_NET_CLS_ACT
108		skb->tc_skip_classify = 1;
109#endif
110		nf_skip_egress(skb, true);
111
112		ifb_update_q_stats(&txp->tx_stats, skb->len);
113
114		rcu_read_lock();
115		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
116		if (!skb->dev) {
117			rcu_read_unlock();
118			dev_kfree_skb(skb);
119			txp->dev->stats.tx_dropped++;
120			if (skb_queue_len(&txp->tq) != 0)
121				goto resched;
122			break;
123		}
124		rcu_read_unlock();
125		skb->skb_iif = txp->dev->ifindex;
126
127		if (!skb->from_ingress) {
128			dev_queue_xmit(skb);
129		} else {
130			skb_pull_rcsum(skb, skb->mac_len);
131			netif_receive_skb(skb);
132		}
133	}
134
135	if (__netif_tx_trylock(txq)) {
136		skb = skb_peek(&txp->rq);
137		if (!skb) {
138			txp->tasklet_pending = 0;
139			if (netif_tx_queue_stopped(txq))
140				netif_tx_wake_queue(txq);
141		} else {
142			__netif_tx_unlock(txq);
143			goto resched;
144		}
145		__netif_tx_unlock(txq);
146	} else {
147resched:
148		txp->tasklet_pending = 1;
149		tasklet_schedule(&txp->ifb_tasklet);
150	}
151
152}
153
154static void ifb_stats64(struct net_device *dev,
155			struct rtnl_link_stats64 *stats)
156{
157	struct ifb_dev_private *dp = netdev_priv(dev);
158	struct ifb_q_private *txp = dp->tx_private;
159	unsigned int start;
160	u64 packets, bytes;
161	int i;
162
163	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
164		do {
165			start = u64_stats_fetch_begin(&txp->rx_stats.sync);
166			packets = txp->rx_stats.packets;
167			bytes = txp->rx_stats.bytes;
168		} while (u64_stats_fetch_retry(&txp->rx_stats.sync, start));
169		stats->rx_packets += packets;
170		stats->rx_bytes += bytes;
171
172		do {
173			start = u64_stats_fetch_begin(&txp->tx_stats.sync);
174			packets = txp->tx_stats.packets;
175			bytes = txp->tx_stats.bytes;
176		} while (u64_stats_fetch_retry(&txp->tx_stats.sync, start));
177		stats->tx_packets += packets;
178		stats->tx_bytes += bytes;
179	}
180	stats->rx_dropped = dev->stats.rx_dropped;
181	stats->tx_dropped = dev->stats.tx_dropped;
182}
183
184static int ifb_dev_init(struct net_device *dev)
185{
186	struct ifb_dev_private *dp = netdev_priv(dev);
187	struct ifb_q_private *txp;
188	int i;
189
190	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
191	if (!txp)
192		return -ENOMEM;
193	dp->tx_private = txp;
194	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
195		txp->txqnum = i;
196		txp->dev = dev;
197		__skb_queue_head_init(&txp->rq);
198		__skb_queue_head_init(&txp->tq);
199		u64_stats_init(&txp->rx_stats.sync);
200		u64_stats_init(&txp->tx_stats.sync);
201		tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
202		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
203	}
204	return 0;
205}
206
207static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
208{
209	u8 *p = buf;
210	int i, j;
211
212	switch (stringset) {
213	case ETH_SS_STATS:
214		for (i = 0; i < dev->real_num_rx_queues; i++)
215			for (j = 0; j < IFB_Q_STATS_LEN; j++)
216				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
217						i, ifb_q_stats_desc[j].desc);
218
219		for (i = 0; i < dev->real_num_tx_queues; i++)
220			for (j = 0; j < IFB_Q_STATS_LEN; j++)
221				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
222						i, ifb_q_stats_desc[j].desc);
223
224		break;
225	}
226}
227
228static int ifb_get_sset_count(struct net_device *dev, int sset)
229{
230	switch (sset) {
231	case ETH_SS_STATS:
232		return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
233					  dev->real_num_tx_queues);
234	default:
235		return -EOPNOTSUPP;
236	}
237}
238
239static void ifb_fill_stats_data(u64 **data,
240				struct ifb_q_stats *q_stats)
241{
242	void *stats_base = (void *)q_stats;
243	unsigned int start;
244	size_t offset;
245	int j;
246
247	do {
248		start = u64_stats_fetch_begin(&q_stats->sync);
249		for (j = 0; j < IFB_Q_STATS_LEN; j++) {
250			offset = ifb_q_stats_desc[j].offset;
251			(*data)[j] = *(u64 *)(stats_base + offset);
252		}
253	} while (u64_stats_fetch_retry(&q_stats->sync, start));
254
255	*data += IFB_Q_STATS_LEN;
256}
257
258static void ifb_get_ethtool_stats(struct net_device *dev,
259				  struct ethtool_stats *stats, u64 *data)
260{
261	struct ifb_dev_private *dp = netdev_priv(dev);
262	struct ifb_q_private *txp;
263	int i;
264
265	for (i = 0; i < dev->real_num_rx_queues; i++) {
266		txp = dp->tx_private + i;
267		ifb_fill_stats_data(&data, &txp->rx_stats);
268	}
269
270	for (i = 0; i < dev->real_num_tx_queues; i++) {
271		txp = dp->tx_private + i;
272		ifb_fill_stats_data(&data, &txp->tx_stats);
273	}
274}
275
276static const struct net_device_ops ifb_netdev_ops = {
277	.ndo_open	= ifb_open,
278	.ndo_stop	= ifb_close,
279	.ndo_get_stats64 = ifb_stats64,
280	.ndo_start_xmit	= ifb_xmit,
281	.ndo_validate_addr = eth_validate_addr,
282	.ndo_init	= ifb_dev_init,
283};
284
285static const struct ethtool_ops ifb_ethtool_ops = {
286	.get_strings		= ifb_get_strings,
287	.get_sset_count		= ifb_get_sset_count,
288	.get_ethtool_stats	= ifb_get_ethtool_stats,
289};
290
/* Feature bits ifb_setup() ORs into dev->features (and, minus the VLAN TX
 * bits, into dev->vlan_features).
 */
#define IFB_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA	| \
		      NETIF_F_HW_CSUM					| \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL	| \
		      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX)
295
296static void ifb_dev_free(struct net_device *dev)
297{
298	struct ifb_dev_private *dp = netdev_priv(dev);
299	struct ifb_q_private *txp = dp->tx_private;
300	int i;
301
302	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
303		tasklet_kill(&txp->ifb_tasklet);
304		__skb_queue_purge(&txp->rq);
305		__skb_queue_purge(&txp->tq);
306	}
307	kfree(dp->tx_private);
308}
309
310static void ifb_setup(struct net_device *dev)
311{
312	/* Initialize the device structure. */
313	dev->netdev_ops = &ifb_netdev_ops;
314	dev->ethtool_ops = &ifb_ethtool_ops;
315
316	/* Fill in device structure with ethernet-generic values. */
317	ether_setup(dev);
318	dev->tx_queue_len = TX_Q_LIMIT;
319
320	dev->features |= IFB_FEATURES;
321	dev->hw_features |= dev->features;
322	dev->hw_enc_features |= dev->features;
323	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
324					       NETIF_F_HW_VLAN_STAG_TX);
325
326	dev->flags |= IFF_NOARP;
327	dev->flags &= ~IFF_MULTICAST;
328	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
329	netif_keep_dst(dev);
330	eth_hw_addr_random(dev);
331	dev->needs_free_netdev = true;
332	dev->priv_destructor = ifb_dev_free;
333
334	dev->min_mtu = 0;
335	dev->max_mtu = 0;
336}
337
338static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
339{
340	struct ifb_dev_private *dp = netdev_priv(dev);
341	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
342
343	ifb_update_q_stats(&txp->rx_stats, skb->len);
344
345	if (!skb->redirected || !skb->skb_iif) {
346		dev_kfree_skb(skb);
347		dev->stats.rx_dropped++;
348		return NETDEV_TX_OK;
349	}
350
351	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
352		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
353
354	__skb_queue_tail(&txp->rq, skb);
355	if (!txp->tasklet_pending) {
356		txp->tasklet_pending = 1;
357		tasklet_schedule(&txp->ifb_tasklet);
358	}
359
360	return NETDEV_TX_OK;
361}
362
/* ndo_stop handler: stops every TX queue of @dev.  Always returns 0. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);

	return 0;
}
368
/* ndo_open handler: starts every TX queue of @dev.  Always returns 0. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);

	return 0;
}
374
375static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
376			struct netlink_ext_ack *extack)
377{
378	if (tb[IFLA_ADDRESS]) {
379		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
380			return -EINVAL;
381		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
382			return -EADDRNOTAVAIL;
383	}
384	return 0;
385}
386
387static struct rtnl_link_ops ifb_link_ops __read_mostly = {
388	.kind		= "ifb",
389	.priv_size	= sizeof(struct ifb_dev_private),
390	.setup		= ifb_setup,
391	.validate	= ifb_validate,
392};
393
394/* Number of ifb devices to be set up by this module.
395 * Note that these legacy devices have one queue.
396 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
397 */
398static int numifbs = 2;
399module_param(numifbs, int, 0);
400MODULE_PARM_DESC(numifbs, "Number of ifb devices");
401
402static int __init ifb_init_one(int index)
403{
404	struct net_device *dev_ifb;
405	int err;
406
407	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
408			       NET_NAME_UNKNOWN, ifb_setup);
409
410	if (!dev_ifb)
411		return -ENOMEM;
412
413	dev_ifb->rtnl_link_ops = &ifb_link_ops;
414	err = register_netdevice(dev_ifb);
415	if (err < 0)
416		goto err;
417
418	return 0;
419
420err:
421	free_netdev(dev_ifb);
422	return err;
423}
424
425static int __init ifb_init_module(void)
426{
427	int i, err;
428
429	down_write(&pernet_ops_rwsem);
430	rtnl_lock();
431	err = __rtnl_link_register(&ifb_link_ops);
432	if (err < 0)
433		goto out;
434
435	for (i = 0; i < numifbs && !err; i++) {
436		err = ifb_init_one(i);
437		cond_resched();
438	}
439	if (err)
440		__rtnl_link_unregister(&ifb_link_ops);
441
442out:
443	rtnl_unlock();
444	up_write(&pernet_ops_rwsem);
445
446	return err;
447}
448
449static void __exit ifb_cleanup_module(void)
450{
451	rtnl_link_unregister(&ifb_link_ops);
452}
453
454module_init(ifb_init_module);
455module_exit(ifb_cleanup_module);
456MODULE_LICENSE("GPL");
457MODULE_DESCRIPTION("Intermediate Functional Block (ifb) netdevice driver for sharing of resources and ingress packet queuing");
458MODULE_AUTHOR("Jamal Hadi Salim");
459MODULE_ALIAS_RTNL_LINK("ifb");
460