/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the
   equalizer, simply attach this qdisc to the slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
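
   To actually pass traffic, the teql0 master itself must then be
   brought up and addressed; a minimal sketch using standard iproute2
   commands (the 10.0.0.1/24 address is only an illustration):

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0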

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of heavy packet reordering.
      I estimate the upper useful speed ratio at roughly 10:1.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static inline void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

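/* Attach sch as a new slave: check that the slave device is compatible
 * with the master (header room, link flags, MTU) and link it into the
 * master's circular list of slaves.
 */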
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}
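/* Resolve the link-layer header for skb on slave device dev, using the
 * neighbour entry attached to skb's dst. A freshly resolved neighbour
 * is cached in q->ncache to speed up subsequent lookups.
 */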
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb_dst(skb)->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb_dst(skb) == NULL ||
	    skb_dst(skb)->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}
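/* Transmit on the master device: walk the circular list of slaves
 * round-robin, resolve the link-layer header for each candidate, and
 * hand the skb to the first slave whose tx queue will take it.
 */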
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
		const struct net_device_ops *slave_ops = slave->netdev_ops;

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
					txq_trans_update(slave_txq);
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					txq->tx_packets++;
					txq->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	dev->stats.tx_errors++;

drop:
	txq->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

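	/* 68 is the minimum MTU required by IPv4 (RFC 791). */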
	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->peek	= teql_peek;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
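/* The parameter can be set at load time, e.g. (assuming the module is
 * built as sch_teql):  modprobe sch_teql max_equalizers=2
 */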

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");