/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on the queue length: it should be chosen > qth_max
	to allow packet bursts. This parameter does not affect the
	algorithm's behaviour and can be chosen arbitrarily high
	(well, less than RAM size). If RED works correctly, this
	limit is never actually reached.
 */
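
/*
 * Example setup from userspace (illustrative values only, not a tuning
 * recommendation; see the tc-red(8) man page for parameter semantics):
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 bandwidth 10mbit ecn
 */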

struct red_sched_data
{
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct red_parms	parms;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

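/*
 * Enqueue: recompute the average queue size from the child qdisc's byte
 * backlog, then let the RED algorithm decide whether to accept the packet,
 * ECN-mark it, or drop it as an early/forced congestion signal.
 */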
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	switch (red_action(&q->parms, q->parms.qavg)) {
		case RED_DONT_MARK:
			break;

		case RED_PROB_MARK:
			sch->qstats.overlimits++;
			if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
				q->stats.prob_drop++;
				goto congestion_drop;
			}

			q->stats.prob_mark++;
			break;

		case RED_HARD_MARK:
			sch->qstats.overlimits++;
			if (red_use_harddrop(q) || !red_use_ecn(q) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}

			q->stats.forced_mark++;
			break;
	}

	ret = child->enqueue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		sch->q.qlen++;
	} else {
		q->stats.pdrop++;
		sch->qstats.drops++;
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

static int red_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	ret = child->ops->requeue(skb, child);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.requeues++;
		sch->q.qlen++;
	}
	return ret;
}

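/*
 * Dequeue from the child qdisc.  When the child runs empty, record the
 * start of an idle period so that the average queue size keeps decaying
 * while the link is idle.
 */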
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb)
		sch->q.qlen--;
	else if (!red_is_idling(&q->parms))
		red_start_of_idle_period(&q->parms);

	return skb;
}

static unsigned int red_drop(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;

	if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
		q->stats.other++;
		sch->qstats.drops++;
		sch->q.qlen--;
		return len;
	}

	if (!red_is_idling(&q->parms))
		red_start_of_idle_period(&q->parms);

	return 0;
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	red_restart(&q->parms);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	qdisc_destroy(q->qdisc);
}

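/*
 * Create the default child qdisc: a byte-limited FIFO (bfifo) whose limit
 * mirrors the RED 'limit' parameter, so the hard limit is enforced by the
 * child while RED itself only makes the marking/dropping decisions.
 */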
static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
{
	struct Qdisc *q;
	struct rtattr *rta;
	int ret;

	q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
			      TC_H_MAKE(sch->handle, 1));
	if (q) {
		rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
			      GFP_KERNEL);
		if (rta) {
			rta->rta_type = RTM_NEWQDISC;
			rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
			((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

			ret = q->ops->change(q, rta);
			kfree(rta);

			if (ret == 0)
				return q;
		}
		qdisc_destroy(q);
	}
	return NULL;
}

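/*
 * Apply a new configuration: validate the TCA_RED_PARMS/TCA_RED_STAB
 * attributes, build a replacement bfifo child when a limit is given, and
 * update the RED parameters under the tree lock.
 */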
static int red_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_RED_MAX];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_RED_PARMS-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
	    tb[TCA_RED_STAB-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
		return -EINVAL;

	ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);

	if (ctl->limit > 0) {
		child = red_create_dflt(sch, ctl->limit);
		if (child == NULL)
			return -ENOMEM;
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
				 ctl->Plog, ctl->Scell_log,
				 RTA_DATA(tb[TCA_RED_STAB-1]));

	if (skb_queue_empty(&sch->q))
		red_end_of_idle_period(&q->parms);

	sch_tree_unlock(sch);
	return 0;
}

static int red_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	return red_change(sch, opt);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct rtattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	opts = RTA_NEST(skb, TCA_OPTIONS);
	RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
	return RTA_NEST_END(skb, opts);

rtattr_failure:
	return RTA_NEST_CANCEL(skb, opts);
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

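/*
 * RED is formally classful: the single child qdisc is exposed as class 1
 * so that a different child can be grafted in and inspected through the
 * usual class operations.  Filters and class creation are not supported.
 */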
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (cl != 1)
		return -ENOENT;
	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_put(struct Qdisc *sch, unsigned long arg)
{
}

static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int red_delete(struct Qdisc *sch, unsigned long cl)
{
	return -ENOSYS;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.get		=	red_get,
	.put		=	red_put,
	.change		=	red_change_class,
	.delete		=	red_delete,
	.walk		=	red_walk,
	.tcf_chain	=	red_find_tcf,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.requeue	=	red_requeue,
	.drop		=	red_drop,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");