1/*
2 * net/sched/sch_prio.c	Simple 3-band priority "scheduler".
3 *
4 *		This program is free software; you can redistribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * Fixes:       19990609: J Hadi Salim <hadi@nortelnetworks.com>:
11 *              Init --  EINVAL when opt undefined
12 */
13
14#include <linux/module.h>
15#include <linux/slab.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/string.h>
19#include <linux/errno.h>
20#include <linux/skbuff.h>
21#include <net/netlink.h>
22#include <net/pkt_sched.h>
23
24
25struct prio_sched_data
26{
27	int bands;
28	struct tcf_proto *filter_list;
29	u8  prio2band[TC_PRIO_MAX+1];
30	struct Qdisc *queues[TCQ_PRIO_BANDS];
31};
32
33
34static struct Qdisc *
35prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
36{
37	struct prio_sched_data *q = qdisc_priv(sch);
38	u32 band = skb->priority;
39	struct tcf_result res;
40	int err;
41
42	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
43	if (TC_H_MAJ(skb->priority) != sch->handle) {
44		err = tc_classify(skb, q->filter_list, &res);
45#ifdef CONFIG_NET_CLS_ACT
46		switch (err) {
47		case TC_ACT_STOLEN:
48		case TC_ACT_QUEUED:
49			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
50		case TC_ACT_SHOT:
51			return NULL;
52		}
53#endif
54		if (!q->filter_list || err < 0) {
55			if (TC_H_MAJ(band))
56				band = 0;
57			return q->queues[q->prio2band[band&TC_PRIO_MAX]];
58		}
59		band = res.classid;
60	}
61	band = TC_H_MIN(band) - 1;
62	if (band >= q->bands)
63		return q->queues[q->prio2band[0]];
64
65	return q->queues[band];
66}
67
68static int
69prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
70{
71	struct Qdisc *qdisc;
72	int ret;
73
74	qdisc = prio_classify(skb, sch, &ret);
75#ifdef CONFIG_NET_CLS_ACT
76	if (qdisc == NULL) {
77
78		if (ret & __NET_XMIT_BYPASS)
79			sch->qstats.drops++;
80		kfree_skb(skb);
81		return ret;
82	}
83#endif
84
85	ret = qdisc_enqueue(skb, qdisc);
86	if (ret == NET_XMIT_SUCCESS) {
87		sch->bstats.bytes += qdisc_pkt_len(skb);
88		sch->bstats.packets++;
89		sch->q.qlen++;
90		return NET_XMIT_SUCCESS;
91	}
92	if (net_xmit_drop_count(ret))
93		sch->qstats.drops++;
94	return ret;
95}
96
97static struct sk_buff *prio_peek(struct Qdisc *sch)
98{
99	struct prio_sched_data *q = qdisc_priv(sch);
100	int prio;
101
102	for (prio = 0; prio < q->bands; prio++) {
103		struct Qdisc *qdisc = q->queues[prio];
104		struct sk_buff *skb = qdisc->ops->peek(qdisc);
105		if (skb)
106			return skb;
107	}
108	return NULL;
109}
110
111static struct sk_buff *prio_dequeue(struct Qdisc* sch)
112{
113	struct prio_sched_data *q = qdisc_priv(sch);
114	int prio;
115
116	for (prio = 0; prio < q->bands; prio++) {
117		struct Qdisc *qdisc = q->queues[prio];
118		struct sk_buff *skb = qdisc->dequeue(qdisc);
119		if (skb) {
120			sch->q.qlen--;
121			return skb;
122		}
123	}
124	return NULL;
125
126}
127
128static unsigned int prio_drop(struct Qdisc* sch)
129{
130	struct prio_sched_data *q = qdisc_priv(sch);
131	int prio;
132	unsigned int len;
133	struct Qdisc *qdisc;
134
135	for (prio = q->bands-1; prio >= 0; prio--) {
136		qdisc = q->queues[prio];
137		if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
138			sch->q.qlen--;
139			return len;
140		}
141	}
142	return 0;
143}
144
145
146static void
147prio_reset(struct Qdisc* sch)
148{
149	int prio;
150	struct prio_sched_data *q = qdisc_priv(sch);
151
152	for (prio=0; prio<q->bands; prio++)
153		qdisc_reset(q->queues[prio]);
154	sch->q.qlen = 0;
155}
156
157static void
158prio_destroy(struct Qdisc* sch)
159{
160	int prio;
161	struct prio_sched_data *q = qdisc_priv(sch);
162
163	tcf_destroy_chain(&q->filter_list);
164	for (prio=0; prio<q->bands; prio++)
165		qdisc_destroy(q->queues[prio]);
166}
167
168static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
169{
170	struct prio_sched_data *q = qdisc_priv(sch);
171	struct tc_prio_qopt *qopt;
172	int i;
173
174	if (nla_len(opt) < sizeof(*qopt))
175		return -EINVAL;
176	qopt = nla_data(opt);
177
178	if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
179		return -EINVAL;
180
181	for (i=0; i<=TC_PRIO_MAX; i++) {
182		if (qopt->priomap[i] >= qopt->bands)
183			return -EINVAL;
184	}
185
186	sch_tree_lock(sch);
187	q->bands = qopt->bands;
188	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
189
190	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
191		struct Qdisc *child = q->queues[i];
192		q->queues[i] = &noop_qdisc;
193		if (child != &noop_qdisc) {
194			qdisc_tree_decrease_qlen(child, child->q.qlen);
195			qdisc_destroy(child);
196		}
197	}
198	sch_tree_unlock(sch);
199
200	for (i=0; i<q->bands; i++) {
201		if (q->queues[i] == &noop_qdisc) {
202			struct Qdisc *child, *old;
203			child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
204						  &pfifo_qdisc_ops,
205						  TC_H_MAKE(sch->handle, i + 1));
206			if (child) {
207				sch_tree_lock(sch);
208				old = q->queues[i];
209				q->queues[i] = child;
210
211				if (old != &noop_qdisc) {
212					qdisc_tree_decrease_qlen(old,
213								 old->q.qlen);
214					qdisc_destroy(old);
215				}
216				sch_tree_unlock(sch);
217			}
218		}
219	}
220	return 0;
221}
222
223static int prio_init(struct Qdisc *sch, struct nlattr *opt)
224{
225	struct prio_sched_data *q = qdisc_priv(sch);
226	int i;
227
228	for (i=0; i<TCQ_PRIO_BANDS; i++)
229		q->queues[i] = &noop_qdisc;
230
231	if (opt == NULL) {
232		return -EINVAL;
233	} else {
234		int err;
235
236		if ((err= prio_tune(sch, opt)) != 0)
237			return err;
238	}
239	return 0;
240}
241
242static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
243{
244	struct prio_sched_data *q = qdisc_priv(sch);
245	unsigned char *b = skb_tail_pointer(skb);
246	struct tc_prio_qopt opt;
247
248	opt.bands = q->bands;
249	memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
250
251	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
252
253	return skb->len;
254
255nla_put_failure:
256	nlmsg_trim(skb, b);
257	return -1;
258}
259
260static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
261		      struct Qdisc **old)
262{
263	struct prio_sched_data *q = qdisc_priv(sch);
264	unsigned long band = arg - 1;
265
266	if (new == NULL)
267		new = &noop_qdisc;
268
269	sch_tree_lock(sch);
270	*old = q->queues[band];
271	q->queues[band] = new;
272	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
273	qdisc_reset(*old);
274	sch_tree_unlock(sch);
275
276	return 0;
277}
278
279static struct Qdisc *
280prio_leaf(struct Qdisc *sch, unsigned long arg)
281{
282	struct prio_sched_data *q = qdisc_priv(sch);
283	unsigned long band = arg - 1;
284
285	return q->queues[band];
286}
287
288static unsigned long prio_get(struct Qdisc *sch, u32 classid)
289{
290	struct prio_sched_data *q = qdisc_priv(sch);
291	unsigned long band = TC_H_MIN(classid);
292
293	if (band - 1 >= q->bands)
294		return 0;
295	return band;
296}
297
298static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
299{
300	return prio_get(sch, classid);
301}
302
303
/*
 * Class release hook, installed as both .put and .unbind_tcf in
 * prio_class_ops.  The body is empty: neither argument is used.
 */
static void prio_put(struct Qdisc *q, unsigned long cl)
{
}
307
308static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
309			   struct tcmsg *tcm)
310{
311	struct prio_sched_data *q = qdisc_priv(sch);
312
313	tcm->tcm_handle |= TC_H_MIN(cl);
314	tcm->tcm_info = q->queues[cl-1]->handle;
315	return 0;
316}
317
318static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
319				 struct gnet_dump *d)
320{
321	struct prio_sched_data *q = qdisc_priv(sch);
322	struct Qdisc *cl_q;
323
324	cl_q = q->queues[cl - 1];
325	cl_q->qstats.qlen = cl_q->q.qlen;
326	if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
327	    gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
328		return -1;
329
330	return 0;
331}
332
333static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
334{
335	struct prio_sched_data *q = qdisc_priv(sch);
336	int prio;
337
338	if (arg->stop)
339		return;
340
341	for (prio = 0; prio < q->bands; prio++) {
342		if (arg->count < arg->skip) {
343			arg->count++;
344			continue;
345		}
346		if (arg->fn(sch, prio+1, arg) < 0) {
347			arg->stop = 1;
348			break;
349		}
350		arg->count++;
351	}
352}
353
354static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
355{
356	struct prio_sched_data *q = qdisc_priv(sch);
357
358	if (cl)
359		return NULL;
360	return &q->filter_list;
361}
362
363static const struct Qdisc_class_ops prio_class_ops = {
364	.graft		=	prio_graft,
365	.leaf		=	prio_leaf,
366	.get		=	prio_get,
367	.put		=	prio_put,
368	.walk		=	prio_walk,
369	.tcf_chain	=	prio_find_tcf,
370	.bind_tcf	=	prio_bind,
371	.unbind_tcf	=	prio_put,
372	.dump		=	prio_dump_class,
373	.dump_stats	=	prio_dump_class_stats,
374};
375
376static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
377	.next		=	NULL,
378	.cl_ops		=	&prio_class_ops,
379	.id		=	"prio",
380	.priv_size	=	sizeof(struct prio_sched_data),
381	.enqueue	=	prio_enqueue,
382	.dequeue	=	prio_dequeue,
383	.peek		=	prio_peek,
384	.drop		=	prio_drop,
385	.init		=	prio_init,
386	.reset		=	prio_reset,
387	.destroy	=	prio_destroy,
388	.change		=	prio_tune,
389	.dump		=	prio_dump,
390	.owner		=	THIS_MODULE,
391};
392
/*
 * Module entry point: registers prio_qdisc_ops and returns the result of
 * register_qdisc() unchanged.
 */
static int __init prio_module_init(void)
{
	return register_qdisc(&prio_qdisc_ops);
}
397
/*
 * Module exit point: unregisters prio_qdisc_ops.  The return value of
 * unregister_qdisc(), if any, is not examined.
 */
static void __exit prio_module_exit(void)
{
	unregister_qdisc(&prio_qdisc_ops);
}
402
403module_init(prio_module_init)
404module_exit(prio_module_exit)
405
406MODULE_LICENSE("GPL");
407