1/*
2 * net/sched/sch_gred.c	Generic Random Early Detection queue.
3 *
4 *
5 *              This program is free software; you can redistribute it and/or
6 *              modify it under the terms of the GNU General Public License
7 *              as published by the Free Software Foundation; either version
8 *              2 of the License, or (at your option) any later version.
9 *
10 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
11 *
12 *             991129: -  Bug fix with grio mode
13 *		       - a better sing. AvgQ mode with Grio(WRED)
14 *		       - A finer grained VQ dequeue based on sugestion
15 *		         from Ren Liu
16 *		       - More error checks
17 *
18 *
19 *
20 *  For all the glorious comments look at Alexey's sch_red.c
21 */
22
23#include <linux/config.h>
24#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <asm/bitops.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/sched.h>
31#include <linux/string.h>
32#include <linux/mm.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/in.h>
36#include <linux/errno.h>
37#include <linux/interrupt.h>
38#include <linux/if_ether.h>
39#include <linux/inet.h>
40#include <linux/netdevice.h>
41#include <linux/etherdevice.h>
42#include <linux/notifier.h>
43#include <net/ip.h>
44#include <net/route.h>
45#include <linux/skbuff.h>
46#include <net/sock.h>
47#include <net/pkt_sched.h>
48
49#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
50
51#define D2PRINTK(format,args...)
52
53struct gred_sched_data;
54struct gred_sched;
55
56struct gred_sched_data
57{
58/* Parameters */
59	u32		limit;		/* HARD maximal queue length	*/
60	u32		qth_min;	/* Min average length threshold: A scaled */
61	u32		qth_max;	/* Max average length threshold: A scaled */
62	u32      	DP;		/* the drop pramaters */
63	char		Wlog;		/* log(W)		*/
64	char		Plog;		/* random number bits	*/
65	u32		Scell_max;
66	u32		Rmask;
67	u32		bytesin;	/* bytes seen on virtualQ so far*/
68	u32		packetsin;	/* packets seen on virtualQ so far*/
69	u32		backlog;	/* bytes on the virtualQ */
70	u32		forced;	/* packets dropped for exceeding limits */
71	u32		early;	/* packets dropped as a warning */
72	u32		other;	/* packets dropped by invoking drop() */
73	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
74	char		Scell_log;
75	u8		Stab[256];
76	u8              prio;        /* the prio of this vq */
77
78/* Variables */
79	unsigned long	qave;		/* Average queue length: A scaled */
80	int		qcount;		/* Packets since last random number generation */
81	u32		qR;		/* Cached random number */
82
83	psched_time_t	qidlestart;	/* Start of idle period	*/
84};
85
86struct gred_sched
87{
88	struct gred_sched_data *tab[MAX_DPs];
89	u32 		DPs;
90	u32 		def;
91	u8 		initd;
92	u8 		grio;
93	u8 		eqp;
94};
95
96static int
97gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
98{
99	psched_time_t now;
100	struct gred_sched_data *q=NULL;
101	struct gred_sched *t= (struct gred_sched *)sch->data;
102	unsigned long	qave=0;
103	int i=0;
104
105	if (!t->initd && skb_queue_len(&sch->q) <= sch->dev->tx_queue_len) {
106		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
107		goto do_enqueue;
108	}
109
110
111	if ( ((skb->tc_index&0xf) > t->DPs) || !(q=t->tab[skb->tc_index&0xf])) {
112		printk("GRED: setting to default (%d)\n ",t->def);
113		if (!(q=t->tab[t->def])) {
114			DPRINTK("GRED: setting to default FAILED! dropping!! "
115			    "(%d)\n ", t->def);
116			goto drop;
117		}
118		/* fix tc_index? --could be controvesial but needed for
119		   requeueing */
120		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
121	}
122
123	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
124	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
125	    sch->stats.backlog);
126	/* sum up all the qaves of prios <= to ours to get the new qave*/
127	if (!t->eqp && t->grio) {
128		for (i=0;i<t->DPs;i++) {
129			if ((!t->tab[i]) || (i==q->DP))
130				continue;
131
132			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
133				qave +=t->tab[i]->qave;
134		}
135
136	}
137
138	q->packetsin++;
139	q->bytesin+=skb->len;
140
141	if (t->eqp && t->grio) {
142		qave=0;
143		q->qave=t->tab[t->def]->qave;
144		q->qidlestart=t->tab[t->def]->qidlestart;
145	}
146
147	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
148		long us_idle;
149		PSCHED_GET_TIME(now);
150		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
151		PSCHED_SET_PASTPERFECT(q->qidlestart);
152
153		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
154	} else {
155		if (t->eqp) {
156			q->qave += sch->stats.backlog - (q->qave >> q->Wlog);
157		} else {
158			q->qave += q->backlog - (q->qave >> q->Wlog);
159		}
160
161	}
162
163
164	if (t->eqp && t->grio)
165		t->tab[t->def]->qave=q->qave;
166
167	if ((q->qave+qave) < q->qth_min) {
168		q->qcount = -1;
169enqueue:
170		if (q->backlog <= q->limit) {
171			q->backlog += skb->len;
172do_enqueue:
173			__skb_queue_tail(&sch->q, skb);
174			sch->stats.backlog += skb->len;
175			sch->stats.bytes += skb->len;
176			sch->stats.packets++;
177			return 0;
178		} else {
179			q->pdrop++;
180		}
181
182drop:
183		kfree_skb(skb);
184		sch->stats.drops++;
185		return NET_XMIT_DROP;
186	}
187	if ((q->qave+qave) >= q->qth_max) {
188		q->qcount = -1;
189		sch->stats.overlimits++;
190		q->forced++;
191		goto drop;
192	}
193	if (++q->qcount) {
194		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
195			goto enqueue;
196		q->qcount = 0;
197		q->qR = net_random()&q->Rmask;
198		sch->stats.overlimits++;
199		q->early++;
200		goto drop;
201	}
202	q->qR = net_random()&q->Rmask;
203	goto enqueue;
204}
205
206static int
207gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
208{
209	struct gred_sched_data *q;
210	struct gred_sched *t= (struct gred_sched *)sch->data;
211	q= t->tab[(skb->tc_index&0xf)];
212/* error checking here -- probably unnecessary */
213	PSCHED_SET_PASTPERFECT(q->qidlestart);
214
215	__skb_queue_head(&sch->q, skb);
216	sch->stats.backlog += skb->len;
217	q->backlog += skb->len;
218	return 0;
219}
220
221static struct sk_buff *
222gred_dequeue(struct Qdisc* sch)
223{
224	struct sk_buff *skb;
225	struct gred_sched_data *q;
226	struct gred_sched *t= (struct gred_sched *)sch->data;
227
228	skb = __skb_dequeue(&sch->q);
229	if (skb) {
230		sch->stats.backlog -= skb->len;
231		q= t->tab[(skb->tc_index&0xf)];
232		if (q) {
233			q->backlog -= skb->len;
234			if (!q->backlog && !t->eqp)
235				PSCHED_GET_TIME(q->qidlestart);
236		} else {
237			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
238		}
239		return skb;
240	}
241
242	if (t->eqp) {
243			q= t->tab[t->def];
244			if (!q)
245				D2PRINTK("no default VQ set: Results will be "
246				       "screwed up\n");
247			else
248				PSCHED_GET_TIME(q->qidlestart);
249	}
250
251	return NULL;
252}
253
254static int
255gred_drop(struct Qdisc* sch)
256{
257	struct sk_buff *skb;
258
259	struct gred_sched_data *q;
260	struct gred_sched *t= (struct gred_sched *)sch->data;
261
262	skb = __skb_dequeue_tail(&sch->q);
263	if (skb) {
264		sch->stats.backlog -= skb->len;
265		sch->stats.drops++;
266		q= t->tab[(skb->tc_index&0xf)];
267		if (q) {
268			q->backlog -= skb->len;
269			q->other++;
270			if (!q->backlog && !t->eqp)
271				PSCHED_GET_TIME(q->qidlestart);
272			} else {
273				D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
274			}
275
276		kfree_skb(skb);
277		return 1;
278	}
279
280	q=t->tab[t->def];
281	if (!q) {
282		D2PRINTK("no default VQ set: Results might be screwed up\n");
283		return 0;
284	}
285
286	PSCHED_GET_TIME(q->qidlestart);
287	return 0;
288
289}
290
291static void gred_reset(struct Qdisc* sch)
292{
293	int i;
294	struct gred_sched_data *q;
295	struct gred_sched *t= (struct gred_sched *)sch->data;
296
297	__skb_queue_purge(&sch->q);
298
299	sch->stats.backlog = 0;
300
301        for (i=0;i<t->DPs;i++) {
302	        q= t->tab[i];
303		if (!q)
304			continue;
305		PSCHED_SET_PASTPERFECT(q->qidlestart);
306		q->qave = 0;
307		q->qcount = -1;
308		q->backlog = 0;
309		q->other=0;
310		q->forced=0;
311		q->pdrop=0;
312		q->early=0;
313	}
314}
315
316static int gred_change(struct Qdisc *sch, struct rtattr *opt)
317{
318	struct gred_sched *table = (struct gred_sched *)sch->data;
319	struct gred_sched_data *q;
320	struct tc_gred_qopt *ctl;
321	struct tc_gred_sopt *sopt;
322	struct rtattr *tb[TCA_GRED_STAB];
323	struct rtattr *tb2[TCA_GRED_STAB];
324	int i;
325
326	if (opt == NULL ||
327		rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) )
328			return -EINVAL;
329
330	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 &&
331	    tb[TCA_GRED_DPS-1] != 0) {
332		rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),
333		    RTA_PAYLOAD(opt));
334
335		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
336		table->DPs=sopt->DPs;
337		table->def=sopt->def_DP;
338		table->grio=sopt->grio;
339		table->initd=0;
340		/* probably need to clear all the table DP entries as well */
341		MOD_INC_USE_COUNT;
342		return 0;
343	    }
344
345
346	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
347		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
348		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
349			return -EINVAL;
350
351	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
352	if (ctl->DP > MAX_DPs-1 ) {
353		/* misbehaving is punished! Put in the default drop probability */
354		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
355			"set to default at %d\n",ctl->DP,table->def);
356		ctl->DP=table->def;
357	}
358
359	if (table->tab[ctl->DP] == NULL) {
360		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
361					    GFP_KERNEL);
362		if (NULL == table->tab[ctl->DP])
363			return -ENOMEM;
364		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
365	}
366	q= table->tab[ctl->DP];
367
368	if (table->grio) {
369		if (ctl->prio <=0) {
370			if (table->def && table->tab[table->def]) {
371				DPRINTK("\nGRED: DP %u does not have a prio"
372					"setting default to %d\n",ctl->DP,
373					table->tab[table->def]->prio);
374				q->prio=table->tab[table->def]->prio;
375			} else {
376				DPRINTK("\nGRED: DP %u does not have a prio"
377					" setting default to 8\n",ctl->DP);
378				q->prio=8;
379			}
380		} else {
381			q->prio=ctl->prio;
382		}
383	} else {
384		q->prio=8;
385	}
386
387
388	q->DP=ctl->DP;
389	q->Wlog = ctl->Wlog;
390	q->Plog = ctl->Plog;
391	q->limit = ctl->limit;
392	q->Scell_log = ctl->Scell_log;
393	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
394	q->Scell_max = (255<<q->Scell_log);
395	q->qth_min = ctl->qth_min<<ctl->Wlog;
396	q->qth_max = ctl->qth_max<<ctl->Wlog;
397	q->qave=0;
398	q->backlog=0;
399	q->qcount = -1;
400	q->other=0;
401	q->forced=0;
402	q->pdrop=0;
403	q->early=0;
404
405	PSCHED_SET_PASTPERFECT(q->qidlestart);
406	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
407
408	if ( table->initd && table->grio) {
409	/* this looks ugly but its not in the fast path */
410		for (i=0;i<table->DPs;i++) {
411			if ((!table->tab[i]) || (i==q->DP) )
412				continue;
413			if (table->tab[i]->prio == q->prio ){
414				/* WRED mode detected */
415				table->eqp=1;
416				break;
417			}
418		}
419	}
420
421	if (!table->initd) {
422		table->initd=1;
423		/*
424        	the first entry also goes into the default until
425        	over-written
426		*/
427
428		if (table->tab[table->def] == NULL) {
429			table->tab[table->def]=
430				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
431			if (NULL == table->tab[table->def])
432				return -ENOMEM;
433
434			memset(table->tab[table->def], 0,
435			       (sizeof(struct gred_sched_data)));
436		}
437		q= table->tab[table->def];
438		q->DP=table->def;
439		q->Wlog = ctl->Wlog;
440		q->Plog = ctl->Plog;
441		q->limit = ctl->limit;
442		q->Scell_log = ctl->Scell_log;
443		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
444		q->Scell_max = (255<<q->Scell_log);
445		q->qth_min = ctl->qth_min<<ctl->Wlog;
446		q->qth_max = ctl->qth_max<<ctl->Wlog;
447
448		if (table->grio)
449			q->prio=table->tab[ctl->DP]->prio;
450		else
451			q->prio=8;
452
453		q->qcount = -1;
454		PSCHED_SET_PASTPERFECT(q->qidlestart);
455		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
456	}
457	return 0;
458
459}
460
461static int gred_init(struct Qdisc *sch, struct rtattr *opt)
462{
463	struct gred_sched *table = (struct gred_sched *)sch->data;
464	struct tc_gred_sopt *sopt;
465	struct rtattr *tb[TCA_GRED_STAB];
466	struct rtattr *tb2[TCA_GRED_STAB];
467
468	if (opt == NULL ||
469		rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) )
470			return -EINVAL;
471
472	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 &&
473	    tb[TCA_GRED_DPS-1] != 0) {
474		rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),RTA_PAYLOAD(opt));
475
476		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
477		table->DPs=sopt->DPs;
478		table->def=sopt->def_DP;
479		table->grio=sopt->grio;
480		table->initd=0;
481		MOD_INC_USE_COUNT;
482		return 0;
483	}
484
485	DPRINTK("\n GRED_INIT error!\n");
486	return -EINVAL;
487}
488
489static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
490{
491	unsigned long qave;
492	struct rtattr *rta;
493	struct tc_gred_qopt *opt = NULL ;
494	struct tc_gred_qopt *dst;
495	struct gred_sched *table = (struct gred_sched *)sch->data;
496	struct gred_sched_data *q;
497	int i;
498	unsigned char	 *b = skb->tail;
499
500	rta = (struct rtattr*)b;
501	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
502
503	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
504
505	if (opt  == NULL) {
506		DPRINTK("gred_dump:failed to malloc for %Zd\n",
507		    sizeof(struct tc_gred_qopt)*MAX_DPs);
508		goto rtattr_failure;
509	}
510
511	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
512
513	if (!table->initd) {
514		DPRINTK("NO GRED Queues setup!\n");
515	}
516
517	for (i=0;i<MAX_DPs;i++) {
518		dst= &opt[i];
519		q= table->tab[i];
520
521		if (!q) {
522			/* hack -- fix at some point with proper message
523			   This is how we indicate to tc that there is no VQ
524			   at this DP */
525
526			dst->DP=MAX_DPs+i;
527			continue;
528		}
529
530		dst->limit=q->limit;
531		dst->qth_min=q->qth_min>>q->Wlog;
532		dst->qth_max=q->qth_max>>q->Wlog;
533		dst->DP=q->DP;
534		dst->backlog=q->backlog;
535		if (q->qave) {
536			if (table->eqp && table->grio) {
537				q->qidlestart=table->tab[table->def]->qidlestart;
538				q->qave=table->tab[table->def]->qave;
539			}
540			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
541				long idle;
542				psched_time_t now;
543				PSCHED_GET_TIME(now);
544				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0);
545				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
546				dst->qave = qave >> q->Wlog;
547
548			} else {
549				dst->qave = q->qave >> q->Wlog;
550			}
551		} else {
552			dst->qave = 0;
553		}
554
555
556		dst->Wlog = q->Wlog;
557		dst->Plog = q->Plog;
558		dst->Scell_log = q->Scell_log;
559		dst->other = q->other;
560		dst->forced = q->forced;
561		dst->early = q->early;
562		dst->pdrop = q->pdrop;
563		dst->prio = q->prio;
564		dst->packets=q->packetsin;
565		dst->bytesin=q->bytesin;
566	}
567
568	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
569	rta->rta_len = skb->tail - b;
570
571	kfree(opt);
572	return skb->len;
573
574rtattr_failure:
575	if (opt)
576		kfree(opt);
577	DPRINTK("gred_dump: FAILURE!!!!\n");
578
579/* also free the opt struct here */
580	skb_trim(skb, b - skb->data);
581	return -1;
582}
583
584static void gred_destroy(struct Qdisc *sch)
585{
586	struct gred_sched *table = (struct gred_sched *)sch->data;
587	int i;
588
589	for (i = 0;i < table->DPs; i++) {
590		if (table->tab[i])
591			kfree(table->tab[i]);
592	}
593	MOD_DEC_USE_COUNT;
594}
595
596struct Qdisc_ops gred_qdisc_ops =
597{
598	NULL,
599	NULL,
600	"gred",
601	sizeof(struct gred_sched),
602	gred_enqueue,
603	gred_dequeue,
604	gred_requeue,
605	gred_drop,
606	gred_init,
607	gred_reset,
608	gred_destroy,
609	gred_change, /* change */
610	gred_dump,
611};
612
613
614#ifdef MODULE
615int init_module(void)
616{
617	return register_qdisc(&gred_qdisc_ops);
618}
619
620void cleanup_module(void)
621{
622	unregister_qdisc(&gred_qdisc_ops);
623}
624#endif
625MODULE_LICENSE("GPL");
626