1/* 2 * net/sched/sch_gred.c Generic Random Early Detection queue. 3 * 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; either version 8 * 2 of the License, or (at your option) any later version. 9 * 10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 11 * 12 * 991129: - Bug fix with grio mode 13 * - a better sing. AvgQ mode with Grio(WRED) 14 * - A finer grained VQ dequeue based on sugestion 15 * from Ren Liu 16 * - More error checks 17 * 18 * 19 * 20 * For all the glorious comments look at Alexey's sch_red.c 21 */ 22 23#include <linux/config.h> 24#include <linux/module.h> 25#include <asm/uaccess.h> 26#include <asm/system.h> 27#include <asm/bitops.h> 28#include <linux/types.h> 29#include <linux/kernel.h> 30#include <linux/sched.h> 31#include <linux/string.h> 32#include <linux/mm.h> 33#include <linux/socket.h> 34#include <linux/sockios.h> 35#include <linux/in.h> 36#include <linux/errno.h> 37#include <linux/interrupt.h> 38#include <linux/if_ether.h> 39#include <linux/inet.h> 40#include <linux/netdevice.h> 41#include <linux/etherdevice.h> 42#include <linux/notifier.h> 43#include <net/ip.h> 44#include <net/route.h> 45#include <linux/skbuff.h> 46#include <net/sock.h> 47#include <net/pkt_sched.h> 48 49#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 50 51#define D2PRINTK(format,args...) 52 53struct gred_sched_data; 54struct gred_sched; 55 56struct gred_sched_data 57{ 58/* Parameters */ 59 u32 limit; /* HARD maximal queue length */ 60 u32 qth_min; /* Min average length threshold: A scaled */ 61 u32 qth_max; /* Max average length threshold: A scaled */ 62 u32 DP; /* the drop pramaters */ 63 char Wlog; /* log(W) */ 64 char Plog; /* random number bits */ 65 u32 Scell_max; 66 u32 Rmask; 67 u32 bytesin; /* bytes seen on virtualQ so far*/ 68 u32 packetsin; /* packets seen on virtualQ so far*/ 69 u32 backlog; /* bytes on the virtualQ */ 70 u32 forced; /* packets dropped for exceeding limits */ 71 u32 early; /* packets dropped as a warning */ 72 u32 other; /* packets dropped by invoking drop() */ 73 u32 pdrop; /* packets dropped because we exceeded physical queue limits */ 74 char Scell_log; 75 u8 Stab[256]; 76 u8 prio; /* the prio of this vq */ 77 78/* Variables */ 79 unsigned long qave; /* Average queue length: A scaled */ 80 int qcount; /* Packets since last random number generation */ 81 u32 qR; /* Cached random number */ 82 83 psched_time_t qidlestart; /* Start of idle period */ 84}; 85 86struct gred_sched 87{ 88 struct gred_sched_data *tab[MAX_DPs]; 89 u32 DPs; 90 u32 def; 91 u8 initd; 92 u8 grio; 93 u8 eqp; 94}; 95 96static int 97gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 98{ 99 psched_time_t now; 100 struct gred_sched_data *q=NULL; 101 struct gred_sched *t= (struct gred_sched *)sch->data; 102 unsigned long qave=0; 103 int i=0; 104 105 if (!t->initd && skb_queue_len(&sch->q) <= sch->dev->tx_queue_len) { 106 D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); 107 goto do_enqueue; 108 } 109 110 111 if ( ((skb->tc_index&0xf) > t->DPs) || !(q=t->tab[skb->tc_index&0xf])) { 112 printk("GRED: setting to default (%d)\n ",t->def); 113 if (!(q=t->tab[t->def])) { 114 DPRINTK("GRED: setting to default FAILED! dropping!! " 115 "(%d)\n ", t->def); 116 goto drop; 117 } 118 /* fix tc_index? --could be controvesial but needed for 119 requeueing */ 120 skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; 121 } 122 123 D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " 124 "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, 125 sch->stats.backlog); 126 /* sum up all the qaves of prios <= to ours to get the new qave*/ 127 if (!t->eqp && t->grio) { 128 for (i=0;i<t->DPs;i++) { 129 if ((!t->tab[i]) || (i==q->DP)) 130 continue; 131 132 if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart))) 133 qave +=t->tab[i]->qave; 134 } 135 136 } 137 138 q->packetsin++; 139 q->bytesin+=skb->len; 140 141 if (t->eqp && t->grio) { 142 qave=0; 143 q->qave=t->tab[t->def]->qave; 144 q->qidlestart=t->tab[t->def]->qidlestart; 145 } 146 147 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 148 long us_idle; 149 PSCHED_GET_TIME(now); 150 us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0); 151 PSCHED_SET_PASTPERFECT(q->qidlestart); 152 153 q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; 154 } else { 155 if (t->eqp) { 156 q->qave += sch->stats.backlog - (q->qave >> q->Wlog); 157 } else { 158 q->qave += q->backlog - (q->qave >> q->Wlog); 159 } 160 161 } 162 163 164 if (t->eqp && t->grio) 165 t->tab[t->def]->qave=q->qave; 166 167 if ((q->qave+qave) < q->qth_min) { 168 q->qcount = -1; 169enqueue: 170 if (q->backlog <= q->limit) { 171 q->backlog += skb->len; 172do_enqueue: 173 __skb_queue_tail(&sch->q, skb); 174 sch->stats.backlog += skb->len; 175 sch->stats.bytes += skb->len; 176 sch->stats.packets++; 177 return 0; 178 } else { 179 q->pdrop++; 180 } 181 182drop: 183 kfree_skb(skb); 184 sch->stats.drops++; 185 return NET_XMIT_DROP; 186 } 187 if ((q->qave+qave) >= q->qth_max) { 188 q->qcount = -1; 189 sch->stats.overlimits++; 190 q->forced++; 191 goto drop; 192 } 193 if (++q->qcount) { 194 if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) 195 goto enqueue; 196 q->qcount = 0; 197 q->qR = net_random()&q->Rmask; 198 sch->stats.overlimits++; 199 q->early++; 200 goto drop; 201 } 202 q->qR = net_random()&q->Rmask; 203 goto enqueue; 204} 205 206static int 207gred_requeue(struct sk_buff *skb, struct Qdisc* sch) 208{ 209 struct gred_sched_data *q; 210 struct gred_sched *t= (struct gred_sched *)sch->data; 211 q= t->tab[(skb->tc_index&0xf)]; 212/* error checking here -- probably unnecessary */ 213 PSCHED_SET_PASTPERFECT(q->qidlestart); 214 215 __skb_queue_head(&sch->q, skb); 216 sch->stats.backlog += skb->len; 217 q->backlog += skb->len; 218 return 0; 219} 220 221static struct sk_buff * 222gred_dequeue(struct Qdisc* sch) 223{ 224 struct sk_buff *skb; 225 struct gred_sched_data *q; 226 struct gred_sched *t= (struct gred_sched *)sch->data; 227 228 skb = __skb_dequeue(&sch->q); 229 if (skb) { 230 sch->stats.backlog -= skb->len; 231 q= t->tab[(skb->tc_index&0xf)]; 232 if (q) { 233 q->backlog -= skb->len; 234 if (!q->backlog && !t->eqp) 235 PSCHED_GET_TIME(q->qidlestart); 236 } else { 237 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 238 } 239 return skb; 240 } 241 242 if (t->eqp) { 243 q= t->tab[t->def]; 244 if (!q) 245 D2PRINTK("no default VQ set: Results will be " 246 "screwed up\n"); 247 else 248 PSCHED_GET_TIME(q->qidlestart); 249 } 250 251 return NULL; 252} 253 254static int 255gred_drop(struct Qdisc* sch) 256{ 257 struct sk_buff *skb; 258 259 struct gred_sched_data *q; 260 struct gred_sched *t= (struct gred_sched *)sch->data; 261 262 skb = __skb_dequeue_tail(&sch->q); 263 if (skb) { 264 sch->stats.backlog -= skb->len; 265 sch->stats.drops++; 266 q= t->tab[(skb->tc_index&0xf)]; 267 if (q) { 268 q->backlog -= skb->len; 269 q->other++; 270 if (!q->backlog && !t->eqp) 271 PSCHED_GET_TIME(q->qidlestart); 272 } else { 273 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 274 } 275 276 kfree_skb(skb); 277 return 1; 278 } 279 280 q=t->tab[t->def]; 281 if (!q) { 282 D2PRINTK("no default VQ set: Results might be screwed up\n"); 283 return 0; 284 } 285 286 PSCHED_GET_TIME(q->qidlestart); 287 return 0; 288 289} 290 291static void gred_reset(struct Qdisc* sch) 292{ 293 int i; 294 struct gred_sched_data *q; 295 struct gred_sched *t= (struct gred_sched *)sch->data; 296 297 __skb_queue_purge(&sch->q); 298 299 sch->stats.backlog = 0; 300 301 for (i=0;i<t->DPs;i++) { 302 q= t->tab[i]; 303 if (!q) 304 continue; 305 PSCHED_SET_PASTPERFECT(q->qidlestart); 306 q->qave = 0; 307 q->qcount = -1; 308 q->backlog = 0; 309 q->other=0; 310 q->forced=0; 311 q->pdrop=0; 312 q->early=0; 313 } 314} 315 316static int gred_change(struct Qdisc *sch, struct rtattr *opt) 317{ 318 struct gred_sched *table = (struct gred_sched *)sch->data; 319 struct gred_sched_data *q; 320 struct tc_gred_qopt *ctl; 321 struct tc_gred_sopt *sopt; 322 struct rtattr *tb[TCA_GRED_STAB]; 323 struct rtattr *tb2[TCA_GRED_STAB]; 324 int i; 325 326 if (opt == NULL || 327 rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) 328 return -EINVAL; 329 330 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 && 331 tb[TCA_GRED_DPS-1] != 0) { 332 rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt), 333 RTA_PAYLOAD(opt)); 334 335 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 336 table->DPs=sopt->DPs; 337 table->def=sopt->def_DP; 338 table->grio=sopt->grio; 339 table->initd=0; 340 /* probably need to clear all the table DP entries as well */ 341 MOD_INC_USE_COUNT; 342 return 0; 343 } 344 345 346 if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || 347 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 348 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) 349 return -EINVAL; 350 351 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 352 if (ctl->DP > MAX_DPs-1 ) { 353 /* misbehaving is punished! Put in the default drop probability */ 354 DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP " 355 "set to default at %d\n",ctl->DP,table->def); 356 ctl->DP=table->def; 357 } 358 359 if (table->tab[ctl->DP] == NULL) { 360 table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), 361 GFP_KERNEL); 362 if (NULL == table->tab[ctl->DP]) 363 return -ENOMEM; 364 memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); 365 } 366 q= table->tab[ctl->DP]; 367 368 if (table->grio) { 369 if (ctl->prio <=0) { 370 if (table->def && table->tab[table->def]) { 371 DPRINTK("\nGRED: DP %u does not have a prio" 372 "setting default to %d\n",ctl->DP, 373 table->tab[table->def]->prio); 374 q->prio=table->tab[table->def]->prio; 375 } else { 376 DPRINTK("\nGRED: DP %u does not have a prio" 377 " setting default to 8\n",ctl->DP); 378 q->prio=8; 379 } 380 } else { 381 q->prio=ctl->prio; 382 } 383 } else { 384 q->prio=8; 385 } 386 387 388 q->DP=ctl->DP; 389 q->Wlog = ctl->Wlog; 390 q->Plog = ctl->Plog; 391 q->limit = ctl->limit; 392 q->Scell_log = ctl->Scell_log; 393 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; 394 q->Scell_max = (255<<q->Scell_log); 395 q->qth_min = ctl->qth_min<<ctl->Wlog; 396 q->qth_max = ctl->qth_max<<ctl->Wlog; 397 q->qave=0; 398 q->backlog=0; 399 q->qcount = -1; 400 q->other=0; 401 q->forced=0; 402 q->pdrop=0; 403 q->early=0; 404 405 PSCHED_SET_PASTPERFECT(q->qidlestart); 406 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); 407 408 if ( table->initd && table->grio) { 409 /* this looks ugly but its not in the fast path */ 410 for (i=0;i<table->DPs;i++) { 411 if ((!table->tab[i]) || (i==q->DP) ) 412 continue; 413 if (table->tab[i]->prio == q->prio ){ 414 /* WRED mode detected */ 415 table->eqp=1; 416 break; 417 } 418 } 419 } 420 421 if (!table->initd) { 422 table->initd=1; 423 /* 424 the first entry also goes into the default until 425 over-written 426 */ 427 428 if (table->tab[table->def] == NULL) { 429 table->tab[table->def]= 430 kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); 431 if (NULL == table->tab[table->def]) 432 return -ENOMEM; 433 434 memset(table->tab[table->def], 0, 435 (sizeof(struct gred_sched_data))); 436 } 437 q= table->tab[table->def]; 438 q->DP=table->def; 439 q->Wlog = ctl->Wlog; 440 q->Plog = ctl->Plog; 441 q->limit = ctl->limit; 442 q->Scell_log = ctl->Scell_log; 443 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; 444 q->Scell_max = (255<<q->Scell_log); 445 q->qth_min = ctl->qth_min<<ctl->Wlog; 446 q->qth_max = ctl->qth_max<<ctl->Wlog; 447 448 if (table->grio) 449 q->prio=table->tab[ctl->DP]->prio; 450 else 451 q->prio=8; 452 453 q->qcount = -1; 454 PSCHED_SET_PASTPERFECT(q->qidlestart); 455 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); 456 } 457 return 0; 458 459} 460 461static int gred_init(struct Qdisc *sch, struct rtattr *opt) 462{ 463 struct gred_sched *table = (struct gred_sched *)sch->data; 464 struct tc_gred_sopt *sopt; 465 struct rtattr *tb[TCA_GRED_STAB]; 466 struct rtattr *tb2[TCA_GRED_STAB]; 467 468 if (opt == NULL || 469 rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) 470 return -EINVAL; 471 472 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 && 473 tb[TCA_GRED_DPS-1] != 0) { 474 rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),RTA_PAYLOAD(opt)); 475 476 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 477 table->DPs=sopt->DPs; 478 table->def=sopt->def_DP; 479 table->grio=sopt->grio; 480 table->initd=0; 481 MOD_INC_USE_COUNT; 482 return 0; 483 } 484 485 DPRINTK("\n GRED_INIT error!\n"); 486 return -EINVAL; 487} 488 489static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 490{ 491 unsigned long qave; 492 struct rtattr *rta; 493 struct tc_gred_qopt *opt = NULL ; 494 struct tc_gred_qopt *dst; 495 struct gred_sched *table = (struct gred_sched *)sch->data; 496 struct gred_sched_data *q; 497 int i; 498 unsigned char *b = skb->tail; 499 500 rta = (struct rtattr*)b; 501 RTA_PUT(skb, TCA_OPTIONS, 0, NULL); 502 503 opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL); 504 505 if (opt == NULL) { 506 DPRINTK("gred_dump:failed to malloc for %Zd\n", 507 sizeof(struct tc_gred_qopt)*MAX_DPs); 508 goto rtattr_failure; 509 } 510 511 memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs); 512 513 if (!table->initd) { 514 DPRINTK("NO GRED Queues setup!\n"); 515 } 516 517 for (i=0;i<MAX_DPs;i++) { 518 dst= &opt[i]; 519 q= table->tab[i]; 520 521 if (!q) { 522 /* hack -- fix at some point with proper message 523 This is how we indicate to tc that there is no VQ 524 at this DP */ 525 526 dst->DP=MAX_DPs+i; 527 continue; 528 } 529 530 dst->limit=q->limit; 531 dst->qth_min=q->qth_min>>q->Wlog; 532 dst->qth_max=q->qth_max>>q->Wlog; 533 dst->DP=q->DP; 534 dst->backlog=q->backlog; 535 if (q->qave) { 536 if (table->eqp && table->grio) { 537 q->qidlestart=table->tab[table->def]->qidlestart; 538 q->qave=table->tab[table->def]->qave; 539 } 540 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 541 long idle; 542 psched_time_t now; 543 PSCHED_GET_TIME(now); 544 idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max, 0); 545 qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; 546 dst->qave = qave >> q->Wlog; 547 548 } else { 549 dst->qave = q->qave >> q->Wlog; 550 } 551 } else { 552 dst->qave = 0; 553 } 554 555 556 dst->Wlog = q->Wlog; 557 dst->Plog = q->Plog; 558 dst->Scell_log = q->Scell_log; 559 dst->other = q->other; 560 dst->forced = q->forced; 561 dst->early = q->early; 562 dst->pdrop = q->pdrop; 563 dst->prio = q->prio; 564 dst->packets=q->packetsin; 565 dst->bytesin=q->bytesin; 566 } 567 568 RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt); 569 rta->rta_len = skb->tail - b; 570 571 kfree(opt); 572 return skb->len; 573 574rtattr_failure: 575 if (opt) 576 kfree(opt); 577 DPRINTK("gred_dump: FAILURE!!!!\n"); 578 579/* also free the opt struct here */ 580 skb_trim(skb, b - skb->data); 581 return -1; 582} 583 584static void gred_destroy(struct Qdisc *sch) 585{ 586 struct gred_sched *table = (struct gred_sched *)sch->data; 587 int i; 588 589 for (i = 0;i < table->DPs; i++) { 590 if (table->tab[i]) 591 kfree(table->tab[i]); 592 } 593 MOD_DEC_USE_COUNT; 594} 595 596struct Qdisc_ops gred_qdisc_ops = 597{ 598 NULL, 599 NULL, 600 "gred", 601 sizeof(struct gred_sched), 602 gred_enqueue, 603 gred_dequeue, 604 gred_requeue, 605 gred_drop, 606 gred_init, 607 gred_reset, 608 gred_destroy, 609 gred_change, /* change */ 610 gred_dump, 611}; 612 613 614#ifdef MODULE 615int init_module(void) 616{ 617 return register_qdisc(&gred_qdisc_ops); 618} 619 620void cleanup_module(void) 621{ 622 unregister_qdisc(&gred_qdisc_ops); 623} 624#endif 625MODULE_LICENSE("GPL"); 626