altq_rmclass.c revision 130365
1/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */ 2 3/* 4 * Copyright (c) 1991-1997 Regents of the University of California. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the Network Research 18 * Group at Lawrence Berkeley Laboratory. 19 * 4. Neither the name of the University nor of the Laboratory may be used 20 * to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * LBL code modified by speer@eng.sun.com, May 1977. 
36 * For questions and/or comments, please send mail to cbq@ee.lbl.gov 37 */ 38 39#ident "@(#)rm_class.c 1.48 97/12/05 SMI" 40 41#if defined(__FreeBSD__) || defined(__NetBSD__) 42#include "opt_altq.h" 43#if (__FreeBSD__ != 2) 44#include "opt_inet.h" 45#ifdef __FreeBSD__ 46#include "opt_inet6.h" 47#endif 48#endif 49#endif /* __FreeBSD__ || __NetBSD__ */ 50#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ 51 52#include <sys/param.h> 53#include <sys/malloc.h> 54#include <sys/mbuf.h> 55#include <sys/socket.h> 56#include <sys/systm.h> 57#include <sys/errno.h> 58#include <sys/time.h> 59#ifdef ALTQ3_COMPAT 60#include <sys/kernel.h> 61#endif 62 63#include <net/if.h> 64#ifdef ALTQ3_COMPAT 65#include <netinet/in.h> 66#include <netinet/in_systm.h> 67#include <netinet/ip.h> 68#endif 69 70#include <altq/altq.h> 71#include <altq/altq_rmclass.h> 72#include <altq/altq_rmclass_debug.h> 73#include <altq/altq_red.h> 74#include <altq/altq_rio.h> 75 76/* 77 * Local Macros 78 */ 79 80#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } 81 82/* 83 * Local routines. 
84 */ 85 86static int rmc_satisfied(struct rm_class *, struct timeval *); 87static void rmc_wrr_set_weights(struct rm_ifdat *); 88static void rmc_depth_compute(struct rm_class *); 89static void rmc_depth_recompute(rm_class_t *); 90 91static mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); 92static mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); 93 94static int _rmc_addq(rm_class_t *, mbuf_t *); 95static void _rmc_dropq(rm_class_t *); 96static mbuf_t *_rmc_getq(rm_class_t *); 97static mbuf_t *_rmc_pollq(rm_class_t *); 98 99static int rmc_under_limit(struct rm_class *, struct timeval *); 100static void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); 101static void rmc_drop_action(struct rm_class *); 102static void rmc_restart(struct rm_class *); 103static void rmc_root_overlimit(struct rm_class *, struct rm_class *); 104 105#define BORROW_OFFTIME 106/* 107 * BORROW_OFFTIME (experimental): 108 * borrow the offtime of the class borrowing from. 109 * the reason is that when its own offtime is set, the class is unable 110 * to borrow much, especially when cutoff is taking effect. 111 * but when the borrowed class is overloaded (advidle is close to minidle), 112 * use the borrowing class's offtime to avoid overload. 113 */ 114#define ADJUST_CUTOFF 115/* 116 * ADJUST_CUTOFF (experimental): 117 * if no underlimit class is found due to cutoff, increase cutoff and 118 * retry the scheduling loop. 119 * also, don't invoke delay_actions while cutoff is taking effect, 120 * since a sleeping class won't have a chance to be scheduled in the 121 * next loop. 122 * 123 * now heuristics for setting the top-level variable (cutoff_) becomes: 124 * 1. if a packet arrives for a not-overlimit class, set cutoff 125 * to the depth of the class. 126 * 2. if cutoff is i, and a packet arrives for an overlimit class 127 * with an underlimit ancestor at a lower level than i (say j), 128 * then set cutoff to j. 129 * 3. 
at scheduling a packet, if there is no underlimit class 130 * due to the current cutoff level, increase cutoff by 1 and 131 * then try to schedule again. 132 */ 133 134/* 135 * rm_class_t * 136 * rmc_newclass(...) - Create a new resource management class at priority 137 * 'pri' on the interface given by 'ifd'. 138 * 139 * nsecPerByte is the data rate of the interface in nanoseconds/byte. 140 * E.g., 800 for a 10Mb/s ethernet. If the class gets less 141 * than 100% of the bandwidth, this number should be the 142 * 'effective' rate for the class. Let f be the 143 * bandwidth fraction allocated to this class, and let 144 * nsPerByte be the data rate of the output link in 145 * nanoseconds/byte. Then nsecPerByte is set to 146 * nsPerByte / f. E.g., 1600 (= 800 / .5) 147 * for a class that gets 50% of an ethernet's bandwidth. 148 * 149 * action the routine to call when the class is over limit. 150 * 151 * maxq max allowable queue size for class (in packets). 152 * 153 * parent parent class pointer. 154 * 155 * borrow class to borrow from (should be either 'parent' or null). 156 * 157 * maxidle max value allowed for class 'idle' time estimate (this 158 * parameter determines how large an initial burst of packets 159 * can be before overlimit action is invoked. 160 * 161 * offtime how long 'delay' action will delay when class goes over 162 * limit (this parameter determines the steady-state burst 163 * size when a class is running over its limit). 
164 * 165 * Maxidle and offtime have to be computed from the following: If the 166 * average packet size is s, the bandwidth fraction allocated to this 167 * class is f, we want to allow b packet bursts, and the gain of the 168 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: 169 * 170 * ptime = s * nsPerByte * (1 - f) / f 171 * maxidle = ptime * (1 - g^b) / g^b 172 * minidle = -ptime * (1 / (f - 1)) 173 * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) 174 * 175 * Operationally, it's convenient to specify maxidle & offtime in units 176 * independent of the link bandwidth so the maxidle & offtime passed to 177 * this routine are the above values multiplied by 8*f/(1000*nsPerByte). 178 * (The constant factor is a scale factor needed to make the parameters 179 * integers. This scaling also means that the 'unscaled' values of 180 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, 181 * not nanoseconds.) Also note that the 'idle' filter computation keeps 182 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of 183 * maxidle also must be scaled upward by this value. 
Thus, the passed 184 * values for maxidle and offtime can be computed as follows: 185 * 186 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) 187 * offtime = offtime * 8 / (1000 * nsecPerByte) 188 * 189 * When USE_HRTIME is employed, then maxidle and offtime become: 190 * maxidle = maxilde * (8.0 / nsecPerByte); 191 * offtime = offtime * (8.0 / nsecPerByte); 192 */ 193struct rm_class * 194rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, 195 void (*action)(rm_class_t *, rm_class_t *), int maxq, 196 struct rm_class *parent, struct rm_class *borrow, u_int maxidle, 197 int minidle, u_int offtime, int pktsize, int flags) 198{ 199 struct rm_class *cl; 200 struct rm_class *peer; 201 int s; 202 203 if (pri >= RM_MAXPRIO) 204 return (NULL); 205#ifndef ALTQ_RED 206 if (flags & RMCF_RED) { 207#ifdef ALTQ_DEBUG 208 printf("rmc_newclass: RED not configured for CBQ!\n"); 209#endif 210 return (NULL); 211 } 212#endif 213#ifndef ALTQ_RIO 214 if (flags & RMCF_RIO) { 215#ifdef ALTQ_DEBUG 216 printf("rmc_newclass: RIO not configured for CBQ!\n"); 217#endif 218 return (NULL); 219 } 220#endif 221 222 MALLOC(cl, struct rm_class *, sizeof(struct rm_class), 223 M_DEVBUF, M_WAITOK); 224 if (cl == NULL) 225 return (NULL); 226 bzero(cl, sizeof(struct rm_class)); 227 CALLOUT_INIT(&cl->callout_); 228 MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t), 229 M_DEVBUF, M_WAITOK); 230 if (cl->q_ == NULL) { 231 FREE(cl, M_DEVBUF); 232 return (NULL); 233 } 234 bzero(cl->q_, sizeof(class_queue_t)); 235 236 /* 237 * Class initialization. 
238 */ 239 cl->children_ = NULL; 240 cl->parent_ = parent; 241 cl->borrow_ = borrow; 242 cl->leaf_ = 1; 243 cl->ifdat_ = ifd; 244 cl->pri_ = pri; 245 cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 246 cl->depth_ = 0; 247 cl->qthresh_ = 0; 248 cl->ns_per_byte_ = nsecPerByte; 249 250 qlimit(cl->q_) = maxq; 251 qtype(cl->q_) = Q_DROPHEAD; 252 qlen(cl->q_) = 0; 253 cl->flags_ = flags; 254 255#if 1 /* minidle is also scaled in ALTQ */ 256 cl->minidle_ = (minidle * (int)nsecPerByte) / 8; 257 if (cl->minidle_ > 0) 258 cl->minidle_ = 0; 259#else 260 cl->minidle_ = minidle; 261#endif 262 cl->maxidle_ = (maxidle * nsecPerByte) / 8; 263 if (cl->maxidle_ == 0) 264 cl->maxidle_ = 1; 265#if 1 /* offtime is also scaled in ALTQ */ 266 cl->avgidle_ = cl->maxidle_; 267 cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 268 if (cl->offtime_ == 0) 269 cl->offtime_ = 1; 270#else 271 cl->avgidle_ = 0; 272 cl->offtime_ = (offtime * nsecPerByte) / 8; 273#endif 274 cl->overlimit = action; 275 276#ifdef ALTQ_RED 277 if (flags & (RMCF_RED|RMCF_RIO)) { 278 int red_flags, red_pkttime; 279 280 red_flags = 0; 281 if (flags & RMCF_ECN) 282 red_flags |= REDF_ECN; 283 if (flags & RMCF_FLOWVALVE) 284 red_flags |= REDF_FLOWVALVE; 285#ifdef ALTQ_RIO 286 if (flags & RMCF_CLEARDSCP) 287 red_flags |= RIOF_CLEARDSCP; 288#endif 289 red_pkttime = nsecPerByte * pktsize / 1000; 290 291 if (flags & RMCF_RED) { 292 cl->red_ = red_alloc(0, 0, 293 qlimit(cl->q_) * 10/100, 294 qlimit(cl->q_) * 30/100, 295 red_flags, red_pkttime); 296 if (cl->red_ != NULL) 297 qtype(cl->q_) = Q_RED; 298 } 299#ifdef ALTQ_RIO 300 else { 301 cl->red_ = (red_t *)rio_alloc(0, NULL, 302 red_flags, red_pkttime); 303 if (cl->red_ != NULL) 304 qtype(cl->q_) = Q_RIO; 305 } 306#endif 307 } 308#endif /* ALTQ_RED */ 309 310 /* 311 * put the class into the class tree 312 */ 313#ifdef __NetBSD__ 314 s = splnet(); 315#else 316 s = splimp(); 317#endif 318 if ((peer = ifd->active_[pri]) != NULL) { 319 /* find the last 
class at this pri */ 320 cl->peer_ = peer; 321 while (peer->peer_ != ifd->active_[pri]) 322 peer = peer->peer_; 323 peer->peer_ = cl; 324 } else { 325 ifd->active_[pri] = cl; 326 cl->peer_ = cl; 327 } 328 329 if (cl->parent_) { 330 cl->next_ = parent->children_; 331 parent->children_ = cl; 332 parent->leaf_ = 0; 333 } 334 335 /* 336 * Compute the depth of this class and its ancestors in the class 337 * hierarchy. 338 */ 339 rmc_depth_compute(cl); 340 341 /* 342 * If CBQ's WRR is enabled, then initialize the class WRR state. 343 */ 344 if (ifd->wrr_) { 345 ifd->num_[pri]++; 346 ifd->alloc_[pri] += cl->allotment_; 347 rmc_wrr_set_weights(ifd); 348 } 349 splx(s); 350 return (cl); 351} 352 353int 354rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, 355 int minidle, u_int offtime, int pktsize) 356{ 357 struct rm_ifdat *ifd; 358 u_int old_allotment; 359 int s; 360 361 ifd = cl->ifdat_; 362 old_allotment = cl->allotment_; 363 364#ifdef __NetBSD__ 365 s = splnet(); 366#else 367 s = splimp(); 368#endif 369 cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 370 cl->qthresh_ = 0; 371 cl->ns_per_byte_ = nsecPerByte; 372 373 qlimit(cl->q_) = maxq; 374 375#if 1 /* minidle is also scaled in ALTQ */ 376 cl->minidle_ = (minidle * nsecPerByte) / 8; 377 if (cl->minidle_ > 0) 378 cl->minidle_ = 0; 379#else 380 cl->minidle_ = minidle; 381#endif 382 cl->maxidle_ = (maxidle * nsecPerByte) / 8; 383 if (cl->maxidle_ == 0) 384 cl->maxidle_ = 1; 385#if 1 /* offtime is also scaled in ALTQ */ 386 cl->avgidle_ = cl->maxidle_; 387 cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 388 if (cl->offtime_ == 0) 389 cl->offtime_ = 1; 390#else 391 cl->avgidle_ = 0; 392 cl->offtime_ = (offtime * nsecPerByte) / 8; 393#endif 394 395 /* 396 * If CBQ's WRR is enabled, then initialize the class WRR state. 
397 */ 398 if (ifd->wrr_) { 399 ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; 400 rmc_wrr_set_weights(ifd); 401 } 402 splx(s); 403 return (0); 404} 405 406/* 407 * static void 408 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes 409 * the appropriate run robin weights for the CBQ weighted round robin 410 * algorithm. 411 * 412 * Returns: NONE 413 */ 414 415static void 416rmc_wrr_set_weights(struct rm_ifdat *ifd) 417{ 418 int i; 419 struct rm_class *cl, *clh; 420 421 for (i = 0; i < RM_MAXPRIO; i++) { 422 /* 423 * This is inverted from that of the simulator to 424 * maintain precision. 425 */ 426 if (ifd->num_[i] == 0) 427 ifd->M_[i] = 0; 428 else 429 ifd->M_[i] = ifd->alloc_[i] / 430 (ifd->num_[i] * ifd->maxpkt_); 431 /* 432 * Compute the weighted allotment for each class. 433 * This takes the expensive div instruction out 434 * of the main loop for the wrr scheduling path. 435 * These only get recomputed when a class comes or 436 * goes. 437 */ 438 if (ifd->active_[i] != NULL) { 439 clh = cl = ifd->active_[i]; 440 do { 441 /* safe-guard for slow link or alloc_ == 0 */ 442 if (ifd->M_[i] == 0) 443 cl->w_allotment_ = 0; 444 else 445 cl->w_allotment_ = cl->allotment_ / 446 ifd->M_[i]; 447 cl = cl->peer_; 448 } while ((cl != NULL) && (cl != clh)); 449 } 450 } 451} 452 453int 454rmc_get_weight(struct rm_ifdat *ifd, int pri) 455{ 456 if ((pri >= 0) && (pri < RM_MAXPRIO)) 457 return (ifd->M_[pri]); 458 else 459 return (0); 460} 461 462/* 463 * static void 464 * rmc_depth_compute(struct rm_class *cl) - This function computes the 465 * appropriate depth of class 'cl' and its ancestors. 466 * 467 * Returns: NONE 468 */ 469 470static void 471rmc_depth_compute(struct rm_class *cl) 472{ 473 rm_class_t *t = cl, *p; 474 475 /* 476 * Recompute the depth for the branch of the tree. 
477 */ 478 while (t != NULL) { 479 p = t->parent_; 480 if (p && (t->depth_ >= p->depth_)) { 481 p->depth_ = t->depth_ + 1; 482 t = p; 483 } else 484 t = NULL; 485 } 486} 487 488/* 489 * static void 490 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes 491 * the depth of the tree after a class has been deleted. 492 * 493 * Returns: NONE 494 */ 495 496static void 497rmc_depth_recompute(rm_class_t *cl) 498{ 499#if 1 /* ALTQ */ 500 rm_class_t *p, *t; 501 502 p = cl; 503 while (p != NULL) { 504 if ((t = p->children_) == NULL) { 505 p->depth_ = 0; 506 } else { 507 int cdepth = 0; 508 509 while (t != NULL) { 510 if (t->depth_ > cdepth) 511 cdepth = t->depth_; 512 t = t->next_; 513 } 514 515 if (p->depth_ == cdepth + 1) 516 /* no change to this parent */ 517 return; 518 519 p->depth_ = cdepth + 1; 520 } 521 522 p = p->parent_; 523 } 524#else 525 rm_class_t *t; 526 527 if (cl->depth_ >= 1) { 528 if (cl->children_ == NULL) { 529 cl->depth_ = 0; 530 } else if ((t = cl->children_) != NULL) { 531 while (t != NULL) { 532 if (t->children_ != NULL) 533 rmc_depth_recompute(t); 534 t = t->next_; 535 } 536 } else 537 rmc_depth_compute(cl); 538 } 539#endif 540} 541 542/* 543 * void 544 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This 545 * function deletes a class from the link-sharing structure and frees 546 * all resources associated with the class. 547 * 548 * Returns: NONE 549 */ 550 551void 552rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) 553{ 554 struct rm_class *p, *head, *previous; 555 int s; 556 557 ASSERT(cl->children_ == NULL); 558 559 if (cl->sleeping_) 560 CALLOUT_STOP(&cl->callout_); 561 562#ifdef __NetBSD__ 563 s = splnet(); 564#else 565 s = splimp(); 566#endif 567 /* 568 * Free packets in the packet queue. 569 * XXX - this may not be a desired behavior. Packets should be 570 * re-queued. 
571 */ 572 rmc_dropall(cl); 573 574 /* 575 * If the class has a parent, then remove the class from the 576 * class from the parent's children chain. 577 */ 578 if (cl->parent_ != NULL) { 579 head = cl->parent_->children_; 580 p = previous = head; 581 if (head->next_ == NULL) { 582 ASSERT(head == cl); 583 cl->parent_->children_ = NULL; 584 cl->parent_->leaf_ = 1; 585 } else while (p != NULL) { 586 if (p == cl) { 587 if (cl == head) 588 cl->parent_->children_ = cl->next_; 589 else 590 previous->next_ = cl->next_; 591 cl->next_ = NULL; 592 p = NULL; 593 } else { 594 previous = p; 595 p = p->next_; 596 } 597 } 598 } 599 600 /* 601 * Delete class from class priority peer list. 602 */ 603 if ((p = ifd->active_[cl->pri_]) != NULL) { 604 /* 605 * If there is more than one member of this priority 606 * level, then look for class(cl) in the priority level. 607 */ 608 if (p != p->peer_) { 609 while (p->peer_ != cl) 610 p = p->peer_; 611 p->peer_ = cl->peer_; 612 613 if (ifd->active_[cl->pri_] == cl) 614 ifd->active_[cl->pri_] = cl->peer_; 615 } else { 616 ASSERT(p == cl); 617 ifd->active_[cl->pri_] = NULL; 618 } 619 } 620 621 /* 622 * Recompute the WRR weights. 623 */ 624 if (ifd->wrr_) { 625 ifd->alloc_[cl->pri_] -= cl->allotment_; 626 ifd->num_[cl->pri_]--; 627 rmc_wrr_set_weights(ifd); 628 } 629 630 /* 631 * Re-compute the depth of the tree. 632 */ 633#if 1 /* ALTQ */ 634 rmc_depth_recompute(cl->parent_); 635#else 636 rmc_depth_recompute(ifd->root_); 637#endif 638 639 splx(s); 640 641 /* 642 * Free the class structure. 643 */ 644 if (cl->red_ != NULL) { 645#ifdef ALTQ_RIO 646 if (q_is_rio(cl->q_)) 647 rio_destroy((rio_t *)cl->red_); 648#endif 649#ifdef ALTQ_RED 650 if (q_is_red(cl->q_)) 651 red_destroy(cl->red_); 652#endif 653 } 654 FREE(cl->q_, M_DEVBUF); 655 FREE(cl, M_DEVBUF); 656} 657 658 659/* 660 * void 661 * rmc_init(...) - Initialize the resource management data structures 662 * associated with the output portion of interface 'ifp'. 
'ifd' is 663 * where the structures will be built (for backwards compatibility, the 664 * structures aren't kept in the ifnet struct). 'nsecPerByte' 665 * gives the link speed (inverse of bandwidth) in nanoseconds/byte. 666 * 'restart' is the driver-specific routine that the generic 'delay 667 * until under limit' action will call to restart output. `maxq' 668 * is the queue size of the 'link' & 'default' classes. 'maxqueued' 669 * is the maximum number of packets that the resource management 670 * code will allow to be queued 'downstream' (this is typically 1). 671 * 672 * Returns: NONE 673 */ 674 675void 676rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, 677 void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, 678 int minidle, u_int offtime, int flags) 679{ 680 int i, mtu; 681 682 /* 683 * Initialize the CBQ tracing/debug facility. 684 */ 685 CBQTRACEINIT(); 686 687 bzero((char *)ifd, sizeof (*ifd)); 688 mtu = ifq->altq_ifp->if_mtu; 689 ifd->ifq_ = ifq; 690 ifd->restart = restart; 691 ifd->maxqueued_ = maxqueued; 692 ifd->ns_per_byte_ = nsecPerByte; 693 ifd->maxpkt_ = mtu; 694 ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; 695 ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; 696#if 1 697 ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; 698 if (mtu * nsecPerByte > 10 * 1000000) 699 ifd->maxiftime_ /= 4; 700#endif 701 702 reset_cutoff(ifd); 703 CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); 704 705 /* 706 * Initialize the CBQ's WRR state. 707 */ 708 for (i = 0; i < RM_MAXPRIO; i++) { 709 ifd->alloc_[i] = 0; 710 ifd->M_[i] = 0; 711 ifd->num_[i] = 0; 712 ifd->na_[i] = 0; 713 ifd->active_[i] = NULL; 714 } 715 716 /* 717 * Initialize current packet state. 718 */ 719 ifd->qi_ = 0; 720 ifd->qo_ = 0; 721 for (i = 0; i < RM_MAXQUEUED; i++) { 722 ifd->class_[i] = NULL; 723 ifd->curlen_[i] = 0; 724 ifd->borrowed_[i] = NULL; 725 } 726 727 /* 728 * Create the root class of the link-sharing structure. 
729 */ 730 if ((ifd->root_ = rmc_newclass(0, ifd, 731 nsecPerByte, 732 rmc_root_overlimit, maxq, 0, 0, 733 maxidle, minidle, offtime, 734 0, 0)) == NULL) { 735 printf("rmc_init: root class not allocated\n"); 736 return ; 737 } 738 ifd->root_->depth_ = 0; 739} 740 741/* 742 * void 743 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by 744 * mbuf 'm' to queue for resource class 'cl'. This routine is called 745 * by a driver's if_output routine. This routine must be called with 746 * output packet completion interrupts locked out (to avoid racing with 747 * rmc_dequeue_next). 748 * 749 * Returns: 0 on successful queueing 750 * -1 when packet drop occurs 751 */ 752int 753rmc_queue_packet(struct rm_class *cl, mbuf_t *m) 754{ 755 struct timeval now; 756 struct rm_ifdat *ifd = cl->ifdat_; 757 int cpri = cl->pri_; 758 int is_empty = qempty(cl->q_); 759 760 RM_GETTIME(now); 761 if (ifd->cutoff_ > 0) { 762 if (TV_LT(&cl->undertime_, &now)) { 763 if (ifd->cutoff_ > cl->depth_) 764 ifd->cutoff_ = cl->depth_; 765 CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_); 766 } 767#if 1 /* ALTQ */ 768 else { 769 /* 770 * the class is overlimit. if the class has 771 * underlimit ancestors, set cutoff to the lowest 772 * depth among them. 
773 */ 774 struct rm_class *borrow = cl->borrow_; 775 776 while (borrow != NULL && 777 borrow->depth_ < ifd->cutoff_) { 778 if (TV_LT(&borrow->undertime_, &now)) { 779 ifd->cutoff_ = borrow->depth_; 780 CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_); 781 break; 782 } 783 borrow = borrow->borrow_; 784 } 785 } 786#else /* !ALTQ */ 787 else if ((ifd->cutoff_ > 1) && cl->borrow_) { 788 if (TV_LT(&cl->borrow_->undertime_, &now)) { 789 ifd->cutoff_ = cl->borrow_->depth_; 790 CBQTRACE(rmc_queue_packet, 'ffob', 791 cl->borrow_->depth_); 792 } 793 } 794#endif /* !ALTQ */ 795 } 796 797 if (_rmc_addq(cl, m) < 0) 798 /* failed */ 799 return (-1); 800 801 if (is_empty) { 802 CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle); 803 ifd->na_[cpri]++; 804 } 805 806 if (qlen(cl->q_) > qlimit(cl->q_)) { 807 /* note: qlimit can be set to 0 or 1 */ 808 rmc_drop_action(cl); 809 return (-1); 810 } 811 return (0); 812} 813 814/* 815 * void 816 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all 817 * classes to see if there are satified. 818 */ 819 820static void 821rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) 822{ 823 int i; 824 rm_class_t *p, *bp; 825 826 for (i = RM_MAXPRIO - 1; i >= 0; i--) { 827 if ((bp = ifd->active_[i]) != NULL) { 828 p = bp; 829 do { 830 if (!rmc_satisfied(p, now)) { 831 ifd->cutoff_ = p->depth_; 832 return; 833 } 834 p = p->peer_; 835 } while (p != bp); 836 } 837 } 838 839 reset_cutoff(ifd); 840} 841 842/* 843 * rmc_satisfied - Return 1 of the class is satisfied. O, otherwise. 
844 */ 845 846static int 847rmc_satisfied(struct rm_class *cl, struct timeval *now) 848{ 849 rm_class_t *p; 850 851 if (cl == NULL) 852 return (1); 853 if (TV_LT(now, &cl->undertime_)) 854 return (1); 855 if (cl->depth_ == 0) { 856 if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_)) 857 return (0); 858 else 859 return (1); 860 } 861 if (cl->children_ != NULL) { 862 p = cl->children_; 863 while (p != NULL) { 864 if (!rmc_satisfied(p, now)) 865 return (0); 866 p = p->next_; 867 } 868 } 869 870 return (1); 871} 872 873/* 874 * Return 1 if class 'cl' is under limit or can borrow from a parent, 875 * 0 if overlimit. As a side-effect, this routine will invoke the 876 * class overlimit action if the class if overlimit. 877 */ 878 879static int 880rmc_under_limit(struct rm_class *cl, struct timeval *now) 881{ 882 rm_class_t *p = cl; 883 rm_class_t *top; 884 struct rm_ifdat *ifd = cl->ifdat_; 885 886 ifd->borrowed_[ifd->qi_] = NULL; 887 /* 888 * If cl is the root class, then always return that it is 889 * underlimit. Otherwise, check to see if the class is underlimit. 890 */ 891 if (cl->parent_ == NULL) 892 return (1); 893 894 if (cl->sleeping_) { 895 if (TV_LT(now, &cl->undertime_)) 896 return (0); 897 898 CALLOUT_STOP(&cl->callout_); 899 cl->sleeping_ = 0; 900 cl->undertime_.tv_sec = 0; 901 return (1); 902 } 903 904 top = NULL; 905 while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) { 906 if (((cl = cl->borrow_) == NULL) || 907 (cl->depth_ > ifd->cutoff_)) { 908#ifdef ADJUST_CUTOFF 909 if (cl != NULL) 910 /* cutoff is taking effect, just 911 return false without calling 912 the delay action. */ 913 return (0); 914#endif 915#ifdef BORROW_OFFTIME 916 /* 917 * check if the class can borrow offtime too. 918 * borrow offtime from the top of the borrow 919 * chain if the top class is not overloaded. 920 */ 921 if (cl != NULL) { 922 /* cutoff is taking effect, use this class as top. 
*/ 923 top = cl; 924 CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_); 925 } 926 if (top != NULL && top->avgidle_ == top->minidle_) 927 top = NULL; 928 p->overtime_ = *now; 929 (p->overlimit)(p, top); 930#else 931 p->overtime_ = *now; 932 (p->overlimit)(p, NULL); 933#endif 934 return (0); 935 } 936 top = cl; 937 } 938 939 if (cl != p) 940 ifd->borrowed_[ifd->qi_] = cl; 941 return (1); 942} 943 944/* 945 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to 946 * Packet-by-packet round robin. 947 * 948 * The heart of the weighted round-robin scheduler, which decides which 949 * class next gets to send a packet. Highest priority first, then 950 * weighted round-robin within priorites. 951 * 952 * Each able-to-send class gets to send until its byte allocation is 953 * exhausted. Thus, the active pointer is only changed after a class has 954 * exhausted its allocation. 955 * 956 * If the scheduler finds no class that is underlimit or able to borrow, 957 * then the first class found that had a nonzero queue and is allowed to 958 * borrow gets to send. 959 */ 960 961static mbuf_t * 962_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op) 963{ 964 struct rm_class *cl = NULL, *first = NULL; 965 u_int deficit; 966 int cpri; 967 mbuf_t *m; 968 struct timeval now; 969 970 RM_GETTIME(now); 971 972 /* 973 * if the driver polls the top of the queue and then removes 974 * the polled packet, we must return the same packet. 
975 */ 976 if (op == ALTDQ_REMOVE && ifd->pollcache_) { 977 cl = ifd->pollcache_; 978 cpri = cl->pri_; 979 if (ifd->efficient_) { 980 /* check if this class is overlimit */ 981 if (cl->undertime_.tv_sec != 0 && 982 rmc_under_limit(cl, &now) == 0) 983 first = cl; 984 } 985 ifd->pollcache_ = NULL; 986 goto _wrr_out; 987 } 988 else { 989 /* mode == ALTDQ_POLL || pollcache == NULL */ 990 ifd->pollcache_ = NULL; 991 ifd->borrowed_[ifd->qi_] = NULL; 992 } 993#ifdef ADJUST_CUTOFF 994 _again: 995#endif 996 for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) { 997 if (ifd->na_[cpri] == 0) 998 continue; 999 deficit = 0; 1000 /* 1001 * Loop through twice for a priority level, if some class 1002 * was unable to send a packet the first round because 1003 * of the weighted round-robin mechanism. 1004 * During the second loop at this level, deficit==2. 1005 * (This second loop is not needed if for every class, 1006 * "M[cl->pri_])" times "cl->allotment" is greater than 1007 * the byte size for the largest packet in the class.) 1008 */ 1009 _wrr_loop: 1010 cl = ifd->active_[cpri]; 1011 ASSERT(cl != NULL); 1012 do { 1013 if ((deficit < 2) && (cl->bytes_alloc_ <= 0)) 1014 cl->bytes_alloc_ += cl->w_allotment_; 1015 if (!qempty(cl->q_)) { 1016 if ((cl->undertime_.tv_sec == 0) || 1017 rmc_under_limit(cl, &now)) { 1018 if (cl->bytes_alloc_ > 0 || deficit > 1) 1019 goto _wrr_out; 1020 1021 /* underlimit but no alloc */ 1022 deficit = 1; 1023#if 1 1024 ifd->borrowed_[ifd->qi_] = NULL; 1025#endif 1026 } 1027 else if (first == NULL && cl->borrow_ != NULL) 1028 first = cl; /* borrowing candidate */ 1029 } 1030 1031 cl->bytes_alloc_ = 0; 1032 cl = cl->peer_; 1033 } while (cl != ifd->active_[cpri]); 1034 1035 if (deficit == 1) { 1036 /* first loop found an underlimit class with deficit */ 1037 /* Loop on same priority level, with new deficit. */ 1038 deficit = 2; 1039 goto _wrr_loop; 1040 } 1041 } 1042 1043#ifdef ADJUST_CUTOFF 1044 /* 1045 * no underlimit class found. 
if cutoff is taking effect, 1046 * increase cutoff and try again. 1047 */ 1048 if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) { 1049 ifd->cutoff_++; 1050 CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_); 1051 goto _again; 1052 } 1053#endif /* ADJUST_CUTOFF */ 1054 /* 1055 * If LINK_EFFICIENCY is turned on, then the first overlimit 1056 * class we encounter will send a packet if all the classes 1057 * of the link-sharing structure are overlimit. 1058 */ 1059 reset_cutoff(ifd); 1060 CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_); 1061 1062 if (!ifd->efficient_ || first == NULL) 1063 return (NULL); 1064 1065 cl = first; 1066 cpri = cl->pri_; 1067#if 0 /* too time-consuming for nothing */ 1068 if (cl->sleeping_) 1069 CALLOUT_STOP(&cl->callout_); 1070 cl->sleeping_ = 0; 1071 cl->undertime_.tv_sec = 0; 1072#endif 1073 ifd->borrowed_[ifd->qi_] = cl->borrow_; 1074 ifd->cutoff_ = cl->borrow_->depth_; 1075 1076 /* 1077 * Deque the packet and do the book keeping... 1078 */ 1079 _wrr_out: 1080 if (op == ALTDQ_REMOVE) { 1081 m = _rmc_getq(cl); 1082 if (m == NULL) 1083 panic("_rmc_wrr_dequeue_next"); 1084 if (qempty(cl->q_)) 1085 ifd->na_[cpri]--; 1086 1087 /* 1088 * Update class statistics and link data. 1089 */ 1090 if (cl->bytes_alloc_ > 0) 1091 cl->bytes_alloc_ -= m_pktlen(m); 1092 1093 if ((cl->bytes_alloc_ <= 0) || first == cl) 1094 ifd->active_[cl->pri_] = cl->peer_; 1095 else 1096 ifd->active_[cl->pri_] = cl; 1097 1098 ifd->class_[ifd->qi_] = cl; 1099 ifd->curlen_[ifd->qi_] = m_pktlen(m); 1100 ifd->now_[ifd->qi_] = now; 1101 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_; 1102 ifd->queued_++; 1103 } else { 1104 /* mode == ALTDQ_PPOLL */ 1105 m = _rmc_pollq(cl); 1106 ifd->pollcache_ = cl; 1107 } 1108 return (m); 1109} 1110 1111/* 1112 * Dequeue & return next packet from the highest priority class that 1113 * has a packet to send & has enough allocation to send it. This 1114 * routine is called by a driver whenever it needs a new packet to 1115 * output. 
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t	*m;
	int	 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	/*
	 * Packet-by-packet round robin: scan priorities from highest to
	 * lowest; within a priority ring, take the first backlogged class
	 * that is underlimit (undertime_.tv_sec == 0 means it has never
	 * been marked overlimit).  Remember in 'first' the first overlimit
	 * class that could borrow, in case nothing is underlimit.
	 */
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;	/* no active class at this priority */
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;	/* class went idle */

		/* round robin: next scan at this priority starts at peer */
		ifd->active_[cpri] = cl->peer_;

		/* record per-slot transmit state for rmc_update_class_util() */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;	/* remember for the matching REMOVE */
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	/* refuse to dequeue while the driver already has max packets queued */
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.
This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int	 idle, avgidle, pktlen;
	int	 pkt_time, tidle;
	rm_class_t *cl, *borrowed;
	rm_class_t *borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	/* walk up from the transmitting class to the root */
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		/* exponentially-weighted moving average of the idle time */
		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			/* underlimit: clamp avgidle and cancel any suspend */
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
1430 * 1431 * Returns: NONE 1432 */ 1433 1434static void 1435rmc_drop_action(struct rm_class *cl) 1436{ 1437 struct rm_ifdat *ifd = cl->ifdat_; 1438 1439 ASSERT(qlen(cl->q_) > 0); 1440 _rmc_dropq(cl); 1441 if (qempty(cl->q_)) 1442 ifd->na_[cl->pri_]--; 1443} 1444 1445void rmc_dropall(struct rm_class *cl) 1446{ 1447 struct rm_ifdat *ifd = cl->ifdat_; 1448 1449 if (!qempty(cl->q_)) { 1450 _flushq(cl->q_); 1451 1452 ifd->na_[cl->pri_]--; 1453 } 1454} 1455 1456#if (__FreeBSD_version > 300000) 1457/* hzto() is removed from FreeBSD-3.0 */ 1458static int hzto(struct timeval *); 1459 1460static int 1461hzto(tv) 1462 struct timeval *tv; 1463{ 1464 struct timeval t2; 1465 1466 getmicrotime(&t2); 1467 t2.tv_sec = tv->tv_sec - t2.tv_sec; 1468 t2.tv_usec = tv->tv_usec - t2.tv_usec; 1469 return (tvtohz(&t2)); 1470} 1471#endif /* __FreeBSD_version > 300000 */ 1472 1473/* 1474 * void 1475 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ 1476 * delay action routine. It is invoked via rmc_under_limit when the 1477 * packet is discoverd to be overlimit. 1478 * 1479 * If the delay action is result of borrow class being overlimit, then 1480 * delay for the offtime of the borrowing class that is overlimit. 1481 * 1482 * Returns: NONE 1483 */ 1484 1485void 1486rmc_delay_action(struct rm_class *cl, struct rm_class *borrow) 1487{ 1488 int delay, t, extradelay; 1489 1490 cl->stats_.overactions++; 1491 TV_DELTA(&cl->undertime_, &cl->overtime_, delay); 1492#ifndef BORROW_OFFTIME 1493 delay += cl->offtime_; 1494#endif 1495 1496 if (!cl->sleeping_) { 1497 CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle); 1498#ifdef BORROW_OFFTIME 1499 if (borrow != NULL) 1500 extradelay = borrow->offtime_; 1501 else 1502#endif 1503 extradelay = cl->offtime_; 1504 1505#ifdef ALTQ 1506 /* 1507 * XXX recalculate suspend time: 1508 * current undertime is (tidle + pkt_time) calculated 1509 * from the last transmission. 
1510 * tidle: time required to bring avgidle back to 0 1511 * pkt_time: target waiting time for this class 1512 * we need to replace pkt_time by offtime 1513 */ 1514 extradelay -= cl->last_pkttime_; 1515#endif 1516 if (extradelay > 0) { 1517 TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_); 1518 delay += extradelay; 1519 } 1520 1521 cl->sleeping_ = 1; 1522 cl->stats_.delays++; 1523 1524 /* 1525 * Since packets are phased randomly with respect to the 1526 * clock, 1 tick (the next clock tick) can be an arbitrarily 1527 * short time so we have to wait for at least two ticks. 1528 * NOTE: If there's no other traffic, we need the timer as 1529 * a 'backstop' to restart this class. 1530 */ 1531 if (delay > tick * 2) { 1532#ifdef __FreeBSD__ 1533 /* FreeBSD rounds up the tick */ 1534 t = hzto(&cl->undertime_); 1535#else 1536 /* other BSDs round down the tick */ 1537 t = hzto(&cl->undertime_) + 1; 1538#endif 1539 } else 1540 t = 2; 1541 CALLOUT_RESET(&cl->callout_, t, 1542 (timeout_t *)rmc_restart, (caddr_t)cl); 1543 } 1544} 1545 1546/* 1547 * void 1548 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is 1549 * called by the system timer code & is responsible checking if the 1550 * class is still sleeping (it might have been restarted as a side 1551 * effect of the queue scan on a packet arrival) and, if so, restarting 1552 * output for the class. Inspecting the class state & restarting output 1553 * require locking the class structure. In general the driver is 1554 * responsible for locking but this is the only routine that is not 1555 * called directly or indirectly from the interface driver so it has 1556 * know about system locking conventions. Under bsd, locking is done 1557 * by raising IPL to splimp so that's what's implemented here. On a 1558 * different system this would probably need to be changed. 
1559 * 1560 * Returns: NONE 1561 */ 1562 1563static void 1564rmc_restart(struct rm_class *cl) 1565{ 1566 struct rm_ifdat *ifd = cl->ifdat_; 1567 int s; 1568 1569#ifdef __NetBSD__ 1570 s = splnet(); 1571#else 1572 s = splimp(); 1573#endif 1574 if (cl->sleeping_) { 1575 cl->sleeping_ = 0; 1576 cl->undertime_.tv_sec = 0; 1577 1578 if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) { 1579 CBQTRACE(rmc_restart, 'trts', cl->stats_.handle); 1580 (ifd->restart)(ifd->ifq_); 1581 } 1582 } 1583 splx(s); 1584} 1585 1586/* 1587 * void 1588 * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit 1589 * handling routine for the root class of the link sharing structure. 1590 * 1591 * Returns: NONE 1592 */ 1593 1594static void 1595rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow) 1596{ 1597 panic("rmc_root_overlimit"); 1598} 1599 1600/* 1601 * Packet Queue handling routines. Eventually, this is to localize the 1602 * effects on the code whether queues are red queues or droptail 1603 * queues. 
1604 */ 1605 1606static int 1607_rmc_addq(rm_class_t *cl, mbuf_t *m) 1608{ 1609#ifdef ALTQ_RIO 1610 if (q_is_rio(cl->q_)) 1611 return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); 1612#endif 1613#ifdef ALTQ_RED 1614 if (q_is_red(cl->q_)) 1615 return red_addq(cl->red_, cl->q_, m, cl->pktattr_); 1616#endif /* ALTQ_RED */ 1617 1618 if (cl->flags_ & RMCF_CLEARDSCP) 1619 write_dsfield(m, cl->pktattr_, 0); 1620 1621 _addq(cl->q_, m); 1622 return (0); 1623} 1624 1625/* note: _rmc_dropq is not called for red */ 1626static void 1627_rmc_dropq(rm_class_t *cl) 1628{ 1629 mbuf_t *m; 1630 1631 if ((m = _getq(cl->q_)) != NULL) 1632 m_freem(m); 1633} 1634 1635static mbuf_t * 1636_rmc_getq(rm_class_t *cl) 1637{ 1638#ifdef ALTQ_RIO 1639 if (q_is_rio(cl->q_)) 1640 return rio_getq((rio_t *)cl->red_, cl->q_); 1641#endif 1642#ifdef ALTQ_RED 1643 if (q_is_red(cl->q_)) 1644 return red_getq(cl->red_, cl->q_); 1645#endif 1646 return _getq(cl->q_); 1647} 1648 1649static mbuf_t * 1650_rmc_pollq(rm_class_t *cl) 1651{ 1652 return qhead(cl->q_); 1653} 1654 1655#ifdef CBQ_TRACE 1656 1657struct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; 1658struct cbqtrace *cbqtrace_ptr = NULL; 1659int cbqtrace_count; 1660 1661/* 1662 * DDB hook to trace cbq events: 1663 * the last 1024 events are held in a circular buffer. 1664 * use "call cbqtrace_dump(N)" to display 20 events from Nth event. 
1665 */ 1666void cbqtrace_dump(int); 1667static char *rmc_funcname(void *); 1668 1669static struct rmc_funcs { 1670 void *func; 1671 char *name; 1672} rmc_funcs[] = 1673{ 1674 rmc_init, "rmc_init", 1675 rmc_queue_packet, "rmc_queue_packet", 1676 rmc_under_limit, "rmc_under_limit", 1677 rmc_update_class_util, "rmc_update_class_util", 1678 rmc_delay_action, "rmc_delay_action", 1679 rmc_restart, "rmc_restart", 1680 _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", 1681 NULL, NULL 1682}; 1683 1684static char *rmc_funcname(void *func) 1685{ 1686 struct rmc_funcs *fp; 1687 1688 for (fp = rmc_funcs; fp->func != NULL; fp++) 1689 if (fp->func == func) 1690 return (fp->name); 1691 return ("unknown"); 1692} 1693 1694void cbqtrace_dump(int counter) 1695{ 1696 int i, *p; 1697 char *cp; 1698 1699 counter = counter % NCBQTRACE; 1700 p = (int *)&cbqtrace_buffer[counter]; 1701 1702 for (i=0; i<20; i++) { 1703 printf("[0x%x] ", *p++); 1704 printf("%s: ", rmc_funcname((void *)*p++)); 1705 cp = (char *)p++; 1706 printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); 1707 printf("%d\n",*p++); 1708 1709 if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) 1710 p = (int *)cbqtrace_buffer; 1711 } 1712} 1713#endif /* CBQ_TRACE */ 1714#endif /* ALTQ_CBQ */ 1715 1716#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) 1717#if !defined(__GNUC__) || defined(ALTQ_DEBUG) 1718 1719void 1720_addq(class_queue_t *q, mbuf_t *m) 1721{ 1722 mbuf_t *m0; 1723 1724 if ((m0 = qtail(q)) != NULL) 1725 m->m_nextpkt = m0->m_nextpkt; 1726 else 1727 m0 = m; 1728 m0->m_nextpkt = m; 1729 qtail(q) = m; 1730 qlen(q)++; 1731} 1732 1733mbuf_t * 1734_getq(class_queue_t *q) 1735{ 1736 mbuf_t *m, *m0; 1737 1738 if ((m = qtail(q)) == NULL) 1739 return (NULL); 1740 if ((m0 = m->m_nextpkt) != m) 1741 m->m_nextpkt = m0->m_nextpkt; 1742 else { 1743 ASSERT(qlen(q) == 1); 1744 qtail(q) = NULL; 1745 } 1746 qlen(q)--; 1747 m0->m_nextpkt = NULL; 1748 return (m0); 1749} 1750 1751/* drop a 
packet at the tail of the queue */ 1752mbuf_t * 1753_getq_tail(class_queue_t *q) 1754{ 1755 mbuf_t *m, *m0, *prev; 1756 1757 if ((m = m0 = qtail(q)) == NULL) 1758 return NULL; 1759 do { 1760 prev = m0; 1761 m0 = m0->m_nextpkt; 1762 } while (m0 != m); 1763 prev->m_nextpkt = m->m_nextpkt; 1764 if (prev == m) { 1765 ASSERT(qlen(q) == 1); 1766 qtail(q) = NULL; 1767 } else 1768 qtail(q) = prev; 1769 qlen(q)--; 1770 m->m_nextpkt = NULL; 1771 return (m); 1772} 1773 1774/* randomly select a packet in the queue */ 1775mbuf_t * 1776_getq_random(class_queue_t *q) 1777{ 1778 struct mbuf *m; 1779 int i, n; 1780 1781 if ((m = qtail(q)) == NULL) 1782 return NULL; 1783 if (m->m_nextpkt == m) { 1784 ASSERT(qlen(q) == 1); 1785 qtail(q) = NULL; 1786 } else { 1787 struct mbuf *prev = NULL; 1788 1789 n = arc4random() % qlen(q) + 1; 1790 for (i = 0; i < n; i++) { 1791 prev = m; 1792 m = m->m_nextpkt; 1793 } 1794 prev->m_nextpkt = m->m_nextpkt; 1795 if (m == qtail(q)) 1796 qtail(q) = prev; 1797 } 1798 qlen(q)--; 1799 m->m_nextpkt = NULL; 1800 return (m); 1801} 1802 1803void 1804_removeq(class_queue_t *q, mbuf_t *m) 1805{ 1806 mbuf_t *m0, *prev; 1807 1808 m0 = qtail(q); 1809 do { 1810 prev = m0; 1811 m0 = m0->m_nextpkt; 1812 } while (m0 != m); 1813 prev->m_nextpkt = m->m_nextpkt; 1814 if (prev == m) 1815 qtail(q) = NULL; 1816 else if (qtail(q) == m) 1817 qtail(q) = prev; 1818 qlen(q)--; 1819} 1820 1821void 1822_flushq(class_queue_t *q) 1823{ 1824 mbuf_t *m; 1825 1826 while ((m = _getq(q)) != NULL) 1827 m_freem(m); 1828 ASSERT(qlen(q) == 0); 1829} 1830 1831#endif /* !__GNUC__ || ALTQ_DEBUG */ 1832#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ 1833