1281642Sglebius/*- 2130365Smlaier * Copyright (c) 1991-1997 Regents of the University of California. 3130365Smlaier * All rights reserved. 4130365Smlaier * 5130365Smlaier * Redistribution and use in source and binary forms, with or without 6130365Smlaier * modification, are permitted provided that the following conditions 7130365Smlaier * are met: 8130365Smlaier * 1. Redistributions of source code must retain the above copyright 9130365Smlaier * notice, this list of conditions and the following disclaimer. 10130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 11130365Smlaier * notice, this list of conditions and the following disclaimer in the 12130365Smlaier * documentation and/or other materials provided with the distribution. 13130365Smlaier * 3. All advertising materials mentioning features or use of this software 14130365Smlaier * must display the following acknowledgement: 15130365Smlaier * This product includes software developed by the Network Research 16130365Smlaier * Group at Lawrence Berkeley Laboratory. 17130365Smlaier * 4. Neither the name of the University nor of the Laboratory may be used 18130365Smlaier * to endorse or promote products derived from this software without 19130365Smlaier * specific prior written permission. 20130365Smlaier * 21130365Smlaier * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24130365Smlaier * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31130365Smlaier * SUCH DAMAGE. 32130365Smlaier * 33130365Smlaier * LBL code modified by speer@eng.sun.com, May 1977. 34130365Smlaier * For questions and/or comments, please send mail to cbq@ee.lbl.gov 35240784Srpaulo * 36240784Srpaulo * @(#)rm_class.c 1.48 97/12/05 SMI 37281642Sglebius * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ 38281642Sglebius * $FreeBSD$ 39130365Smlaier */ 40130365Smlaier#include "opt_altq.h" 41130365Smlaier#include "opt_inet.h" 42130365Smlaier#include "opt_inet6.h" 43130365Smlaier#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ 44130365Smlaier 45130365Smlaier#include <sys/param.h> 46130365Smlaier#include <sys/malloc.h> 47130365Smlaier#include <sys/mbuf.h> 48130365Smlaier#include <sys/socket.h> 49130365Smlaier#include <sys/systm.h> 50130365Smlaier#include <sys/errno.h> 51130365Smlaier#include <sys/time.h> 52130365Smlaier#ifdef ALTQ3_COMPAT 53130365Smlaier#include <sys/kernel.h> 54130365Smlaier#endif 55130365Smlaier 56130365Smlaier#include <net/if.h> 57257186Sglebius#include <net/if_var.h> 58130365Smlaier#ifdef ALTQ3_COMPAT 59130365Smlaier#include <netinet/in.h> 60130365Smlaier#include <netinet/in_systm.h> 61130365Smlaier#include <netinet/ip.h> 62130365Smlaier#endif 63130365Smlaier 64281613Sglebius#include <net/altq/if_altq.h> 65281613Sglebius#include <net/altq/altq.h> 66287009Sloos#include <net/altq/altq_codel.h> 
67281613Sglebius#include <net/altq/altq_rmclass.h> 68281613Sglebius#include <net/altq/altq_rmclass_debug.h> 69281613Sglebius#include <net/altq/altq_red.h> 70281613Sglebius#include <net/altq/altq_rio.h> 71130365Smlaier 72130365Smlaier/* 73130365Smlaier * Local Macros 74130365Smlaier */ 75130365Smlaier 76130365Smlaier#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } 77130365Smlaier 78130365Smlaier/* 79130365Smlaier * Local routines. 80130365Smlaier */ 81130365Smlaier 82130365Smlaierstatic int rmc_satisfied(struct rm_class *, struct timeval *); 83130365Smlaierstatic void rmc_wrr_set_weights(struct rm_ifdat *); 84130365Smlaierstatic void rmc_depth_compute(struct rm_class *); 85130365Smlaierstatic void rmc_depth_recompute(rm_class_t *); 86130365Smlaier 87130365Smlaierstatic mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); 88130365Smlaierstatic mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); 89130365Smlaier 90130365Smlaierstatic int _rmc_addq(rm_class_t *, mbuf_t *); 91130365Smlaierstatic void _rmc_dropq(rm_class_t *); 92130365Smlaierstatic mbuf_t *_rmc_getq(rm_class_t *); 93130365Smlaierstatic mbuf_t *_rmc_pollq(rm_class_t *); 94130365Smlaier 95130365Smlaierstatic int rmc_under_limit(struct rm_class *, struct timeval *); 96130365Smlaierstatic void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); 97130365Smlaierstatic void rmc_drop_action(struct rm_class *); 98130365Smlaierstatic void rmc_restart(struct rm_class *); 99130365Smlaierstatic void rmc_root_overlimit(struct rm_class *, struct rm_class *); 100130365Smlaier 101130365Smlaier#define BORROW_OFFTIME 102130365Smlaier/* 103130365Smlaier * BORROW_OFFTIME (experimental): 104130365Smlaier * borrow the offtime of the class borrowing from. 105130365Smlaier * the reason is that when its own offtime is set, the class is unable 106130365Smlaier * to borrow much, especially when cutoff is taking effect. 
107130365Smlaier * but when the borrowed class is overloaded (advidle is close to minidle), 108130365Smlaier * use the borrowing class's offtime to avoid overload. 109130365Smlaier */ 110130365Smlaier#define ADJUST_CUTOFF 111130365Smlaier/* 112130365Smlaier * ADJUST_CUTOFF (experimental): 113130365Smlaier * if no underlimit class is found due to cutoff, increase cutoff and 114130365Smlaier * retry the scheduling loop. 115130365Smlaier * also, don't invoke delay_actions while cutoff is taking effect, 116130365Smlaier * since a sleeping class won't have a chance to be scheduled in the 117130365Smlaier * next loop. 118130365Smlaier * 119130365Smlaier * now heuristics for setting the top-level variable (cutoff_) becomes: 120130365Smlaier * 1. if a packet arrives for a not-overlimit class, set cutoff 121130365Smlaier * to the depth of the class. 122130365Smlaier * 2. if cutoff is i, and a packet arrives for an overlimit class 123130365Smlaier * with an underlimit ancestor at a lower level than i (say j), 124130365Smlaier * then set cutoff to j. 125130365Smlaier * 3. at scheduling a packet, if there is no underlimit class 126130365Smlaier * due to the current cutoff level, increase cutoff by 1 and 127130365Smlaier * then try to schedule again. 128130365Smlaier */ 129130365Smlaier 130130365Smlaier/* 131130365Smlaier * rm_class_t * 132130365Smlaier * rmc_newclass(...) - Create a new resource management class at priority 133130365Smlaier * 'pri' on the interface given by 'ifd'. 134130365Smlaier * 135130365Smlaier * nsecPerByte is the data rate of the interface in nanoseconds/byte. 136130365Smlaier * E.g., 800 for a 10Mb/s ethernet. If the class gets less 137130365Smlaier * than 100% of the bandwidth, this number should be the 138130365Smlaier * 'effective' rate for the class. Let f be the 139130365Smlaier * bandwidth fraction allocated to this class, and let 140130365Smlaier * nsPerByte be the data rate of the output link in 141130365Smlaier * nanoseconds/byte. 
Then nsecPerByte is set to 142130365Smlaier * nsPerByte / f. E.g., 1600 (= 800 / .5) 143130365Smlaier * for a class that gets 50% of an ethernet's bandwidth. 144130365Smlaier * 145130365Smlaier * action the routine to call when the class is over limit. 146130365Smlaier * 147130365Smlaier * maxq max allowable queue size for class (in packets). 148130365Smlaier * 149130365Smlaier * parent parent class pointer. 150130365Smlaier * 151130365Smlaier * borrow class to borrow from (should be either 'parent' or null). 152130365Smlaier * 153130365Smlaier * maxidle max value allowed for class 'idle' time estimate (this 154130365Smlaier * parameter determines how large an initial burst of packets 155130365Smlaier * can be before overlimit action is invoked. 156130365Smlaier * 157130365Smlaier * offtime how long 'delay' action will delay when class goes over 158130365Smlaier * limit (this parameter determines the steady-state burst 159130365Smlaier * size when a class is running over its limit). 160130365Smlaier * 161130365Smlaier * Maxidle and offtime have to be computed from the following: If the 162130365Smlaier * average packet size is s, the bandwidth fraction allocated to this 163130365Smlaier * class is f, we want to allow b packet bursts, and the gain of the 164130365Smlaier * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: 165130365Smlaier * 166130365Smlaier * ptime = s * nsPerByte * (1 - f) / f 167130365Smlaier * maxidle = ptime * (1 - g^b) / g^b 168130365Smlaier * minidle = -ptime * (1 / (f - 1)) 169130365Smlaier * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) 170130365Smlaier * 171130365Smlaier * Operationally, it's convenient to specify maxidle & offtime in units 172130365Smlaier * independent of the link bandwidth so the maxidle & offtime passed to 173130365Smlaier * this routine are the above values multiplied by 8*f/(1000*nsPerByte). 
174130365Smlaier * (The constant factor is a scale factor needed to make the parameters 175130365Smlaier * integers. This scaling also means that the 'unscaled' values of 176130365Smlaier * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, 177130365Smlaier * not nanoseconds.) Also note that the 'idle' filter computation keeps 178130365Smlaier * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of 179130365Smlaier * maxidle also must be scaled upward by this value. Thus, the passed 180130365Smlaier * values for maxidle and offtime can be computed as follows: 181130365Smlaier * 182130365Smlaier * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) 183130365Smlaier * offtime = offtime * 8 / (1000 * nsecPerByte) 184130365Smlaier * 185130365Smlaier * When USE_HRTIME is employed, then maxidle and offtime become: 186130365Smlaier * maxidle = maxilde * (8.0 / nsecPerByte); 187130365Smlaier * offtime = offtime * (8.0 / nsecPerByte); 188130365Smlaier */ 189130365Smlaierstruct rm_class * 190130365Smlaierrmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, 191130365Smlaier void (*action)(rm_class_t *, rm_class_t *), int maxq, 192130365Smlaier struct rm_class *parent, struct rm_class *borrow, u_int maxidle, 193130365Smlaier int minidle, u_int offtime, int pktsize, int flags) 194130365Smlaier{ 195130365Smlaier struct rm_class *cl; 196130365Smlaier struct rm_class *peer; 197130365Smlaier int s; 198130365Smlaier 199130365Smlaier if (pri >= RM_MAXPRIO) 200130365Smlaier return (NULL); 201130365Smlaier#ifndef ALTQ_RED 202130365Smlaier if (flags & RMCF_RED) { 203130365Smlaier#ifdef ALTQ_DEBUG 204130365Smlaier printf("rmc_newclass: RED not configured for CBQ!\n"); 205130365Smlaier#endif 206130365Smlaier return (NULL); 207130365Smlaier } 208130365Smlaier#endif 209130365Smlaier#ifndef ALTQ_RIO 210130365Smlaier if (flags & RMCF_RIO) { 211130365Smlaier#ifdef ALTQ_DEBUG 212130365Smlaier printf("rmc_newclass: RIO not configured for 
CBQ!\n"); 213130365Smlaier#endif 214130365Smlaier return (NULL); 215130365Smlaier } 216130365Smlaier#endif 217287009Sloos#ifndef ALTQ_CODEL 218287009Sloos if (flags & RMCF_CODEL) { 219287009Sloos#ifdef ALTQ_DEBUG 220287009Sloos printf("rmc_newclass: CODEL not configured for CBQ!\n"); 221287009Sloos#endif 222287009Sloos return (NULL); 223287009Sloos } 224287009Sloos#endif 225130365Smlaier 226240646Sglebius cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); 227130365Smlaier if (cl == NULL) 228130365Smlaier return (NULL); 229130365Smlaier CALLOUT_INIT(&cl->callout_); 230240646Sglebius cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); 231130365Smlaier if (cl->q_ == NULL) { 232184205Sdes free(cl, M_DEVBUF); 233130365Smlaier return (NULL); 234130365Smlaier } 235130365Smlaier 236130365Smlaier /* 237130365Smlaier * Class initialization. 238130365Smlaier */ 239130365Smlaier cl->children_ = NULL; 240130365Smlaier cl->parent_ = parent; 241130365Smlaier cl->borrow_ = borrow; 242130365Smlaier cl->leaf_ = 1; 243130365Smlaier cl->ifdat_ = ifd; 244130365Smlaier cl->pri_ = pri; 245130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 246130365Smlaier cl->depth_ = 0; 247130365Smlaier cl->qthresh_ = 0; 248130365Smlaier cl->ns_per_byte_ = nsecPerByte; 249130365Smlaier 250130365Smlaier qlimit(cl->q_) = maxq; 251130365Smlaier qtype(cl->q_) = Q_DROPHEAD; 252130365Smlaier qlen(cl->q_) = 0; 253130365Smlaier cl->flags_ = flags; 254130365Smlaier 255130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 256130365Smlaier cl->minidle_ = (minidle * (int)nsecPerByte) / 8; 257130365Smlaier if (cl->minidle_ > 0) 258130365Smlaier cl->minidle_ = 0; 259130365Smlaier#else 260130365Smlaier cl->minidle_ = minidle; 261130365Smlaier#endif 262130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 263130365Smlaier if (cl->maxidle_ == 0) 264130365Smlaier cl->maxidle_ = 1; 265130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 
266130365Smlaier cl->avgidle_ = cl->maxidle_; 267130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 268130365Smlaier if (cl->offtime_ == 0) 269130365Smlaier cl->offtime_ = 1; 270130365Smlaier#else 271130365Smlaier cl->avgidle_ = 0; 272130365Smlaier cl->offtime_ = (offtime * nsecPerByte) / 8; 273130365Smlaier#endif 274130365Smlaier cl->overlimit = action; 275130365Smlaier 276130365Smlaier#ifdef ALTQ_RED 277130365Smlaier if (flags & (RMCF_RED|RMCF_RIO)) { 278130365Smlaier int red_flags, red_pkttime; 279130365Smlaier 280130365Smlaier red_flags = 0; 281130365Smlaier if (flags & RMCF_ECN) 282130365Smlaier red_flags |= REDF_ECN; 283130365Smlaier if (flags & RMCF_FLOWVALVE) 284130365Smlaier red_flags |= REDF_FLOWVALVE; 285130365Smlaier#ifdef ALTQ_RIO 286130365Smlaier if (flags & RMCF_CLEARDSCP) 287130365Smlaier red_flags |= RIOF_CLEARDSCP; 288130365Smlaier#endif 289130365Smlaier red_pkttime = nsecPerByte * pktsize / 1000; 290130365Smlaier 291130365Smlaier if (flags & RMCF_RED) { 292130365Smlaier cl->red_ = red_alloc(0, 0, 293130365Smlaier qlimit(cl->q_) * 10/100, 294130365Smlaier qlimit(cl->q_) * 30/100, 295130365Smlaier red_flags, red_pkttime); 296130365Smlaier if (cl->red_ != NULL) 297130365Smlaier qtype(cl->q_) = Q_RED; 298130365Smlaier } 299130365Smlaier#ifdef ALTQ_RIO 300130365Smlaier else { 301130365Smlaier cl->red_ = (red_t *)rio_alloc(0, NULL, 302130365Smlaier red_flags, red_pkttime); 303130365Smlaier if (cl->red_ != NULL) 304130365Smlaier qtype(cl->q_) = Q_RIO; 305130365Smlaier } 306130365Smlaier#endif 307130365Smlaier } 308130365Smlaier#endif /* ALTQ_RED */ 309287009Sloos#ifdef ALTQ_CODEL 310287009Sloos if (flags & RMCF_CODEL) { 311287009Sloos cl->codel_ = codel_alloc(5, 100, 0); 312287009Sloos if (cl->codel_ != NULL) 313287009Sloos qtype(cl->q_) = Q_CODEL; 314287009Sloos } 315287009Sloos#endif 316130365Smlaier 317130365Smlaier /* 318130365Smlaier * put the class into the class tree 319130365Smlaier */ 320130365Smlaier s = splnet(); 
321130368Smlaier IFQ_LOCK(ifd->ifq_); 322130365Smlaier if ((peer = ifd->active_[pri]) != NULL) { 323130365Smlaier /* find the last class at this pri */ 324130365Smlaier cl->peer_ = peer; 325130365Smlaier while (peer->peer_ != ifd->active_[pri]) 326130365Smlaier peer = peer->peer_; 327130365Smlaier peer->peer_ = cl; 328130365Smlaier } else { 329130365Smlaier ifd->active_[pri] = cl; 330130365Smlaier cl->peer_ = cl; 331130365Smlaier } 332130365Smlaier 333130365Smlaier if (cl->parent_) { 334130365Smlaier cl->next_ = parent->children_; 335130365Smlaier parent->children_ = cl; 336130365Smlaier parent->leaf_ = 0; 337130365Smlaier } 338130365Smlaier 339130365Smlaier /* 340130365Smlaier * Compute the depth of this class and its ancestors in the class 341130365Smlaier * hierarchy. 342130365Smlaier */ 343130365Smlaier rmc_depth_compute(cl); 344130365Smlaier 345130365Smlaier /* 346130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 347130365Smlaier */ 348130365Smlaier if (ifd->wrr_) { 349130365Smlaier ifd->num_[pri]++; 350130365Smlaier ifd->alloc_[pri] += cl->allotment_; 351130365Smlaier rmc_wrr_set_weights(ifd); 352130365Smlaier } 353130368Smlaier IFQ_UNLOCK(ifd->ifq_); 354130365Smlaier splx(s); 355130365Smlaier return (cl); 356130365Smlaier} 357130365Smlaier 358130365Smlaierint 359130365Smlaierrmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, 360130365Smlaier int minidle, u_int offtime, int pktsize) 361130365Smlaier{ 362130365Smlaier struct rm_ifdat *ifd; 363130365Smlaier u_int old_allotment; 364130365Smlaier int s; 365130365Smlaier 366130365Smlaier ifd = cl->ifdat_; 367130365Smlaier old_allotment = cl->allotment_; 368130365Smlaier 369130365Smlaier s = splnet(); 370130368Smlaier IFQ_LOCK(ifd->ifq_); 371130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 372130365Smlaier cl->qthresh_ = 0; 373130365Smlaier cl->ns_per_byte_ = nsecPerByte; 374130365Smlaier 375130365Smlaier qlimit(cl->q_) = maxq; 
376130365Smlaier 377130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 378130365Smlaier cl->minidle_ = (minidle * nsecPerByte) / 8; 379130365Smlaier if (cl->minidle_ > 0) 380130365Smlaier cl->minidle_ = 0; 381130365Smlaier#else 382130365Smlaier cl->minidle_ = minidle; 383130365Smlaier#endif 384130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 385130365Smlaier if (cl->maxidle_ == 0) 386130365Smlaier cl->maxidle_ = 1; 387130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 388130365Smlaier cl->avgidle_ = cl->maxidle_; 389130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 390130365Smlaier if (cl->offtime_ == 0) 391130365Smlaier cl->offtime_ = 1; 392130365Smlaier#else 393130365Smlaier cl->avgidle_ = 0; 394130365Smlaier cl->offtime_ = (offtime * nsecPerByte) / 8; 395130365Smlaier#endif 396130365Smlaier 397130365Smlaier /* 398130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 399130365Smlaier */ 400130365Smlaier if (ifd->wrr_) { 401130365Smlaier ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; 402130365Smlaier rmc_wrr_set_weights(ifd); 403130365Smlaier } 404130368Smlaier IFQ_UNLOCK(ifd->ifq_); 405130365Smlaier splx(s); 406130365Smlaier return (0); 407130365Smlaier} 408130365Smlaier 409130365Smlaier/* 410130365Smlaier * static void 411130365Smlaier * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes 412130365Smlaier * the appropriate run robin weights for the CBQ weighted round robin 413130365Smlaier * algorithm. 414130365Smlaier * 415130365Smlaier * Returns: NONE 416130365Smlaier */ 417130365Smlaier 418130365Smlaierstatic void 419130365Smlaierrmc_wrr_set_weights(struct rm_ifdat *ifd) 420130365Smlaier{ 421130365Smlaier int i; 422130365Smlaier struct rm_class *cl, *clh; 423130365Smlaier 424130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 425130365Smlaier /* 426130365Smlaier * This is inverted from that of the simulator to 427130365Smlaier * maintain precision. 
428130365Smlaier */ 429130365Smlaier if (ifd->num_[i] == 0) 430130365Smlaier ifd->M_[i] = 0; 431130365Smlaier else 432130365Smlaier ifd->M_[i] = ifd->alloc_[i] / 433130365Smlaier (ifd->num_[i] * ifd->maxpkt_); 434130365Smlaier /* 435130365Smlaier * Compute the weighted allotment for each class. 436130365Smlaier * This takes the expensive div instruction out 437130365Smlaier * of the main loop for the wrr scheduling path. 438130365Smlaier * These only get recomputed when a class comes or 439130365Smlaier * goes. 440130365Smlaier */ 441130365Smlaier if (ifd->active_[i] != NULL) { 442130365Smlaier clh = cl = ifd->active_[i]; 443130365Smlaier do { 444130365Smlaier /* safe-guard for slow link or alloc_ == 0 */ 445130365Smlaier if (ifd->M_[i] == 0) 446130365Smlaier cl->w_allotment_ = 0; 447130365Smlaier else 448130365Smlaier cl->w_allotment_ = cl->allotment_ / 449130365Smlaier ifd->M_[i]; 450130365Smlaier cl = cl->peer_; 451130365Smlaier } while ((cl != NULL) && (cl != clh)); 452130365Smlaier } 453130365Smlaier } 454130365Smlaier} 455130365Smlaier 456130365Smlaierint 457130365Smlaierrmc_get_weight(struct rm_ifdat *ifd, int pri) 458130365Smlaier{ 459130365Smlaier if ((pri >= 0) && (pri < RM_MAXPRIO)) 460130365Smlaier return (ifd->M_[pri]); 461130365Smlaier else 462130365Smlaier return (0); 463130365Smlaier} 464130365Smlaier 465130365Smlaier/* 466130365Smlaier * static void 467130365Smlaier * rmc_depth_compute(struct rm_class *cl) - This function computes the 468130365Smlaier * appropriate depth of class 'cl' and its ancestors. 469130365Smlaier * 470130365Smlaier * Returns: NONE 471130365Smlaier */ 472130365Smlaier 473130365Smlaierstatic void 474130365Smlaierrmc_depth_compute(struct rm_class *cl) 475130365Smlaier{ 476130365Smlaier rm_class_t *t = cl, *p; 477130365Smlaier 478130365Smlaier /* 479130365Smlaier * Recompute the depth for the branch of the tree. 
480130365Smlaier */ 481130365Smlaier while (t != NULL) { 482130365Smlaier p = t->parent_; 483130365Smlaier if (p && (t->depth_ >= p->depth_)) { 484130365Smlaier p->depth_ = t->depth_ + 1; 485130365Smlaier t = p; 486130365Smlaier } else 487130365Smlaier t = NULL; 488130365Smlaier } 489130365Smlaier} 490130365Smlaier 491130365Smlaier/* 492130365Smlaier * static void 493130365Smlaier * rmc_depth_recompute(struct rm_class *cl) - This function re-computes 494130365Smlaier * the depth of the tree after a class has been deleted. 495130365Smlaier * 496130365Smlaier * Returns: NONE 497130365Smlaier */ 498130365Smlaier 499130365Smlaierstatic void 500130365Smlaierrmc_depth_recompute(rm_class_t *cl) 501130365Smlaier{ 502130365Smlaier#if 1 /* ALTQ */ 503130365Smlaier rm_class_t *p, *t; 504130365Smlaier 505130365Smlaier p = cl; 506130365Smlaier while (p != NULL) { 507130365Smlaier if ((t = p->children_) == NULL) { 508130365Smlaier p->depth_ = 0; 509130365Smlaier } else { 510130365Smlaier int cdepth = 0; 511130365Smlaier 512130365Smlaier while (t != NULL) { 513130365Smlaier if (t->depth_ > cdepth) 514130365Smlaier cdepth = t->depth_; 515130365Smlaier t = t->next_; 516130365Smlaier } 517130365Smlaier 518130365Smlaier if (p->depth_ == cdepth + 1) 519130365Smlaier /* no change to this parent */ 520130365Smlaier return; 521130365Smlaier 522130365Smlaier p->depth_ = cdepth + 1; 523130365Smlaier } 524130365Smlaier 525130365Smlaier p = p->parent_; 526130365Smlaier } 527130365Smlaier#else 528130365Smlaier rm_class_t *t; 529130365Smlaier 530130365Smlaier if (cl->depth_ >= 1) { 531130365Smlaier if (cl->children_ == NULL) { 532130365Smlaier cl->depth_ = 0; 533130365Smlaier } else if ((t = cl->children_) != NULL) { 534130365Smlaier while (t != NULL) { 535130365Smlaier if (t->children_ != NULL) 536130365Smlaier rmc_depth_recompute(t); 537130365Smlaier t = t->next_; 538130365Smlaier } 539130365Smlaier } else 540130365Smlaier rmc_depth_compute(cl); 541130365Smlaier } 542130365Smlaier#endif 
543130365Smlaier} 544130365Smlaier 545130365Smlaier/* 546130365Smlaier * void 547130365Smlaier * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This 548130365Smlaier * function deletes a class from the link-sharing structure and frees 549130365Smlaier * all resources associated with the class. 550130365Smlaier * 551130365Smlaier * Returns: NONE 552130365Smlaier */ 553130365Smlaier 554130365Smlaiervoid 555130365Smlaierrmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) 556130365Smlaier{ 557130365Smlaier struct rm_class *p, *head, *previous; 558130365Smlaier int s; 559130365Smlaier 560130365Smlaier ASSERT(cl->children_ == NULL); 561130365Smlaier 562130365Smlaier if (cl->sleeping_) 563130365Smlaier CALLOUT_STOP(&cl->callout_); 564130365Smlaier 565130365Smlaier s = splnet(); 566130368Smlaier IFQ_LOCK(ifd->ifq_); 567130365Smlaier /* 568130365Smlaier * Free packets in the packet queue. 569130365Smlaier * XXX - this may not be a desired behavior. Packets should be 570130365Smlaier * re-queued. 571130365Smlaier */ 572130365Smlaier rmc_dropall(cl); 573130365Smlaier 574130365Smlaier /* 575130365Smlaier * If the class has a parent, then remove the class from the 576130365Smlaier * class from the parent's children chain. 
577130365Smlaier */ 578130365Smlaier if (cl->parent_ != NULL) { 579130365Smlaier head = cl->parent_->children_; 580130365Smlaier p = previous = head; 581130365Smlaier if (head->next_ == NULL) { 582130365Smlaier ASSERT(head == cl); 583130365Smlaier cl->parent_->children_ = NULL; 584130365Smlaier cl->parent_->leaf_ = 1; 585130365Smlaier } else while (p != NULL) { 586130365Smlaier if (p == cl) { 587130365Smlaier if (cl == head) 588130365Smlaier cl->parent_->children_ = cl->next_; 589130365Smlaier else 590130365Smlaier previous->next_ = cl->next_; 591130365Smlaier cl->next_ = NULL; 592130365Smlaier p = NULL; 593130365Smlaier } else { 594130365Smlaier previous = p; 595130365Smlaier p = p->next_; 596130365Smlaier } 597130365Smlaier } 598130365Smlaier } 599130365Smlaier 600130365Smlaier /* 601130365Smlaier * Delete class from class priority peer list. 602130365Smlaier */ 603130365Smlaier if ((p = ifd->active_[cl->pri_]) != NULL) { 604130365Smlaier /* 605130365Smlaier * If there is more than one member of this priority 606130365Smlaier * level, then look for class(cl) in the priority level. 607130365Smlaier */ 608130365Smlaier if (p != p->peer_) { 609130365Smlaier while (p->peer_ != cl) 610130365Smlaier p = p->peer_; 611130365Smlaier p->peer_ = cl->peer_; 612130365Smlaier 613130365Smlaier if (ifd->active_[cl->pri_] == cl) 614130365Smlaier ifd->active_[cl->pri_] = cl->peer_; 615130365Smlaier } else { 616130365Smlaier ASSERT(p == cl); 617130365Smlaier ifd->active_[cl->pri_] = NULL; 618130365Smlaier } 619130365Smlaier } 620130365Smlaier 621130365Smlaier /* 622130365Smlaier * Recompute the WRR weights. 623130365Smlaier */ 624130365Smlaier if (ifd->wrr_) { 625130365Smlaier ifd->alloc_[cl->pri_] -= cl->allotment_; 626130365Smlaier ifd->num_[cl->pri_]--; 627130365Smlaier rmc_wrr_set_weights(ifd); 628130365Smlaier } 629130365Smlaier 630130365Smlaier /* 631130365Smlaier * Re-compute the depth of the tree. 
632130365Smlaier */ 633130365Smlaier#if 1 /* ALTQ */ 634130365Smlaier rmc_depth_recompute(cl->parent_); 635130365Smlaier#else 636130365Smlaier rmc_depth_recompute(ifd->root_); 637130365Smlaier#endif 638130365Smlaier 639130368Smlaier IFQ_UNLOCK(ifd->ifq_); 640130365Smlaier splx(s); 641130365Smlaier 642130365Smlaier /* 643130365Smlaier * Free the class structure. 644130365Smlaier */ 645130365Smlaier if (cl->red_ != NULL) { 646130365Smlaier#ifdef ALTQ_RIO 647130365Smlaier if (q_is_rio(cl->q_)) 648130365Smlaier rio_destroy((rio_t *)cl->red_); 649130365Smlaier#endif 650130365Smlaier#ifdef ALTQ_RED 651130365Smlaier if (q_is_red(cl->q_)) 652130365Smlaier red_destroy(cl->red_); 653130365Smlaier#endif 654287009Sloos#ifdef ALTQ_CODEL 655287009Sloos if (q_is_codel(cl->q_)) 656287009Sloos codel_destroy(cl->codel_); 657287009Sloos#endif 658130365Smlaier } 659184205Sdes free(cl->q_, M_DEVBUF); 660184205Sdes free(cl, M_DEVBUF); 661130365Smlaier} 662130365Smlaier 663130365Smlaier 664130365Smlaier/* 665130365Smlaier * void 666130365Smlaier * rmc_init(...) - Initialize the resource management data structures 667130365Smlaier * associated with the output portion of interface 'ifp'. 'ifd' is 668130365Smlaier * where the structures will be built (for backwards compatibility, the 669130365Smlaier * structures aren't kept in the ifnet struct). 'nsecPerByte' 670130365Smlaier * gives the link speed (inverse of bandwidth) in nanoseconds/byte. 671130365Smlaier * 'restart' is the driver-specific routine that the generic 'delay 672130365Smlaier * until under limit' action will call to restart output. `maxq' 673130365Smlaier * is the queue size of the 'link' & 'default' classes. 'maxqueued' 674130365Smlaier * is the maximum number of packets that the resource management 675130365Smlaier * code will allow to be queued 'downstream' (this is typically 1). 
676130365Smlaier * 677130365Smlaier * Returns: NONE 678130365Smlaier */ 679130365Smlaier 680130365Smlaiervoid 681130365Smlaierrmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, 682130365Smlaier void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, 683130365Smlaier int minidle, u_int offtime, int flags) 684130365Smlaier{ 685130365Smlaier int i, mtu; 686130365Smlaier 687130365Smlaier /* 688130365Smlaier * Initialize the CBQ tracing/debug facility. 689130365Smlaier */ 690130365Smlaier CBQTRACEINIT(); 691130365Smlaier 692130365Smlaier bzero((char *)ifd, sizeof (*ifd)); 693130365Smlaier mtu = ifq->altq_ifp->if_mtu; 694130365Smlaier ifd->ifq_ = ifq; 695130365Smlaier ifd->restart = restart; 696130365Smlaier ifd->maxqueued_ = maxqueued; 697130365Smlaier ifd->ns_per_byte_ = nsecPerByte; 698130365Smlaier ifd->maxpkt_ = mtu; 699130365Smlaier ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; 700130365Smlaier ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; 701130365Smlaier#if 1 702130365Smlaier ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; 703130365Smlaier if (mtu * nsecPerByte > 10 * 1000000) 704130365Smlaier ifd->maxiftime_ /= 4; 705130365Smlaier#endif 706130365Smlaier 707130365Smlaier reset_cutoff(ifd); 708130365Smlaier CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); 709130365Smlaier 710130365Smlaier /* 711130365Smlaier * Initialize the CBQ's WRR state. 712130365Smlaier */ 713130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 714130365Smlaier ifd->alloc_[i] = 0; 715130365Smlaier ifd->M_[i] = 0; 716130365Smlaier ifd->num_[i] = 0; 717130365Smlaier ifd->na_[i] = 0; 718130365Smlaier ifd->active_[i] = NULL; 719130365Smlaier } 720130365Smlaier 721130365Smlaier /* 722130365Smlaier * Initialize current packet state. 
723130365Smlaier */ 724130365Smlaier ifd->qi_ = 0; 725130365Smlaier ifd->qo_ = 0; 726130365Smlaier for (i = 0; i < RM_MAXQUEUED; i++) { 727130365Smlaier ifd->class_[i] = NULL; 728130365Smlaier ifd->curlen_[i] = 0; 729130365Smlaier ifd->borrowed_[i] = NULL; 730130365Smlaier } 731130365Smlaier 732130365Smlaier /* 733130365Smlaier * Create the root class of the link-sharing structure. 734130365Smlaier */ 735130365Smlaier if ((ifd->root_ = rmc_newclass(0, ifd, 736130365Smlaier nsecPerByte, 737130365Smlaier rmc_root_overlimit, maxq, 0, 0, 738130365Smlaier maxidle, minidle, offtime, 739130365Smlaier 0, 0)) == NULL) { 740130365Smlaier printf("rmc_init: root class not allocated\n"); 741130365Smlaier return ; 742130365Smlaier } 743130365Smlaier ifd->root_->depth_ = 0; 744130365Smlaier} 745130365Smlaier 746130365Smlaier/* 747130365Smlaier * void 748130365Smlaier * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by 749130365Smlaier * mbuf 'm' to queue for resource class 'cl'. This routine is called 750130365Smlaier * by a driver's if_output routine. This routine must be called with 751130365Smlaier * output packet completion interrupts locked out (to avoid racing with 752130365Smlaier * rmc_dequeue_next). 
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);	/* remembered before the add */

	RM_GETTIME(now);
	/*
	 * A depth cutoff is in effect; see whether this arrival lets us
	 * lower it (the class itself, or an underlimit ancestor, is
	 * shallower than the current cutoff).
	 */
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed (e.g. dropped by the RED/RIO/CoDel discipline) */
		return (-1);

	if (is_empty) {
		/* queue went non-empty: count the class as active at its prio */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	/* enforce the per-class queue limit after the add */
	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.  On finding the first
 *	unsatisfied class, set the cutoff to that class's depth; if every
 *	class is satisfied, reset the cutoff entirely.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	/* scan priorities high-to-low, walking each circular peer list once */
	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied.  0, otherwise.
 *	A class is "satisfied" recursively: it is underlimit, or it is a
 *	leaf with an acceptably short queue, or all of its children are
 *	satisfied.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	/* still before its next-underlimit time => underlimit => satisfied */
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		/* leaf class: satisfied unless awake with a backlog above qthresh_ */
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	/* interior class: satisfied only if every child is */
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;	/* remember the original class */
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		/* suspended by a prior delay action; stay blocked until undertime_ */
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* suspension has expired: wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	/*
	 * Walk up the borrow chain until an underlimit ancestor is found,
	 * or the chain ends / exceeds the current depth cutoff.
	 */
	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			/* avgidle_ pinned at minidle_ means "overloaded": no offtime borrow */
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	/* record which ancestor we are borrowing from (NULL if none needed) */
	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
 *	Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	/* scan priorities high-to-low; skip levels with no active classes */
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			/* replenish a spent allocation (first pass only) */
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					/* second pass ignores the allocation */
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	/* 'first' is only ever set when cl->borrow_ != NULL, so this is safe */
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		/* advance the round-robin pointer once the allocation is spent */
		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		/* record the transmission in the circular in-flight slots */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	/* packet-by-packet round robin: first sendable class at the
	 * highest non-empty priority wins */
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;	/* borrowing candidate */
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	/* 'first' is only ever set when cl->borrow_ != NULL, so this is safe */
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		/* record the transmission in the circular in-flight slots */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	/* refuse to over-commit the driver's in-flight window */
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			/* clamp: never run more than maxiftime_ ahead of now */
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	/* walk from the completed class up to the root, updating each EWMA */
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		/* exponential moving average with gain 2^-RM_FILTER_GAIN */
		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			/* underlimit: cap avgidle at maxidle_ and clear any suspension */
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		/* credit a borrow to every ancestor below the lender itself */
		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 * Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	/* drop one packet; if the queue emptied, the class is no longer active */
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

/* Flush every queued packet of 'cl' and deactivate it at its priority. */
void rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto(struct timeval *);

/*
 * Convert an absolute timeval into a tick count relative to the current
 * time (via getmicrotime + tvtohz).
 */
static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 * If the delay action is result of borrow class being overlimit, then
 *	delay for the offtime of the borrowing class that is overlimit.
 *
 * Returns:	NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	/* base delay: time until the class becomes underlimit again */
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		/* use the overlimit lender's offtime when one is given */
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 * Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	/* serialize with the driver's enqueue/dequeue paths */
	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		/* kick the driver only if it has room for another packet */
		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 * handling routine for the root class of the link sharing structure.
 * The root class can never be overlimit, so reaching here is a bug.
 *
 * Returns: NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code whether queues are red queues or droptail
 *	queues.
1602130365Smlaier */ 1603130365Smlaier 1604130365Smlaierstatic int 1605130365Smlaier_rmc_addq(rm_class_t *cl, mbuf_t *m) 1606130365Smlaier{ 1607130365Smlaier#ifdef ALTQ_RIO 1608130365Smlaier if (q_is_rio(cl->q_)) 1609130365Smlaier return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); 1610130365Smlaier#endif 1611130365Smlaier#ifdef ALTQ_RED 1612130365Smlaier if (q_is_red(cl->q_)) 1613130365Smlaier return red_addq(cl->red_, cl->q_, m, cl->pktattr_); 1614130365Smlaier#endif /* ALTQ_RED */ 1615287009Sloos#ifdef ALTQ_CODEL 1616287009Sloos if (q_is_codel(cl->q_)) 1617287009Sloos return codel_addq(cl->codel_, cl->q_, m); 1618287009Sloos#endif 1619130365Smlaier 1620130365Smlaier if (cl->flags_ & RMCF_CLEARDSCP) 1621130365Smlaier write_dsfield(m, cl->pktattr_, 0); 1622130365Smlaier 1623130365Smlaier _addq(cl->q_, m); 1624130365Smlaier return (0); 1625130365Smlaier} 1626130365Smlaier 1627130365Smlaier/* note: _rmc_dropq is not called for red */ 1628130365Smlaierstatic void 1629130365Smlaier_rmc_dropq(rm_class_t *cl) 1630130365Smlaier{ 1631130365Smlaier mbuf_t *m; 1632130365Smlaier 1633130365Smlaier if ((m = _getq(cl->q_)) != NULL) 1634130365Smlaier m_freem(m); 1635130365Smlaier} 1636130365Smlaier 1637130365Smlaierstatic mbuf_t * 1638130365Smlaier_rmc_getq(rm_class_t *cl) 1639130365Smlaier{ 1640130365Smlaier#ifdef ALTQ_RIO 1641130365Smlaier if (q_is_rio(cl->q_)) 1642130365Smlaier return rio_getq((rio_t *)cl->red_, cl->q_); 1643130365Smlaier#endif 1644130365Smlaier#ifdef ALTQ_RED 1645130365Smlaier if (q_is_red(cl->q_)) 1646130365Smlaier return red_getq(cl->red_, cl->q_); 1647130365Smlaier#endif 1648287009Sloos#ifdef ALTQ_CODEL 1649287009Sloos if (q_is_codel(cl->q_)) 1650287009Sloos return codel_getq(cl->codel_, cl->q_); 1651287009Sloos#endif 1652130365Smlaier return _getq(cl->q_); 1653130365Smlaier} 1654130365Smlaier 1655130365Smlaierstatic mbuf_t * 1656130365Smlaier_rmc_pollq(rm_class_t *cl) 1657130365Smlaier{ 1658130365Smlaier return qhead(cl->q_); 1659130365Smlaier} 
1660130365Smlaier 1661130365Smlaier#ifdef CBQ_TRACE 1662130365Smlaier 1663130365Smlaierstruct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; 1664130365Smlaierstruct cbqtrace *cbqtrace_ptr = NULL; 1665130365Smlaierint cbqtrace_count; 1666130365Smlaier 1667130365Smlaier/* 1668130365Smlaier * DDB hook to trace cbq events: 1669130365Smlaier * the last 1024 events are held in a circular buffer. 1670130365Smlaier * use "call cbqtrace_dump(N)" to display 20 events from Nth event. 1671130365Smlaier */ 1672130365Smlaiervoid cbqtrace_dump(int); 1673130365Smlaierstatic char *rmc_funcname(void *); 1674130365Smlaier 1675130365Smlaierstatic struct rmc_funcs { 1676130365Smlaier void *func; 1677130365Smlaier char *name; 1678130365Smlaier} rmc_funcs[] = 1679130365Smlaier{ 1680130365Smlaier rmc_init, "rmc_init", 1681130365Smlaier rmc_queue_packet, "rmc_queue_packet", 1682130365Smlaier rmc_under_limit, "rmc_under_limit", 1683130365Smlaier rmc_update_class_util, "rmc_update_class_util", 1684130365Smlaier rmc_delay_action, "rmc_delay_action", 1685130365Smlaier rmc_restart, "rmc_restart", 1686130365Smlaier _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", 1687130365Smlaier NULL, NULL 1688130365Smlaier}; 1689130365Smlaier 1690130365Smlaierstatic char *rmc_funcname(void *func) 1691130365Smlaier{ 1692130365Smlaier struct rmc_funcs *fp; 1693130365Smlaier 1694130365Smlaier for (fp = rmc_funcs; fp->func != NULL; fp++) 1695130365Smlaier if (fp->func == func) 1696130365Smlaier return (fp->name); 1697130365Smlaier return ("unknown"); 1698130365Smlaier} 1699130365Smlaier 1700130365Smlaiervoid cbqtrace_dump(int counter) 1701130365Smlaier{ 1702130365Smlaier int i, *p; 1703130365Smlaier char *cp; 1704130365Smlaier 1705130365Smlaier counter = counter % NCBQTRACE; 1706130365Smlaier p = (int *)&cbqtrace_buffer[counter]; 1707130365Smlaier 1708130365Smlaier for (i=0; i<20; i++) { 1709130365Smlaier printf("[0x%x] ", *p++); 1710130365Smlaier printf("%s: ", rmc_funcname((void *)*p++)); 1711130365Smlaier cp = (char 
*)p++; 1712130365Smlaier printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); 1713130365Smlaier printf("%d\n",*p++); 1714130365Smlaier 1715130365Smlaier if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) 1716130365Smlaier p = (int *)cbqtrace_buffer; 1717130365Smlaier } 1718130365Smlaier} 1719130365Smlaier#endif /* CBQ_TRACE */ 1720130365Smlaier#endif /* ALTQ_CBQ */ 1721130365Smlaier 1722287009Sloos#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \ 1723287009Sloos defined(ALTQ_HFSC) || defined(ALTQ_PRIQ) || defined(ALTQ_CODEL) 1724130365Smlaier#if !defined(__GNUC__) || defined(ALTQ_DEBUG) 1725130365Smlaier 1726130365Smlaiervoid 1727130365Smlaier_addq(class_queue_t *q, mbuf_t *m) 1728130365Smlaier{ 1729130365Smlaier mbuf_t *m0; 1730130365Smlaier 1731130365Smlaier if ((m0 = qtail(q)) != NULL) 1732130365Smlaier m->m_nextpkt = m0->m_nextpkt; 1733130365Smlaier else 1734130365Smlaier m0 = m; 1735130365Smlaier m0->m_nextpkt = m; 1736130365Smlaier qtail(q) = m; 1737130365Smlaier qlen(q)++; 1738130365Smlaier} 1739130365Smlaier 1740130365Smlaiermbuf_t * 1741130365Smlaier_getq(class_queue_t *q) 1742130365Smlaier{ 1743130365Smlaier mbuf_t *m, *m0; 1744130365Smlaier 1745130365Smlaier if ((m = qtail(q)) == NULL) 1746130365Smlaier return (NULL); 1747130365Smlaier if ((m0 = m->m_nextpkt) != m) 1748130365Smlaier m->m_nextpkt = m0->m_nextpkt; 1749130365Smlaier else { 1750130365Smlaier ASSERT(qlen(q) == 1); 1751130365Smlaier qtail(q) = NULL; 1752130365Smlaier } 1753130365Smlaier qlen(q)--; 1754130365Smlaier m0->m_nextpkt = NULL; 1755130365Smlaier return (m0); 1756130365Smlaier} 1757130365Smlaier 1758130365Smlaier/* drop a packet at the tail of the queue */ 1759130365Smlaiermbuf_t * 1760130365Smlaier_getq_tail(class_queue_t *q) 1761130365Smlaier{ 1762130365Smlaier mbuf_t *m, *m0, *prev; 1763130365Smlaier 1764130365Smlaier if ((m = m0 = qtail(q)) == NULL) 1765130365Smlaier return NULL; 1766130365Smlaier do { 1767130365Smlaier prev = m0; 1768130365Smlaier m0 = m0->m_nextpkt; 
1769130365Smlaier } while (m0 != m); 1770130365Smlaier prev->m_nextpkt = m->m_nextpkt; 1771130365Smlaier if (prev == m) { 1772130365Smlaier ASSERT(qlen(q) == 1); 1773130365Smlaier qtail(q) = NULL; 1774130365Smlaier } else 1775130365Smlaier qtail(q) = prev; 1776130365Smlaier qlen(q)--; 1777130365Smlaier m->m_nextpkt = NULL; 1778130365Smlaier return (m); 1779130365Smlaier} 1780130365Smlaier 1781130365Smlaier/* randomly select a packet in the queue */ 1782130365Smlaiermbuf_t * 1783130365Smlaier_getq_random(class_queue_t *q) 1784130365Smlaier{ 1785130365Smlaier struct mbuf *m; 1786130365Smlaier int i, n; 1787130365Smlaier 1788130365Smlaier if ((m = qtail(q)) == NULL) 1789130365Smlaier return NULL; 1790130365Smlaier if (m->m_nextpkt == m) { 1791130365Smlaier ASSERT(qlen(q) == 1); 1792130365Smlaier qtail(q) = NULL; 1793130365Smlaier } else { 1794130365Smlaier struct mbuf *prev = NULL; 1795130365Smlaier 1796130365Smlaier n = arc4random() % qlen(q) + 1; 1797130365Smlaier for (i = 0; i < n; i++) { 1798130365Smlaier prev = m; 1799130365Smlaier m = m->m_nextpkt; 1800130365Smlaier } 1801130365Smlaier prev->m_nextpkt = m->m_nextpkt; 1802130365Smlaier if (m == qtail(q)) 1803130365Smlaier qtail(q) = prev; 1804130365Smlaier } 1805130365Smlaier qlen(q)--; 1806130365Smlaier m->m_nextpkt = NULL; 1807130365Smlaier return (m); 1808130365Smlaier} 1809130365Smlaier 1810130365Smlaiervoid 1811130365Smlaier_removeq(class_queue_t *q, mbuf_t *m) 1812130365Smlaier{ 1813130365Smlaier mbuf_t *m0, *prev; 1814130365Smlaier 1815130365Smlaier m0 = qtail(q); 1816130365Smlaier do { 1817130365Smlaier prev = m0; 1818130365Smlaier m0 = m0->m_nextpkt; 1819130365Smlaier } while (m0 != m); 1820130365Smlaier prev->m_nextpkt = m->m_nextpkt; 1821130365Smlaier if (prev == m) 1822130365Smlaier qtail(q) = NULL; 1823130365Smlaier else if (qtail(q) == m) 1824130365Smlaier qtail(q) = prev; 1825130365Smlaier qlen(q)--; 1826130365Smlaier} 1827130365Smlaier 1828130365Smlaiervoid 1829130365Smlaier_flushq(class_queue_t 
*q) 1830130365Smlaier{ 1831130365Smlaier mbuf_t *m; 1832130365Smlaier 1833130365Smlaier while ((m = _getq(q)) != NULL) 1834130365Smlaier m_freem(m); 1835130365Smlaier ASSERT(qlen(q) == 0); 1836130365Smlaier} 1837130365Smlaier 1838130365Smlaier#endif /* !__GNUC__ || ALTQ_DEBUG */ 1839130365Smlaier#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */ 1840