altq_rmclass.c revision 257186
1130368Smlaier/* $FreeBSD: head/sys/contrib/altq/altq/altq_rmclass.c 257186 2013-10-26 18:59:58Z glebius $ */ 2219457Sjkim/* $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $ */ 3130365Smlaier 4130365Smlaier/* 5130365Smlaier * Copyright (c) 1991-1997 Regents of the University of California. 6130365Smlaier * All rights reserved. 7130365Smlaier * 8130365Smlaier * Redistribution and use in source and binary forms, with or without 9130365Smlaier * modification, are permitted provided that the following conditions 10130365Smlaier * are met: 11130365Smlaier * 1. Redistributions of source code must retain the above copyright 12130365Smlaier * notice, this list of conditions and the following disclaimer. 13130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 14130365Smlaier * notice, this list of conditions and the following disclaimer in the 15130365Smlaier * documentation and/or other materials provided with the distribution. 16130365Smlaier * 3. All advertising materials mentioning features or use of this software 17130365Smlaier * must display the following acknowledgement: 18130365Smlaier * This product includes software developed by the Network Research 19130365Smlaier * Group at Lawrence Berkeley Laboratory. 20130365Smlaier * 4. Neither the name of the University nor of the Laboratory may be used 21130365Smlaier * to endorse or promote products derived from this software without 22130365Smlaier * specific prior written permission. 23130365Smlaier * 24130365Smlaier * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27130365Smlaier * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34130365Smlaier * SUCH DAMAGE. 35130365Smlaier * 36130365Smlaier * LBL code modified by speer@eng.sun.com, May 1977. 37130365Smlaier * For questions and/or comments, please send mail to cbq@ee.lbl.gov 38240784Srpaulo * 39240784Srpaulo * @(#)rm_class.c 1.48 97/12/05 SMI 40130365Smlaier */ 41130365Smlaier#if defined(__FreeBSD__) || defined(__NetBSD__) 42130365Smlaier#include "opt_altq.h" 43130365Smlaier#include "opt_inet.h" 44130365Smlaier#ifdef __FreeBSD__ 45130365Smlaier#include "opt_inet6.h" 46130365Smlaier#endif 47130365Smlaier#endif /* __FreeBSD__ || __NetBSD__ */ 48130365Smlaier#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ 49130365Smlaier 50130365Smlaier#include <sys/param.h> 51130365Smlaier#include <sys/malloc.h> 52130365Smlaier#include <sys/mbuf.h> 53130365Smlaier#include <sys/socket.h> 54130365Smlaier#include <sys/systm.h> 55130365Smlaier#include <sys/errno.h> 56130365Smlaier#include <sys/time.h> 57130365Smlaier#ifdef ALTQ3_COMPAT 58130365Smlaier#include <sys/kernel.h> 59130365Smlaier#endif 60130365Smlaier 61130365Smlaier#include <net/if.h> 62257186Sglebius#include <net/if_var.h> 63130365Smlaier#ifdef ALTQ3_COMPAT 64130365Smlaier#include <netinet/in.h> 65130365Smlaier#include <netinet/in_systm.h> 66130365Smlaier#include <netinet/ip.h> 67130365Smlaier#endif 68130365Smlaier 69257186Sglebius#include <altq/if_altq.h> 70130365Smlaier#include 
<altq/altq.h> 71130365Smlaier#include <altq/altq_rmclass.h> 72130365Smlaier#include <altq/altq_rmclass_debug.h> 73130365Smlaier#include <altq/altq_red.h> 74130365Smlaier#include <altq/altq_rio.h> 75130365Smlaier 76130365Smlaier/* 77130365Smlaier * Local Macros 78130365Smlaier */ 79130365Smlaier 80130365Smlaier#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } 81130365Smlaier 82130365Smlaier/* 83130365Smlaier * Local routines. 84130365Smlaier */ 85130365Smlaier 86130365Smlaierstatic int rmc_satisfied(struct rm_class *, struct timeval *); 87130365Smlaierstatic void rmc_wrr_set_weights(struct rm_ifdat *); 88130365Smlaierstatic void rmc_depth_compute(struct rm_class *); 89130365Smlaierstatic void rmc_depth_recompute(rm_class_t *); 90130365Smlaier 91130365Smlaierstatic mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); 92130365Smlaierstatic mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); 93130365Smlaier 94130365Smlaierstatic int _rmc_addq(rm_class_t *, mbuf_t *); 95130365Smlaierstatic void _rmc_dropq(rm_class_t *); 96130365Smlaierstatic mbuf_t *_rmc_getq(rm_class_t *); 97130365Smlaierstatic mbuf_t *_rmc_pollq(rm_class_t *); 98130365Smlaier 99130365Smlaierstatic int rmc_under_limit(struct rm_class *, struct timeval *); 100130365Smlaierstatic void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); 101130365Smlaierstatic void rmc_drop_action(struct rm_class *); 102130365Smlaierstatic void rmc_restart(struct rm_class *); 103130365Smlaierstatic void rmc_root_overlimit(struct rm_class *, struct rm_class *); 104130365Smlaier 105130365Smlaier#define BORROW_OFFTIME 106130365Smlaier/* 107130365Smlaier * BORROW_OFFTIME (experimental): 108130365Smlaier * borrow the offtime of the class borrowing from. 109130365Smlaier * the reason is that when its own offtime is set, the class is unable 110130365Smlaier * to borrow much, especially when cutoff is taking effect. 
111130365Smlaier * but when the borrowed class is overloaded (advidle is close to minidle), 112130365Smlaier * use the borrowing class's offtime to avoid overload. 113130365Smlaier */ 114130365Smlaier#define ADJUST_CUTOFF 115130365Smlaier/* 116130365Smlaier * ADJUST_CUTOFF (experimental): 117130365Smlaier * if no underlimit class is found due to cutoff, increase cutoff and 118130365Smlaier * retry the scheduling loop. 119130365Smlaier * also, don't invoke delay_actions while cutoff is taking effect, 120130365Smlaier * since a sleeping class won't have a chance to be scheduled in the 121130365Smlaier * next loop. 122130365Smlaier * 123130365Smlaier * now heuristics for setting the top-level variable (cutoff_) becomes: 124130365Smlaier * 1. if a packet arrives for a not-overlimit class, set cutoff 125130365Smlaier * to the depth of the class. 126130365Smlaier * 2. if cutoff is i, and a packet arrives for an overlimit class 127130365Smlaier * with an underlimit ancestor at a lower level than i (say j), 128130365Smlaier * then set cutoff to j. 129130365Smlaier * 3. at scheduling a packet, if there is no underlimit class 130130365Smlaier * due to the current cutoff level, increase cutoff by 1 and 131130365Smlaier * then try to schedule again. 132130365Smlaier */ 133130365Smlaier 134130365Smlaier/* 135130365Smlaier * rm_class_t * 136130365Smlaier * rmc_newclass(...) - Create a new resource management class at priority 137130365Smlaier * 'pri' on the interface given by 'ifd'. 138130365Smlaier * 139130365Smlaier * nsecPerByte is the data rate of the interface in nanoseconds/byte. 140130365Smlaier * E.g., 800 for a 10Mb/s ethernet. If the class gets less 141130365Smlaier * than 100% of the bandwidth, this number should be the 142130365Smlaier * 'effective' rate for the class. Let f be the 143130365Smlaier * bandwidth fraction allocated to this class, and let 144130365Smlaier * nsPerByte be the data rate of the output link in 145130365Smlaier * nanoseconds/byte. 
Then nsecPerByte is set to 146130365Smlaier * nsPerByte / f. E.g., 1600 (= 800 / .5) 147130365Smlaier * for a class that gets 50% of an ethernet's bandwidth. 148130365Smlaier * 149130365Smlaier * action the routine to call when the class is over limit. 150130365Smlaier * 151130365Smlaier * maxq max allowable queue size for class (in packets). 152130365Smlaier * 153130365Smlaier * parent parent class pointer. 154130365Smlaier * 155130365Smlaier * borrow class to borrow from (should be either 'parent' or null). 156130365Smlaier * 157130365Smlaier * maxidle max value allowed for class 'idle' time estimate (this 158130365Smlaier * parameter determines how large an initial burst of packets 159130365Smlaier * can be before overlimit action is invoked. 160130365Smlaier * 161130365Smlaier * offtime how long 'delay' action will delay when class goes over 162130365Smlaier * limit (this parameter determines the steady-state burst 163130365Smlaier * size when a class is running over its limit). 164130365Smlaier * 165130365Smlaier * Maxidle and offtime have to be computed from the following: If the 166130365Smlaier * average packet size is s, the bandwidth fraction allocated to this 167130365Smlaier * class is f, we want to allow b packet bursts, and the gain of the 168130365Smlaier * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: 169130365Smlaier * 170130365Smlaier * ptime = s * nsPerByte * (1 - f) / f 171130365Smlaier * maxidle = ptime * (1 - g^b) / g^b 172130365Smlaier * minidle = -ptime * (1 / (f - 1)) 173130365Smlaier * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) 174130365Smlaier * 175130365Smlaier * Operationally, it's convenient to specify maxidle & offtime in units 176130365Smlaier * independent of the link bandwidth so the maxidle & offtime passed to 177130365Smlaier * this routine are the above values multiplied by 8*f/(1000*nsPerByte). 
178130365Smlaier * (The constant factor is a scale factor needed to make the parameters 179130365Smlaier * integers. This scaling also means that the 'unscaled' values of 180130365Smlaier * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, 181130365Smlaier * not nanoseconds.) Also note that the 'idle' filter computation keeps 182130365Smlaier * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of 183130365Smlaier * maxidle also must be scaled upward by this value. Thus, the passed 184130365Smlaier * values for maxidle and offtime can be computed as follows: 185130365Smlaier * 186130365Smlaier * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) 187130365Smlaier * offtime = offtime * 8 / (1000 * nsecPerByte) 188130365Smlaier * 189130365Smlaier * When USE_HRTIME is employed, then maxidle and offtime become: 190130365Smlaier * maxidle = maxilde * (8.0 / nsecPerByte); 191130365Smlaier * offtime = offtime * (8.0 / nsecPerByte); 192130365Smlaier */ 193130365Smlaierstruct rm_class * 194130365Smlaierrmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, 195130365Smlaier void (*action)(rm_class_t *, rm_class_t *), int maxq, 196130365Smlaier struct rm_class *parent, struct rm_class *borrow, u_int maxidle, 197130365Smlaier int minidle, u_int offtime, int pktsize, int flags) 198130365Smlaier{ 199130365Smlaier struct rm_class *cl; 200130365Smlaier struct rm_class *peer; 201130365Smlaier int s; 202130365Smlaier 203130365Smlaier if (pri >= RM_MAXPRIO) 204130365Smlaier return (NULL); 205130365Smlaier#ifndef ALTQ_RED 206130365Smlaier if (flags & RMCF_RED) { 207130365Smlaier#ifdef ALTQ_DEBUG 208130365Smlaier printf("rmc_newclass: RED not configured for CBQ!\n"); 209130365Smlaier#endif 210130365Smlaier return (NULL); 211130365Smlaier } 212130365Smlaier#endif 213130365Smlaier#ifndef ALTQ_RIO 214130365Smlaier if (flags & RMCF_RIO) { 215130365Smlaier#ifdef ALTQ_DEBUG 216130365Smlaier printf("rmc_newclass: RIO not configured for 
CBQ!\n"); 217130365Smlaier#endif 218130365Smlaier return (NULL); 219130365Smlaier } 220130365Smlaier#endif 221130365Smlaier 222240646Sglebius cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO); 223130365Smlaier if (cl == NULL) 224130365Smlaier return (NULL); 225130365Smlaier CALLOUT_INIT(&cl->callout_); 226240646Sglebius cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); 227130365Smlaier if (cl->q_ == NULL) { 228184205Sdes free(cl, M_DEVBUF); 229130365Smlaier return (NULL); 230130365Smlaier } 231130365Smlaier 232130365Smlaier /* 233130365Smlaier * Class initialization. 234130365Smlaier */ 235130365Smlaier cl->children_ = NULL; 236130365Smlaier cl->parent_ = parent; 237130365Smlaier cl->borrow_ = borrow; 238130365Smlaier cl->leaf_ = 1; 239130365Smlaier cl->ifdat_ = ifd; 240130365Smlaier cl->pri_ = pri; 241130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 242130365Smlaier cl->depth_ = 0; 243130365Smlaier cl->qthresh_ = 0; 244130365Smlaier cl->ns_per_byte_ = nsecPerByte; 245130365Smlaier 246130365Smlaier qlimit(cl->q_) = maxq; 247130365Smlaier qtype(cl->q_) = Q_DROPHEAD; 248130365Smlaier qlen(cl->q_) = 0; 249130365Smlaier cl->flags_ = flags; 250130365Smlaier 251130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 252130365Smlaier cl->minidle_ = (minidle * (int)nsecPerByte) / 8; 253130365Smlaier if (cl->minidle_ > 0) 254130365Smlaier cl->minidle_ = 0; 255130365Smlaier#else 256130365Smlaier cl->minidle_ = minidle; 257130365Smlaier#endif 258130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 259130365Smlaier if (cl->maxidle_ == 0) 260130365Smlaier cl->maxidle_ = 1; 261130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 262130365Smlaier cl->avgidle_ = cl->maxidle_; 263130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 264130365Smlaier if (cl->offtime_ == 0) 265130365Smlaier cl->offtime_ = 1; 266130365Smlaier#else 267130365Smlaier cl->avgidle_ = 0; 268130365Smlaier 
cl->offtime_ = (offtime * nsecPerByte) / 8; 269130365Smlaier#endif 270130365Smlaier cl->overlimit = action; 271130365Smlaier 272130365Smlaier#ifdef ALTQ_RED 273130365Smlaier if (flags & (RMCF_RED|RMCF_RIO)) { 274130365Smlaier int red_flags, red_pkttime; 275130365Smlaier 276130365Smlaier red_flags = 0; 277130365Smlaier if (flags & RMCF_ECN) 278130365Smlaier red_flags |= REDF_ECN; 279130365Smlaier if (flags & RMCF_FLOWVALVE) 280130365Smlaier red_flags |= REDF_FLOWVALVE; 281130365Smlaier#ifdef ALTQ_RIO 282130365Smlaier if (flags & RMCF_CLEARDSCP) 283130365Smlaier red_flags |= RIOF_CLEARDSCP; 284130365Smlaier#endif 285130365Smlaier red_pkttime = nsecPerByte * pktsize / 1000; 286130365Smlaier 287130365Smlaier if (flags & RMCF_RED) { 288130365Smlaier cl->red_ = red_alloc(0, 0, 289130365Smlaier qlimit(cl->q_) * 10/100, 290130365Smlaier qlimit(cl->q_) * 30/100, 291130365Smlaier red_flags, red_pkttime); 292130365Smlaier if (cl->red_ != NULL) 293130365Smlaier qtype(cl->q_) = Q_RED; 294130365Smlaier } 295130365Smlaier#ifdef ALTQ_RIO 296130365Smlaier else { 297130365Smlaier cl->red_ = (red_t *)rio_alloc(0, NULL, 298130365Smlaier red_flags, red_pkttime); 299130365Smlaier if (cl->red_ != NULL) 300130365Smlaier qtype(cl->q_) = Q_RIO; 301130365Smlaier } 302130365Smlaier#endif 303130365Smlaier } 304130365Smlaier#endif /* ALTQ_RED */ 305130365Smlaier 306130365Smlaier /* 307130365Smlaier * put the class into the class tree 308130365Smlaier */ 309130365Smlaier#ifdef __NetBSD__ 310130365Smlaier s = splnet(); 311130365Smlaier#else 312130365Smlaier s = splimp(); 313130365Smlaier#endif 314130368Smlaier IFQ_LOCK(ifd->ifq_); 315130365Smlaier if ((peer = ifd->active_[pri]) != NULL) { 316130365Smlaier /* find the last class at this pri */ 317130365Smlaier cl->peer_ = peer; 318130365Smlaier while (peer->peer_ != ifd->active_[pri]) 319130365Smlaier peer = peer->peer_; 320130365Smlaier peer->peer_ = cl; 321130365Smlaier } else { 322130365Smlaier ifd->active_[pri] = cl; 323130365Smlaier cl->peer_ 
= cl; 324130365Smlaier } 325130365Smlaier 326130365Smlaier if (cl->parent_) { 327130365Smlaier cl->next_ = parent->children_; 328130365Smlaier parent->children_ = cl; 329130365Smlaier parent->leaf_ = 0; 330130365Smlaier } 331130365Smlaier 332130365Smlaier /* 333130365Smlaier * Compute the depth of this class and its ancestors in the class 334130365Smlaier * hierarchy. 335130365Smlaier */ 336130365Smlaier rmc_depth_compute(cl); 337130365Smlaier 338130365Smlaier /* 339130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 340130365Smlaier */ 341130365Smlaier if (ifd->wrr_) { 342130365Smlaier ifd->num_[pri]++; 343130365Smlaier ifd->alloc_[pri] += cl->allotment_; 344130365Smlaier rmc_wrr_set_weights(ifd); 345130365Smlaier } 346130368Smlaier IFQ_UNLOCK(ifd->ifq_); 347130365Smlaier splx(s); 348130365Smlaier return (cl); 349130365Smlaier} 350130365Smlaier 351130365Smlaierint 352130365Smlaierrmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, 353130365Smlaier int minidle, u_int offtime, int pktsize) 354130365Smlaier{ 355130365Smlaier struct rm_ifdat *ifd; 356130365Smlaier u_int old_allotment; 357130365Smlaier int s; 358130365Smlaier 359130365Smlaier ifd = cl->ifdat_; 360130365Smlaier old_allotment = cl->allotment_; 361130365Smlaier 362130365Smlaier#ifdef __NetBSD__ 363130365Smlaier s = splnet(); 364130365Smlaier#else 365130365Smlaier s = splimp(); 366130365Smlaier#endif 367130368Smlaier IFQ_LOCK(ifd->ifq_); 368130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 369130365Smlaier cl->qthresh_ = 0; 370130365Smlaier cl->ns_per_byte_ = nsecPerByte; 371130365Smlaier 372130365Smlaier qlimit(cl->q_) = maxq; 373130365Smlaier 374130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 375130365Smlaier cl->minidle_ = (minidle * nsecPerByte) / 8; 376130365Smlaier if (cl->minidle_ > 0) 377130365Smlaier cl->minidle_ = 0; 378130365Smlaier#else 379130365Smlaier cl->minidle_ = minidle; 380130365Smlaier#endif 
381130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 382130365Smlaier if (cl->maxidle_ == 0) 383130365Smlaier cl->maxidle_ = 1; 384130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 385130365Smlaier cl->avgidle_ = cl->maxidle_; 386130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 387130365Smlaier if (cl->offtime_ == 0) 388130365Smlaier cl->offtime_ = 1; 389130365Smlaier#else 390130365Smlaier cl->avgidle_ = 0; 391130365Smlaier cl->offtime_ = (offtime * nsecPerByte) / 8; 392130365Smlaier#endif 393130365Smlaier 394130365Smlaier /* 395130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 396130365Smlaier */ 397130365Smlaier if (ifd->wrr_) { 398130365Smlaier ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; 399130365Smlaier rmc_wrr_set_weights(ifd); 400130365Smlaier } 401130368Smlaier IFQ_UNLOCK(ifd->ifq_); 402130365Smlaier splx(s); 403130365Smlaier return (0); 404130365Smlaier} 405130365Smlaier 406130365Smlaier/* 407130365Smlaier * static void 408130365Smlaier * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes 409130365Smlaier * the appropriate run robin weights for the CBQ weighted round robin 410130365Smlaier * algorithm. 411130365Smlaier * 412130365Smlaier * Returns: NONE 413130365Smlaier */ 414130365Smlaier 415130365Smlaierstatic void 416130365Smlaierrmc_wrr_set_weights(struct rm_ifdat *ifd) 417130365Smlaier{ 418130365Smlaier int i; 419130365Smlaier struct rm_class *cl, *clh; 420130365Smlaier 421130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 422130365Smlaier /* 423130365Smlaier * This is inverted from that of the simulator to 424130365Smlaier * maintain precision. 425130365Smlaier */ 426130365Smlaier if (ifd->num_[i] == 0) 427130365Smlaier ifd->M_[i] = 0; 428130365Smlaier else 429130365Smlaier ifd->M_[i] = ifd->alloc_[i] / 430130365Smlaier (ifd->num_[i] * ifd->maxpkt_); 431130365Smlaier /* 432130365Smlaier * Compute the weighted allotment for each class. 
433130365Smlaier * This takes the expensive div instruction out 434130365Smlaier * of the main loop for the wrr scheduling path. 435130365Smlaier * These only get recomputed when a class comes or 436130365Smlaier * goes. 437130365Smlaier */ 438130365Smlaier if (ifd->active_[i] != NULL) { 439130365Smlaier clh = cl = ifd->active_[i]; 440130365Smlaier do { 441130365Smlaier /* safe-guard for slow link or alloc_ == 0 */ 442130365Smlaier if (ifd->M_[i] == 0) 443130365Smlaier cl->w_allotment_ = 0; 444130365Smlaier else 445130365Smlaier cl->w_allotment_ = cl->allotment_ / 446130365Smlaier ifd->M_[i]; 447130365Smlaier cl = cl->peer_; 448130365Smlaier } while ((cl != NULL) && (cl != clh)); 449130365Smlaier } 450130365Smlaier } 451130365Smlaier} 452130365Smlaier 453130365Smlaierint 454130365Smlaierrmc_get_weight(struct rm_ifdat *ifd, int pri) 455130365Smlaier{ 456130365Smlaier if ((pri >= 0) && (pri < RM_MAXPRIO)) 457130365Smlaier return (ifd->M_[pri]); 458130365Smlaier else 459130365Smlaier return (0); 460130365Smlaier} 461130365Smlaier 462130365Smlaier/* 463130365Smlaier * static void 464130365Smlaier * rmc_depth_compute(struct rm_class *cl) - This function computes the 465130365Smlaier * appropriate depth of class 'cl' and its ancestors. 466130365Smlaier * 467130365Smlaier * Returns: NONE 468130365Smlaier */ 469130365Smlaier 470130365Smlaierstatic void 471130365Smlaierrmc_depth_compute(struct rm_class *cl) 472130365Smlaier{ 473130365Smlaier rm_class_t *t = cl, *p; 474130365Smlaier 475130365Smlaier /* 476130365Smlaier * Recompute the depth for the branch of the tree. 
477130365Smlaier */ 478130365Smlaier while (t != NULL) { 479130365Smlaier p = t->parent_; 480130365Smlaier if (p && (t->depth_ >= p->depth_)) { 481130365Smlaier p->depth_ = t->depth_ + 1; 482130365Smlaier t = p; 483130365Smlaier } else 484130365Smlaier t = NULL; 485130365Smlaier } 486130365Smlaier} 487130365Smlaier 488130365Smlaier/* 489130365Smlaier * static void 490130365Smlaier * rmc_depth_recompute(struct rm_class *cl) - This function re-computes 491130365Smlaier * the depth of the tree after a class has been deleted. 492130365Smlaier * 493130365Smlaier * Returns: NONE 494130365Smlaier */ 495130365Smlaier 496130365Smlaierstatic void 497130365Smlaierrmc_depth_recompute(rm_class_t *cl) 498130365Smlaier{ 499130365Smlaier#if 1 /* ALTQ */ 500130365Smlaier rm_class_t *p, *t; 501130365Smlaier 502130365Smlaier p = cl; 503130365Smlaier while (p != NULL) { 504130365Smlaier if ((t = p->children_) == NULL) { 505130365Smlaier p->depth_ = 0; 506130365Smlaier } else { 507130365Smlaier int cdepth = 0; 508130365Smlaier 509130365Smlaier while (t != NULL) { 510130365Smlaier if (t->depth_ > cdepth) 511130365Smlaier cdepth = t->depth_; 512130365Smlaier t = t->next_; 513130365Smlaier } 514130365Smlaier 515130365Smlaier if (p->depth_ == cdepth + 1) 516130365Smlaier /* no change to this parent */ 517130365Smlaier return; 518130365Smlaier 519130365Smlaier p->depth_ = cdepth + 1; 520130365Smlaier } 521130365Smlaier 522130365Smlaier p = p->parent_; 523130365Smlaier } 524130365Smlaier#else 525130365Smlaier rm_class_t *t; 526130365Smlaier 527130365Smlaier if (cl->depth_ >= 1) { 528130365Smlaier if (cl->children_ == NULL) { 529130365Smlaier cl->depth_ = 0; 530130365Smlaier } else if ((t = cl->children_) != NULL) { 531130365Smlaier while (t != NULL) { 532130365Smlaier if (t->children_ != NULL) 533130365Smlaier rmc_depth_recompute(t); 534130365Smlaier t = t->next_; 535130365Smlaier } 536130365Smlaier } else 537130365Smlaier rmc_depth_compute(cl); 538130365Smlaier } 539130365Smlaier#endif 
540130365Smlaier} 541130365Smlaier 542130365Smlaier/* 543130365Smlaier * void 544130365Smlaier * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This 545130365Smlaier * function deletes a class from the link-sharing structure and frees 546130365Smlaier * all resources associated with the class. 547130365Smlaier * 548130365Smlaier * Returns: NONE 549130365Smlaier */ 550130365Smlaier 551130365Smlaiervoid 552130365Smlaierrmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) 553130365Smlaier{ 554130365Smlaier struct rm_class *p, *head, *previous; 555130365Smlaier int s; 556130365Smlaier 557130365Smlaier ASSERT(cl->children_ == NULL); 558130365Smlaier 559130365Smlaier if (cl->sleeping_) 560130365Smlaier CALLOUT_STOP(&cl->callout_); 561130365Smlaier 562130365Smlaier#ifdef __NetBSD__ 563130365Smlaier s = splnet(); 564130365Smlaier#else 565130365Smlaier s = splimp(); 566130365Smlaier#endif 567130368Smlaier IFQ_LOCK(ifd->ifq_); 568130365Smlaier /* 569130365Smlaier * Free packets in the packet queue. 570130365Smlaier * XXX - this may not be a desired behavior. Packets should be 571130365Smlaier * re-queued. 572130365Smlaier */ 573130365Smlaier rmc_dropall(cl); 574130365Smlaier 575130365Smlaier /* 576130365Smlaier * If the class has a parent, then remove the class from the 577130365Smlaier * class from the parent's children chain. 
578130365Smlaier */ 579130365Smlaier if (cl->parent_ != NULL) { 580130365Smlaier head = cl->parent_->children_; 581130365Smlaier p = previous = head; 582130365Smlaier if (head->next_ == NULL) { 583130365Smlaier ASSERT(head == cl); 584130365Smlaier cl->parent_->children_ = NULL; 585130365Smlaier cl->parent_->leaf_ = 1; 586130365Smlaier } else while (p != NULL) { 587130365Smlaier if (p == cl) { 588130365Smlaier if (cl == head) 589130365Smlaier cl->parent_->children_ = cl->next_; 590130365Smlaier else 591130365Smlaier previous->next_ = cl->next_; 592130365Smlaier cl->next_ = NULL; 593130365Smlaier p = NULL; 594130365Smlaier } else { 595130365Smlaier previous = p; 596130365Smlaier p = p->next_; 597130365Smlaier } 598130365Smlaier } 599130365Smlaier } 600130365Smlaier 601130365Smlaier /* 602130365Smlaier * Delete class from class priority peer list. 603130365Smlaier */ 604130365Smlaier if ((p = ifd->active_[cl->pri_]) != NULL) { 605130365Smlaier /* 606130365Smlaier * If there is more than one member of this priority 607130365Smlaier * level, then look for class(cl) in the priority level. 608130365Smlaier */ 609130365Smlaier if (p != p->peer_) { 610130365Smlaier while (p->peer_ != cl) 611130365Smlaier p = p->peer_; 612130365Smlaier p->peer_ = cl->peer_; 613130365Smlaier 614130365Smlaier if (ifd->active_[cl->pri_] == cl) 615130365Smlaier ifd->active_[cl->pri_] = cl->peer_; 616130365Smlaier } else { 617130365Smlaier ASSERT(p == cl); 618130365Smlaier ifd->active_[cl->pri_] = NULL; 619130365Smlaier } 620130365Smlaier } 621130365Smlaier 622130365Smlaier /* 623130365Smlaier * Recompute the WRR weights. 624130365Smlaier */ 625130365Smlaier if (ifd->wrr_) { 626130365Smlaier ifd->alloc_[cl->pri_] -= cl->allotment_; 627130365Smlaier ifd->num_[cl->pri_]--; 628130365Smlaier rmc_wrr_set_weights(ifd); 629130365Smlaier } 630130365Smlaier 631130365Smlaier /* 632130365Smlaier * Re-compute the depth of the tree. 
633130365Smlaier */ 634130365Smlaier#if 1 /* ALTQ */ 635130365Smlaier rmc_depth_recompute(cl->parent_); 636130365Smlaier#else 637130365Smlaier rmc_depth_recompute(ifd->root_); 638130365Smlaier#endif 639130365Smlaier 640130368Smlaier IFQ_UNLOCK(ifd->ifq_); 641130365Smlaier splx(s); 642130365Smlaier 643130365Smlaier /* 644130365Smlaier * Free the class structure. 645130365Smlaier */ 646130365Smlaier if (cl->red_ != NULL) { 647130365Smlaier#ifdef ALTQ_RIO 648130365Smlaier if (q_is_rio(cl->q_)) 649130365Smlaier rio_destroy((rio_t *)cl->red_); 650130365Smlaier#endif 651130365Smlaier#ifdef ALTQ_RED 652130365Smlaier if (q_is_red(cl->q_)) 653130365Smlaier red_destroy(cl->red_); 654130365Smlaier#endif 655130365Smlaier } 656184205Sdes free(cl->q_, M_DEVBUF); 657184205Sdes free(cl, M_DEVBUF); 658130365Smlaier} 659130365Smlaier 660130365Smlaier 661130365Smlaier/* 662130365Smlaier * void 663130365Smlaier * rmc_init(...) - Initialize the resource management data structures 664130365Smlaier * associated with the output portion of interface 'ifp'. 'ifd' is 665130365Smlaier * where the structures will be built (for backwards compatibility, the 666130365Smlaier * structures aren't kept in the ifnet struct). 'nsecPerByte' 667130365Smlaier * gives the link speed (inverse of bandwidth) in nanoseconds/byte. 668130365Smlaier * 'restart' is the driver-specific routine that the generic 'delay 669130365Smlaier * until under limit' action will call to restart output. `maxq' 670130365Smlaier * is the queue size of the 'link' & 'default' classes. 'maxqueued' 671130365Smlaier * is the maximum number of packets that the resource management 672130365Smlaier * code will allow to be queued 'downstream' (this is typically 1). 
673130365Smlaier * 674130365Smlaier * Returns: NONE 675130365Smlaier */ 676130365Smlaier 677130365Smlaiervoid 678130365Smlaierrmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, 679130365Smlaier void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, 680130365Smlaier int minidle, u_int offtime, int flags) 681130365Smlaier{ 682130365Smlaier int i, mtu; 683130365Smlaier 684130365Smlaier /* 685130365Smlaier * Initialize the CBQ tracing/debug facility. 686130365Smlaier */ 687130365Smlaier CBQTRACEINIT(); 688130365Smlaier 689130365Smlaier bzero((char *)ifd, sizeof (*ifd)); 690130365Smlaier mtu = ifq->altq_ifp->if_mtu; 691130365Smlaier ifd->ifq_ = ifq; 692130365Smlaier ifd->restart = restart; 693130365Smlaier ifd->maxqueued_ = maxqueued; 694130365Smlaier ifd->ns_per_byte_ = nsecPerByte; 695130365Smlaier ifd->maxpkt_ = mtu; 696130365Smlaier ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; 697130365Smlaier ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; 698130365Smlaier#if 1 699130365Smlaier ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; 700130365Smlaier if (mtu * nsecPerByte > 10 * 1000000) 701130365Smlaier ifd->maxiftime_ /= 4; 702130365Smlaier#endif 703130365Smlaier 704130365Smlaier reset_cutoff(ifd); 705130365Smlaier CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); 706130365Smlaier 707130365Smlaier /* 708130365Smlaier * Initialize the CBQ's WRR state. 709130365Smlaier */ 710130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 711130365Smlaier ifd->alloc_[i] = 0; 712130365Smlaier ifd->M_[i] = 0; 713130365Smlaier ifd->num_[i] = 0; 714130365Smlaier ifd->na_[i] = 0; 715130365Smlaier ifd->active_[i] = NULL; 716130365Smlaier } 717130365Smlaier 718130365Smlaier /* 719130365Smlaier * Initialize current packet state. 
720130365Smlaier */ 721130365Smlaier ifd->qi_ = 0; 722130365Smlaier ifd->qo_ = 0; 723130365Smlaier for (i = 0; i < RM_MAXQUEUED; i++) { 724130365Smlaier ifd->class_[i] = NULL; 725130365Smlaier ifd->curlen_[i] = 0; 726130365Smlaier ifd->borrowed_[i] = NULL; 727130365Smlaier } 728130365Smlaier 729130365Smlaier /* 730130365Smlaier * Create the root class of the link-sharing structure. 731130365Smlaier */ 732130365Smlaier if ((ifd->root_ = rmc_newclass(0, ifd, 733130365Smlaier nsecPerByte, 734130365Smlaier rmc_root_overlimit, maxq, 0, 0, 735130365Smlaier maxidle, minidle, offtime, 736130365Smlaier 0, 0)) == NULL) { 737130365Smlaier printf("rmc_init: root class not allocated\n"); 738130365Smlaier return ; 739130365Smlaier } 740130365Smlaier ifd->root_->depth_ = 0; 741130365Smlaier} 742130365Smlaier 743130365Smlaier/* 744130365Smlaier * void 745130365Smlaier * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by 746130365Smlaier * mbuf 'm' to queue for resource class 'cl'. This routine is called 747130365Smlaier * by a driver's if_output routine. This routine must be called with 748130365Smlaier * output packet completion interrupts locked out (to avoid racing with 749130365Smlaier * rmc_dequeue_next). 
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			/*
			 * the class is underlimit: it may lower the
			 * interface-wide borrowing cutoff to its own depth.
			 */
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed (dropped by the queueing discipline) */
		return (-1);

	if (is_empty) {
		/* queue went non-empty: count this class as active */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	active classes to see whether they are satisfied.  If some class is
 *	not satisfied, lower the borrowing cutoff to that class' depth;
 *	otherwise reset the cutoff.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	/* scan priorities from highest to lowest */
	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied.  0, otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	/* a class still under suspension counts as satisfied */
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		/* leaf class: unsatisfied only if awake with a deep backlog */
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	/* interior class: satisfied only if all children are satisfied */
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		/* still suspended by a pending delay action? */
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* suspension has expired; wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	/*
	 * Walk up the borrow chain while this class and each ancestor we
	 * borrow from are overlimit; 'top' tracks the last borrowable class.
	 */
	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	/* record which ancestor the original class is borrowing from */
	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 *	Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		/* skip priority levels with no active classes */
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
	_wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			/* replenish the byte allotment once per round */
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		/* advance the round-robin pointer once allocation is spent */
		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		/* record this transmission in the in-flight ring */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				/* send from the first underlimit class found */
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Deque the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/* packet-by-packet round robin: always advance */
		ifd->active_[cpri] = cl->peer_;

		/* record this transmission in the in-flight ring */
		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	/* never exceed the permitted number of in-flight transmissions */
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	/* walk from the completed class up to the root, updating avgidle */
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		/* EWMA of the idle time with gain 1/2^RM_FILTER_GAIN */
		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			/* underlimit: clamp avgidle and cancel any suspension */
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			/* credit each class below the borrowed ancestor */
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	/* drop the packet at the head of this class' queue */
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

/* Flush all packets queued for class 'cl' and mark the class inactive. */
void rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto(struct timeval *);

/* Convert the absolute time 'tv' into a tick count relative to now. */
static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is result of borrow class being overlimit, then
 *	delay for the offtime of the borrowing class that is overlimit.
 *
 *	Returns:	NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	/* base delay: time until the class becomes underlimit again */
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	/* block network interrupts while inspecting/restarting the class */
#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_LOCK(ifd->ifq_);
	if (cl->sleeping_) {
		/* wake the class and kick the driver if it can take more */
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *	The root class must never be overlimit, so reaching here is fatal.
 *
 *	Returns:	NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code whether queues are red queues or droptail
 *	queues.
 */

/*
 * _rmc_addq(cl, m) - Enqueue mbuf 'm' on class 'cl''s queue, dispatching
 *	to the RIO or RED enqueue routine when the queue is so flagged;
 *	otherwise tail-enqueue via _addq().  When the class has
 *	RMCF_CLEARDSCP set, the packet's DS field is zeroed first.
 *
 *	Returns:	0 on plain enqueue; otherwise the RED/RIO
 *			enqueue routine's result.
 */
static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t	*m;

	/* Drop (free) the packet at the head of the class queue, if any. */
	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

/*
 * _rmc_getq(cl) - Dequeue the next packet from class 'cl''s queue,
 *	using the RIO/RED dequeue routine when the queue is so flagged.
 *
 *	Returns:	the dequeued mbuf, or NULL when the queue is empty.
 */
static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

/* Peek at the packet at the head of the class queue without removing it. */
static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

/* Circular event-trace buffer; see cbqtrace_dump() below. */
struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

/* Table mapping traced function pointers back to printable names. */
static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

/*
 * rmc_funcname(func) - Linear lookup of 'func' in rmc_funcs[];
 *	returns its name, or "unknown" if it is not in the table.
 */
static char *rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

/*
 * cbqtrace_dump(counter) - Print 20 trace records starting at the
 *	'counter'-th slot of the circular buffer.  Each record is read
 *	as four consecutive ints: a raw tag, a traced function pointer
 *	(printed via rmc_funcname()), a 4-character event code, and an
 *	integer value; the walk wraps at the end of the buffer.
 */
void cbqtrace_dump(int counter)
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif
/* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

/*
 * Generic class-queue primitives shared by the ALTQ disciplines.
 *
 * The queue is a circular, singly-linked list of mbufs chained through
 * m_nextpkt: qtail(q) points at the most recently enqueued packet, and
 * the tail's m_nextpkt points back at the head.  An empty queue has
 * qtail(q) == NULL.
 */

/* Append 'm' at the tail of 'q'. */
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;	/* new tail -> old head */
	else
		m0 = m;				/* empty: m links to itself */
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

/* Dequeue the packet at the head of 'q'; NULL when the queue is empty. */
mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)		/* m0 is the head */
		m->m_nextpkt = m0->m_nextpkt;	/* unlink head from the ring */
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;		/* sole packet: queue now empty */
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	/* walk the ring to find the element preceding the tail */
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {			/* sole packet */
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;		/* predecessor becomes the tail */
	qlen(q)--;
	m->m_nextpkt = NULL;

	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {		/* sole packet */
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf	*prev = NULL;

		/*
		 * Advance 1..qlen steps from the tail; n == qlen lands
		 * back on the tail itself.  n >= 1 guarantees 'prev' is
		 * set before it is dereferenced.
		 */
		n = arc4random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/*
 * Unlink 'm' from 'q'.  The caller must guarantee that 'm' is actually
 * on 'q' and that 'q' is non-empty: the search loop below does not
 * terminate (and dereferences NULL on an empty queue) otherwise.
 */
void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)				/* sole packet */
		qtail(q) = NULL;
	else if (qtail(q) == m)			/* removing the tail */
		qtail(q) = prev;
	qlen(q)--;
}

/* Dequeue and free every packet on 'q'. */
void
_flushq(class_queue_t *q)
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */